cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ena_netdev.c (126603B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
      4 */
      5
      6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      7
      8#ifdef CONFIG_RFS_ACCEL
      9#include <linux/cpu_rmap.h>
     10#endif /* CONFIG_RFS_ACCEL */
     11#include <linux/ethtool.h>
     12#include <linux/kernel.h>
     13#include <linux/module.h>
     14#include <linux/numa.h>
     15#include <linux/pci.h>
     16#include <linux/utsname.h>
     17#include <linux/version.h>
     18#include <linux/vmalloc.h>
     19#include <net/ip.h>
     20
     21#include "ena_netdev.h"
     22#include <linux/bpf_trace.h>
     23#include "ena_pci_id_tbl.h"
     24
     25MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
     26MODULE_DESCRIPTION(DEVICE_NAME);
     27MODULE_LICENSE("GPL");
     28
     29/* Time in jiffies before concluding the transmitter is hung. */
     30#define TX_TIMEOUT  (5 * HZ)
     31
     32#define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
     33
     34#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
     35		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
     36
     37static struct ena_aenq_handlers aenq_handlers;
     38
     39static struct workqueue_struct *ena_wq;
     40
     41MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
     42
     43static int ena_rss_init_default(struct ena_adapter *adapter);
     44static void check_for_admin_com_state(struct ena_adapter *adapter);
     45static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
     46static int ena_restore_device(struct ena_adapter *adapter);
     47
     48static void ena_init_io_rings(struct ena_adapter *adapter,
     49			      int first_index, int count);
     50static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
     51				   int count);
     52static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
     53				  int count);
     54static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
     55static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
     56					   int first_index,
     57					   int count);
     58static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
     59static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
     60static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
     61static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
     62static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
     63static void ena_napi_disable_in_range(struct ena_adapter *adapter,
     64				      int first_index, int count);
     65static void ena_napi_enable_in_range(struct ena_adapter *adapter,
     66				     int first_index, int count);
     67static int ena_up(struct ena_adapter *adapter);
     68static void ena_down(struct ena_adapter *adapter);
     69static void ena_unmask_interrupt(struct ena_ring *tx_ring,
     70				 struct ena_ring *rx_ring);
     71static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
     72				      struct ena_ring *rx_ring);
     73static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
     74			      struct ena_tx_buffer *tx_info);
     75static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
     76					    int first_index, int count);
     77
     78/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
     79static void ena_increase_stat(u64 *statp, u64 cnt,
     80			      struct u64_stats_sync *syncp)
     81{
     82	u64_stats_update_begin(syncp);
     83	(*statp) += cnt;
     84	u64_stats_update_end(syncp);
     85}
     86
     87static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
     88{
     89	ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
     90	ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
     91}
     92
     93static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
     94{
     95	struct ena_adapter *adapter = netdev_priv(dev);
     96
     97	/* Change the state of the device to trigger reset
      98	 * Check that a reset has not already been triggered
     99	 */
    100
    101	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
    102		return;
    103
    104	ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD);
    105	ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
    106
    107	netif_err(adapter, tx_err, dev, "Transmit time out\n");
    108}
    109
    110static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
    111{
    112	int i;
    113
    114	for (i = 0; i < adapter->num_io_queues; i++)
    115		adapter->rx_ring[i].mtu = mtu;
    116}
    117
    118static int ena_change_mtu(struct net_device *dev, int new_mtu)
    119{
    120	struct ena_adapter *adapter = netdev_priv(dev);
    121	int ret;
    122
    123	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
    124	if (!ret) {
    125		netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
    126		update_rx_ring_mtu(adapter, new_mtu);
    127		dev->mtu = new_mtu;
    128	} else {
    129		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
    130			  new_mtu);
    131	}
    132
    133	return ret;
    134}
    135
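        /* Common TX submission path shared by the skb and XDP xmit flows:
         * ring the doorbell early if the LLQ burst limit was reached, hand
         * the descriptors to the device via ena_com_prepare_tx(), update the
         * ring statistics and advance next_to_use. Any failure other than
         * -ENOMEM is treated as fatal and schedules a device reset.
         */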
    136static int ena_xmit_common(struct net_device *dev,
    137			   struct ena_ring *ring,
    138			   struct ena_tx_buffer *tx_info,
    139			   struct ena_com_tx_ctx *ena_tx_ctx,
    140			   u16 next_to_use,
    141			   u32 bytes)
    142{
    143	struct ena_adapter *adapter = netdev_priv(dev);
    144	int rc, nb_hw_desc;
    145
    146	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
    147						ena_tx_ctx))) {
    148		netif_dbg(adapter, tx_queued, dev,
    149			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
    150			  ring->qid);
    151		ena_ring_tx_doorbell(ring);
    152	}
    153
     154	/* prepare the packet's descriptors for the dma engine */
    155	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
    156				&nb_hw_desc);
    157
    158	/* In case there isn't enough space in the queue for the packet,
    159	 * we simply drop it. All other failure reasons of
    160	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
    161	 */
    162	if (unlikely(rc)) {
    163		netif_err(adapter, tx_queued, dev,
    164			  "Failed to prepare tx bufs\n");
    165		ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
    166				  &ring->syncp);
    167		if (rc != -ENOMEM)
    168			ena_reset_device(adapter,
    169					 ENA_REGS_RESET_DRIVER_INVALID_STATE);
    170		return rc;
    171	}
    172
    173	u64_stats_update_begin(&ring->syncp);
    174	ring->tx_stats.cnt++;
    175	ring->tx_stats.bytes += bytes;
    176	u64_stats_update_end(&ring->syncp);
    177
    178	tx_info->tx_descs = nb_hw_desc;
    179	tx_info->last_jiffies = jiffies;
    180	tx_info->print_once = 0;
    181
    182	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
    183						 ring->ring_size);
    184	return 0;
    185}
    186
    187/* This is the XDP napi callback. XDP queues use a separate napi callback
    188 * than Rx/Tx queues.
    189 */
    190static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
    191{
    192	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
    193	u32 xdp_work_done, xdp_budget;
    194	struct ena_ring *xdp_ring;
    195	int napi_comp_call = 0;
    196	int ret;
    197
    198	xdp_ring = ena_napi->xdp_ring;
    199
    200	xdp_budget = budget;
    201
    202	if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
    203	    test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
    204		napi_complete_done(napi, 0);
    205		return 0;
    206	}
    207
    208	xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
    209
     210	/* If the device is about to reset or is down, avoid unmasking
    211	 * the interrupt and return 0 so NAPI won't reschedule
    212	 */
    213	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
    214		napi_complete_done(napi, 0);
    215		ret = 0;
    216	} else if (xdp_budget > xdp_work_done) {
    217		napi_comp_call = 1;
    218		if (napi_complete_done(napi, xdp_work_done))
    219			ena_unmask_interrupt(xdp_ring, NULL);
    220		ena_update_ring_numa_node(xdp_ring, NULL);
    221		ret = xdp_work_done;
    222	} else {
    223		ret = xdp_budget;
    224	}
    225
    226	u64_stats_update_begin(&xdp_ring->syncp);
    227	xdp_ring->tx_stats.napi_comp += napi_comp_call;
    228	xdp_ring->tx_stats.tx_poll++;
    229	u64_stats_update_end(&xdp_ring->syncp);
    230	xdp_ring->tx_stats.last_napi_jiffies = jiffies;
    231
    232	return ret;
    233}
    234
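        /* Map an xdp_frame for transmission. When the TX queue uses the
         * device placement policy (LLQ), up to tx_max_header_size bytes are
         * pushed inline via push_header and only the remainder is DMA mapped.
         */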
    235static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
    236				struct ena_tx_buffer *tx_info,
    237				struct xdp_frame *xdpf,
    238				struct ena_com_tx_ctx *ena_tx_ctx)
    239{
    240	struct ena_adapter *adapter = xdp_ring->adapter;
    241	struct ena_com_buf *ena_buf;
    242	int push_len = 0;
    243	dma_addr_t dma;
    244	void *data;
    245	u32 size;
    246
    247	tx_info->xdpf = xdpf;
    248	data = tx_info->xdpf->data;
    249	size = tx_info->xdpf->len;
    250
    251	if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
    252		/* Designate part of the packet for LLQ */
    253		push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
    254
    255		ena_tx_ctx->push_header = data;
    256
    257		size -= push_len;
    258		data += push_len;
    259	}
    260
    261	ena_tx_ctx->header_len = push_len;
    262
    263	if (size > 0) {
    264		dma = dma_map_single(xdp_ring->dev,
    265				     data,
    266				     size,
    267				     DMA_TO_DEVICE);
    268		if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
    269			goto error_report_dma_error;
    270
    271		tx_info->map_linear_data = 0;
    272
    273		ena_buf = tx_info->bufs;
    274		ena_buf->paddr = dma;
    275		ena_buf->len = size;
    276
    277		ena_tx_ctx->ena_bufs = ena_buf;
    278		ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
    279	}
    280
    281	return 0;
    282
    283error_report_dma_error:
    284	ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
    285			  &xdp_ring->syncp);
    286	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
    287
    288	return -EINVAL;
    289}
    290
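        /* Submit a single XDP frame: reserve a req_id from free_ids, map the
         * frame and push it through ena_xmit_common(). The doorbell is only
         * written here when XDP_XMIT_FLUSH is set; batched callers ring it
         * once after the last frame.
         */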
    291static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
    292			      struct net_device *dev,
    293			      struct xdp_frame *xdpf,
    294			      int flags)
    295{
    296	struct ena_com_tx_ctx ena_tx_ctx = {};
    297	struct ena_tx_buffer *tx_info;
    298	u16 next_to_use, req_id;
    299	int rc;
    300
    301	next_to_use = xdp_ring->next_to_use;
    302	req_id = xdp_ring->free_ids[next_to_use];
    303	tx_info = &xdp_ring->tx_buffer_info[req_id];
    304	tx_info->num_of_bufs = 0;
    305
    306	rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
    307	if (unlikely(rc))
    308		return rc;
    309
    310	ena_tx_ctx.req_id = req_id;
    311
    312	rc = ena_xmit_common(dev,
    313			     xdp_ring,
    314			     tx_info,
    315			     &ena_tx_ctx,
    316			     next_to_use,
    317			     xdpf->len);
    318	if (rc)
    319		goto error_unmap_dma;
    320
    321	/* trigger the dma engine. ena_ring_tx_doorbell()
     322	 * issues a memory barrier internally.
    323	 */
    324	if (flags & XDP_XMIT_FLUSH)
    325		ena_ring_tx_doorbell(xdp_ring);
    326
    327	return rc;
    328
    329error_unmap_dma:
    330	ena_unmap_tx_buff(xdp_ring, tx_info);
    331	tx_info->xdpf = NULL;
    332	return rc;
    333}
    334
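        /* .ndo_xdp_xmit handler: redirected frames are spread across the XDP
         * TX rings by CPU id. Several CPUs may map to the same ring, so the
         * per-ring xdp_tx_lock serializes access.
         */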
    335static int ena_xdp_xmit(struct net_device *dev, int n,
    336			struct xdp_frame **frames, u32 flags)
    337{
    338	struct ena_adapter *adapter = netdev_priv(dev);
    339	struct ena_ring *xdp_ring;
    340	int qid, i, nxmit = 0;
    341
    342	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
    343		return -EINVAL;
    344
    345	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
    346		return -ENETDOWN;
    347
    348	/* We assume that all rings have the same XDP program */
    349	if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
    350		return -ENXIO;
    351
    352	qid = smp_processor_id() % adapter->xdp_num_queues;
    353	qid += adapter->xdp_first_ring;
    354	xdp_ring = &adapter->tx_ring[qid];
    355
     356	/* Other CPU ids might try to send through this queue */
    357	spin_lock(&xdp_ring->xdp_tx_lock);
    358
    359	for (i = 0; i < n; i++) {
    360		if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
    361			break;
    362		nxmit++;
    363	}
    364
    365	/* Ring doorbell to make device aware of the packets */
    366	if (flags & XDP_XMIT_FLUSH)
    367		ena_ring_tx_doorbell(xdp_ring);
    368
    369	spin_unlock(&xdp_ring->xdp_tx_lock);
    370
    371	/* Return number of packets sent */
    372	return nxmit;
    373}
    374
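        /* Run the attached XDP program on a received buffer and act on the
         * verdict: XDP_TX transmits on the paired XDP ring, XDP_REDIRECT
         * hands the frame to the core, and every outcome bumps the matching
         * rx_stats counter. Returns the (possibly downgraded) verdict.
         */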
    375static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
    376{
    377	struct bpf_prog *xdp_prog;
    378	struct ena_ring *xdp_ring;
    379	u32 verdict = XDP_PASS;
    380	struct xdp_frame *xdpf;
    381	u64 *xdp_stat;
    382
    383	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
    384
    385	if (!xdp_prog)
    386		goto out;
    387
    388	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
    389
    390	switch (verdict) {
    391	case XDP_TX:
    392		xdpf = xdp_convert_buff_to_frame(xdp);
    393		if (unlikely(!xdpf)) {
    394			trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
    395			xdp_stat = &rx_ring->rx_stats.xdp_aborted;
    396			verdict = XDP_ABORTED;
    397			break;
    398		}
    399
    400		/* Find xmit queue */
    401		xdp_ring = rx_ring->xdp_ring;
    402
    403		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
    404		spin_lock(&xdp_ring->xdp_tx_lock);
    405
    406		if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
    407				       XDP_XMIT_FLUSH))
    408			xdp_return_frame(xdpf);
    409
    410		spin_unlock(&xdp_ring->xdp_tx_lock);
    411		xdp_stat = &rx_ring->rx_stats.xdp_tx;
    412		break;
    413	case XDP_REDIRECT:
    414		if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
    415			xdp_stat = &rx_ring->rx_stats.xdp_redirect;
    416			break;
    417		}
    418		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
    419		xdp_stat = &rx_ring->rx_stats.xdp_aborted;
    420		verdict = XDP_ABORTED;
    421		break;
    422	case XDP_ABORTED:
    423		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
    424		xdp_stat = &rx_ring->rx_stats.xdp_aborted;
    425		break;
    426	case XDP_DROP:
    427		xdp_stat = &rx_ring->rx_stats.xdp_drop;
    428		break;
    429	case XDP_PASS:
    430		xdp_stat = &rx_ring->rx_stats.xdp_pass;
    431		break;
    432	default:
    433		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
    434		xdp_stat = &rx_ring->rx_stats.xdp_invalid;
    435	}
    436
    437	ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
    438out:
    439	return verdict;
    440}
    441
    442static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
    443{
    444	adapter->xdp_first_ring = adapter->num_io_queues;
    445	adapter->xdp_num_queues = adapter->num_io_queues;
    446
    447	ena_init_io_rings(adapter,
    448			  adapter->xdp_first_ring,
    449			  adapter->xdp_num_queues);
    450}
    451
    452static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
    453{
    454	int rc = 0;
    455
    456	rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
    457					     adapter->xdp_num_queues);
    458	if (rc)
    459		goto setup_err;
    460
    461	rc = ena_create_io_tx_queues_in_range(adapter,
    462					      adapter->xdp_first_ring,
    463					      adapter->xdp_num_queues);
    464	if (rc)
    465		goto create_err;
    466
    467	return 0;
    468
    469create_err:
    470	ena_free_all_io_tx_resources(adapter);
    471setup_err:
    472	return rc;
    473}
    474
    475/* Provides a way for both kernel and bpf-prog to know
    476 * more about the RX-queue a given XDP frame arrived on.
    477 */
    478static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
    479{
    480	int rc;
    481
    482	rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
    483
    484	if (rc) {
    485		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
    486			  "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
    487			  rx_ring->qid, rc);
    488		goto err;
    489	}
    490
    491	rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
    492					NULL);
    493
    494	if (rc) {
    495		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
    496			  "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
    497			  rx_ring->qid, rc);
    498		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
    499	}
    500
    501err:
    502	return rc;
    503}
    504
    505static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
    506{
    507	xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
    508	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
    509}
    510
    511static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
    512						 struct bpf_prog *prog,
    513						 int first, int count)
    514{
    515	struct ena_ring *rx_ring;
    516	int i = 0;
    517
    518	for (i = first; i < count; i++) {
    519		rx_ring = &adapter->rx_ring[i];
    520		xchg(&rx_ring->xdp_bpf_prog, prog);
    521		if (prog) {
    522			ena_xdp_register_rxq_info(rx_ring);
    523			rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
    524		} else {
    525			ena_xdp_unregister_rxq_info(rx_ring);
    526			rx_ring->rx_headroom = NET_SKB_PAD;
    527		}
    528	}
    529}
    530
    531static void ena_xdp_exchange_program(struct ena_adapter *adapter,
    532				     struct bpf_prog *prog)
    533{
    534	struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
    535
    536	ena_xdp_exchange_program_rx_in_range(adapter,
    537					     prog,
    538					     0,
    539					     adapter->num_io_queues);
    540
    541	if (old_bpf_prog)
    542		bpf_prog_put(old_bpf_prog);
    543}
    544
    545static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
    546{
    547	bool was_up;
    548	int rc;
    549
    550	was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
    551
    552	if (was_up)
    553		ena_down(adapter);
    554
    555	adapter->xdp_first_ring = 0;
    556	adapter->xdp_num_queues = 0;
    557	ena_xdp_exchange_program(adapter, NULL);
    558	if (was_up) {
    559		rc = ena_up(adapter);
    560		if (rc)
    561			return rc;
    562	}
    563	return 0;
    564}
    565
    566static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
    567{
    568	struct ena_adapter *adapter = netdev_priv(netdev);
    569	struct bpf_prog *prog = bpf->prog;
    570	struct bpf_prog *old_bpf_prog;
    571	int rc, prev_mtu;
    572	bool is_up;
    573
    574	is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
    575	rc = ena_xdp_allowed(adapter);
    576	if (rc == ENA_XDP_ALLOWED) {
    577		old_bpf_prog = adapter->xdp_bpf_prog;
    578		if (prog) {
    579			if (!is_up) {
    580				ena_init_all_xdp_queues(adapter);
    581			} else if (!old_bpf_prog) {
    582				ena_down(adapter);
    583				ena_init_all_xdp_queues(adapter);
    584			}
    585			ena_xdp_exchange_program(adapter, prog);
    586
    587			if (is_up && !old_bpf_prog) {
    588				rc = ena_up(adapter);
    589				if (rc)
    590					return rc;
    591			}
    592		} else if (old_bpf_prog) {
    593			rc = ena_destroy_and_free_all_xdp_queues(adapter);
    594			if (rc)
    595				return rc;
    596		}
    597
    598		prev_mtu = netdev->max_mtu;
    599		netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
    600
    601		if (!old_bpf_prog)
    602			netif_info(adapter, drv, adapter->netdev,
    603				   "XDP program is set, changing the max_mtu from %d to %d",
    604				   prev_mtu, netdev->max_mtu);
    605
    606	} else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
    607		netif_err(adapter, drv, adapter->netdev,
    608			  "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
    609			  netdev->mtu, ENA_XDP_MAX_MTU);
    610		NL_SET_ERR_MSG_MOD(bpf->extack,
    611				   "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
    612		return -EINVAL;
    613	} else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
    614		netif_err(adapter, drv, adapter->netdev,
    615			  "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
    616			  adapter->num_io_queues, adapter->max_num_io_queues);
    617		NL_SET_ERR_MSG_MOD(bpf->extack,
     618				   "Failed to set xdp program, there is not enough space for allocating XDP queues. Check the dmesg for more info");
    619		return -EINVAL;
    620	}
    621
    622	return 0;
    623}
    624
    625/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
    626 * program as well as to query the current xdp program id.
    627 */
    628static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
    629{
    630	switch (bpf->command) {
    631	case XDP_SETUP_PROG:
    632		return ena_xdp_set(netdev, bpf);
    633	default:
    634		return -EINVAL;
    635	}
    636	return 0;
    637}
    638
    639static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
    640{
    641#ifdef CONFIG_RFS_ACCEL
    642	u32 i;
    643	int rc;
    644
    645	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
    646	if (!adapter->netdev->rx_cpu_rmap)
    647		return -ENOMEM;
    648	for (i = 0; i < adapter->num_io_queues; i++) {
    649		int irq_idx = ENA_IO_IRQ_IDX(i);
    650
    651		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
    652				      pci_irq_vector(adapter->pdev, irq_idx));
    653		if (rc) {
    654			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
    655			adapter->netdev->rx_cpu_rmap = NULL;
    656			return rc;
    657		}
    658	}
    659#endif /* CONFIG_RFS_ACCEL */
    660	return 0;
    661}
    662
    663static void ena_init_io_rings_common(struct ena_adapter *adapter,
    664				     struct ena_ring *ring, u16 qid)
    665{
    666	ring->qid = qid;
    667	ring->pdev = adapter->pdev;
    668	ring->dev = &adapter->pdev->dev;
    669	ring->netdev = adapter->netdev;
    670	ring->napi = &adapter->ena_napi[qid].napi;
    671	ring->adapter = adapter;
    672	ring->ena_dev = adapter->ena_dev;
    673	ring->per_napi_packets = 0;
    674	ring->cpu = 0;
    675	ring->no_interrupt_event_cnt = 0;
    676	u64_stats_init(&ring->syncp);
    677}
    678
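        /* Initialize the software state of the IO rings in [first_index,
         * first_index + count). XDP TX rings live in the same tx_ring array
         * right after the regular IO queues, which is why each RX ring points
         * at tx_ring[i + num_io_queues] as its xdp_ring.
         */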
    679static void ena_init_io_rings(struct ena_adapter *adapter,
    680			      int first_index, int count)
    681{
    682	struct ena_com_dev *ena_dev;
    683	struct ena_ring *txr, *rxr;
    684	int i;
    685
    686	ena_dev = adapter->ena_dev;
    687
    688	for (i = first_index; i < first_index + count; i++) {
    689		txr = &adapter->tx_ring[i];
    690		rxr = &adapter->rx_ring[i];
    691
    692		/* TX common ring state */
    693		ena_init_io_rings_common(adapter, txr, i);
    694
    695		/* TX specific ring state */
    696		txr->ring_size = adapter->requested_tx_ring_size;
    697		txr->tx_max_header_size = ena_dev->tx_max_header_size;
    698		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
    699		txr->sgl_size = adapter->max_tx_sgl_size;
    700		txr->smoothed_interval =
    701			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
    702		txr->disable_meta_caching = adapter->disable_meta_caching;
    703		spin_lock_init(&txr->xdp_tx_lock);
    704
    705		/* Don't init RX queues for xdp queues */
    706		if (!ENA_IS_XDP_INDEX(adapter, i)) {
    707			/* RX common ring state */
    708			ena_init_io_rings_common(adapter, rxr, i);
    709
    710			/* RX specific ring state */
    711			rxr->ring_size = adapter->requested_rx_ring_size;
    712			rxr->rx_copybreak = adapter->rx_copybreak;
    713			rxr->sgl_size = adapter->max_rx_sgl_size;
    714			rxr->smoothed_interval =
    715				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
    716			rxr->empty_rx_queue = 0;
    717			rxr->rx_headroom = NET_SKB_PAD;
    718			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
    719			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
    720		}
    721	}
    722}
    723
    724/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
    725 * @adapter: network interface device structure
    726 * @qid: queue index
    727 *
    728 * Return 0 on success, negative on failure
    729 */
    730static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
    731{
    732	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
    733	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
    734	int size, i, node;
    735
    736	if (tx_ring->tx_buffer_info) {
    737		netif_err(adapter, ifup,
     738			  adapter->netdev, "tx_buffer_info is not NULL");
    739		return -EEXIST;
    740	}
    741
    742	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
    743	node = cpu_to_node(ena_irq->cpu);
    744
    745	tx_ring->tx_buffer_info = vzalloc_node(size, node);
    746	if (!tx_ring->tx_buffer_info) {
    747		tx_ring->tx_buffer_info = vzalloc(size);
    748		if (!tx_ring->tx_buffer_info)
    749			goto err_tx_buffer_info;
    750	}
    751
    752	size = sizeof(u16) * tx_ring->ring_size;
    753	tx_ring->free_ids = vzalloc_node(size, node);
    754	if (!tx_ring->free_ids) {
    755		tx_ring->free_ids = vzalloc(size);
    756		if (!tx_ring->free_ids)
    757			goto err_tx_free_ids;
    758	}
    759
    760	size = tx_ring->tx_max_header_size;
    761	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
    762	if (!tx_ring->push_buf_intermediate_buf) {
    763		tx_ring->push_buf_intermediate_buf = vzalloc(size);
    764		if (!tx_ring->push_buf_intermediate_buf)
    765			goto err_push_buf_intermediate_buf;
    766	}
    767
    768	/* Req id ring for TX out of order completions */
    769	for (i = 0; i < tx_ring->ring_size; i++)
    770		tx_ring->free_ids[i] = i;
    771
    772	/* Reset tx statistics */
    773	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
    774
    775	tx_ring->next_to_use = 0;
    776	tx_ring->next_to_clean = 0;
    777	tx_ring->cpu = ena_irq->cpu;
    778	return 0;
    779
    780err_push_buf_intermediate_buf:
    781	vfree(tx_ring->free_ids);
    782	tx_ring->free_ids = NULL;
    783err_tx_free_ids:
    784	vfree(tx_ring->tx_buffer_info);
    785	tx_ring->tx_buffer_info = NULL;
    786err_tx_buffer_info:
    787	return -ENOMEM;
    788}
    789
    790/* ena_free_tx_resources - Free I/O Tx Resources per Queue
    791 * @adapter: network interface device structure
    792 * @qid: queue index
    793 *
    794 * Free all transmit software resources
    795 */
    796static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
    797{
    798	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
    799
    800	vfree(tx_ring->tx_buffer_info);
    801	tx_ring->tx_buffer_info = NULL;
    802
    803	vfree(tx_ring->free_ids);
    804	tx_ring->free_ids = NULL;
    805
    806	vfree(tx_ring->push_buf_intermediate_buf);
    807	tx_ring->push_buf_intermediate_buf = NULL;
    808}
    809
    810static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
    811					   int first_index,
    812					   int count)
    813{
    814	int i, rc = 0;
    815
    816	for (i = first_index; i < first_index + count; i++) {
    817		rc = ena_setup_tx_resources(adapter, i);
    818		if (rc)
    819			goto err_setup_tx;
    820	}
    821
    822	return 0;
    823
    824err_setup_tx:
    825
    826	netif_err(adapter, ifup, adapter->netdev,
    827		  "Tx queue %d: allocation failed\n", i);
    828
    829	/* rewind the index freeing the rings as we go */
    830	while (first_index < i--)
    831		ena_free_tx_resources(adapter, i);
    832	return rc;
    833}
    834
    835static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
    836						  int first_index, int count)
    837{
    838	int i;
    839
    840	for (i = first_index; i < first_index + count; i++)
    841		ena_free_tx_resources(adapter, i);
    842}
    843
    844/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
    845 * @adapter: board private structure
    846 *
    847 * Free all transmit software resources
    848 */
    849static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
    850{
    851	ena_free_all_io_tx_resources_in_range(adapter,
    852					      0,
    853					      adapter->xdp_num_queues +
    854					      adapter->num_io_queues);
    855}
    856
    857/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
    858 * @adapter: network interface device structure
    859 * @qid: queue index
    860 *
    861 * Returns 0 on success, negative on failure
    862 */
    863static int ena_setup_rx_resources(struct ena_adapter *adapter,
    864				  u32 qid)
    865{
    866	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
    867	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
    868	int size, node, i;
    869
    870	if (rx_ring->rx_buffer_info) {
    871		netif_err(adapter, ifup, adapter->netdev,
    872			  "rx_buffer_info is not NULL");
    873		return -EEXIST;
    874	}
    875
    876	/* alloc extra element so in rx path
    877	 * we can always prefetch rx_info + 1
    878	 */
    879	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
    880	node = cpu_to_node(ena_irq->cpu);
    881
    882	rx_ring->rx_buffer_info = vzalloc_node(size, node);
    883	if (!rx_ring->rx_buffer_info) {
    884		rx_ring->rx_buffer_info = vzalloc(size);
    885		if (!rx_ring->rx_buffer_info)
    886			return -ENOMEM;
    887	}
    888
    889	size = sizeof(u16) * rx_ring->ring_size;
    890	rx_ring->free_ids = vzalloc_node(size, node);
    891	if (!rx_ring->free_ids) {
    892		rx_ring->free_ids = vzalloc(size);
    893		if (!rx_ring->free_ids) {
    894			vfree(rx_ring->rx_buffer_info);
    895			rx_ring->rx_buffer_info = NULL;
    896			return -ENOMEM;
    897		}
    898	}
    899
    900	/* Req id ring for receiving RX pkts out of order */
    901	for (i = 0; i < rx_ring->ring_size; i++)
    902		rx_ring->free_ids[i] = i;
    903
    904	/* Reset rx statistics */
    905	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
    906
    907	rx_ring->next_to_clean = 0;
    908	rx_ring->next_to_use = 0;
    909	rx_ring->cpu = ena_irq->cpu;
    910
    911	return 0;
    912}
    913
    914/* ena_free_rx_resources - Free I/O Rx Resources
    915 * @adapter: network interface device structure
    916 * @qid: queue index
    917 *
    918 * Free all receive software resources
    919 */
    920static void ena_free_rx_resources(struct ena_adapter *adapter,
    921				  u32 qid)
    922{
    923	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
    924
    925	vfree(rx_ring->rx_buffer_info);
    926	rx_ring->rx_buffer_info = NULL;
    927
    928	vfree(rx_ring->free_ids);
    929	rx_ring->free_ids = NULL;
    930}
    931
    932/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
    933 * @adapter: board private structure
    934 *
    935 * Return 0 on success, negative on failure
    936 */
    937static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
    938{
    939	int i, rc = 0;
    940
    941	for (i = 0; i < adapter->num_io_queues; i++) {
    942		rc = ena_setup_rx_resources(adapter, i);
    943		if (rc)
    944			goto err_setup_rx;
    945	}
    946
    947	return 0;
    948
    949err_setup_rx:
    950
    951	netif_err(adapter, ifup, adapter->netdev,
    952		  "Rx queue %d: allocation failed\n", i);
    953
    954	/* rewind the index freeing the rings as we go */
    955	while (i--)
    956		ena_free_rx_resources(adapter, i);
    957	return rc;
    958}
    959
    960/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
    961 * @adapter: board private structure
    962 *
    963 * Free all receive software resources
    964 */
    965static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
    966{
    967	int i;
    968
    969	for (i = 0; i < adapter->num_io_queues; i++)
    970		ena_free_rx_resources(adapter, i);
    971}
    972
    973static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
    974				       dma_addr_t *dma)
    975{
    976	struct page *page;
    977
    978	/* This would allocate the page on the same NUMA node the executing code
    979	 * is running on.
    980	 */
    981	page = dev_alloc_page();
    982	if (!page) {
    983		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
    984				  &rx_ring->syncp);
    985		return ERR_PTR(-ENOSPC);
    986	}
    987
    988	/* To enable NIC-side port-mirroring, AKA SPAN port,
    989	 * we make the buffer readable from the nic as well
    990	 */
    991	*dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
    992			    DMA_BIDIRECTIONAL);
    993	if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
    994		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
    995				  &rx_ring->syncp);
    996		__free_page(page);
    997		return ERR_PTR(-EIO);
    998	}
    999
   1000	return page;
   1001}
   1002
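        /* Attach a freshly mapped page to @rx_info unless it already holds
         * one. Headroom is reserved at the start of the page and room for
         * skb_shared_info at the end, so ena_buf covers only the area the
         * device may write to.
         */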
   1003static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
   1004			       struct ena_rx_buffer *rx_info)
   1005{
   1006	int headroom = rx_ring->rx_headroom;
   1007	struct ena_com_buf *ena_buf;
   1008	struct page *page;
   1009	dma_addr_t dma;
   1010	int tailroom;
   1011
   1012	/* restore page offset value in case it has been changed by device */
   1013	rx_info->page_offset = headroom;
   1014
    1015	/* if the previously allocated page has not been used, reuse it */
   1016	if (unlikely(rx_info->page))
   1017		return 0;
   1018
   1019	/* We handle DMA here */
   1020	page = ena_alloc_map_page(rx_ring, &dma);
   1021	if (unlikely(IS_ERR(page)))
   1022		return PTR_ERR(page);
   1023
   1024	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1025		  "Allocate page %p, rx_info %p\n", page, rx_info);
   1026
   1027	tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
   1028
   1029	rx_info->page = page;
   1030	ena_buf = &rx_info->ena_buf;
   1031	ena_buf->paddr = dma + headroom;
   1032	ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
   1033
   1034	return 0;
   1035}
   1036
   1037static void ena_unmap_rx_buff(struct ena_ring *rx_ring,
   1038			      struct ena_rx_buffer *rx_info)
   1039{
   1040	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
   1041
   1042	dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
   1043		       ENA_PAGE_SIZE,
   1044		       DMA_BIDIRECTIONAL);
   1045}
   1046
   1047static void ena_free_rx_page(struct ena_ring *rx_ring,
   1048			     struct ena_rx_buffer *rx_info)
   1049{
   1050	struct page *page = rx_info->page;
   1051
   1052	if (unlikely(!page)) {
   1053		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
   1054			   "Trying to free unallocated buffer\n");
   1055		return;
   1056	}
   1057
   1058	ena_unmap_rx_buff(rx_ring, rx_info);
   1059
   1060	__free_page(page);
   1061	rx_info->page = NULL;
   1062}
   1063
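        /* Post up to @num RX descriptors to the device, taking req_ids from
         * free_ids in ring order. A partial refill is accounted in the
         * refil_partial statistic. Returns the number of buffers actually
         * posted.
         */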
   1064static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
   1065{
   1066	u16 next_to_use, req_id;
   1067	u32 i;
   1068	int rc;
   1069
   1070	next_to_use = rx_ring->next_to_use;
   1071
   1072	for (i = 0; i < num; i++) {
   1073		struct ena_rx_buffer *rx_info;
   1074
   1075		req_id = rx_ring->free_ids[next_to_use];
   1076
   1077		rx_info = &rx_ring->rx_buffer_info[req_id];
   1078
   1079		rc = ena_alloc_rx_buffer(rx_ring, rx_info);
   1080		if (unlikely(rc < 0)) {
   1081			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
   1082				   "Failed to allocate buffer for rx queue %d\n",
   1083				   rx_ring->qid);
   1084			break;
   1085		}
   1086		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
   1087						&rx_info->ena_buf,
   1088						req_id);
   1089		if (unlikely(rc)) {
   1090			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
   1091				   "Failed to add buffer for rx queue %d\n",
   1092				   rx_ring->qid);
   1093			break;
   1094		}
   1095		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
   1096						   rx_ring->ring_size);
   1097	}
   1098
   1099	if (unlikely(i < num)) {
   1100		ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
   1101				  &rx_ring->syncp);
   1102		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
   1103			   "Refilled rx qid %d with only %d buffers (from %d)\n",
   1104			   rx_ring->qid, i, num);
   1105	}
   1106
   1107	/* ena_com_write_sq_doorbell issues a wmb() */
   1108	if (likely(i))
   1109		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
   1110
   1111	rx_ring->next_to_use = next_to_use;
   1112
   1113	return i;
   1114}
   1115
   1116static void ena_free_rx_bufs(struct ena_adapter *adapter,
   1117			     u32 qid)
   1118{
   1119	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
   1120	u32 i;
   1121
   1122	for (i = 0; i < rx_ring->ring_size; i++) {
   1123		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
   1124
   1125		if (rx_info->page)
   1126			ena_free_rx_page(rx_ring, rx_info);
   1127	}
   1128}
   1129
    1130/* ena_refill_all_rx_bufs - allocate Rx buffers for all queues
   1131 * @adapter: board private structure
   1132 */
   1133static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
   1134{
   1135	struct ena_ring *rx_ring;
   1136	int i, rc, bufs_num;
   1137
   1138	for (i = 0; i < adapter->num_io_queues; i++) {
   1139		rx_ring = &adapter->rx_ring[i];
   1140		bufs_num = rx_ring->ring_size - 1;
   1141		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
   1142
   1143		if (unlikely(rc != bufs_num))
   1144			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
   1145				   "Refilling Queue %d failed. allocated %d buffers from: %d\n",
   1146				   i, rc, bufs_num);
   1147	}
   1148}
   1149
   1150static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
   1151{
   1152	int i;
   1153
   1154	for (i = 0; i < adapter->num_io_queues; i++)
   1155		ena_free_rx_bufs(adapter, i);
   1156}
   1157
   1158static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
   1159			      struct ena_tx_buffer *tx_info)
   1160{
   1161	struct ena_com_buf *ena_buf;
   1162	u32 cnt;
   1163	int i;
   1164
   1165	ena_buf = tx_info->bufs;
   1166	cnt = tx_info->num_of_bufs;
   1167
   1168	if (unlikely(!cnt))
   1169		return;
   1170
   1171	if (tx_info->map_linear_data) {
   1172		dma_unmap_single(tx_ring->dev,
   1173				 dma_unmap_addr(ena_buf, paddr),
   1174				 dma_unmap_len(ena_buf, len),
   1175				 DMA_TO_DEVICE);
   1176		ena_buf++;
   1177		cnt--;
   1178	}
   1179
   1180	/* unmap remaining mapped pages */
   1181	for (i = 0; i < cnt; i++) {
   1182		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
   1183			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
   1184		ena_buf++;
   1185	}
   1186}
   1187
   1188/* ena_free_tx_bufs - Free Tx Buffers per Queue
    1189 * @tx_ring: TX ring whose buffers are to be freed
   1190 */
   1191static void ena_free_tx_bufs(struct ena_ring *tx_ring)
   1192{
   1193	bool print_once = true;
   1194	u32 i;
   1195
   1196	for (i = 0; i < tx_ring->ring_size; i++) {
   1197		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
   1198
   1199		if (!tx_info->skb)
   1200			continue;
   1201
   1202		if (print_once) {
   1203			netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
   1204				     "Free uncompleted tx skb qid %d idx 0x%x\n",
   1205				     tx_ring->qid, i);
   1206			print_once = false;
   1207		} else {
   1208			netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
   1209				  "Free uncompleted tx skb qid %d idx 0x%x\n",
   1210				  tx_ring->qid, i);
   1211		}
   1212
   1213		ena_unmap_tx_buff(tx_ring, tx_info);
   1214
   1215		dev_kfree_skb_any(tx_info->skb);
   1216	}
   1217	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
   1218						  tx_ring->qid));
   1219}
   1220
   1221static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
   1222{
   1223	struct ena_ring *tx_ring;
   1224	int i;
   1225
   1226	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
   1227		tx_ring = &adapter->tx_ring[i];
   1228		ena_free_tx_bufs(tx_ring);
   1229	}
   1230}
   1231
   1232static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
   1233{
   1234	u16 ena_qid;
   1235	int i;
   1236
   1237	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
   1238		ena_qid = ENA_IO_TXQ_IDX(i);
   1239		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
   1240	}
   1241}
   1242
   1243static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
   1244{
   1245	u16 ena_qid;
   1246	int i;
   1247
   1248	for (i = 0; i < adapter->num_io_queues; i++) {
   1249		ena_qid = ENA_IO_RXQ_IDX(i);
   1250		cancel_work_sync(&adapter->ena_napi[i].dim.work);
   1251		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
   1252	}
   1253}
   1254
   1255static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
   1256{
   1257	ena_destroy_all_tx_queues(adapter);
   1258	ena_destroy_all_rx_queues(adapter);
   1259}
   1260
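        /* The device reported a req_id that doesn't match an in-flight
         * buffer. This indicates a driver/device state mismatch, so bump
         * bad_req_id and schedule a reset with ENA_REGS_RESET_INV_TX_REQ_ID.
         */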
   1261static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
   1262				 struct ena_tx_buffer *tx_info, bool is_xdp)
   1263{
   1264	if (tx_info)
   1265		netif_err(ring->adapter,
   1266			  tx_done,
   1267			  ring->netdev,
   1268			  "tx_info doesn't have valid %s. qid %u req_id %u",
   1269			   is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
   1270	else
   1271		netif_err(ring->adapter,
   1272			  tx_done,
   1273			  ring->netdev,
   1274			  "Invalid req_id %u in qid %u\n",
   1275			  req_id, ring->qid);
   1276
   1277	ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
   1278	ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
   1279
   1280	return -EFAULT;
   1281}
   1282
   1283static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
   1284{
   1285	struct ena_tx_buffer *tx_info;
   1286
   1287	tx_info = &tx_ring->tx_buffer_info[req_id];
   1288	if (likely(tx_info->skb))
   1289		return 0;
   1290
   1291	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
   1292}
   1293
   1294static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
   1295{
   1296	struct ena_tx_buffer *tx_info;
   1297
   1298	tx_info = &xdp_ring->tx_buffer_info[req_id];
   1299	if (likely(tx_info->xdpf))
   1300		return 0;
   1301
   1302	return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
   1303}
   1304
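        /* Reclaim completed TX descriptors, up to @budget packets: unmap each
         * buffer, free its skb and recycle the req_id. If the queue was
         * stopped and enough space opened up, wake it under the TX lock.
         * Returns the number of completed packets.
         */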
   1305static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
   1306{
   1307	struct netdev_queue *txq;
   1308	bool above_thresh;
   1309	u32 tx_bytes = 0;
   1310	u32 total_done = 0;
   1311	u16 next_to_clean;
   1312	u16 req_id;
   1313	int tx_pkts = 0;
   1314	int rc;
   1315
   1316	next_to_clean = tx_ring->next_to_clean;
   1317	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
   1318
   1319	while (tx_pkts < budget) {
   1320		struct ena_tx_buffer *tx_info;
   1321		struct sk_buff *skb;
   1322
   1323		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
   1324						&req_id);
   1325		if (rc) {
   1326			if (unlikely(rc == -EINVAL))
   1327				handle_invalid_req_id(tx_ring, req_id, NULL,
   1328						      false);
   1329			break;
   1330		}
   1331
   1332		/* validate that the request id points to a valid skb */
   1333		rc = validate_tx_req_id(tx_ring, req_id);
   1334		if (rc)
   1335			break;
   1336
   1337		tx_info = &tx_ring->tx_buffer_info[req_id];
   1338		skb = tx_info->skb;
   1339
   1340		/* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */
   1341		prefetch(&skb->end);
   1342
   1343		tx_info->skb = NULL;
   1344		tx_info->last_jiffies = 0;
   1345
   1346		ena_unmap_tx_buff(tx_ring, tx_info);
   1347
   1348		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
   1349			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
   1350			  skb);
   1351
   1352		tx_bytes += skb->len;
   1353		dev_kfree_skb(skb);
   1354		tx_pkts++;
   1355		total_done += tx_info->tx_descs;
   1356
   1357		tx_ring->free_ids[next_to_clean] = req_id;
   1358		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
   1359						     tx_ring->ring_size);
   1360	}
   1361
   1362	tx_ring->next_to_clean = next_to_clean;
   1363	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
   1364	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
   1365
   1366	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
   1367
   1368	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
   1369		  "tx_poll: q %d done. total pkts: %d\n",
   1370		  tx_ring->qid, tx_pkts);
   1371
    1372	/* need to make the ring's circular update visible to
   1373	 * ena_start_xmit() before checking for netif_queue_stopped().
   1374	 */
   1375	smp_mb();
   1376
   1377	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
   1378						    ENA_TX_WAKEUP_THRESH);
   1379	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
   1380		__netif_tx_lock(txq, smp_processor_id());
   1381		above_thresh =
   1382			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
   1383						     ENA_TX_WAKEUP_THRESH);
   1384		if (netif_tx_queue_stopped(txq) && above_thresh &&
   1385		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
   1386			netif_tx_wake_queue(txq);
   1387			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
   1388					  &tx_ring->syncp);
   1389		}
   1390		__netif_tx_unlock(txq);
   1391	}
   1392
   1393	return tx_pkts;
   1394}
   1395
   1396static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag)
   1397{
   1398	struct sk_buff *skb;
   1399
   1400	if (!first_frag)
   1401		skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak);
   1402	else
   1403		skb = napi_build_skb(first_frag, ENA_PAGE_SIZE);
   1404
   1405	if (unlikely(!skb)) {
   1406		ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
   1407				  &rx_ring->syncp);
   1408
   1409		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
   1410			  "Failed to allocate skb. first_frag %s\n",
   1411			  first_frag ? "provided" : "not provided");
   1412		return NULL;
   1413	}
   1414
   1415	return skb;
   1416}
   1417
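        /* Assemble an skb from the received descriptors. Packets up to
         * rx_copybreak bytes are copied into a small skb and the page stays
         * on the ring; larger packets build the skb around the first page and
         * attach any remaining descriptors as frags.
         */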
   1418static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
   1419				  struct ena_com_rx_buf_info *ena_bufs,
   1420				  u32 descs,
   1421				  u16 *next_to_clean)
   1422{
   1423	struct ena_rx_buffer *rx_info;
   1424	struct ena_adapter *adapter;
   1425	u16 len, req_id, buf = 0;
   1426	struct sk_buff *skb;
   1427	void *page_addr;
   1428	u32 page_offset;
   1429	void *data_addr;
   1430
   1431	len = ena_bufs[buf].len;
   1432	req_id = ena_bufs[buf].req_id;
   1433
   1434	rx_info = &rx_ring->rx_buffer_info[req_id];
   1435
   1436	if (unlikely(!rx_info->page)) {
   1437		adapter = rx_ring->adapter;
   1438		netif_err(adapter, rx_err, rx_ring->netdev,
   1439			  "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
   1440		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
   1441		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
   1442		return NULL;
   1443	}
   1444
   1445	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1446		  "rx_info %p page %p\n",
   1447		  rx_info, rx_info->page);
   1448
   1449	/* save virt address of first buffer */
   1450	page_addr = page_address(rx_info->page);
   1451	page_offset = rx_info->page_offset;
   1452	data_addr = page_addr + page_offset;
   1453
   1454	prefetch(data_addr);
   1455
   1456	if (len <= rx_ring->rx_copybreak) {
   1457		skb = ena_alloc_skb(rx_ring, NULL);
   1458		if (unlikely(!skb))
   1459			return NULL;
   1460
   1461		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1462			  "RX allocated small packet. len %d. data_len %d\n",
   1463			  skb->len, skb->data_len);
   1464
   1465		/* sync this buffer for CPU use */
   1466		dma_sync_single_for_cpu(rx_ring->dev,
   1467					dma_unmap_addr(&rx_info->ena_buf, paddr),
   1468					len,
   1469					DMA_FROM_DEVICE);
   1470		skb_copy_to_linear_data(skb, data_addr, len);
   1471		dma_sync_single_for_device(rx_ring->dev,
   1472					   dma_unmap_addr(&rx_info->ena_buf, paddr),
   1473					   len,
   1474					   DMA_FROM_DEVICE);
   1475
   1476		skb_put(skb, len);
   1477		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
   1478		rx_ring->free_ids[*next_to_clean] = req_id;
   1479		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
   1480						     rx_ring->ring_size);
   1481		return skb;
   1482	}
   1483
   1484	ena_unmap_rx_buff(rx_ring, rx_info);
   1485
   1486	skb = ena_alloc_skb(rx_ring, page_addr);
   1487	if (unlikely(!skb))
   1488		return NULL;
   1489
   1490	/* Populate skb's linear part */
   1491	skb_reserve(skb, page_offset);
   1492	skb_put(skb, len);
   1493	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
   1494
   1495	do {
   1496		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1497			  "RX skb updated. len %d. data_len %d\n",
   1498			  skb->len, skb->data_len);
   1499
   1500		rx_info->page = NULL;
   1501
   1502		rx_ring->free_ids[*next_to_clean] = req_id;
   1503		*next_to_clean =
   1504			ENA_RX_RING_IDX_NEXT(*next_to_clean,
   1505					     rx_ring->ring_size);
   1506		if (likely(--descs == 0))
   1507			break;
   1508
   1509		buf++;
   1510		len = ena_bufs[buf].len;
   1511		req_id = ena_bufs[buf].req_id;
   1512
   1513		rx_info = &rx_ring->rx_buffer_info[req_id];
   1514
   1515		ena_unmap_rx_buff(rx_ring, rx_info);
   1516
   1517		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
   1518				rx_info->page_offset, len, ENA_PAGE_SIZE);
   1519
   1520	} while (1);
   1521
   1522	return skb;
   1523}
   1524
   1525/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
    1526 * @rx_ring: the RX ring the packet was received on
   1527 * @ena_rx_ctx: received packet context/metadata
   1528 * @skb: skb currently being received and modified
   1529 */
   1530static void ena_rx_checksum(struct ena_ring *rx_ring,
   1531				   struct ena_com_rx_ctx *ena_rx_ctx,
   1532				   struct sk_buff *skb)
   1533{
   1534	/* Rx csum disabled */
   1535	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
   1536		skb->ip_summed = CHECKSUM_NONE;
   1537		return;
   1538	}
   1539
   1540	/* For fragmented packets the checksum isn't valid */
   1541	if (ena_rx_ctx->frag) {
   1542		skb->ip_summed = CHECKSUM_NONE;
   1543		return;
   1544	}
   1545
   1546	/* if IP and error */
   1547	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
   1548		     (ena_rx_ctx->l3_csum_err))) {
   1549		/* ipv4 checksum error */
   1550		skb->ip_summed = CHECKSUM_NONE;
   1551		ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
   1552				  &rx_ring->syncp);
   1553		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
   1554			  "RX IPv4 header checksum error\n");
   1555		return;
   1556	}
   1557
   1558	/* if TCP/UDP */
   1559	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
   1560		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
   1561		if (unlikely(ena_rx_ctx->l4_csum_err)) {
   1562			/* TCP/UDP checksum error */
   1563			ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
   1564					  &rx_ring->syncp);
   1565			netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
   1566				  "RX L4 checksum error\n");
   1567			skb->ip_summed = CHECKSUM_NONE;
   1568			return;
   1569		}
   1570
   1571		if (likely(ena_rx_ctx->l4_csum_checked)) {
   1572			skb->ip_summed = CHECKSUM_UNNECESSARY;
   1573			ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
   1574					  &rx_ring->syncp);
   1575		} else {
   1576			ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
   1577					  &rx_ring->syncp);
   1578			skb->ip_summed = CHECKSUM_NONE;
   1579		}
   1580	} else {
   1581		skb->ip_summed = CHECKSUM_NONE;
   1582		return;
   1583	}
   1584
   1585}
   1586
   1587static void ena_set_rx_hash(struct ena_ring *rx_ring,
   1588			    struct ena_com_rx_ctx *ena_rx_ctx,
   1589			    struct sk_buff *skb)
   1590{
   1591	enum pkt_hash_types hash_type;
   1592
   1593	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
   1594		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
   1595			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
   1596
   1597			hash_type = PKT_HASH_TYPE_L4;
   1598		else
   1599			hash_type = PKT_HASH_TYPE_NONE;
   1600
   1601		/* Override hash type if the packet is fragmented */
   1602		if (ena_rx_ctx->frag)
   1603			hash_type = PKT_HASH_TYPE_NONE;
   1604
   1605		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
   1606	}
   1607}
   1608
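        /* Prepare an xdp_buff over the first RX buffer and run the XDP
         * program. On XDP_PASS the program may have adjusted the headers, so
         * page_offset and the buffer length are refreshed from the xdp_buff
         * before the skb path takes over.
         */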
   1609static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
   1610{
   1611	struct ena_rx_buffer *rx_info;
   1612	int ret;
   1613
   1614	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
   1615	xdp_prepare_buff(xdp, page_address(rx_info->page),
   1616			 rx_info->page_offset,
   1617			 rx_ring->ena_bufs[0].len, false);
   1618	/* If for some reason we received a bigger packet than
   1619	 * we expect, then we simply drop it
   1620	 */
   1621	if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
   1622		return XDP_DROP;
   1623
   1624	ret = ena_xdp_execute(rx_ring, xdp);
   1625
   1626	/* The xdp program might expand the headers */
   1627	if (ret == XDP_PASS) {
   1628		rx_info->page_offset = xdp->data - xdp->data_hard_start;
   1629		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
   1630	}
   1631
   1632	return ret;
   1633}
   1634/* ena_clean_rx_irq - Cleanup RX irq
   1635 * @rx_ring: RX ring to clean
   1636 * @napi: napi handler
   1637 * @budget: how many packets driver is allowed to clean
   1638 *
   1639 * Returns the number of cleaned buffers.
   1640 */
   1641static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
   1642			    u32 budget)
   1643{
   1644	u16 next_to_clean = rx_ring->next_to_clean;
   1645	struct ena_com_rx_ctx ena_rx_ctx;
   1646	struct ena_rx_buffer *rx_info;
   1647	struct ena_adapter *adapter;
   1648	u32 res_budget, work_done;
   1649	int rx_copybreak_pkt = 0;
   1650	int refill_threshold;
   1651	struct sk_buff *skb;
   1652	int refill_required;
   1653	struct xdp_buff xdp;
   1654	int xdp_flags = 0;
   1655	int total_len = 0;
   1656	int xdp_verdict;
   1657	int rc = 0;
   1658	int i;
   1659
   1660	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1661		  "%s qid %d\n", __func__, rx_ring->qid);
   1662	res_budget = budget;
   1663	xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
   1664
   1665	do {
   1666		xdp_verdict = XDP_PASS;
   1667		skb = NULL;
   1668		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
   1669		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
   1670		ena_rx_ctx.descs = 0;
   1671		ena_rx_ctx.pkt_offset = 0;
   1672		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
   1673				    rx_ring->ena_com_io_sq,
   1674				    &ena_rx_ctx);
   1675		if (unlikely(rc))
   1676			goto error;
   1677
   1678		if (unlikely(ena_rx_ctx.descs == 0))
   1679			break;
   1680
   1681		/* First descriptor might have an offset set by the device */
   1682		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
   1683		rx_info->page_offset += ena_rx_ctx.pkt_offset;
   1684
   1685		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
   1686			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
   1687			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
   1688			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
   1689
   1690		if (ena_xdp_present_ring(rx_ring))
   1691			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
   1692
   1693		/* allocate skb and fill it */
   1694		if (xdp_verdict == XDP_PASS)
   1695			skb = ena_rx_skb(rx_ring,
   1696					 rx_ring->ena_bufs,
   1697					 ena_rx_ctx.descs,
   1698					 &next_to_clean);
   1699
   1700		if (unlikely(!skb)) {
   1701			for (i = 0; i < ena_rx_ctx.descs; i++) {
   1702				int req_id = rx_ring->ena_bufs[i].req_id;
   1703
   1704				rx_ring->free_ids[next_to_clean] = req_id;
   1705				next_to_clean =
   1706					ENA_RX_RING_IDX_NEXT(next_to_clean,
   1707							     rx_ring->ring_size);
   1708
    1709				/* Packet was passed for transmission, unmap it
    1710				 * from the RX side.
    1711				 */
   1712				if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) {
   1713					ena_unmap_rx_buff(rx_ring,
   1714							  &rx_ring->rx_buffer_info[req_id]);
   1715					rx_ring->rx_buffer_info[req_id].page = NULL;
   1716				}
   1717			}
   1718			if (xdp_verdict != XDP_PASS) {
   1719				xdp_flags |= xdp_verdict;
   1720				res_budget--;
   1721				continue;
   1722			}
   1723			break;
   1724		}
   1725
   1726		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
   1727
   1728		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
   1729
   1730		skb_record_rx_queue(skb, rx_ring->qid);
   1731
   1732		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
   1733			rx_copybreak_pkt++;
   1734
   1735		total_len += skb->len;
   1736
   1737		napi_gro_receive(napi, skb);
   1738
   1739		res_budget--;
   1740	} while (likely(res_budget));
   1741
   1742	work_done = budget - res_budget;
   1743	rx_ring->per_napi_packets += work_done;
   1744	u64_stats_update_begin(&rx_ring->syncp);
   1745	rx_ring->rx_stats.bytes += total_len;
   1746	rx_ring->rx_stats.cnt += work_done;
   1747	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
   1748	u64_stats_update_end(&rx_ring->syncp);
   1749
   1750	rx_ring->next_to_clean = next_to_clean;
   1751
   1752	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
   1753	refill_threshold =
   1754		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
   1755		      ENA_RX_REFILL_THRESH_PACKET);
   1756
   1757	/* Optimization, try to batch new rx buffers */
   1758	if (refill_required > refill_threshold) {
   1759		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
   1760		ena_refill_rx_bufs(rx_ring, refill_required);
   1761	}
   1762
   1763	if (xdp_flags & XDP_REDIRECT)
   1764		xdp_do_flush_map();
   1765
   1766	return work_done;
   1767
   1768error:
   1769	adapter = netdev_priv(rx_ring->netdev);
   1770
   1771	if (rc == -ENOSPC) {
   1772		ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1,
   1773				  &rx_ring->syncp);
   1774		ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
   1775	} else {
   1776		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
   1777				  &rx_ring->syncp);
   1778		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
   1779	}
   1780	return 0;
   1781}
   1782
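        /* ena_dim_work - Apply the RX interrupt moderation profile chosen by net_dim
         * @w: work struct embedded in the dim context
         *
         * Updates the RX ring's smoothed interval and restarts DIM measurement.
         */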
   1783static void ena_dim_work(struct work_struct *w)
   1784{
   1785	struct dim *dim = container_of(w, struct dim, work);
   1786	struct dim_cq_moder cur_moder =
   1787		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
   1788	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
   1789
   1790	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
   1791	dim->state = DIM_START_MEASURE;
   1792}
   1793
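        /* ena_adjust_adaptive_rx_intr_moderation - Feed RX statistics into net_dim
         * @ena_napi: napi context holding the RX ring and its dim state
         *
         * Called from the polling path when adaptive moderation is enabled;
         * net_dim() may schedule ena_dim_work() to apply a new profile.
         */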
   1794static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
   1795{
   1796	struct dim_sample dim_sample;
   1797	struct ena_ring *rx_ring = ena_napi->rx_ring;
   1798
   1799	if (!rx_ring->per_napi_packets)
   1800		return;
   1801
   1802	rx_ring->non_empty_napi_events++;
   1803
   1804	dim_update_sample(rx_ring->non_empty_napi_events,
   1805			  rx_ring->rx_stats.cnt,
   1806			  rx_ring->rx_stats.bytes,
   1807			  &dim_sample);
   1808
   1809	net_dim(&ena_napi->dim, dim_sample);
   1810
   1811	rx_ring->per_napi_packets = 0;
   1812}
   1813
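        /* ena_unmask_interrupt - Update the interrupt delay values and unmask the IO interrupt
         * @tx_ring: TX ring sharing the MSI-X vector
         * @rx_ring: paired RX ring, or NULL for XDP TX queues
         */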
   1814static void ena_unmask_interrupt(struct ena_ring *tx_ring,
   1815					struct ena_ring *rx_ring)
   1816{
   1817	struct ena_eth_io_intr_reg intr_reg;
   1818	u32 rx_interval = 0;
    1819	/* Rx ring can be NULL for XDP TX queues, which don't have an
    1820	 * accompanying rx_ring pair.
    1821	 */
   1822	if (rx_ring)
   1823		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
   1824			rx_ring->smoothed_interval :
   1825			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
   1826
   1827	/* Update intr register: rx intr delay,
   1828	 * tx intr delay and interrupt unmask
   1829	 */
   1830	ena_com_update_intr_reg(&intr_reg,
   1831				rx_interval,
   1832				tx_ring->smoothed_interval,
   1833				true);
   1834
   1835	ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
   1836			  &tx_ring->syncp);
   1837
    1838	/* It is a shared MSI-X vector.
    1839	 * Both the Tx and Rx CQs hold a pointer to it,
    1840	 * so we use one of them to reach the interrupt register.
    1841	 * The Tx ring is used because rx_ring is NULL for XDP queues.
    1842	 */
   1843	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
   1844}
   1845
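        /* ena_update_ring_numa_node - Keep the CQ NUMA hints in sync with the current CPU
         * @tx_ring: TX ring currently being polled
         * @rx_ring: paired RX ring, or NULL for XDP TX queues
         *
         * If polling migrated to a different CPU, update the completion queues'
         * NUMA node hints and the cached CPU of both rings.
         */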
   1846static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
   1847					     struct ena_ring *rx_ring)
   1848{
   1849	int cpu = get_cpu();
   1850	int numa_node;
   1851
   1852	/* Check only one ring since the 2 rings are running on the same cpu */
   1853	if (likely(tx_ring->cpu == cpu))
   1854		goto out;
   1855
   1856	numa_node = cpu_to_node(cpu);
   1857	put_cpu();
   1858
   1859	if (numa_node != NUMA_NO_NODE) {
   1860		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
   1861		if (rx_ring)
   1862			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
   1863						 numa_node);
   1864	}
   1865
   1866	tx_ring->cpu = cpu;
   1867	if (rx_ring)
   1868		rx_ring->cpu = cpu;
   1869
   1870	return;
   1871out:
   1872	put_cpu();
   1873}
   1874
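        /* ena_clean_xdp_irq - Reclaim completed XDP TX descriptors
         * @xdp_ring: XDP TX ring to clean
         * @budget: maximum number of completions to process
         *
         * Returns the number of reclaimed frames.
         */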
   1875static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
   1876{
   1877	u32 total_done = 0;
   1878	u16 next_to_clean;
   1879	u32 tx_bytes = 0;
   1880	int tx_pkts = 0;
   1881	u16 req_id;
   1882	int rc;
   1883
   1884	if (unlikely(!xdp_ring))
   1885		return 0;
   1886	next_to_clean = xdp_ring->next_to_clean;
   1887
   1888	while (tx_pkts < budget) {
   1889		struct ena_tx_buffer *tx_info;
   1890		struct xdp_frame *xdpf;
   1891
   1892		rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
   1893						&req_id);
   1894		if (rc) {
   1895			if (unlikely(rc == -EINVAL))
   1896				handle_invalid_req_id(xdp_ring, req_id, NULL,
   1897						      true);
   1898			break;
   1899		}
   1900
   1901		/* validate that the request id points to a valid xdp_frame */
   1902		rc = validate_xdp_req_id(xdp_ring, req_id);
   1903		if (rc)
   1904			break;
   1905
   1906		tx_info = &xdp_ring->tx_buffer_info[req_id];
   1907		xdpf = tx_info->xdpf;
   1908
   1909		tx_info->xdpf = NULL;
   1910		tx_info->last_jiffies = 0;
   1911		ena_unmap_tx_buff(xdp_ring, tx_info);
   1912
   1913		netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
   1914			  "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
   1915			  xdpf);
   1916
   1917		tx_bytes += xdpf->len;
   1918		tx_pkts++;
   1919		total_done += tx_info->tx_descs;
   1920
   1921		xdp_return_frame(xdpf);
   1922		xdp_ring->free_ids[next_to_clean] = req_id;
   1923		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
   1924						     xdp_ring->ring_size);
   1925	}
   1926
   1927	xdp_ring->next_to_clean = next_to_clean;
   1928	ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
   1929	ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
   1930
   1931	netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
   1932		  "tx_poll: q %d done. total pkts: %d\n",
   1933		  xdp_ring->qid, tx_pkts);
   1934
   1935	return tx_pkts;
   1936}
   1937
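        /* ena_io_poll - NAPI poll handler for a TX/RX ring pair
         * @napi: napi context
         * @budget: how many RX packets the handler is allowed to process
         *
         * Cleans TX completions, processes RX packets and, when polling
         * completes below budget, updates the NUMA hints and unmasks the
         * shared interrupt.
         */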
   1938static int ena_io_poll(struct napi_struct *napi, int budget)
   1939{
   1940	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
   1941	struct ena_ring *tx_ring, *rx_ring;
   1942	int tx_work_done;
   1943	int rx_work_done = 0;
   1944	int tx_budget;
   1945	int napi_comp_call = 0;
   1946	int ret;
   1947
   1948	tx_ring = ena_napi->tx_ring;
   1949	rx_ring = ena_napi->rx_ring;
   1950
   1951	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
   1952
   1953	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
   1954	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
   1955		napi_complete_done(napi, 0);
   1956		return 0;
   1957	}
   1958
   1959	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
   1960	/* On netpoll the budget is zero and the handler should only clean the
   1961	 * tx completions.
   1962	 */
   1963	if (likely(budget))
   1964		rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
   1965
    1966	/* If the device is about to reset or is down, avoid unmasking
    1967	 * the interrupt and return 0 so NAPI won't reschedule
    1968	 */
   1969	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
   1970		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
   1971		napi_complete_done(napi, 0);
   1972		ret = 0;
   1973
   1974	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
   1975		napi_comp_call = 1;
   1976
    1977		/* Update NUMA and unmask the interrupt only when scheduled
    1978		 * from the interrupt context (vs. from sk_busy_loop)
    1979		 */
   1980		if (napi_complete_done(napi, rx_work_done) &&
   1981		    READ_ONCE(ena_napi->interrupts_masked)) {
   1982			smp_rmb(); /* make sure interrupts_masked is read */
   1983			WRITE_ONCE(ena_napi->interrupts_masked, false);
   1984			/* We apply adaptive moderation on Rx path only.
   1985			 * Tx uses static interrupt moderation.
   1986			 */
   1987			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
   1988				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
   1989
   1990			ena_unmask_interrupt(tx_ring, rx_ring);
   1991		}
   1992
   1993		ena_update_ring_numa_node(tx_ring, rx_ring);
   1994
   1995		ret = rx_work_done;
   1996	} else {
   1997		ret = budget;
   1998	}
   1999
   2000	u64_stats_update_begin(&tx_ring->syncp);
   2001	tx_ring->tx_stats.napi_comp += napi_comp_call;
   2002	tx_ring->tx_stats.tx_poll++;
   2003	u64_stats_update_end(&tx_ring->syncp);
   2004
   2005	tx_ring->tx_stats.last_napi_jiffies = jiffies;
   2006
   2007	return ret;
   2008}
   2009
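        /* ena_intr_msix_mgmnt - MSI-X Interrupt Handler for the admin queue and AENQ
         * @irq: interrupt number
         * @data: pointer to the adapter private structure
         */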
   2010static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
   2011{
   2012	struct ena_adapter *adapter = (struct ena_adapter *)data;
   2013
   2014	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
   2015
   2016	/* Don't call the aenq handler before probe is done */
   2017	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
   2018		ena_com_aenq_intr_handler(adapter->ena_dev, data);
   2019
   2020	return IRQ_HANDLED;
   2021}
   2022
   2023/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
   2024 * @irq: interrupt number
   2025 * @data: pointer to a network interface private napi device structure
   2026 */
   2027static irqreturn_t ena_intr_msix_io(int irq, void *data)
   2028{
   2029	struct ena_napi *ena_napi = data;
   2030
   2031	/* Used to check HW health */
   2032	WRITE_ONCE(ena_napi->first_interrupt, true);
   2033
   2034	WRITE_ONCE(ena_napi->interrupts_masked, true);
   2035	smp_wmb(); /* write interrupts_masked before calling napi */
   2036
   2037	napi_schedule_irqoff(&ena_napi->napi);
   2038
   2039	return IRQ_HANDLED;
   2040}
   2041
    2042	/* Reserve a single MSI-X vector for management (admin + aenq),
    2043	 * plus one vector for each potential IO queue.
    2044	 * The number of potential IO queues is the minimum of what the device
    2045	 * supports and the number of vCPUs.
    2046	 */
   2047static int ena_enable_msix(struct ena_adapter *adapter)
   2048{
   2049	int msix_vecs, irq_cnt;
   2050
   2051	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
   2052		netif_err(adapter, probe, adapter->netdev,
   2053			  "Error, MSI-X is already enabled\n");
   2054		return -EPERM;
   2055	}
   2056
    2057	/* Reserve the maximum number of MSI-X vectors we might need */
   2058	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
   2059	netif_dbg(adapter, probe, adapter->netdev,
   2060		  "Trying to enable MSI-X, vectors %d\n", msix_vecs);
   2061
   2062	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
   2063					msix_vecs, PCI_IRQ_MSIX);
   2064
   2065	if (irq_cnt < 0) {
   2066		netif_err(adapter, probe, adapter->netdev,
   2067			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
   2068		return -ENOSPC;
   2069	}
   2070
   2071	if (irq_cnt != msix_vecs) {
   2072		netif_notice(adapter, probe, adapter->netdev,
   2073			     "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
   2074			     irq_cnt, msix_vecs);
   2075		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
   2076	}
   2077
   2078	if (ena_init_rx_cpu_rmap(adapter))
   2079		netif_warn(adapter, probe, adapter->netdev,
   2080			   "Failed to map IRQs to CPUs\n");
   2081
   2082	adapter->msix_vecs = irq_cnt;
   2083	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
   2084
   2085	return 0;
   2086}
   2087
   2088static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
   2089{
   2090	u32 cpu;
   2091
   2092	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
   2093		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
   2094		 pci_name(adapter->pdev));
   2095	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
   2096		ena_intr_msix_mgmnt;
   2097	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
   2098	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
   2099		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
   2100	cpu = cpumask_first(cpu_online_mask);
   2101	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
   2102	cpumask_set_cpu(cpu,
   2103			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
   2104}
   2105
   2106static void ena_setup_io_intr(struct ena_adapter *adapter)
   2107{
   2108	struct net_device *netdev;
   2109	int irq_idx, i, cpu;
   2110	int io_queue_count;
   2111
   2112	netdev = adapter->netdev;
   2113	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2114
   2115	for (i = 0; i < io_queue_count; i++) {
   2116		irq_idx = ENA_IO_IRQ_IDX(i);
   2117		cpu = i % num_online_cpus();
   2118
   2119		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
   2120			 "%s-Tx-Rx-%d", netdev->name, i);
   2121		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
   2122		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
   2123		adapter->irq_tbl[irq_idx].vector =
   2124			pci_irq_vector(adapter->pdev, irq_idx);
   2125		adapter->irq_tbl[irq_idx].cpu = cpu;
   2126
   2127		cpumask_set_cpu(cpu,
   2128				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
   2129	}
   2130}
   2131
   2132static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
   2133{
   2134	unsigned long flags = 0;
   2135	struct ena_irq *irq;
   2136	int rc;
   2137
   2138	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
   2139	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
   2140			 irq->data);
   2141	if (rc) {
   2142		netif_err(adapter, probe, adapter->netdev,
   2143			  "Failed to request admin irq\n");
   2144		return rc;
   2145	}
   2146
   2147	netif_dbg(adapter, probe, adapter->netdev,
    2148		  "Set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
   2149		  irq->affinity_hint_mask.bits[0], irq->vector);
   2150
   2151	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
   2152
   2153	return rc;
   2154}
   2155
   2156static int ena_request_io_irq(struct ena_adapter *adapter)
   2157{
   2158	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2159	unsigned long flags = 0;
   2160	struct ena_irq *irq;
   2161	int rc = 0, i, k;
   2162
   2163	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
   2164		netif_err(adapter, ifup, adapter->netdev,
   2165			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
   2166		return -EINVAL;
   2167	}
   2168
   2169	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
   2170		irq = &adapter->irq_tbl[i];
   2171		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
   2172				 irq->data);
   2173		if (rc) {
   2174			netif_err(adapter, ifup, adapter->netdev,
   2175				  "Failed to request I/O IRQ. index %d rc %d\n",
   2176				   i, rc);
   2177			goto err;
   2178		}
   2179
   2180		netif_dbg(adapter, ifup, adapter->netdev,
   2181			  "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
   2182			  i, irq->affinity_hint_mask.bits[0], irq->vector);
   2183
   2184		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
   2185	}
   2186
   2187	return rc;
   2188
   2189err:
   2190	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
   2191		irq = &adapter->irq_tbl[k];
   2192		free_irq(irq->vector, irq->data);
   2193	}
   2194
   2195	return rc;
   2196}
   2197
   2198static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
   2199{
   2200	struct ena_irq *irq;
   2201
   2202	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
   2203	synchronize_irq(irq->vector);
   2204	irq_set_affinity_hint(irq->vector, NULL);
   2205	free_irq(irq->vector, irq->data);
   2206}
   2207
   2208static void ena_free_io_irq(struct ena_adapter *adapter)
   2209{
   2210	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2211	struct ena_irq *irq;
   2212	int i;
   2213
   2214#ifdef CONFIG_RFS_ACCEL
   2215	if (adapter->msix_vecs >= 1) {
   2216		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
   2217		adapter->netdev->rx_cpu_rmap = NULL;
   2218	}
   2219#endif /* CONFIG_RFS_ACCEL */
   2220
   2221	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
   2222		irq = &adapter->irq_tbl[i];
   2223		irq_set_affinity_hint(irq->vector, NULL);
   2224		free_irq(irq->vector, irq->data);
   2225	}
   2226}
   2227
   2228static void ena_disable_msix(struct ena_adapter *adapter)
   2229{
   2230	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
   2231		pci_free_irq_vectors(adapter->pdev);
   2232}
   2233
   2234static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
   2235{
   2236	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2237	int i;
   2238
   2239	if (!netif_running(adapter->netdev))
   2240		return;
   2241
   2242	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
   2243		synchronize_irq(adapter->irq_tbl[i].vector);
   2244}
   2245
   2246static void ena_del_napi_in_range(struct ena_adapter *adapter,
   2247				  int first_index,
   2248				  int count)
   2249{
   2250	int i;
   2251
   2252	for (i = first_index; i < first_index + count; i++) {
   2253		netif_napi_del(&adapter->ena_napi[i].napi);
   2254
   2255		WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
   2256			adapter->ena_napi[i].xdp_ring);
   2257	}
   2258}
   2259
   2260static void ena_init_napi_in_range(struct ena_adapter *adapter,
   2261				   int first_index, int count)
   2262{
   2263	int i;
   2264
   2265	for (i = first_index; i < first_index + count; i++) {
   2266		struct ena_napi *napi = &adapter->ena_napi[i];
   2267
   2268		netif_napi_add(adapter->netdev,
   2269			       &napi->napi,
   2270			       ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
   2271			       NAPI_POLL_WEIGHT);
   2272
   2273		if (!ENA_IS_XDP_INDEX(adapter, i)) {
   2274			napi->rx_ring = &adapter->rx_ring[i];
   2275			napi->tx_ring = &adapter->tx_ring[i];
   2276		} else {
   2277			napi->xdp_ring = &adapter->tx_ring[i];
   2278		}
   2279		napi->qid = i;
   2280	}
   2281}
   2282
   2283static void ena_napi_disable_in_range(struct ena_adapter *adapter,
   2284				      int first_index,
   2285				      int count)
   2286{
   2287	int i;
   2288
   2289	for (i = first_index; i < first_index + count; i++)
   2290		napi_disable(&adapter->ena_napi[i].napi);
   2291}
   2292
   2293static void ena_napi_enable_in_range(struct ena_adapter *adapter,
   2294				     int first_index,
   2295				     int count)
   2296{
   2297	int i;
   2298
   2299	for (i = first_index; i < first_index + count; i++)
   2300		napi_enable(&adapter->ena_napi[i].napi);
   2301}
   2302
   2303/* Configure the Rx forwarding */
   2304static int ena_rss_configure(struct ena_adapter *adapter)
   2305{
   2306	struct ena_com_dev *ena_dev = adapter->ena_dev;
   2307	int rc;
   2308
   2309	/* In case the RSS table wasn't initialized by probe */
   2310	if (!ena_dev->rss.tbl_log_size) {
   2311		rc = ena_rss_init_default(adapter);
   2312		if (rc && (rc != -EOPNOTSUPP)) {
   2313			netif_err(adapter, ifup, adapter->netdev,
   2314				  "Failed to init RSS rc: %d\n", rc);
   2315			return rc;
   2316		}
   2317	}
   2318
   2319	/* Set indirect table */
   2320	rc = ena_com_indirect_table_set(ena_dev);
   2321	if (unlikely(rc && rc != -EOPNOTSUPP))
   2322		return rc;
   2323
   2324	/* Configure hash function (if supported) */
   2325	rc = ena_com_set_hash_function(ena_dev);
   2326	if (unlikely(rc && (rc != -EOPNOTSUPP)))
   2327		return rc;
   2328
   2329	/* Configure hash inputs (if supported) */
   2330	rc = ena_com_set_hash_ctrl(ena_dev);
   2331	if (unlikely(rc && (rc != -EOPNOTSUPP)))
   2332		return rc;
   2333
   2334	return 0;
   2335}
   2336
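        /* ena_up_complete - Finish bringing the interface up
         * @adapter: ENA adapter
         *
         * Configures RSS, re-applies the MTU, refills all RX rings and enables
         * the TX queues and NAPI contexts.
         */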
   2337static int ena_up_complete(struct ena_adapter *adapter)
   2338{
   2339	int rc;
   2340
   2341	rc = ena_rss_configure(adapter);
   2342	if (rc)
   2343		return rc;
   2344
   2345	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
   2346
   2347	ena_refill_all_rx_bufs(adapter);
   2348
   2349	/* enable transmits */
   2350	netif_tx_start_all_queues(adapter->netdev);
   2351
   2352	ena_napi_enable_in_range(adapter,
   2353				 0,
   2354				 adapter->xdp_num_queues + adapter->num_io_queues);
   2355
   2356	return 0;
   2357}
   2358
   2359static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
   2360{
   2361	struct ena_com_create_io_ctx ctx;
   2362	struct ena_com_dev *ena_dev;
   2363	struct ena_ring *tx_ring;
   2364	u32 msix_vector;
   2365	u16 ena_qid;
   2366	int rc;
   2367
   2368	ena_dev = adapter->ena_dev;
   2369
   2370	tx_ring = &adapter->tx_ring[qid];
   2371	msix_vector = ENA_IO_IRQ_IDX(qid);
   2372	ena_qid = ENA_IO_TXQ_IDX(qid);
   2373
   2374	memset(&ctx, 0x0, sizeof(ctx));
   2375
   2376	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
   2377	ctx.qid = ena_qid;
   2378	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
   2379	ctx.msix_vector = msix_vector;
   2380	ctx.queue_size = tx_ring->ring_size;
   2381	ctx.numa_node = cpu_to_node(tx_ring->cpu);
   2382
   2383	rc = ena_com_create_io_queue(ena_dev, &ctx);
   2384	if (rc) {
   2385		netif_err(adapter, ifup, adapter->netdev,
   2386			  "Failed to create I/O TX queue num %d rc: %d\n",
   2387			  qid, rc);
   2388		return rc;
   2389	}
   2390
   2391	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
   2392				     &tx_ring->ena_com_io_sq,
   2393				     &tx_ring->ena_com_io_cq);
   2394	if (rc) {
   2395		netif_err(adapter, ifup, adapter->netdev,
   2396			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
   2397			  qid, rc);
   2398		ena_com_destroy_io_queue(ena_dev, ena_qid);
   2399		return rc;
   2400	}
   2401
   2402	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
   2403	return rc;
   2404}
   2405
   2406static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
   2407					    int first_index, int count)
   2408{
   2409	struct ena_com_dev *ena_dev = adapter->ena_dev;
   2410	int rc, i;
   2411
   2412	for (i = first_index; i < first_index + count; i++) {
   2413		rc = ena_create_io_tx_queue(adapter, i);
   2414		if (rc)
   2415			goto create_err;
   2416	}
   2417
   2418	return 0;
   2419
   2420create_err:
   2421	while (i-- > first_index)
   2422		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
   2423
   2424	return rc;
   2425}
   2426
   2427static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
   2428{
   2429	struct ena_com_dev *ena_dev;
   2430	struct ena_com_create_io_ctx ctx;
   2431	struct ena_ring *rx_ring;
   2432	u32 msix_vector;
   2433	u16 ena_qid;
   2434	int rc;
   2435
   2436	ena_dev = adapter->ena_dev;
   2437
   2438	rx_ring = &adapter->rx_ring[qid];
   2439	msix_vector = ENA_IO_IRQ_IDX(qid);
   2440	ena_qid = ENA_IO_RXQ_IDX(qid);
   2441
   2442	memset(&ctx, 0x0, sizeof(ctx));
   2443
   2444	ctx.qid = ena_qid;
   2445	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
   2446	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
   2447	ctx.msix_vector = msix_vector;
   2448	ctx.queue_size = rx_ring->ring_size;
   2449	ctx.numa_node = cpu_to_node(rx_ring->cpu);
   2450
   2451	rc = ena_com_create_io_queue(ena_dev, &ctx);
   2452	if (rc) {
   2453		netif_err(adapter, ifup, adapter->netdev,
   2454			  "Failed to create I/O RX queue num %d rc: %d\n",
   2455			  qid, rc);
   2456		return rc;
   2457	}
   2458
   2459	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
   2460				     &rx_ring->ena_com_io_sq,
   2461				     &rx_ring->ena_com_io_cq);
   2462	if (rc) {
   2463		netif_err(adapter, ifup, adapter->netdev,
   2464			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
   2465			  qid, rc);
   2466		goto err;
   2467	}
   2468
   2469	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
   2470
   2471	return rc;
   2472err:
   2473	ena_com_destroy_io_queue(ena_dev, ena_qid);
   2474	return rc;
   2475}
   2476
   2477static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
   2478{
   2479	struct ena_com_dev *ena_dev = adapter->ena_dev;
   2480	int rc, i;
   2481
   2482	for (i = 0; i < adapter->num_io_queues; i++) {
   2483		rc = ena_create_io_rx_queue(adapter, i);
   2484		if (rc)
   2485			goto create_err;
   2486		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
   2487	}
   2488
   2489	return 0;
   2490
   2491create_err:
   2492	while (i--) {
   2493		cancel_work_sync(&adapter->ena_napi[i].dim.work);
   2494		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
   2495	}
   2496
   2497	return rc;
   2498}
   2499
   2500static void set_io_rings_size(struct ena_adapter *adapter,
   2501			      int new_tx_size,
   2502			      int new_rx_size)
   2503{
   2504	int i;
   2505
   2506	for (i = 0; i < adapter->num_io_queues; i++) {
   2507		adapter->tx_ring[i].ring_size = new_tx_size;
   2508		adapter->rx_ring[i].ring_size = new_rx_size;
   2509	}
   2510}
   2511
    2512	/* This function allows queue allocation to back off when the system is
    2513	 * low on memory. If there is not enough memory to allocate IO queues,
    2514	 * the driver will try to allocate smaller queues.
    2515	 *
    2516	 * The backoff algorithm is as follows:
    2517	 *  1. Try to allocate TX and RX queues.
    2518	 *  1.1. If successful, return success.
    2519	 *
    2520	 *  2. Divide by 2 the size of the larger of the RX and TX queues (or both if their size is the same).
    2521	 *
    2522	 *  3. If TX or RX is smaller than 256,
    2523	 *  3.1. return failure.
    2524	 *  4. Else,
    2525	 *  4.1. go back to 1.
    2526	 */
   2527static int create_queues_with_size_backoff(struct ena_adapter *adapter)
   2528{
   2529	int rc, cur_rx_ring_size, cur_tx_ring_size;
   2530	int new_rx_ring_size, new_tx_ring_size;
   2531
    2532	/* Current queue sizes might be smaller than the requested
   2533	 * ones due to past queue allocation failures.
   2534	 */
   2535	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
   2536			  adapter->requested_rx_ring_size);
   2537
   2538	while (1) {
   2539		if (ena_xdp_present(adapter)) {
   2540			rc = ena_setup_and_create_all_xdp_queues(adapter);
   2541
   2542			if (rc)
   2543				goto err_setup_tx;
   2544		}
   2545		rc = ena_setup_tx_resources_in_range(adapter,
   2546						     0,
   2547						     adapter->num_io_queues);
   2548		if (rc)
   2549			goto err_setup_tx;
   2550
   2551		rc = ena_create_io_tx_queues_in_range(adapter,
   2552						      0,
   2553						      adapter->num_io_queues);
   2554		if (rc)
   2555			goto err_create_tx_queues;
   2556
   2557		rc = ena_setup_all_rx_resources(adapter);
   2558		if (rc)
   2559			goto err_setup_rx;
   2560
   2561		rc = ena_create_all_io_rx_queues(adapter);
   2562		if (rc)
   2563			goto err_create_rx_queues;
   2564
   2565		return 0;
   2566
   2567err_create_rx_queues:
   2568		ena_free_all_io_rx_resources(adapter);
   2569err_setup_rx:
   2570		ena_destroy_all_tx_queues(adapter);
   2571err_create_tx_queues:
   2572		ena_free_all_io_tx_resources(adapter);
   2573err_setup_tx:
   2574		if (rc != -ENOMEM) {
   2575			netif_err(adapter, ifup, adapter->netdev,
   2576				  "Queue creation failed with error code %d\n",
   2577				  rc);
   2578			return rc;
   2579		}
   2580
   2581		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
   2582		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
   2583
   2584		netif_err(adapter, ifup, adapter->netdev,
   2585			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
   2586			  cur_tx_ring_size, cur_rx_ring_size);
   2587
   2588		new_tx_ring_size = cur_tx_ring_size;
   2589		new_rx_ring_size = cur_rx_ring_size;
   2590
   2591		/* Decrease the size of the larger queue, or
   2592		 * decrease both if they are the same size.
   2593		 */
   2594		if (cur_rx_ring_size <= cur_tx_ring_size)
   2595			new_tx_ring_size = cur_tx_ring_size / 2;
   2596		if (cur_rx_ring_size >= cur_tx_ring_size)
   2597			new_rx_ring_size = cur_rx_ring_size / 2;
   2598
   2599		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
   2600		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
   2601			netif_err(adapter, ifup, adapter->netdev,
   2602				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
   2603				  ENA_MIN_RING_SIZE);
   2604			return rc;
   2605		}
   2606
   2607		netif_err(adapter, ifup, adapter->netdev,
   2608			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
   2609			  new_tx_ring_size,
   2610			  new_rx_ring_size);
   2611
   2612		set_io_rings_size(adapter, new_tx_ring_size,
   2613				  new_rx_ring_size);
   2614	}
   2615}
   2616
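        /* ena_up - Allocate and start all IO path resources
         * @adapter: ENA adapter
         *
         * Sets up IO interrupts and NAPI, creates the TX/RX (and XDP) queues
         * with size backoff, and unmasks the completion interrupts.
         */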
   2617static int ena_up(struct ena_adapter *adapter)
   2618{
   2619	int io_queue_count, rc, i;
   2620
   2621	netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
   2622
   2623	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2624	ena_setup_io_intr(adapter);
   2625
    2626	/* NAPI poll functions should be initialized before running
    2627	 * request_irq() to handle a rare condition where a pending
    2628	 * interrupt causes the ISR to fire immediately while the poll
    2629	 * function has not been set yet, leading to a NULL dereference
    2630	 */
   2631	ena_init_napi_in_range(adapter, 0, io_queue_count);
   2632
   2633	rc = ena_request_io_irq(adapter);
   2634	if (rc)
   2635		goto err_req_irq;
   2636
   2637	rc = create_queues_with_size_backoff(adapter);
   2638	if (rc)
   2639		goto err_create_queues_with_backoff;
   2640
   2641	rc = ena_up_complete(adapter);
   2642	if (rc)
   2643		goto err_up;
   2644
   2645	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
   2646		netif_carrier_on(adapter->netdev);
   2647
   2648	ena_increase_stat(&adapter->dev_stats.interface_up, 1,
   2649			  &adapter->syncp);
   2650
   2651	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
   2652
   2653	/* Enable completion queues interrupt */
   2654	for (i = 0; i < adapter->num_io_queues; i++)
   2655		ena_unmask_interrupt(&adapter->tx_ring[i],
   2656				     &adapter->rx_ring[i]);
   2657
   2658	/* schedule napi in case we had pending packets
    2659	 * from the last time we disabled napi
   2660	 */
   2661	for (i = 0; i < io_queue_count; i++)
   2662		napi_schedule(&adapter->ena_napi[i].napi);
   2663
   2664	return rc;
   2665
   2666err_up:
   2667	ena_destroy_all_tx_queues(adapter);
   2668	ena_free_all_io_tx_resources(adapter);
   2669	ena_destroy_all_rx_queues(adapter);
   2670	ena_free_all_io_rx_resources(adapter);
   2671err_create_queues_with_backoff:
   2672	ena_free_io_irq(adapter);
   2673err_req_irq:
   2674	ena_del_napi_in_range(adapter, 0, io_queue_count);
   2675
   2676	return rc;
   2677}
   2678
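        /* ena_down - Stop the IO path and free its resources
         * @adapter: ENA adapter
         *
         * Disables NAPI and IO interrupts, resets the device if a reset was
         * triggered, destroys the IO queues and frees all TX/RX buffers and
         * resources.
         */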
   2679static void ena_down(struct ena_adapter *adapter)
   2680{
   2681	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
   2682
   2683	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
   2684
   2685	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
   2686
   2687	ena_increase_stat(&adapter->dev_stats.interface_down, 1,
   2688			  &adapter->syncp);
   2689
   2690	netif_carrier_off(adapter->netdev);
   2691	netif_tx_disable(adapter->netdev);
   2692
   2693	/* After this point the napi handler won't enable the tx queue */
   2694	ena_napi_disable_in_range(adapter, 0, io_queue_count);
   2695
    2696	/* After destroying the queues there won't be any new interrupts */
   2697
   2698	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
   2699		int rc;
   2700
   2701		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
   2702		if (rc)
   2703			netif_err(adapter, ifdown, adapter->netdev,
   2704				  "Device reset failed\n");
   2705		/* stop submitting admin commands on a device that was reset */
   2706		ena_com_set_admin_running_state(adapter->ena_dev, false);
   2707	}
   2708
   2709	ena_destroy_all_io_queues(adapter);
   2710
   2711	ena_disable_io_intr_sync(adapter);
   2712	ena_free_io_irq(adapter);
   2713	ena_del_napi_in_range(adapter, 0, io_queue_count);
   2714
   2715	ena_free_all_tx_bufs(adapter);
   2716	ena_free_all_rx_bufs(adapter);
   2717	ena_free_all_io_tx_resources(adapter);
   2718	ena_free_all_io_rx_resources(adapter);
   2719}
   2720
   2721/* ena_open - Called when a network interface is made active
   2722 * @netdev: network interface device structure
   2723 *
   2724 * Returns 0 on success, negative value on failure
   2725 *
   2726 * The open entry point is called when a network interface is made
   2727 * active by the system (IFF_UP).  At this point all resources needed
   2728 * for transmit and receive operations are allocated, the interrupt
   2729 * handler is registered with the OS, the watchdog timer is started,
   2730 * and the stack is notified that the interface is ready.
   2731 */
   2732static int ena_open(struct net_device *netdev)
   2733{
   2734	struct ena_adapter *adapter = netdev_priv(netdev);
   2735	int rc;
   2736
   2737	/* Notify the stack of the actual queue counts. */
   2738	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
   2739	if (rc) {
   2740		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
   2741		return rc;
   2742	}
   2743
   2744	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
   2745	if (rc) {
   2746		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
   2747		return rc;
   2748	}
   2749
   2750	rc = ena_up(adapter);
   2751	if (rc)
   2752		return rc;
   2753
   2754	return rc;
   2755}
   2756
   2757/* ena_close - Disables a network interface
   2758 * @netdev: network interface device structure
   2759 *
   2760 * Returns 0, this is not allowed to fail
   2761 *
   2762 * The close entry point is called when an interface is de-activated
   2763 * by the OS.  The hardware is still under the drivers control, but
   2764 * needs to be disabled.  A global MAC reset is issued to stop the
   2765 * hardware, and all transmit and receive resources are freed.
   2766 */
   2767static int ena_close(struct net_device *netdev)
   2768{
   2769	struct ena_adapter *adapter = netdev_priv(netdev);
   2770
   2771	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
   2772
   2773	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
   2774		return 0;
   2775
   2776	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
   2777		ena_down(adapter);
   2778
    2779	/* Check the device status and issue a reset if needed */
   2780	check_for_admin_com_state(adapter);
   2781	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
   2782		netif_err(adapter, ifdown, adapter->netdev,
   2783			  "Destroy failure, restarting device\n");
   2784		ena_dump_stats_to_dmesg(adapter);
   2785		/* rtnl lock already obtained in dev_ioctl() layer */
   2786		ena_destroy_device(adapter, false);
   2787		ena_restore_device(adapter);
   2788	}
   2789
   2790	return 0;
   2791}
   2792
   2793int ena_update_queue_sizes(struct ena_adapter *adapter,
   2794			   u32 new_tx_size,
   2795			   u32 new_rx_size)
   2796{
   2797	bool dev_was_up;
   2798
   2799	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
   2800	ena_close(adapter->netdev);
   2801	adapter->requested_tx_ring_size = new_tx_size;
   2802	adapter->requested_rx_ring_size = new_rx_size;
   2803	ena_init_io_rings(adapter,
   2804			  0,
   2805			  adapter->xdp_num_queues +
   2806			  adapter->num_io_queues);
   2807	return dev_was_up ? ena_up(adapter) : 0;
   2808}
   2809
   2810int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
   2811{
   2812	struct ena_com_dev *ena_dev = adapter->ena_dev;
   2813	int prev_channel_count;
   2814	bool dev_was_up;
   2815
   2816	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
   2817	ena_close(adapter->netdev);
   2818	prev_channel_count = adapter->num_io_queues;
   2819	adapter->num_io_queues = new_channel_count;
   2820	if (ena_xdp_present(adapter) &&
   2821	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
   2822		adapter->xdp_first_ring = new_channel_count;
   2823		adapter->xdp_num_queues = new_channel_count;
   2824		if (prev_channel_count > new_channel_count)
   2825			ena_xdp_exchange_program_rx_in_range(adapter,
   2826							     NULL,
   2827							     new_channel_count,
   2828							     prev_channel_count);
   2829		else
   2830			ena_xdp_exchange_program_rx_in_range(adapter,
   2831							     adapter->xdp_bpf_prog,
   2832							     prev_channel_count,
   2833							     new_channel_count);
   2834	}
   2835
   2836	/* We need to destroy the rss table so that the indirection
   2837	 * table will be reinitialized by ena_up()
   2838	 */
   2839	ena_com_rss_destroy(ena_dev);
   2840	ena_init_io_rings(adapter,
   2841			  0,
   2842			  adapter->xdp_num_queues +
   2843			  adapter->num_io_queues);
   2844	return dev_was_up ? ena_open(adapter->netdev) : 0;
   2845}
   2846
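        /* ena_tx_csum - Fill checksum and TSO offload metadata for a TX packet
         * @ena_tx_ctx: TX context handed to ena_com
         * @skb: packet being transmitted
         * @disable_meta_caching: when true, a (possibly zeroed) meta descriptor
         *	is sent with every packet instead of relying on the cached one
         */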
   2847static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
   2848			struct sk_buff *skb,
   2849			bool disable_meta_caching)
   2850{
   2851	u32 mss = skb_shinfo(skb)->gso_size;
   2852	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
   2853	u8 l4_protocol = 0;
   2854
   2855	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
   2856		ena_tx_ctx->l4_csum_enable = 1;
   2857		if (mss) {
   2858			ena_tx_ctx->tso_enable = 1;
   2859			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
   2860			ena_tx_ctx->l4_csum_partial = 0;
   2861		} else {
   2862			ena_tx_ctx->tso_enable = 0;
   2863			ena_meta->l4_hdr_len = 0;
   2864			ena_tx_ctx->l4_csum_partial = 1;
   2865		}
   2866
   2867		switch (ip_hdr(skb)->version) {
   2868		case IPVERSION:
   2869			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
   2870			if (ip_hdr(skb)->frag_off & htons(IP_DF))
   2871				ena_tx_ctx->df = 1;
   2872			if (mss)
   2873				ena_tx_ctx->l3_csum_enable = 1;
   2874			l4_protocol = ip_hdr(skb)->protocol;
   2875			break;
   2876		case 6:
   2877			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
   2878			l4_protocol = ipv6_hdr(skb)->nexthdr;
   2879			break;
   2880		default:
   2881			break;
   2882		}
   2883
   2884		if (l4_protocol == IPPROTO_TCP)
   2885			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
   2886		else
   2887			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
   2888
   2889		ena_meta->mss = mss;
   2890		ena_meta->l3_hdr_len = skb_network_header_len(skb);
   2891		ena_meta->l3_hdr_offset = skb_network_offset(skb);
   2892		ena_tx_ctx->meta_valid = 1;
   2893	} else if (disable_meta_caching) {
   2894		memset(ena_meta, 0, sizeof(*ena_meta));
   2895		ena_tx_ctx->meta_valid = 1;
   2896	} else {
   2897		ena_tx_ctx->meta_valid = 0;
   2898	}
   2899}
   2900
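        /* ena_check_and_linearize_skb - Linearize an skb that has too many fragments
         * @tx_ring: TX ring the skb is queued on
         * @skb: packet to check
         *
         * Returns 0 if the skb fits the ring's SGL limits, otherwise the result
         * of skb_linearize().
         */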
   2901static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
   2902				       struct sk_buff *skb)
   2903{
   2904	int num_frags, header_len, rc;
   2905
   2906	num_frags = skb_shinfo(skb)->nr_frags;
   2907	header_len = skb_headlen(skb);
   2908
   2909	if (num_frags < tx_ring->sgl_size)
   2910		return 0;
   2911
   2912	if ((num_frags == tx_ring->sgl_size) &&
   2913	    (header_len < tx_ring->tx_max_header_size))
   2914		return 0;
   2915
   2916	ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
   2917
   2918	rc = skb_linearize(skb);
   2919	if (unlikely(rc)) {
   2920		ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
   2921				  &tx_ring->syncp);
   2922	}
   2923
   2924	return rc;
   2925}
   2926
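        /* ena_tx_map_skb - DMA-map an skb for transmission
         * @tx_ring: TX ring the skb will be sent on
         * @tx_info: TX buffer info to fill with the mapped buffers
         * @skb: packet to map
         * @push_hdr: output, header to be pushed to the device (LLQ mode) or NULL
         * @header_len: output, length of the header portion
         *
         * Returns 0 on success, -EINVAL if a DMA mapping fails.
         */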
   2927static int ena_tx_map_skb(struct ena_ring *tx_ring,
   2928			  struct ena_tx_buffer *tx_info,
   2929			  struct sk_buff *skb,
   2930			  void **push_hdr,
   2931			  u16 *header_len)
   2932{
   2933	struct ena_adapter *adapter = tx_ring->adapter;
   2934	struct ena_com_buf *ena_buf;
   2935	dma_addr_t dma;
   2936	u32 skb_head_len, frag_len, last_frag;
   2937	u16 push_len = 0;
   2938	u16 delta = 0;
   2939	int i = 0;
   2940
   2941	skb_head_len = skb_headlen(skb);
   2942	tx_info->skb = skb;
   2943	ena_buf = tx_info->bufs;
   2944
   2945	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
    2946		/* When the device is in LLQ mode, the driver copies
    2947		 * the header into the device memory space.
    2948		 * The ena_com layer assumes the header is in a linear
    2949		 * memory space.
    2950		 * This assumption might be wrong since part of the header
    2951		 * can be in the fragmented buffers.
    2952		 * Use skb_header_pointer to make sure the header is in a
    2953		 * linear memory space.
    2954		 */
   2955
   2956		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
   2957		*push_hdr = skb_header_pointer(skb, 0, push_len,
   2958					       tx_ring->push_buf_intermediate_buf);
   2959		*header_len = push_len;
   2960		if (unlikely(skb->data != *push_hdr)) {
   2961			ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
   2962					  &tx_ring->syncp);
   2963
   2964			delta = push_len - skb_head_len;
   2965		}
   2966	} else {
   2967		*push_hdr = NULL;
   2968		*header_len = min_t(u32, skb_head_len,
   2969				    tx_ring->tx_max_header_size);
   2970	}
   2971
   2972	netif_dbg(adapter, tx_queued, adapter->netdev,
   2973		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
   2974		  *push_hdr, push_len);
   2975
   2976	if (skb_head_len > push_len) {
   2977		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
   2978				     skb_head_len - push_len, DMA_TO_DEVICE);
   2979		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
   2980			goto error_report_dma_error;
   2981
   2982		ena_buf->paddr = dma;
   2983		ena_buf->len = skb_head_len - push_len;
   2984
   2985		ena_buf++;
   2986		tx_info->num_of_bufs++;
   2987		tx_info->map_linear_data = 1;
   2988	} else {
   2989		tx_info->map_linear_data = 0;
   2990	}
   2991
   2992	last_frag = skb_shinfo(skb)->nr_frags;
   2993
   2994	for (i = 0; i < last_frag; i++) {
   2995		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
   2996
   2997		frag_len = skb_frag_size(frag);
   2998
   2999		if (unlikely(delta >= frag_len)) {
   3000			delta -= frag_len;
   3001			continue;
   3002		}
   3003
   3004		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
   3005				       frag_len - delta, DMA_TO_DEVICE);
   3006		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
   3007			goto error_report_dma_error;
   3008
   3009		ena_buf->paddr = dma;
   3010		ena_buf->len = frag_len - delta;
   3011		ena_buf++;
   3012		tx_info->num_of_bufs++;
   3013		delta = 0;
   3014	}
   3015
   3016	return 0;
   3017
   3018error_report_dma_error:
   3019	ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
   3020			  &tx_ring->syncp);
   3021	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
   3022
   3023	tx_info->skb = NULL;
   3024
   3025	tx_info->num_of_bufs += i;
   3026	ena_unmap_tx_buff(tx_ring, tx_info);
   3027
   3028	return -EINVAL;
   3029}
   3030
   3031/* Called with netif_tx_lock. */
   3032static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
   3033{
   3034	struct ena_adapter *adapter = netdev_priv(dev);
   3035	struct ena_tx_buffer *tx_info;
   3036	struct ena_com_tx_ctx ena_tx_ctx;
   3037	struct ena_ring *tx_ring;
   3038	struct netdev_queue *txq;
   3039	void *push_hdr;
   3040	u16 next_to_use, req_id, header_len;
   3041	int qid, rc;
   3042
   3043	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
    3044	/* Determine which tx ring the skb will be placed on */
   3045	qid = skb_get_queue_mapping(skb);
   3046	tx_ring = &adapter->tx_ring[qid];
   3047	txq = netdev_get_tx_queue(dev, qid);
   3048
   3049	rc = ena_check_and_linearize_skb(tx_ring, skb);
   3050	if (unlikely(rc))
   3051		goto error_drop_packet;
   3052
   3053	skb_tx_timestamp(skb);
   3054
   3055	next_to_use = tx_ring->next_to_use;
   3056	req_id = tx_ring->free_ids[next_to_use];
   3057	tx_info = &tx_ring->tx_buffer_info[req_id];
   3058	tx_info->num_of_bufs = 0;
   3059
   3060	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
   3061
   3062	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
   3063	if (unlikely(rc))
   3064		goto error_drop_packet;
   3065
   3066	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
   3067	ena_tx_ctx.ena_bufs = tx_info->bufs;
   3068	ena_tx_ctx.push_header = push_hdr;
   3069	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
   3070	ena_tx_ctx.req_id = req_id;
   3071	ena_tx_ctx.header_len = header_len;
   3072
    3073	/* Set flags and metadata */
   3074	ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
   3075
   3076	rc = ena_xmit_common(dev,
   3077			     tx_ring,
   3078			     tx_info,
   3079			     &ena_tx_ctx,
   3080			     next_to_use,
   3081			     skb->len);
   3082	if (rc)
   3083		goto error_unmap_dma;
   3084
   3085	netdev_tx_sent_queue(txq, skb->len);
   3086
    3087	/* Stop the queue when no more space is available. The packet can use up
    3088	 * to sgl_size + 2 descriptors: one for the meta descriptor and one for the header
    3089	 * (if the header is larger than tx_max_header_size).
    3090	 */
   3091	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
   3092						   tx_ring->sgl_size + 2))) {
   3093		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
   3094			  __func__, qid);
   3095
   3096		netif_tx_stop_queue(txq);
   3097		ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
   3098				  &tx_ring->syncp);
   3099
    3100		/* There is a rare condition where this function decides to
    3101		 * stop the queue but meanwhile clean_tx_irq updates
    3102		 * next_to_completion and terminates.
    3103		 * The queue will remain stopped forever.
    3104		 * To solve this issue add a memory barrier to make sure that the
    3105		 * netif_tx_stop_queue() write is visible before checking if
    3106		 * there is additional space in the queue.
    3107		 */
   3108		smp_mb();
   3109
   3110		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
   3111						 ENA_TX_WAKEUP_THRESH)) {
   3112			netif_tx_wake_queue(txq);
   3113			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
   3114					  &tx_ring->syncp);
   3115		}
   3116	}
   3117
   3118	if (netif_xmit_stopped(txq) || !netdev_xmit_more())
    3119		/* Trigger the DMA engine. ena_ring_tx_doorbell()
    3120		 * issues a memory barrier internally.
    3121		 */
   3122		ena_ring_tx_doorbell(tx_ring);
   3123
   3124	return NETDEV_TX_OK;
   3125
   3126error_unmap_dma:
   3127	ena_unmap_tx_buff(tx_ring, tx_info);
   3128	tx_info->skb = NULL;
   3129
   3130error_drop_packet:
   3131	dev_kfree_skb(skb);
   3132	return NETDEV_TX_OK;
   3133}
   3134
   3135static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
   3136			    struct net_device *sb_dev)
   3137{
   3138	u16 qid;
    3139	/* We suspect that this is good for in-kernel network services that
    3140	 * want to loop an incoming skb's RX queue back to TX; with normal
    3141	 * user-generated traffic we will most probably not get here.
    3142	 */
   3143	if (skb_rx_queue_recorded(skb))
   3144		qid = skb_get_rx_queue(skb);
   3145	else
   3146		qid = netdev_pick_tx(dev, skb, NULL);
   3147
   3148	return qid;
   3149}
   3150
   3151static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
   3152{
   3153	struct device *dev = &pdev->dev;
   3154	struct ena_admin_host_info *host_info;
   3155	int rc;
   3156
   3157	/* Allocate only the host info */
   3158	rc = ena_com_allocate_host_info(ena_dev);
   3159	if (rc) {
   3160		dev_err(dev, "Cannot allocate host info\n");
   3161		return;
   3162	}
   3163
   3164	host_info = ena_dev->host_attr.host_info;
   3165
   3166	host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
   3167	host_info->os_type = ENA_ADMIN_OS_LINUX;
   3168	host_info->kernel_ver = LINUX_VERSION_CODE;
   3169	strlcpy(host_info->kernel_ver_str, utsname()->version,
   3170		sizeof(host_info->kernel_ver_str) - 1);
   3171	host_info->os_dist = 0;
   3172	strncpy(host_info->os_dist_str, utsname()->release,
   3173		sizeof(host_info->os_dist_str) - 1);
   3174	host_info->driver_version =
   3175		(DRV_MODULE_GEN_MAJOR) |
   3176		(DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
   3177		(DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
   3178		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
   3179	host_info->num_cpus = num_online_cpus();
   3180
   3181	host_info->driver_supported_features =
   3182		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
   3183		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
   3184		ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
   3185		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
   3186
   3187	rc = ena_com_set_host_attributes(ena_dev);
   3188	if (rc) {
   3189		if (rc == -EOPNOTSUPP)
   3190			dev_warn(dev, "Cannot set host attributes\n");
   3191		else
   3192			dev_err(dev, "Cannot set host attributes\n");
   3193
   3194		goto err;
   3195	}
   3196
   3197	return;
   3198
   3199err:
   3200	ena_com_delete_host_info(ena_dev);
   3201}
   3202
   3203static void ena_config_debug_area(struct ena_adapter *adapter)
   3204{
   3205	u32 debug_area_size;
   3206	int rc, ss_count;
   3207
   3208	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
   3209	if (ss_count <= 0) {
   3210		netif_err(adapter, drv, adapter->netdev,
   3211			  "SS count is negative\n");
   3212		return;
   3213	}
   3214
    3215	/* Allocate 32 bytes for each string and 64 bits for the value */
   3216	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
   3217
   3218	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
   3219	if (rc) {
   3220		netif_err(adapter, drv, adapter->netdev,
   3221			  "Cannot allocate debug area\n");
   3222		return;
   3223	}
   3224
   3225	rc = ena_com_set_host_attributes(adapter->ena_dev);
   3226	if (rc) {
   3227		if (rc == -EOPNOTSUPP)
   3228			netif_warn(adapter, drv, adapter->netdev,
   3229				   "Cannot set host attributes\n");
   3230		else
   3231			netif_err(adapter, drv, adapter->netdev,
   3232				  "Cannot set host attributes\n");
   3233		goto err;
   3234	}
   3235
   3236	return;
   3237err:
   3238	ena_com_delete_debug_area(adapter->ena_dev);
   3239}
   3240
   3241int ena_update_hw_stats(struct ena_adapter *adapter)
   3242{
   3243	int rc;
   3244
   3245	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
   3246	if (rc) {
   3247		netdev_err(adapter->netdev, "Failed to get ENI stats\n");
   3248		return rc;
   3249	}
   3250
   3251	return 0;
   3252}
   3253
   3254static void ena_get_stats64(struct net_device *netdev,
   3255			    struct rtnl_link_stats64 *stats)
   3256{
   3257	struct ena_adapter *adapter = netdev_priv(netdev);
   3258	struct ena_ring *rx_ring, *tx_ring;
   3259	unsigned int start;
   3260	u64 rx_drops;
   3261	u64 tx_drops;
   3262	int i;
   3263
   3264	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
   3265		return;
   3266
   3267	for (i = 0; i < adapter->num_io_queues; i++) {
   3268		u64 bytes, packets;
   3269
   3270		tx_ring = &adapter->tx_ring[i];
   3271
   3272		do {
   3273			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
   3274			packets = tx_ring->tx_stats.cnt;
   3275			bytes = tx_ring->tx_stats.bytes;
   3276		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
   3277
   3278		stats->tx_packets += packets;
   3279		stats->tx_bytes += bytes;
   3280
   3281		rx_ring = &adapter->rx_ring[i];
   3282
   3283		do {
   3284			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
   3285			packets = rx_ring->rx_stats.cnt;
   3286			bytes = rx_ring->rx_stats.bytes;
   3287		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
   3288
   3289		stats->rx_packets += packets;
   3290		stats->rx_bytes += bytes;
   3291	}
   3292
   3293	do {
   3294		start = u64_stats_fetch_begin_irq(&adapter->syncp);
   3295		rx_drops = adapter->dev_stats.rx_drops;
   3296		tx_drops = adapter->dev_stats.tx_drops;
   3297	} while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
   3298
   3299	stats->rx_dropped = rx_drops;
   3300	stats->tx_dropped = tx_drops;
   3301
   3302	stats->multicast = 0;
   3303	stats->collisions = 0;
   3304
   3305	stats->rx_length_errors = 0;
   3306	stats->rx_crc_errors = 0;
   3307	stats->rx_frame_errors = 0;
   3308	stats->rx_fifo_errors = 0;
   3309	stats->rx_missed_errors = 0;
   3310	stats->tx_window_errors = 0;
   3311
   3312	stats->rx_errors = 0;
   3313	stats->tx_errors = 0;
   3314}
   3315
   3316static const struct net_device_ops ena_netdev_ops = {
   3317	.ndo_open		= ena_open,
   3318	.ndo_stop		= ena_close,
   3319	.ndo_start_xmit		= ena_start_xmit,
   3320	.ndo_select_queue	= ena_select_queue,
   3321	.ndo_get_stats64	= ena_get_stats64,
   3322	.ndo_tx_timeout		= ena_tx_timeout,
   3323	.ndo_change_mtu		= ena_change_mtu,
   3324	.ndo_set_mac_address	= NULL,
   3325	.ndo_validate_addr	= eth_validate_addr,
   3326	.ndo_bpf		= ena_xdp,
   3327	.ndo_xdp_xmit		= ena_xdp_xmit,
   3328};
   3329
   3330static int ena_device_validate_params(struct ena_adapter *adapter,
   3331				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
   3332{
   3333	struct net_device *netdev = adapter->netdev;
   3334	int rc;
   3335
   3336	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
   3337			      adapter->mac_addr);
   3338	if (!rc) {
   3339		netif_err(adapter, drv, netdev,
    3340			  "Error, mac addresses are different\n");
   3341		return -EINVAL;
   3342	}
   3343
   3344	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
   3345		netif_err(adapter, drv, netdev,
   3346			  "Error, device max mtu is smaller than netdev MTU\n");
   3347		return -EINVAL;
   3348	}
   3349
   3350	return 0;
   3351}
   3352
   3353static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
   3354{
   3355	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
   3356	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
   3357	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
   3358	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
   3359	llq_config->llq_ring_entry_size_value = 128;
   3360}
   3361
   3362static int ena_set_queues_placement_policy(struct pci_dev *pdev,
   3363					   struct ena_com_dev *ena_dev,
   3364					   struct ena_admin_feature_llq_desc *llq,
   3365					   struct ena_llq_configurations *llq_default_configurations)
   3366{
   3367	int rc;
   3368	u32 llq_feature_mask;
   3369
   3370	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
   3371	if (!(ena_dev->supported_features & llq_feature_mask)) {
   3372		dev_warn(&pdev->dev,
    3373			"LLQ is not supported. Fallback to host mode policy.\n");
   3374		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
   3375		return 0;
   3376	}
   3377
   3378	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
   3379	if (unlikely(rc)) {
   3380		dev_err(&pdev->dev,
    3381			"Failed to configure the device mode. Fallback to host mode policy.\n");
   3382		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
   3383	}
   3384
   3385	return 0;
   3386}
   3387
   3388static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
   3389			       int bars)
   3390{
   3391	bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
   3392
   3393	if (!has_mem_bar) {
   3394		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
   3395			dev_err(&pdev->dev,
   3396				"ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
   3397			ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
   3398		}
   3399
   3400		return 0;
   3401	}
   3402
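        	/* Map the LLQ memory bar write-combined so that descriptor writes to the
        	 * device may be merged by the CPU.
        	 */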
   3403	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
   3404					   pci_resource_start(pdev, ENA_MEM_BAR),
   3405					   pci_resource_len(pdev, ENA_MEM_BAR));
   3406
   3407	if (!ena_dev->mem_bar)
   3408		return -EFAULT;
   3409
   3410	return 0;
   3411}
   3412
   3413static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
   3414			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
   3415			   bool *wd_state)
   3416{
   3417	struct ena_llq_configurations llq_config;
   3418	struct device *dev = &pdev->dev;
   3419	bool readless_supported;
   3420	u32 aenq_groups;
   3421	int dma_width;
   3422	int rc;
   3423
   3424	rc = ena_com_mmio_reg_read_request_init(ena_dev);
   3425	if (rc) {
   3426		dev_err(dev, "Failed to init mmio read less\n");
   3427		return rc;
   3428	}
   3429
    3430	/* The PCIe configuration space revision id indicates whether mmio
    3431	 * register read is disabled.
    3432	 */
   3433	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
   3434	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
   3435
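        	/* Reset the device to bring it to a known state before admin queue init */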
   3436	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
   3437	if (rc) {
   3438		dev_err(dev, "Can not reset device\n");
   3439		goto err_mmio_read_less;
   3440	}
   3441
   3442	rc = ena_com_validate_version(ena_dev);
   3443	if (rc) {
   3444		dev_err(dev, "Device version is too low\n");
   3445		goto err_mmio_read_less;
   3446	}
   3447
   3448	dma_width = ena_com_get_dma_width(ena_dev);
   3449	if (dma_width < 0) {
    3450		dev_err(dev, "Invalid dma width value %d\n", dma_width);
   3451		rc = dma_width;
   3452		goto err_mmio_read_less;
   3453	}
   3454
   3455	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
   3456	if (rc) {
   3457		dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
   3458		goto err_mmio_read_less;
   3459	}
   3460
   3461	/* ENA admin level init */
   3462	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
   3463	if (rc) {
   3464		dev_err(dev,
   3465			"Can not initialize ena admin queue with device\n");
   3466		goto err_mmio_read_less;
   3467	}
   3468
    3469	/* To enable the MSI-X interrupts the driver needs to know the
    3470	 * number of queues, so it uses polling mode to retrieve this
    3471	 * information.
    3472	 */
   3473	ena_com_set_admin_polling_mode(ena_dev, true);
   3474
   3475	ena_config_host_info(ena_dev, pdev);
   3476
    3477	/* Get Device Attributes */
   3478	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
   3479	if (rc) {
   3480		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
   3481		goto err_admin_init;
   3482	}
   3483
    3484	/* Try to turn on all the available AENQ groups */
   3485	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
   3486		BIT(ENA_ADMIN_FATAL_ERROR) |
   3487		BIT(ENA_ADMIN_WARNING) |
   3488		BIT(ENA_ADMIN_NOTIFICATION) |
   3489		BIT(ENA_ADMIN_KEEP_ALIVE);
   3490
   3491	aenq_groups &= get_feat_ctx->aenq.supported_groups;
   3492
   3493	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
   3494	if (rc) {
   3495		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
   3496		goto err_admin_init;
   3497	}
   3498
   3499	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
   3500
   3501	set_default_llq_configurations(&llq_config);
   3502
   3503	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
   3504					     &llq_config);
   3505	if (rc) {
   3506		dev_err(dev, "ENA device init failed\n");
   3507		goto err_admin_init;
   3508	}
   3509
   3510	return 0;
   3511
   3512err_admin_init:
   3513	ena_com_delete_host_info(ena_dev);
   3514	ena_com_admin_destroy(ena_dev);
   3515err_mmio_read_less:
   3516	ena_com_mmio_reg_read_request_destroy(ena_dev);
   3517
   3518	return rc;
   3519}
   3520
   3521static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
   3522{
   3523	struct ena_com_dev *ena_dev = adapter->ena_dev;
   3524	struct device *dev = &adapter->pdev->dev;
   3525	int rc;
   3526
   3527	rc = ena_enable_msix(adapter);
   3528	if (rc) {
   3529		dev_err(dev, "Can not reserve msix vectors\n");
   3530		return rc;
   3531	}
   3532
   3533	ena_setup_mgmnt_intr(adapter);
   3534
   3535	rc = ena_request_mgmnt_irq(adapter);
   3536	if (rc) {
   3537		dev_err(dev, "Can not setup management interrupts\n");
   3538		goto err_disable_msix;
   3539	}
   3540
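        	/* The management IRQ is in place, switch the admin queue from
        	 * polling to interrupt mode.
        	 */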
   3541	ena_com_set_admin_polling_mode(ena_dev, false);
   3542
   3543	ena_com_admin_aenq_enable(ena_dev);
   3544
   3545	return 0;
   3546
   3547err_disable_msix:
   3548	ena_disable_msix(adapter);
   3549
   3550	return rc;
   3551}
   3552
   3553static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
   3554{
   3555	struct net_device *netdev = adapter->netdev;
   3556	struct ena_com_dev *ena_dev = adapter->ena_dev;
   3557	bool dev_up;
   3558
   3559	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
   3560		return;
   3561
   3562	netif_carrier_off(netdev);
   3563
   3564	del_timer_sync(&adapter->timer_service);
   3565
   3566	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
   3567	adapter->dev_up_before_reset = dev_up;
   3568	if (!graceful)
   3569		ena_com_set_admin_running_state(ena_dev, false);
   3570
   3571	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
   3572		ena_down(adapter);
   3573
    3574	/* Stop the device from sending AENQ events (if the reset flag is set
    3575	 * and the device is up, ena_down() has already reset the device).
    3576	 */
   3577	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
   3578		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
   3579
   3580	ena_free_mgmnt_irq(adapter);
   3581
   3582	ena_disable_msix(adapter);
   3583
   3584	ena_com_abort_admin_commands(ena_dev);
   3585
   3586	ena_com_wait_for_abort_completion(ena_dev);
   3587
   3588	ena_com_admin_destroy(ena_dev);
   3589
   3590	ena_com_mmio_reg_read_request_destroy(ena_dev);
   3591
   3592	/* return reset reason to default value */
   3593	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
   3594
   3595	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
   3596	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
   3597}
   3598
   3599static int ena_restore_device(struct ena_adapter *adapter)
   3600{
   3601	struct ena_com_dev_get_features_ctx get_feat_ctx;
   3602	struct ena_com_dev *ena_dev = adapter->ena_dev;
   3603	struct pci_dev *pdev = adapter->pdev;
   3604	bool wd_state;
   3605	int rc;
   3606
   3607	set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
   3608	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
   3609	if (rc) {
   3610		dev_err(&pdev->dev, "Can not initialize device\n");
   3611		goto err;
   3612	}
   3613	adapter->wd_state = wd_state;
   3614
   3615	rc = ena_device_validate_params(adapter, &get_feat_ctx);
   3616	if (rc) {
   3617		dev_err(&pdev->dev, "Validation of device parameters failed\n");
   3618		goto err_device_destroy;
   3619	}
   3620
   3621	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
   3622	if (rc) {
   3623		dev_err(&pdev->dev, "Enable MSI-X failed\n");
   3624		goto err_device_destroy;
   3625	}
    3626	/* If the interface was up before the reset, bring it back up */
   3627	if (adapter->dev_up_before_reset) {
   3628		rc = ena_up(adapter);
   3629		if (rc) {
   3630			dev_err(&pdev->dev, "Failed to create I/O queues\n");
   3631			goto err_disable_msix;
   3632		}
   3633	}
   3634
   3635	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
   3636
   3637	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
   3638	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
   3639		netif_carrier_on(adapter->netdev);
   3640
   3641	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
   3642	adapter->last_keep_alive_jiffies = jiffies;
   3643
   3644	return rc;
   3645err_disable_msix:
   3646	ena_free_mgmnt_irq(adapter);
   3647	ena_disable_msix(adapter);
   3648err_device_destroy:
   3649	ena_com_abort_admin_commands(ena_dev);
   3650	ena_com_wait_for_abort_completion(ena_dev);
   3651	ena_com_admin_destroy(ena_dev);
   3652	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
   3653	ena_com_mmio_reg_read_request_destroy(ena_dev);
   3654err:
   3655	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
   3656	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
   3657	dev_err(&pdev->dev,
   3658		"Reset attempt failed. Can not reset the device\n");
   3659
   3660	return rc;
   3661}
   3662
   3663static void ena_fw_reset_device(struct work_struct *work)
   3664{
   3665	struct ena_adapter *adapter =
   3666		container_of(work, struct ena_adapter, reset_task);
   3667
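        	/* The reset runs under the rtnl lock, which also serializes ndo_open/ndo_close */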
   3668	rtnl_lock();
   3669
   3670	if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
   3671		ena_destroy_device(adapter, false);
   3672		ena_restore_device(adapter);
   3673
   3674		dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
   3675	}
   3676
   3677	rtnl_unlock();
   3678}
   3679
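        	/* Detect a queue that has pending Rx completions but never received an
        	 * interrupt. After ENA_MAX_NO_INTERRUPT_ITERATIONS consecutive detections
        	 * the device is reset.
        	 */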
   3680static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
   3681					struct ena_ring *rx_ring)
   3682{
   3683	struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
   3684
   3685	if (likely(READ_ONCE(ena_napi->first_interrupt)))
   3686		return 0;
   3687
   3688	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
   3689		return 0;
   3690
   3691	rx_ring->no_interrupt_event_cnt++;
   3692
   3693	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
   3694		netif_err(adapter, rx_err, adapter->netdev,
    3695			  "Potential MSIX issue on Rx side, Queue = %d. Reset the device\n",
   3696			  rx_ring->qid);
   3697
   3698		ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
   3699		return -EIO;
   3700	}
   3701
   3702	return 0;
   3703}
   3704
   3705static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
   3706					  struct ena_ring *tx_ring)
   3707{
   3708	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
   3709	unsigned int time_since_last_napi;
   3710	unsigned int missing_tx_comp_to;
   3711	bool is_tx_comp_time_expired;
   3712	struct ena_tx_buffer *tx_buf;
   3713	unsigned long last_jiffies;
   3714	u32 missed_tx = 0;
   3715	int i, rc = 0;
   3716
   3717	for (i = 0; i < tx_ring->ring_size; i++) {
   3718		tx_buf = &tx_ring->tx_buffer_info[i];
   3719		last_jiffies = tx_buf->last_jiffies;
   3720
   3721		if (last_jiffies == 0)
   3722			/* no pending Tx at this location */
   3723			continue;
   3724
   3725		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
   3726			 2 * adapter->missing_tx_completion_to);
   3727
   3728		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
    3729			/* If the interrupt is still not received after the
    3730			 * grace period, schedule a reset.
    3731			 */
   3732			netif_err(adapter, tx_err, adapter->netdev,
    3733				  "Potential MSIX issue on Tx side, Queue = %d. Reset the device\n",
   3734				  tx_ring->qid);
   3735			ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
   3736			return -EIO;
   3737		}
   3738
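        		/* Count completions that exceeded the regular timeout as missed */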
   3739		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
   3740			adapter->missing_tx_completion_to);
   3741
   3742		if (unlikely(is_tx_comp_time_expired)) {
   3743			if (!tx_buf->print_once) {
   3744				time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
   3745				missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
   3746				netif_notice(adapter, tx_err, adapter->netdev,
   3747					     "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n",
   3748					     tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to);
   3749			}
   3750
   3751			tx_buf->print_once = 1;
   3752			missed_tx++;
   3753		}
   3754	}
   3755
   3756	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
   3757		netif_err(adapter, tx_err, adapter->netdev,
   3758			  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
   3759			  missed_tx,
   3760			  adapter->missing_tx_completion_threshold);
   3761		ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
   3762		rc = -EIO;
   3763	}
   3764
   3765	ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
   3766			  &tx_ring->syncp);
   3767
   3768	return rc;
   3769}
   3770
   3771static void check_for_missing_completions(struct ena_adapter *adapter)
   3772{
   3773	struct ena_ring *tx_ring;
   3774	struct ena_ring *rx_ring;
   3775	int i, budget, rc;
   3776	int io_queue_count;
   3777
   3778	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
    3779	/* Make sure the driver doesn't turn the device up/down in another process */
   3780	smp_rmb();
   3781
   3782	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
   3783		return;
   3784
   3785	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
   3786		return;
   3787
   3788	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
   3789		return;
   3790
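        	/* Check at most ENA_MONITORED_TX_QUEUES queues per invocation,
        	 * resuming from where the previous scan left off.
        	 */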
   3791	budget = ENA_MONITORED_TX_QUEUES;
   3792
   3793	for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
   3794		tx_ring = &adapter->tx_ring[i];
   3795		rx_ring = &adapter->rx_ring[i];
   3796
   3797		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
   3798		if (unlikely(rc))
   3799			return;
   3800
    3801		rc = !ENA_IS_XDP_INDEX(adapter, i) ?
   3802			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
   3803		if (unlikely(rc))
   3804			return;
   3805
   3806		budget--;
   3807		if (!budget)
   3808			break;
   3809	}
   3810
   3811	adapter->last_monitored_tx_qid = i % io_queue_count;
   3812}
   3813
   3814/* trigger napi schedule after 2 consecutive detections */
   3815#define EMPTY_RX_REFILL 2
    3816	/* For the rare case where the device runs out of Rx descriptors and the
    3817	 * napi handler failed to refill new Rx descriptors (due to a lack of memory
    3818	 * for example).
    3819	 * This case will lead to a deadlock:
    3820	 * the device won't send interrupts since all the new Rx packets will be
    3821	 * dropped, and the napi handler won't allocate new Rx descriptors, so the
    3822	 * device won't be able to deliver new packets to the host.
    3823	 *
    3824	 * This scenario can happen when the kernel's vm.min_free_kbytes is too small
    3825	 * (it is recommended to have at least 512MB, with a minimum of 128MB for
    3826	 * constrained environments).
    3827	 *
    3828	 * When such a situation is detected - reschedule napi.
    3829	 */
   3830static void check_for_empty_rx_ring(struct ena_adapter *adapter)
   3831{
   3832	struct ena_ring *rx_ring;
   3833	int i, refill_required;
   3834
   3835	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
   3836		return;
   3837
   3838	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
   3839		return;
   3840
   3841	for (i = 0; i < adapter->num_io_queues; i++) {
   3842		rx_ring = &adapter->rx_ring[i];
   3843
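        		/* A ring where all but one entry are free has effectively run dry */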
   3844		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
   3845		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
   3846			rx_ring->empty_rx_queue++;
   3847
   3848			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
   3849				ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
   3850						  &rx_ring->syncp);
   3851
   3852				netif_err(adapter, drv, adapter->netdev,
   3853					  "Trigger refill for ring %d\n", i);
   3854
   3855				napi_schedule(rx_ring->napi);
   3856				rx_ring->empty_rx_queue = 0;
   3857			}
   3858		} else {
   3859			rx_ring->empty_rx_queue = 0;
   3860		}
   3861	}
   3862}
   3863
   3864/* Check for keep alive expiration */
   3865static void check_for_missing_keep_alive(struct ena_adapter *adapter)
   3866{
   3867	unsigned long keep_alive_expired;
   3868
   3869	if (!adapter->wd_state)
   3870		return;
   3871
   3872	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
   3873		return;
   3874
   3875	keep_alive_expired = adapter->last_keep_alive_jiffies +
   3876			     adapter->keep_alive_timeout;
   3877	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
   3878		netif_err(adapter, drv, adapter->netdev,
   3879			  "Keep alive watchdog timeout.\n");
   3880		ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
   3881				  &adapter->syncp);
   3882		ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
   3883	}
   3884}
   3885
   3886static void check_for_admin_com_state(struct ena_adapter *adapter)
   3887{
   3888	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
   3889		netif_err(adapter, drv, adapter->netdev,
   3890			  "ENA admin queue is not in running state!\n");
   3891		ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
   3892				  &adapter->syncp);
   3893		ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
   3894	}
   3895}
   3896
   3897static void ena_update_hints(struct ena_adapter *adapter,
   3898			     struct ena_admin_ena_hw_hints *hints)
   3899{
   3900	struct net_device *netdev = adapter->netdev;
   3901
   3902	if (hints->admin_completion_tx_timeout)
   3903		adapter->ena_dev->admin_queue.completion_timeout =
   3904			hints->admin_completion_tx_timeout * 1000;
   3905
   3906	if (hints->mmio_read_timeout)
   3907		/* convert to usec */
   3908		adapter->ena_dev->mmio_read.reg_read_to =
   3909			hints->mmio_read_timeout * 1000;
   3910
   3911	if (hints->missed_tx_completion_count_threshold_to_reset)
   3912		adapter->missing_tx_completion_threshold =
   3913			hints->missed_tx_completion_count_threshold_to_reset;
   3914
   3915	if (hints->missing_tx_completion_timeout) {
   3916		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
   3917			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
   3918		else
   3919			adapter->missing_tx_completion_to =
   3920				msecs_to_jiffies(hints->missing_tx_completion_timeout);
   3921	}
   3922
   3923	if (hints->netdev_wd_timeout)
   3924		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
   3925
   3926	if (hints->driver_watchdog_timeout) {
   3927		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
   3928			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
   3929		else
   3930			adapter->keep_alive_timeout =
   3931				msecs_to_jiffies(hints->driver_watchdog_timeout);
   3932	}
   3933}
   3934
   3935static void ena_update_host_info(struct ena_admin_host_info *host_info,
   3936				 struct net_device *netdev)
   3937{
   3938	host_info->supported_network_features[0] =
   3939		netdev->features & GENMASK_ULL(31, 0);
   3940	host_info->supported_network_features[1] =
   3941		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
   3942}
   3943
   3944static void ena_timer_service(struct timer_list *t)
   3945{
   3946	struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
   3947	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
   3948	struct ena_admin_host_info *host_info =
   3949		adapter->ena_dev->host_attr.host_info;
   3950
   3951	check_for_missing_keep_alive(adapter);
   3952
   3953	check_for_admin_com_state(adapter);
   3954
   3955	check_for_missing_completions(adapter);
   3956
   3957	check_for_empty_rx_ring(adapter);
   3958
   3959	if (debug_area)
   3960		ena_dump_stats_to_buf(adapter, debug_area);
   3961
   3962	if (host_info)
   3963		ena_update_host_info(host_info, adapter->netdev);
   3964
   3965	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
   3966		netif_err(adapter, drv, adapter->netdev,
   3967			  "Trigger reset is on\n");
   3968		ena_dump_stats_to_dmesg(adapter);
   3969		queue_work(ena_wq, &adapter->reset_task);
   3970		return;
   3971	}
   3972
   3973	/* Reset the timer */
   3974	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
   3975}
   3976
   3977static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
   3978				     struct ena_com_dev *ena_dev,
   3979				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
   3980{
   3981	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
   3982
   3983	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
   3984		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
   3985			&get_feat_ctx->max_queue_ext.max_queue_ext;
   3986		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
   3987				  max_queue_ext->max_rx_cq_num);
   3988
   3989		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
   3990		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
   3991	} else {
   3992		struct ena_admin_queue_feature_desc *max_queues =
   3993			&get_feat_ctx->max_queues;
   3994		io_tx_sq_num = max_queues->max_sq_num;
   3995		io_tx_cq_num = max_queues->max_cq_num;
   3996		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
   3997	}
   3998
   3999	/* In case of LLQ use the llq fields for the tx SQ/CQ */
   4000	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
   4001		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
   4002
   4003	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
   4004	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
   4005	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
   4006	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
    4007	/* 1 IRQ for mgmnt and 1 IRQ for each IO queue */
   4008	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
   4009
   4010	return max_num_io_queues;
   4011}
   4012
   4013static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
   4014				 struct net_device *netdev)
   4015{
   4016	netdev_features_t dev_features = 0;
   4017
   4018	/* Set offload features */
   4019	if (feat->offload.tx &
   4020		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
   4021		dev_features |= NETIF_F_IP_CSUM;
   4022
   4023	if (feat->offload.tx &
   4024		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
   4025		dev_features |= NETIF_F_IPV6_CSUM;
   4026
   4027	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
   4028		dev_features |= NETIF_F_TSO;
   4029
   4030	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
   4031		dev_features |= NETIF_F_TSO6;
   4032
   4033	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
   4034		dev_features |= NETIF_F_TSO_ECN;
   4035
   4036	if (feat->offload.rx_supported &
   4037		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
   4038		dev_features |= NETIF_F_RXCSUM;
   4039
   4040	if (feat->offload.rx_supported &
   4041		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
   4042		dev_features |= NETIF_F_RXCSUM;
   4043
   4044	netdev->features =
   4045		dev_features |
   4046		NETIF_F_SG |
   4047		NETIF_F_RXHASH |
   4048		NETIF_F_HIGHDMA;
   4049
   4050	netdev->hw_features |= netdev->features;
   4051	netdev->vlan_features |= netdev->features;
   4052}
   4053
   4054static void ena_set_conf_feat_params(struct ena_adapter *adapter,
   4055				     struct ena_com_dev_get_features_ctx *feat)
   4056{
   4057	struct net_device *netdev = adapter->netdev;
   4058
   4059	/* Copy mac address */
   4060	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
   4061		eth_hw_addr_random(netdev);
   4062		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
   4063	} else {
   4064		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
   4065		eth_hw_addr_set(netdev, adapter->mac_addr);
   4066	}
   4067
   4068	/* Set offload features */
   4069	ena_set_dev_offloads(feat, netdev);
   4070
   4071	adapter->max_mtu = feat->dev_attr.max_mtu;
   4072	netdev->max_mtu = adapter->max_mtu;
   4073	netdev->min_mtu = ENA_MIN_MTU;
   4074}
   4075
   4076static int ena_rss_init_default(struct ena_adapter *adapter)
   4077{
   4078	struct ena_com_dev *ena_dev = adapter->ena_dev;
   4079	struct device *dev = &adapter->pdev->dev;
   4080	int rc, i;
   4081	u32 val;
   4082
   4083	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
   4084	if (unlikely(rc)) {
   4085		dev_err(dev, "Cannot init indirect table\n");
   4086		goto err_rss_init;
   4087	}
   4088
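        	/* Spread the indirection table entries across the available IO queues */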
   4089	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
   4090		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
   4091		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
   4092						       ENA_IO_RXQ_IDX(val));
   4093		if (unlikely(rc)) {
   4094			dev_err(dev, "Cannot fill indirect table\n");
   4095			goto err_fill_indir;
   4096		}
   4097	}
   4098
   4099	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
   4100					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
   4101	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
   4102		dev_err(dev, "Cannot fill hash function\n");
   4103		goto err_fill_indir;
   4104	}
   4105
   4106	rc = ena_com_set_default_hash_ctrl(ena_dev);
   4107	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
   4108		dev_err(dev, "Cannot fill hash control\n");
   4109		goto err_fill_indir;
   4110	}
   4111
   4112	return 0;
   4113
   4114err_fill_indir:
   4115	ena_com_rss_destroy(ena_dev);
   4116err_rss_init:
   4117
   4118	return rc;
   4119}
   4120
   4121static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
   4122{
   4123	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
   4124
   4125	pci_release_selected_regions(pdev, release_bars);
   4126}
   4127
   4128
   4129static void ena_calc_io_queue_size(struct ena_adapter *adapter,
   4130				   struct ena_com_dev_get_features_ctx *get_feat_ctx)
   4131{
   4132	struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
   4133	struct ena_com_dev *ena_dev = adapter->ena_dev;
   4134	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
   4135	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
   4136	u32 max_tx_queue_size;
   4137	u32 max_rx_queue_size;
   4138
   4139	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
   4140		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
   4141			&get_feat_ctx->max_queue_ext.max_queue_ext;
   4142		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
   4143					  max_queue_ext->max_rx_sq_depth);
   4144		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
   4145
   4146		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
   4147			max_tx_queue_size = min_t(u32, max_tx_queue_size,
   4148						  llq->max_llq_depth);
   4149		else
   4150			max_tx_queue_size = min_t(u32, max_tx_queue_size,
   4151						  max_queue_ext->max_tx_sq_depth);
   4152
   4153		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
   4154						 max_queue_ext->max_per_packet_tx_descs);
   4155		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
   4156						 max_queue_ext->max_per_packet_rx_descs);
   4157	} else {
   4158		struct ena_admin_queue_feature_desc *max_queues =
   4159			&get_feat_ctx->max_queues;
   4160		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
   4161					  max_queues->max_sq_depth);
   4162		max_tx_queue_size = max_queues->max_cq_depth;
   4163
   4164		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
   4165			max_tx_queue_size = min_t(u32, max_tx_queue_size,
   4166						  llq->max_llq_depth);
   4167		else
   4168			max_tx_queue_size = min_t(u32, max_tx_queue_size,
   4169						  max_queues->max_sq_depth);
   4170
   4171		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
   4172						 max_queues->max_packet_tx_descs);
   4173		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
   4174						 max_queues->max_packet_rx_descs);
   4175	}
   4176
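        	/* Round the device limits down to a power of two and clamp the default
        	 * ring sizes into the supported range.
        	 */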
   4177	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
   4178	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
   4179
   4180	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
   4181				  max_tx_queue_size);
   4182	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
   4183				  max_rx_queue_size);
   4184
   4185	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
   4186	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
   4187
   4188	adapter->max_tx_ring_size  = max_tx_queue_size;
   4189	adapter->max_rx_ring_size = max_rx_queue_size;
   4190	adapter->requested_tx_ring_size = tx_queue_size;
   4191	adapter->requested_rx_ring_size = rx_queue_size;
   4192}
   4193
   4194/* ena_probe - Device Initialization Routine
   4195 * @pdev: PCI device information struct
   4196 * @ent: entry in ena_pci_tbl
   4197 *
   4198 * Returns 0 on success, negative on failure
   4199 *
   4200 * ena_probe initializes an adapter identified by a pci_dev structure.
   4201 * The OS initialization, configuring of the adapter private structure,
   4202 * and a hardware reset occur.
   4203 */
   4204static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
   4205{
   4206	struct ena_com_dev_get_features_ctx get_feat_ctx;
   4207	struct ena_com_dev *ena_dev = NULL;
   4208	struct ena_adapter *adapter;
   4209	struct net_device *netdev;
   4210	static int adapters_found;
   4211	u32 max_num_io_queues;
   4212	bool wd_state;
   4213	int bars, rc;
   4214
   4215	dev_dbg(&pdev->dev, "%s\n", __func__);
   4216
   4217	rc = pci_enable_device_mem(pdev);
   4218	if (rc) {
   4219		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
   4220		return rc;
   4221	}
   4222
   4223	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
   4224	if (rc) {
   4225		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
   4226		goto err_disable_device;
   4227	}
   4228
   4229	pci_set_master(pdev);
   4230
   4231	ena_dev = vzalloc(sizeof(*ena_dev));
   4232	if (!ena_dev) {
   4233		rc = -ENOMEM;
   4234		goto err_disable_device;
   4235	}
   4236
   4237	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
   4238	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
   4239	if (rc) {
   4240		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
   4241			rc);
   4242		goto err_free_ena_dev;
   4243	}
   4244
   4245	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
   4246					pci_resource_start(pdev, ENA_REG_BAR),
   4247					pci_resource_len(pdev, ENA_REG_BAR));
   4248	if (!ena_dev->reg_bar) {
   4249		dev_err(&pdev->dev, "Failed to remap regs bar\n");
   4250		rc = -EFAULT;
   4251		goto err_free_region;
   4252	}
   4253
   4254	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
   4255
   4256	ena_dev->dmadev = &pdev->dev;
   4257
   4258	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
   4259	if (!netdev) {
   4260		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
   4261		rc = -ENOMEM;
   4262		goto err_free_region;
   4263	}
   4264
   4265	SET_NETDEV_DEV(netdev, &pdev->dev);
   4266	adapter = netdev_priv(netdev);
   4267	adapter->ena_dev = ena_dev;
   4268	adapter->netdev = netdev;
   4269	adapter->pdev = pdev;
   4270	adapter->msg_enable = DEFAULT_MSG_ENABLE;
   4271
   4272	ena_dev->net_device = netdev;
   4273
   4274	pci_set_drvdata(pdev, adapter);
   4275
   4276	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
   4277	if (rc) {
   4278		dev_err(&pdev->dev, "ENA device init failed\n");
   4279		if (rc == -ETIME)
   4280			rc = -EPROBE_DEFER;
   4281		goto err_netdev_destroy;
   4282	}
   4283
   4284	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
   4285	if (rc) {
   4286		dev_err(&pdev->dev, "ENA llq bar mapping failed\n");
   4287		goto err_device_destroy;
   4288	}
   4289
   4290	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
   4291	 * Updated during device initialization with the real granularity
   4292	 */
   4293	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
   4294	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
   4295	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
   4296	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
   4297	ena_calc_io_queue_size(adapter, &get_feat_ctx);
   4298	if (unlikely(!max_num_io_queues)) {
   4299		rc = -EFAULT;
   4300		goto err_device_destroy;
   4301	}
   4302
   4303	ena_set_conf_feat_params(adapter, &get_feat_ctx);
   4304
   4305	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
   4306
   4307	adapter->num_io_queues = max_num_io_queues;
   4308	adapter->max_num_io_queues = max_num_io_queues;
   4309	adapter->last_monitored_tx_qid = 0;
   4310
   4311	adapter->xdp_first_ring = 0;
   4312	adapter->xdp_num_queues = 0;
   4313
   4314	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
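        	/* When LLQ placement is used, honor the device's support for disabling
        	 * meta descriptor caching.
        	 */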
   4315	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
   4316		adapter->disable_meta_caching =
   4317			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
   4318			   BIT(ENA_ADMIN_DISABLE_META_CACHING));
   4319
   4320	adapter->wd_state = wd_state;
   4321
   4322	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
   4323
   4324	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
   4325	if (rc) {
   4326		dev_err(&pdev->dev,
   4327			"Failed to query interrupt moderation feature\n");
   4328		goto err_device_destroy;
   4329	}
   4330	ena_init_io_rings(adapter,
   4331			  0,
   4332			  adapter->xdp_num_queues +
   4333			  adapter->num_io_queues);
   4334
   4335	netdev->netdev_ops = &ena_netdev_ops;
   4336	netdev->watchdog_timeo = TX_TIMEOUT;
   4337	ena_set_ethtool_ops(netdev);
   4338
   4339	netdev->priv_flags |= IFF_UNICAST_FLT;
   4340
   4341	u64_stats_init(&adapter->syncp);
   4342
   4343	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
   4344	if (rc) {
   4345		dev_err(&pdev->dev,
   4346			"Failed to enable and set the admin interrupts\n");
   4347		goto err_worker_destroy;
   4348	}
   4349	rc = ena_rss_init_default(adapter);
   4350	if (rc && (rc != -EOPNOTSUPP)) {
   4351		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
   4352		goto err_free_msix;
   4353	}
   4354
   4355	ena_config_debug_area(adapter);
   4356
   4357	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
   4358
   4359	netif_carrier_off(netdev);
   4360
   4361	rc = register_netdev(netdev);
   4362	if (rc) {
   4363		dev_err(&pdev->dev, "Cannot register net device\n");
   4364		goto err_rss;
   4365	}
   4366
   4367	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
   4368
   4369	adapter->last_keep_alive_jiffies = jiffies;
   4370	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
   4371	adapter->missing_tx_completion_to = TX_TIMEOUT;
   4372	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
   4373
   4374	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
   4375
   4376	timer_setup(&adapter->timer_service, ena_timer_service, 0);
   4377	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
   4378
   4379	dev_info(&pdev->dev,
   4380		 "%s found at mem %lx, mac addr %pM\n",
   4381		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
   4382		 netdev->dev_addr);
   4383
   4384	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
   4385
   4386	adapters_found++;
   4387
   4388	return 0;
   4389
   4390err_rss:
   4391	ena_com_delete_debug_area(ena_dev);
   4392	ena_com_rss_destroy(ena_dev);
   4393err_free_msix:
   4394	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
   4395	/* stop submitting admin commands on a device that was reset */
   4396	ena_com_set_admin_running_state(ena_dev, false);
   4397	ena_free_mgmnt_irq(adapter);
   4398	ena_disable_msix(adapter);
   4399err_worker_destroy:
   4400	del_timer(&adapter->timer_service);
   4401err_device_destroy:
   4402	ena_com_delete_host_info(ena_dev);
   4403	ena_com_admin_destroy(ena_dev);
   4404err_netdev_destroy:
   4405	free_netdev(netdev);
   4406err_free_region:
   4407	ena_release_bars(ena_dev, pdev);
   4408err_free_ena_dev:
   4409	vfree(ena_dev);
   4410err_disable_device:
   4411	pci_disable_device(pdev);
   4412	return rc;
   4413}
   4414
   4415/*****************************************************************************/
   4416
   4417/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
   4418 * @pdev: PCI device information struct
    4419	 * @shutdown: Is it a shutdown operation? If false, it is a removal
   4420 *
   4421 * __ena_shutoff is a helper routine that does the real work on shutdown and
    4422	 * removal paths; the difference between those paths is whether to detach
    4423	 * or unregister the netdevice.
   4424 */
   4425static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
   4426{
   4427	struct ena_adapter *adapter = pci_get_drvdata(pdev);
   4428	struct ena_com_dev *ena_dev;
   4429	struct net_device *netdev;
   4430
   4431	ena_dev = adapter->ena_dev;
   4432	netdev = adapter->netdev;
   4433
   4434#ifdef CONFIG_RFS_ACCEL
   4435	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
   4436		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
   4437		netdev->rx_cpu_rmap = NULL;
   4438	}
   4439#endif /* CONFIG_RFS_ACCEL */
   4440
   4441	/* Make sure timer and reset routine won't be called after
   4442	 * freeing device resources.
   4443	 */
   4444	del_timer_sync(&adapter->timer_service);
   4445	cancel_work_sync(&adapter->reset_task);
   4446
   4447	rtnl_lock(); /* lock released inside the below if-else block */
   4448	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
   4449	ena_destroy_device(adapter, true);
   4450	if (shutdown) {
   4451		netif_device_detach(netdev);
   4452		dev_close(netdev);
   4453		rtnl_unlock();
   4454	} else {
   4455		rtnl_unlock();
   4456		unregister_netdev(netdev);
   4457		free_netdev(netdev);
   4458	}
   4459
   4460	ena_com_rss_destroy(ena_dev);
   4461
   4462	ena_com_delete_debug_area(ena_dev);
   4463
   4464	ena_com_delete_host_info(ena_dev);
   4465
   4466	ena_release_bars(ena_dev, pdev);
   4467
   4468	pci_disable_device(pdev);
   4469
   4470	vfree(ena_dev);
   4471}
   4472
   4473/* ena_remove - Device Removal Routine
   4474 * @pdev: PCI device information struct
   4475 *
   4476 * ena_remove is called by the PCI subsystem to alert the driver
   4477 * that it should release a PCI device.
   4478 */
   4479
   4480static void ena_remove(struct pci_dev *pdev)
   4481{
   4482	__ena_shutoff(pdev, false);
   4483}
   4484
   4485/* ena_shutdown - Device Shutdown Routine
   4486 * @pdev: PCI device information struct
   4487 *
   4488 * ena_shutdown is called by the PCI subsystem to alert the driver that
   4489 * a shutdown/reboot (or kexec) is happening and device must be disabled.
   4490 */
   4491
   4492static void ena_shutdown(struct pci_dev *pdev)
   4493{
   4494	__ena_shutoff(pdev, true);
   4495}
   4496
   4497/* ena_suspend - PM suspend callback
   4498 * @dev_d: Device information struct
   4499 */
   4500static int __maybe_unused ena_suspend(struct device *dev_d)
   4501{
   4502	struct pci_dev *pdev = to_pci_dev(dev_d);
   4503	struct ena_adapter *adapter = pci_get_drvdata(pdev);
   4504
   4505	ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
   4506
   4507	rtnl_lock();
   4508	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
   4509		dev_err(&pdev->dev,
   4510			"Ignoring device reset request as the device is being suspended\n");
   4511		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
   4512	}
   4513	ena_destroy_device(adapter, true);
   4514	rtnl_unlock();
   4515	return 0;
   4516}
   4517
   4518/* ena_resume - PM resume callback
   4519 * @dev_d: Device information struct
   4520 */
   4521static int __maybe_unused ena_resume(struct device *dev_d)
   4522{
   4523	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
   4524	int rc;
   4525
   4526	ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
   4527
   4528	rtnl_lock();
   4529	rc = ena_restore_device(adapter);
   4530	rtnl_unlock();
   4531	return rc;
   4532}
   4533
   4534static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
   4535
   4536static struct pci_driver ena_pci_driver = {
   4537	.name		= DRV_MODULE_NAME,
   4538	.id_table	= ena_pci_tbl,
   4539	.probe		= ena_probe,
   4540	.remove		= ena_remove,
   4541	.shutdown	= ena_shutdown,
   4542	.driver.pm	= &ena_pm_ops,
   4543	.sriov_configure = pci_sriov_configure_simple,
   4544};
   4545
   4546static int __init ena_init(void)
   4547{
   4548	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
   4549	if (!ena_wq) {
   4550		pr_err("Failed to create workqueue\n");
   4551		return -ENOMEM;
   4552	}
   4553
   4554	return pci_register_driver(&ena_pci_driver);
   4555}
   4556
   4557static void __exit ena_cleanup(void)
   4558{
   4559	pci_unregister_driver(&ena_pci_driver);
   4560
   4561	if (ena_wq) {
   4562		destroy_workqueue(ena_wq);
   4563		ena_wq = NULL;
   4564	}
   4565}
   4566
   4567/******************************************************************************
   4568 ******************************** AENQ Handlers *******************************
   4569 *****************************************************************************/
   4570/* ena_update_on_link_change:
   4571 * Notify the network interface about the change in link status
   4572 */
   4573static void ena_update_on_link_change(void *adapter_data,
   4574				      struct ena_admin_aenq_entry *aenq_e)
   4575{
   4576	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
   4577	struct ena_admin_aenq_link_change_desc *aenq_desc =
   4578		(struct ena_admin_aenq_link_change_desc *)aenq_e;
   4579	int status = aenq_desc->flags &
   4580		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
   4581
   4582	if (status) {
   4583		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
   4584		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
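        		/* During a reset the carrier is restored by ena_restore_device() */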
   4585		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
   4586			netif_carrier_on(adapter->netdev);
   4587	} else {
   4588		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
   4589		netif_carrier_off(adapter->netdev);
   4590	}
   4591}
   4592
   4593static void ena_keep_alive_wd(void *adapter_data,
   4594			      struct ena_admin_aenq_entry *aenq_e)
   4595{
   4596	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
   4597	struct ena_admin_aenq_keep_alive_desc *desc;
   4598	u64 rx_drops;
   4599	u64 tx_drops;
   4600
   4601	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
   4602	adapter->last_keep_alive_jiffies = jiffies;
   4603
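        	/* The device reports the 64-bit drop counters as two 32-bit halves */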
   4604	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
   4605	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
   4606
   4607	u64_stats_update_begin(&adapter->syncp);
   4608	/* These stats are accumulated by the device, so the counters indicate
   4609	 * all drops since last reset.
   4610	 */
   4611	adapter->dev_stats.rx_drops = rx_drops;
   4612	adapter->dev_stats.tx_drops = tx_drops;
   4613	u64_stats_update_end(&adapter->syncp);
   4614}
   4615
   4616static void ena_notification(void *adapter_data,
   4617			     struct ena_admin_aenq_entry *aenq_e)
   4618{
   4619	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
   4620	struct ena_admin_ena_hw_hints *hints;
   4621
   4622	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
   4623	     "Invalid group(%x) expected %x\n",
   4624	     aenq_e->aenq_common_desc.group,
   4625	     ENA_ADMIN_NOTIFICATION);
   4626
   4627	switch (aenq_e->aenq_common_desc.syndrome) {
   4628	case ENA_ADMIN_UPDATE_HINTS:
   4629		hints = (struct ena_admin_ena_hw_hints *)
   4630			(&aenq_e->inline_data_w4);
   4631		ena_update_hints(adapter, hints);
   4632		break;
   4633	default:
   4634		netif_err(adapter, drv, adapter->netdev,
   4635			  "Invalid aenq notification link state %d\n",
   4636			  aenq_e->aenq_common_desc.syndrome);
   4637	}
   4638}
   4639
    4640	/* This handler will be called for an unknown event group or unimplemented handlers */
   4641static void unimplemented_aenq_handler(void *data,
   4642				       struct ena_admin_aenq_entry *aenq_e)
   4643{
   4644	struct ena_adapter *adapter = (struct ena_adapter *)data;
   4645
   4646	netif_err(adapter, drv, adapter->netdev,
   4647		  "Unknown event was received or event with unimplemented handler\n");
   4648}
   4649
   4650static struct ena_aenq_handlers aenq_handlers = {
   4651	.handlers = {
   4652		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
   4653		[ENA_ADMIN_NOTIFICATION] = ena_notification,
   4654		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
   4655	},
   4656	.unimplemented_handler = unimplemented_aenq_handler
   4657};
   4658
   4659module_init(ena_init);
   4660module_exit(ena_cleanup);