cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mana_en.c (48115B)


      1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
      2/* Copyright (c) 2021, Microsoft Corporation. */
      3
      4#include <uapi/linux/bpf.h>
      5
      6#include <linux/inetdevice.h>
      7#include <linux/etherdevice.h>
      8#include <linux/ethtool.h>
      9#include <linux/mm.h>
     10
     11#include <net/checksum.h>
     12#include <net/ip6_checksum.h>
     13
     14#include "mana.h"
     15
     16/* Microsoft Azure Network Adapter (MANA) functions */
     17
     18static int mana_open(struct net_device *ndev)
     19{
     20	struct mana_port_context *apc = netdev_priv(ndev);
     21	int err;
     22
     23	err = mana_alloc_queues(ndev);
     24	if (err)
     25		return err;
     26
     27	apc->port_is_up = true;
     28
     29	/* Ensure port state updated before txq state */
     30	smp_wmb();
     31
     32	netif_carrier_on(ndev);
     33	netif_tx_wake_all_queues(ndev);
     34
     35	return 0;
     36}
     37
     38static int mana_close(struct net_device *ndev)
     39{
     40	struct mana_port_context *apc = netdev_priv(ndev);
     41
     42	if (!apc->port_is_up)
     43		return 0;
     44
     45	return mana_detach(ndev, true);
     46}
     47
     48static bool mana_can_tx(struct gdma_queue *wq)
     49{
     50	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
     51}
     52
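/* Return IPPROTO_TCP or IPPROTO_UDP when the HW can offload the checksum for
 * this skb, or 0 when no checksum offload is possible.
 */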
     53static unsigned int mana_checksum_info(struct sk_buff *skb)
     54{
     55	if (skb->protocol == htons(ETH_P_IP)) {
     56		struct iphdr *ip = ip_hdr(skb);
     57
     58		if (ip->protocol == IPPROTO_TCP)
     59			return IPPROTO_TCP;
     60
     61		if (ip->protocol == IPPROTO_UDP)
     62			return IPPROTO_UDP;
     63	} else if (skb->protocol == htons(ETH_P_IPV6)) {
     64		struct ipv6hdr *ip6 = ipv6_hdr(skb);
     65
     66		if (ip6->nexthdr == IPPROTO_TCP)
     67			return IPPROTO_TCP;
     68
     69		if (ip6->nexthdr == IPPROTO_UDP)
     70			return IPPROTO_UDP;
     71	}
     72
     73	/* No csum offloading */
     74	return 0;
     75}
     76
     77static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
     78			struct mana_tx_package *tp)
     79{
     80	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
     81	struct gdma_dev *gd = apc->ac->gdma_dev;
     82	struct gdma_context *gc;
     83	struct device *dev;
     84	skb_frag_t *frag;
     85	dma_addr_t da;
     86	int i;
     87
     88	gc = gd->gdma_context;
     89	dev = gc->dev;
     90	da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
     91
     92	if (dma_mapping_error(dev, da))
     93		return -ENOMEM;
     94
     95	ash->dma_handle[0] = da;
     96	ash->size[0] = skb_headlen(skb);
     97
     98	tp->wqe_req.sgl[0].address = ash->dma_handle[0];
     99	tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
    100	tp->wqe_req.sgl[0].size = ash->size[0];
    101
    102	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
    103		frag = &skb_shinfo(skb)->frags[i];
    104		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
    105				      DMA_TO_DEVICE);
    106
    107		if (dma_mapping_error(dev, da))
    108			goto frag_err;
    109
    110		ash->dma_handle[i + 1] = da;
    111		ash->size[i + 1] = skb_frag_size(frag);
    112
    113		tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
    114		tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
    115		tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
    116	}
    117
    118	return 0;
    119
    120frag_err:
    121	for (i = i - 1; i >= 0; i--)
    122		dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
    123			       DMA_TO_DEVICE);
    124
    125	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
    126
    127	return -ENOMEM;
    128}
    129
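/* TX path: build the per-packet OOB, map the skb for DMA, post the work
 * request to the SQ and ring the doorbell.
 */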
    130int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
    131{
    132	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
    133	struct mana_port_context *apc = netdev_priv(ndev);
    134	u16 txq_idx = skb_get_queue_mapping(skb);
    135	struct gdma_dev *gd = apc->ac->gdma_dev;
    136	bool ipv4 = false, ipv6 = false;
    137	struct mana_tx_package pkg = {};
    138	struct netdev_queue *net_txq;
    139	struct mana_stats_tx *tx_stats;
    140	struct gdma_queue *gdma_sq;
    141	unsigned int csum_type;
    142	struct mana_txq *txq;
    143	struct mana_cq *cq;
    144	int err, len;
    145
    146	if (unlikely(!apc->port_is_up))
    147		goto tx_drop;
    148
    149	if (skb_cow_head(skb, MANA_HEADROOM))
    150		goto tx_drop_count;
    151
    152	txq = &apc->tx_qp[txq_idx].txq;
    153	gdma_sq = txq->gdma_sq;
    154	cq = &apc->tx_qp[txq_idx].tx_cq;
    155
    156	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
    157	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
    158
    159	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
    160		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
    161		pkt_fmt = MANA_LONG_PKT_FMT;
    162	} else {
    163		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
    164	}
    165
    166	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;
    167
    168	if (pkt_fmt == MANA_SHORT_PKT_FMT)
    169		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
    170	else
    171		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
    172
    173	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
    174	pkg.wqe_req.flags = 0;
    175	pkg.wqe_req.client_data_unit = 0;
    176
    177	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
    178	WARN_ON_ONCE(pkg.wqe_req.num_sge > 30);
    179
    180	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
    181		pkg.wqe_req.sgl = pkg.sgl_array;
    182	} else {
    183		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
    184					    sizeof(struct gdma_sge),
    185					    GFP_ATOMIC);
    186		if (!pkg.sgl_ptr)
    187			goto tx_drop_count;
    188
    189		pkg.wqe_req.sgl = pkg.sgl_ptr;
    190	}
    191
    192	if (skb->protocol == htons(ETH_P_IP))
    193		ipv4 = true;
    194	else if (skb->protocol == htons(ETH_P_IPV6))
    195		ipv6 = true;
    196
    197	if (skb_is_gso(skb)) {
    198		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
    199		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
    200
    201		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
    202		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
    203		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
    204
    205		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
    206		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
    207		if (ipv4) {
    208			ip_hdr(skb)->tot_len = 0;
    209			ip_hdr(skb)->check = 0;
    210			tcp_hdr(skb)->check =
    211				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
    212						   ip_hdr(skb)->daddr, 0,
    213						   IPPROTO_TCP, 0);
    214		} else {
    215			ipv6_hdr(skb)->payload_len = 0;
    216			tcp_hdr(skb)->check =
    217				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
    218						 &ipv6_hdr(skb)->daddr, 0,
    219						 IPPROTO_TCP, 0);
    220		}
    221	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
    222		csum_type = mana_checksum_info(skb);
    223
    224		if (csum_type == IPPROTO_TCP) {
    225			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
    226			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
    227
    228			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
    229			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
    230
    231		} else if (csum_type == IPPROTO_UDP) {
    232			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
    233			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
    234
    235			pkg.tx_oob.s_oob.comp_udp_csum = 1;
    236		} else {
    237			/* Can't do offload of this type of checksum */
    238			if (skb_checksum_help(skb))
    239				goto free_sgl_ptr;
    240		}
    241	}
    242
    243	if (mana_map_skb(skb, apc, &pkg))
    244		goto free_sgl_ptr;
    245
    246	skb_queue_tail(&txq->pending_skbs, skb);
    247
    248	len = skb->len;
    249	net_txq = netdev_get_tx_queue(ndev, txq_idx);
    250
    251	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
    252					(struct gdma_posted_wqe_info *)skb->cb);
    253	if (!mana_can_tx(gdma_sq)) {
    254		netif_tx_stop_queue(net_txq);
    255		apc->eth_stats.stop_queue++;
    256	}
    257
    258	if (err) {
    259		(void)skb_dequeue_tail(&txq->pending_skbs);
    260		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
    261		err = NETDEV_TX_BUSY;
    262		goto tx_busy;
    263	}
    264
    265	err = NETDEV_TX_OK;
    266	atomic_inc(&txq->pending_sends);
    267
    268	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);
    269
    270	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
    271	skb = NULL;
    272
    273	tx_stats = &txq->stats;
    274	u64_stats_update_begin(&tx_stats->syncp);
    275	tx_stats->packets++;
    276	tx_stats->bytes += len;
    277	u64_stats_update_end(&tx_stats->syncp);
    278
    279tx_busy:
    280	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
    281		netif_tx_wake_queue(net_txq);
    282		apc->eth_stats.wake_queue++;
    283	}
    284
    285	kfree(pkg.sgl_ptr);
    286	return err;
    287
    288free_sgl_ptr:
    289	kfree(pkg.sgl_ptr);
    290tx_drop_count:
    291	ndev->stats.tx_dropped++;
    292tx_drop:
    293	dev_kfree_skb_any(skb);
    294	return NETDEV_TX_OK;
    295}
    296
    297static void mana_get_stats64(struct net_device *ndev,
    298			     struct rtnl_link_stats64 *st)
    299{
    300	struct mana_port_context *apc = netdev_priv(ndev);
    301	unsigned int num_queues = apc->num_queues;
    302	struct mana_stats_rx *rx_stats;
    303	struct mana_stats_tx *tx_stats;
    304	unsigned int start;
    305	u64 packets, bytes;
    306	int q;
    307
    308	if (!apc->port_is_up)
    309		return;
    310
    311	netdev_stats_to_stats64(st, &ndev->stats);
    312
    313	for (q = 0; q < num_queues; q++) {
    314		rx_stats = &apc->rxqs[q]->stats;
    315
    316		do {
    317			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
    318			packets = rx_stats->packets;
    319			bytes = rx_stats->bytes;
    320		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
    321
    322		st->rx_packets += packets;
    323		st->rx_bytes += bytes;
    324	}
    325
    326	for (q = 0; q < num_queues; q++) {
    327		tx_stats = &apc->tx_qp[q].txq.stats;
    328
    329		do {
    330			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
    331			packets = tx_stats->packets;
    332			bytes = tx_stats->bytes;
    333		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
    334
    335		st->tx_packets += packets;
    336		st->tx_bytes += bytes;
    337	}
    338}
    339
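/* Pick a TX queue from the RSS indirection table based on the skb hash and,
 * when possible, cache the choice in the socket.
 */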
    340static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
    341			     int old_q)
    342{
    343	struct mana_port_context *apc = netdev_priv(ndev);
    344	u32 hash = skb_get_hash(skb);
    345	struct sock *sk = skb->sk;
    346	int txq;
    347
    348	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
    349
    350	if (txq != old_q && sk && sk_fullsock(sk) &&
    351	    rcu_access_pointer(sk->sk_dst_cache))
    352		sk_tx_queue_set(sk, txq);
    353
    354	return txq;
    355}
    356
    357static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
    358			     struct net_device *sb_dev)
    359{
    360	int txq;
    361
    362	if (ndev->real_num_tx_queues == 1)
    363		return 0;
    364
    365	txq = sk_tx_queue_get(skb->sk);
    366
    367	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
    368		if (skb_rx_queue_recorded(skb))
    369			txq = skb_get_rx_queue(skb);
    370		else
    371			txq = mana_get_tx_queue(ndev, skb, txq);
    372	}
    373
    374	return txq;
    375}
    376
    377static const struct net_device_ops mana_devops = {
    378	.ndo_open		= mana_open,
    379	.ndo_stop		= mana_close,
    380	.ndo_select_queue	= mana_select_queue,
    381	.ndo_start_xmit		= mana_start_xmit,
    382	.ndo_validate_addr	= eth_validate_addr,
    383	.ndo_get_stats64	= mana_get_stats64,
    384	.ndo_bpf		= mana_bpf,
    385};
    386
    387static void mana_cleanup_port_context(struct mana_port_context *apc)
    388{
    389	kfree(apc->rxqs);
    390	apc->rxqs = NULL;
    391}
    392
    393static int mana_init_port_context(struct mana_port_context *apc)
    394{
    395	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
    396			    GFP_KERNEL);
    397
    398	return !apc->rxqs ? -ENOMEM : 0;
    399}
    400
    401static int mana_send_request(struct mana_context *ac, void *in_buf,
    402			     u32 in_len, void *out_buf, u32 out_len)
    403{
    404	struct gdma_context *gc = ac->gdma_dev->gdma_context;
    405	struct gdma_resp_hdr *resp = out_buf;
    406	struct gdma_req_hdr *req = in_buf;
    407	struct device *dev = gc->dev;
    408	static atomic_t activity_id;
    409	int err;
    410
    411	req->dev_id = gc->mana.dev_id;
    412	req->activity_id = atomic_inc_return(&activity_id);
    413
    414	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
    415				   out_buf);
    416	if (err || resp->status) {
    417		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
    418			err, resp->status);
    419		return err ? err : -EPROTO;
    420	}
    421
    422	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
    423	    req->activity_id != resp->activity_id) {
    424		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
    425			req->dev_id.as_uint32, resp->dev_id.as_uint32,
    426			req->activity_id, resp->activity_id);
    427		return -EPROTO;
    428	}
    429
    430	return 0;
    431}
    432
    433static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
    434				const enum mana_command_code expected_code,
    435				const u32 min_size)
    436{
    437	if (resp_hdr->response.msg_type != expected_code)
    438		return -EPROTO;
    439
    440	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
    441		return -EPROTO;
    442
    443	if (resp_hdr->response.msg_size < min_size)
    444		return -EPROTO;
    445
    446	return 0;
    447}
    448
    449static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
    450				 u32 proto_minor_ver, u32 proto_micro_ver,
    451				 u16 *max_num_vports)
    452{
    453	struct gdma_context *gc = ac->gdma_dev->gdma_context;
    454	struct mana_query_device_cfg_resp resp = {};
    455	struct mana_query_device_cfg_req req = {};
    456	struct device *dev = gc->dev;
    457	int err = 0;
    458
    459	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
    460			     sizeof(req), sizeof(resp));
    461	req.proto_major_ver = proto_major_ver;
    462	req.proto_minor_ver = proto_minor_ver;
    463	req.proto_micro_ver = proto_micro_ver;
    464
    465	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
    466	if (err) {
    467		dev_err(dev, "Failed to query config: %d", err);
    468		return err;
    469	}
    470
    471	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
    472				   sizeof(resp));
    473	if (err || resp.hdr.status) {
    474		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
    475			resp.hdr.status);
    476		if (!err)
    477			err = -EPROTO;
    478		return err;
    479	}
    480
    481	*max_num_vports = resp.max_num_vports;
    482
    483	return 0;
    484}
    485
    486static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
    487				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
    488{
    489	struct mana_query_vport_cfg_resp resp = {};
    490	struct mana_query_vport_cfg_req req = {};
    491	int err;
    492
    493	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
    494			     sizeof(req), sizeof(resp));
    495
    496	req.vport_index = vport_index;
    497
    498	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
    499				sizeof(resp));
    500	if (err)
    501		return err;
    502
    503	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
    504				   sizeof(resp));
    505	if (err)
    506		return err;
    507
    508	if (resp.hdr.status)
    509		return -EPROTO;
    510
    511	*max_sq = resp.max_num_sq;
    512	*max_rq = resp.max_num_rq;
    513	*num_indir_entry = resp.num_indirection_ent;
    514
    515	apc->port_handle = resp.vport;
    516	ether_addr_copy(apc->mac_addr, resp.mac_addr);
    517
    518	return 0;
    519}
    520
    521static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
    522			  u32 doorbell_pg_id)
    523{
    524	struct mana_config_vport_resp resp = {};
    525	struct mana_config_vport_req req = {};
    526	int err;
    527
    528	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
    529			     sizeof(req), sizeof(resp));
    530	req.vport = apc->port_handle;
    531	req.pdid = protection_dom_id;
    532	req.doorbell_pageid = doorbell_pg_id;
    533
    534	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
    535				sizeof(resp));
    536	if (err) {
    537		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
    538		goto out;
    539	}
    540
    541	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
    542				   sizeof(resp));
    543	if (err || resp.hdr.status) {
    544		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
    545			   err, resp.hdr.status);
    546		if (!err)
    547			err = -EPROTO;
    548
    549		goto out;
    550	}
    551
    552	apc->tx_shortform_allowed = resp.short_form_allowed;
    553	apc->tx_vp_offset = resp.tx_vport_offset;
    554out:
    555	return err;
    556}
    557
    558static int mana_cfg_vport_steering(struct mana_port_context *apc,
    559				   enum TRI_STATE rx,
    560				   bool update_default_rxobj, bool update_key,
    561				   bool update_tab)
    562{
    563	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
    564	struct mana_cfg_rx_steer_req *req = NULL;
    565	struct mana_cfg_rx_steer_resp resp = {};
    566	struct net_device *ndev = apc->ndev;
    567	mana_handle_t *req_indir_tab;
    568	u32 req_buf_size;
    569	int err;
    570
    571	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
    572	req = kzalloc(req_buf_size, GFP_KERNEL);
    573	if (!req)
    574		return -ENOMEM;
    575
    576	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
    577			     sizeof(resp));
    578
    579	req->vport = apc->port_handle;
    580	req->num_indir_entries = num_entries;
    581	req->indir_tab_offset = sizeof(*req);
    582	req->rx_enable = rx;
    583	req->rss_enable = apc->rss_state;
    584	req->update_default_rxobj = update_default_rxobj;
    585	req->update_hashkey = update_key;
    586	req->update_indir_tab = update_tab;
    587	req->default_rxobj = apc->default_rxobj;
    588
    589	if (update_key)
    590		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);
    591
    592	if (update_tab) {
    593		req_indir_tab = (mana_handle_t *)(req + 1);
    594		memcpy(req_indir_tab, apc->rxobj_table,
    595		       req->num_indir_entries * sizeof(mana_handle_t));
    596	}
    597
    598	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
    599				sizeof(resp));
    600	if (err) {
    601		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
    602		goto out;
    603	}
    604
    605	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
    606				   sizeof(resp));
    607	if (err) {
    608		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
    609		goto out;
    610	}
    611
    612	if (resp.hdr.status) {
    613		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
    614			   resp.hdr.status);
    615		err = -EPROTO;
    616	}
    617out:
    618	kfree(req);
    619	return err;
    620}
    621
    622static int mana_create_wq_obj(struct mana_port_context *apc,
    623			      mana_handle_t vport,
    624			      u32 wq_type, struct mana_obj_spec *wq_spec,
    625			      struct mana_obj_spec *cq_spec,
    626			      mana_handle_t *wq_obj)
    627{
    628	struct mana_create_wqobj_resp resp = {};
    629	struct mana_create_wqobj_req req = {};
    630	struct net_device *ndev = apc->ndev;
    631	int err;
    632
    633	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
    634			     sizeof(req), sizeof(resp));
    635	req.vport = vport;
    636	req.wq_type = wq_type;
    637	req.wq_gdma_region = wq_spec->gdma_region;
    638	req.cq_gdma_region = cq_spec->gdma_region;
    639	req.wq_size = wq_spec->queue_size;
    640	req.cq_size = cq_spec->queue_size;
    641	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
    642	req.cq_parent_qid = cq_spec->attached_eq;
    643
    644	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
    645				sizeof(resp));
    646	if (err) {
    647		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
    648		goto out;
    649	}
    650
    651	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
    652				   sizeof(resp));
    653	if (err || resp.hdr.status) {
    654		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
    655			   resp.hdr.status);
    656		if (!err)
    657			err = -EPROTO;
    658		goto out;
    659	}
    660
    661	if (resp.wq_obj == INVALID_MANA_HANDLE) {
    662		netdev_err(ndev, "Got an invalid WQ object handle\n");
    663		err = -EPROTO;
    664		goto out;
    665	}
    666
    667	*wq_obj = resp.wq_obj;
    668	wq_spec->queue_index = resp.wq_id;
    669	cq_spec->queue_index = resp.cq_id;
    670
    671	return 0;
    672out:
    673	return err;
    674}
    675
    676static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
    677				mana_handle_t wq_obj)
    678{
    679	struct mana_destroy_wqobj_resp resp = {};
    680	struct mana_destroy_wqobj_req req = {};
    681	struct net_device *ndev = apc->ndev;
    682	int err;
    683
    684	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
    685			     sizeof(req), sizeof(resp));
    686	req.wq_type = wq_type;
    687	req.wq_obj_handle = wq_obj;
    688
    689	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
    690				sizeof(resp));
    691	if (err) {
    692		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
    693		return;
    694	}
    695
    696	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
    697				   sizeof(resp));
    698	if (err || resp.hdr.status)
    699		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
    700			   resp.hdr.status);
    701}
    702
    703static void mana_destroy_eq(struct mana_context *ac)
    704{
    705	struct gdma_context *gc = ac->gdma_dev->gdma_context;
    706	struct gdma_queue *eq;
    707	int i;
    708
    709	if (!ac->eqs)
    710		return;
    711
    712	for (i = 0; i < gc->max_num_queues; i++) {
    713		eq = ac->eqs[i].eq;
    714		if (!eq)
    715			continue;
    716
    717		mana_gd_destroy_queue(gc, eq);
    718	}
    719
    720	kfree(ac->eqs);
    721	ac->eqs = NULL;
    722}
    723
    724static int mana_create_eq(struct mana_context *ac)
    725{
    726	struct gdma_dev *gd = ac->gdma_dev;
    727	struct gdma_context *gc = gd->gdma_context;
    728	struct gdma_queue_spec spec = {};
    729	int err;
    730	int i;
    731
    732	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
    733			  GFP_KERNEL);
    734	if (!ac->eqs)
    735		return -ENOMEM;
    736
    737	spec.type = GDMA_EQ;
    738	spec.monitor_avl_buf = false;
    739	spec.queue_size = EQ_SIZE;
    740	spec.eq.callback = NULL;
    741	spec.eq.context = ac->eqs;
    742	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
    743
    744	for (i = 0; i < gc->max_num_queues; i++) {
    745		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
    746		if (err)
    747			goto out;
    748	}
    749
    750	return 0;
    751out:
    752	mana_destroy_eq(ac);
    753	return err;
    754}
    755
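/* Issue a fence request for the RQ and wait for the corresponding
 * CQE_RX_OBJECT_FENCE completion on its CQ.
 */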
    756static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
    757{
    758	struct mana_fence_rq_resp resp = {};
    759	struct mana_fence_rq_req req = {};
    760	int err;
    761
    762	init_completion(&rxq->fence_event);
    763
    764	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
    765			     sizeof(req), sizeof(resp));
     766	req.wq_obj_handle = rxq->rxobj;
    767
    768	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
    769				sizeof(resp));
    770	if (err) {
    771		netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
    772			   rxq->rxq_idx, err);
    773		return err;
    774	}
    775
    776	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
    777	if (err || resp.hdr.status) {
    778		netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
    779			   rxq->rxq_idx, err, resp.hdr.status);
    780		if (!err)
    781			err = -EPROTO;
    782
    783		return err;
    784	}
    785
    786	if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
    787		netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
    788			   rxq->rxq_idx);
    789		return -ETIMEDOUT;
    790	}
    791
    792	return 0;
    793}
    794
    795static void mana_fence_rqs(struct mana_port_context *apc)
    796{
    797	unsigned int rxq_idx;
    798	struct mana_rxq *rxq;
    799	int err;
    800
    801	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
    802		rxq = apc->rxqs[rxq_idx];
    803		err = mana_fence_rq(apc, rxq);
    804
     805		/* If the fence request fails, fall back to a short sleep as a best-effort wait. */
    806		if (err)
    807			msleep(100);
    808	}
    809}
    810
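/* Advance the software tail of a work queue by @num_units basic units once
 * the corresponding completions have been processed.
 */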
    811static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
    812{
    813	u32 used_space_old;
    814	u32 used_space_new;
    815
    816	used_space_old = wq->head - wq->tail;
    817	used_space_new = wq->head - (wq->tail + num_units);
    818
    819	if (WARN_ON_ONCE(used_space_new > used_space_old))
    820		return -ERANGE;
    821
    822	wq->tail += num_units;
    823	return 0;
    824}
    825
    826static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
    827{
    828	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
    829	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
    830	struct device *dev = gc->dev;
    831	int i;
    832
    833	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
    834
    835	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
    836		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
    837			       DMA_TO_DEVICE);
    838}
    839
    840static void mana_poll_tx_cq(struct mana_cq *cq)
    841{
    842	struct gdma_comp *completions = cq->gdma_comp_buf;
    843	struct gdma_posted_wqe_info *wqe_info;
    844	unsigned int pkt_transmitted = 0;
    845	unsigned int wqe_unit_cnt = 0;
    846	struct mana_txq *txq = cq->txq;
    847	struct mana_port_context *apc;
    848	struct netdev_queue *net_txq;
    849	struct gdma_queue *gdma_wq;
    850	unsigned int avail_space;
    851	struct net_device *ndev;
    852	struct sk_buff *skb;
    853	bool txq_stopped;
    854	int comp_read;
    855	int i;
    856
    857	ndev = txq->ndev;
    858	apc = netdev_priv(ndev);
    859
    860	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
    861				    CQE_POLLING_BUFFER);
    862
    863	if (comp_read < 1)
    864		return;
    865
    866	for (i = 0; i < comp_read; i++) {
    867		struct mana_tx_comp_oob *cqe_oob;
    868
    869		if (WARN_ON_ONCE(!completions[i].is_sq))
    870			return;
    871
    872		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
    873		if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type !=
    874				 MANA_CQE_COMPLETION))
    875			return;
    876
    877		switch (cqe_oob->cqe_hdr.cqe_type) {
    878		case CQE_TX_OKAY:
    879			break;
    880
    881		case CQE_TX_SA_DROP:
    882		case CQE_TX_MTU_DROP:
    883		case CQE_TX_INVALID_OOB:
    884		case CQE_TX_INVALID_ETH_TYPE:
    885		case CQE_TX_HDR_PROCESSING_ERROR:
    886		case CQE_TX_VF_DISABLED:
    887		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
    888		case CQE_TX_VPORT_DISABLED:
    889		case CQE_TX_VLAN_TAGGING_VIOLATION:
    890			WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
    891				  cqe_oob->cqe_hdr.cqe_type);
    892			break;
    893
    894		default:
    895			/* If the CQE type is unexpected, log an error, assert,
    896			 * and go through the error path.
    897			 */
    898			WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
    899				  cqe_oob->cqe_hdr.cqe_type);
    900			return;
    901		}
    902
    903		if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
    904			return;
    905
    906		skb = skb_dequeue(&txq->pending_skbs);
    907		if (WARN_ON_ONCE(!skb))
    908			return;
    909
    910		wqe_info = (struct gdma_posted_wqe_info *)skb->cb;
    911		wqe_unit_cnt += wqe_info->wqe_size_in_bu;
    912
    913		mana_unmap_skb(skb, apc);
    914
    915		napi_consume_skb(skb, cq->budget);
    916
    917		pkt_transmitted++;
    918	}
    919
    920	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
    921		return;
    922
    923	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);
    924
    925	gdma_wq = txq->gdma_sq;
    926	avail_space = mana_gd_wq_avail_space(gdma_wq);
    927
    928	/* Ensure tail updated before checking q stop */
    929	smp_mb();
    930
    931	net_txq = txq->net_txq;
    932	txq_stopped = netif_tx_queue_stopped(net_txq);
    933
    934	/* Ensure checking txq_stopped before apc->port_is_up. */
    935	smp_rmb();
    936
    937	if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
    938		netif_tx_wake_queue(net_txq);
    939		apc->eth_stats.wake_queue++;
    940	}
    941
    942	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
    943		WARN_ON_ONCE(1);
    944
    945	cq->work_done = pkt_transmitted;
    946}
    947
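/* Repost the WQE of the current RX buffer so the HW can reuse it, and advance
 * the buffer index.
 */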
    948static void mana_post_pkt_rxq(struct mana_rxq *rxq)
    949{
    950	struct mana_recv_buf_oob *recv_buf_oob;
    951	u32 curr_index;
    952	int err;
    953
    954	curr_index = rxq->buf_index++;
    955	if (rxq->buf_index == rxq->num_rx_buf)
    956		rxq->buf_index = 0;
    957
    958	recv_buf_oob = &rxq->rx_oobs[curr_index];
    959
    960	err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req,
    961				    &recv_buf_oob->wqe_inf);
    962	if (WARN_ON_ONCE(err))
    963		return;
    964
    965	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
    966}
    967
    968static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
    969				      struct xdp_buff *xdp)
    970{
    971	struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
    972
    973	if (!skb)
    974		return NULL;
    975
    976	if (xdp->data_hard_start) {
    977		skb_reserve(skb, xdp->data - xdp->data_hard_start);
    978		skb_put(skb, xdp->data_end - xdp->data);
    979	} else {
    980		skb_reserve(skb, XDP_PACKET_HEADROOM);
    981		skb_put(skb, pkt_len);
    982	}
    983
    984	return skb;
    985}
    986
    987static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
    988			struct mana_rxq *rxq)
    989{
    990	struct mana_stats_rx *rx_stats = &rxq->stats;
    991	struct net_device *ndev = rxq->ndev;
    992	uint pkt_len = cqe->ppi[0].pkt_len;
    993	u16 rxq_idx = rxq->rxq_idx;
    994	struct napi_struct *napi;
    995	struct xdp_buff xdp = {};
    996	struct sk_buff *skb;
    997	u32 hash_value;
    998	u32 act;
    999
   1000	rxq->rx_cq.work_done++;
   1001	napi = &rxq->rx_cq.napi;
   1002
   1003	if (!buf_va) {
   1004		++ndev->stats.rx_dropped;
   1005		return;
   1006	}
   1007
   1008	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
   1009
   1010	if (act != XDP_PASS && act != XDP_TX)
   1011		goto drop_xdp;
   1012
   1013	skb = mana_build_skb(buf_va, pkt_len, &xdp);
   1014
   1015	if (!skb)
   1016		goto drop;
   1017
   1018	skb->dev = napi->dev;
   1019
   1020	skb->protocol = eth_type_trans(skb, ndev);
   1021	skb_checksum_none_assert(skb);
   1022	skb_record_rx_queue(skb, rxq_idx);
   1023
   1024	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
   1025		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
   1026			skb->ip_summed = CHECKSUM_UNNECESSARY;
   1027	}
   1028
   1029	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
   1030		hash_value = cqe->ppi[0].pkt_hash;
   1031
   1032		if (cqe->rx_hashtype & MANA_HASH_L4)
   1033			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
   1034		else
   1035			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
   1036	}
   1037
   1038	u64_stats_update_begin(&rx_stats->syncp);
   1039	rx_stats->packets++;
   1040	rx_stats->bytes += pkt_len;
   1041
   1042	if (act == XDP_TX)
   1043		rx_stats->xdp_tx++;
   1044	u64_stats_update_end(&rx_stats->syncp);
   1045
   1046	if (act == XDP_TX) {
   1047		skb_set_queue_mapping(skb, rxq_idx);
   1048		mana_xdp_tx(skb, ndev);
   1049		return;
   1050	}
   1051
   1052	napi_gro_receive(napi, skb);
   1053
   1054	return;
   1055
   1056drop_xdp:
   1057	u64_stats_update_begin(&rx_stats->syncp);
   1058	rx_stats->xdp_drop++;
   1059	u64_stats_update_end(&rx_stats->syncp);
   1060
   1061drop:
   1062	WARN_ON_ONCE(rxq->xdp_save_page);
   1063	rxq->xdp_save_page = virt_to_page(buf_va);
   1064
   1065	++ndev->stats.rx_dropped;
   1066
   1067	return;
   1068}
   1069
   1070static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
   1071				struct gdma_comp *cqe)
   1072{
   1073	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
   1074	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
   1075	struct net_device *ndev = rxq->ndev;
   1076	struct mana_recv_buf_oob *rxbuf_oob;
   1077	struct device *dev = gc->dev;
   1078	void *new_buf, *old_buf;
   1079	struct page *new_page;
   1080	u32 curr, pktlen;
   1081	dma_addr_t da;
   1082
   1083	switch (oob->cqe_hdr.cqe_type) {
   1084	case CQE_RX_OKAY:
   1085		break;
   1086
   1087	case CQE_RX_TRUNCATED:
   1088		++ndev->stats.rx_dropped;
   1089		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
   1090		netdev_warn_once(ndev, "Dropped a truncated packet\n");
   1091		goto drop;
   1092
   1093	case CQE_RX_COALESCED_4:
   1094		netdev_err(ndev, "RX coalescing is unsupported\n");
   1095		return;
   1096
   1097	case CQE_RX_OBJECT_FENCE:
   1098		complete(&rxq->fence_event);
   1099		return;
   1100
   1101	default:
   1102		netdev_err(ndev, "Unknown RX CQE type = %d\n",
   1103			   oob->cqe_hdr.cqe_type);
   1104		return;
   1105	}
   1106
   1107	pktlen = oob->ppi[0].pkt_len;
   1108
   1109	if (pktlen == 0) {
    1110		/* Data packets should never have a packet length of zero. */
   1111		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
   1112			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
   1113		return;
   1114	}
   1115
   1116	curr = rxq->buf_index;
   1117	rxbuf_oob = &rxq->rx_oobs[curr];
   1118	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
   1119
   1120	/* Reuse XDP dropped page if available */
   1121	if (rxq->xdp_save_page) {
   1122		new_page = rxq->xdp_save_page;
   1123		rxq->xdp_save_page = NULL;
   1124	} else {
   1125		new_page = alloc_page(GFP_ATOMIC);
   1126	}
   1127
   1128	if (new_page) {
   1129		da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
   1130				  DMA_FROM_DEVICE);
   1131
   1132		if (dma_mapping_error(dev, da)) {
   1133			__free_page(new_page);
   1134			new_page = NULL;
   1135		}
   1136	}
   1137
   1138	new_buf = new_page ? page_to_virt(new_page) : NULL;
   1139
   1140	if (new_buf) {
   1141		dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
   1142			       DMA_FROM_DEVICE);
   1143
   1144		old_buf = rxbuf_oob->buf_va;
   1145
   1146		/* refresh the rxbuf_oob with the new page */
   1147		rxbuf_oob->buf_va = new_buf;
   1148		rxbuf_oob->buf_dma_addr = da;
   1149		rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
   1150	} else {
   1151		old_buf = NULL; /* drop the packet if no memory */
   1152	}
   1153
   1154	mana_rx_skb(old_buf, oob, rxq);
   1155
   1156drop:
   1157	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
   1158
   1159	mana_post_pkt_rxq(rxq);
   1160}
   1161
   1162static void mana_poll_rx_cq(struct mana_cq *cq)
   1163{
   1164	struct gdma_comp *comp = cq->gdma_comp_buf;
   1165	int comp_read, i;
   1166
   1167	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
   1168	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
   1169
   1170	for (i = 0; i < comp_read; i++) {
   1171		if (WARN_ON_ONCE(comp[i].is_sq))
   1172			return;
   1173
   1174		/* verify recv cqe references the right rxq */
   1175		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
   1176			return;
   1177
   1178		mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
   1179	}
   1180}
   1181
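/* Poll the RX or TX completion queue and re-arm it only if the NAPI budget
 * was not exhausted.
 */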
   1182static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
   1183{
   1184	struct mana_cq *cq = context;
   1185	u8 arm_bit;
   1186
   1187	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
   1188
   1189	if (cq->type == MANA_CQ_TYPE_RX)
   1190		mana_poll_rx_cq(cq);
   1191	else
   1192		mana_poll_tx_cq(cq);
   1193
   1194	if (cq->work_done < cq->budget &&
   1195	    napi_complete_done(&cq->napi, cq->work_done)) {
   1196		arm_bit = SET_ARM_BIT;
   1197	} else {
   1198		arm_bit = 0;
   1199	}
   1200
   1201	mana_gd_ring_cq(gdma_queue, arm_bit);
   1202}
   1203
   1204static int mana_poll(struct napi_struct *napi, int budget)
   1205{
   1206	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
   1207
   1208	cq->work_done = 0;
   1209	cq->budget = budget;
   1210
   1211	mana_cq_handler(cq, cq->gdma_cq);
   1212
   1213	return min(cq->work_done, budget);
   1214}
   1215
   1216static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
   1217{
   1218	struct mana_cq *cq = context;
   1219
   1220	napi_schedule_irqoff(&cq->napi);
   1221}
   1222
   1223static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
   1224{
   1225	struct gdma_dev *gd = apc->ac->gdma_dev;
   1226
   1227	if (!cq->gdma_cq)
   1228		return;
   1229
   1230	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
   1231}
   1232
   1233static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
   1234{
   1235	struct gdma_dev *gd = apc->ac->gdma_dev;
   1236
   1237	if (!txq->gdma_sq)
   1238		return;
   1239
   1240	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
   1241}
   1242
   1243static void mana_destroy_txq(struct mana_port_context *apc)
   1244{
   1245	struct napi_struct *napi;
   1246	int i;
   1247
   1248	if (!apc->tx_qp)
   1249		return;
   1250
   1251	for (i = 0; i < apc->num_queues; i++) {
   1252		napi = &apc->tx_qp[i].tx_cq.napi;
   1253		napi_synchronize(napi);
   1254		napi_disable(napi);
   1255		netif_napi_del(napi);
   1256
   1257		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
   1258
   1259		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
   1260
   1261		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
   1262	}
   1263
   1264	kfree(apc->tx_qp);
   1265	apc->tx_qp = NULL;
   1266}
   1267
   1268static int mana_create_txq(struct mana_port_context *apc,
   1269			   struct net_device *net)
   1270{
   1271	struct mana_context *ac = apc->ac;
   1272	struct gdma_dev *gd = ac->gdma_dev;
   1273	struct mana_obj_spec wq_spec;
   1274	struct mana_obj_spec cq_spec;
   1275	struct gdma_queue_spec spec;
   1276	struct gdma_context *gc;
   1277	struct mana_txq *txq;
   1278	struct mana_cq *cq;
   1279	u32 txq_size;
   1280	u32 cq_size;
   1281	int err;
   1282	int i;
   1283
   1284	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
   1285			     GFP_KERNEL);
   1286	if (!apc->tx_qp)
   1287		return -ENOMEM;
   1288
   1289	/*  The minimum size of the WQE is 32 bytes, hence
   1290	 *  MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
   1291	 *  the SQ can store. This value is then used to size other queues
   1292	 *  to prevent overflow.
   1293	 */
   1294	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
   1295	BUILD_BUG_ON(!PAGE_ALIGNED(txq_size));
   1296
   1297	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
   1298	cq_size = PAGE_ALIGN(cq_size);
   1299
   1300	gc = gd->gdma_context;
   1301
   1302	for (i = 0; i < apc->num_queues; i++) {
   1303		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;
   1304
   1305		/* Create SQ */
   1306		txq = &apc->tx_qp[i].txq;
   1307
   1308		u64_stats_init(&txq->stats.syncp);
   1309		txq->ndev = net;
   1310		txq->net_txq = netdev_get_tx_queue(net, i);
   1311		txq->vp_offset = apc->tx_vp_offset;
   1312		skb_queue_head_init(&txq->pending_skbs);
   1313
   1314		memset(&spec, 0, sizeof(spec));
   1315		spec.type = GDMA_SQ;
   1316		spec.monitor_avl_buf = true;
   1317		spec.queue_size = txq_size;
   1318		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
   1319		if (err)
   1320			goto out;
   1321
   1322		/* Create SQ's CQ */
   1323		cq = &apc->tx_qp[i].tx_cq;
   1324		cq->type = MANA_CQ_TYPE_TX;
   1325
   1326		cq->txq = txq;
   1327
   1328		memset(&spec, 0, sizeof(spec));
   1329		spec.type = GDMA_CQ;
   1330		spec.monitor_avl_buf = false;
   1331		spec.queue_size = cq_size;
   1332		spec.cq.callback = mana_schedule_napi;
   1333		spec.cq.parent_eq = ac->eqs[i].eq;
   1334		spec.cq.context = cq;
   1335		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
   1336		if (err)
   1337			goto out;
   1338
   1339		memset(&wq_spec, 0, sizeof(wq_spec));
   1340		memset(&cq_spec, 0, sizeof(cq_spec));
   1341
   1342		wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region;
   1343		wq_spec.queue_size = txq->gdma_sq->queue_size;
   1344
   1345		cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
   1346		cq_spec.queue_size = cq->gdma_cq->queue_size;
   1347		cq_spec.modr_ctx_id = 0;
   1348		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
   1349
   1350		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
   1351					 &wq_spec, &cq_spec,
   1352					 &apc->tx_qp[i].tx_object);
   1353
   1354		if (err)
   1355			goto out;
   1356
   1357		txq->gdma_sq->id = wq_spec.queue_index;
   1358		cq->gdma_cq->id = cq_spec.queue_index;
   1359
   1360		txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
   1361		cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
   1362
   1363		txq->gdma_txq_id = txq->gdma_sq->id;
   1364
   1365		cq->gdma_id = cq->gdma_cq->id;
   1366
   1367		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
   1368			err = -EINVAL;
   1369			goto out;
   1370		}
   1371
   1372		gc->cq_table[cq->gdma_id] = cq->gdma_cq;
   1373
   1374		netif_napi_add_tx(net, &cq->napi, mana_poll);
   1375		napi_enable(&cq->napi);
   1376
   1377		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
   1378	}
   1379
   1380	return 0;
   1381out:
   1382	mana_destroy_txq(apc);
   1383	return err;
   1384}
   1385
   1386static void mana_destroy_rxq(struct mana_port_context *apc,
   1387			     struct mana_rxq *rxq, bool validate_state)
   1388
   1389{
   1390	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
   1391	struct mana_recv_buf_oob *rx_oob;
   1392	struct device *dev = gc->dev;
   1393	struct napi_struct *napi;
   1394	int i;
   1395
   1396	if (!rxq)
   1397		return;
   1398
   1399	napi = &rxq->rx_cq.napi;
   1400
   1401	if (validate_state)
   1402		napi_synchronize(napi);
   1403
   1404	napi_disable(napi);
   1405
   1406	xdp_rxq_info_unreg(&rxq->xdp_rxq);
   1407
   1408	netif_napi_del(napi);
   1409
   1410	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
   1411
   1412	mana_deinit_cq(apc, &rxq->rx_cq);
   1413
   1414	if (rxq->xdp_save_page)
   1415		__free_page(rxq->xdp_save_page);
   1416
   1417	for (i = 0; i < rxq->num_rx_buf; i++) {
   1418		rx_oob = &rxq->rx_oobs[i];
   1419
   1420		if (!rx_oob->buf_va)
   1421			continue;
   1422
   1423		dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
   1424			       DMA_FROM_DEVICE);
   1425
   1426		free_page((unsigned long)rx_oob->buf_va);
   1427		rx_oob->buf_va = NULL;
   1428	}
   1429
   1430	if (rxq->gdma_rq)
   1431		mana_gd_destroy_queue(gc, rxq->gdma_rq);
   1432
   1433	kfree(rxq);
   1434}
   1435
   1436#define MANA_WQE_HEADER_SIZE 16
   1437#define MANA_WQE_SGE_SIZE 16
   1438
   1439static int mana_alloc_rx_wqe(struct mana_port_context *apc,
   1440			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
   1441{
   1442	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
   1443	struct mana_recv_buf_oob *rx_oob;
   1444	struct device *dev = gc->dev;
   1445	struct page *page;
   1446	dma_addr_t da;
   1447	u32 buf_idx;
   1448
   1449	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
   1450
   1451	*rxq_size = 0;
   1452	*cq_size = 0;
   1453
   1454	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
   1455		rx_oob = &rxq->rx_oobs[buf_idx];
   1456		memset(rx_oob, 0, sizeof(*rx_oob));
   1457
   1458		page = alloc_page(GFP_KERNEL);
   1459		if (!page)
   1460			return -ENOMEM;
   1461
   1462		da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
   1463				  DMA_FROM_DEVICE);
   1464
   1465		if (dma_mapping_error(dev, da)) {
   1466			__free_page(page);
   1467			return -ENOMEM;
   1468		}
   1469
   1470		rx_oob->buf_va = page_to_virt(page);
   1471		rx_oob->buf_dma_addr = da;
   1472
   1473		rx_oob->num_sge = 1;
   1474		rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
   1475		rx_oob->sgl[0].size = rxq->datasize;
   1476		rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
   1477
   1478		rx_oob->wqe_req.sgl = rx_oob->sgl;
   1479		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
   1480		rx_oob->wqe_req.inline_oob_size = 0;
   1481		rx_oob->wqe_req.inline_oob_data = NULL;
   1482		rx_oob->wqe_req.flags = 0;
   1483		rx_oob->wqe_req.client_data_unit = 0;
   1484
   1485		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
   1486				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
   1487		*cq_size += COMP_ENTRY_SIZE;
   1488	}
   1489
   1490	return 0;
   1491}
   1492
   1493static int mana_push_wqe(struct mana_rxq *rxq)
   1494{
   1495	struct mana_recv_buf_oob *rx_oob;
   1496	u32 buf_idx;
   1497	int err;
   1498
   1499	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
   1500		rx_oob = &rxq->rx_oobs[buf_idx];
   1501
   1502		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
   1503					    &rx_oob->wqe_inf);
   1504		if (err)
   1505			return -ENOSPC;
   1506	}
   1507
   1508	return 0;
   1509}
   1510
   1511static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
   1512					u32 rxq_idx, struct mana_eq *eq,
   1513					struct net_device *ndev)
   1514{
   1515	struct gdma_dev *gd = apc->ac->gdma_dev;
   1516	struct mana_obj_spec wq_spec;
   1517	struct mana_obj_spec cq_spec;
   1518	struct gdma_queue_spec spec;
   1519	struct mana_cq *cq = NULL;
   1520	struct gdma_context *gc;
   1521	u32 cq_size, rq_size;
   1522	struct mana_rxq *rxq;
   1523	int err;
   1524
   1525	gc = gd->gdma_context;
   1526
   1527	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
   1528		      GFP_KERNEL);
   1529	if (!rxq)
   1530		return NULL;
   1531
   1532	rxq->ndev = ndev;
   1533	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
   1534	rxq->rxq_idx = rxq_idx;
   1535	rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
   1536	rxq->rxobj = INVALID_MANA_HANDLE;
   1537
   1538	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
   1539	if (err)
   1540		goto out;
   1541
   1542	rq_size = PAGE_ALIGN(rq_size);
   1543	cq_size = PAGE_ALIGN(cq_size);
   1544
   1545	/* Create RQ */
   1546	memset(&spec, 0, sizeof(spec));
   1547	spec.type = GDMA_RQ;
   1548	spec.monitor_avl_buf = true;
   1549	spec.queue_size = rq_size;
   1550	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
   1551	if (err)
   1552		goto out;
   1553
   1554	/* Create RQ's CQ */
   1555	cq = &rxq->rx_cq;
   1556	cq->type = MANA_CQ_TYPE_RX;
   1557	cq->rxq = rxq;
   1558
   1559	memset(&spec, 0, sizeof(spec));
   1560	spec.type = GDMA_CQ;
   1561	spec.monitor_avl_buf = false;
   1562	spec.queue_size = cq_size;
   1563	spec.cq.callback = mana_schedule_napi;
   1564	spec.cq.parent_eq = eq->eq;
   1565	spec.cq.context = cq;
   1566	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
   1567	if (err)
   1568		goto out;
   1569
   1570	memset(&wq_spec, 0, sizeof(wq_spec));
   1571	memset(&cq_spec, 0, sizeof(cq_spec));
   1572	wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region;
   1573	wq_spec.queue_size = rxq->gdma_rq->queue_size;
   1574
   1575	cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
   1576	cq_spec.queue_size = cq->gdma_cq->queue_size;
   1577	cq_spec.modr_ctx_id = 0;
   1578	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
   1579
   1580	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
   1581				 &wq_spec, &cq_spec, &rxq->rxobj);
   1582	if (err)
   1583		goto out;
   1584
   1585	rxq->gdma_rq->id = wq_spec.queue_index;
   1586	cq->gdma_cq->id = cq_spec.queue_index;
   1587
   1588	rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
   1589	cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
   1590
   1591	rxq->gdma_id = rxq->gdma_rq->id;
   1592	cq->gdma_id = cq->gdma_cq->id;
   1593
   1594	err = mana_push_wqe(rxq);
   1595	if (err)
   1596		goto out;
   1597
   1598	if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
   1599		err = -EINVAL;
   1600		goto out;
   1601	}
   1602
   1603	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
   1604
   1605	netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);
   1606
   1607	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
   1608				 cq->napi.napi_id));
   1609	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
   1610					   MEM_TYPE_PAGE_SHARED, NULL));
   1611
   1612	napi_enable(&cq->napi);
   1613
   1614	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
   1615out:
   1616	if (!err)
   1617		return rxq;
   1618
   1619	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);
   1620
   1621	mana_destroy_rxq(apc, rxq, false);
   1622
   1623	if (cq)
   1624		mana_deinit_cq(apc, cq);
   1625
   1626	return NULL;
   1627}
   1628
   1629static int mana_add_rx_queues(struct mana_port_context *apc,
   1630			      struct net_device *ndev)
   1631{
   1632	struct mana_context *ac = apc->ac;
   1633	struct mana_rxq *rxq;
   1634	int err = 0;
   1635	int i;
   1636
   1637	for (i = 0; i < apc->num_queues; i++) {
   1638		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
   1639		if (!rxq) {
   1640			err = -ENOMEM;
   1641			goto out;
   1642		}
   1643
   1644		u64_stats_init(&rxq->stats.syncp);
   1645
   1646		apc->rxqs[i] = rxq;
   1647	}
   1648
   1649	apc->default_rxobj = apc->rxqs[0]->rxobj;
   1650out:
   1651	return err;
   1652}
   1653
   1654static void mana_destroy_vport(struct mana_port_context *apc)
   1655{
   1656	struct mana_rxq *rxq;
   1657	u32 rxq_idx;
   1658
   1659	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
   1660		rxq = apc->rxqs[rxq_idx];
   1661		if (!rxq)
   1662			continue;
   1663
   1664		mana_destroy_rxq(apc, rxq, true);
   1665		apc->rxqs[rxq_idx] = NULL;
   1666	}
   1667
   1668	mana_destroy_txq(apc);
   1669}
   1670
   1671static int mana_create_vport(struct mana_port_context *apc,
   1672			     struct net_device *net)
   1673{
   1674	struct gdma_dev *gd = apc->ac->gdma_dev;
   1675	int err;
   1676
   1677	apc->default_rxobj = INVALID_MANA_HANDLE;
   1678
   1679	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
   1680	if (err)
   1681		return err;
   1682
   1683	return mana_create_txq(apc, net);
   1684}
   1685
   1686static void mana_rss_table_init(struct mana_port_context *apc)
   1687{
   1688	int i;
   1689
   1690	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
   1691		apc->indir_table[i] =
   1692			ethtool_rxfh_indir_default(i, apc->num_queues);
   1693}
   1694
   1695int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
   1696		    bool update_hash, bool update_tab)
   1697{
   1698	u32 queue_idx;
   1699	int err;
   1700	int i;
   1701
   1702	if (update_tab) {
   1703		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
   1704			queue_idx = apc->indir_table[i];
   1705			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
   1706		}
   1707	}
   1708
   1709	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
   1710	if (err)
   1711		return err;
   1712
   1713	mana_fence_rqs(apc);
   1714
   1715	return 0;
   1716}
   1717
   1718static int mana_init_port(struct net_device *ndev)
   1719{
   1720	struct mana_port_context *apc = netdev_priv(ndev);
   1721	u32 max_txq, max_rxq, max_queues;
   1722	int port_idx = apc->port_idx;
   1723	u32 num_indirect_entries;
   1724	int err;
   1725
   1726	err = mana_init_port_context(apc);
   1727	if (err)
   1728		return err;
   1729
   1730	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
   1731				   &num_indirect_entries);
   1732	if (err) {
   1733		netdev_err(ndev, "Failed to query info for vPort %d\n",
   1734			   port_idx);
   1735		goto reset_apc;
   1736	}
   1737
   1738	max_queues = min_t(u32, max_txq, max_rxq);
   1739	if (apc->max_queues > max_queues)
   1740		apc->max_queues = max_queues;
   1741
   1742	if (apc->num_queues > apc->max_queues)
   1743		apc->num_queues = apc->max_queues;
   1744
   1745	eth_hw_addr_set(ndev, apc->mac_addr);
   1746
   1747	return 0;
   1748
   1749reset_apc:
   1750	kfree(apc->rxqs);
   1751	apc->rxqs = NULL;
   1752	return err;
   1753}
   1754
   1755int mana_alloc_queues(struct net_device *ndev)
   1756{
   1757	struct mana_port_context *apc = netdev_priv(ndev);
   1758	int err;
   1759
   1760	err = mana_create_vport(apc, ndev);
   1761	if (err)
   1762		return err;
   1763
   1764	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
   1765	if (err)
   1766		goto destroy_vport;
   1767
   1768	err = mana_add_rx_queues(apc, ndev);
   1769	if (err)
   1770		goto destroy_vport;
   1771
   1772	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
   1773
   1774	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
   1775	if (err)
   1776		goto destroy_vport;
   1777
   1778	mana_rss_table_init(apc);
   1779
   1780	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
   1781	if (err)
   1782		goto destroy_vport;
   1783
   1784	mana_chn_setxdp(apc, mana_xdp_get(apc));
   1785
   1786	return 0;
   1787
   1788destroy_vport:
   1789	mana_destroy_vport(apc);
   1790	return err;
   1791}
   1792
   1793int mana_attach(struct net_device *ndev)
   1794{
   1795	struct mana_port_context *apc = netdev_priv(ndev);
   1796	int err;
   1797
   1798	ASSERT_RTNL();
   1799
   1800	err = mana_init_port(ndev);
   1801	if (err)
   1802		return err;
   1803
   1804	if (apc->port_st_save) {
   1805		err = mana_alloc_queues(ndev);
   1806		if (err) {
   1807			mana_cleanup_port_context(apc);
   1808			return err;
   1809		}
   1810	}
   1811
   1812	apc->port_is_up = apc->port_st_save;
   1813
   1814	/* Ensure port state updated before txq state */
   1815	smp_wmb();
   1816
   1817	if (apc->port_is_up)
   1818		netif_carrier_on(ndev);
   1819
   1820	netif_device_attach(ndev);
   1821
   1822	return 0;
   1823}
   1824
   1825static int mana_dealloc_queues(struct net_device *ndev)
   1826{
   1827	struct mana_port_context *apc = netdev_priv(ndev);
   1828	struct mana_txq *txq;
   1829	int i, err;
   1830
   1831	if (apc->port_is_up)
   1832		return -EINVAL;
   1833
   1834	mana_chn_setxdp(apc, NULL);
   1835
   1836	/* No packet can be transmitted now since apc->port_is_up is false.
   1837	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
   1838	 * a txq because it may not timely see apc->port_is_up being cleared
   1839	 * to false, but it doesn't matter since mana_start_xmit() drops any
   1840	 * new packets due to apc->port_is_up being false.
   1841	 *
   1842	 * Drain all the in-flight TX packets
   1843	 */
   1844	for (i = 0; i < apc->num_queues; i++) {
   1845		txq = &apc->tx_qp[i].txq;
   1846
   1847		while (atomic_read(&txq->pending_sends) > 0)
   1848			usleep_range(1000, 2000);
   1849	}
   1850
   1851	/* We're 100% sure the queues can no longer be woken up, because
   1852	 * we're sure now mana_poll_tx_cq() can't be running.
   1853	 */
   1854
   1855	apc->rss_state = TRI_STATE_FALSE;
   1856	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
   1857	if (err) {
   1858		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
   1859		return err;
   1860	}
   1861
   1862	mana_destroy_vport(apc);
   1863
   1864	return 0;
   1865}
   1866
   1867int mana_detach(struct net_device *ndev, bool from_close)
   1868{
   1869	struct mana_port_context *apc = netdev_priv(ndev);
   1870	int err;
   1871
   1872	ASSERT_RTNL();
   1873
   1874	apc->port_st_save = apc->port_is_up;
   1875	apc->port_is_up = false;
   1876
   1877	/* Ensure port state updated before txq state */
   1878	smp_wmb();
   1879
   1880	netif_tx_disable(ndev);
   1881	netif_carrier_off(ndev);
   1882
   1883	if (apc->port_st_save) {
   1884		err = mana_dealloc_queues(ndev);
   1885		if (err)
   1886			return err;
   1887	}
   1888
   1889	if (!from_close) {
   1890		netif_device_detach(ndev);
   1891		mana_cleanup_port_context(apc);
   1892	}
   1893
   1894	return 0;
   1895}
   1896
   1897static int mana_probe_port(struct mana_context *ac, int port_idx,
   1898			   struct net_device **ndev_storage)
   1899{
   1900	struct gdma_context *gc = ac->gdma_dev->gdma_context;
   1901	struct mana_port_context *apc;
   1902	struct net_device *ndev;
   1903	int err;
   1904
   1905	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
   1906				 gc->max_num_queues);
   1907	if (!ndev)
   1908		return -ENOMEM;
   1909
   1910	*ndev_storage = ndev;
   1911
   1912	apc = netdev_priv(ndev);
   1913	apc->ac = ac;
   1914	apc->ndev = ndev;
   1915	apc->max_queues = gc->max_num_queues;
   1916	apc->num_queues = gc->max_num_queues;
   1917	apc->port_handle = INVALID_MANA_HANDLE;
   1918	apc->port_idx = port_idx;
   1919
   1920	ndev->netdev_ops = &mana_devops;
   1921	ndev->ethtool_ops = &mana_ethtool_ops;
   1922	ndev->mtu = ETH_DATA_LEN;
   1923	ndev->max_mtu = ndev->mtu;
   1924	ndev->min_mtu = ndev->mtu;
   1925	ndev->needed_headroom = MANA_HEADROOM;
   1926	SET_NETDEV_DEV(ndev, gc->dev);
   1927
   1928	netif_carrier_off(ndev);
   1929
   1930	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);
   1931
   1932	err = mana_init_port(ndev);
   1933	if (err)
   1934		goto free_net;
   1935
   1936	netdev_lockdep_set_classes(ndev);
   1937
   1938	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
   1939	ndev->hw_features |= NETIF_F_RXCSUM;
   1940	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
   1941	ndev->hw_features |= NETIF_F_RXHASH;
   1942	ndev->features = ndev->hw_features;
   1943	ndev->vlan_features = 0;
   1944
   1945	err = register_netdev(ndev);
   1946	if (err) {
   1947		netdev_err(ndev, "Unable to register netdev.\n");
   1948		goto reset_apc;
   1949	}
   1950
   1951	return 0;
   1952
   1953reset_apc:
   1954	kfree(apc->rxqs);
   1955	apc->rxqs = NULL;
   1956free_net:
   1957	*ndev_storage = NULL;
   1958	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
   1959	free_netdev(ndev);
   1960	return err;
   1961}
   1962
   1963int mana_probe(struct gdma_dev *gd, bool resuming)
   1964{
   1965	struct gdma_context *gc = gd->gdma_context;
   1966	struct mana_context *ac = gd->driver_data;
   1967	struct device *dev = gc->dev;
   1968	u16 num_ports = 0;
   1969	int err;
   1970	int i;
   1971
   1972	dev_info(dev,
   1973		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
   1974		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);
   1975
   1976	err = mana_gd_register_device(gd);
   1977	if (err)
   1978		return err;
   1979
   1980	if (!resuming) {
   1981		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
   1982		if (!ac)
   1983			return -ENOMEM;
   1984
   1985		ac->gdma_dev = gd;
   1986		gd->driver_data = ac;
   1987	}
   1988
   1989	err = mana_create_eq(ac);
   1990	if (err)
   1991		goto out;
   1992
   1993	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
   1994				    MANA_MICRO_VERSION, &num_ports);
   1995	if (err)
   1996		goto out;
   1997
   1998	if (!resuming) {
   1999		ac->num_ports = num_ports;
   2000	} else {
   2001		if (ac->num_ports != num_ports) {
   2002			dev_err(dev, "The number of vPorts changed: %d->%d\n",
   2003				ac->num_ports, num_ports);
   2004			err = -EPROTO;
   2005			goto out;
   2006		}
   2007	}
   2008
   2009	if (ac->num_ports == 0)
   2010		dev_err(dev, "Failed to detect any vPort\n");
   2011
   2012	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
   2013		ac->num_ports = MAX_PORTS_IN_MANA_DEV;
   2014
   2015	if (!resuming) {
   2016		for (i = 0; i < ac->num_ports; i++) {
   2017			err = mana_probe_port(ac, i, &ac->ports[i]);
   2018			if (err)
   2019				break;
   2020		}
   2021	} else {
   2022		for (i = 0; i < ac->num_ports; i++) {
   2023			rtnl_lock();
   2024			err = mana_attach(ac->ports[i]);
   2025			rtnl_unlock();
   2026			if (err)
   2027				break;
   2028		}
   2029	}
   2030out:
   2031	if (err)
   2032		mana_remove(gd, false);
   2033
   2034	return err;
   2035}
   2036
   2037void mana_remove(struct gdma_dev *gd, bool suspending)
   2038{
   2039	struct gdma_context *gc = gd->gdma_context;
   2040	struct mana_context *ac = gd->driver_data;
   2041	struct device *dev = gc->dev;
   2042	struct net_device *ndev;
   2043	int err;
   2044	int i;
   2045
   2046	for (i = 0; i < ac->num_ports; i++) {
   2047		ndev = ac->ports[i];
   2048		if (!ndev) {
   2049			if (i == 0)
   2050				dev_err(dev, "No net device to remove\n");
   2051			goto out;
   2052		}
   2053
   2054		/* All cleanup actions should stay after rtnl_lock(), otherwise
   2055		 * other functions may access partially cleaned up data.
   2056		 */
   2057		rtnl_lock();
   2058
   2059		err = mana_detach(ndev, false);
   2060		if (err)
   2061			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
   2062				   i, err);
   2063
   2064		if (suspending) {
   2065			/* No need to unregister the ndev. */
   2066			rtnl_unlock();
   2067			continue;
   2068		}
   2069
   2070		unregister_netdevice(ndev);
   2071
   2072		rtnl_unlock();
   2073
   2074		free_netdev(ndev);
   2075	}
   2076
   2077	mana_destroy_eq(ac);
   2078
   2079out:
   2080	mana_gd_deregister_device(gd);
   2081
   2082	if (suspending)
   2083		return;
   2084
   2085	gd->driver_data = NULL;
   2086	gd->gdma_context = NULL;
   2087	kfree(ac);
   2088}