cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

funeth_tx.c (20357B)


// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/dma-mapping.h>
#include <linux/ip.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <uapi/linux/udp.h>
#include "funeth.h"
#include "funeth_ktls.h"
#include "funeth_txrx.h"
#include "funeth_trace.h"
#include "fun_queue.h"

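/* When an XDP Tx queue has fewer than FUN_XDP_CLEAN_THRES free descriptors,
 * fun_xdp_tx() reclaims up to FUN_XDP_CLEAN_BATCH completed descriptors
 * before queuing a new packet.
 */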
#define FUN_XDP_CLEAN_THRES 32
#define FUN_XDP_CLEAN_BATCH 16

/* DMA-map a packet and return the (length, DMA_address) pairs for its
 * segments. If a mapping error occurs, -ENOMEM is returned.
 */
static int map_skb(const struct sk_buff *skb, struct device *dev,
		   dma_addr_t *addr, unsigned int *len)
{
	const struct skb_shared_info *si;
	const skb_frag_t *fp, *end;

	*len = skb_headlen(skb);
	*addr = dma_map_single(dev, skb->data, *len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *addr))
		return -ENOMEM;

	si = skb_shinfo(skb);
	end = &si->frags[si->nr_frags];

	for (fp = si->frags; fp < end; fp++) {
		*++len = skb_frag_size(fp);
		*++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, *addr))
			goto unwind;
	}
	return 0;

unwind:
	while (fp-- > si->frags)
		dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);

	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
	return -ENOMEM;
}

/* Return the address just past the end of a Tx queue's descriptor ring.
 * It exploits the fact that the HW writeback area is just after the end
 * of the descriptor ring.
 */
static void *txq_end(const struct funeth_txq *q)
{
	return (void *)q->hw_wb;
}

/* Return the amount of space within a Tx ring from the given address to the
 * end.
 */
static unsigned int txq_to_end(const struct funeth_txq *q, void *p)
{
	return txq_end(q) - p;
}

/* Return the number of Tx descriptors occupied by a Tx request. */
static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req)
{
	return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8);
}

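/* Return the 16-bit word of a TCP header that holds the data offset and flag
 * fields, in network byte order, for use in Tx offload descriptors.
 */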
static __be16 tcp_hdr_doff_flags(const struct tcphdr *th)
{
	return *(__be16 *)&tcp_flag_word(th);
}

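/* Prepare an skb for TLS Tx offload. If the TCP sequence number matches the
 * driver's TLS context, the skb is left for inline crypto and *tls_len is set
 * to the TLS payload length. On a mismatch a resync may be requested and the
 * skb falls back to software encryption; NULL is returned if that fails.
 */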
static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q,
				  unsigned int *tls_len)
{
#if IS_ENABLED(CONFIG_TLS_DEVICE)
	const struct fun_ktls_tx_ctx *tls_ctx;
	u32 datalen, seq;

	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
	if (!datalen)
		return skb;

	if (likely(!tls_offload_tx_resync_pending(skb->sk))) {
		seq = ntohl(tcp_hdr(skb)->seq);
		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);

		if (likely(tls_ctx->next_seq == seq)) {
			*tls_len = datalen;
			return skb;
		}
		if (seq - tls_ctx->next_seq < U32_MAX / 4) {
			tls_offload_tx_resync_request(skb->sk, seq,
						      tls_ctx->next_seq);
		}
	}

	FUN_QSTAT_INC(q, tx_tls_fallback);
	skb = tls_encrypt_skb(skb);
	if (!skb)
		FUN_QSTAT_INC(q, tx_tls_drops);

	return skb;
#else
	return NULL;
#endif
}

/* Write as many descriptors as needed for the supplied skb starting at the
 * current producer location. The caller has made certain enough descriptors
 * are available.
 *
 * Returns the number of descriptors written, 0 on error.
 */
static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
				   unsigned int tls_len)
{
	unsigned int extra_bytes = 0, extra_pkts = 0;
	unsigned int idx = q->prod_cnt & q->mask;
	const struct skb_shared_info *shinfo;
	unsigned int lens[MAX_SKB_FRAGS + 1];
	dma_addr_t addrs[MAX_SKB_FRAGS + 1];
	struct fun_eth_tx_req *req;
	struct fun_dataop_gl *gle;
	const struct tcphdr *th;
	unsigned int ngle, i;
	u16 flags;

	if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) {
		FUN_QSTAT_INC(q, tx_map_err);
		return 0;
	}

	req = fun_tx_desc_addr(q, idx);
	req->op = FUN_ETH_OP_TX;
	req->len8 = 0;
	req->flags = 0;
	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
	req->repr_idn = 0;
	req->encap_proto = 0;

	shinfo = skb_shinfo(skb);
	if (likely(shinfo->gso_size)) {
		if (skb->encapsulation) {
			u16 ol4_ofst;

			flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_OUTER_L3_LEN;
			if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
						SKB_GSO_UDP_TUNNEL_CSUM)) {
				flags |= FUN_ETH_UPDATE_OUTER_L4_LEN |
					 FUN_ETH_OUTER_UDP;
				if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
					flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM;
				ol4_ofst = skb_transport_offset(skb);
			} else {
				ol4_ofst = skb_inner_network_offset(skb);
			}

			if (ip_hdr(skb)->version == 4)
				flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM;
			else
				flags |= FUN_ETH_OUTER_IPV6;

			if (skb->inner_network_header) {
				if (inner_ip_hdr(skb)->version == 4)
					flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
				else
					flags |= FUN_ETH_INNER_IPV6 |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
			}
			th = inner_tcp_hdr(skb);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_inner_network_offset(skb),
					     skb_inner_transport_offset(skb),
					     skb_network_offset(skb), ol4_ofst);
			FUN_QSTAT_INC(q, tx_encap_tso);
		} else {
			/* HW considers one set of headers as inner */
			flags = FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_INNER_L3_LEN;
			if (shinfo->gso_type & SKB_GSO_TCPV6)
				flags |= FUN_ETH_INNER_IPV6;
			else
				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
			th = tcp_hdr(skb);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_network_offset(skb),
					     skb_transport_offset(skb), 0, 0);
			FUN_QSTAT_INC(q, tx_tso);
		}

		u64_stats_update_begin(&q->syncp);
		q->stats.tx_cso += shinfo->gso_segs;
		u64_stats_update_end(&q->syncp);

		extra_pkts = shinfo->gso_segs - 1;
		extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
			       __tcp_hdrlen(th)) * extra_pkts;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
		if (skb->csum_offset == offsetof(struct udphdr, check))
			flags |= FUN_ETH_INNER_UDP;
		fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0,
				     skb_checksum_start_offset(skb), 0, 0);
		FUN_QSTAT_INC(q, tx_cso);
	} else {
		fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
	}

	ngle = shinfo->nr_frags + 1;
	req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8;
	req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len);

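	/* Write the gather list entries for the head and the fragments,
	 * wrapping around to the start of the ring if the entries extend
	 * past the end of the descriptor ring.
	 */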
	for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm;
	     i < ngle && txq_to_end(q, gle); i++, gle++)
		fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);

	if (txq_to_end(q, gle) == 0) {
		gle = (struct fun_dataop_gl *)q->desc;
		for ( ; i < ngle; i++, gle++)
			fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
	}

	if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) {
		struct fun_eth_tls *tls = (struct fun_eth_tls *)gle;
		struct fun_ktls_tx_ctx *tls_ctx;

		req->len8 += FUNETH_TLS_SZ / 8;
		req->flags = cpu_to_be16(FUN_ETH_TX_TLS);

		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
		tls->tlsid = tls_ctx->tlsid;
		tls_ctx->next_seq += tls_len;

		u64_stats_update_begin(&q->syncp);
		q->stats.tx_tls_bytes += tls_len;
		q->stats.tx_tls_pkts += 1 + extra_pkts;
		u64_stats_update_end(&q->syncp);
	}

	u64_stats_update_begin(&q->syncp);
	q->stats.tx_bytes += skb->len + extra_bytes;
	q->stats.tx_pkts += 1 + extra_pkts;
	u64_stats_update_end(&q->syncp);

	q->info[idx].skb = skb;

	trace_funeth_tx(q, skb->len, idx, req->dataop.ngather);
	return tx_req_ndesc(req);
}

/* Return the number of available descriptors of a Tx queue.
 * HW assumes head==tail means the ring is empty so we need to keep one
 * descriptor unused.
 */
static unsigned int fun_txq_avail(const struct funeth_txq *q)
{
	return q->mask - q->prod_cnt + q->cons_cnt;
}

/* Stop a queue if it can't handle another worst-case packet. */
static void fun_tx_check_stop(struct funeth_txq *q)
{
	if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC))
		return;

	netif_tx_stop_queue(q->ndq);

	/* NAPI reclaim is freeing packets in parallel with us and we may race.
	 * We have stopped the queue but check again after synchronizing with
	 * reclaim.
	 */
	smp_mb();
	if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC))
		FUN_QSTAT_INC(q, tx_nstops);
	else
		netif_tx_start_queue(q->ndq);
}

/* Return true if a queue has enough space to restart. Current condition is
 * that the queue must be >= 1/4 empty.
 */
static bool fun_txq_may_restart(struct funeth_txq *q)
{
	return fun_txq_avail(q) >= q->mask / 4;
}

netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	unsigned int qid = skb_get_queue_mapping(skb);
	struct funeth_txq *q = fp->txqs[qid];
	unsigned int tls_len = 0;
	unsigned int ndesc;

	if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk &&
	    tls_is_sk_tx_device_offloaded(skb->sk)) {
		skb = fun_tls_tx(skb, q, &tls_len);
		if (unlikely(!skb))
			goto dropped;
	}

	ndesc = write_pkt_desc(skb, q, tls_len);
	if (unlikely(!ndesc)) {
		dev_kfree_skb_any(skb);
		goto dropped;
	}

	q->prod_cnt += ndesc;
	fun_tx_check_stop(q);

	skb_tx_timestamp(skb);

	if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more()))
		fun_txq_wr_db(q);
	else
		FUN_QSTAT_INC(q, tx_more);

	return NETDEV_TX_OK;

dropped:
	/* A dropped packet may be the last one in an xmit_more train,
	 * so ring the doorbell just in case.
	 */
	if (!netdev_xmit_more())
		fun_txq_wr_db(q);
	return NETDEV_TX_OK;
}

/* Return a Tx queue's HW head index written back to host memory. */
static u16 txq_hw_head(const struct funeth_txq *q)
{
	return (u16)be64_to_cpu(*q->hw_wb);
}

/* Unmap the Tx packet starting at the given descriptor index and
 * return the number of Tx descriptors it occupied.
 */
static unsigned int unmap_skb(const struct funeth_txq *q, unsigned int idx)
{
	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
	unsigned int ngle = req->dataop.ngather;
	struct fun_dataop_gl *gle;

	if (ngle) {
		gle = (struct fun_dataop_gl *)req->dataop.imm;
		dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
				 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);

		for (gle++; --ngle && txq_to_end(q, gle); gle++)
			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
				       be32_to_cpu(gle->sgl_len),
				       DMA_TO_DEVICE);

		for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++)
			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
				       be32_to_cpu(gle->sgl_len),
				       DMA_TO_DEVICE);
	}

	return tx_req_ndesc(req);
}

/* Reclaim completed Tx descriptors and free their packets. Restart a stopped
 * queue if we freed enough descriptors.
 *
 * Return true if we exhausted the budget while there is more work to be done.
 */
static bool fun_txq_reclaim(struct funeth_txq *q, int budget)
{
	unsigned int npkts = 0, nbytes = 0, ndesc = 0;
	unsigned int head, limit, reclaim_idx;

	/* budget may be 0, e.g., netpoll */
	limit = budget ? budget : UINT_MAX;

	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
	     head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) {
		/* The HW head is continually updated, ensure we don't read
		 * descriptor state before the head tells us to reclaim it.
		 * On the enqueue side the doorbell is an implicit write
		 * barrier.
		 */
		rmb();

		do {
			unsigned int pkt_desc = unmap_skb(q, reclaim_idx);
			struct sk_buff *skb = q->info[reclaim_idx].skb;

			trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);

			nbytes += skb->len;
			napi_consume_skb(skb, budget);
			ndesc += pkt_desc;
			reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
			npkts++;
		} while (reclaim_idx != head && npkts < limit);
	}

	q->cons_cnt += ndesc;
	netdev_tx_completed_queue(q->ndq, npkts, nbytes);
	smp_mb(); /* pairs with the one in fun_tx_check_stop() */

	if (unlikely(netif_tx_queue_stopped(q->ndq) &&
		     fun_txq_may_restart(q))) {
		netif_tx_wake_queue(q->ndq);
		FUN_QSTAT_INC(q, tx_nrestarts);
	}

	return reclaim_idx != head;
}

/* The NAPI handler for Tx queues. */
int fun_txq_napi_poll(struct napi_struct *napi, int budget)
{
	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
	struct funeth_txq *q = irq->txq;
	unsigned int db_val;

	if (fun_txq_reclaim(q, budget))
		return budget;               /* exhausted budget */

	napi_complete(napi);                 /* exhausted pending work */
	db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask);
	writel(db_val, q->db);
	return 0;
}

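/* Unmap the single DMA buffer of an XDP Tx packet at the given descriptor
 * index.
 */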
static void fun_xdp_unmap(const struct funeth_txq *q, unsigned int idx)
{
	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
	const struct fun_dataop_gl *gle;

	gle = (const struct fun_dataop_gl *)req->dataop.imm;
	dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
			 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
}

/* Reclaim up to @budget completed Tx descriptors from a TX XDP queue. */
static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
{
	unsigned int npkts = 0, head, reclaim_idx;

	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
	     head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) {
		/* The HW head is continually updated, ensure we don't read
		 * descriptor state before the head tells us to reclaim it.
		 * On the enqueue side the doorbell is an implicit write
		 * barrier.
		 */
		rmb();

		do {
			fun_xdp_unmap(q, reclaim_idx);
			page_frag_free(q->info[reclaim_idx].vaddr);

			trace_funeth_tx_free(q, reclaim_idx, 1, head);

			reclaim_idx = (reclaim_idx + 1) & q->mask;
			npkts++;
		} while (reclaim_idx != head && npkts < budget);
	}

	q->cons_cnt += npkts;
	return npkts;
}

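/* Queue a buffer of @len bytes at @data for XDP Tx. Reclaims completed
 * descriptors when the queue is running low on space and returns false if
 * no descriptor is available or DMA mapping fails. The caller is responsible
 * for ringing the doorbell.
 */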
bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
{
	struct fun_eth_tx_req *req;
	struct fun_dataop_gl *gle;
	unsigned int idx;
	dma_addr_t dma;

	if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
		fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);

	if (unlikely(!fun_txq_avail(q))) {
		FUN_QSTAT_INC(q, tx_xdp_full);
		return false;
	}

	dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
		FUN_QSTAT_INC(q, tx_map_err);
		return false;
	}

	idx = q->prod_cnt & q->mask;
	req = fun_tx_desc_addr(q, idx);
	req->op = FUN_ETH_OP_TX;
	req->len8 = (sizeof(*req) + sizeof(*gle)) / 8;
	req->flags = 0;
	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
	req->repr_idn = 0;
	req->encap_proto = 0;
	fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
	req->dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);

	gle = (struct fun_dataop_gl *)req->dataop.imm;
	fun_dataop_gl_init(gle, 0, 0, len, dma);

	q->info[idx].vaddr = data;

	u64_stats_update_begin(&q->syncp);
	q->stats.tx_bytes += len;
	q->stats.tx_pkts++;
	u64_stats_update_end(&q->syncp);

	trace_funeth_tx(q, len, idx, 1);
	q->prod_cnt++;

	return true;
}

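/* Transmit up to @n XDP frames on the XDP Tx queue of the current CPU.
 * The doorbell is rung only when XDP_XMIT_FLUSH is set. Returns the number
 * of frames queued or a negative error.
 */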
int fun_xdp_xmit_frames(struct net_device *dev, int n,
			struct xdp_frame **frames, u32 flags)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct funeth_txq *q, **xdpqs;
	int i, q_idx;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	xdpqs = rcu_dereference_bh(fp->xdpqs);
	if (unlikely(!xdpqs))
		return -ENETDOWN;

	q_idx = smp_processor_id();
	if (unlikely(q_idx >= fp->num_xdpqs))
		return -ENXIO;

	for (q = xdpqs[q_idx], i = 0; i < n; i++) {
		const struct xdp_frame *xdpf = frames[i];

		if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
			break;
	}

	if (unlikely(flags & XDP_XMIT_FLUSH))
		fun_txq_wr_db(q);
	return i;
}

/* Purge a Tx queue of any queued packets. Should be called once HW access
 * to the packets has been revoked, e.g., after the queue has been disabled.
 */
static void fun_txq_purge(struct funeth_txq *q)
{
	while (q->cons_cnt != q->prod_cnt) {
		unsigned int idx = q->cons_cnt & q->mask;

		q->cons_cnt += unmap_skb(q, idx);
		dev_kfree_skb_any(q->info[idx].skb);
	}
	netdev_tx_reset_queue(q->ndq);
}

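/* Purge an XDP Tx queue of any queued buffers, unmapping and freeing them.
 * As with fun_txq_purge(), HW access must have been revoked first.
 */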
static void fun_xdpq_purge(struct funeth_txq *q)
{
	while (q->cons_cnt != q->prod_cnt) {
		unsigned int idx = q->cons_cnt & q->mask;

		fun_xdp_unmap(q, idx);
		page_frag_free(q->info[idx].vaddr);
		q->cons_cnt++;
	}
}

/* Create a Tx queue, allocating all the host resources needed. */
static struct funeth_txq *fun_txq_create_sw(struct net_device *dev,
					    unsigned int qidx,
					    unsigned int ndesc,
					    struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct funeth_txq *q;
	int numa_node;

	if (irq)
		numa_node = fun_irq_node(irq); /* skb Tx queue */
	else
		numa_node = cpu_to_node(qidx); /* XDP Tx queue */

	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
	if (!q)
		goto err;

	q->dma_dev = &fp->pdev->dev;
	q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE,
				     sizeof(*q->info), true, numa_node,
				     &q->dma_addr, (void **)&q->info,
				     &q->hw_wb);
	if (!q->desc)
		goto free_q;

	q->netdev = dev;
	q->mask = ndesc - 1;
	q->qidx = qidx;
	q->numa_node = numa_node;
	u64_stats_init(&q->syncp);
	q->init_state = FUN_QSTATE_INIT_SW;
	return q;

free_q:
	kfree(q);
err:
	netdev_err(dev, "Can't allocate memory for %s queue %u\n",
		   irq ? "Tx" : "XDP", qidx);
	return NULL;
}

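/* Free the host resources of a Tx queue, folding its statistics into the
 * netdev-wide totals.
 */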
static void fun_txq_free_sw(struct funeth_txq *q)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);

	fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true,
			  q->desc, q->dma_addr, q->info);

	fp->tx_packets += q->stats.tx_pkts;
	fp->tx_bytes   += q->stats.tx_bytes;
	fp->tx_dropped += q->stats.tx_map_err;

	kfree(q);
}

/* Allocate the device portion of a Tx queue. */
int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	unsigned int irq_idx, ndesc = q->mask + 1;
	int err;

	q->irq = irq;
	*q->hw_wb = 0;
	q->prod_cnt = 0;
	q->cons_cnt = 0;
	irq_idx = irq ? irq->irq_idx : 0;

	err = fun_sq_create(fp->fdev,
			    FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS |
			    FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0,
			    FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc,
			    q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec,
			    irq_idx, 0, fp->fdev->kern_end_qid, 0,
			    &q->hw_qid, &q->db);
	if (err)
		goto out;

	err = fun_create_and_bind_tx(fp, q->hw_qid);
	if (err < 0)
		goto free_devq;
	q->ethid = err;

	if (irq) {
		irq->txq = q;
		q->ndq = netdev_get_tx_queue(q->netdev, q->qidx);
		q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec,
					      fp->tx_coal_count);
		writel(q->irq_db_val, q->db);
	}

	q->init_state = FUN_QSTATE_INIT_FULL;
	netif_info(fp, ifup, q->netdev,
		   "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n",
		   irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx,
		   q->ethid, q->numa_node);
	return 0;

free_devq:
	fun_destroy_sq(fp->fdev, q->hw_qid);
out:
	netdev_err(q->netdev,
		   "Failed to create %s queue %u on device, error %d\n",
		   irq ? "Tx" : "XDP", q->qidx, err);
	return err;
}

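/* Release the device portion of a Tx queue: destroy its HW SQ and ETH
 * binding and purge any packets still queued. A no-op unless the queue
 * reached the fully initialized state.
 */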
static void fun_txq_free_dev(struct funeth_txq *q)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);

	if (q->init_state < FUN_QSTATE_INIT_FULL)
		return;

	netif_info(fp, ifdown, q->netdev,
		   "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n",
		   q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid,
		   q->irq ? q->irq->irq_idx : 0, q->ethid);

	fun_destroy_sq(fp->fdev, q->hw_qid);
	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid);

	if (q->irq) {
		q->irq->txq = NULL;
		fun_txq_purge(q);
	} else {
		fun_xdpq_purge(q);
	}

	q->init_state = FUN_QSTATE_INIT_SW;
}

/* Create or advance a Tx queue, allocating all the host and device resources
 * needed to reach the target state.
 */
int funeth_txq_create(struct net_device *dev, unsigned int qidx,
		      unsigned int ndesc, struct fun_irq *irq, int state,
		      struct funeth_txq **qp)
{
	struct funeth_txq *q = *qp;
	int err;

	if (!q)
		q = fun_txq_create_sw(dev, qidx, ndesc, irq);
	if (!q)
		return -ENOMEM;

	if (q->init_state >= state)
		goto out;

	err = fun_txq_create_dev(q, irq);
	if (err) {
		if (!*qp)
			fun_txq_free_sw(q);
		return err;
	}

out:
	*qp = q;
	return 0;
}

/* Free Tx queue resources until it reaches the target state.
 * The queue must already be disconnected from the stack.
 */
struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state)
{
	if (state < FUN_QSTATE_INIT_FULL)
		fun_txq_free_dev(q);

	if (state == FUN_QSTATE_DESTROYED) {
		fun_txq_free_sw(q);
		q = NULL;
	}

	return q;
}