cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

en_tx.c (27547B)


/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/ptp.h"
#include <net/ipv6.h>

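/* Unwind the last num_dma mappings pushed onto the SQ's DMA fifo; used when
 * building a WQE fails partway through.
 */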
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
	int i;

	for (i = 0; i < num_dma; i++) {
		struct mlx5e_sq_dma *last_pushed_dma =
			mlx5e_dma_get(sq, --sq->dma_fifo_pc);

		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
	}
}

static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}

static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb))
		return skb_transport_offset(skb);
	else
		return mlx5e_skb_l2_header_offset(skb);
}

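/* Number of header bytes that must be inlined into the WQE for the given
 * inline mode, capped at the skb's linear length.
 */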
static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
					struct sk_buff *skb)
{
	u16 hlen;

	switch (mode) {
	case MLX5_INLINE_MODE_NONE:
		return 0;
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
			hlen += VLAN_HLEN;
		break;
	case MLX5_INLINE_MODE_IP:
		hlen = mlx5e_skb_l3_header_offset(skb);
		break;
	case MLX5_INLINE_MODE_L2:
	default:
		hlen = mlx5e_skb_l2_header_offset(skb);
	}
	return min_t(u16, hlen, skb_headlen(skb));
}

#define MLX5_UNSAFE_MEMCPY_DISCLAIMER				\
	"This copy has been bounds-checked earlier in "		\
	"mlx5i_sq_calc_wqe_attr() and intentionally "		\
	"crosses a flex array boundary. Since it is "		\
	"performance sensitive, splitting the copy is "		\
	"undesirable."

static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
	int cpy1_sz = 2 * ETH_ALEN;
	int cpy2_sz = ihs - cpy1_sz;

	memcpy(&vhdr->addrs, skb->data, cpy1_sz);
	vhdr->h_vlan_proto = skb->vlan_proto;
	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
	unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
		      skb->data + cpy1_sz,
		      cpy2_sz,
		      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}

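/* Set the checksum offload flags in the Ethernet segment based on the skb's
 * checksum state (and on TLS offload when CONFIG_MLX5_EN_TLS is enabled).
 */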
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			    struct mlx5e_accel_tx_state *accel,
			    struct mlx5_wqe_eth_seg *eseg)
{
	if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
		return;

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats->csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			sq->stats->csum_partial++;
		}
#ifdef CONFIG_MLX5_EN_TLS
	} else if (unlikely(accel && accel->tls.tls_tisn)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		sq->stats->csum_partial++;
#endif
	} else
		sq->stats->csum_none++;
}

/* Returns the number of header bytes that we plan
 * to inline later in the transmit descriptor
 */
static inline u16
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
{
	struct mlx5e_sq_stats *stats = sq->stats;
	u16 ihs;

	*hopbyhop = 0;
	if (skb->encapsulation) {
		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
		stats->tso_inner_packets++;
		stats->tso_inner_bytes += skb->len - ihs;
	} else {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
		} else {
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
			if (ipv6_has_hopopt_jumbo(skb)) {
				*hopbyhop = sizeof(struct hop_jumbo_hdr);
				ihs -= sizeof(struct hop_jumbo_hdr);
			}
		}
		stats->tso_packets++;
		stats->tso_bytes += skb->len - ihs - *hopbyhop;
	}

	return ihs;
}

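/* DMA-map the remaining linear part (headlen) and all page fragments of the
 * skb into consecutive data segments. Returns the number of mappings pushed
 * onto the DMA fifo, or -ENOMEM after unwinding on a mapping error.
 */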
static inline int
mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			unsigned char *skb_data, u16 headlen,
			struct mlx5_wqe_data_seg *dseg)
{
	dma_addr_t dma_addr = 0;
	u8 num_dma          = 0;
	int i;

	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		num_dma++;
		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		num_dma++;
		dseg++;
	}

	return num_dma;

dma_unmap_wqe_err:
	mlx5e_dma_unmap_wqe_err(sq, num_dma);
	return -ENOMEM;
}

struct mlx5e_tx_attr {
	u32 num_bytes;
	u16 headlen;
	u16 ihs;
	__be16 mss;
	u16 insz;
	u8 opcode;
	u8 hopbyhop;
};

struct mlx5e_tx_wqe_attr {
	u16 ds_cnt;
	u16 ds_cnt_inl;
	u16 ds_cnt_ids;
	u8 num_wqebbs;
};

static u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			 struct mlx5e_accel_tx_state *accel)
{
	u8 mode;

#ifdef CONFIG_MLX5_EN_TLS
	if (accel && accel->tls.tls_tisn)
		return MLX5_INLINE_MODE_TCP_UDP;
#endif

	mode = sq->min_inline_mode;

	if (skb_vlan_tag_present(skb) &&
	    test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
		mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);

	return mode;
}

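/* Derive the per-packet TX attributes (opcode, MSS, inline header size,
 * byte count, linear headlen) from the skb, for both GSO and non-GSO paths.
 */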
static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
				  struct mlx5e_accel_tx_state *accel,
				  struct mlx5e_tx_attr *attr)
{
	struct mlx5e_sq_stats *stats = sq->stats;

	if (skb_is_gso(skb)) {
		int hopbyhop;
		u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);

		*attr = (struct mlx5e_tx_attr) {
			.opcode    = MLX5_OPCODE_LSO,
			.mss       = cpu_to_be16(skb_shinfo(skb)->gso_size),
			.ihs       = ihs,
			.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
			.headlen   = skb_headlen(skb) - ihs - hopbyhop,
			.hopbyhop  = hopbyhop,
		};

		stats->packets += skb_shinfo(skb)->gso_segs;
	} else {
		u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
		u16 ihs = mlx5e_calc_min_inline(mode, skb);

		*attr = (struct mlx5e_tx_attr) {
			.opcode    = MLX5_OPCODE_SEND,
			.mss       = cpu_to_be16(0),
			.ihs       = ihs,
			.num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
			.headlen   = skb_headlen(skb) - ihs,
		};

		stats->packets++;
	}

	attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
	stats->bytes += attr->num_bytes;
}

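/* Translate the TX attributes into a WQE layout: data segment counts for
 * inline headers, accel ids and gather entries, and the number of WQEBBs.
 */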
static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
	u16 ds_cnt_inl = 0;
	u16 ds_cnt_ids = 0;

	if (attr->insz)
		ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
					  MLX5_SEND_WQE_DS);

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		if (skb_vlan_tag_present(skb))
			inl += VLAN_HLEN;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt     = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.ds_cnt_ids = ds_cnt_ids,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
}

static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats->stopped++;
	}
}

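/* Finalize a WQE: record it in the wqe_info ring, fill the control segment,
 * advance the producer counter, stop the queue if it is about to run out of
 * room, and ring the doorbell unless xmit_more defers it.
 */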
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		     const struct mlx5e_tx_attr *attr,
		     const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
		     bool xmit_more)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	bool send_doorbell;

	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = skb,
		.num_bytes = attr->num_bytes,
		.num_dma = num_dma,
		.num_wqebbs = wqe_attr->num_wqebbs,
		.num_fifo_pkts = 0,
	};

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);

	mlx5e_tx_skb_update_hwts_flags(skb);

	sq->pc += wi->num_wqebbs;

	mlx5e_tx_check_stop(sq);

	if (unlikely(sq->ptpsq)) {
		mlx5e_skb_cb_hwtstamp_init(skb);
		mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
		skb_get(skb);
	}

	send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
	if (send_doorbell)
		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}

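/* Build and post a regular (non-MPWQE) send WQE: inline the headers (with
 * optional VLAN insertion and HBH jumbo-header removal), map the rest of the
 * skb into data segments, then complete the WQE.
 */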
static void
mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		  const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
		  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg  *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 ihs = attr->ihs;
	struct ipv6hdr *h6;
	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi   = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg =  wqe->data;

	eseg->mss = attr->mss;

	if (ihs) {
		u8 *start = eseg->inline_hdr.start;

		if (unlikely(attr->hopbyhop)) {
			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			if (skb_vlan_tag_present(skb)) {
				mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
				ihs += VLAN_HLEN;
				h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
			} else {
				unsafe_memcpy(start, skb->data,
					      ETH_HLEN + sizeof(*h6),
					      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
				h6 = (struct ipv6hdr *)(start + ETH_HLEN);
			}
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			memcpy(h6 + 1,
			       skb->data + ETH_HLEN + sizeof(*h6) +
					sizeof(struct hop_jumbo_hdr),
			       tcp_hdrlen(skb));
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else if (skb_vlan_tag_present(skb)) {
			mlx5e_insert_vlan(start, skb, ihs);
			ihs += VLAN_HLEN;
			stats->added_vlan_packets++;
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr->ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz |= cpu_to_be16(ihs);
		dseg += wqe_attr->ds_cnt_inl;
	} else if (skb_vlan_tag_present(skb)) {
		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
		stats->added_vlan_packets++;
	}

	dseg += wqe_attr->ds_cnt_ids;
	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
					  attr->headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}

static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
	return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
	       !attr->insz;
}

static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;

	/* Assumes the session is already running and has at least one packet. */
	return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
}

static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
					 struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = 0,
	};

	memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);

	sq->stats->mpwqe_blks++;
}

static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
{
	return sq->mpwqe.wqe;
}

static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_data_seg *dseg;

	dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;

	session->pkt_count++;
	session->bytes_count += txd->len;

	dseg->addr = cpu_to_be64(txd->dma_addr);
	dseg->byte_count = cpu_to_be32(txd->len);
	dseg->lkey = sq->mkey_be;
	session->ds_count++;

	sq->stats->mpwqe_pkts++;
}

static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	u8 ds_count = session->ds_count;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 pi;

	cseg = &session->wqe->ctrl;
	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
	wi = &sq->db.wqe_info[pi];
	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = NULL,
		.num_bytes = session->bytes_count,
		.num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
		.num_dma = session->pkt_count,
		.num_fifo_pkts = session->pkt_count,
	};

	sq->pc += wi->num_wqebbs;

	session->wqe = NULL;

	mlx5e_tx_check_stop(sq);

	return cseg;
}

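/* Add the skb to an enhanced multi-packet WQE (MPWQE) session, opening a new
 * session if none is active or the Ethernet segment changed, and closing it
 * when the session is full or a doorbell must be rung.
 */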
static void
mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		    struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_xmit_data txd;

	if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
		mlx5e_tx_mpwqe_session_complete(sq);
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	}

	sq->stats->xmit_more += xmit_more;

	txd.data = skb->data;
	txd.len = skb->len;

	txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
		goto err_unmap;
	mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);

	mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);

	mlx5e_tx_mpwqe_add_dseg(sq, &txd);

	mlx5e_tx_skb_update_hwts_flags(skb);

	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
			mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	} else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
		/* Might stop the queue, but we were asked to ring the doorbell anyway. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	}

	return;

err_unmap:
	mlx5e_dma_unmap_wqe_err(sq, 1);
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
}

void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
{
	/* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
	if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
		mlx5e_tx_mpwqe_session_complete(sq);
}

static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
	mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
}

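/* ndo_start_xmit entry point: look up the SQ for the skb's queue mapping,
 * run the accel (TLS/IPsec) hooks, then transmit via either the MPWQE or the
 * regular WQE path.
 */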
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_accel_tx_state accel = {};
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	struct mlx5e_txqsq *sq;
	u16 pi;

	/* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
	 * queue being changed is disabled, and smp_wmb guarantees that the
	 * changes are visible before mlx5e_xmit tries to read from txq2sq. It
	 * guarantees that the value of txq2sq[qid] doesn't change while
	 * mlx5e_xmit is running on queue number qid. smp_wmb is paired with
	 * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
	 */
	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
	if (unlikely(!sq)) {
		/* Two cases when sq can be NULL:
		 * 1. The HTB node is registered, and mlx5e_select_queue
		 * selected its queue ID, but the SQ itself is not yet created.
		 * 2. HTB SQ creation failed. Similar to the previous case, but
		 * the SQ won't be created.
		 */
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* May send SKBs and WQEs. */
	if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
		return NETDEV_TX_OK;

	mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);

	if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
			struct mlx5_wqe_eth_seg eseg = {};

			mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
			return NETDEV_TX_OK;
		}

		mlx5e_tx_mpwqe_ensure_complete(sq);
	}

	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	/* May update the WQE, but may not post other WQEs. */
	mlx5e_accel_tx_finish(sq, wqe, &accel,
			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
	mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());

	return NETDEV_TX_OK;
}

void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
}

static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
				  u32 *dma_fifo_cc)
{
	int i;

	for (i = 0; i < wi->num_dma; i++) {
		struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);

		mlx5e_tx_dma_unmap(sq->pdev, dma);
	}
}

static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			      struct mlx5_cqe64 *cqe, int napi_budget)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		struct skb_shared_hwtstamps hwts = {};
		u64 ts = get_cqe_ts(cqe);

		hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
		if (sq->ptpsq)
			mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
						      hwts.hwtstamp, sq->ptpsq->cq_stats);
		else
			skb_tstamp_tx(skb, &hwts);
	}

	napi_consume_skb(skb, napi_budget);
}

static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++) {
		struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);

		mlx5e_consume_skb(sq, skb, cqe, napi_budget);
	}
}

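/* Poll the TX completion queue: unmap DMA, timestamp and free completed
 * skbs, report completions to the stack, schedule recovery on error CQEs,
 * and wake the queue if it was stopped and has room again. Returns true if
 * the polling budget was exhausted.
 */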
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq_stats *stats;
	struct mlx5e_txqsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 dma_fifo_cc;
	u32 nbytes;
	u16 npkts;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_txqsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	stats = sq->stats;

	npkts = 0;
	nbytes = 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	/* avoid dirtying sq cache line every cqe */
	dma_fifo_cc = sq->dma_fifo_cc;

	i = 0;
	do {
		struct mlx5e_tx_wqe_info *wi;
		u16 wqe_counter;
		bool last_wqe;
		u16 ci;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			if (likely(wi->skb)) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);

				npkts++;
				nbytes += wi->num_bytes;
				continue;
			}

			if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
									       &dma_fifo_cc)))
				continue;

			if (wi->num_fifo_pkts) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);

				npkts += wi->num_fifo_pkts;
				nbytes += wi->num_bytes;
			}
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
					      &sq->state)) {
				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
						     (struct mlx5_err_cqe *)cqe);
				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
				queue_work(cq->priv->wq, &sq->recover_work);
			}
			stats->cqe_err++;
		}

	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);

	if (netif_tx_queue_stopped(sq->txq) &&
	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
		netif_tx_wake_queue(sq->txq);
		stats->wake++;
	}

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++)
		dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
}

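/* Release all outstanding descriptors of an SQ being torn down: unmap their
 * DMA, free the associated skbs and update the BQL counters.
 */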
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	u32 dma_fifo_cc, nbytes = 0;
	u16 ci, sqcc, npkts = 0;

	sqcc = sq->cc;
	dma_fifo_cc = sq->dma_fifo_cc;

	while (sqcc != sq->pc) {
		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];

		sqcc += wi->num_wqebbs;

		if (likely(wi->skb)) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			dev_kfree_skb_any(wi->skb);

			npkts++;
			nbytes += wi->num_bytes;
			continue;
		}

		if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
			continue;

		if (wi->num_fifo_pkts) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);

			npkts += wi->num_fifo_pkts;
			nbytes += wi->num_bytes;
		}
	}

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
}

#ifdef CONFIG_MLX5_CORE_IPOIB
static inline void
mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
			   struct mlx5_wqe_datagram_seg *dseg)
{
	memcpy(&dseg->av, av, sizeof(struct mlx5_av));
	dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
	dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}

static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
				   const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
	u16 ds_cnt_inl = 0;

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;

	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt     = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

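/* IPoIB transmit path: like mlx5e_sq_xmit_wqe, but the WQE also carries a
 * datagram segment built from the address vector, destination QPN and qkey.
 */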
void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5i_tx_wqe *wqe;

	struct mlx5_wqe_datagram_seg *datagram;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg  *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;

	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);

	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi       = &sq->db.wqe_info[pi];
	cseg     = &wqe->ctrl;
	datagram = &wqe->datagram;
	eseg     = &wqe->eth;
	dseg     =  wqe->data;

	mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);

	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);

	eseg->mss = attr.mss;

	if (attr.ihs) {
		if (unlikely(attr.hopbyhop)) {
			struct ipv6hdr *h6;

			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      ETH_HLEN + sizeof(*h6),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			unsafe_memcpy(h6 + 1,
				      skb->data + ETH_HLEN + sizeof(*h6) +
						  sizeof(struct hop_jumbo_hdr),
				      tcp_hdrlen(skb),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr.ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
		dseg += wqe_attr.ds_cnt_inl;
	}

	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
					  attr.headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}
#endif