en_tx.c (27547B)
/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/ptp.h"
#include <net/ipv6.h>

static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
	int i;

	for (i = 0; i < num_dma; i++) {
		struct mlx5e_sq_dma *last_pushed_dma =
			mlx5e_dma_get(sq, --sq->dma_fifo_pc);

		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
	}
}

static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}

static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb))
		return skb_transport_offset(skb);
	else
		return mlx5e_skb_l2_header_offset(skb);
}

static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
					struct sk_buff *skb)
{
	u16 hlen;

	switch (mode) {
	case MLX5_INLINE_MODE_NONE:
		return 0;
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
			hlen += VLAN_HLEN;
		break;
	case MLX5_INLINE_MODE_IP:
		hlen = mlx5e_skb_l3_header_offset(skb);
		break;
	case MLX5_INLINE_MODE_L2:
	default:
		hlen = mlx5e_skb_l2_header_offset(skb);
	}
	return min_t(u16, hlen, skb_headlen(skb));
}

#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \
	"This copy has been bounds-checked earlier in " \
	"mlx5i_sq_calc_wqe_attr() and intentionally " \
	"crosses a flex array boundary. Since it is " \
	"performance sensitive, splitting the copy is " \
	"undesirable."
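
/* Copy the packet headers into the WQE inline part while inserting an
 * 802.1Q/802.1ad tag in software: destination and source MACs first, then
 * the VLAN proto/TCI taken from the skb metadata, then the remaining
 * (ihs - 2 * ETH_ALEN) header bytes starting at the original EtherType.
 */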
static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
	int cpy1_sz = 2 * ETH_ALEN;
	int cpy2_sz = ihs - cpy1_sz;

	memcpy(&vhdr->addrs, skb->data, cpy1_sz);
	vhdr->h_vlan_proto = skb->vlan_proto;
	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
	unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
		      skb->data + cpy1_sz,
		      cpy2_sz,
		      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}

static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			    struct mlx5e_accel_tx_state *accel,
			    struct mlx5_wqe_eth_seg *eseg)
{
	if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
		return;

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats->csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			sq->stats->csum_partial++;
		}
#ifdef CONFIG_MLX5_EN_TLS
	} else if (unlikely(accel && accel->tls.tls_tisn)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		sq->stats->csum_partial++;
#endif
	} else
		sq->stats->csum_none++;
}

/* Returns the number of header bytes that we plan
 * to inline later in the transmit descriptor
 */
static inline u16
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
{
	struct mlx5e_sq_stats *stats = sq->stats;
	u16 ihs;

	*hopbyhop = 0;
	if (skb->encapsulation) {
		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
		stats->tso_inner_packets++;
		stats->tso_inner_bytes += skb->len - ihs;
	} else {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
		} else {
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
			if (ipv6_has_hopopt_jumbo(skb)) {
				*hopbyhop = sizeof(struct hop_jumbo_hdr);
				ihs -= sizeof(struct hop_jumbo_hdr);
			}
		}
		stats->tso_packets++;
		stats->tso_bytes += skb->len - ihs - *hopbyhop;
	}

	return ihs;
}

static inline int
mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			unsigned char *skb_data, u16 headlen,
			struct mlx5_wqe_data_seg *dseg)
{
	dma_addr_t dma_addr = 0;
	u8 num_dma = 0;
	int i;

	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr = cpu_to_be64(dma_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		num_dma++;
		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr = cpu_to_be64(dma_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		num_dma++;
		dseg++;
	}

	return num_dma;

dma_unmap_wqe_err:
	mlx5e_dma_unmap_wqe_err(sq, num_dma);
	return -ENOMEM;
}
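
/* Per-packet send attributes derived from the skb in mlx5e_sq_xmit_prepare(),
 * and the WQE layout later derived from them in mlx5e_sq_calc_wqe_attr():
 * how many header bytes are inlined (ihs), how many bytes remain in the
 * linear part (headlen), and how many 16-byte data segments (DS) and 64-byte
 * basic blocks (WQEBBs) the descriptor occupies.
 */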
struct mlx5e_tx_attr {
	u32 num_bytes;
	u16 headlen;
	u16 ihs;
	__be16 mss;
	u16 insz;
	u8 opcode;
	u8 hopbyhop;
};

struct mlx5e_tx_wqe_attr {
	u16 ds_cnt;
	u16 ds_cnt_inl;
	u16 ds_cnt_ids;
	u8 num_wqebbs;
};

static u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			 struct mlx5e_accel_tx_state *accel)
{
	u8 mode;

#ifdef CONFIG_MLX5_EN_TLS
	if (accel && accel->tls.tls_tisn)
		return MLX5_INLINE_MODE_TCP_UDP;
#endif

	mode = sq->min_inline_mode;

	if (skb_vlan_tag_present(skb) &&
	    test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
		mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);

	return mode;
}

static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
				  struct mlx5e_accel_tx_state *accel,
				  struct mlx5e_tx_attr *attr)
{
	struct mlx5e_sq_stats *stats = sq->stats;

	if (skb_is_gso(skb)) {
		int hopbyhop;
		u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);

		*attr = (struct mlx5e_tx_attr) {
			.opcode = MLX5_OPCODE_LSO,
			.mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
			.ihs = ihs,
			.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
			.headlen = skb_headlen(skb) - ihs - hopbyhop,
			.hopbyhop = hopbyhop,
		};

		stats->packets += skb_shinfo(skb)->gso_segs;
	} else {
		u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
		u16 ihs = mlx5e_calc_min_inline(mode, skb);

		*attr = (struct mlx5e_tx_attr) {
			.opcode = MLX5_OPCODE_SEND,
			.mss = cpu_to_be16(0),
			.ihs = ihs,
			.num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
			.headlen = skb_headlen(skb) - ihs,
		};

		stats->packets++;
	}

	attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
	stats->bytes += attr->num_bytes;
}

static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
	u16 ds_cnt_inl = 0;
	u16 ds_cnt_ids = 0;

	if (attr->insz)
		ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
					  MLX5_SEND_WQE_DS);

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		if (skb_vlan_tag_present(skb))
			inl += VLAN_HLEN;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.ds_cnt_ids = ds_cnt_ids,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
}

static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats->stopped++;
	}
}
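
/* Finalize a regular send WQE: record it in the wqe_info array for the
 * completion path, fill the control segment (producer counter and opcode in
 * opmod_idx_opcode, SQ number and DS count in qpn_ds), advance the producer
 * counter, stop the queue if fewer than stop_room WQEBBs remain, and ring
 * the doorbell unless xmit_more says more packets are on the way.  On PTP
 * SQs the skb is also pushed to a fifo (with an extra reference) so the
 * port timestamp can be matched to it later.
 */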
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		     const struct mlx5e_tx_attr *attr,
		     const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
		     bool xmit_more)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	bool send_doorbell;

	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = skb,
		.num_bytes = attr->num_bytes,
		.num_dma = num_dma,
		.num_wqebbs = wqe_attr->num_wqebbs,
		.num_fifo_pkts = 0,
	};

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);

	mlx5e_tx_skb_update_hwts_flags(skb);

	sq->pc += wi->num_wqebbs;

	mlx5e_tx_check_stop(sq);

	if (unlikely(sq->ptpsq)) {
		mlx5e_skb_cb_hwtstamp_init(skb);
		mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
		skb_get(skb);
	}

	send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
	if (send_doorbell)
		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
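
/* Build and post a regular (non-MPWQE) send WQE: inline headers (inserting
 * a VLAN tag in software when needed), then map the rest of the skb into
 * data segments.  When attr->hopbyhop is set, the inlined copy drops the
 * IPv6 Hop-by-Hop jumbo option: Ethernet (plus optional VLAN) and IPv6
 * headers are copied, nexthdr is rewritten to TCP, and the TCP header is
 * copied right after the IPv6 header.
 */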
static void
mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		  const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
		  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 ihs = attr->ihs;
	struct ipv6hdr *h6;
	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	eseg->mss = attr->mss;

	if (ihs) {
		u8 *start = eseg->inline_hdr.start;

		if (unlikely(attr->hopbyhop)) {
			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			if (skb_vlan_tag_present(skb)) {
				mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
				ihs += VLAN_HLEN;
				h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
			} else {
				unsafe_memcpy(start, skb->data,
					      ETH_HLEN + sizeof(*h6),
					      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
				h6 = (struct ipv6hdr *)(start + ETH_HLEN);
			}
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			memcpy(h6 + 1,
			       skb->data + ETH_HLEN + sizeof(*h6) +
					sizeof(struct hop_jumbo_hdr),
			       tcp_hdrlen(skb));
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else if (skb_vlan_tag_present(skb)) {
			mlx5e_insert_vlan(start, skb, ihs);
			ihs += VLAN_HLEN;
			stats->added_vlan_packets++;
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr->ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz |= cpu_to_be16(ihs);
		dseg += wqe_attr->ds_cnt_inl;
	} else if (skb_vlan_tag_present(skb)) {
		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
		stats->added_vlan_packets++;
	}

	dseg += wqe_attr->ds_cnt_ids;
	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
					  attr->headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}

static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
	return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
	       !attr->insz;
}

static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;

	/* Assumes the session is already running and has at least one packet. */
	return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
}

static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
					 struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = 0,
	};

	memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);

	sq->stats->mpwqe_blks++;
}

static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
{
	return sq->mpwqe.wqe;
}

static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_data_seg *dseg;

	dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;

	session->pkt_count++;
	session->bytes_count += txd->len;

	dseg->addr = cpu_to_be64(txd->dma_addr);
	dseg->byte_count = cpu_to_be32(txd->len);
	dseg->lkey = sq->mkey_be;
	session->ds_count++;

	sq->stats->mpwqe_pkts++;
}
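
/* Close the currently open multi-packet WQE session: write the
 * ENHANCED_MPSW control segment with the accumulated DS count, record the
 * accumulated byte and packet counts in the wqe_info entry (the skbs
 * themselves sit in the skb fifo, hence num_fifo_pkts), advance the
 * producer counter, and stop the queue if it ran out of room.
 */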
static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	u8 ds_count = session->ds_count;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 pi;

	cseg = &session->wqe->ctrl;
	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
	wi = &sq->db.wqe_info[pi];
	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = NULL,
		.num_bytes = session->bytes_count,
		.num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
		.num_dma = session->pkt_count,
		.num_fifo_pkts = session->pkt_count,
	};

	sq->pc += wi->num_wqebbs;

	session->wqe = NULL;

	mlx5e_tx_check_stop(sq);

	return cseg;
}

static void
mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		    struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_xmit_data txd;

	if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
		mlx5e_tx_mpwqe_session_complete(sq);
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	}

	sq->stats->xmit_more += xmit_more;

	txd.data = skb->data;
	txd.len = skb->len;

	txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
		goto err_unmap;
	mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);

	mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);

	mlx5e_tx_mpwqe_add_dseg(sq, &txd);

	mlx5e_tx_skb_update_hwts_flags(skb);

	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
			mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	} else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
		/* Might stop the queue, but we were asked to ring the doorbell anyway. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	}

	return;

err_unmap:
	mlx5e_dma_unmap_wqe_err(sq, 1);
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
}

void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
{
	/* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
	if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
		mlx5e_tx_mpwqe_session_complete(sq);
}

static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
	mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
}
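
/* Main ndo_start_xmit entry point for the mlx5e netdev.  The skb is mapped
 * to its SQ via txq2sq, the accel offloads (e.g. TLS, IPsec) get a chance
 * to process or consume it, and the packet then goes out either through
 * the MPWQE combining path (for eligible linear skbs when the SQ has MPWQE
 * enabled) or as a regular send WQE.
 */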
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_accel_tx_state accel = {};
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	struct mlx5e_txqsq *sq;
	u16 pi;

	/* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
	 * queue being changed is disabled, and smp_wmb guarantees that the
	 * changes are visible before mlx5e_xmit tries to read from txq2sq. It
	 * guarantees that the value of txq2sq[qid] doesn't change while
	 * mlx5e_xmit is running on queue number qid. smp_wmb is paired with
	 * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
	 */
	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
	if (unlikely(!sq)) {
		/* Two cases when sq can be NULL:
		 * 1. The HTB node is registered, and mlx5e_select_queue
		 *    selected its queue ID, but the SQ itself is not yet created.
		 * 2. HTB SQ creation failed. Similar to the previous case, but
		 *    the SQ won't be created.
		 */
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* May send SKBs and WQEs. */
	if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
		return NETDEV_TX_OK;

	mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);

	if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
			struct mlx5_wqe_eth_seg eseg = {};

			mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
			return NETDEV_TX_OK;
		}

		mlx5e_tx_mpwqe_ensure_complete(sq);
	}

	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	/* May update the WQE, but may not post other WQEs. */
	mlx5e_accel_tx_finish(sq, wqe, &accel,
			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
	mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());

	return NETDEV_TX_OK;
}

void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
}

static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
				  u32 *dma_fifo_cc)
{
	int i;

	for (i = 0; i < wi->num_dma; i++) {
		struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);

		mlx5e_tx_dma_unmap(sq->pdev, dma);
	}
}

static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			      struct mlx5_cqe64 *cqe, int napi_budget)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		struct skb_shared_hwtstamps hwts = {};
		u64 ts = get_cqe_ts(cqe);

		hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
		if (sq->ptpsq)
			mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
						      hwts.hwtstamp, sq->ptpsq->cq_stats);
		else
			skb_tstamp_tx(skb, &hwts);
	}

	napi_consume_skb(skb, napi_budget);
}

static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++) {
		struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);

		mlx5e_consume_skb(sq, skb, cqe, napi_budget);
	}
}
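
/* TX completion handling, called from NAPI with a budget.  Each CQE carries
 * a wqe_counter; every WQE up to and including that counter is completed:
 * its DMA mappings are unmapped, its skb (or fifo-held skbs for MPWQE) is
 * consumed, and BQL is updated.  An error CQE schedules the SQ recovery
 * work.  Returns true when the whole polling budget was used, i.e. more
 * completions may still be pending.
 */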
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq_stats *stats;
	struct mlx5e_txqsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 dma_fifo_cc;
	u32 nbytes;
	u16 npkts;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_txqsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	stats = sq->stats;

	npkts = 0;
	nbytes = 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	/* avoid dirtying sq cache line every cqe */
	dma_fifo_cc = sq->dma_fifo_cc;

	i = 0;
	do {
		struct mlx5e_tx_wqe_info *wi;
		u16 wqe_counter;
		bool last_wqe;
		u16 ci;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			if (likely(wi->skb)) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);

				npkts++;
				nbytes += wi->num_bytes;
				continue;
			}

			if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
									       &dma_fifo_cc)))
				continue;

			if (wi->num_fifo_pkts) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);

				npkts += wi->num_fifo_pkts;
				nbytes += wi->num_bytes;
			}
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
					      &sq->state)) {
				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
						     (struct mlx5_err_cqe *)cqe);
				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
				queue_work(cq->priv->wq, &sq->recover_work);
			}
			stats->cqe_err++;
		}

	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);

	if (netif_tx_queue_stopped(sq->txq) &&
	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
		netif_tx_wake_queue(sq->txq);
		stats->wake++;
	}

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++)
		dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
}

void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	u32 dma_fifo_cc, nbytes = 0;
	u16 ci, sqcc, npkts = 0;

	sqcc = sq->cc;
	dma_fifo_cc = sq->dma_fifo_cc;

	while (sqcc != sq->pc) {
		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];

		sqcc += wi->num_wqebbs;

		if (likely(wi->skb)) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			dev_kfree_skb_any(wi->skb);

			npkts++;
			nbytes += wi->num_bytes;
			continue;
		}

		if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
			continue;

		if (wi->num_fifo_pkts) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);

			npkts += wi->num_fifo_pkts;
			nbytes += wi->num_bytes;
		}
	}

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
}

#ifdef CONFIG_MLX5_CORE_IPOIB
static inline void
mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
			   struct mlx5_wqe_datagram_seg *dseg)
{
	memcpy(&dseg->av, av, sizeof(struct mlx5_av));
	dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
	dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}
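
/* IPoIB variant of the WQE attribute calculation: the base DS count comes
 * from struct mlx5i_tx_wqe, which also carries a datagram (address vector)
 * segment, and, unlike the Ethernet path, no VLAN bytes are added to the
 * inline size.
 */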
static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
				   const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
	u16 ds_cnt_inl = 0;

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;

	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5i_tx_wqe *wqe;

	struct mlx5_wqe_datagram_seg *datagram;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;

	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);

	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	datagram = &wqe->datagram;
	eseg = &wqe->eth;
	dseg = wqe->data;

	mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);

	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);

	eseg->mss = attr.mss;

	if (attr.ihs) {
		if (unlikely(attr.hopbyhop)) {
			struct ipv6hdr *h6;

			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      ETH_HLEN + sizeof(*h6),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			unsafe_memcpy(h6 + 1,
				      skb->data + ETH_HLEN + sizeof(*h6) +
						  sizeof(struct hop_jumbo_hdr),
				      tcp_hdrlen(skb),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr.ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
		dseg += wqe_attr.ds_cnt_inl;
	}

	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
					  attr.headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}
#endif