vnic_main.c (16335B)
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2017 - 2020 Intel Corporation.
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"
#include "netdev.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

#define VNIC_MASK (0xFF)
#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, includes header and data.
	 * total_len is length of the packet in Dwords plus the PBC should not
	 * include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  struct net_device *sb_dev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
						  int vesw_id)
{
	int vnic_id = VNIC_ID(vesw_id);

	return hfi1_netdev_get_data(dd, vnic_id);
}

static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
{
	struct hfi1_vnic_vport_info *vinfo;
	int next_id = VNIC_ID(0);

	vinfo = hfi1_netdev_get_first_data(dd, &next_id);

	if (next_id > VNIC_ID(VNIC_MASK))
		return NULL;

	return vinfo;
}

void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1, rc;
	u8 q_idx;
	unsigned char *pad_info;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = get_vnic_port(dd, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;

			vinfo_tmp = get_first_vnic_port(dd);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);

	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	rc = hfi1_vnic_decap_skb(rxq, skb);

	/* update rx counters */
	hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
	if (unlikely(rc)) {
		dev_kfree_skb_any(skb);
		return;
	}

	skb_checksum_none_assert(skb);
	skb->protocol = eth_type_trans(skb, rxq->netdev);

	napi_gro_receive(&rxq->napi, skb);
}

static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
	if (rc < 0)
		return rc;

	rc = hfi1_netdev_rx_init(dd);
	if (rc)
		goto err_remove;

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;

err_remove:
	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
	return rc;
}

static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));

	hfi1_netdev_rx_destroy(dd);
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic_num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;
	}

	rc = hfi1_netdev_rx_init(dd);
	if (rc) {
		dd_dev_err(dd, "Unable to initialize netdev contexts\n");
		goto alloc_fail;
	}

	hfi1_init_vnic_rsm(dd);

	dd->vnic_num_vports++;
	hfi1_vnic_sdma_init(vinfo);

alloc_fail:
	if (!dd->vnic_num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic_num_vports == 0) {
		hfi1_deinit_vnic_rsm(dd);
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
	hfi1_netdev_rx_destroy(dd);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If vesw_id is being changed, and if the vnic port is up,
	 * reset the vnic port to ensure new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u32 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!dd->num_netdev_contexts)
		return ERR_PTR(-ENOMEM);

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  chip_sdma_engines(dd),
				  dd->num_netdev_contexts);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = chip_sdma_engines(dd);
	vinfo->num_rx_q = dd->num_netdev_contexts;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}