lag.c (37016B)
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
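 * Taken when updating the per-port netdev pointers and in the exported
 * mlx5_lag_is_*() / mlx5_lag_get_*() query helpers below.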
 */
static DEFINE_SPINLOCK(lag_lock);

static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
	if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	mlx5_lag_mpesw_cleanup(ldev);
	cancel_work_sync(&ldev->mpesw_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
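 * For example, with two ports and one bucket each, the native map 1 2
 * becomes 1 1 when port 2 is down while port 1 is still active.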
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
		return mlx5_lag_port_sel_modify(ldev, ports);
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
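			/* Any mismatch re-programs the whole new map at once */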
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
		/* Four ports are supported only in hash mode */
		if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
			return -EINVAL;
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
		if (ldev->ports > 2)
			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
	}

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb)
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev)
{
	int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(ldev));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
"Failed to deactivate RoCE LAG; driver restart required\n"); 534 } 535 536 return err; 537} 538 539int mlx5_activate_lag(struct mlx5_lag *ldev, 540 struct lag_tracker *tracker, 541 enum mlx5_lag_mode mode, 542 bool shared_fdb) 543{ 544 bool roce_lag = mode == MLX5_LAG_MODE_ROCE; 545 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 546 unsigned long flags = 0; 547 int err; 548 549 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); 550 if (err) 551 return err; 552 553 if (mode != MLX5_LAG_MODE_MPESW) { 554 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); 555 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 556 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, 557 ldev->v2p_map); 558 if (err) { 559 mlx5_core_err(dev0, 560 "Failed to create LAG port selection(%d)\n", 561 err); 562 return err; 563 } 564 } 565 } 566 567 err = mlx5_create_lag(ldev, tracker, mode, flags); 568 if (err) { 569 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 570 mlx5_lag_port_sel_destroy(ldev); 571 if (roce_lag) 572 mlx5_core_err(dev0, 573 "Failed to activate RoCE LAG\n"); 574 else 575 mlx5_core_err(dev0, 576 "Failed to activate VF LAG\n" 577 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 578 return err; 579 } 580 581 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && 582 !roce_lag) 583 mlx5_lag_drop_rule_setup(ldev, tracker); 584 585 ldev->mode = mode; 586 ldev->mode_flags = flags; 587 return 0; 588} 589 590static int mlx5_deactivate_lag(struct mlx5_lag *ldev) 591{ 592 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 593 struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; 594 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; 595 bool roce_lag = __mlx5_lag_is_roce(ldev); 596 unsigned long flags = ldev->mode_flags; 597 int err; 598 599 ldev->mode = MLX5_LAG_MODE_NONE; 600 ldev->mode_flags = 0; 601 mlx5_lag_mp_reset(ldev); 602 603 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { 604 mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, 605 dev1->priv.eswitch); 606 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); 607 } 608 609 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); 610 err = mlx5_cmd_exec_in(dev0, destroy_lag, in); 611 if (err) { 612 if (roce_lag) { 613 mlx5_core_err(dev0, 614 "Failed to deactivate RoCE LAG; driver restart required\n"); 615 } else { 616 mlx5_core_err(dev0, 617 "Failed to deactivate VF LAG; driver restart required\n" 618 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 619 } 620 return err; 621 } 622 623 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 624 mlx5_lag_port_sel_destroy(ldev); 625 if (mlx5_lag_has_drop_rule(ldev)) 626 mlx5_lag_drop_rule_cleanup(ldev); 627 628 return 0; 629} 630 631#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 632static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) 633{ 634#ifdef CONFIG_MLX5_ESWITCH 635 u8 mode; 636#endif 637 int i; 638 639 for (i = 0; i < ldev->ports; i++) 640 if (!ldev->pf[i].dev) 641 return false; 642 643#ifdef CONFIG_MLX5_ESWITCH 644 mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev); 645 646 if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS) 647 return false; 648 649 for (i = 0; i < ldev->ports; i++) 650 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) 651 return false; 652 653 if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) 654 return false; 655#else 656 for (i = 0; i < ldev->ports; i++) 657 if 
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag &&
			ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
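	 * bond_status below collects a bitmask of our ports that are enslaved
	 * to this upper device (bit i set means ldev->pf[i].netdev is a slave).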
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
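	 * (tracker->netdev_state is later consumed by mlx5_infer_tx_affinity_mapping()).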
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) &&
	      test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	int i;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;
	int i;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);