qos.c (26327B)
1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 4#include "eswitch.h" 5#include "esw/qos.h" 6#include "en/port.h" 7#define CREATE_TRACE_POINTS 8#include "diag/qos_tracepoint.h" 9 10/* Minimum supported BW share value by the HW is 1 Mbit/sec */ 11#define MLX5_MIN_BW_SHARE 1 12 13#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ 14 min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) 15 16struct mlx5_esw_rate_group { 17 u32 tsar_ix; 18 u32 max_rate; 19 u32 min_rate; 20 u32 bw_share; 21 struct list_head list; 22}; 23 24static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, 25 u32 parent_ix, u32 tsar_ix, 26 u32 max_rate, u32 bw_share) 27{ 28 u32 bitmask = 0; 29 30 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 31 return -EOPNOTSUPP; 32 33 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix); 34 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 35 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 36 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 37 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; 38 39 return mlx5_modify_scheduling_element_cmd(dev, 40 SCHEDULING_HIERARCHY_E_SWITCH, 41 sched_ctx, 42 tsar_ix, 43 bitmask); 44} 45 46static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 47 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 48{ 49 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 50 struct mlx5_core_dev *dev = esw->dev; 51 int err; 52 53 err = esw_qos_tsar_config(dev, sched_ctx, 54 esw->qos.root_tsar_ix, group->tsar_ix, 55 max_rate, bw_share); 56 if (err) 57 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); 58 59 trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); 60 61 return err; 62} 63 64static int esw_qos_vport_config(struct mlx5_eswitch *esw, 65 struct mlx5_vport *vport, 66 u32 max_rate, u32 bw_share, 67 struct netlink_ext_ack *extack) 68{ 69 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 70 struct mlx5_esw_rate_group *group = vport->qos.group; 71 struct mlx5_core_dev *dev = esw->dev; 72 u32 parent_tsar_ix; 73 void *vport_elem; 74 int err; 75 76 if (!vport->qos.enabled) 77 return -EIO; 78 79 parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; 80 MLX5_SET(scheduling_context, sched_ctx, element_type, 81 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); 82 vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, 83 element_attributes); 84 MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); 85 86 err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix, 87 max_rate, bw_share); 88 if (err) { 89 esw_warn(esw->dev, 90 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", 91 vport->vport, err); 92 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); 93 return err; 94 } 95 96 trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); 97 98 return 0; 99} 100 101static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, 102 struct mlx5_esw_rate_group *group, 103 bool group_level) 104{ 105 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 106 struct mlx5_vport *evport; 107 u32 max_guarantee = 0; 108 unsigned long i; 109 110 if (group_level) { 111 struct mlx5_esw_rate_group *group; 112 113 list_for_each_entry(group, &esw->qos.groups, list) { 114 if (group->min_rate < max_guarantee) 115 continue; 116 max_guarantee = group->min_rate; 117 } 118 } else { 119 mlx5_esw_for_each_vport(esw, i, evport) { 120 if (!evport->enabled || !evport->qos.enabled || 121 evport->qos.group != group || evport->qos.min_rate < max_guarantee) 122 continue; 123 max_guarantee = evport->qos.min_rate; 124 } 125 } 126 127 if (max_guarantee) 128 return max_t(u32, max_guarantee / fw_max_bw_share, 1); 129 130 /* If vports min rate divider is 0 but their group has bw_share configured, then 131 * need to set bw_share for vports to minimal value. 132 */ 133 if (!group_level && !max_guarantee && group && group->bw_share) 134 return 1; 135 return 0; 136} 137 138static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) 139{ 140 if (divider) 141 return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); 142 143 return 0; 144} 145 146static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, 147 struct mlx5_esw_rate_group *group, 148 struct netlink_ext_ack *extack) 149{ 150 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 151 u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); 152 struct mlx5_vport *evport; 153 unsigned long i; 154 u32 bw_share; 155 int err; 156 157 mlx5_esw_for_each_vport(esw, i, evport) { 158 if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) 159 continue; 160 bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); 161 162 if (bw_share == evport->qos.bw_share) 163 continue; 164 165 err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); 166 if (err) 167 return err; 168 169 evport->qos.bw_share = bw_share; 170 } 171 172 return 0; 173} 174 175static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, 176 struct netlink_ext_ack *extack) 177{ 178 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 179 struct mlx5_esw_rate_group *group; 180 u32 bw_share; 181 int err; 182 183 list_for_each_entry(group, &esw->qos.groups, list) { 184 bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); 185 186 if (bw_share == group->bw_share) 187 continue; 188 189 err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); 190 if (err) 191 return err; 192 193 group->bw_share = bw_share; 194 195 /* All the group's vports need to be set with default bw_share 196 * to enable them with QOS 197 */ 198 err = esw_qos_normalize_vports_min_rate(esw, group, extack); 199 200 if (err) 201 return err; 202 } 203 204 return 0; 205} 206 207static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 208 u32 min_rate, struct netlink_ext_ack *extack) 209{ 210 u32 fw_max_bw_share, previous_min_rate; 211 bool min_rate_supported; 212 int err; 213 214 lockdep_assert_held(&esw->state_lock); 215 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 216 min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && 217 fw_max_bw_share >= MLX5_MIN_BW_SHARE; 218 if (min_rate && !min_rate_supported) 219 return -EOPNOTSUPP; 220 if (min_rate == evport->qos.min_rate) 221 return 0; 222 223 previous_min_rate = evport->qos.min_rate; 224 evport->qos.min_rate = min_rate; 225 err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); 226 if (err) 227 evport->qos.min_rate = previous_min_rate; 228 229 return err; 230} 231 232static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 233 u32 max_rate, struct netlink_ext_ack *extack) 234{ 235 u32 act_max_rate = max_rate; 236 bool max_rate_supported; 237 int err; 238 239 lockdep_assert_held(&esw->state_lock); 240 max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); 241 242 if (max_rate && !max_rate_supported) 243 return -EOPNOTSUPP; 244 if (max_rate == evport->qos.max_rate) 245 return 0; 246 247 /* If parent group has rate limit need to set to group 248 * value when new max rate is 0. 249 */ 250 if (evport->qos.group && !max_rate) 251 act_max_rate = evport->qos.group->max_rate; 252 253 err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); 254 255 if (!err) 256 evport->qos.max_rate = max_rate; 257 258 return err; 259} 260 261static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 262 u32 min_rate, struct netlink_ext_ack *extack) 263{ 264 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 265 struct mlx5_core_dev *dev = esw->dev; 266 u32 previous_min_rate, divider; 267 int err; 268 269 if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) 270 return -EOPNOTSUPP; 271 272 if (min_rate == group->min_rate) 273 return 0; 274 275 previous_min_rate = group->min_rate; 276 group->min_rate = min_rate; 277 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 278 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 279 if (err) { 280 group->min_rate = previous_min_rate; 281 NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); 282 283 /* Attempt restoring previous configuration */ 284 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 285 if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) 286 NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); 287 } 288 289 return err; 290} 291 292static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, 293 struct mlx5_esw_rate_group *group, 294 u32 max_rate, struct netlink_ext_ack *extack) 295{ 296 struct mlx5_vport *vport; 297 unsigned long i; 298 int err; 299 300 if (group->max_rate == max_rate) 301 return 0; 302 303 err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); 304 if (err) 305 return err; 306 307 group->max_rate = max_rate; 308 309 /* Any unlimited vports in the group should be set 310 * with the value of the group. 311 */ 312 mlx5_esw_for_each_vport(esw, i, vport) { 313 if (!vport->enabled || !vport->qos.enabled || 314 vport->qos.group != group || vport->qos.max_rate) 315 continue; 316 317 err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); 318 if (err) 319 NL_SET_ERR_MSG_MOD(extack, 320 "E-Switch vport implicit rate limit setting failed"); 321 } 322 323 return err; 324} 325 326static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, 327 struct mlx5_vport *vport, 328 u32 max_rate, u32 bw_share) 329{ 330 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 331 struct mlx5_esw_rate_group *group = vport->qos.group; 332 struct mlx5_core_dev *dev = esw->dev; 333 u32 parent_tsar_ix; 334 void *vport_elem; 335 int err; 336 337 parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; 338 MLX5_SET(scheduling_context, sched_ctx, element_type, 339 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); 340 vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); 341 MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); 342 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); 343 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 344 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 345 346 err = mlx5_create_scheduling_element_cmd(dev, 347 SCHEDULING_HIERARCHY_E_SWITCH, 348 sched_ctx, 349 &vport->qos.esw_tsar_ix); 350 if (err) { 351 esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", 352 vport->vport, err); 353 return err; 354 } 355 356 return 0; 357} 358 359static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, 360 struct mlx5_vport *vport, 361 struct mlx5_esw_rate_group *curr_group, 362 struct mlx5_esw_rate_group *new_group, 363 struct netlink_ext_ack *extack) 364{ 365 u32 max_rate; 366 int err; 367 368 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 369 SCHEDULING_HIERARCHY_E_SWITCH, 370 vport->qos.esw_tsar_ix); 371 if (err) { 372 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); 373 return err; 374 } 375 376 vport->qos.group = new_group; 377 max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; 378 379 /* If vport is unlimited, we set the group's value. 380 * Therefore, if the group is limited it will apply to 381 * the vport as well and if not, vport will remain unlimited. 382 */ 383 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); 384 if (err) { 385 NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); 386 goto err_sched; 387 } 388 389 return 0; 390 391err_sched: 392 vport->qos.group = curr_group; 393 max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; 394 if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) 395 esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", 396 vport->vport); 397 398 return err; 399} 400 401static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, 402 struct mlx5_vport *vport, 403 struct mlx5_esw_rate_group *group, 404 struct netlink_ext_ack *extack) 405{ 406 struct mlx5_esw_rate_group *new_group, *curr_group; 407 int err; 408 409 if (!vport->enabled) 410 return -EINVAL; 411 412 curr_group = vport->qos.group; 413 new_group = group ?: esw->qos.group0; 414 if (curr_group == new_group) 415 return 0; 416 417 err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); 418 if (err) 419 return err; 420 421 /* Recalculate bw share weights of old and new groups */ 422 if (vport->qos.bw_share || new_group->bw_share) { 423 esw_qos_normalize_vports_min_rate(esw, curr_group, extack); 424 esw_qos_normalize_vports_min_rate(esw, new_group, extack); 425 } 426 427 return 0; 428} 429 430static struct mlx5_esw_rate_group * 431__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 432{ 433 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 434 struct mlx5_esw_rate_group *group; 435 u32 divider; 436 int err; 437 438 group = kzalloc(sizeof(*group), GFP_KERNEL); 439 if (!group) 440 return ERR_PTR(-ENOMEM); 441 442 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, 443 esw->qos.root_tsar_ix); 444 err = mlx5_create_scheduling_element_cmd(esw->dev, 445 SCHEDULING_HIERARCHY_E_SWITCH, 446 tsar_ctx, 447 &group->tsar_ix); 448 if (err) { 449 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); 450 goto err_sched_elem; 451 } 452 453 list_add_tail(&group->list, &esw->qos.groups); 454 455 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 456 if (divider) { 457 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 458 if (err) { 459 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); 460 goto err_min_rate; 461 } 462 } 463 trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); 464 465 return group; 466 467err_min_rate: 468 list_del(&group->list); 469 if (mlx5_destroy_scheduling_element_cmd(esw->dev, 470 SCHEDULING_HIERARCHY_E_SWITCH, 471 group->tsar_ix)) 472 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); 473err_sched_elem: 474 kfree(group); 475 return ERR_PTR(err); 476} 477 478static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); 479static void esw_qos_put(struct mlx5_eswitch *esw); 480 481static struct mlx5_esw_rate_group * 482esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 483{ 484 struct mlx5_esw_rate_group *group; 485 int err; 486 487 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) 488 return ERR_PTR(-EOPNOTSUPP); 489 490 err = esw_qos_get(esw, extack); 491 if (err) 492 return ERR_PTR(err); 493 494 group = __esw_qos_create_rate_group(esw, extack); 495 if (IS_ERR(group)) 496 esw_qos_put(esw); 497 498 return group; 499} 500 501static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 502 struct mlx5_esw_rate_group *group, 503 struct netlink_ext_ack *extack) 504{ 505 u32 divider; 506 int err; 507 508 list_del(&group->list); 509 510 divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); 511 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 512 if (err) 513 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); 514 515 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 516 SCHEDULING_HIERARCHY_E_SWITCH, 517 group->tsar_ix); 518 if (err) 519 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); 520 521 trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); 522 523 kfree(group); 524 525 return err; 526} 527 528static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 529 struct mlx5_esw_rate_group *group, 530 struct netlink_ext_ack *extack) 531{ 532 int err; 533 534 err = __esw_qos_destroy_rate_group(esw, group, extack); 535 esw_qos_put(esw); 536 537 return err; 538} 539 540static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) 541{ 542 switch (type) { 543 case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: 544 return MLX5_CAP_QOS(dev, esw_element_type) & 545 ELEMENT_TYPE_CAP_MASK_TASR; 546 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: 547 return MLX5_CAP_QOS(dev, esw_element_type) & 548 ELEMENT_TYPE_CAP_MASK_VPORT; 549 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: 550 return MLX5_CAP_QOS(dev, esw_element_type) & 551 ELEMENT_TYPE_CAP_MASK_VPORT_TC; 552 case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: 553 return MLX5_CAP_QOS(dev, esw_element_type) & 554 ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; 555 } 556 return false; 557} 558 559static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 560{ 561 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 562 struct mlx5_core_dev *dev = esw->dev; 563 __be32 *attr; 564 int err; 565 566 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 567 return -EOPNOTSUPP; 568 569 if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) 570 return -EOPNOTSUPP; 571 572 MLX5_SET(scheduling_context, tsar_ctx, element_type, 573 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 574 575 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 576 *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); 577 578 err = mlx5_create_scheduling_element_cmd(dev, 579 SCHEDULING_HIERARCHY_E_SWITCH, 580 tsar_ctx, 581 &esw->qos.root_tsar_ix); 582 if (err) { 583 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); 584 return err; 585 } 586 587 INIT_LIST_HEAD(&esw->qos.groups); 588 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { 589 esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); 590 if (IS_ERR(esw->qos.group0)) { 591 esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", 592 PTR_ERR(esw->qos.group0)); 593 err = PTR_ERR(esw->qos.group0); 594 goto err_group0; 595 } 596 } 597 refcount_set(&esw->qos.refcnt, 1); 598 599 return 0; 600 601err_group0: 602 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, 603 esw->qos.root_tsar_ix)) 604 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); 605 606 return err; 607} 608 609static void esw_qos_destroy(struct mlx5_eswitch *esw) 610{ 611 int err; 612 613 if (esw->qos.group0) 614 __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); 615 616 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 617 SCHEDULING_HIERARCHY_E_SWITCH, 618 esw->qos.root_tsar_ix); 619 if (err) 620 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); 621} 622 623static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 624{ 625 int err = 0; 626 627 lockdep_assert_held(&esw->state_lock); 628 629 if (!refcount_inc_not_zero(&esw->qos.refcnt)) { 630 /* esw_qos_create() set refcount to 1 only on success. 631 * No need to decrement on failure. 632 */ 633 err = esw_qos_create(esw, extack); 634 } 635 636 return err; 637} 638 639static void esw_qos_put(struct mlx5_eswitch *esw) 640{ 641 lockdep_assert_held(&esw->state_lock); 642 if (refcount_dec_and_test(&esw->qos.refcnt)) 643 esw_qos_destroy(esw); 644} 645 646static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 647 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 648{ 649 int err; 650 651 lockdep_assert_held(&esw->state_lock); 652 if (vport->qos.enabled) 653 return 0; 654 655 err = esw_qos_get(esw, extack); 656 if (err) 657 return err; 658 659 vport->qos.group = esw->qos.group0; 660 661 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); 662 if (err) 663 goto err_out; 664 665 vport->qos.enabled = true; 666 trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); 667 668 return 0; 669 670err_out: 671 esw_qos_put(esw); 672 673 return err; 674} 675 676void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) 677{ 678 int err; 679 680 lockdep_assert_held(&esw->state_lock); 681 if (!vport->qos.enabled) 682 return; 683 WARN(vport->qos.group && vport->qos.group != esw->qos.group0, 684 "Disabling QoS on port before detaching it from group"); 685 686 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 687 SCHEDULING_HIERARCHY_E_SWITCH, 688 vport->qos.esw_tsar_ix); 689 if (err) 690 esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", 691 vport->vport, err); 692 693 memset(&vport->qos, 0, sizeof(vport->qos)); 694 trace_mlx5_esw_vport_qos_destroy(vport); 695 696 esw_qos_put(esw); 697} 698 699int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 700 u32 max_rate, u32 min_rate) 701{ 702 int err; 703 704 lockdep_assert_held(&esw->state_lock); 705 err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); 706 if (err) 707 return err; 708 709 err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); 710 if (!err) 711 err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); 712 713 return err; 714} 715 716int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) 717{ 718 u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 719 struct mlx5_vport *vport; 720 u32 bitmask; 721 int err; 722 723 vport = mlx5_eswitch_get_vport(esw, vport_num); 724 if (IS_ERR(vport)) 725 return PTR_ERR(vport); 726 727 mutex_lock(&esw->state_lock); 728 if (!vport->qos.enabled) { 729 /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ 730 err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); 731 } else { 732 MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); 733 734 bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 735 err = mlx5_modify_scheduling_element_cmd(esw->dev, 736 SCHEDULING_HIERARCHY_E_SWITCH, 737 ctx, 738 vport->qos.esw_tsar_ix, 739 bitmask); 740 } 741 mutex_unlock(&esw->state_lock); 742 743 return err; 744} 745 746#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ 747 748/* Converts bytes per second value passed in a pointer into megabits per 749 * second, rewriting last. If converted rate exceed link speed or is not a 750 * fraction of Mbps - returns error. 751 */ 752static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, 753 u64 *rate, struct netlink_ext_ack *extack) 754{ 755 u32 link_speed_max, reminder; 756 u64 value; 757 int err; 758 759 err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); 760 if (err) { 761 NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); 762 return err; 763 } 764 765 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); 766 if (reminder) { 767 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", 768 name, *rate); 769 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); 770 return -EINVAL; 771 } 772 773 if (value > link_speed_max) { 774 pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", 775 name, value, link_speed_max); 776 NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); 777 return -EINVAL; 778 } 779 780 *rate = value; 781 return 0; 782} 783 784/* Eswitch devlink rate API */ 785 786int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, 787 u64 tx_share, struct netlink_ext_ack *extack) 788{ 789 struct mlx5_vport *vport = priv; 790 struct mlx5_eswitch *esw; 791 int err; 792 793 esw = vport->dev->priv.eswitch; 794 if (!mlx5_esw_allowed(esw)) 795 return -EPERM; 796 797 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); 798 if (err) 799 return err; 800 801 mutex_lock(&esw->state_lock); 802 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 803 if (err) 804 goto unlock; 805 806 err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); 807unlock: 808 mutex_unlock(&esw->state_lock); 809 return err; 810} 811 812int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, 813 u64 tx_max, struct netlink_ext_ack *extack) 814{ 815 struct mlx5_vport *vport = priv; 816 struct mlx5_eswitch *esw; 817 int err; 818 819 esw = vport->dev->priv.eswitch; 820 if (!mlx5_esw_allowed(esw)) 821 return -EPERM; 822 823 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); 824 if (err) 825 return err; 826 827 mutex_lock(&esw->state_lock); 828 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 829 if (err) 830 goto unlock; 831 832 err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); 833unlock: 834 mutex_unlock(&esw->state_lock); 835 return err; 836} 837 838int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, 839 u64 tx_share, struct netlink_ext_ack *extack) 840{ 841 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 842 struct mlx5_eswitch *esw = dev->priv.eswitch; 843 struct mlx5_esw_rate_group *group = priv; 844 int err; 845 846 err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); 847 if (err) 848 return err; 849 850 mutex_lock(&esw->state_lock); 851 err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); 852 mutex_unlock(&esw->state_lock); 853 return err; 854} 855 856int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, 857 u64 tx_max, struct netlink_ext_ack *extack) 858{ 859 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 860 struct mlx5_eswitch *esw = dev->priv.eswitch; 861 struct mlx5_esw_rate_group *group = priv; 862 int err; 863 864 err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); 865 if (err) 866 return err; 867 868 mutex_lock(&esw->state_lock); 869 err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); 870 mutex_unlock(&esw->state_lock); 871 return err; 872} 873 874int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, 875 struct netlink_ext_ack *extack) 876{ 877 struct mlx5_esw_rate_group *group; 878 struct mlx5_eswitch *esw; 879 int err = 0; 880 881 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 882 if (IS_ERR(esw)) 883 return PTR_ERR(esw); 884 885 mutex_lock(&esw->state_lock); 886 if (esw->mode != MLX5_ESWITCH_OFFLOADS) { 887 NL_SET_ERR_MSG_MOD(extack, 888 "Rate node creation supported only in switchdev mode"); 889 err = -EOPNOTSUPP; 890 goto unlock; 891 } 892 893 group = esw_qos_create_rate_group(esw, extack); 894 if (IS_ERR(group)) { 895 err = PTR_ERR(group); 896 goto unlock; 897 } 898 899 *priv = group; 900unlock: 901 mutex_unlock(&esw->state_lock); 902 return err; 903} 904 905int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, 906 struct netlink_ext_ack *extack) 907{ 908 struct mlx5_esw_rate_group *group = priv; 909 struct mlx5_eswitch *esw; 910 int err; 911 912 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 913 if (IS_ERR(esw)) 914 return PTR_ERR(esw); 915 916 mutex_lock(&esw->state_lock); 917 err = esw_qos_destroy_rate_group(esw, group, extack); 918 mutex_unlock(&esw->state_lock); 919 return err; 920} 921 922int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, 923 struct mlx5_vport *vport, 924 struct mlx5_esw_rate_group *group, 925 struct netlink_ext_ack *extack) 926{ 927 int err; 928 929 mutex_lock(&esw->state_lock); 930 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 931 if (!err) 932 err = esw_qos_vport_update_group(esw, vport, group, extack); 933 mutex_unlock(&esw->state_lock); 934 return err; 935} 936 937int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, 938 struct devlink_rate *parent, 939 void *priv, void *parent_priv, 940 struct netlink_ext_ack *extack) 941{ 942 struct mlx5_esw_rate_group *group; 943 struct mlx5_vport *vport = priv; 944 945 if (!parent) 946 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, 947 vport, NULL, extack); 948 949 group = parent_priv; 950 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); 951}