cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

qos.c (26327B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
      3
      4#include "eswitch.h"
      5#include "esw/qos.h"
      6#include "en/port.h"
      7#define CREATE_TRACE_POINTS
      8#include "diag/qos_tracepoint.h"
      9
     /* Lower bound applied to every bw_share computed by
      * MLX5_RATE_TO_BW_SHARE(); the minimum BW share value supported by the HW
      * is 1 Mbit/sec.
      */
     #define MLX5_MIN_BW_SHARE 1

     /* Divide @rate by @divider (rounding up), raise the result to at least
      * MLX5_MIN_BW_SHARE and then cap it at @limit. All comparisons are done
      * as u32.
      */
     #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit)			\
     	min_t(u32,							\
     	      max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), \
     	      limit)
     15
     16struct mlx5_esw_rate_group {
     17	u32 tsar_ix;
     18	u32 max_rate;
     19	u32 min_rate;
     20	u32 bw_share;
     21	struct list_head list;
     22};
     23
     24static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
     25			       u32 parent_ix, u32 tsar_ix,
     26			       u32 max_rate, u32 bw_share)
     27{
     28	u32 bitmask = 0;
     29
     30	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
     31		return -EOPNOTSUPP;
     32
     33	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
     34	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
     35	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
     36	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
     37	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
     38
     39	return mlx5_modify_scheduling_element_cmd(dev,
     40						  SCHEDULING_HIERARCHY_E_SWITCH,
     41						  sched_ctx,
     42						  tsar_ix,
     43						  bitmask);
     44}
     45
     46static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
     47				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
     48{
     49	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
     50	struct mlx5_core_dev *dev = esw->dev;
     51	int err;
     52
     53	err = esw_qos_tsar_config(dev, sched_ctx,
     54				  esw->qos.root_tsar_ix, group->tsar_ix,
     55				  max_rate, bw_share);
     56	if (err)
     57		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
     58
     59	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
     60
     61	return err;
     62}
     63
     64static int esw_qos_vport_config(struct mlx5_eswitch *esw,
     65				struct mlx5_vport *vport,
     66				u32 max_rate, u32 bw_share,
     67				struct netlink_ext_ack *extack)
     68{
     69	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
     70	struct mlx5_esw_rate_group *group = vport->qos.group;
     71	struct mlx5_core_dev *dev = esw->dev;
     72	u32 parent_tsar_ix;
     73	void *vport_elem;
     74	int err;
     75
     76	if (!vport->qos.enabled)
     77		return -EIO;
     78
     79	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
     80	MLX5_SET(scheduling_context, sched_ctx, element_type,
     81		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
     82	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
     83				  element_attributes);
     84	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
     85
     86	err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
     87				  max_rate, bw_share);
     88	if (err) {
     89		esw_warn(esw->dev,
     90			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
     91			 vport->vport, err);
     92		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
     93		return err;
     94	}
     95
     96	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
     97
     98	return 0;
     99}
    100
    101static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
    102					      struct mlx5_esw_rate_group *group,
    103					      bool group_level)
    104{
    105	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    106	struct mlx5_vport *evport;
    107	u32 max_guarantee = 0;
    108	unsigned long i;
    109
    110	if (group_level) {
    111		struct mlx5_esw_rate_group *group;
    112
    113		list_for_each_entry(group, &esw->qos.groups, list) {
    114			if (group->min_rate < max_guarantee)
    115				continue;
    116			max_guarantee = group->min_rate;
    117		}
    118	} else {
    119		mlx5_esw_for_each_vport(esw, i, evport) {
    120			if (!evport->enabled || !evport->qos.enabled ||
    121			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
    122				continue;
    123			max_guarantee = evport->qos.min_rate;
    124		}
    125	}
    126
    127	if (max_guarantee)
    128		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
    129
    130	/* If vports min rate divider is 0 but their group has bw_share configured, then
    131	 * need to set bw_share for vports to minimal value.
    132	 */
    133	if (!group_level && !max_guarantee && group && group->bw_share)
    134		return 1;
    135	return 0;
    136}
    137
    138static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
    139{
    140	if (divider)
    141		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
    142
    143	return 0;
    144}
    145
    146static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
    147					     struct mlx5_esw_rate_group *group,
    148					     struct netlink_ext_ack *extack)
    149{
    150	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    151	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
    152	struct mlx5_vport *evport;
    153	unsigned long i;
    154	u32 bw_share;
    155	int err;
    156
    157	mlx5_esw_for_each_vport(esw, i, evport) {
    158		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
    159			continue;
    160		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
    161
    162		if (bw_share == evport->qos.bw_share)
    163			continue;
    164
    165		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
    166		if (err)
    167			return err;
    168
    169		evport->qos.bw_share = bw_share;
    170	}
    171
    172	return 0;
    173}
    174
    175static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
    176					     struct netlink_ext_ack *extack)
    177{
    178	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    179	struct mlx5_esw_rate_group *group;
    180	u32 bw_share;
    181	int err;
    182
    183	list_for_each_entry(group, &esw->qos.groups, list) {
    184		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
    185
    186		if (bw_share == group->bw_share)
    187			continue;
    188
    189		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
    190		if (err)
    191			return err;
    192
    193		group->bw_share = bw_share;
    194
    195		/* All the group's vports need to be set with default bw_share
    196		 * to enable them with QOS
    197		 */
    198		err = esw_qos_normalize_vports_min_rate(esw, group, extack);
    199
    200		if (err)
    201			return err;
    202	}
    203
    204	return 0;
    205}
    206
    207static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
    208				      u32 min_rate, struct netlink_ext_ack *extack)
    209{
    210	u32 fw_max_bw_share, previous_min_rate;
    211	bool min_rate_supported;
    212	int err;
    213
    214	lockdep_assert_held(&esw->state_lock);
    215	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    216	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
    217				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
    218	if (min_rate && !min_rate_supported)
    219		return -EOPNOTSUPP;
    220	if (min_rate == evport->qos.min_rate)
    221		return 0;
    222
    223	previous_min_rate = evport->qos.min_rate;
    224	evport->qos.min_rate = min_rate;
    225	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
    226	if (err)
    227		evport->qos.min_rate = previous_min_rate;
    228
    229	return err;
    230}
    231
    232static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
    233				      u32 max_rate, struct netlink_ext_ack *extack)
    234{
    235	u32 act_max_rate = max_rate;
    236	bool max_rate_supported;
    237	int err;
    238
    239	lockdep_assert_held(&esw->state_lock);
    240	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
    241
    242	if (max_rate && !max_rate_supported)
    243		return -EOPNOTSUPP;
    244	if (max_rate == evport->qos.max_rate)
    245		return 0;
    246
    247	/* If parent group has rate limit need to set to group
    248	 * value when new max rate is 0.
    249	 */
    250	if (evport->qos.group && !max_rate)
    251		act_max_rate = evport->qos.group->max_rate;
    252
    253	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
    254
    255	if (!err)
    256		evport->qos.max_rate = max_rate;
    257
    258	return err;
    259}
    260
    261static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
    262				      u32 min_rate, struct netlink_ext_ack *extack)
    263{
    264	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
    265	struct mlx5_core_dev *dev = esw->dev;
    266	u32 previous_min_rate, divider;
    267	int err;
    268
    269	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
    270		return -EOPNOTSUPP;
    271
    272	if (min_rate == group->min_rate)
    273		return 0;
    274
    275	previous_min_rate = group->min_rate;
    276	group->min_rate = min_rate;
    277	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
    278	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
    279	if (err) {
    280		group->min_rate = previous_min_rate;
    281		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
    282
    283		/* Attempt restoring previous configuration */
    284		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
    285		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
    286			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
    287	}
    288
    289	return err;
    290}
    291
    292static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
    293				      struct mlx5_esw_rate_group *group,
    294				      u32 max_rate, struct netlink_ext_ack *extack)
    295{
    296	struct mlx5_vport *vport;
    297	unsigned long i;
    298	int err;
    299
    300	if (group->max_rate == max_rate)
    301		return 0;
    302
    303	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
    304	if (err)
    305		return err;
    306
    307	group->max_rate = max_rate;
    308
    309	/* Any unlimited vports in the group should be set
    310	 * with the value of the group.
    311	 */
    312	mlx5_esw_for_each_vport(esw, i, vport) {
    313		if (!vport->enabled || !vport->qos.enabled ||
    314		    vport->qos.group != group || vport->qos.max_rate)
    315			continue;
    316
    317		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
    318		if (err)
    319			NL_SET_ERR_MSG_MOD(extack,
    320					   "E-Switch vport implicit rate limit setting failed");
    321	}
    322
    323	return err;
    324}
    325
    326static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
    327					      struct mlx5_vport *vport,
    328					      u32 max_rate, u32 bw_share)
    329{
    330	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    331	struct mlx5_esw_rate_group *group = vport->qos.group;
    332	struct mlx5_core_dev *dev = esw->dev;
    333	u32 parent_tsar_ix;
    334	void *vport_elem;
    335	int err;
    336
    337	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
    338	MLX5_SET(scheduling_context, sched_ctx, element_type,
    339		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
    340	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
    341	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
    342	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
    343	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
    344	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
    345
    346	err = mlx5_create_scheduling_element_cmd(dev,
    347						 SCHEDULING_HIERARCHY_E_SWITCH,
    348						 sched_ctx,
    349						 &vport->qos.esw_tsar_ix);
    350	if (err) {
    351		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
    352			 vport->vport, err);
    353		return err;
    354	}
    355
    356	return 0;
    357}
    358
    359static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
    360						   struct mlx5_vport *vport,
    361						   struct mlx5_esw_rate_group *curr_group,
    362						   struct mlx5_esw_rate_group *new_group,
    363						   struct netlink_ext_ack *extack)
    364{
    365	u32 max_rate;
    366	int err;
    367
    368	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
    369						  SCHEDULING_HIERARCHY_E_SWITCH,
    370						  vport->qos.esw_tsar_ix);
    371	if (err) {
    372		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
    373		return err;
    374	}
    375
    376	vport->qos.group = new_group;
    377	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
    378
    379	/* If vport is unlimited, we set the group's value.
    380	 * Therefore, if the group is limited it will apply to
    381	 * the vport as well and if not, vport will remain unlimited.
    382	 */
    383	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
    384	if (err) {
    385		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
    386		goto err_sched;
    387	}
    388
    389	return 0;
    390
    391err_sched:
    392	vport->qos.group = curr_group;
    393	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
    394	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
    395		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
    396			 vport->vport);
    397
    398	return err;
    399}
    400
    401static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
    402				      struct mlx5_vport *vport,
    403				      struct mlx5_esw_rate_group *group,
    404				      struct netlink_ext_ack *extack)
    405{
    406	struct mlx5_esw_rate_group *new_group, *curr_group;
    407	int err;
    408
    409	if (!vport->enabled)
    410		return -EINVAL;
    411
    412	curr_group = vport->qos.group;
    413	new_group = group ?: esw->qos.group0;
    414	if (curr_group == new_group)
    415		return 0;
    416
    417	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
    418	if (err)
    419		return err;
    420
    421	/* Recalculate bw share weights of old and new groups */
    422	if (vport->qos.bw_share || new_group->bw_share) {
    423		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
    424		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
    425	}
    426
    427	return 0;
    428}
    429
    430static struct mlx5_esw_rate_group *
    431__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
    432{
    433	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    434	struct mlx5_esw_rate_group *group;
    435	u32 divider;
    436	int err;
    437
    438	group = kzalloc(sizeof(*group), GFP_KERNEL);
    439	if (!group)
    440		return ERR_PTR(-ENOMEM);
    441
    442	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
    443		 esw->qos.root_tsar_ix);
    444	err = mlx5_create_scheduling_element_cmd(esw->dev,
    445						 SCHEDULING_HIERARCHY_E_SWITCH,
    446						 tsar_ctx,
    447						 &group->tsar_ix);
    448	if (err) {
    449		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
    450		goto err_sched_elem;
    451	}
    452
    453	list_add_tail(&group->list, &esw->qos.groups);
    454
    455	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
    456	if (divider) {
    457		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
    458		if (err) {
    459			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
    460			goto err_min_rate;
    461		}
    462	}
    463	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
    464
    465	return group;
    466
    467err_min_rate:
    468	list_del(&group->list);
    469	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
    470						SCHEDULING_HIERARCHY_E_SWITCH,
    471						group->tsar_ix))
    472		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
    473err_sched_elem:
    474	kfree(group);
    475	return ERR_PTR(err);
    476}
    477
    478static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
    479static void esw_qos_put(struct mlx5_eswitch *esw);
    480
    481static struct mlx5_esw_rate_group *
    482esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
    483{
    484	struct mlx5_esw_rate_group *group;
    485	int err;
    486
    487	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
    488		return ERR_PTR(-EOPNOTSUPP);
    489
    490	err = esw_qos_get(esw, extack);
    491	if (err)
    492		return ERR_PTR(err);
    493
    494	group = __esw_qos_create_rate_group(esw, extack);
    495	if (IS_ERR(group))
    496		esw_qos_put(esw);
    497
    498	return group;
    499}
    500
    501static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
    502					struct mlx5_esw_rate_group *group,
    503					struct netlink_ext_ack *extack)
    504{
    505	u32 divider;
    506	int err;
    507
    508	list_del(&group->list);
    509
    510	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
    511	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
    512	if (err)
    513		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
    514
    515	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
    516						  SCHEDULING_HIERARCHY_E_SWITCH,
    517						  group->tsar_ix);
    518	if (err)
    519		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
    520
    521	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
    522
    523	kfree(group);
    524
    525	return err;
    526}
    527
    /* Destroy @group and then drop one QoS reference, regardless of whether
     * the destroy succeeded. Returns the result of
     * __esw_qos_destroy_rate_group().
     */
    static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
    				      struct mlx5_esw_rate_group *group,
    				      struct netlink_ext_ack *extack)
    {
    	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

    	esw_qos_put(esw);

    	return ret;
    }
    539
    540static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
    541{
    542	switch (type) {
    543	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
    544		return MLX5_CAP_QOS(dev, esw_element_type) &
    545		       ELEMENT_TYPE_CAP_MASK_TASR;
    546	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
    547		return MLX5_CAP_QOS(dev, esw_element_type) &
    548		       ELEMENT_TYPE_CAP_MASK_VPORT;
    549	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
    550		return MLX5_CAP_QOS(dev, esw_element_type) &
    551		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
    552	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
    553		return MLX5_CAP_QOS(dev, esw_element_type) &
    554		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
    555	}
    556	return false;
    557}
    558
    559static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
    560{
    561	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    562	struct mlx5_core_dev *dev = esw->dev;
    563	__be32 *attr;
    564	int err;
    565
    566	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
    567		return -EOPNOTSUPP;
    568
    569	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
    570		return -EOPNOTSUPP;
    571
    572	MLX5_SET(scheduling_context, tsar_ctx, element_type,
    573		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
    574
    575	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
    576	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
    577
    578	err = mlx5_create_scheduling_element_cmd(dev,
    579						 SCHEDULING_HIERARCHY_E_SWITCH,
    580						 tsar_ctx,
    581						 &esw->qos.root_tsar_ix);
    582	if (err) {
    583		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
    584		return err;
    585	}
    586
    587	INIT_LIST_HEAD(&esw->qos.groups);
    588	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
    589		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
    590		if (IS_ERR(esw->qos.group0)) {
    591			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
    592				 PTR_ERR(esw->qos.group0));
    593			err = PTR_ERR(esw->qos.group0);
    594			goto err_group0;
    595		}
    596	}
    597	refcount_set(&esw->qos.refcnt, 1);
    598
    599	return 0;
    600
    601err_group0:
    602	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
    603						esw->qos.root_tsar_ix))
    604		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
    605
    606	return err;
    607}
    608
    609static void esw_qos_destroy(struct mlx5_eswitch *esw)
    610{
    611	int err;
    612
    613	if (esw->qos.group0)
    614		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
    615
    616	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
    617						  SCHEDULING_HIERARCHY_E_SWITCH,
    618						  esw->qos.root_tsar_ix);
    619	if (err)
    620		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
    621}
    622
    623static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
    624{
    625	int err = 0;
    626
    627	lockdep_assert_held(&esw->state_lock);
    628
    629	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
    630		/* esw_qos_create() set refcount to 1 only on success.
    631		 * No need to decrement on failure.
    632		 */
    633		err = esw_qos_create(esw, extack);
    634	}
    635
    636	return err;
    637}
    638
    639static void esw_qos_put(struct mlx5_eswitch *esw)
    640{
    641	lockdep_assert_held(&esw->state_lock);
    642	if (refcount_dec_and_test(&esw->qos.refcnt))
    643		esw_qos_destroy(esw);
    644}
    645
    646static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
    647				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
    648{
    649	int err;
    650
    651	lockdep_assert_held(&esw->state_lock);
    652	if (vport->qos.enabled)
    653		return 0;
    654
    655	err = esw_qos_get(esw, extack);
    656	if (err)
    657		return err;
    658
    659	vport->qos.group = esw->qos.group0;
    660
    661	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
    662	if (err)
    663		goto err_out;
    664
    665	vport->qos.enabled = true;
    666	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
    667
    668	return 0;
    669
    670err_out:
    671	esw_qos_put(esw);
    672
    673	return err;
    674}
    675
    676void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
    677{
    678	int err;
    679
    680	lockdep_assert_held(&esw->state_lock);
    681	if (!vport->qos.enabled)
    682		return;
    683	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
    684	     "Disabling QoS on port before detaching it from group");
    685
    686	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
    687						  SCHEDULING_HIERARCHY_E_SWITCH,
    688						  vport->qos.esw_tsar_ix);
    689	if (err)
    690		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
    691			 vport->vport, err);
    692
    693	memset(&vport->qos, 0, sizeof(vport->qos));
    694	trace_mlx5_esw_vport_qos_destroy(vport);
    695
    696	esw_qos_put(esw);
    697}
    698
    699int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
    700				u32 max_rate, u32 min_rate)
    701{
    702	int err;
    703
    704	lockdep_assert_held(&esw->state_lock);
    705	err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
    706	if (err)
    707		return err;
    708
    709	err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
    710	if (!err)
    711		err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
    712
    713	return err;
    714}
    715
    716int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
    717{
    718	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
    719	struct mlx5_vport *vport;
    720	u32 bitmask;
    721	int err;
    722
    723	vport = mlx5_eswitch_get_vport(esw, vport_num);
    724	if (IS_ERR(vport))
    725		return PTR_ERR(vport);
    726
    727	mutex_lock(&esw->state_lock);
    728	if (!vport->qos.enabled) {
    729		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
    730		err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
    731	} else {
    732		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
    733
    734		bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
    735		err = mlx5_modify_scheduling_element_cmd(esw->dev,
    736							 SCHEDULING_HIERARCHY_E_SWITCH,
    737							 ctx,
    738							 vport->qos.esw_tsar_ix,
    739							 bitmask);
    740	}
    741	mutex_unlock(&esw->state_lock);
    742
    743	return err;
    744}
    745
    746#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
    747
    748/* Converts bytes per second value passed in a pointer into megabits per
    749 * second, rewriting last. If converted rate exceed link speed or is not a
    750 * fraction of Mbps - returns error.
    751 */
    752static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
    753					u64 *rate, struct netlink_ext_ack *extack)
    754{
    755	u32 link_speed_max, reminder;
    756	u64 value;
    757	int err;
    758
    759	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
    760	if (err) {
    761		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
    762		return err;
    763	}
    764
    765	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
    766	if (reminder) {
    767		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
    768		       name, *rate);
    769		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
    770		return -EINVAL;
    771	}
    772
    773	if (value > link_speed_max) {
    774		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
    775		       name, value, link_speed_max);
    776		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
    777		return -EINVAL;
    778	}
    779
    780	*rate = value;
    781	return 0;
    782}
    783
    784/* Eswitch devlink rate API */
    785
    786int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
    787					    u64 tx_share, struct netlink_ext_ack *extack)
    788{
    789	struct mlx5_vport *vport = priv;
    790	struct mlx5_eswitch *esw;
    791	int err;
    792
    793	esw = vport->dev->priv.eswitch;
    794	if (!mlx5_esw_allowed(esw))
    795		return -EPERM;
    796
    797	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
    798	if (err)
    799		return err;
    800
    801	mutex_lock(&esw->state_lock);
    802	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    803	if (err)
    804		goto unlock;
    805
    806	err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
    807unlock:
    808	mutex_unlock(&esw->state_lock);
    809	return err;
    810}
    811
    812int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
    813					  u64 tx_max, struct netlink_ext_ack *extack)
    814{
    815	struct mlx5_vport *vport = priv;
    816	struct mlx5_eswitch *esw;
    817	int err;
    818
    819	esw = vport->dev->priv.eswitch;
    820	if (!mlx5_esw_allowed(esw))
    821		return -EPERM;
    822
    823	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
    824	if (err)
    825		return err;
    826
    827	mutex_lock(&esw->state_lock);
    828	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    829	if (err)
    830		goto unlock;
    831
    832	err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
    833unlock:
    834	mutex_unlock(&esw->state_lock);
    835	return err;
    836}
    837
    838int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
    839					    u64 tx_share, struct netlink_ext_ack *extack)
    840{
    841	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
    842	struct mlx5_eswitch *esw = dev->priv.eswitch;
    843	struct mlx5_esw_rate_group *group = priv;
    844	int err;
    845
    846	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
    847	if (err)
    848		return err;
    849
    850	mutex_lock(&esw->state_lock);
    851	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
    852	mutex_unlock(&esw->state_lock);
    853	return err;
    854}
    855
    856int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
    857					  u64 tx_max, struct netlink_ext_ack *extack)
    858{
    859	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
    860	struct mlx5_eswitch *esw = dev->priv.eswitch;
    861	struct mlx5_esw_rate_group *group = priv;
    862	int err;
    863
    864	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
    865	if (err)
    866		return err;
    867
    868	mutex_lock(&esw->state_lock);
    869	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
    870	mutex_unlock(&esw->state_lock);
    871	return err;
    872}
    873
    874int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
    875				   struct netlink_ext_ack *extack)
    876{
    877	struct mlx5_esw_rate_group *group;
    878	struct mlx5_eswitch *esw;
    879	int err = 0;
    880
    881	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
    882	if (IS_ERR(esw))
    883		return PTR_ERR(esw);
    884
    885	mutex_lock(&esw->state_lock);
    886	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
    887		NL_SET_ERR_MSG_MOD(extack,
    888				   "Rate node creation supported only in switchdev mode");
    889		err = -EOPNOTSUPP;
    890		goto unlock;
    891	}
    892
    893	group = esw_qos_create_rate_group(esw, extack);
    894	if (IS_ERR(group)) {
    895		err = PTR_ERR(group);
    896		goto unlock;
    897	}
    898
    899	*priv = group;
    900unlock:
    901	mutex_unlock(&esw->state_lock);
    902	return err;
    903}
    904
    905int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
    906				   struct netlink_ext_ack *extack)
    907{
    908	struct mlx5_esw_rate_group *group = priv;
    909	struct mlx5_eswitch *esw;
    910	int err;
    911
    912	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
    913	if (IS_ERR(esw))
    914		return PTR_ERR(esw);
    915
    916	mutex_lock(&esw->state_lock);
    917	err = esw_qos_destroy_rate_group(esw, group, extack);
    918	mutex_unlock(&esw->state_lock);
    919	return err;
    920}
    921
    922int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
    923				    struct mlx5_vport *vport,
    924				    struct mlx5_esw_rate_group *group,
    925				    struct netlink_ext_ack *extack)
    926{
    927	int err;
    928
    929	mutex_lock(&esw->state_lock);
    930	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
    931	if (!err)
    932		err = esw_qos_vport_update_group(esw, vport, group, extack);
    933	mutex_unlock(&esw->state_lock);
    934	return err;
    935}
    936
    937int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
    938				     struct devlink_rate *parent,
    939				     void *priv, void *parent_priv,
    940				     struct netlink_ext_ack *extack)
    941{
    942	struct mlx5_esw_rate_group *group;
    943	struct mlx5_vport *vport = priv;
    944
    945	if (!parent)
    946		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
    947						       vport, NULL, extack);
    948
    949	group = parent_priv;
    950	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
    951}