cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
eswitch_offloads.c (108380B)


      1/*
      2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
      3 *
      4 * This software is available to you under a choice of one of two
      5 * licenses.  You may choose to be licensed under the terms of the GNU
      6 * General Public License (GPL) Version 2, available from the file
      7 * COPYING in the main directory of this source tree, or the
      8 * OpenIB.org BSD license below:
      9 *
     10 *     Redistribution and use in source and binary forms, with or
     11 *     without modification, are permitted provided that the following
     12 *     conditions are met:
     13 *
     14 *      - Redistributions of source code must retain the above
     15 *        copyright notice, this list of conditions and the following
     16 *        disclaimer.
     17 *
     18 *      - Redistributions in binary form must reproduce the above
     19 *        copyright notice, this list of conditions and the following
     20 *        disclaimer in the documentation and/or other materials
     21 *        provided with the distribution.
     22 *
     23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     30 * SOFTWARE.
     31 */
     32
     33#include <linux/etherdevice.h>
     34#include <linux/idr.h>
     35#include <linux/mlx5/driver.h>
     36#include <linux/mlx5/mlx5_ifc.h>
     37#include <linux/mlx5/vport.h>
     38#include <linux/mlx5/fs.h>
     39#include "mlx5_core.h"
     40#include "eswitch.h"
     41#include "esw/indir_table.h"
     42#include "esw/acl/ofld.h"
     43#include "rdma.h"
     44#include "en.h"
     45#include "fs_core.h"
     46#include "lib/devcom.h"
     47#include "lib/eq.h"
     48#include "lib/fs_chains.h"
     49#include "en_tc.h"
     50#include "en/mapping.h"
     51#include "devlink.h"
     52#include "lag/lag.h"
     53
     54#define mlx5_esw_for_each_rep(esw, i, rep) \
     55	xa_for_each(&((esw)->offloads.vport_reps), i, rep)
     56
     57#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
     58	xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF)
     59
     60#define mlx5_esw_for_each_vf_rep(esw, index, rep)	\
     61	mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \
     62				       rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
     63
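/* A minimal, illustrative-only sketch of how the rep-iteration macros above
 * are typically used: vport representors live in the esw->offloads.vport_reps
 * xarray keyed by vport number, and the SF/VF variants additionally filter on
 * an xarray mark.  The helper name below is invented for illustration; the
 * rep state fields follow their use later in this file.
 */
static inline int example_count_loaded_eth_reps(struct mlx5_eswitch *esw)
{
	struct mlx5_eswitch_rep *rep;
	unsigned long i;
	int loaded = 0;

	/* xa_for_each() visits every populated index, i.e. every registered rep */
	mlx5_esw_for_each_rep(esw, i, rep)
		if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
			loaded++;

	return loaded;
}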
     64/* There are two match-all miss flows, one for unicast dst mac and
     65 * one for multicast.
     66 */
     67#define MLX5_ESW_MISS_FLOWS (2)
     68#define UPLINK_REP_INDEX 0
     69
     70#define MLX5_ESW_VPORT_TBL_SIZE 128
     71#define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
     72
     73static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
     74	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
     75	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
     76	.flags = 0,
     77};
     78
     79static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
     80						     u16 vport_num)
     81{
     82	return xa_load(&esw->offloads.vport_reps, vport_num);
     83}
     84
     85static void
     86mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
     87				  struct mlx5_flow_spec *spec,
     88				  struct mlx5_esw_flow_attr *attr)
     89{
     90	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep)
     91		return;
     92
     93	if (attr->int_port) {
     94		spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port);
     95
     96		return;
     97	}
     98
     99	spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ?
    100					 MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK :
    101					 MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
    102}
    103
     104/* Strictly speaking, only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits
     105 * are not needed later in this path either, so clear them all for simplicity.
    106 */
    107void
    108mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
    109{
    110	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
    111		void *misc2;
    112
    113		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
    114		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
    115
    116		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
    117		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
    118
    119		if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
    120			spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
    121	}
    122}
    123
    124static void
    125mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
    126				  struct mlx5_flow_spec *spec,
    127				  struct mlx5_flow_attr *attr,
    128				  struct mlx5_eswitch *src_esw,
    129				  u16 vport)
    130{
    131	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    132	u32 metadata;
    133	void *misc2;
    134	void *misc;
    135
     136	/* Use metadata matching because a vport is not represented by a single
     137	 * VHCA in dual-port RoCE mode, and matching on source vport may fail.
    138	 */
    139	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
    140		if (mlx5_esw_indir_table_decap_vport(attr))
    141			vport = mlx5_esw_indir_table_decap_vport(attr);
    142
    143		if (attr && !attr->chain && esw_attr->int_port)
    144			metadata =
    145				mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
    146		else
    147			metadata =
    148				mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport);
    149
    150		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
    151		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata);
    152
    153		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
    154		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
    155			 mlx5_eswitch_get_vport_metadata_mask());
    156
    157		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
    158	} else {
    159		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
    160		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
    161
    162		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
    163			MLX5_SET(fte_match_set_misc, misc,
    164				 source_eswitch_owner_vhca_id,
    165				 MLX5_CAP_GEN(src_esw->dev, vhca_id));
    166
    167		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
    168		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
    169		if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
    170			MLX5_SET_TO_ONES(fte_match_set_misc, misc,
    171					 source_eswitch_owner_vhca_id);
    172
    173		spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
    174	}
    175}
    176
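/* A minimal sketch of the match-building pattern used by the helpers above,
 * shown in isolation: MLX5_ADDR_OF() locates a sub-struct inside the
 * fte_match_param layout, MLX5_SET()/MLX5_SET_TO_ONES() write one named
 * field, the mask goes into match_criteria and the value into match_value.
 * The helper name is invented for illustration.
 */
static inline void example_match_source_port(struct mlx5_flow_spec *spec, u16 vport)
{
	void *misc;

	/* value: packets whose source vport equals 'vport' */
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
	MLX5_SET(fte_match_set_misc, misc, source_port, vport);

	/* mask: compare all bits of source_port */
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
}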
    177static int
    178esw_setup_decap_indir(struct mlx5_eswitch *esw,
    179		      struct mlx5_flow_attr *attr,
    180		      struct mlx5_flow_spec *spec)
    181{
    182	struct mlx5_flow_table *ft;
    183
    184	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
    185		return -EOPNOTSUPP;
    186
    187	ft = mlx5_esw_indir_table_get(esw, attr, spec,
    188				      mlx5_esw_indir_table_decap_vport(attr), true);
    189	return PTR_ERR_OR_ZERO(ft);
    190}
    191
    192static void
    193esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
    194			struct mlx5_flow_attr *attr)
    195{
    196	if (mlx5_esw_indir_table_decap_vport(attr))
    197		mlx5_esw_indir_table_put(esw, attr,
    198					 mlx5_esw_indir_table_decap_vport(attr),
    199					 true);
    200}
    201
    202static int
    203esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
    204		       struct mlx5_flow_act *flow_act,
    205		       u32 sampler_id,
    206		       int i)
    207{
    208	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    209	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
    210	dest[i].sampler_id = sampler_id;
    211
    212	return 0;
    213}
    214
    215static int
    216esw_setup_ft_dest(struct mlx5_flow_destination *dest,
    217		  struct mlx5_flow_act *flow_act,
    218		  struct mlx5_eswitch *esw,
    219		  struct mlx5_flow_attr *attr,
    220		  struct mlx5_flow_spec *spec,
    221		  int i)
    222{
    223	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    224	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
    225	dest[i].ft = attr->dest_ft;
    226
    227	if (mlx5_esw_indir_table_decap_vport(attr))
    228		return esw_setup_decap_indir(esw, attr, spec);
    229	return 0;
    230}
    231
    232static void
    233esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
    234			 struct mlx5_flow_act *flow_act,
    235			 struct mlx5_fs_chains *chains,
    236			 int i)
    237{
    238	if (mlx5_chains_ignore_flow_level_supported(chains))
    239		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    240	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
    241	dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
    242}
    243
    244static int
    245esw_setup_chain_dest(struct mlx5_flow_destination *dest,
    246		     struct mlx5_flow_act *flow_act,
    247		     struct mlx5_fs_chains *chains,
    248		     u32 chain, u32 prio, u32 level,
    249		     int i)
    250{
    251	struct mlx5_flow_table *ft;
    252
    253	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    254	ft = mlx5_chains_get_table(chains, chain, prio, level);
    255	if (IS_ERR(ft))
    256		return PTR_ERR(ft);
    257
    258	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
    259	dest[i].ft = ft;
     260	return 0;
    261}
    262
    263static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
    264				     int from, int to)
    265{
    266	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    267	struct mlx5_fs_chains *chains = esw_chains(esw);
    268	int i;
    269
    270	for (i = from; i < to; i++)
    271		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
    272			mlx5_chains_put_table(chains, 0, 1, 0);
    273		else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
    274						     esw_attr->dests[i].mdev))
    275			mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport,
    276						 false);
    277}
    278
    279static bool
    280esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr)
    281{
    282	int i;
    283
    284	for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
    285		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
    286			return true;
    287	return false;
    288}
    289
    290static int
    291esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest,
    292				 struct mlx5_flow_act *flow_act,
    293				 struct mlx5_eswitch *esw,
    294				 struct mlx5_fs_chains *chains,
    295				 struct mlx5_flow_attr *attr,
    296				 int *i)
    297{
    298	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    299	int err;
    300
    301	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
    302		return -EOPNOTSUPP;
    303
    304	/* flow steering cannot handle more than one dest with the same ft
    305	 * in a single flow
    306	 */
    307	if (esw_attr->out_count - esw_attr->split_count > 1)
    308		return -EOPNOTSUPP;
    309
    310	err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i);
    311	if (err)
    312		return err;
    313
    314	if (esw_attr->dests[esw_attr->split_count].pkt_reformat) {
    315		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
    316		flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat;
    317	}
    318	(*i)++;
    319
    320	return 0;
    321}
    322
    323static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw,
    324					       struct mlx5_flow_attr *attr)
    325{
    326	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    327
    328	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
    329}
    330
    331static bool
    332esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
    333{
    334	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    335	bool result = false;
    336	int i;
    337
     338	/* Indirect table is supported only for flows whose in_port is the
     339	 * uplink and whose destinations are vports on the same eswitch as
     340	 * the uplink; return false if at least one destination doesn't meet
     341	 * this criterion.
    342	 */
    343	for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
    344		if (esw_attr->dests[i].rep &&
    345		    mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
    346						esw_attr->dests[i].mdev)) {
    347			result = true;
    348		} else {
    349			result = false;
    350			break;
    351		}
    352	}
    353	return result;
    354}
    355
    356static int
    357esw_setup_indir_table(struct mlx5_flow_destination *dest,
    358		      struct mlx5_flow_act *flow_act,
    359		      struct mlx5_eswitch *esw,
    360		      struct mlx5_flow_attr *attr,
    361		      struct mlx5_flow_spec *spec,
    362		      bool ignore_flow_lvl,
    363		      int *i)
    364{
    365	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    366	int j, err;
    367
    368	if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE))
    369		return -EOPNOTSUPP;
    370
    371	for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
    372		if (ignore_flow_lvl)
    373			flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    374		dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
    375
    376		dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec,
    377						       esw_attr->dests[j].rep->vport, false);
    378		if (IS_ERR(dest[*i].ft)) {
    379			err = PTR_ERR(dest[*i].ft);
    380			goto err_indir_tbl_get;
    381		}
    382	}
    383
    384	if (mlx5_esw_indir_table_decap_vport(attr)) {
    385		err = esw_setup_decap_indir(esw, attr, spec);
    386		if (err)
    387			goto err_indir_tbl_get;
    388	}
    389
    390	return 0;
    391
    392err_indir_tbl_get:
    393	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j);
    394	return err;
    395}
    396
    397static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
    398{
    399	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    400
    401	esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count);
    402	esw_cleanup_decap_indir(esw, attr);
    403}
    404
    405static void
    406esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level)
    407{
    408	mlx5_chains_put_table(chains, chain, prio, level);
    409}
    410
    411static void
    412esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
    413		     struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
    414		     int attr_idx, int dest_idx, bool pkt_reformat)
    415{
    416	dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
    417	dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
    418	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
    419		dest[dest_idx].vport.vhca_id =
    420			MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
    421		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
    422		if (mlx5_lag_mpesw_is_activated(esw->dev))
    423			dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
    424	}
    425	if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
    426		if (pkt_reformat) {
    427			flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
    428			flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
    429		}
    430		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
    431		dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
    432	}
    433}
    434
    435static int
    436esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
    437		      struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
    438		      int i)
    439{
    440	int j;
    441
    442	for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++)
    443		esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true);
    444	return i;
    445}
    446
    447static bool
    448esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
    449{
    450	return MLX5_CAP_GEN(esw->dev, reg_c_preserve) &&
    451	       mlx5_eswitch_vport_match_metadata_enabled(esw) &&
    452	       MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level);
    453}
    454
    455static int
    456esw_setup_dests(struct mlx5_flow_destination *dest,
    457		struct mlx5_flow_act *flow_act,
    458		struct mlx5_eswitch *esw,
    459		struct mlx5_flow_attr *attr,
    460		struct mlx5_flow_spec *spec,
    461		int *i)
    462{
    463	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    464	struct mlx5_fs_chains *chains = esw_chains(esw);
    465	int err = 0;
    466
    467	if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
    468	    esw_src_port_rewrite_supported(esw))
    469		attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE;
    470
    471	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE &&
    472	    !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) {
    473		esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i);
    474		(*i)++;
    475	} else if (attr->dest_ft) {
    476		esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
    477		(*i)++;
    478	} else if (mlx5e_tc_attr_flags_skip(attr->flags)) {
    479		esw_setup_slow_path_dest(dest, flow_act, chains, *i);
    480		(*i)++;
    481	} else if (attr->dest_chain) {
    482		err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain,
    483					   1, 0, *i);
    484		(*i)++;
    485	} else if (esw_is_indir_table(esw, attr)) {
    486		err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i);
    487	} else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
    488		err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
    489	} else {
    490		*i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i);
    491	}
    492
    493	return err;
    494}
    495
    496static void
    497esw_cleanup_dests(struct mlx5_eswitch *esw,
    498		  struct mlx5_flow_attr *attr)
    499{
    500	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    501	struct mlx5_fs_chains *chains = esw_chains(esw);
    502
    503	if (attr->dest_ft) {
    504		esw_cleanup_decap_indir(esw, attr);
    505	} else if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
    506		if (attr->dest_chain)
    507			esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0);
    508		else if (esw_is_indir_table(esw, attr))
    509			esw_cleanup_indir_table(esw, attr);
    510		else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
    511			esw_cleanup_chain_src_port_rewrite(esw, attr);
    512	}
    513}
    514
    515struct mlx5_flow_handle *
    516mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
    517				struct mlx5_flow_spec *spec,
    518				struct mlx5_flow_attr *attr)
    519{
    520	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
    521	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    522	struct mlx5_fs_chains *chains = esw_chains(esw);
    523	bool split = !!(esw_attr->split_count);
    524	struct mlx5_vport_tbl_attr fwd_attr;
    525	struct mlx5_flow_destination *dest;
    526	struct mlx5_flow_handle *rule;
    527	struct mlx5_flow_table *fdb;
    528	int i = 0;
    529
    530	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
    531		return ERR_PTR(-EOPNOTSUPP);
    532
    533	dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
    534	if (!dest)
    535		return ERR_PTR(-ENOMEM);
    536
    537	flow_act.action = attr->action;
    538	/* if per flow vlan pop/push is emulated, don't set that into the firmware */
    539	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
    540		flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
    541				     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
    542	else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
    543		flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]);
    544		flow_act.vlan[0].vid = esw_attr->vlan_vid[0];
    545		flow_act.vlan[0].prio = esw_attr->vlan_prio[0];
    546		if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
    547			flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]);
    548			flow_act.vlan[1].vid = esw_attr->vlan_vid[1];
    549			flow_act.vlan[1].prio = esw_attr->vlan_prio[1];
    550		}
    551	}
    552
    553	mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr);
    554
    555	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
    556		int err;
    557
    558		err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i);
    559		if (err) {
    560			rule = ERR_PTR(err);
    561			goto err_create_goto_table;
    562		}
    563	}
    564
    565	if (esw_attr->decap_pkt_reformat)
    566		flow_act.pkt_reformat = esw_attr->decap_pkt_reformat;
    567
    568	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
    569		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
    570		dest[i].counter_id = mlx5_fc_id(attr->counter);
    571		i++;
    572	}
    573
    574	if (attr->outer_match_level != MLX5_MATCH_NONE)
    575		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
    576	if (attr->inner_match_level != MLX5_MATCH_NONE)
    577		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
    578
    579	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
    580		flow_act.modify_hdr = attr->modify_hdr;
    581
    582	if (split) {
    583		fwd_attr.chain = attr->chain;
    584		fwd_attr.prio = attr->prio;
    585		fwd_attr.vport = esw_attr->in_rep->vport;
    586		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
    587
    588		fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
    589	} else {
    590		if (attr->chain || attr->prio)
    591			fdb = mlx5_chains_get_table(chains, attr->chain,
    592						    attr->prio, 0);
    593		else
    594			fdb = attr->ft;
    595
    596		if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT))
    597			mlx5_eswitch_set_rule_source_port(esw, spec, attr,
    598							  esw_attr->in_mdev->priv.eswitch,
    599							  esw_attr->in_rep->vport);
    600	}
    601	if (IS_ERR(fdb)) {
    602		rule = ERR_CAST(fdb);
    603		goto err_esw_get;
    604	}
    605
    606	if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
    607		rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
    608						     &flow_act, dest, i);
    609	else
    610		rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
    611	if (IS_ERR(rule))
    612		goto err_add_rule;
    613	else
    614		atomic64_inc(&esw->offloads.num_flows);
    615
    616	kfree(dest);
    617	return rule;
    618
    619err_add_rule:
    620	if (split)
    621		mlx5_esw_vporttbl_put(esw, &fwd_attr);
    622	else if (attr->chain || attr->prio)
    623		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    624err_esw_get:
    625	esw_cleanup_dests(esw, attr);
    626err_create_goto_table:
    627	kfree(dest);
    628	return rule;
    629}
    630
    631struct mlx5_flow_handle *
    632mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
    633			  struct mlx5_flow_spec *spec,
    634			  struct mlx5_flow_attr *attr)
    635{
    636	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
    637	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    638	struct mlx5_fs_chains *chains = esw_chains(esw);
    639	struct mlx5_vport_tbl_attr fwd_attr;
    640	struct mlx5_flow_destination *dest;
    641	struct mlx5_flow_table *fast_fdb;
    642	struct mlx5_flow_table *fwd_fdb;
    643	struct mlx5_flow_handle *rule;
    644	int i, err = 0;
    645
    646	dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
    647	if (!dest)
    648		return ERR_PTR(-ENOMEM);
    649
    650	fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
    651	if (IS_ERR(fast_fdb)) {
    652		rule = ERR_CAST(fast_fdb);
    653		goto err_get_fast;
    654	}
    655
    656	fwd_attr.chain = attr->chain;
    657	fwd_attr.prio = attr->prio;
    658	fwd_attr.vport = esw_attr->in_rep->vport;
    659	fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
    660	fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
    661	if (IS_ERR(fwd_fdb)) {
    662		rule = ERR_CAST(fwd_fdb);
    663		goto err_get_fwd;
    664	}
    665
    666	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
    667	for (i = 0; i < esw_attr->split_count; i++) {
    668		if (esw_is_indir_table(esw, attr))
    669			err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i);
    670		else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
    671			err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr,
    672							       &i);
    673		else
    674			esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
    675
    676		if (err) {
    677			rule = ERR_PTR(err);
    678			goto err_chain_src_rewrite;
    679		}
    680	}
    681	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
    682	dest[i].ft = fwd_fdb;
    683	i++;
    684
    685	mlx5_eswitch_set_rule_source_port(esw, spec, attr,
    686					  esw_attr->in_mdev->priv.eswitch,
    687					  esw_attr->in_rep->vport);
    688
    689	if (attr->outer_match_level != MLX5_MATCH_NONE)
    690		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
    691
    692	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
    693	rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
    694
    695	if (IS_ERR(rule)) {
    696		i = esw_attr->split_count;
    697		goto err_chain_src_rewrite;
    698	}
    699
    700	atomic64_inc(&esw->offloads.num_flows);
    701
    702	kfree(dest);
    703	return rule;
    704err_chain_src_rewrite:
    705	esw_put_dest_tables_loop(esw, attr, 0, i);
    706	mlx5_esw_vporttbl_put(esw, &fwd_attr);
    707err_get_fwd:
    708	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    709err_get_fast:
    710	kfree(dest);
    711	return rule;
    712}
    713
    714static void
    715__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
    716			struct mlx5_flow_handle *rule,
    717			struct mlx5_flow_attr *attr,
    718			bool fwd_rule)
    719{
    720	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    721	struct mlx5_fs_chains *chains = esw_chains(esw);
    722	bool split = (esw_attr->split_count > 0);
    723	struct mlx5_vport_tbl_attr fwd_attr;
    724	int i;
    725
    726	mlx5_del_flow_rules(rule);
    727
    728	if (!mlx5e_tc_attr_flags_skip(attr->flags)) {
    729		/* unref the term table */
    730		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
    731			if (esw_attr->dests[i].termtbl)
    732				mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl);
    733		}
    734	}
    735
    736	atomic64_dec(&esw->offloads.num_flows);
    737
    738	if (fwd_rule || split) {
    739		fwd_attr.chain = attr->chain;
    740		fwd_attr.prio = attr->prio;
    741		fwd_attr.vport = esw_attr->in_rep->vport;
    742		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
    743	}
    744
    745	if (fwd_rule)  {
    746		mlx5_esw_vporttbl_put(esw, &fwd_attr);
    747		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    748		esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
    749	} else {
    750		if (split)
    751			mlx5_esw_vporttbl_put(esw, &fwd_attr);
    752		else if (attr->chain || attr->prio)
    753			mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    754		esw_cleanup_dests(esw, attr);
    755	}
    756}
    757
    758void
    759mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
    760				struct mlx5_flow_handle *rule,
    761				struct mlx5_flow_attr *attr)
    762{
    763	__mlx5_eswitch_del_rule(esw, rule, attr, false);
    764}
    765
    766void
    767mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
    768			  struct mlx5_flow_handle *rule,
    769			  struct mlx5_flow_attr *attr)
    770{
    771	__mlx5_eswitch_del_rule(esw, rule, attr, true);
    772}
    773
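/* A hedged usage sketch of the add/del pairing above.  Only the two
 * mlx5_eswitch_*_offloaded_rule() calls come from this file; the wrapper
 * name and error handling are invented for illustration.  Note that teardown
 * must use the same attr that was used to create the rule.
 */
static inline int example_offload_and_release(struct mlx5_eswitch *esw,
					      struct mlx5_flow_spec *spec,
					      struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(rule))
		return PTR_ERR(rule);

	/* ... matching packets are now handled in hardware ... */

	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
	return 0;
}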
    774static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
    775{
    776	struct mlx5_eswitch_rep *rep;
    777	unsigned long i;
    778	int err = 0;
    779
    780	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
    781	mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) {
    782		if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
    783			continue;
    784
    785		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
    786		if (err)
    787			goto out;
    788	}
    789
    790out:
    791	return err;
    792}
    793
    794static struct mlx5_eswitch_rep *
    795esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
    796{
    797	struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
    798
    799	in_rep  = attr->in_rep;
    800	out_rep = attr->dests[0].rep;
    801
    802	if (push)
    803		vport = in_rep;
    804	else if (pop)
    805		vport = out_rep;
    806	else
    807		vport = in_rep;
    808
    809	return vport;
    810}
    811
    812static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
    813				     bool push, bool pop, bool fwd)
    814{
    815	struct mlx5_eswitch_rep *in_rep, *out_rep;
    816
    817	if ((push || pop) && !fwd)
    818		goto out_notsupp;
    819
    820	in_rep  = attr->in_rep;
    821	out_rep = attr->dests[0].rep;
    822
    823	if (push && in_rep->vport == MLX5_VPORT_UPLINK)
    824		goto out_notsupp;
    825
    826	if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
    827		goto out_notsupp;
    828
     829	/* vport has vlan push configured, can't offload VF --> wire rules without it */
    830	if (!push && !pop && fwd)
    831		if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
    832			goto out_notsupp;
    833
     834	/* protects against (1) setting rules with different vlans to push and
     835	 * (2) mixing rules without vlans to push (attr->vlan == 0) and rules with vlans to push (!= 0)
    836	 */
    837	if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
    838		goto out_notsupp;
    839
    840	return 0;
    841
    842out_notsupp:
    843	return -EOPNOTSUPP;
    844}
    845
    846int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
    847				 struct mlx5_flow_attr *attr)
    848{
    849	struct offloads_fdb *offloads = &esw->fdb_table.offloads;
    850	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    851	struct mlx5_eswitch_rep *vport = NULL;
    852	bool push, pop, fwd;
    853	int err = 0;
    854
     855	/* no-op if vlan push/pop is supported natively (non-emulation mode) */
    856	if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
    857		return 0;
    858
    859	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
    860	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
    861	fwd  = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
    862		   !attr->dest_chain);
    863
    864	mutex_lock(&esw->state_lock);
    865
    866	err = esw_add_vlan_action_check(esw_attr, push, pop, fwd);
    867	if (err)
    868		goto unlock;
    869
    870	attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED;
    871
    872	vport = esw_vlan_action_get_vport(esw_attr, push, pop);
    873
    874	if (!push && !pop && fwd) {
    875		/* tracks VF --> wire rules without vlan push action */
    876		if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
    877			vport->vlan_refcount++;
    878			attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
    879		}
    880
    881		goto unlock;
    882	}
    883
    884	if (!push && !pop)
    885		goto unlock;
    886
    887	if (!(offloads->vlan_push_pop_refcount)) {
    888		/* it's the 1st vlan rule, apply global vlan pop policy */
    889		err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
    890		if (err)
    891			goto out;
    892	}
    893	offloads->vlan_push_pop_refcount++;
    894
    895	if (push) {
    896		if (vport->vlan_refcount)
    897			goto skip_set_push;
    898
    899		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0],
    900						    0, SET_VLAN_INSERT | SET_VLAN_STRIP);
    901		if (err)
    902			goto out;
    903		vport->vlan = esw_attr->vlan_vid[0];
    904skip_set_push:
    905		vport->vlan_refcount++;
    906	}
    907out:
    908	if (!err)
    909		attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED;
    910unlock:
    911	mutex_unlock(&esw->state_lock);
    912	return err;
    913}
    914
    915int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
    916				 struct mlx5_flow_attr *attr)
    917{
    918	struct offloads_fdb *offloads = &esw->fdb_table.offloads;
    919	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
    920	struct mlx5_eswitch_rep *vport = NULL;
    921	bool push, pop, fwd;
    922	int err = 0;
    923
     924	/* no-op if vlan push/pop is supported natively (non-emulation mode) */
    925	if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
    926		return 0;
    927
    928	if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED))
    929		return 0;
    930
    931	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
    932	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
    933	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
    934
    935	mutex_lock(&esw->state_lock);
    936
    937	vport = esw_vlan_action_get_vport(esw_attr, push, pop);
    938
    939	if (!push && !pop && fwd) {
    940		/* tracks VF --> wire rules without vlan push action */
    941		if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
    942			vport->vlan_refcount--;
    943
    944		goto out;
    945	}
    946
    947	if (push) {
    948		vport->vlan_refcount--;
    949		if (vport->vlan_refcount)
    950			goto skip_unset_push;
    951
    952		vport->vlan = 0;
    953		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
    954						    0, 0, SET_VLAN_STRIP);
    955		if (err)
    956			goto out;
    957	}
    958
    959skip_unset_push:
    960	offloads->vlan_push_pop_refcount--;
    961	if (offloads->vlan_push_pop_refcount)
    962		goto out;
    963
    964	/* no more vlan rules, stop global vlan pop policy */
    965	err = esw_set_global_vlan_pop(esw, 0);
    966
    967out:
    968	mutex_unlock(&esw->state_lock);
    969	return err;
    970}
    971
    972struct mlx5_flow_handle *
    973mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
    974				    struct mlx5_eswitch *from_esw,
    975				    struct mlx5_eswitch_rep *rep,
    976				    u32 sqn)
    977{
    978	struct mlx5_flow_act flow_act = {0};
    979	struct mlx5_flow_destination dest = {};
    980	struct mlx5_flow_handle *flow_rule;
    981	struct mlx5_flow_spec *spec;
    982	void *misc;
    983
    984	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    985	if (!spec) {
    986		flow_rule = ERR_PTR(-ENOMEM);
    987		goto out;
    988	}
    989
    990	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
    991	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
    992	/* source vport is the esw manager */
    993	MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
    994	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
    995		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
    996			 MLX5_CAP_GEN(from_esw->dev, vhca_id));
    997
    998	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
    999	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
   1000	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
   1001	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
   1002		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
   1003				 source_eswitch_owner_vhca_id);
   1004
   1005	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
   1006	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
   1007	dest.vport.num = rep->vport;
   1008	dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
   1009	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
   1010	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1011
   1012	if (rep->vport == MLX5_VPORT_UPLINK)
   1013		spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
   1014
   1015	flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
   1016					spec, &flow_act, &dest, 1);
   1017	if (IS_ERR(flow_rule))
   1018		esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
   1019			 PTR_ERR(flow_rule));
   1020out:
   1021	kvfree(spec);
   1022	return flow_rule;
   1023}
   1024EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
   1025
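/* A hedged usage sketch of the exported pair above: a representor driver
 * steers traffic coming from one of its send queues (SQs) to the
 * representor's vport and removes the rule on teardown.  Everything except
 * the two functions defined above is invented for illustration; on_esw and
 * from_esw are the same eswitch here, though the API allows them to differ.
 */
static inline int example_add_sq_steering(struct mlx5_eswitch *esw,
					  struct mlx5_eswitch_rep *rep, u32 sqn,
					  struct mlx5_flow_handle **rule_out)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sqn);
	if (IS_ERR(rule))
		return PTR_ERR(rule);

	*rule_out = rule;	/* later: mlx5_eswitch_del_send_to_vport_rule(rule) */
	return 0;
}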
   1026void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
   1027{
   1028	mlx5_del_flow_rules(rule);
   1029}
   1030
   1031static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
   1032{
   1033	struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
   1034	int i = 0, num_vfs = esw->esw_funcs.num_vfs;
   1035
   1036	if (!num_vfs || !flows)
   1037		return;
   1038
   1039	for (i = 0; i < num_vfs; i++)
   1040		mlx5_del_flow_rules(flows[i]);
   1041
   1042	kvfree(flows);
   1043}
   1044
   1045static int
   1046mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
   1047{
   1048	struct mlx5_flow_destination dest = {};
   1049	struct mlx5_flow_act flow_act = {0};
   1050	int num_vfs, rule_idx = 0, err = 0;
   1051	struct mlx5_flow_handle *flow_rule;
   1052	struct mlx5_flow_handle **flows;
   1053	struct mlx5_flow_spec *spec;
   1054	struct mlx5_vport *vport;
   1055	unsigned long i;
   1056	u16 vport_num;
   1057
   1058	num_vfs = esw->esw_funcs.num_vfs;
   1059	flows = kvcalloc(num_vfs, sizeof(*flows), GFP_KERNEL);
   1060	if (!flows)
   1061		return -ENOMEM;
   1062
   1063	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1064	if (!spec) {
   1065		err = -ENOMEM;
   1066		goto alloc_err;
   1067	}
   1068
   1069	MLX5_SET(fte_match_param, spec->match_criteria,
   1070		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
   1071	MLX5_SET(fte_match_param, spec->match_criteria,
   1072		 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
   1073	MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1,
   1074		 ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK);
   1075
   1076	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
   1077	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
   1078	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1079
   1080	mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
   1081		vport_num = vport->vport;
   1082		MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
   1083			 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
   1084		dest.vport.num = vport_num;
   1085
   1086		flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1087						spec, &flow_act, &dest, 1);
   1088		if (IS_ERR(flow_rule)) {
   1089			err = PTR_ERR(flow_rule);
   1090			esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule idx %d, err %ld\n",
   1091				 rule_idx, PTR_ERR(flow_rule));
   1092			goto rule_err;
   1093		}
   1094		flows[rule_idx++] = flow_rule;
   1095	}
   1096
   1097	esw->fdb_table.offloads.send_to_vport_meta_rules = flows;
   1098	kvfree(spec);
   1099	return 0;
   1100
   1101rule_err:
   1102	while (--rule_idx >= 0)
   1103		mlx5_del_flow_rules(flows[rule_idx]);
   1104	kvfree(spec);
   1105alloc_err:
   1106	kvfree(flows);
   1107	return err;
   1108}
   1109
   1110static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
   1111{
   1112	return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
   1113	       MLX5_FDB_TO_VPORT_REG_C_1;
   1114}
   1115
   1116static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
   1117{
   1118	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
   1119	u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
   1120	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
   1121	u8 curr, wanted;
   1122	int err;
   1123
   1124	if (!mlx5_eswitch_reg_c1_loopback_supported(esw) &&
   1125	    !mlx5_eswitch_vport_match_metadata_enabled(esw))
   1126		return 0;
   1127
   1128	MLX5_SET(query_esw_vport_context_in, in, opcode,
   1129		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
   1130	err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out);
   1131	if (err)
   1132		return err;
   1133
   1134	curr = MLX5_GET(query_esw_vport_context_out, out,
   1135			esw_vport_context.fdb_to_vport_reg_c_id);
   1136	wanted = MLX5_FDB_TO_VPORT_REG_C_0;
   1137	if (mlx5_eswitch_reg_c1_loopback_supported(esw))
   1138		wanted |= MLX5_FDB_TO_VPORT_REG_C_1;
   1139
   1140	if (enable)
   1141		curr |= wanted;
   1142	else
   1143		curr &= ~wanted;
   1144
   1145	MLX5_SET(modify_esw_vport_context_in, min,
   1146		 esw_vport_context.fdb_to_vport_reg_c_id, curr);
   1147	MLX5_SET(modify_esw_vport_context_in, min,
   1148		 field_select.fdb_to_vport_reg_c_id, 1);
   1149
   1150	err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min);
   1151	if (!err) {
   1152		if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1))
   1153			esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
   1154		else
   1155			esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
   1156	}
   1157
   1158	return err;
   1159}
   1160
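/* A minimal sketch of the query/modify command pattern used above, reduced to
 * the query half: build the *_in layout with MLX5_SET(), execute it with
 * mlx5_cmd_exec_inout(), read the reply with MLX5_GET().  The helper name is
 * invented for illustration; the field read here is the one toggled above.
 */
static inline int example_read_fdb_to_vport_reg_c(struct mlx5_core_dev *dev,
						  u8 *reg_c_id)
{
	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
	int err;

	MLX5_SET(query_esw_vport_context_in, in, opcode,
		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
	err = mlx5_cmd_exec_inout(dev, query_esw_vport_context, in, out);
	if (err)
		return err;

	*reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
			     esw_vport_context.fdb_to_vport_reg_c_id);
	return 0;
}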
   1161static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
   1162				  struct mlx5_core_dev *peer_dev,
   1163				  struct mlx5_flow_spec *spec,
   1164				  struct mlx5_flow_destination *dest)
   1165{
   1166	void *misc;
   1167
   1168	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
   1169		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
   1170				    misc_parameters_2);
   1171		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   1172			 mlx5_eswitch_get_vport_metadata_mask());
   1173
   1174		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
   1175	} else {
   1176		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1177				    misc_parameters);
   1178
   1179		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
   1180			 MLX5_CAP_GEN(peer_dev, vhca_id));
   1181
   1182		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
   1183
   1184		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
   1185				    misc_parameters);
   1186		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
   1187		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
   1188				 source_eswitch_owner_vhca_id);
   1189	}
   1190
   1191	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
   1192	dest->vport.num = peer_dev->priv.eswitch->manager_vport;
   1193	dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
   1194	dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
   1195}
   1196
   1197static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
   1198					       struct mlx5_eswitch *peer_esw,
   1199					       struct mlx5_flow_spec *spec,
   1200					       u16 vport)
   1201{
   1202	void *misc;
   1203
   1204	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
   1205		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1206				    misc_parameters_2);
   1207		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   1208			 mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
   1209								   vport));
   1210	} else {
   1211		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1212				    misc_parameters);
   1213		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
   1214	}
   1215}
   1216
   1217static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
   1218				       struct mlx5_core_dev *peer_dev)
   1219{
   1220	struct mlx5_flow_destination dest = {};
   1221	struct mlx5_flow_act flow_act = {0};
   1222	struct mlx5_flow_handle **flows;
    1223	/* the total number of vports is the same for both e-switches */
   1224	int nvports = esw->total_vports;
   1225	struct mlx5_flow_handle *flow;
   1226	struct mlx5_flow_spec *spec;
   1227	struct mlx5_vport *vport;
   1228	unsigned long i;
   1229	void *misc;
   1230	int err;
   1231
   1232	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1233	if (!spec)
   1234		return -ENOMEM;
   1235
   1236	peer_miss_rules_setup(esw, peer_dev, spec, &dest);
   1237
   1238	flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL);
   1239	if (!flows) {
   1240		err = -ENOMEM;
   1241		goto alloc_flows_err;
   1242	}
   1243
   1244	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1245	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1246			    misc_parameters);
   1247
   1248	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
   1249		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
   1250		esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
   1251						   spec, MLX5_VPORT_PF);
   1252
   1253		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1254					   spec, &flow_act, &dest, 1);
   1255		if (IS_ERR(flow)) {
   1256			err = PTR_ERR(flow);
   1257			goto add_pf_flow_err;
   1258		}
   1259		flows[vport->index] = flow;
   1260	}
   1261
   1262	if (mlx5_ecpf_vport_exists(esw->dev)) {
   1263		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
   1264		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
   1265		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1266					   spec, &flow_act, &dest, 1);
   1267		if (IS_ERR(flow)) {
   1268			err = PTR_ERR(flow);
   1269			goto add_ecpf_flow_err;
   1270		}
   1271		flows[vport->index] = flow;
   1272	}
   1273
   1274	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
   1275		esw_set_peer_miss_rule_source_port(esw,
   1276						   peer_dev->priv.eswitch,
   1277						   spec, vport->vport);
   1278
   1279		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1280					   spec, &flow_act, &dest, 1);
   1281		if (IS_ERR(flow)) {
   1282			err = PTR_ERR(flow);
   1283			goto add_vf_flow_err;
   1284		}
   1285		flows[vport->index] = flow;
   1286	}
   1287
   1288	esw->fdb_table.offloads.peer_miss_rules = flows;
   1289
   1290	kvfree(spec);
   1291	return 0;
   1292
   1293add_vf_flow_err:
   1294	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
   1295		if (!flows[vport->index])
   1296			continue;
   1297		mlx5_del_flow_rules(flows[vport->index]);
   1298	}
   1299	if (mlx5_ecpf_vport_exists(esw->dev)) {
   1300		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
   1301		mlx5_del_flow_rules(flows[vport->index]);
   1302	}
   1303add_ecpf_flow_err:
   1304	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
   1305		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
   1306		mlx5_del_flow_rules(flows[vport->index]);
   1307	}
   1308add_pf_flow_err:
   1309	esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
   1310	kvfree(flows);
   1311alloc_flows_err:
   1312	kvfree(spec);
   1313	return err;
   1314}
   1315
   1316static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
   1317{
   1318	struct mlx5_flow_handle **flows;
   1319	struct mlx5_vport *vport;
   1320	unsigned long i;
   1321
   1322	flows = esw->fdb_table.offloads.peer_miss_rules;
   1323
   1324	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
   1325		mlx5_del_flow_rules(flows[vport->index]);
   1326
   1327	if (mlx5_ecpf_vport_exists(esw->dev)) {
   1328		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
   1329		mlx5_del_flow_rules(flows[vport->index]);
   1330	}
   1331
   1332	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
   1333		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
   1334		mlx5_del_flow_rules(flows[vport->index]);
   1335	}
   1336	kvfree(flows);
   1337}
   1338
   1339static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
   1340{
   1341	struct mlx5_flow_act flow_act = {0};
   1342	struct mlx5_flow_destination dest = {};
   1343	struct mlx5_flow_handle *flow_rule = NULL;
   1344	struct mlx5_flow_spec *spec;
   1345	void *headers_c;
   1346	void *headers_v;
   1347	int err = 0;
   1348	u8 *dmac_c;
   1349	u8 *dmac_v;
   1350
   1351	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1352	if (!spec) {
   1353		err = -ENOMEM;
   1354		goto out;
   1355	}
   1356
   1357	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
   1358	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
   1359				 outer_headers);
   1360	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
   1361			      outer_headers.dmac_47_16);
   1362	dmac_c[0] = 0x01;
   1363
   1364	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
   1365	dest.vport.num = esw->manager_vport;
   1366	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1367
   1368	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1369					spec, &flow_act, &dest, 1);
   1370	if (IS_ERR(flow_rule)) {
   1371		err = PTR_ERR(flow_rule);
    1372		esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
   1373		goto out;
   1374	}
   1375
   1376	esw->fdb_table.offloads.miss_rule_uni = flow_rule;
   1377
   1378	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1379				 outer_headers);
   1380	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
   1381			      outer_headers.dmac_47_16);
   1382	dmac_v[0] = 0x01;
   1383	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
   1384					spec, &flow_act, &dest, 1);
   1385	if (IS_ERR(flow_rule)) {
   1386		err = PTR_ERR(flow_rule);
   1387		esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
   1388		mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
   1389		goto out;
   1390	}
   1391
   1392	esw->fdb_table.offloads.miss_rule_multi = flow_rule;
   1393
   1394out:
   1395	kvfree(spec);
   1396	return err;
   1397}
   1398
   1399struct mlx5_flow_handle *
   1400esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
   1401{
   1402	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
   1403	struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore;
   1404	struct mlx5_flow_context *flow_context;
   1405	struct mlx5_flow_handle *flow_rule;
   1406	struct mlx5_flow_destination dest;
   1407	struct mlx5_flow_spec *spec;
   1408	void *misc;
   1409
   1410	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
   1411		return ERR_PTR(-EOPNOTSUPP);
   1412
   1413	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1414	if (!spec)
   1415		return ERR_PTR(-ENOMEM);
   1416
   1417	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
   1418			    misc_parameters_2);
   1419	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   1420		 ESW_REG_C0_USER_DATA_METADATA_MASK);
   1421	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   1422			    misc_parameters_2);
   1423	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
   1424	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
   1425	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
   1426			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
   1427	flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id;
   1428
   1429	flow_context = &spec->flow_context;
   1430	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
   1431	flow_context->flow_tag = tag;
   1432	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
   1433	dest.ft = esw->offloads.ft_offloads;
   1434
   1435	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
   1436	kvfree(spec);
   1437
   1438	if (IS_ERR(flow_rule))
   1439		esw_warn(esw->dev,
   1440			 "Failed to create restore rule for tag: %d, err(%d)\n",
   1441			 tag, (int)PTR_ERR(flow_rule));
   1442
   1443	return flow_rule;
   1444}
   1445
   1446#define MAX_PF_SQ 256
   1447#define MAX_SQ_NVPORTS 32
   1448
   1449static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
   1450					   u32 *flow_group_in)
   1451{
   1452	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
   1453					    flow_group_in,
   1454					    match_criteria);
   1455
   1456	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
   1457		MLX5_SET(create_flow_group_in, flow_group_in,
   1458			 match_criteria_enable,
   1459			 MLX5_MATCH_MISC_PARAMETERS_2);
   1460
   1461		MLX5_SET(fte_match_param, match_criteria,
   1462			 misc_parameters_2.metadata_reg_c_0,
   1463			 mlx5_eswitch_get_vport_metadata_mask());
   1464	} else {
   1465		MLX5_SET(create_flow_group_in, flow_group_in,
   1466			 match_criteria_enable,
   1467			 MLX5_MATCH_MISC_PARAMETERS);
   1468
   1469		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
   1470				 misc_parameters.source_port);
   1471	}
   1472}
   1473
   1474#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
   1475static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
   1476{
   1477	struct mlx5_vport_tbl_attr attr;
   1478	struct mlx5_vport *vport;
   1479	unsigned long i;
   1480
   1481	attr.chain = 0;
   1482	attr.prio = 1;
   1483	mlx5_esw_for_each_vport(esw, i, vport) {
   1484		attr.vport = vport->vport;
   1485		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
   1486		mlx5_esw_vporttbl_put(esw, &attr);
   1487	}
   1488}
   1489
   1490static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
   1491{
   1492	struct mlx5_vport_tbl_attr attr;
   1493	struct mlx5_flow_table *fdb;
   1494	struct mlx5_vport *vport;
   1495	unsigned long i;
   1496
   1497	attr.chain = 0;
   1498	attr.prio = 1;
   1499	mlx5_esw_for_each_vport(esw, i, vport) {
   1500		attr.vport = vport->vport;
   1501		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
   1502		fdb = mlx5_esw_vporttbl_get(esw, &attr);
   1503		if (IS_ERR(fdb))
   1504			goto out;
   1505	}
   1506	return 0;
   1507
   1508out:
   1509	esw_vport_tbl_put(esw);
   1510	return PTR_ERR(fdb);
   1511}
   1512
   1513#define fdb_modify_header_fwd_to_table_supported(esw) \
   1514	(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
   1515static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
   1516{
   1517	struct mlx5_core_dev *dev = esw->dev;
   1518
   1519	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level))
   1520		*flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
   1521
   1522	if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) &&
   1523	    esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
   1524		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
   1525		esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
   1526	} else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
   1527		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
   1528		esw_warn(dev, "Tc chains and priorities offload aren't supported\n");
   1529	} else if (!fdb_modify_header_fwd_to_table_supported(esw)) {
    1530		/* Disabled when the ttl workaround is needed, e.g.
   1531		 * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig
   1532		 */
   1533		esw_warn(dev,
   1534			 "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n");
   1535		*flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED;
   1536	} else {
   1537		*flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED;
   1538		esw_info(dev, "Supported tc chains and prios offload\n");
   1539	}
   1540
   1541	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
   1542		*flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED;
   1543}
   1544
   1545static int
   1546esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
   1547{
   1548	struct mlx5_core_dev *dev = esw->dev;
   1549	struct mlx5_flow_table *nf_ft, *ft;
   1550	struct mlx5_chains_attr attr = {};
   1551	struct mlx5_fs_chains *chains;
   1552	u32 fdb_max;
   1553	int err;
   1554
   1555	fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
   1556
   1557	esw_init_chains_offload_flags(esw, &attr.flags);
   1558	attr.ns = MLX5_FLOW_NAMESPACE_FDB;
   1559	attr.max_ft_sz = fdb_max;
   1560	attr.max_grp_num = esw->params.large_group_num;
   1561	attr.default_ft = miss_fdb;
   1562	attr.mapping = esw->offloads.reg_c0_obj_pool;
   1563
   1564	chains = mlx5_chains_create(dev, &attr);
   1565	if (IS_ERR(chains)) {
   1566		err = PTR_ERR(chains);
   1567		esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
   1568		return err;
   1569	}
   1570
   1571	esw->fdb_table.offloads.esw_chains_priv = chains;
   1572
    1573	/* Create tc_end_ft, which is the always-created ft chain */
   1574	nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains),
   1575				      1, 0);
   1576	if (IS_ERR(nf_ft)) {
   1577		err = PTR_ERR(nf_ft);
   1578		goto nf_ft_err;
   1579	}
   1580
   1581	/* Always open the root for fast path */
   1582	ft = mlx5_chains_get_table(chains, 0, 1, 0);
   1583	if (IS_ERR(ft)) {
   1584		err = PTR_ERR(ft);
   1585		goto level_0_err;
   1586	}
   1587
    1588	/* Open level 1 for split fdb rules now if prios aren't supported */
   1589	if (!mlx5_chains_prios_supported(chains)) {
   1590		err = esw_vport_tbl_get(esw);
   1591		if (err)
   1592			goto level_1_err;
   1593	}
   1594
   1595	mlx5_chains_set_end_ft(chains, nf_ft);
   1596
   1597	return 0;
   1598
   1599level_1_err:
   1600	mlx5_chains_put_table(chains, 0, 1, 0);
   1601level_0_err:
   1602	mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
   1603nf_ft_err:
   1604	mlx5_chains_destroy(chains);
   1605	esw->fdb_table.offloads.esw_chains_priv = NULL;
   1606
   1607	return err;
   1608}
   1609
   1610static void
   1611esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
   1612{
   1613	if (!mlx5_chains_prios_supported(chains))
   1614		esw_vport_tbl_put(esw);
   1615	mlx5_chains_put_table(chains, 0, 1, 0);
   1616	mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
   1617	mlx5_chains_destroy(chains);
   1618}
   1619
   1620#else /* CONFIG_MLX5_CLS_ACT */
   1621
   1622static int
   1623esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
   1624{ return 0; }
   1625
   1626static void
   1627esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
   1628{}
   1629
   1630#endif
   1631
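        /* Create the slow-path FDB table together with its flow groups:
         * send-to-vport (and the optional send-to-vport metadata group), the
         * peer eswitch miss group on merged-eswitch devices, and the final
         * miss group, plus the TC-miss table and the fs_chains instance.
         */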
   1632static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
   1633{
   1634	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
   1635	struct mlx5_flow_table_attr ft_attr = {};
   1636	int num_vfs, table_size, ix, err = 0;
   1637	struct mlx5_core_dev *dev = esw->dev;
   1638	struct mlx5_flow_namespace *root_ns;
   1639	struct mlx5_flow_table *fdb = NULL;
   1640	u32 flags = 0, *flow_group_in;
   1641	struct mlx5_flow_group *g;
   1642	void *match_criteria;
   1643	u8 *dmac;
   1644
   1645	esw_debug(esw->dev, "Create offloads FDB Tables\n");
   1646
   1647	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
   1648	if (!flow_group_in)
   1649		return -ENOMEM;
   1650
   1651	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
   1652	if (!root_ns) {
   1653		esw_warn(dev, "Failed to get FDB flow namespace\n");
   1654		err = -EOPNOTSUPP;
   1655		goto ns_err;
   1656	}
   1657	esw->fdb_table.offloads.ns = root_ns;
   1658	err = mlx5_flow_namespace_set_mode(root_ns,
   1659					   esw->dev->priv.steering->mode);
   1660	if (err) {
   1661		esw_warn(dev, "Failed to set FDB namespace steering mode\n");
   1662		goto ns_err;
   1663	}
   1664
   1665	/* To be strictly correct:
   1666	 *	MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
   1667	 * should be:
   1668	 *	esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
   1669	 *	peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
   1670	 * but as the peer device might not be in switchdev mode it's not
   1671	 * possible. We use the fact that by default FW sets max vfs and max sfs
    1672	 * to the same value on both devices. If this needs to change in the future, note that
    1673	 * the peer miss group should also be created based on the number of
    1674	 * total vports of the peer (currently it also uses esw->total_vports).
   1675	 */
   1676	table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
   1677		MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
   1678
   1679	/* create the slow path fdb with encap set, so further table instances
   1680	 * can be created at run time while VFs are probed if the FW allows that.
   1681	 */
   1682	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
   1683		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
   1684			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
   1685
   1686	ft_attr.flags = flags;
   1687	ft_attr.max_fte = table_size;
   1688	ft_attr.prio = FDB_SLOW_PATH;
   1689
   1690	fdb = mlx5_create_flow_table(root_ns, &ft_attr);
   1691	if (IS_ERR(fdb)) {
   1692		err = PTR_ERR(fdb);
   1693		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
   1694		goto slow_fdb_err;
   1695	}
   1696	esw->fdb_table.offloads.slow_fdb = fdb;
   1697
   1698	/* Create empty TC-miss managed table. This allows plugging in following
   1699	 * priorities without directly exposing their level 0 table to
    1700	 * eswitch_offloads, and passing it as miss_fdb to the following call to
   1701	 * esw_chains_create().
   1702	 */
   1703	memset(&ft_attr, 0, sizeof(ft_attr));
   1704	ft_attr.prio = FDB_TC_MISS;
   1705	esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr);
   1706	if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) {
   1707		err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table);
   1708		esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err);
   1709		goto tc_miss_table_err;
   1710	}
   1711
   1712	err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table);
   1713	if (err) {
   1714		esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
   1715		goto fdb_chains_err;
   1716	}
   1717
   1718	/* create send-to-vport group */
   1719	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
   1720		 MLX5_MATCH_MISC_PARAMETERS);
   1721
   1722	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
   1723
   1724	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
   1725	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
   1726	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
   1727		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
   1728				 misc_parameters.source_eswitch_owner_vhca_id);
   1729		MLX5_SET(create_flow_group_in, flow_group_in,
   1730			 source_eswitch_owner_vhca_id_valid, 1);
   1731	}
   1732
    1733	/* See the comment above the table_size calculation */
   1734	ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
   1735	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
   1736	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
   1737
   1738	g = mlx5_create_flow_group(fdb, flow_group_in);
   1739	if (IS_ERR(g)) {
   1740		err = PTR_ERR(g);
   1741		esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
   1742		goto send_vport_err;
   1743	}
   1744	esw->fdb_table.offloads.send_to_vport_grp = g;
   1745
   1746	if (esw_src_port_rewrite_supported(esw)) {
   1747		/* meta send to vport */
   1748		memset(flow_group_in, 0, inlen);
   1749		MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
   1750			 MLX5_MATCH_MISC_PARAMETERS_2);
   1751
   1752		match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
   1753
   1754		MLX5_SET(fte_match_param, match_criteria,
   1755			 misc_parameters_2.metadata_reg_c_0,
   1756			 mlx5_eswitch_get_vport_metadata_mask());
   1757		MLX5_SET(fte_match_param, match_criteria,
   1758			 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
   1759
   1760		num_vfs = esw->esw_funcs.num_vfs;
   1761		if (num_vfs) {
   1762			MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
   1763			MLX5_SET(create_flow_group_in, flow_group_in,
   1764				 end_flow_index, ix + num_vfs - 1);
   1765			ix += num_vfs;
   1766
   1767			g = mlx5_create_flow_group(fdb, flow_group_in);
   1768			if (IS_ERR(g)) {
   1769				err = PTR_ERR(g);
   1770				esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
   1771					 err);
   1772				goto send_vport_meta_err;
   1773			}
   1774			esw->fdb_table.offloads.send_to_vport_meta_grp = g;
   1775
   1776			err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
   1777			if (err)
   1778				goto meta_rule_err;
   1779		}
   1780	}
   1781
   1782	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
   1783		/* create peer esw miss group */
   1784		memset(flow_group_in, 0, inlen);
   1785
   1786		esw_set_flow_group_source_port(esw, flow_group_in);
   1787
   1788		if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
   1789			match_criteria = MLX5_ADDR_OF(create_flow_group_in,
   1790						      flow_group_in,
   1791						      match_criteria);
   1792
   1793			MLX5_SET_TO_ONES(fte_match_param, match_criteria,
   1794					 misc_parameters.source_eswitch_owner_vhca_id);
   1795
   1796			MLX5_SET(create_flow_group_in, flow_group_in,
   1797				 source_eswitch_owner_vhca_id_valid, 1);
   1798		}
   1799
   1800		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
   1801		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
   1802			 ix + esw->total_vports - 1);
   1803		ix += esw->total_vports;
   1804
   1805		g = mlx5_create_flow_group(fdb, flow_group_in);
   1806		if (IS_ERR(g)) {
   1807			err = PTR_ERR(g);
   1808			esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
   1809			goto peer_miss_err;
   1810		}
   1811		esw->fdb_table.offloads.peer_miss_grp = g;
   1812	}
   1813
   1814	/* create miss group */
   1815	memset(flow_group_in, 0, inlen);
   1816	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
   1817		 MLX5_MATCH_OUTER_HEADERS);
   1818	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
   1819				      match_criteria);
   1820	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
   1821			    outer_headers.dmac_47_16);
   1822	dmac[0] = 0x01;
   1823
   1824	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
   1825	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
   1826		 ix + MLX5_ESW_MISS_FLOWS);
   1827
   1828	g = mlx5_create_flow_group(fdb, flow_group_in);
   1829	if (IS_ERR(g)) {
   1830		err = PTR_ERR(g);
   1831		esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
   1832		goto miss_err;
   1833	}
   1834	esw->fdb_table.offloads.miss_grp = g;
   1835
   1836	err = esw_add_fdb_miss_rule(esw);
   1837	if (err)
   1838		goto miss_rule_err;
   1839
   1840	kvfree(flow_group_in);
   1841	return 0;
   1842
   1843miss_rule_err:
   1844	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
   1845miss_err:
   1846	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
   1847		mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
   1848peer_miss_err:
   1849	mlx5_eswitch_del_send_to_vport_meta_rules(esw);
   1850meta_rule_err:
   1851	if (esw->fdb_table.offloads.send_to_vport_meta_grp)
   1852		mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
   1853send_vport_meta_err:
   1854	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
   1855send_vport_err:
   1856	esw_chains_destroy(esw, esw_chains(esw));
   1857fdb_chains_err:
   1858	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
   1859tc_miss_table_err:
   1860	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
   1861slow_fdb_err:
   1862	/* Holds true only as long as DMFS is the default */
   1863	mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
   1864ns_err:
   1865	kvfree(flow_group_in);
   1866	return err;
   1867}
   1868
   1869static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
   1870{
   1871	if (!esw->fdb_table.offloads.slow_fdb)
   1872		return;
   1873
   1874	esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
   1875	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
   1876	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
   1877	mlx5_eswitch_del_send_to_vport_meta_rules(esw);
   1878	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
   1879	if (esw->fdb_table.offloads.send_to_vport_meta_grp)
   1880		mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp);
   1881	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
   1882		mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
   1883	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
   1884
   1885	esw_chains_destroy(esw, esw_chains(esw));
   1886
   1887	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
   1888	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
   1889	/* Holds true only as long as DMFS is the default */
   1890	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
   1891				     MLX5_FLOW_STEERING_MODE_DMFS);
   1892	atomic64_set(&esw->user_count, 0);
   1893}
   1894
   1895static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw)
   1896{
   1897	int nvports;
   1898
   1899	nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
   1900	if (mlx5e_tc_int_port_supported(esw))
   1901		nvports += MLX5E_TC_MAX_INT_PORT_NUM;
   1902
   1903	return nvports;
   1904}
   1905
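        /* Create the offloads flow table (ft_offloads) in the OFFLOADS
         * namespace; vport RX rules are added to it by
         * mlx5_eswitch_create_vport_rx_rule().
         */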
   1906static int esw_create_offloads_table(struct mlx5_eswitch *esw)
   1907{
   1908	struct mlx5_flow_table_attr ft_attr = {};
   1909	struct mlx5_core_dev *dev = esw->dev;
   1910	struct mlx5_flow_table *ft_offloads;
   1911	struct mlx5_flow_namespace *ns;
   1912	int err = 0;
   1913
   1914	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
   1915	if (!ns) {
   1916		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
   1917		return -EOPNOTSUPP;
   1918	}
   1919
   1920	ft_attr.max_fte = esw_get_offloads_ft_size(esw);
   1921	ft_attr.prio = 1;
   1922
   1923	ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
   1924	if (IS_ERR(ft_offloads)) {
   1925		err = PTR_ERR(ft_offloads);
   1926		esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
   1927		return err;
   1928	}
   1929
   1930	esw->offloads.ft_offloads = ft_offloads;
   1931	return 0;
   1932}
   1933
   1934static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
   1935{
   1936	struct mlx5_esw_offload *offloads = &esw->offloads;
   1937
   1938	mlx5_destroy_flow_table(offloads->ft_offloads);
   1939}
   1940
   1941static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
   1942{
   1943	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
   1944	struct mlx5_flow_group *g;
   1945	u32 *flow_group_in;
   1946	int nvports;
   1947	int err = 0;
   1948
   1949	nvports = esw_get_offloads_ft_size(esw);
   1950	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
   1951	if (!flow_group_in)
   1952		return -ENOMEM;
   1953
   1954	/* create vport rx group */
   1955	esw_set_flow_group_source_port(esw, flow_group_in);
   1956
   1957	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
   1958	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
   1959
   1960	g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
   1961
   1962	if (IS_ERR(g)) {
   1963		err = PTR_ERR(g);
   1964		mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
   1965		goto out;
   1966	}
   1967
   1968	esw->offloads.vport_rx_group = g;
   1969out:
   1970	kvfree(flow_group_in);
   1971	return err;
   1972}
   1973
   1974static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
   1975{
   1976	mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
   1977}
   1978
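        /* Add a rule to ft_offloads that matches traffic coming from the given
         * vport (by metadata reg_c_0 when vport match metadata is enabled,
         * otherwise by source_port) and forwards it to @dest.
         */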
   1979struct mlx5_flow_handle *
   1980mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
   1981				  struct mlx5_flow_destination *dest)
   1982{
   1983	struct mlx5_flow_act flow_act = {0};
   1984	struct mlx5_flow_handle *flow_rule;
   1985	struct mlx5_flow_spec *spec;
   1986	void *misc;
   1987
   1988	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1989	if (!spec) {
   1990		flow_rule = ERR_PTR(-ENOMEM);
   1991		goto out;
   1992	}
   1993
   1994	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
   1995		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
   1996		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   1997			 mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
   1998
   1999		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
   2000		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   2001			 mlx5_eswitch_get_vport_metadata_mask());
   2002
   2003		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
   2004	} else {
   2005		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
   2006		MLX5_SET(fte_match_set_misc, misc, source_port, vport);
   2007
   2008		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
   2009		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
   2010
   2011		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
   2012	}
   2013
   2014	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   2015	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
   2016					&flow_act, dest, 1);
   2017	if (IS_ERR(flow_rule)) {
   2018		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
   2019		goto out;
   2020	}
   2021
   2022out:
   2023	kvfree(spec);
   2024	return flow_rule;
   2025}
   2026
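        /* Derive the effective min-inline mode. When the device reports
         * per-vport control, all host function vports must agree on the same
         * mode, otherwise -EINVAL is returned.
         */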
   2027static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
   2028{
   2029	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
   2030	struct mlx5_core_dev *dev = esw->dev;
   2031	struct mlx5_vport *vport;
   2032	unsigned long i;
   2033
   2034	if (!MLX5_CAP_GEN(dev, vport_group_manager))
   2035		return -EOPNOTSUPP;
   2036
   2037	if (esw->mode == MLX5_ESWITCH_NONE)
   2038		return -EOPNOTSUPP;
   2039
   2040	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
   2041	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
   2042		mlx5_mode = MLX5_INLINE_MODE_NONE;
   2043		goto out;
   2044	case MLX5_CAP_INLINE_MODE_L2:
   2045		mlx5_mode = MLX5_INLINE_MODE_L2;
   2046		goto out;
   2047	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
   2048		goto query_vports;
   2049	}
   2050
   2051query_vports:
   2052	mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
   2053	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
   2054		mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
   2055		if (prev_mlx5_mode != mlx5_mode)
   2056			return -EINVAL;
   2057		prev_mlx5_mode = mlx5_mode;
   2058	}
   2059
   2060out:
   2061	*mode = mlx5_mode;
   2062	return 0;
   2063}
   2064
   2065static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
   2066{
   2067	struct mlx5_esw_offload *offloads = &esw->offloads;
   2068
   2069	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
   2070		return;
   2071
   2072	mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id);
   2073	mlx5_destroy_flow_group(offloads->restore_group);
   2074	mlx5_destroy_flow_table(offloads->ft_offloads_restore);
   2075}
   2076
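        /* Create the restore table, only when reg_c_1 loopback is supported:
         * a single flow group matching the reg_c_0 user-data bits and a modify
         * header that copies reg_c_1 into reg_b.
         */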
   2077static int esw_create_restore_table(struct mlx5_eswitch *esw)
   2078{
   2079	u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
   2080	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
   2081	struct mlx5_flow_table_attr ft_attr = {};
   2082	struct mlx5_core_dev *dev = esw->dev;
   2083	struct mlx5_flow_namespace *ns;
   2084	struct mlx5_modify_hdr *mod_hdr;
   2085	void *match_criteria, *misc;
   2086	struct mlx5_flow_table *ft;
   2087	struct mlx5_flow_group *g;
   2088	u32 *flow_group_in;
   2089	int err = 0;
   2090
   2091	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
   2092		return 0;
   2093
   2094	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
   2095	if (!ns) {
   2096		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
   2097		return -EOPNOTSUPP;
   2098	}
   2099
   2100	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
   2101	if (!flow_group_in) {
   2102		err = -ENOMEM;
   2103		goto out_free;
   2104	}
   2105
   2106	ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
   2107	ft = mlx5_create_flow_table(ns, &ft_attr);
   2108	if (IS_ERR(ft)) {
   2109		err = PTR_ERR(ft);
   2110		esw_warn(esw->dev, "Failed to create restore table, err %d\n",
   2111			 err);
   2112		goto out_free;
   2113	}
   2114
   2115	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
   2116				      match_criteria);
   2117	misc = MLX5_ADDR_OF(fte_match_param, match_criteria,
   2118			    misc_parameters_2);
   2119
   2120	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
   2121		 ESW_REG_C0_USER_DATA_METADATA_MASK);
   2122	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
   2123	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
   2124		 ft_attr.max_fte - 1);
   2125	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
   2126		 MLX5_MATCH_MISC_PARAMETERS_2);
   2127	g = mlx5_create_flow_group(ft, flow_group_in);
   2128	if (IS_ERR(g)) {
   2129		err = PTR_ERR(g);
   2130		esw_warn(dev, "Failed to create restore flow group, err: %d\n",
   2131			 err);
   2132		goto err_group;
   2133	}
   2134
   2135	MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY);
   2136	MLX5_SET(copy_action_in, modact, src_field,
   2137		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
   2138	MLX5_SET(copy_action_in, modact, dst_field,
   2139		 MLX5_ACTION_IN_FIELD_METADATA_REG_B);
   2140	mod_hdr = mlx5_modify_header_alloc(esw->dev,
   2141					   MLX5_FLOW_NAMESPACE_KERNEL, 1,
   2142					   modact);
   2143	if (IS_ERR(mod_hdr)) {
   2144		err = PTR_ERR(mod_hdr);
   2145		esw_warn(dev, "Failed to create restore mod header, err: %d\n",
   2146			 err);
   2147		goto err_mod_hdr;
   2148	}
   2149
   2150	esw->offloads.ft_offloads_restore = ft;
   2151	esw->offloads.restore_group = g;
   2152	esw->offloads.restore_copy_hdr_id = mod_hdr;
   2153
   2154	kvfree(flow_group_in);
   2155
   2156	return 0;
   2157
   2158err_mod_hdr:
   2159	mlx5_destroy_flow_group(g);
   2160err_group:
   2161	mlx5_destroy_flow_table(ft);
   2162out_free:
   2163	kvfree(flow_group_in);
   2164
   2165	return err;
   2166}
   2167
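        /* Re-enable the eswitch in offloads (switchdev) mode. On failure, try
         * to bring it back up in legacy mode so the device is not left with the
         * eswitch disabled.
         */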
   2168static int esw_offloads_start(struct mlx5_eswitch *esw,
   2169			      struct netlink_ext_ack *extack)
   2170{
   2171	int err, err1;
   2172
   2173	mlx5_eswitch_disable_locked(esw, false);
   2174	err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
   2175					 esw->dev->priv.sriov.num_vfs);
   2176	if (err) {
   2177		NL_SET_ERR_MSG_MOD(extack,
   2178				   "Failed setting eswitch to offloads");
   2179		err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
   2180						  MLX5_ESWITCH_IGNORE_NUM_VFS);
   2181		if (err1) {
   2182			NL_SET_ERR_MSG_MOD(extack,
   2183					   "Failed setting eswitch back to legacy");
   2184		}
   2185	}
   2186	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
   2187		if (mlx5_eswitch_inline_mode_get(esw,
   2188						 &esw->offloads.inline_mode)) {
   2189			esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
   2190			NL_SET_ERR_MSG_MOD(extack,
   2191					   "Inline mode is different between vports");
   2192		}
   2193	}
   2194	return err;
   2195}
   2196
   2197static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
   2198					   struct mlx5_eswitch_rep *rep,
   2199					   xa_mark_t mark)
   2200{
   2201	bool mark_set;
   2202
   2203	/* Copy the mark from vport to its rep */
   2204	mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
   2205	if (mark_set)
   2206		xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
   2207}
   2208
   2209static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
   2210{
   2211	struct mlx5_eswitch_rep *rep;
   2212	int rep_type;
   2213	int err;
   2214
   2215	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
   2216	if (!rep)
   2217		return -ENOMEM;
   2218
   2219	rep->vport = vport->vport;
   2220	rep->vport_index = vport->index;
   2221	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
   2222		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
   2223
   2224	err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
   2225	if (err)
   2226		goto insert_err;
   2227
   2228	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
   2229	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
   2230	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
   2231	return 0;
   2232
   2233insert_err:
   2234	kfree(rep);
   2235	return err;
   2236}
   2237
   2238static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
   2239					  struct mlx5_eswitch_rep *rep)
   2240{
   2241	xa_erase(&esw->offloads.vport_reps, rep->vport);
   2242	kfree(rep);
   2243}
   2244
   2245void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
   2246{
   2247	struct mlx5_eswitch_rep *rep;
   2248	unsigned long i;
   2249
   2250	mlx5_esw_for_each_rep(esw, i, rep)
   2251		mlx5_esw_offloads_rep_cleanup(esw, rep);
   2252	xa_destroy(&esw->offloads.vport_reps);
   2253}
   2254
   2255int esw_offloads_init_reps(struct mlx5_eswitch *esw)
   2256{
   2257	struct mlx5_vport *vport;
   2258	unsigned long i;
   2259	int err;
   2260
   2261	xa_init(&esw->offloads.vport_reps);
   2262
   2263	mlx5_esw_for_each_vport(esw, i, vport) {
   2264		err = mlx5_esw_offloads_rep_init(esw, vport);
   2265		if (err)
   2266			goto err;
   2267	}
   2268	return 0;
   2269
   2270err:
   2271	esw_offloads_cleanup_reps(esw);
   2272	return err;
   2273}
   2274
   2275static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
   2276				      struct mlx5_eswitch_rep *rep, u8 rep_type)
   2277{
   2278	if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
   2279			   REP_LOADED, REP_REGISTERED) == REP_LOADED)
   2280		esw->offloads.rep_ops[rep_type]->unload(rep);
   2281}
   2282
   2283static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
   2284{
   2285	struct mlx5_eswitch_rep *rep;
   2286	unsigned long i;
   2287
   2288	mlx5_esw_for_each_sf_rep(esw, i, rep)
   2289		__esw_offloads_unload_rep(esw, rep, rep_type);
   2290}
   2291
   2292static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
   2293{
   2294	struct mlx5_eswitch_rep *rep;
   2295	unsigned long i;
   2296
   2297	__unload_reps_sf_vport(esw, rep_type);
   2298
   2299	mlx5_esw_for_each_vf_rep(esw, i, rep)
   2300		__esw_offloads_unload_rep(esw, rep, rep_type);
   2301
   2302	if (mlx5_ecpf_vport_exists(esw->dev)) {
   2303		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
   2304		__esw_offloads_unload_rep(esw, rep, rep_type);
   2305	}
   2306
   2307	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
   2308		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
   2309		__esw_offloads_unload_rep(esw, rep, rep_type);
   2310	}
   2311
   2312	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
   2313	__esw_offloads_unload_rep(esw, rep, rep_type);
   2314}
   2315
   2316int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
   2317{
   2318	struct mlx5_eswitch_rep *rep;
   2319	int rep_type;
   2320	int err;
   2321
   2322	rep = mlx5_eswitch_get_rep(esw, vport_num);
   2323	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
   2324		if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
   2325				   REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
   2326			err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
   2327			if (err)
   2328				goto err_reps;
   2329		}
   2330
   2331	return 0;
   2332
   2333err_reps:
   2334	atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED);
   2335	for (--rep_type; rep_type >= 0; rep_type--)
   2336		__esw_offloads_unload_rep(esw, rep, rep_type);
   2337	return err;
   2338}
   2339
   2340void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
   2341{
   2342	struct mlx5_eswitch_rep *rep;
   2343	int rep_type;
   2344
   2345	rep = mlx5_eswitch_get_rep(esw, vport_num);
   2346	for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--)
   2347		__esw_offloads_unload_rep(esw, rep, rep_type);
   2348}
   2349
   2350int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
   2351{
   2352	int err;
   2353
   2354	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
   2355		return 0;
   2356
   2357	if (vport_num != MLX5_VPORT_UPLINK) {
   2358		err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
   2359		if (err)
   2360			return err;
   2361	}
   2362
   2363	err = mlx5_esw_offloads_rep_load(esw, vport_num);
   2364	if (err)
   2365		goto load_err;
   2366	return err;
   2367
   2368load_err:
   2369	if (vport_num != MLX5_VPORT_UPLINK)
   2370		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
   2371	return err;
   2372}
   2373
   2374void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
   2375{
   2376	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
   2377		return;
   2378
   2379	mlx5_esw_offloads_rep_unload(esw, vport_num);
   2380
   2381	if (vport_num != MLX5_VPORT_UPLINK)
   2382		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
   2383}
   2384
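        /* Point the slave device's FDB root at the master's root flow table
         * (using the master's vhca_id as table_eswitch_owner_vhca_id). Passing
         * a NULL master restores the slave's own root table.
         */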
   2385static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
   2386				  struct mlx5_core_dev *slave)
   2387{
   2388	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
   2389	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
   2390	struct mlx5_flow_root_namespace *root;
   2391	struct mlx5_flow_namespace *ns;
   2392	int err;
   2393
   2394	MLX5_SET(set_flow_table_root_in, in, opcode,
   2395		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
   2396	MLX5_SET(set_flow_table_root_in, in, table_type,
   2397		 FS_FT_FDB);
   2398
   2399	if (master) {
   2400		ns = mlx5_get_flow_namespace(master,
   2401					     MLX5_FLOW_NAMESPACE_FDB);
   2402		root = find_root(&ns->node);
   2403		mutex_lock(&root->chain_lock);
   2404		MLX5_SET(set_flow_table_root_in, in,
   2405			 table_eswitch_owner_vhca_id_valid, 1);
   2406		MLX5_SET(set_flow_table_root_in, in,
   2407			 table_eswitch_owner_vhca_id,
   2408			 MLX5_CAP_GEN(master, vhca_id));
   2409		MLX5_SET(set_flow_table_root_in, in, table_id,
   2410			 root->root_ft->id);
   2411	} else {
   2412		ns = mlx5_get_flow_namespace(slave,
   2413					     MLX5_FLOW_NAMESPACE_FDB);
   2414		root = find_root(&ns->node);
   2415		mutex_lock(&root->chain_lock);
   2416		MLX5_SET(set_flow_table_root_in, in, table_id,
   2417			 root->root_ft->id);
   2418	}
   2419
   2420	err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
   2421	mutex_unlock(&root->chain_lock);
   2422
   2423	return err;
   2424}
   2425
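        /* Install a bounce rule on the master's egress ACL: traffic from the
         * uplink that carries the slave's vhca_id is forwarded to the slave's
         * eswitch manager vport.
         */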
   2426static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
   2427					struct mlx5_core_dev *slave,
   2428					struct mlx5_vport *vport,
   2429					struct mlx5_flow_table *acl)
   2430{
   2431	struct mlx5_flow_handle *flow_rule = NULL;
   2432	struct mlx5_flow_destination dest = {};
   2433	struct mlx5_flow_act flow_act = {};
   2434	struct mlx5_flow_spec *spec;
   2435	int err = 0;
   2436	void *misc;
   2437
   2438	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   2439	if (!spec)
   2440		return -ENOMEM;
   2441
   2442	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
   2443	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
   2444			    misc_parameters);
   2445	MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
   2446	MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
   2447		 MLX5_CAP_GEN(slave, vhca_id));
   2448
   2449	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
   2450	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
   2451	MLX5_SET_TO_ONES(fte_match_set_misc, misc,
   2452			 source_eswitch_owner_vhca_id);
   2453
   2454	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   2455	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
   2456	dest.vport.num = slave->priv.eswitch->manager_vport;
   2457	dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
   2458	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
   2459
   2460	flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
   2461					&dest, 1);
   2462	if (IS_ERR(flow_rule))
   2463		err = PTR_ERR(flow_rule);
   2464	else
   2465		vport->egress.offloads.bounce_rule = flow_rule;
   2466
   2467	kvfree(spec);
   2468	return err;
   2469}
   2470
   2471static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
   2472				      struct mlx5_core_dev *slave)
   2473{
   2474	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
   2475	struct mlx5_eswitch *esw = master->priv.eswitch;
   2476	struct mlx5_flow_table_attr ft_attr = {
   2477		.max_fte = 1, .prio = 0, .level = 0,
   2478		.flags = MLX5_FLOW_TABLE_OTHER_VPORT,
   2479	};
   2480	struct mlx5_flow_namespace *egress_ns;
   2481	struct mlx5_flow_table *acl;
   2482	struct mlx5_flow_group *g;
   2483	struct mlx5_vport *vport;
   2484	void *match_criteria;
   2485	u32 *flow_group_in;
   2486	int err;
   2487
   2488	vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
   2489	if (IS_ERR(vport))
   2490		return PTR_ERR(vport);
   2491
   2492	egress_ns = mlx5_get_flow_vport_acl_namespace(master,
   2493						      MLX5_FLOW_NAMESPACE_ESW_EGRESS,
   2494						      vport->index);
   2495	if (!egress_ns)
   2496		return -EINVAL;
   2497
   2498	if (vport->egress.acl)
   2499		return -EINVAL;
   2500
   2501	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
   2502	if (!flow_group_in)
   2503		return -ENOMEM;
   2504
   2505	acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
   2506	if (IS_ERR(acl)) {
   2507		err = PTR_ERR(acl);
   2508		goto out;
   2509	}
   2510
   2511	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
   2512				      match_criteria);
   2513	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
   2514			 misc_parameters.source_port);
   2515	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
   2516			 misc_parameters.source_eswitch_owner_vhca_id);
   2517	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
   2518		 MLX5_MATCH_MISC_PARAMETERS);
   2519
   2520	MLX5_SET(create_flow_group_in, flow_group_in,
   2521		 source_eswitch_owner_vhca_id_valid, 1);
   2522	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
   2523	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
   2524
   2525	g = mlx5_create_flow_group(acl, flow_group_in);
   2526	if (IS_ERR(g)) {
   2527		err = PTR_ERR(g);
   2528		goto err_group;
   2529	}
   2530
   2531	err = __esw_set_master_egress_rule(master, slave, vport, acl);
   2532	if (err)
   2533		goto err_rule;
   2534
   2535	vport->egress.acl = acl;
   2536	vport->egress.offloads.bounce_grp = g;
   2537
   2538	kvfree(flow_group_in);
   2539
   2540	return 0;
   2541
   2542err_rule:
   2543	mlx5_destroy_flow_group(g);
   2544err_group:
   2545	mlx5_destroy_flow_table(acl);
   2546out:
   2547	kvfree(flow_group_in);
   2548	return err;
   2549}
   2550
   2551static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
   2552{
   2553	struct mlx5_vport *vport;
   2554
   2555	vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
   2556				       dev->priv.eswitch->manager_vport);
   2557
   2558	esw_acl_egress_ofld_cleanup(vport);
   2559}
   2560
   2561int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
   2562					    struct mlx5_eswitch *slave_esw)
   2563{
   2564	int err;
   2565
   2566	err = esw_set_slave_root_fdb(master_esw->dev,
   2567				     slave_esw->dev);
   2568	if (err)
   2569		return err;
   2570
   2571	err = esw_set_master_egress_rule(master_esw->dev,
   2572					 slave_esw->dev);
   2573	if (err)
   2574		goto err_acl;
   2575
   2576	return err;
   2577
   2578err_acl:
   2579	esw_set_slave_root_fdb(NULL, slave_esw->dev);
   2580
   2581	return err;
   2582}
   2583
   2584void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
   2585					      struct mlx5_eswitch *slave_esw)
   2586{
   2587	esw_unset_master_egress_rule(master_esw->dev);
   2588	esw_set_slave_root_fdb(NULL, slave_esw->dev);
   2589}
   2590
   2591#define ESW_OFFLOADS_DEVCOM_PAIR	(0)
   2592#define ESW_OFFLOADS_DEVCOM_UNPAIR	(1)
   2593
   2594static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
   2595{
   2596	const struct mlx5_eswitch_rep_ops *ops;
   2597	struct mlx5_eswitch_rep *rep;
   2598	unsigned long i;
   2599	u8 rep_type;
   2600
   2601	mlx5_esw_for_each_rep(esw, i, rep) {
   2602		rep_type = NUM_REP_TYPES;
   2603		while (rep_type--) {
   2604			ops = esw->offloads.rep_ops[rep_type];
   2605			if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
   2606			    ops->event)
   2607				ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
   2608		}
   2609	}
   2610}
   2611
   2612static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
   2613{
   2614#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
   2615	mlx5e_tc_clean_fdb_peer_flows(esw);
   2616#endif
   2617	mlx5_esw_offloads_rep_event_unpair(esw);
   2618	esw_del_fdb_peer_miss_rules(esw);
   2619}
   2620
   2621static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
   2622				  struct mlx5_eswitch *peer_esw)
   2623{
   2624	const struct mlx5_eswitch_rep_ops *ops;
   2625	struct mlx5_eswitch_rep *rep;
   2626	unsigned long i;
   2627	u8 rep_type;
   2628	int err;
   2629
   2630	err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
   2631	if (err)
   2632		return err;
   2633
   2634	mlx5_esw_for_each_rep(esw, i, rep) {
   2635		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
   2636			ops = esw->offloads.rep_ops[rep_type];
   2637			if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
   2638			    ops->event) {
   2639				err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
   2640				if (err)
   2641					goto err_out;
   2642			}
   2643		}
   2644	}
   2645
   2646	return 0;
   2647
   2648err_out:
   2649	mlx5_esw_offloads_unpair(esw);
   2650	return err;
   2651}
   2652
   2653static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
   2654					 struct mlx5_eswitch *peer_esw,
   2655					 bool pair)
   2656{
   2657	struct mlx5_flow_root_namespace *peer_ns;
   2658	struct mlx5_flow_root_namespace *ns;
   2659	int err;
   2660
   2661	peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
   2662	ns = esw->dev->priv.steering->fdb_root_ns;
   2663
   2664	if (pair) {
   2665		err = mlx5_flow_namespace_set_peer(ns, peer_ns);
   2666		if (err)
   2667			return err;
   2668
   2669		err = mlx5_flow_namespace_set_peer(peer_ns, ns);
   2670		if (err) {
   2671			mlx5_flow_namespace_set_peer(ns, NULL);
   2672			return err;
   2673		}
   2674	} else {
   2675		mlx5_flow_namespace_set_peer(ns, NULL);
   2676		mlx5_flow_namespace_set_peer(peer_ns, NULL);
   2677	}
   2678
   2679	return 0;
   2680}
   2681
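        /* Handle devcom pair/unpair events between the two eswitches: set the
         * peer FDB namespaces and exchange peer miss rules on PAIR, and tear
         * them down again on UNPAIR.
         */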
   2682static int mlx5_esw_offloads_devcom_event(int event,
   2683					  void *my_data,
   2684					  void *event_data)
   2685{
   2686	struct mlx5_eswitch *esw = my_data;
   2687	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
   2688	struct mlx5_eswitch *peer_esw = event_data;
   2689	int err;
   2690
   2691	switch (event) {
   2692	case ESW_OFFLOADS_DEVCOM_PAIR:
   2693		if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
   2694		    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
   2695			break;
   2696
   2697		err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
   2698		if (err)
   2699			goto err_out;
   2700		err = mlx5_esw_offloads_pair(esw, peer_esw);
   2701		if (err)
   2702			goto err_peer;
   2703
   2704		err = mlx5_esw_offloads_pair(peer_esw, esw);
   2705		if (err)
   2706			goto err_pair;
   2707
   2708		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
   2709		break;
   2710
   2711	case ESW_OFFLOADS_DEVCOM_UNPAIR:
   2712		if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
   2713			break;
   2714
   2715		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
   2716		mlx5_esw_offloads_unpair(peer_esw);
   2717		mlx5_esw_offloads_unpair(esw);
   2718		mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
   2719		break;
   2720	}
   2721
   2722	return 0;
   2723
   2724err_pair:
   2725	mlx5_esw_offloads_unpair(esw);
   2726err_peer:
   2727	mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
   2728err_out:
   2729	mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
   2730		      event, err);
   2731	return err;
   2732}
   2733
   2734static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
   2735{
   2736	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
   2737
   2738	INIT_LIST_HEAD(&esw->offloads.peer_flows);
   2739	mutex_init(&esw->offloads.peer_mutex);
   2740
   2741	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
   2742		return;
   2743
   2744	if (!mlx5_is_lag_supported(esw->dev))
   2745		return;
   2746
   2747	mlx5_devcom_register_component(devcom,
   2748				       MLX5_DEVCOM_ESW_OFFLOADS,
   2749				       mlx5_esw_offloads_devcom_event,
   2750				       esw);
   2751
   2752	mlx5_devcom_send_event(devcom,
   2753			       MLX5_DEVCOM_ESW_OFFLOADS,
   2754			       ESW_OFFLOADS_DEVCOM_PAIR, esw);
   2755}
   2756
   2757static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
   2758{
   2759	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
   2760
   2761	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
   2762		return;
   2763
   2764	if (!mlx5_is_lag_supported(esw->dev))
   2765		return;
   2766
   2767	mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
   2768			       ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
   2769
   2770	mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
   2771}
   2772
   2773bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
   2774{
   2775	if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
   2776		return false;
   2777
   2778	if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
   2779	      MLX5_FDB_TO_VPORT_REG_C_0))
   2780		return false;
   2781
   2782	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
   2783		return false;
   2784
   2785	return true;
   2786}
   2787
   2788#define MLX5_ESW_METADATA_RSVD_UPLINK 1
   2789
    2790/* Share the same metadata for both uplinks. This is fine because:
    2791 * (a) In shared FDB mode (LAG) both uplinks are treated the
    2792 *     same and tagged with the same metadata.
    2793 * (b) In non-shared FDB mode, packets from physical port0
    2794 *     cannot hit the eswitch of PF1 and vice versa.
   2795 */
   2796static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw)
   2797{
   2798	return MLX5_ESW_METADATA_RSVD_UPLINK;
   2799}
   2800
   2801u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
   2802{
   2803	u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
   2804	/* Reserve 0xf for internal port offload */
   2805	u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
   2806	u32 pf_num;
   2807	int id;
   2808
   2809	/* Only 4 bits of pf_num */
   2810	pf_num = mlx5_get_dev_index(esw->dev);
   2811	if (pf_num > max_pf_num)
   2812		return 0;
   2813
   2814	/* Metadata is 4 bits of PFNUM and 12 bits of unique id */
    2815	/* Use only non-zero vport_id (2-4095) for all PFs */
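        	/* For example, pf_num 1 with ida id 5 encodes as (1 << 12) | 5 = 0x1005. */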
   2816	id = ida_alloc_range(&esw->offloads.vport_metadata_ida,
   2817			     MLX5_ESW_METADATA_RSVD_UPLINK + 1,
   2818			     vport_end_ida, GFP_KERNEL);
   2819	if (id < 0)
   2820		return 0;
   2821	id = (pf_num << ESW_VPORT_BITS) | id;
   2822	return id;
   2823}
   2824
   2825void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
   2826{
   2827	u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1;
   2828
   2829	/* Metadata contains only 12 bits of actual ida id */
   2830	ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask);
   2831}
   2832
   2833static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
   2834					     struct mlx5_vport *vport)
   2835{
   2836	if (vport->vport == MLX5_VPORT_UPLINK)
   2837		vport->default_metadata = mlx5_esw_match_metadata_reserved(esw);
   2838	else
   2839		vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
   2840
   2841	vport->metadata = vport->default_metadata;
   2842	return vport->metadata ? 0 : -ENOSPC;
   2843}
   2844
   2845static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
   2846						struct mlx5_vport *vport)
   2847{
   2848	if (!vport->default_metadata)
   2849		return;
   2850
   2851	if (vport->vport == MLX5_VPORT_UPLINK)
   2852		return;
   2853
   2854	WARN_ON(vport->metadata != vport->default_metadata);
   2855	mlx5_esw_match_metadata_free(esw, vport->default_metadata);
   2856}
   2857
   2858static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
   2859{
   2860	struct mlx5_vport *vport;
   2861	unsigned long i;
   2862
   2863	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
   2864		return;
   2865
   2866	mlx5_esw_for_each_vport(esw, i, vport)
   2867		esw_offloads_vport_metadata_cleanup(esw, vport);
   2868}
   2869
   2870static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
   2871{
   2872	struct mlx5_vport *vport;
   2873	unsigned long i;
   2874	int err;
   2875
   2876	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
   2877		return 0;
   2878
   2879	mlx5_esw_for_each_vport(esw, i, vport) {
   2880		err = esw_offloads_vport_metadata_setup(esw, vport);
   2881		if (err)
   2882			goto metadata_err;
   2883	}
   2884
   2885	return 0;
   2886
   2887metadata_err:
   2888	esw_offloads_metadata_uninit(esw);
   2889	return err;
   2890}
   2891
   2892int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
   2893{
   2894	int err = 0;
   2895
   2896	down_write(&esw->mode_lock);
   2897	if (esw->mode != MLX5_ESWITCH_NONE) {
   2898		err = -EBUSY;
   2899		goto done;
   2900	}
   2901	if (!mlx5_esw_vport_match_metadata_supported(esw)) {
   2902		err = -EOPNOTSUPP;
   2903		goto done;
   2904	}
   2905	if (enable)
   2906		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
   2907	else
   2908		esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
   2909done:
   2910	up_write(&esw->mode_lock);
   2911	return err;
   2912}
   2913
   2914int
   2915esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
   2916				     struct mlx5_vport *vport)
   2917{
   2918	int err;
   2919
   2920	err = esw_acl_ingress_ofld_setup(esw, vport);
   2921	if (err)
   2922		return err;
   2923
   2924	err = esw_acl_egress_ofld_setup(esw, vport);
   2925	if (err)
   2926		goto egress_err;
   2927
   2928	return 0;
   2929
   2930egress_err:
   2931	esw_acl_ingress_ofld_cleanup(esw, vport);
   2932	return err;
   2933}
   2934
   2935void
   2936esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
   2937				      struct mlx5_vport *vport)
   2938{
   2939	esw_acl_egress_ofld_cleanup(vport);
   2940	esw_acl_ingress_ofld_cleanup(esw, vport);
   2941}
   2942
   2943static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
   2944{
   2945	struct mlx5_vport *vport;
   2946
   2947	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
   2948	if (IS_ERR(vport))
   2949		return PTR_ERR(vport);
   2950
   2951	return esw_vport_create_offloads_acl_tables(esw, vport);
   2952}
   2953
   2954static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
   2955{
   2956	struct mlx5_vport *vport;
   2957
   2958	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
   2959	if (IS_ERR(vport))
   2960		return;
   2961
   2962	esw_vport_destroy_offloads_acl_tables(esw, vport);
   2963}
   2964
   2965int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
   2966{
   2967	struct mlx5_eswitch_rep *rep;
   2968	unsigned long i;
   2969	int ret;
   2970
   2971	if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
   2972		return 0;
   2973
   2974	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
   2975	if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
   2976		return 0;
   2977
   2978	ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
   2979	if (ret)
   2980		return ret;
   2981
   2982	mlx5_esw_for_each_rep(esw, i, rep) {
   2983		if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
   2984			mlx5_esw_offloads_rep_load(esw, rep->vport);
   2985	}
   2986
   2987	return 0;
   2988}
   2989
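        /* Create all offloads steering objects: the indirection table, the
         * uplink ACL tables, the offloads and restore tables, the FDB tables
         * and the vport RX group; torn down in reverse order by
         * esw_offloads_steering_cleanup().
         */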
   2990static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
   2991{
   2992	struct mlx5_esw_indir_table *indir;
   2993	int err;
   2994
   2995	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
   2996	mutex_init(&esw->fdb_table.offloads.vports.lock);
   2997	hash_init(esw->fdb_table.offloads.vports.table);
   2998	atomic64_set(&esw->user_count, 0);
   2999
   3000	indir = mlx5_esw_indir_table_init();
   3001	if (IS_ERR(indir)) {
   3002		err = PTR_ERR(indir);
   3003		goto create_indir_err;
   3004	}
   3005	esw->fdb_table.offloads.indir = indir;
   3006
   3007	err = esw_create_uplink_offloads_acl_tables(esw);
   3008	if (err)
   3009		goto create_acl_err;
   3010
   3011	err = esw_create_offloads_table(esw);
   3012	if (err)
   3013		goto create_offloads_err;
   3014
   3015	err = esw_create_restore_table(esw);
   3016	if (err)
   3017		goto create_restore_err;
   3018
   3019	err = esw_create_offloads_fdb_tables(esw);
   3020	if (err)
   3021		goto create_fdb_err;
   3022
   3023	err = esw_create_vport_rx_group(esw);
   3024	if (err)
   3025		goto create_fg_err;
   3026
   3027	return 0;
   3028
   3029create_fg_err:
   3030	esw_destroy_offloads_fdb_tables(esw);
   3031create_fdb_err:
   3032	esw_destroy_restore_table(esw);
   3033create_restore_err:
   3034	esw_destroy_offloads_table(esw);
   3035create_offloads_err:
   3036	esw_destroy_uplink_offloads_acl_tables(esw);
   3037create_acl_err:
   3038	mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
   3039create_indir_err:
   3040	mutex_destroy(&esw->fdb_table.offloads.vports.lock);
   3041	return err;
   3042}
   3043
   3044static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
   3045{
   3046	esw_destroy_vport_rx_group(esw);
   3047	esw_destroy_offloads_fdb_tables(esw);
   3048	esw_destroy_restore_table(esw);
   3049	esw_destroy_offloads_table(esw);
   3050	esw_destroy_uplink_offloads_acl_tables(esw);
   3051	mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
   3052	mutex_destroy(&esw->fdb_table.offloads.vports.lock);
   3053}
   3054
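        /* React to an ESW functions changed event: load or unload VF vports
         * when the host's number of VFs transitions between zero and non-zero.
         */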
   3055static void
   3056esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
   3057{
   3058	bool host_pf_disabled;
   3059	u16 new_num_vfs;
   3060
   3061	new_num_vfs = MLX5_GET(query_esw_functions_out, out,
   3062			       host_params_context.host_num_of_vfs);
   3063	host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
   3064				    host_params_context.host_pf_disabled);
   3065
   3066	if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
   3067		return;
   3068
   3069	/* Number of VFs can only change from "0 to x" or "x to 0". */
   3070	if (esw->esw_funcs.num_vfs > 0) {
   3071		mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
   3072	} else {
   3073		int err;
   3074
   3075		err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
   3076						  MLX5_VPORT_UC_ADDR_CHANGE);
   3077		if (err)
   3078			return;
   3079	}
   3080	esw->esw_funcs.num_vfs = new_num_vfs;
   3081}
   3082
   3083static void esw_functions_changed_event_handler(struct work_struct *work)
   3084{
   3085	struct mlx5_host_work *host_work;
   3086	struct mlx5_eswitch *esw;
   3087	const u32 *out;
   3088
   3089	host_work = container_of(work, struct mlx5_host_work, work);
   3090	esw = host_work->esw;
   3091
   3092	out = mlx5_esw_query_functions(esw->dev);
   3093	if (IS_ERR(out))
   3094		goto out;
   3095
   3096	esw_vfs_changed_event_handler(esw, out);
   3097	kvfree(out);
   3098out:
   3099	kfree(host_work);
   3100}
   3101
   3102int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
   3103{
   3104	struct mlx5_esw_functions *esw_funcs;
   3105	struct mlx5_host_work *host_work;
   3106	struct mlx5_eswitch *esw;
   3107
   3108	host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
   3109	if (!host_work)
   3110		return NOTIFY_DONE;
   3111
   3112	esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb);
   3113	esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs);
   3114
   3115	host_work->esw = esw;
   3116
   3117	INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
   3118	queue_work(esw->work_queue, &host_work->work);
   3119
   3120	return NOTIFY_OK;
   3121}
   3122
   3123static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
   3124{
   3125	const u32 *query_host_out;
   3126
   3127	if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
   3128		return 0;
   3129
   3130	query_host_out = mlx5_esw_query_functions(esw->dev);
   3131	if (IS_ERR(query_host_out))
   3132		return PTR_ERR(query_host_out);
   3133
    3134	/* Mark a non-local controller with a non-zero controller number. */
   3135	esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out,
   3136					     host_params_context.host_number);
   3137	kvfree(query_host_out);
   3138	return 0;
   3139}
   3140
   3141bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
   3142{
   3143	/* Local controller is always valid */
   3144	if (controller == 0)
   3145		return true;
   3146
   3147	if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
   3148		return false;
   3149
    3150	/* External host number starts from zero in the device */
   3151	return (controller == esw->offloads.host_number + 1);
   3152}
   3153
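        /* Bring the eswitch up in offloads mode: init metadata, the reg_c_0
         * mapping pool and steering, load the uplink representor first, then
         * enable the PF/VF vports, and finally register with devcom.
         */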
   3154int esw_offloads_enable(struct mlx5_eswitch *esw)
   3155{
   3156	struct mapping_ctx *reg_c0_obj_pool;
   3157	struct mlx5_vport *vport;
   3158	unsigned long i;
   3159	u64 mapping_id;
   3160	int err;
   3161
   3162	mutex_init(&esw->offloads.termtbl_mutex);
   3163	mlx5_rdma_enable_roce(esw->dev);
   3164
   3165	err = mlx5_esw_host_number_init(esw);
   3166	if (err)
   3167		goto err_metadata;
   3168
   3169	err = esw_offloads_metadata_init(esw);
   3170	if (err)
   3171		goto err_metadata;
   3172
   3173	err = esw_set_passing_vport_metadata(esw, true);
   3174	if (err)
   3175		goto err_vport_metadata;
   3176
   3177	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
   3178
   3179	reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
   3180						sizeof(struct mlx5_mapped_obj),
   3181						ESW_REG_C0_USER_DATA_METADATA_MASK,
   3182						true);
   3183
   3184	if (IS_ERR(reg_c0_obj_pool)) {
   3185		err = PTR_ERR(reg_c0_obj_pool);
   3186		goto err_pool;
   3187	}
   3188	esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;
   3189
   3190	err = esw_offloads_steering_init(esw);
   3191	if (err)
   3192		goto err_steering_init;
   3193
   3194	/* Representor will control the vport link state */
   3195	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
   3196		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;
   3197
   3198	/* Uplink vport rep must load first. */
   3199	err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK);
   3200	if (err)
   3201		goto err_uplink;
   3202
   3203	err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE);
   3204	if (err)
   3205		goto err_vports;
   3206
   3207	esw_offloads_devcom_init(esw);
   3208
   3209	return 0;
   3210
   3211err_vports:
   3212	esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
   3213err_uplink:
   3214	esw_offloads_steering_cleanup(esw);
   3215err_steering_init:
   3216	mapping_destroy(reg_c0_obj_pool);
   3217err_pool:
   3218	esw_set_passing_vport_metadata(esw, false);
   3219err_vport_metadata:
   3220	esw_offloads_metadata_uninit(esw);
   3221err_metadata:
   3222	mlx5_rdma_disable_roce(esw->dev);
   3223	mutex_destroy(&esw->offloads.termtbl_mutex);
   3224	return err;
   3225}
   3226
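       /* Move the eswitch back to legacy mode. If that fails, try to restore
        * offloads mode so the eswitch is not left disabled.
        */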
   3227static int esw_offloads_stop(struct mlx5_eswitch *esw,
   3228			     struct netlink_ext_ack *extack)
   3229{
   3230	int err, err1;
   3231
   3232	mlx5_eswitch_disable_locked(esw, false);
   3233	err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
   3234					 MLX5_ESWITCH_IGNORE_NUM_VFS);
   3235	if (err) {
   3236		NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
   3237		err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
   3238						  MLX5_ESWITCH_IGNORE_NUM_VFS);
   3239		if (err1) {
   3240			NL_SET_ERR_MSG_MOD(extack,
   3241					   "Failed setting eswitch back to offloads");
   3242		}
   3243	}
   3244
   3245	return err;
   3246}
   3247
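       /* Undo esw_offloads_enable(): stop devcom pairing, disable the PF/VF
        * vports and the uplink rep, then release steering, mapping, metadata
        * and RoCE resources.
        */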
   3248void esw_offloads_disable(struct mlx5_eswitch *esw)
   3249{
   3250	esw_offloads_devcom_cleanup(esw);
   3251	mlx5_eswitch_disable_pf_vf_vports(esw);
   3252	esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
   3253	esw_set_passing_vport_metadata(esw, false);
   3254	esw_offloads_steering_cleanup(esw);
   3255	mapping_destroy(esw->offloads.reg_c0_obj_pool);
   3256	esw_offloads_metadata_uninit(esw);
   3257	mlx5_rdma_disable_roce(esw->dev);
   3258	mutex_destroy(&esw->offloads.termtbl_mutex);
   3259}
   3260
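       /* Helpers translating eswitch and inline modes between the devlink UAPI
        * enums and the driver-internal values.
        */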
   3261static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
   3262{
   3263	switch (mode) {
   3264	case DEVLINK_ESWITCH_MODE_LEGACY:
   3265		*mlx5_mode = MLX5_ESWITCH_LEGACY;
   3266		break;
   3267	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
   3268		*mlx5_mode = MLX5_ESWITCH_OFFLOADS;
   3269		break;
   3270	default:
   3271		return -EINVAL;
   3272	}
   3273
   3274	return 0;
   3275}
   3276
   3277static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
   3278{
   3279	switch (mlx5_mode) {
   3280	case MLX5_ESWITCH_LEGACY:
   3281		*mode = DEVLINK_ESWITCH_MODE_LEGACY;
   3282		break;
   3283	case MLX5_ESWITCH_OFFLOADS:
   3284		*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
   3285		break;
   3286	default:
   3287		return -EINVAL;
   3288	}
   3289
   3290	return 0;
   3291}
   3292
   3293static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
   3294{
   3295	switch (mode) {
   3296	case DEVLINK_ESWITCH_INLINE_MODE_NONE:
   3297		*mlx5_mode = MLX5_INLINE_MODE_NONE;
   3298		break;
   3299	case DEVLINK_ESWITCH_INLINE_MODE_LINK:
   3300		*mlx5_mode = MLX5_INLINE_MODE_L2;
   3301		break;
   3302	case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
   3303		*mlx5_mode = MLX5_INLINE_MODE_IP;
   3304		break;
   3305	case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
   3306		*mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
   3307		break;
   3308	default:
   3309		return -EINVAL;
   3310	}
   3311
   3312	return 0;
   3313}
   3314
   3315static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
   3316{
   3317	switch (mlx5_mode) {
   3318	case MLX5_INLINE_MODE_NONE:
   3319		*mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
   3320		break;
   3321	case MLX5_INLINE_MODE_L2:
   3322		*mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
   3323		break;
   3324	case MLX5_INLINE_MODE_IP:
   3325		*mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
   3326		break;
   3327	case MLX5_INLINE_MODE_TCP_UDP:
   3328		*mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
   3329		break;
   3330	default:
   3331		return -EINVAL;
   3332	}
   3333
   3334	return 0;
   3335}
   3336
   3337static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw)
   3338{
   3339	/* devlink commands in NONE eswitch mode are currently supported only
   3340	 * on ECPF.
   3341	 */
   3342	return (esw->mode == MLX5_ESWITCH_NONE &&
   3343		!mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0;
   3344}
   3345
   3346/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
   3347 * is never correct and prone to races. It's a transitional workaround,
   3348 * never repeat this pattern.
   3349 *
   3350 * This code MUST be fixed before removing devlink_mutex as it is safe
   3351 * to do only because of that mutex.
   3352 */
   3353static void mlx5_eswtich_mode_callback_enter(struct devlink *devlink,
   3354					     struct mlx5_eswitch *esw)
   3355{
   3356	devl_unlock(devlink);
   3357	down_write(&esw->mode_lock);
   3358}
   3359
   3360static void mlx5_eswtich_mode_callback_exit(struct devlink *devlink,
   3361					    struct mlx5_eswitch *esw)
   3362{
   3363	up_write(&esw->mode_lock);
   3364	devl_lock(devlink);
   3365}
   3366
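       /* Devlink callback switching the eswitch between legacy and switchdev
        * modes. From userspace this is typically driven with iproute2, for
        * example (the device name is illustrative only):
        *
        *	devlink dev eswitch set pci/0000:03:00.0 mode switchdev
        */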
   3367int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
   3368				  struct netlink_ext_ack *extack)
   3369{
   3370	u16 cur_mlx5_mode, mlx5_mode = 0;
   3371	struct mlx5_eswitch *esw;
   3372	int err = 0;
   3373
   3374	esw = mlx5_devlink_eswitch_get(devlink);
   3375	if (IS_ERR(esw))
   3376		return PTR_ERR(esw);
   3377
   3378	if (esw_mode_from_devlink(mode, &mlx5_mode))
   3379		return -EINVAL;
   3380
   3381	/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
   3382	 * is never correct and prone to races. It's a transitional workaround,
   3383	 * never repeat this pattern.
   3384	 *
   3385	 * This code MUST be fixed before removing devlink_mutex as it is safe
   3386	 * to do only because of that mutex.
   3387	 */
   3388	devl_unlock(devlink);
   3389
   3390	mlx5_lag_disable_change(esw->dev);
   3391	err = mlx5_esw_try_lock(esw);
   3392	if (err < 0) {
   3393		NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
   3394		goto enable_lag;
   3395	}
   3396	cur_mlx5_mode = err;
   3397	err = 0;
   3398
   3399	if (cur_mlx5_mode == mlx5_mode)
   3400		goto unlock;
   3401
   3402	if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
   3403		if (mlx5_devlink_trap_get_num_active(esw->dev)) {
   3404			NL_SET_ERR_MSG_MOD(extack,
   3405					   "Can't change mode while devlink traps are active");
   3406			err = -EOPNOTSUPP;
   3407			goto unlock;
   3408		}
   3409		err = esw_offloads_start(esw, extack);
   3410	} else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
   3411		err = esw_offloads_stop(esw, extack);
   3412	} else {
   3413		err = -EINVAL;
   3414	}
   3415
   3416unlock:
   3417	mlx5_esw_unlock(esw);
   3418enable_lag:
   3419	mlx5_lag_enable_change(esw->dev);
   3420	devl_lock(devlink);
   3421	return err;
   3422}
   3423
   3424int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
   3425{
   3426	struct mlx5_eswitch *esw;
   3427	int err;
   3428
   3429	esw = mlx5_devlink_eswitch_get(devlink);
   3430	if (IS_ERR(esw))
   3431		return PTR_ERR(esw);
   3432
   3433	mlx5_eswtich_mode_callback_enter(devlink, esw);
   3434	err = eswitch_devlink_esw_mode_check(esw);
   3435	if (err)
   3436		goto unlock;
   3437
   3438	err = esw_mode_to_devlink(esw->mode, mode);
   3439unlock:
   3440	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3441	return err;
   3442}
   3443
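       /* Apply the new minimum inline mode to every host function vport. If a
        * vport fails, the vports already updated are reverted to the previous
        * mode.
        */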
   3444static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
   3445				      struct netlink_ext_ack *extack)
   3446{
   3447	struct mlx5_core_dev *dev = esw->dev;
   3448	struct mlx5_vport *vport;
   3449	u16 err_vport_num = 0;
   3450	unsigned long i;
   3451	int err = 0;
   3452
   3453	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
   3454		err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
   3455		if (err) {
   3456			err_vport_num = vport->vport;
   3457			NL_SET_ERR_MSG_MOD(extack,
   3458					   "Failed to set min inline on vport");
   3459			goto revert_inline_mode;
   3460		}
   3461	}
   3462	return 0;
   3463
   3464revert_inline_mode:
   3465	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
   3466		if (vport->vport == err_vport_num)
   3467			break;
   3468		mlx5_modify_nic_vport_min_inline(dev,
   3469						 vport->vport,
   3470						 esw->offloads.inline_mode);
   3471	}
   3472	return err;
   3473}
   3474
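       /* Devlink callback setting the minimum packet inline mode. Only applied
        * when the device reports MLX5_CAP_INLINE_MODE_VPORT_CONTEXT and no
        * offloaded flows are installed. Example usage (device name is
        * illustrative only):
        *
        *	devlink dev eswitch set pci/0000:03:00.0 inline-mode transport
        */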
   3475int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
   3476					 struct netlink_ext_ack *extack)
   3477{
   3478	struct mlx5_core_dev *dev = devlink_priv(devlink);
   3479	struct mlx5_eswitch *esw;
   3480	u8 mlx5_mode;
   3481	int err;
   3482
   3483	esw = mlx5_devlink_eswitch_get(devlink);
   3484	if (IS_ERR(esw))
   3485		return PTR_ERR(esw);
   3486
   3487	mlx5_eswtich_mode_callback_enter(devlink, esw);
   3488	err = eswitch_devlink_esw_mode_check(esw);
   3489	if (err)
   3490		goto out;
   3491
   3492	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
   3493	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
   3494		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
   3495			err = 0;
   3496			goto out;
   3497		}
   3498
   3499		fallthrough;
   3500	case MLX5_CAP_INLINE_MODE_L2:
   3501		NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
   3502		err = -EOPNOTSUPP;
   3503		goto out;
   3504	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
   3505		break;
   3506	}
   3507
   3508	if (atomic64_read(&esw->offloads.num_flows) > 0) {
   3509		NL_SET_ERR_MSG_MOD(extack,
   3510				   "Can't set inline mode when flows are configured");
   3511		err = -EOPNOTSUPP;
   3512		goto out;
   3513	}
   3514
   3515	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
   3516	if (err)
   3517		goto out;
   3518
   3519	err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
   3520	if (err)
   3521		goto out;
   3522
   3523	esw->offloads.inline_mode = mlx5_mode;
   3524	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3525	return 0;
   3526
   3527out:
   3528	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3529	return err;
   3530}
   3531
   3532int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
   3533{
   3534	struct mlx5_eswitch *esw;
   3535	int err;
   3536
   3537	esw = mlx5_devlink_eswitch_get(devlink);
   3538	if (IS_ERR(esw))
   3539		return PTR_ERR(esw);
   3540
   3541	mlx5_eswtich_mode_callback_enter(devlink, esw);
   3542	err = eswitch_devlink_esw_mode_check(esw);
   3543	if (err)
   3544		goto unlock;
   3545
   3546	err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
   3547unlock:
   3548	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3549	return err;
   3550}
   3551
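       /* Devlink callback toggling basic encap/decap offload. In switchdev
        * mode this recreates the offloads FDB tables, so the change is refused
        * while offloaded flows are installed.
        */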
   3552int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
   3553					enum devlink_eswitch_encap_mode encap,
   3554					struct netlink_ext_ack *extack)
   3555{
   3556	struct mlx5_core_dev *dev = devlink_priv(devlink);
   3557	struct mlx5_eswitch *esw;
   3558	int err;
   3559
   3560	esw = mlx5_devlink_eswitch_get(devlink);
   3561	if (IS_ERR(esw))
   3562		return PTR_ERR(esw);
   3563
   3564	mlx5_eswtich_mode_callback_enter(devlink, esw);
   3565	err = eswitch_devlink_esw_mode_check(esw);
   3566	if (err)
   3567		goto unlock;
   3568
   3569	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
   3570	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
   3571	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) {
   3572		err = -EOPNOTSUPP;
   3573		goto unlock;
   3574	}
   3575
   3576	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) {
   3577		err = -EOPNOTSUPP;
   3578		goto unlock;
   3579	}
   3580
   3581	if (esw->mode == MLX5_ESWITCH_LEGACY) {
   3582		esw->offloads.encap = encap;
   3583		goto unlock;
   3584	}
   3585
   3586	if (esw->offloads.encap == encap)
   3587		goto unlock;
   3588
   3589	if (atomic64_read(&esw->offloads.num_flows) > 0) {
   3590		NL_SET_ERR_MSG_MOD(extack,
   3591				   "Can't set encapsulation when flows are configured");
   3592		err = -EOPNOTSUPP;
   3593		goto unlock;
   3594	}
   3595
   3596	esw_destroy_offloads_fdb_tables(esw);
   3597
   3598	esw->offloads.encap = encap;
   3599
   3600	err = esw_create_offloads_fdb_tables(esw);
   3601
   3602	if (err) {
   3603		NL_SET_ERR_MSG_MOD(extack,
   3604				   "Failed re-creating fast FDB table");
   3605		esw->offloads.encap = !encap;
   3606		(void)esw_create_offloads_fdb_tables(esw);
   3607	}
   3608
   3609unlock:
   3610	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3611	return err;
   3612}
   3613
   3614int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
   3615					enum devlink_eswitch_encap_mode *encap)
   3616{
   3617	struct mlx5_eswitch *esw;
   3618	int err;
   3619
   3620	esw = mlx5_devlink_eswitch_get(devlink);
   3621	if (IS_ERR(esw))
   3622		return PTR_ERR(esw);
   3623
   3624	mlx5_eswtich_mode_callback_enter(devlink, esw);
   3625	err = eswitch_devlink_esw_mode_check(esw);
   3626	if (err)
   3627		goto unlock;
   3628
   3629	*encap = esw->offloads.encap;
   3630unlock:
   3631	mlx5_eswtich_mode_callback_exit(devlink, esw);
   3632	return err;
   3633}
   3634
   3635static bool
   3636mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num)
   3637{
   3638	/* Currently, only an ECPF-based device has a host PF representor. */
   3639	if (vport_num == MLX5_VPORT_PF &&
   3640	    !mlx5_core_is_ecpf_esw_manager(esw->dev))
   3641		return false;
   3642
   3643	if (vport_num == MLX5_VPORT_ECPF &&
   3644	    !mlx5_ecpf_vport_exists(esw->dev))
   3645		return false;
   3646
   3647	return true;
   3648}
   3649
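       /* Register representor callbacks of the given type for every vport that
        * can expose a rep. A minimal caller-side sketch (the my_rep_* ops are
        * hypothetical, for illustration only):
        *
        *	static const struct mlx5_eswitch_rep_ops my_rep_ops = {
        *		.load = my_rep_load,
        *		.unload = my_rep_unload,
        *		.get_proto_dev = my_rep_get_proto_dev,
        *	};
        *
        *	mlx5_eswitch_register_vport_reps(esw, &my_rep_ops, REP_ETH);
        */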
   3650void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
   3651				      const struct mlx5_eswitch_rep_ops *ops,
   3652				      u8 rep_type)
   3653{
   3654	struct mlx5_eswitch_rep_data *rep_data;
   3655	struct mlx5_eswitch_rep *rep;
   3656	unsigned long i;
   3657
   3658	esw->offloads.rep_ops[rep_type] = ops;
   3659	mlx5_esw_for_each_rep(esw, i, rep) {
   3660		if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
   3661			rep->esw = esw;
   3662			rep_data = &rep->rep_data[rep_type];
   3663			atomic_set(&rep_data->state, REP_REGISTERED);
   3664		}
   3665	}
   3666}
   3667EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
   3668
   3669void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
   3670{
   3671	struct mlx5_eswitch_rep *rep;
   3672	unsigned long i;
   3673
   3674	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
   3675		__unload_reps_all_vport(esw, rep_type);
   3676
   3677	mlx5_esw_for_each_rep(esw, i, rep)
   3678		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
   3679}
   3680EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
   3681
   3682void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
   3683{
   3684	struct mlx5_eswitch_rep *rep;
   3685
   3686	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
   3687	return rep->rep_data[rep_type].priv;
   3688}
   3689
   3690void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
   3691				 u16 vport,
   3692				 u8 rep_type)
   3693{
   3694	struct mlx5_eswitch_rep *rep;
   3695
   3696	rep = mlx5_eswitch_get_rep(esw, vport);
   3697
   3698	if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
   3699	    esw->offloads.rep_ops[rep_type]->get_proto_dev)
   3700		return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep);
   3701	return NULL;
   3702}
   3703EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
   3704
   3705void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
   3706{
   3707	return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
   3708}
   3709EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
   3710
   3711struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
   3712						u16 vport)
   3713{
   3714	return mlx5_eswitch_get_rep(esw, vport);
   3715}
   3716EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
   3717
   3718bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
   3719{
   3720	return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
   3721}
   3722EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled);
   3723
   3724bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
   3725{
   3726	return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
   3727}
   3728EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
   3729
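       /* Return the vport's metadata shifted into the upper reg_c0 bits that
        * are used for source-port matching in offloaded rules.
        */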
   3730u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
   3731					      u16 vport_num)
   3732{
   3733	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
   3734
   3735	if (WARN_ON_ONCE(IS_ERR(vport)))
   3736		return 0;
   3737
   3738	return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
   3739}
   3740EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
   3741
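       /* Bring up an SF vport: enable the vport, register its devlink SF port
        * and load its representor. mlx5_esw_offloads_sf_vport_disable() undoes
        * these steps in reverse order.
        */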
   3742int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
   3743				      u16 vport_num, u32 controller, u32 sfnum)
   3744{
   3745	int err;
   3746
   3747	err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE);
   3748	if (err)
   3749		return err;
   3750
   3751	err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
   3752	if (err)
   3753		goto devlink_err;
   3754
   3755	err = mlx5_esw_offloads_rep_load(esw, vport_num);
   3756	if (err)
   3757		goto rep_err;
   3758	return 0;
   3759
   3760rep_err:
   3761	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
   3762devlink_err:
   3763	mlx5_esw_vport_disable(esw, vport_num);
   3764	return err;
   3765}
   3766
   3767void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
   3768{
   3769	mlx5_esw_offloads_rep_unload(esw, vport_num);
   3770	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
   3771	mlx5_esw_vport_disable(esw, vport_num);
   3772}
   3773
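       /* Query the vhca_id of another function through its HCA capabilities.
        * Fails with -EPERM for eswitch manager vports or when the device is
        * not a vhca resource manager.
        */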
   3774static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id)
   3775{
   3776	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
   3777	void *query_ctx;
   3778	void *hca_caps;
   3779	int err;
   3780
   3781	*vhca_id = 0;
   3782	if (mlx5_esw_is_manager_vport(esw, vport_num) ||
   3783	    !MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
   3784		return -EPERM;
   3785
   3786	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
   3787	if (!query_ctx)
   3788		return -ENOMEM;
   3789
   3790	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
   3791	if (err)
   3792		goto out_free;
   3793
   3794	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
   3795	*vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id);
   3796
   3797out_free:
   3798	kfree(query_ctx);
   3799	return err;
   3800}
   3801
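       /* Record the vhca_id -> vport_num mapping in the offloads vhca_map
        * xarray so mlx5_eswitch_vhca_id_to_vport() can translate it back; any
        * previous entry for the same vhca_id is replaced and freed.
        */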
   3802int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num)
   3803{
   3804	u16 *old_entry, *vhca_map_entry, vhca_id;
   3805	int err;
   3806
   3807	err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
   3808	if (err) {
   3809		esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n",
   3810			 vport_num, err);
   3811		return err;
   3812	}
   3813
   3814	vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL);
   3815	if (!vhca_map_entry)
   3816		return -ENOMEM;
   3817
   3818	*vhca_map_entry = vport_num;
   3819	old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL);
   3820	if (xa_is_err(old_entry)) {
   3821		kfree(vhca_map_entry);
   3822		return xa_err(old_entry);
   3823	}
   3824	kfree(old_entry);
   3825	return 0;
   3826}
   3827
   3828void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num)
   3829{
   3830	u16 *vhca_map_entry, vhca_id;
   3831	int err;
   3832
   3833	err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id);
   3834	if (err)
   3835		esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n",
   3836			 vport_num, err);
   3837
   3838	vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id);
   3839	kfree(vhca_map_entry);
   3840}
   3841
   3842int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num)
   3843{
   3844	u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id);
   3845
   3846	if (!res)
   3847		return -ENOENT;
   3848
   3849	*vport_num = *res;
   3850	return 0;
   3851}
   3852
   3853u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
   3854					    u16 vport_num)
   3855{
   3856	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
   3857
   3858	if (WARN_ON_ONCE(IS_ERR(vport)))
   3859		return 0;
   3860
   3861	return vport->metadata;
   3862}
   3863EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set);
   3864
   3865static bool
   3866is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
   3867{
   3868	return vport_num == MLX5_VPORT_PF ||
   3869	       mlx5_eswitch_is_vf_vport(esw, vport_num) ||
   3870	       mlx5_esw_is_sf_vport(esw, vport_num);
   3871}
   3872
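       /* Devlink port-function callbacks reading and writing the MAC address
        * of a PF/VF/SF function, e.g. from userspace (the port name is
        * illustrative only):
        *
        *	devlink port function set pci/0000:03:00.0/1 hw_addr 00:11:22:33:44:55
        */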
   3873int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
   3874					   u8 *hw_addr, int *hw_addr_len,
   3875					   struct netlink_ext_ack *extack)
   3876{
   3877	struct mlx5_eswitch *esw;
   3878	struct mlx5_vport *vport;
   3879	u16 vport_num;
   3880
   3881	esw = mlx5_devlink_eswitch_get(port->devlink);
   3882	if (IS_ERR(esw))
   3883		return PTR_ERR(esw);
   3884
   3885	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
   3886	if (!is_port_function_supported(esw, vport_num))
   3887		return -EOPNOTSUPP;
   3888
   3889	vport = mlx5_eswitch_get_vport(esw, vport_num);
   3890	if (IS_ERR(vport)) {
   3891		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
   3892		return PTR_ERR(vport);
   3893	}
   3894
   3895	mutex_lock(&esw->state_lock);
   3896	ether_addr_copy(hw_addr, vport->info.mac);
   3897	*hw_addr_len = ETH_ALEN;
   3898	mutex_unlock(&esw->state_lock);
   3899	return 0;
   3900}
   3901
   3902int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
   3903					   const u8 *hw_addr, int hw_addr_len,
   3904					   struct netlink_ext_ack *extack)
   3905{
   3906	struct mlx5_eswitch *esw;
   3907	u16 vport_num;
   3908
   3909	esw = mlx5_devlink_eswitch_get(port->devlink);
   3910	if (IS_ERR(esw)) {
   3911		NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support setting hw_addr");
   3912		return PTR_ERR(esw);
   3913	}
   3914
   3915	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
   3916	if (!is_port_function_supported(esw, vport_num)) {
   3917		NL_SET_ERR_MSG_MOD(extack, "Port doesn't support setting hw_addr");
   3918		return -EINVAL;
   3919	}
   3920
   3921	return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
   3922}