cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

fs.c (71104B)


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
 */

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/eswitch.h>
#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
#include "fs.h"

#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>

enum {
	MATCH_CRITERIA_ENABLE_OUTER_BIT,
	MATCH_CRITERIA_ENABLE_MISC_BIT,
	MATCH_CRITERIA_ENABLE_INNER_BIT,
	MATCH_CRITERIA_ENABLE_MISC2_BIT
};

#define HEADER_IS_ZERO(match_criteria, headers)			           \
	!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
		    0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \

static u8 get_match_criteria_enable(u32 *match_criteria)
{
	u8 match_criteria_enable;

	match_criteria_enable =
		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
		MATCH_CRITERIA_ENABLE_OUTER_BIT;
	match_criteria_enable |=
		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
		MATCH_CRITERIA_ENABLE_MISC_BIT;
	match_criteria_enable |=
		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
		MATCH_CRITERIA_ENABLE_INNER_BIT;
	match_criteria_enable |=
		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
		MATCH_CRITERIA_ENABLE_MISC2_BIT;

	return match_criteria_enable;
}

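/*
 * Illustration for get_match_criteria_enable() above (added comment, not in
 * the original source): a match_criteria block that is non-zero only in
 * outer_headers and misc_parameters_2 yields
 *   BIT(MATCH_CRITERIA_ENABLE_OUTER_BIT) |
 *   BIT(MATCH_CRITERIA_ENABLE_MISC2_BIT) == 0x9.
 */
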
static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
{
	u8 entry_mask;
	u8 entry_val;
	int err = 0;

	if (!mask)
		goto out;

	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
			      ip_protocol);
	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
			     ip_protocol);
	if (!entry_mask) {
		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
		goto out;
	}
	/* Don't override existing ip protocol */
	if (mask != entry_mask || val != entry_val)
		err = -EINVAL;
out:
	return err;
}

static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
			   bool inner)
{
	if (inner) {
		MLX5_SET(fte_match_set_misc,
			 misc_c, inner_ipv6_flow_label, mask);
		MLX5_SET(fte_match_set_misc,
			 misc_v, inner_ipv6_flow_label, val);
	} else {
		MLX5_SET(fte_match_set_misc,
			 misc_c, outer_ipv6_flow_label, mask);
		MLX5_SET(fte_match_set_misc,
			 misc_v, outer_ipv6_flow_label, val);
	}
}

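/*
 * Editor's note (added comment): the IPv4 ToS / IPv6 traffic-class octet
 * carries ECN in its low two bits and DSCP in its upper six, which is why
 * the same mask/value pair is written to ip_ecn as-is and to ip_dscp
 * shifted right by two.
 */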
static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
{
	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
}

static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
{
	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
		return -EOPNOTSUPP;

	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
		return -EOPNOTSUPP;

	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
		return -EOPNOTSUPP;

	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
		return -EOPNOTSUPP;

	return 0;
}

#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
#define LAST_COUNTERS_FIELD counters

/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	memchr_inv((void *)&filter.field + sizeof(filter.field), 0,            \
		   sizeof(filter) - offsetofend(typeof(filter), field))

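/*
 * Illustration for FIELDS_NOT_SUPPORTED() above (added comment, not in the
 * original source): with LAST_ETH_FIELD == vlan_tag, the macro scans every
 * byte of the filter that lies after vlan_tag; any non-zero byte there means
 * userspace asked to match a field this driver does not support.
 */
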
int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
			   bool is_egress,
			   struct mlx5_flow_act *action)
{
	switch (maction->ib_action.type) {
	case IB_FLOW_ACTION_UNSPECIFIED:
		if (maction->flow_action_raw.sub_type ==
		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
				return -EINVAL;
			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			action->modify_hdr =
				maction->flow_action_raw.modify_hdr;
			return 0;
		}
		if (maction->flow_action_raw.sub_type ==
		    MLX5_IB_FLOW_ACTION_DECAP) {
			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
				return -EINVAL;
			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			return 0;
		}
		if (maction->flow_action_raw.sub_type ==
		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
			if (action->action &
			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
				return -EINVAL;
			action->action |=
				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
			action->pkt_reformat =
				maction->flow_action_raw.pkt_reformat;
			return 0;
		}
		fallthrough;
	default:
		return -EOPNOTSUPP;
	}
}

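/*
 * Added summary comment: parse_flow_attr() translates one uverbs
 * ib_flow_spec into the device's fte_match_param layout, filling both the
 * match criteria (mask) and match value, and accumulating flow actions
 * such as tag, drop and count into *action.
 */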
static int parse_flow_attr(struct mlx5_core_dev *mdev,
			   struct mlx5_flow_spec *spec,
			   const union ib_flow_spec *ib_spec,
			   const struct ib_flow_attr *flow_attr,
			   struct mlx5_flow_act *action, u32 prev_type)
{
	struct mlx5_flow_context *flow_context = &spec->flow_context;
	u32 *match_c = spec->match_criteria;
	u32 *match_v = spec->match_value;
	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
					   misc_parameters);
	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
					   misc_parameters);
	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
					    misc_parameters_2);
	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
					    misc_parameters_2);
	void *headers_c;
	void *headers_v;
	int match_ipv;
	int ret;

	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
					 inner_headers);
		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.inner_ip_version);
	} else {
		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
					 outer_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
					 outer_headers);
		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.outer_ip_version);
	}

	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
	case IB_FLOW_SPEC_ETH:
		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
			return -EOPNOTSUPP;

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				ib_spec->eth.mask.dst_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				ib_spec->eth.val.dst_mac);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				ib_spec->eth.mask.src_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				ib_spec->eth.val.src_mac);

		if (ib_spec->eth.mask.vlan_tag) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_cfi,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_cfi,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 first_prio,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 first_prio,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
		}
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 ethertype, ntohs(ib_spec->eth.val.ether_type));
		break;
	case IB_FLOW_SPEC_IPV4:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
			return -EOPNOTSUPP;

		if (match_ipv) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ip_version, 0xf);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ip_version, MLX5_FS_IPV4_VERSION);
		} else {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ethertype, 0xffff);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ethertype, ETH_P_IP);
		}

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.src_ip,
		       sizeof(ib_spec->ipv4.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.src_ip,
		       sizeof(ib_spec->ipv4.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.dst_ip,
		       sizeof(ib_spec->ipv4.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.dst_ip,
		       sizeof(ib_spec->ipv4.val.dst_ip));

		set_tos(headers_c, headers_v,
			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

		if (set_proto(headers_c, headers_v,
			      ib_spec->ipv4.mask.proto,
			      ib_spec->ipv4.val.proto))
			return -EINVAL;
		break;
	case IB_FLOW_SPEC_IPV6:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
			return -EOPNOTSUPP;

		if (match_ipv) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ip_version, 0xf);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ip_version, MLX5_FS_IPV6_VERSION);
		} else {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 ethertype, 0xffff);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 ethertype, ETH_P_IPV6);
		}

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.src_ip,
		       sizeof(ib_spec->ipv6.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.src_ip,
		       sizeof(ib_spec->ipv6.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.dst_ip,
		       sizeof(ib_spec->ipv6.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.dst_ip,
		       sizeof(ib_spec->ipv6.val.dst_ip));

		set_tos(headers_c, headers_v,
			ib_spec->ipv6.mask.traffic_class,
			ib_spec->ipv6.val.traffic_class);

		if (set_proto(headers_c, headers_v,
			      ib_spec->ipv6.mask.next_hdr,
			      ib_spec->ipv6.val.next_hdr))
			return -EINVAL;

		set_flow_label(misc_params_c, misc_params_v,
			       ntohl(ib_spec->ipv6.mask.flow_label),
			       ntohl(ib_spec->ipv6.val.flow_label),
			       ib_spec->type & IB_FLOW_SPEC_INNER);
		break;
	case IB_FLOW_SPEC_ESP:
		return -EOPNOTSUPP;
	case IB_FLOW_SPEC_TCP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -EOPNOTSUPP;

		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	case IB_FLOW_SPEC_UDP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -EOPNOTSUPP;

		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	case IB_FLOW_SPEC_GRE:
		if (ib_spec->gre.mask.c_ks_res0_ver)
			return -EOPNOTSUPP;

		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
			return -EINVAL;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 0xff);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 IPPROTO_GRE);

		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
			 ntohs(ib_spec->gre.mask.protocol));
		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
			 ntohs(ib_spec->gre.val.protocol));

		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
				    gre_key.nvgre.hi),
		       &ib_spec->gre.mask.key,
		       sizeof(ib_spec->gre.mask.key));
		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
				    gre_key.nvgre.hi),
		       &ib_spec->gre.val.key,
		       sizeof(ib_spec->gre.val.key));
		break;
	case IB_FLOW_SPEC_MPLS:
		switch (prev_type) {
		case IB_FLOW_SPEC_UDP:
			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
						   ft_field_support.outer_first_mpls_over_udp),
						   &ib_spec->mpls.mask.tag))
				return -EOPNOTSUPP;

			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
					    outer_first_mpls_over_udp),
			       &ib_spec->mpls.val.tag,
			       sizeof(ib_spec->mpls.val.tag));
			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
					    outer_first_mpls_over_udp),
			       &ib_spec->mpls.mask.tag,
			       sizeof(ib_spec->mpls.mask.tag));
			break;
		case IB_FLOW_SPEC_GRE:
			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
						   ft_field_support.outer_first_mpls_over_gre),
						   &ib_spec->mpls.mask.tag))
				return -EOPNOTSUPP;

			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
					    outer_first_mpls_over_gre),
			       &ib_spec->mpls.val.tag,
			       sizeof(ib_spec->mpls.val.tag));
			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
					    outer_first_mpls_over_gre),
			       &ib_spec->mpls.mask.tag,
			       sizeof(ib_spec->mpls.mask.tag));
			break;
		default:
			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
							   ft_field_support.inner_first_mpls),
							   &ib_spec->mpls.mask.tag))
					return -EOPNOTSUPP;

				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
						    inner_first_mpls),
				       &ib_spec->mpls.val.tag,
				       sizeof(ib_spec->mpls.val.tag));
				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
						    inner_first_mpls),
				       &ib_spec->mpls.mask.tag,
				       sizeof(ib_spec->mpls.mask.tag));
			} else {
				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
							   ft_field_support.outer_first_mpls),
							   &ib_spec->mpls.mask.tag))
					return -EOPNOTSUPP;

				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
						    outer_first_mpls),
				       &ib_spec->mpls.val.tag,
				       sizeof(ib_spec->mpls.val.tag));
				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
						    outer_first_mpls),
				       &ib_spec->mpls.mask.tag,
				       sizeof(ib_spec->mpls.mask.tag));
			}
		}
		break;
	case IB_FLOW_SPEC_VXLAN_TUNNEL:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
					 LAST_TUNNEL_FIELD))
			return -EOPNOTSUPP;

		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
			 ntohl(ib_spec->tunnel.mask.tunnel_id));
		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
			 ntohl(ib_spec->tunnel.val.tunnel_id));
		break;
	case IB_FLOW_SPEC_ACTION_TAG:
		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
					 LAST_FLOW_TAG_FIELD))
			return -EOPNOTSUPP;
		if (ib_spec->flow_tag.tag_id >= BIT(24))
			return -EINVAL;

		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
		break;
	case IB_FLOW_SPEC_ACTION_DROP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
					 LAST_DROP_FIELD))
			return -EOPNOTSUPP;
		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
		break;
	case IB_FLOW_SPEC_ACTION_HANDLE:
		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
		if (ret)
			return ret;
		break;
	case IB_FLOW_SPEC_ACTION_COUNT:
		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
					 LAST_COUNTERS_FIELD))
			return -EOPNOTSUPP;

		/* for now support only one counters spec per flow */
		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
			return -EINVAL;

		action->counters = ib_spec->flow_count.counters;
		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* If a flow could catch both multicast and unicast packets,
 * it won't fall into the multicast flow steering table and this rule
 * could steal other multicast packets.
 */
static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
{
	union ib_flow_spec *flow_spec;

	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
	    ib_attr->num_of_specs < 1)
		return false;

	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
		struct ib_flow_spec_ipv4 *ipv4_spec;

		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
			return true;

		return false;
	}

	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
		struct ib_flow_spec_eth *eth_spec;

		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
		       is_multicast_ether_addr(eth_spec->val.dst_mac);
	}

	return false;
}

static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
			       const struct ib_flow_attr *flow_attr,
			       bool check_inner)
{
	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
	int match_ipv = check_inner ?
			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.inner_ip_version) :
			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					ft_field_support.outer_ip_version);
	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
	bool ipv4_spec_valid, ipv6_spec_valid;
	unsigned int ip_spec_type = 0;
	bool has_ethertype = false;
	unsigned int spec_index;
	bool mask_valid = true;
	u16 eth_type = 0;
	bool type_valid;

	/* Validate that ethertype is correct */
	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
		    ib_spec->eth.mask.ether_type) {
			mask_valid = (ib_spec->eth.mask.ether_type ==
				      htons(0xffff));
			has_ethertype = true;
			eth_type = ntohs(ib_spec->eth.val.ether_type);
		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
			ip_spec_type = ib_spec->type;
		}
		ib_spec = (void *)ib_spec + ib_spec->size;
	}

	type_valid = (!has_ethertype) || (!ip_spec_type);
	if (!type_valid && mask_valid) {
		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));

		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
			     (((eth_type == ETH_P_MPLS_UC) ||
			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
	}

	return type_valid;
}

static bool is_valid_attr(struct mlx5_core_dev *mdev,
			  const struct ib_flow_attr *flow_attr)
{
	return is_valid_ethertype(mdev, flow_attr, false) &&
	       is_valid_ethertype(mdev, flow_attr, true);
}

static void put_flow_table(struct mlx5_ib_dev *dev,
			   struct mlx5_ib_flow_prio *prio, bool ft_added)
{
	prio->refcount -= !!ft_added;
	if (!prio->refcount) {
		mlx5_destroy_flow_table(prio->flow_table);
		prio->flow_table = NULL;
	}
}

static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
							  struct mlx5_ib_flow_handler,
							  ibflow);
	struct mlx5_ib_flow_handler *iter, *tmp;
	struct mlx5_ib_dev *dev = handler->dev;

	mutex_lock(&dev->flow_db->lock);

	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
		mlx5_del_flow_rules(iter->rule);
		put_flow_table(dev, iter->prio, true);
		list_del(&iter->list);
		kfree(iter);
	}

	mlx5_del_flow_rules(handler->rule);
	put_flow_table(dev, handler->prio, true);
	mlx5_ib_counters_clear_description(handler->ibcounters);
	mutex_unlock(&dev->flow_db->lock);
	if (handler->flow_matcher)
		atomic_dec(&handler->flow_matcher->usecnt);
	kfree(handler);

	return 0;
}

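/*
 * Added note: each IB priority occupies two consecutive core priorities:
 * IB priority n maps to core priority 2n when IB_FLOW_ATTR_FLAGS_DONT_TRAP
 * is set and to 2n + 1 otherwise, so don't-trap rules always get the
 * lower-numbered slot of the pair.
 */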
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	priority *= 2;
	if (!dont_trap)
		priority++;
	return priority;
}

enum flow_table_type {
	MLX5_IB_FT_RX,
	MLX5_IB_FT_TX
};

#define MLX5_FS_MAX_TYPES	 6
#define MLX5_FS_MAX_ENTRIES	 BIT(16)

static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
					   struct mlx5_ib_flow_prio *prio,
					   int priority,
					   int num_entries, int num_groups,
					   u32 flags)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;

	ft_attr.prio = priority;
	ft_attr.max_fte = num_entries;
	ft_attr.flags = flags;
	ft_attr.autogroup.max_num_groups = num_groups;
	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ft))
		return ERR_CAST(ft);

	prio->flow_table = ft;
	prio->refcount = 0;
	return prio;
}

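/*
 * Added summary comment: get_flow_table() picks the flow namespace and
 * priority slot for a given ib_flow_attr (bypass, leftovers or sniffer)
 * and lazily creates the backing auto-grouped flow table on first use.
 */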
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
						struct ib_flow_attr *flow_attr,
						enum flow_table_type ft_type)
{
	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
	struct mlx5_flow_namespace *ns = NULL;
	enum mlx5_flow_namespace_type fn_type;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_table *ft;
	int max_table_size;
	int num_entries;
	int num_groups;
	bool esw_encap;
	u32 flags = 0;
	int priority;

	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
						       log_max_ft_size));
	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
	switch (flow_attr->type) {
	case IB_FLOW_ATTR_NORMAL:
		if (flow_is_multicast_only(flow_attr) && !dont_trap)
			priority = MLX5_IB_FLOW_MCAST_PRIO;
		else
			priority = ib_prio_to_core_prio(flow_attr->priority,
							dont_trap);
		if (ft_type == MLX5_IB_FT_RX) {
			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
			prio = &dev->flow_db->prios[priority];
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
						      reformat_l3_tunnel_to_l2))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		} else {
			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
				dev->mdev, log_max_ft_size));
			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
			prio = &dev->flow_db->egress_prios[priority];
			if (!dev->is_rep && !esw_encap &&
			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
		}
		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
		num_entries = MLX5_FS_MAX_ENTRIES;
		num_groups = MLX5_FS_MAX_TYPES;
		break;
	case IB_FLOW_ATTR_ALL_DEFAULT:
	case IB_FLOW_ATTR_MC_DEFAULT:
		ns = mlx5_get_flow_namespace(dev->mdev,
					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
		break;
	case IB_FLOW_ATTR_SNIFFER:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					allow_sniffer_and_nic_rx_shared_tir))
			return ERR_PTR(-EOPNOTSUPP);

		ns = mlx5_get_flow_namespace(
			dev->mdev, ft_type == MLX5_IB_FT_RX ?
					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);

		prio = &dev->flow_db->sniffer[ft_type];
		priority = 0;
		num_entries = 1;
		num_groups = 1;
		break;
	default:
		break;
	}

	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

	max_table_size = min_t(int, num_entries, max_table_size);

	ft = prio->flow_table;
	if (!ft)
		return _get_prio(ns, prio, priority, max_table_size, num_groups,
				 flags);

	return prio;
}

enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO,
	RDMA_RX_CNP_OPCOUNTER_PRIO,
};

enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO,
};

static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
			      struct mlx5_flow_spec *spec)
{
	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					ft_field_support.source_vhca_port) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
					ft_field_support.source_vhca_port))
		return -EOPNOTSUPP;

	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
			 misc_parameters.source_vhca_port);
	MLX5_SET(fte_match_param, &spec->match_value,
		 misc_parameters.source_vhca_port, port_num);

	return 0;
}

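/*
 * Added note: this builds a match on packets whose ECN field is
 * Congestion Experienced (INET_ECN_CE) for the given IP version; it is
 * used below by the CC_RX_CE_PKTS optional counter.
 */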
static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
			   struct mlx5_flow_spec *spec, int ipv)
{
	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					ft_field_support.outer_ip_version))
		return -EOPNOTSUPP;

	if (mlx5_core_mp_enabled(dev->mdev) &&
	    set_vhca_port_spec(dev, port_num, spec))
		return -EOPNOTSUPP;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_ecn);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
		 INET_ECN_CE);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_version);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
		 ipv);

	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}

static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
			struct mlx5_flow_spec *spec)
{
	if (mlx5_core_mp_enabled(dev->mdev) &&
	    set_vhca_port_spec(dev, port_num, spec))
		return -EOPNOTSUPP;

	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters.bth_opcode);
	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
		 IB_BTH_OPCODE_CNP);

	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}

int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type)
{
	enum mlx5_flow_namespace_type fn_type;
	int priority, i, err, spec_num;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_destination dst;
	struct mlx5_flow_namespace *ns;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_spec *spec;

	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	switch (type) {
	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
		if (set_ecn_ce_spec(dev, port_num, &spec[0],
				    MLX5_FS_IPV4_VERSION) ||
		    set_ecn_ce_spec(dev, port_num, &spec[1],
				    MLX5_FS_IPV6_VERSION)) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 2;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
		break;

	default:
		err = -EOPNOTSUPP;
		goto free;
	}

	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		goto free;
	}

	prio = &dev->flow_db->opfcs[type];
	if (!prio->flow_table) {
		prio = _get_prio(ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
		}
	}

	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst.counter_id = mlx5_fc_id(opfc->fc);

	flow_act.action =
		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	for (i = 0; i < spec_num; i++) {
		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
						    &flow_act, &dst, 1);
		if (IS_ERR(opfc->rule[i])) {
			err = PTR_ERR(opfc->rule[i]);
			goto del_rules;
		}
	}
	prio->refcount += spec_num;
	kfree(spec);

	return 0;

del_rules:
	for (i -= 1; i >= 0; i--)
		mlx5_del_flow_rules(opfc->rule[i]);
	put_flow_table(dev, prio, false);
free:
	kfree(spec);
	return err;
}

void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_op_fc *opfc,
			     enum mlx5_ib_optional_counter_type type)
{
	int i;

	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
		mlx5_del_flow_rules(opfc->rule[i]);
		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
	}
}

static void set_underlay_qp(struct mlx5_ib_dev *dev,
			    struct mlx5_flow_spec *spec,
			    u32 underlay_qpn)
{
	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
					   spec->match_criteria,
					   misc_parameters);
	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					   misc_parameters);

	if (underlay_qpn &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
				      ft_field_support.bth_dst_qp)) {
		MLX5_SET(fte_match_set_misc,
			 misc_params_v, bth_dst_qp, underlay_qpn);
		MLX5_SET(fte_match_set_misc,
			 misc_params_c, bth_dst_qp, 0xffffff);
	}
}

static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_eswitch_rep *rep)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	void *misc;

	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters_2);

		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
								   rep->vport));
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters_2);

		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
			 mlx5_eswitch_get_vport_metadata_mask());
	} else {
		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);

		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);

		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
	}
}

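/*
 * Added summary comment: _create_flow_rule() walks the variable-length
 * spec array that follows ib_flow_attr, builds one mlx5_flow_spec from it,
 * wires up optional counter/underlay/source-port matching, and installs
 * the rule in the priority's flow table.
 */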
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
						      struct mlx5_ib_flow_prio *ft_prio,
						      const struct ib_flow_attr *flow_attr,
						      struct mlx5_flow_destination *dst,
						      u32 underlay_qpn,
						      struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_flow_table	*ft = ft_prio->flow_table;
	struct mlx5_ib_flow_handler *handler;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	struct mlx5_flow_destination dest_arr[2] = {};
	struct mlx5_flow_destination *rule_dst = dest_arr;
	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
	unsigned int spec_index;
	u32 prev_type = 0;
	int err = 0;
	int dest_num = 0;
	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;

	if (!is_valid_attr(dev->mdev, flow_attr))
		return ERR_PTR(-EINVAL);

	if (dev->is_rep && is_egress)
		return ERR_PTR(-EINVAL);

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
	if (!handler || !spec) {
		err = -ENOMEM;
		goto free;
	}

	INIT_LIST_HEAD(&handler->list);

	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
		err = parse_flow_attr(dev->mdev, spec,
				      ib_flow, flow_attr, &flow_act,
				      prev_type);
		if (err < 0)
			goto free;

		prev_type = ((union ib_flow_spec *)ib_flow)->type;
		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
	}

	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
		memcpy(&dest_arr[0], dst, sizeof(*dst));
		dest_num++;
	}

	if (!flow_is_multicast_only(flow_attr))
		set_underlay_qp(dev, spec, underlay_qpn);

	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
		struct mlx5_eswitch_rep *rep;

		rep = dev->port[flow_attr->port - 1].rep;
		if (!rep) {
			err = -EINVAL;
			goto free;
		}

		mlx5_ib_set_rule_source_port(dev, spec, rep);
	}

	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		struct mlx5_ib_mcounters *mcounters;

		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
		if (err)
			goto free;

		mcounters = to_mcounters(flow_act.counters);
		handler->ibcounters = flow_act.counters;
		dest_arr[dest_num].type =
			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest_arr[dest_num].counter_id =
			mlx5_fc_id(mcounters->hw_cntrs_hndl);
		dest_num++;
	}

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
		if (!dest_num)
			rule_dst = NULL;
	} else {
		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
			flow_act.action |=
				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
		if (is_egress)
			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
		else if (dest_num)
			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	}

	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
			     spec->flow_context.flow_tag, flow_attr->type);
		err = -EINVAL;
		goto free;
	}
	handler->rule = mlx5_add_flow_rules(ft, spec,
					    &flow_act,
					    rule_dst, dest_num);

	if (IS_ERR(handler->rule)) {
		err = PTR_ERR(handler->rule);
		goto free;
	}

	ft_prio->refcount++;
	handler->prio = ft_prio;
	handler->dev = dev;

	ft_prio->flow_table = ft;
free:
	if (err && handler) {
		mlx5_ib_counters_clear_description(handler->ibcounters);
		kfree(handler);
	}
	kvfree(spec);
	return err ? ERR_PTR(err) : handler;
}

static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
						     struct mlx5_ib_flow_prio *ft_prio,
						     const struct ib_flow_attr *flow_attr,
						     struct mlx5_flow_destination *dst)
{
	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}

enum {
	LEFTOVERS_MC,
	LEFTOVERS_UC,
};

static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
							  struct mlx5_ib_flow_prio *ft_prio,
							  struct ib_flow_attr *flow_attr,
							  struct mlx5_flow_destination *dst)
{
	struct mlx5_ib_flow_handler *handler_ucast = NULL;
	struct mlx5_ib_flow_handler *handler = NULL;

	static struct {
		struct ib_flow_attr	flow_attr;
		struct ib_flow_spec_eth eth_flow;
	} leftovers_specs[] = {
		[LEFTOVERS_MC] = {
			.flow_attr = {
				.num_of_specs = 1,
				.size = sizeof(leftovers_specs[0])
			},
			.eth_flow = {
				.type = IB_FLOW_SPEC_ETH,
				.size = sizeof(struct ib_flow_spec_eth),
				.mask = {.dst_mac = {0x1} },
				.val =  {.dst_mac = {0x1} }
			}
		},
		[LEFTOVERS_UC] = {
			.flow_attr = {
				.num_of_specs = 1,
				.size = sizeof(leftovers_specs[0])
			},
			.eth_flow = {
				.type = IB_FLOW_SPEC_ETH,
				.size = sizeof(struct ib_flow_spec_eth),
				.mask = {.dst_mac = {0x1} },
				.val = {.dst_mac = {} }
			}
		}
	};

	handler = create_flow_rule(dev, ft_prio,
				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
				   dst);
	if (!IS_ERR(handler) &&
	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
		handler_ucast = create_flow_rule(dev, ft_prio,
						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
						 dst);
		if (IS_ERR(handler_ucast)) {
			mlx5_del_flow_rules(handler->rule);
			ft_prio->refcount--;
			kfree(handler);
			handler = handler_ucast;
		} else {
			list_add(&handler_ucast->list, &handler->list);
		}
	}

	return handler;
}

static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
							struct mlx5_ib_flow_prio *ft_rx,
							struct mlx5_ib_flow_prio *ft_tx,
							struct mlx5_flow_destination *dst)
{
	struct mlx5_ib_flow_handler *handler_rx;
	struct mlx5_ib_flow_handler *handler_tx;
	int err;
	static const struct ib_flow_attr flow_attr  = {
		.num_of_specs = 0,
		.type = IB_FLOW_ATTR_SNIFFER,
		.size = sizeof(flow_attr)
	};

	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
	if (IS_ERR(handler_rx)) {
		err = PTR_ERR(handler_rx);
		goto err;
	}

	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
	if (IS_ERR(handler_tx)) {
		err = PTR_ERR(handler_tx);
		goto err_tx;
	}

	list_add(&handler_tx->list, &handler_rx->list);

	return handler_rx;

err_tx:
	mlx5_del_flow_rules(handler_rx->rule);
	ft_rx->refcount--;
	kfree(handler_rx);
err:
	return ERR_PTR(err);
}

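/*
 * Added summary comment: mlx5_ib_create_flow() is this driver's entry
 * point for the verbs flow-steering API; it validates the user command
 * buffer, resolves the destination TIR (or port for egress), and
 * dispatches to the normal, leftovers or sniffer rule builders above.
 */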
   1268static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
   1269					   struct ib_flow_attr *flow_attr,
   1270					   struct ib_udata *udata)
   1271{
   1272	struct mlx5_ib_dev *dev = to_mdev(qp->device);
   1273	struct mlx5_ib_qp *mqp = to_mqp(qp);
   1274	struct mlx5_ib_flow_handler *handler = NULL;
   1275	struct mlx5_flow_destination *dst = NULL;
   1276	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
   1277	struct mlx5_ib_flow_prio *ft_prio;
   1278	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
   1279	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
   1280	size_t min_ucmd_sz, required_ucmd_sz;
   1281	int err;
   1282	int underlay_qpn;
   1283
   1284	if (udata && udata->inlen) {
   1285		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
   1286		if (udata->inlen < min_ucmd_sz)
   1287			return ERR_PTR(-EOPNOTSUPP);
   1288
   1289		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
   1290		if (err)
   1291			return ERR_PTR(err);
   1292
   1293		/* currently supports only one counters data */
   1294		if (ucmd_hdr.ncounters_data > 1)
   1295			return ERR_PTR(-EINVAL);
   1296
   1297		required_ucmd_sz = min_ucmd_sz +
   1298			sizeof(struct mlx5_ib_flow_counters_data) *
   1299			ucmd_hdr.ncounters_data;
   1300		if (udata->inlen > required_ucmd_sz &&
   1301		    !ib_is_udata_cleared(udata, required_ucmd_sz,
   1302					 udata->inlen - required_ucmd_sz))
   1303			return ERR_PTR(-EOPNOTSUPP);
   1304
   1305		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
   1306		if (!ucmd)
   1307			return ERR_PTR(-ENOMEM);
   1308
   1309		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
   1310		if (err)
   1311			goto free_ucmd;
   1312	}
   1313
   1314	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
   1315		err = -ENOMEM;
   1316		goto free_ucmd;
   1317	}
   1318
   1319	if (flow_attr->flags &
   1320	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
   1321		err = -EINVAL;
   1322		goto free_ucmd;
   1323	}
   1324
   1325	if (is_egress &&
   1326	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
   1327	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
   1328		err = -EINVAL;
   1329		goto free_ucmd;
   1330	}
   1331
   1332	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
   1333	if (!dst) {
   1334		err = -ENOMEM;
   1335		goto free_ucmd;
   1336	}
   1337
   1338	mutex_lock(&dev->flow_db->lock);
   1339
   1340	ft_prio = get_flow_table(dev, flow_attr,
   1341				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
   1342	if (IS_ERR(ft_prio)) {
   1343		err = PTR_ERR(ft_prio);
   1344		goto unlock;
   1345	}
   1346	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
   1347		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
   1348		if (IS_ERR(ft_prio_tx)) {
   1349			err = PTR_ERR(ft_prio_tx);
   1350			ft_prio_tx = NULL;
   1351			goto destroy_ft;
   1352		}
   1353	}
   1354
   1355	if (is_egress) {
   1356		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
   1357	} else {
   1358		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
   1359		if (mqp->is_rss)
   1360			dst->tir_num = mqp->rss_qp.tirn;
   1361		else
   1362			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
   1363	}
   1364
   1365	switch (flow_attr->type) {
   1366	case IB_FLOW_ATTR_NORMAL:
   1367		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
   1368				       mqp->underlay_qpn :
   1369				       0;
   1370		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
   1371					    underlay_qpn, ucmd);
   1372		break;
   1373	case IB_FLOW_ATTR_ALL_DEFAULT:
   1374	case IB_FLOW_ATTR_MC_DEFAULT:
   1375		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
   1376		break;
   1377	case IB_FLOW_ATTR_SNIFFER:
   1378		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
   1379		break;
   1380	default:
   1381		err = -EINVAL;
   1382		goto destroy_ft;
   1383	}
   1384
   1385	if (IS_ERR(handler)) {
   1386		err = PTR_ERR(handler);
   1387		handler = NULL;
   1388		goto destroy_ft;
   1389	}
   1390
   1391	mutex_unlock(&dev->flow_db->lock);
   1392	kfree(dst);
   1393	kfree(ucmd);
   1394
   1395	return &handler->ibflow;
   1396
   1397destroy_ft:
   1398	put_flow_table(dev, ft_prio, false);
   1399	if (ft_prio_tx)
   1400		put_flow_table(dev, ft_prio_tx, false);
   1401unlock:
   1402	mutex_unlock(&dev->flow_db->lock);
   1403	kfree(dst);
   1404free_ucmd:
   1405	kfree(ucmd);
   1406	return ERR_PTR(err);
   1407}
   1408
   1409static struct mlx5_ib_flow_prio *
   1410_get_flow_table(struct mlx5_ib_dev *dev,
   1411		struct mlx5_ib_flow_matcher *fs_matcher,
   1412		bool mcast)
   1413{
   1414	struct mlx5_flow_namespace *ns = NULL;
   1415	struct mlx5_ib_flow_prio *prio = NULL;
   1416	int max_table_size = 0;
   1417	bool esw_encap;
   1418	u32 flags = 0;
   1419	int priority;
   1420
   1421	if (mcast)
   1422		priority = MLX5_IB_FLOW_MCAST_PRIO;
   1423	else
   1424		priority = ib_prio_to_core_prio(fs_matcher->priority, false);
   1425
   1426	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
   1427		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
   1428	switch (fs_matcher->ns_type) {
   1429	case MLX5_FLOW_NAMESPACE_BYPASS:
   1430		max_table_size = BIT(
   1431			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
   1432		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
   1433			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
   1434		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
   1435					      reformat_l3_tunnel_to_l2) &&
   1436		    !esw_encap)
   1437			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
   1438		break;
   1439	case MLX5_FLOW_NAMESPACE_EGRESS:
   1440		max_table_size = BIT(
   1441			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
   1442		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
   1443		    !esw_encap)
   1444			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
   1445		break;
   1446	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
   1447		max_table_size = BIT(
   1448			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
   1449		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
   1450			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
   1451		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
   1452					       reformat_l3_tunnel_to_l2) &&
   1453		    esw_encap)
   1454			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
   1455		priority = fs_matcher->priority;
   1456		break;
   1457	case MLX5_FLOW_NAMESPACE_RDMA_RX:
   1458		max_table_size = BIT(
   1459			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
   1460		priority = fs_matcher->priority;
   1461		break;
   1462	case MLX5_FLOW_NAMESPACE_RDMA_TX:
   1463		max_table_size = BIT(
   1464			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
   1465		priority = fs_matcher->priority;
   1466		break;
   1467	default:
   1468		break;
   1469	}
   1470
   1471	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
   1472
   1473	ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
   1474	if (!ns)
   1475		return ERR_PTR(-EOPNOTSUPP);
   1476
   1477	switch (fs_matcher->ns_type) {
   1478	case MLX5_FLOW_NAMESPACE_BYPASS:
   1479		prio = &dev->flow_db->prios[priority];
   1480		break;
   1481	case MLX5_FLOW_NAMESPACE_EGRESS:
   1482		prio = &dev->flow_db->egress_prios[priority];
   1483		break;
   1484	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
   1485		prio = &dev->flow_db->fdb[priority];
   1486		break;
   1487	case MLX5_FLOW_NAMESPACE_RDMA_RX:
   1488		prio = &dev->flow_db->rdma_rx[priority];
   1489		break;
   1490	case MLX5_FLOW_NAMESPACE_RDMA_TX:
   1491		prio = &dev->flow_db->rdma_tx[priority];
   1492		break;
   1493	default: return ERR_PTR(-EINVAL);
   1494	}
   1495
   1496	if (!prio)
   1497		return ERR_PTR(-EINVAL);
   1498
   1499	if (prio->flow_table)
   1500		return prio;
   1501
   1502	return _get_prio(ns, prio, priority, max_table_size,
   1503			 MLX5_FS_MAX_TYPES, flags);
   1504}
   1505
   1506static struct mlx5_ib_flow_handler *
   1507_create_raw_flow_rule(struct mlx5_ib_dev *dev,
   1508		      struct mlx5_ib_flow_prio *ft_prio,
   1509		      struct mlx5_flow_destination *dst,
   1510		      struct mlx5_ib_flow_matcher  *fs_matcher,
   1511		      struct mlx5_flow_context *flow_context,
   1512		      struct mlx5_flow_act *flow_act,
   1513		      void *cmd_in, int inlen,
   1514		      int dst_num)
   1515{
   1516	struct mlx5_ib_flow_handler *handler;
   1517	struct mlx5_flow_spec *spec;
   1518	struct mlx5_flow_table *ft = ft_prio->flow_table;
   1519	int err = 0;
   1520
   1521	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
   1522	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
   1523	if (!handler || !spec) {
   1524		err = -ENOMEM;
   1525		goto free;
   1526	}
   1527
   1528	INIT_LIST_HEAD(&handler->list);
   1529
   1530	memcpy(spec->match_value, cmd_in, inlen);
   1531	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
   1532	       fs_matcher->mask_len);
   1533	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
   1534	spec->flow_context = *flow_context;
   1535
   1536	handler->rule = mlx5_add_flow_rules(ft, spec,
   1537					    flow_act, dst, dst_num);
   1538
   1539	if (IS_ERR(handler->rule)) {
   1540		err = PTR_ERR(handler->rule);
   1541		goto free;
   1542	}
   1543
   1544	ft_prio->refcount++;
   1545	handler->prio = ft_prio;
   1546	handler->dev = dev;
   1547	ft_prio->flow_table = ft;
   1548
   1549free:
   1550	if (err)
   1551		kfree(handler);
   1552	kvfree(spec);
   1553	return err ? ERR_PTR(err) : handler;
   1554}
   1555
   1556static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
   1557				void *match_v)
   1558{
   1559	void *match_c;
   1560	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
   1561	void *dmac, *dmac_mask;
   1562	void *ipv4, *ipv4_mask;
   1563
   1564	if (!(fs_matcher->match_criteria_enable &
   1565	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
   1566		return false;
   1567
   1568	match_c = fs_matcher->matcher_mask.match_params;
   1569	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
   1570					   outer_headers);
   1571	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
   1572					   outer_headers);
   1573
   1574	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
   1575			    dmac_47_16);
   1576	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
   1577				 dmac_47_16);
   1578
   1579	if (is_multicast_ether_addr(dmac) &&
   1580	    is_multicast_ether_addr(dmac_mask))
   1581		return true;
   1582
   1583	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
   1584			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
   1585
   1586	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
   1587				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
   1588
   1589	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
   1590	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
   1591		return true;
   1592
   1593	return false;
   1594}
   1595
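/*
 * raw_fs_rule_add() - resolve the destination array and install a raw
 * flow rule under the flow_db lock.
 *
 * At most two destinations are used: the forwarding destination (TIR,
 * flow table or port) selected by @dest_type, plus an optional flow
 * counter when MLX5_FLOW_CONTEXT_ACTION_COUNT is set. On failure, the
 * reference taken on the flow table priority by _get_flow_table() is
 * dropped again via put_flow_table().
 */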
   1596static struct mlx5_ib_flow_handler *raw_fs_rule_add(
   1597	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
   1598	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
   1599	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
   1600{
   1601	struct mlx5_flow_destination *dst;
   1602	struct mlx5_ib_flow_prio *ft_prio;
   1603	struct mlx5_ib_flow_handler *handler;
   1604	int dst_num = 0;
   1605	bool mcast;
   1606	int err;
   1607
   1608	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
   1609		return ERR_PTR(-EOPNOTSUPP);
   1610
   1611	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
   1612		return ERR_PTR(-ENOMEM);
   1613
   1614	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
   1615	if (!dst)
   1616		return ERR_PTR(-ENOMEM);
   1617
   1618	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
   1619	mutex_lock(&dev->flow_db->lock);
   1620
   1621	ft_prio = _get_flow_table(dev, fs_matcher, mcast);
   1622	if (IS_ERR(ft_prio)) {
   1623		err = PTR_ERR(ft_prio);
   1624		goto unlock;
   1625	}
   1626
   1627	switch (dest_type) {
   1628	case MLX5_FLOW_DESTINATION_TYPE_TIR:
   1629		dst[dst_num].type = dest_type;
   1630		dst[dst_num++].tir_num = dest_id;
   1631		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1632		break;
   1633	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
   1634		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
   1635		dst[dst_num++].ft_num = dest_id;
   1636		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
   1637		break;
   1638	case MLX5_FLOW_DESTINATION_TYPE_PORT:
   1639		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
   1640		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
   1641		break;
   1642	default:
   1643		break;
   1644	}
   1645
   1646	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
   1647		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
   1648		dst[dst_num].counter_id = counter_id;
   1649		dst_num++;
   1650	}
   1651
   1652	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
   1653					fs_matcher, flow_context, flow_act,
   1654					cmd_in, inlen, dst_num);
   1655
   1656	if (IS_ERR(handler)) {
   1657		err = PTR_ERR(handler);
   1658		goto destroy_ft;
   1659	}
   1660
   1661	mutex_unlock(&dev->flow_db->lock);
   1662	atomic_inc(&fs_matcher->usecnt);
   1663	handler->flow_matcher = fs_matcher;
   1664
   1665	kfree(dst);
   1666
   1667	return handler;
   1668
   1669destroy_ft:
   1670	put_flow_table(dev, ft_prio, false);
   1671unlock:
   1672	mutex_unlock(&dev->flow_db->lock);
   1673	kfree(dst);
   1674
   1675	return ERR_PTR(err);
   1676}
   1677
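/*
 * Tear down the device resources behind a raw flow action. DECAP carries
 * no device state, so it needs no teardown beyond freeing the action.
 */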
   1678static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
   1679{
   1680	switch (maction->flow_action_raw.sub_type) {
   1681	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
   1682		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
   1683					   maction->flow_action_raw.modify_hdr);
   1684		break;
   1685	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
   1686		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
   1687					     maction->flow_action_raw.pkt_reformat);
   1688		break;
   1689	case MLX5_IB_FLOW_ACTION_DECAP:
   1690		break;
   1691	default:
   1692		break;
   1693	}
   1694}
   1695
   1696static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
   1697{
   1698	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
   1699
   1700	switch (action->type) {
   1701	case IB_FLOW_ACTION_UNSPECIFIED:
   1702		destroy_flow_action_raw(maction);
   1703		break;
   1704	default:
   1705		WARN_ON(true);
   1706		break;
   1707	}
   1708
   1709	kfree(maction);
   1710	return 0;
   1711}
   1712
   1713static int
   1714mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
   1715			     enum mlx5_flow_namespace_type *namespace)
   1716{
   1717	switch (table_type) {
   1718	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
   1719		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
   1720		break;
   1721	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
   1722		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
   1723		break;
   1724	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
   1725		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
   1726		break;
   1727	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
   1728		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
   1729		break;
   1730	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
   1731		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
   1732		break;
   1733	default:
   1734		return -EINVAL;
   1735	}
   1736
   1737	return 0;
   1738}
   1739
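/*
 * Attribute specs for MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE: only the
 * NORMAL flow type carries inline data (a u16 priority); the sniffer
 * and default types take no payload.
 */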
   1740static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
   1741	[MLX5_IB_FLOW_TYPE_NORMAL] = {
   1742		.type = UVERBS_ATTR_TYPE_PTR_IN,
   1743		.u.ptr = {
   1744			.len = sizeof(u16), /* data is priority */
   1745			.min_len = sizeof(u16),
   1746		}
   1747	},
   1748	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
   1749		.type = UVERBS_ATTR_TYPE_PTR_IN,
   1750		UVERBS_ATTR_NO_DATA(),
   1751	},
   1752	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
   1753		.type = UVERBS_ATTR_TYPE_PTR_IN,
   1754		UVERBS_ATTR_NO_DATA(),
   1755	},
   1756	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
   1757		.type = UVERBS_ATTR_TYPE_PTR_IN,
   1758		UVERBS_ATTR_NO_DATA(),
   1759	},
   1760};
   1761
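/*
 * Recover the destination type and id from a DEVX object by inspecting
 * the destroy command header (dinbox) it was created with: a TIR or a
 * flow table may serve as a flow steering destination.
 */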
   1762static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
   1763{
   1764	struct devx_obj *devx_obj = obj;
   1765	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
   1766
   1767	switch (opcode) {
   1768	case MLX5_CMD_OP_DESTROY_TIR:
   1769		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
   1770		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
   1771				    obj_id);
   1772		return true;
   1773
   1774	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
   1775		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
   1776		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
   1777				    table_id);
   1778		return true;
   1779	default:
   1780		return false;
   1781	}
   1782}
   1783
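/*
 * get_dests() - validate and extract the rule destination from the
 * uverbs attributes.
 *
 * DEFAULT_MISS and DROP are mutually exclusive, and each namespace
 * restricts which destinations are legal: FDB requires a DEVX flow
 * table or DROP, RDMA_RX requires exactly one of a DEVX object or a
 * QP, and TIR destinations are rejected on the egress/RDMA_TX side.
 */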
   1784static int get_dests(struct uverbs_attr_bundle *attrs,
   1785		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
   1786		     int *dest_type, struct ib_qp **qp, u32 *flags)
   1787{
   1788	bool dest_devx, dest_qp;
   1789	void *devx_obj;
   1790	int err;
   1791
   1792	dest_devx = uverbs_attr_is_valid(attrs,
   1793					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
   1794	dest_qp = uverbs_attr_is_valid(attrs,
   1795				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
   1796
   1797	*flags = 0;
   1798	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
   1799				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
   1800					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
   1801	if (err)
   1802		return err;
   1803
    1804	/* The two flags are mutually exclusive */
   1805	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
   1806	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
   1807		return -EINVAL;
   1808
   1809	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
   1810		if (dest_devx && (dest_qp || *flags))
   1811			return -EINVAL;
   1812		else if (dest_qp && *flags)
   1813			return -EINVAL;
   1814	}
   1815
    1816	/* For FDB, allow only a DEVX object or drop as the destination */
   1817	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
   1818	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
   1819		return -EINVAL;
   1820
    1821	/* Only a DEVX object or a QP may be the dest when inserting into RDMA_RX */
   1822	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
   1823	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
   1824		return -EINVAL;
   1825
   1826	*qp = NULL;
   1827	if (dest_devx) {
   1828		devx_obj =
   1829			uverbs_attr_get_obj(attrs,
   1830					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
   1831
   1832		/* Verify that the given DEVX object is a flow
   1833		 * steering destination.
   1834		 */
   1835		if (!is_flow_dest(devx_obj, dest_id, dest_type))
   1836			return -EINVAL;
    1837		/* Only a flow table may be the dest when inserting into FDB or RDMA_RX */
   1838		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
   1839		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
   1840		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
   1841			return -EINVAL;
   1842	} else if (dest_qp) {
   1843		struct mlx5_ib_qp *mqp;
   1844
   1845		*qp = uverbs_attr_get_obj(attrs,
   1846					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
   1847		if (IS_ERR(*qp))
   1848			return PTR_ERR(*qp);
   1849
   1850		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
   1851			return -EINVAL;
   1852
   1853		mqp = to_mqp(*qp);
   1854		if (mqp->is_rss)
   1855			*dest_id = mqp->rss_qp.tirn;
   1856		else
   1857			*dest_id = mqp->raw_packet_qp.rq.tirn;
   1858		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
   1859	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
   1860		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
   1861		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
   1862		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
   1863	}
   1864
   1865	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
   1866	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
   1867	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
   1868		return -EINVAL;
   1869
   1870	return 0;
   1871}
   1872
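/*
 * A DEVX object is usable as a flow counter when it was created by
 * ALLOC_FLOW_COUNTER (identified here via its DEALLOC destroy header).
 * For bulk allocations, @offset selects one counter within the bulk
 * and must stay below the bulk size.
 */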
   1873static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
   1874{
   1875	struct devx_obj *devx_obj = obj;
   1876	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
   1877
   1878	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
   1880		if (offset && offset >= devx_obj->flow_counter_bulk_size)
   1881			return false;
   1882
   1883		*counter_id = MLX5_GET(dealloc_flow_counter_in,
   1884				       devx_obj->dinbox,
   1885				       flow_counter_id);
   1886		*counter_id += offset;
   1887		return true;
   1888	}
   1889
   1890	return false;
   1891}
   1892
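/*
 * MLX5_IB_METHOD_CREATE_FLOW: the raw (DEVX-based) flow creation path.
 * The caller needs CAP_NET_RAW, supplies a matcher plus a raw match
 * value, and may attach up to two flow actions, one counter (with an
 * optional offset into a counter bulk), and a flow tag below 2^24.
 */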
   1893#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
   1894static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
   1895	struct uverbs_attr_bundle *attrs)
   1896{
   1897	struct mlx5_flow_context flow_context = {.flow_tag =
   1898		MLX5_FS_DEFAULT_FLOW_TAG};
   1899	u32 *offset_attr, offset = 0, counter_id = 0;
   1900	int dest_id, dest_type = -1, inlen, len, ret, i;
   1901	struct mlx5_ib_flow_handler *flow_handler;
   1902	struct mlx5_ib_flow_matcher *fs_matcher;
   1903	struct ib_uobject **arr_flow_actions;
   1904	struct ib_uflow_resources *uflow_res;
   1905	struct mlx5_flow_act flow_act = {};
   1906	struct ib_qp *qp = NULL;
   1907	void *devx_obj, *cmd_in;
   1908	struct ib_uobject *uobj;
   1909	struct mlx5_ib_dev *dev;
   1910	u32 flags;
   1911
   1912	if (!capable(CAP_NET_RAW))
   1913		return -EPERM;
   1914
   1915	fs_matcher = uverbs_attr_get_obj(attrs,
   1916					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
    1917	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
   1918	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
   1919
   1920	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
   1921		return -EINVAL;
   1922
   1923	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
   1924		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
   1925
   1926	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
   1927		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
   1928
   1929	len = uverbs_attr_get_uobjs_arr(attrs,
   1930		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
   1931	if (len) {
   1932		devx_obj = arr_flow_actions[0]->object;
   1933
   1934		if (uverbs_attr_is_valid(attrs,
   1935					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
   1937			int num_offsets = uverbs_attr_ptr_get_array_size(
   1938				attrs,
   1939				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
   1940				sizeof(u32));
   1941
   1942			if (num_offsets != 1)
   1943				return -EINVAL;
   1944
   1945			offset_attr = uverbs_attr_get_alloced_ptr(
   1946				attrs,
   1947				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
   1948			offset = *offset_attr;
   1949		}
   1950
   1951		if (!is_flow_counter(devx_obj, offset, &counter_id))
   1952			return -EINVAL;
   1953
   1954		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
   1955	}
   1956
   1957	cmd_in = uverbs_attr_get_alloced_ptr(
   1958		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
   1959	inlen = uverbs_attr_get_len(attrs,
   1960				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
   1961
   1962	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
   1963	if (!uflow_res)
   1964		return -ENOMEM;
   1965
   1966	len = uverbs_attr_get_uobjs_arr(attrs,
   1967		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
   1968	for (i = 0; i < len; i++) {
   1969		struct mlx5_ib_flow_action *maction =
   1970			to_mflow_act(arr_flow_actions[i]->object);
   1971
   1972		ret = parse_flow_flow_action(maction, false, &flow_act);
   1973		if (ret)
   1974			goto err_out;
   1975		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
   1976				   arr_flow_actions[i]->object);
   1977	}
   1978
   1979	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
   1980			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
   1981	if (!ret) {
   1982		if (flow_context.flow_tag >= BIT(24)) {
   1983			ret = -EINVAL;
   1984			goto err_out;
   1985		}
   1986		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
   1987	}
   1988
   1989	flow_handler =
   1990		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
   1991				counter_id, cmd_in, inlen, dest_id, dest_type);
   1992	if (IS_ERR(flow_handler)) {
   1993		ret = PTR_ERR(flow_handler);
   1994		goto err_out;
   1995	}
   1996
   1997	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
   1998
   1999	return 0;
   2000err_out:
   2001	ib_uverbs_flow_resources_free(uflow_res);
   2002	return ret;
   2003}
   2004
   2005static int flow_matcher_cleanup(struct ib_uobject *uobject,
   2006				enum rdma_remove_reason why,
   2007				struct uverbs_attr_bundle *attrs)
   2008{
   2009	struct mlx5_ib_flow_matcher *obj = uobject->object;
   2010
   2011	if (atomic_read(&obj->usecnt))
   2012		return -EBUSY;
   2013
   2014	kfree(obj);
   2015	return 0;
   2016}
   2017
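/*
 * Select the steering namespace for a new matcher. FT_TYPE and the
 * legacy FLOW_FLAGS attribute are mutually exclusive: FT_TYPE is
 * translated directly, FLOW_FLAGS only distinguishes egress (NIC TX),
 * and the default remains the NIC RX bypass namespace.
 */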
   2018static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
   2019			      struct mlx5_ib_flow_matcher *obj)
   2020{
   2021	enum mlx5_ib_uapi_flow_table_type ft_type =
   2022		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
   2023	u32 flags;
   2024	int err;
   2025
    2026	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE; older users
    2027	 * should migrate to it. The old attribute is kept to avoid breaking userspace.
    2028	 */
   2029	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
   2030	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
   2031		return -EINVAL;
   2032
   2033	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
   2034		err = uverbs_get_const(&ft_type, attrs,
   2035				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
   2036		if (err)
   2037			return err;
   2038
   2039		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
   2040		if (err)
   2041			return err;
   2042
   2043		return 0;
   2044	}
   2045
   2046	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
   2047		err = uverbs_get_flags32(&flags, attrs,
   2048					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
   2049					 IB_FLOW_ATTR_FLAGS_EGRESS);
   2050		if (err)
   2051			return err;
   2052
   2053		if (flags) {
   2054			mlx5_ib_ft_type_to_namespace(
   2055				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
   2056				&obj->ns_type);
   2057			return 0;
   2058		}
   2059	}
   2060
   2061	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
   2062
   2063	return 0;
   2064}
   2065
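/*
 * MLX5_IB_METHOD_FLOW_MATCHER_CREATE: copy the match mask, flow type
 * (with its priority for NORMAL), and match criteria from the
 * attributes, then resolve the namespace. FDB matchers additionally
 * require the eswitch to be in offloads mode.
 */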
   2066static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
   2067	struct uverbs_attr_bundle *attrs)
   2068{
   2069	struct ib_uobject *uobj = uverbs_attr_get_uobject(
   2070		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
   2071	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
   2072	struct mlx5_ib_flow_matcher *obj;
   2073	int err;
   2074
    2075	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
   2076	if (!obj)
   2077		return -ENOMEM;
   2078
   2079	obj->mask_len = uverbs_attr_get_len(
   2080		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
   2081	err = uverbs_copy_from(&obj->matcher_mask,
   2082			       attrs,
   2083			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
   2084	if (err)
   2085		goto end;
   2086
   2087	obj->flow_type = uverbs_attr_get_enum_id(
   2088		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
   2089
   2090	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
   2091		err = uverbs_copy_from(&obj->priority,
   2092				       attrs,
   2093				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
   2094		if (err)
   2095			goto end;
   2096	}
   2097
   2098	err = uverbs_copy_from(&obj->match_criteria_enable,
   2099			       attrs,
   2100			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
   2101	if (err)
   2102		goto end;
   2103
   2104	err = mlx5_ib_matcher_ns(attrs, obj);
   2105	if (err)
   2106		goto end;
   2107
   2108	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
   2109	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
   2110		err = -EINVAL;
   2111		goto end;
   2112	}
   2113
   2114	uobj->object = obj;
   2115	obj->mdev = dev->mdev;
   2116	atomic_set(&obj->usecnt, 0);
   2117	return 0;
   2118
   2119end:
   2120	kfree(obj);
   2121	return err;
   2122}
   2123
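/*
 * Allocate a modify-header action in the namespace matching @ft_type
 * and wrap it in an mlx5_ib_flow_action so it can later be attached to
 * rules and torn down by destroy_flow_action_raw().
 */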
   2124static struct ib_flow_action *
   2125mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
   2126			     enum mlx5_ib_uapi_flow_table_type ft_type,
   2127			     u8 num_actions, void *in)
   2128{
   2129	enum mlx5_flow_namespace_type namespace;
   2130	struct mlx5_ib_flow_action *maction;
   2131	int ret;
   2132
   2133	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
   2134	if (ret)
   2135		return ERR_PTR(-EINVAL);
   2136
   2137	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
   2138	if (!maction)
   2139		return ERR_PTR(-ENOMEM);
   2140
   2141	maction->flow_action_raw.modify_hdr =
   2142		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
   2143
   2144	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
   2145		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
   2146		kfree(maction);
   2147		return ERR_PTR(ret);
   2148	}
   2149	maction->flow_action_raw.sub_type =
   2150		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
   2151	maction->flow_action_raw.dev = dev;
   2152
   2153	return &maction->ib_action;
   2154}
   2155
   2156static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
   2157{
   2158	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
   2159					 max_modify_header_actions) ||
   2160	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
   2161					 max_modify_header_actions) ||
   2162	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
   2163					 max_modify_header_actions);
   2164}
   2165
   2166static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
   2167	struct uverbs_attr_bundle *attrs)
   2168{
   2169	struct ib_uobject *uobj = uverbs_attr_get_uobject(
   2170		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
   2171	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
   2172	enum mlx5_ib_uapi_flow_table_type ft_type;
   2173	struct ib_flow_action *action;
   2174	int num_actions;
   2175	void *in;
   2176	int ret;
   2177
   2178	if (!mlx5_ib_modify_header_supported(mdev))
   2179		return -EOPNOTSUPP;
   2180
   2181	in = uverbs_attr_get_alloced_ptr(attrs,
   2182		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
   2183
   2184	num_actions = uverbs_attr_ptr_get_array_size(
   2185		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
   2186		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
   2187	if (num_actions < 0)
   2188		return num_actions;
   2189
   2190	ret = uverbs_get_const(&ft_type, attrs,
   2191			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
   2192	if (ret)
   2193		return ret;
   2194	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
   2195	if (IS_ERR(action))
   2196		return PTR_ERR(action);
   2197
   2198	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
   2199				       IB_FLOW_ACTION_UNSPECIFIED);
   2200
   2201	return 0;
   2202}
   2203
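/*
 * Packet reformat support is direction-specific: the encap variants
 * (L2_TO_L2_TUNNEL, L2_TO_L3_TUNNEL) are only valid on NIC TX tables,
 * while the decap variants (L3_TUNNEL_TO_L2, L2_TUNNEL_TO_L2) are only
 * valid on NIC RX tables, each gated by the matching firmware cap.
 */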
   2204static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
   2205						      u8 packet_reformat_type,
   2206						      u8 ft_type)
   2207{
   2208	switch (packet_reformat_type) {
   2209	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
   2210		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
   2211			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
   2212						  encap_general_header);
   2213		break;
   2214	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
   2215		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
   2216			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
   2217				reformat_l2_to_l3_tunnel);
   2218		break;
   2219	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
   2220		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
   2221			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
   2222				reformat_l3_tunnel_to_l2);
   2223		break;
   2224	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
   2225		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
   2226			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
   2227		break;
   2228	default:
   2229		break;
   2230	}
   2231
   2232	return false;
   2233}
   2234
    2235	static int mlx5_ib_dv_to_prm_packet_reformat_type(u8 dv_prt, u8 *prm_prt)
   2236{
   2237	switch (dv_prt) {
   2238	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
   2239		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
   2240		break;
   2241	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
   2242		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
   2243		break;
   2244	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
   2245		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
   2246		break;
   2247	default:
   2248		return -EINVAL;
   2249	}
   2250
   2251	return 0;
   2252}
   2253
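/*
 * Translate the uapi reformat type to its PRM equivalent and allocate
 * the packet reformat context with the caller-provided header data.
 * Plain decap (L2_TUNNEL_TO_L2) never reaches here; it is handled
 * inline by the method handler below.
 */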
   2254static int mlx5_ib_flow_action_create_packet_reformat_ctx(
   2255	struct mlx5_ib_dev *dev,
   2256	struct mlx5_ib_flow_action *maction,
   2257	u8 ft_type, u8 dv_prt,
   2258	void *in, size_t len)
   2259{
   2260	struct mlx5_pkt_reformat_params reformat_params;
   2261	enum mlx5_flow_namespace_type namespace;
   2262	u8 prm_prt;
   2263	int ret;
   2264
   2265	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
   2266	if (ret)
   2267		return ret;
   2268
    2269		ret = mlx5_ib_dv_to_prm_packet_reformat_type(dv_prt, &prm_prt);
   2270	if (ret)
   2271		return ret;
   2272
   2273	memset(&reformat_params, 0, sizeof(reformat_params));
   2274	reformat_params.type = prm_prt;
   2275	reformat_params.size = len;
   2276	reformat_params.data = in;
   2277	maction->flow_action_raw.pkt_reformat =
   2278		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
   2279					   namespace);
   2280	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
   2281		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
   2282		return ret;
   2283	}
   2284
   2285	maction->flow_action_raw.sub_type =
   2286		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
   2287	maction->flow_action_raw.dev = dev;
   2288
   2289	return 0;
   2290}
   2291
   2292static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
   2293	struct uverbs_attr_bundle *attrs)
   2294{
   2295	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
   2296		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
   2297	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
   2298	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
   2299	enum mlx5_ib_uapi_flow_table_type ft_type;
   2300	struct mlx5_ib_flow_action *maction;
   2301	int ret;
   2302
   2303	ret = uverbs_get_const(&ft_type, attrs,
   2304			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
   2305	if (ret)
   2306		return ret;
   2307
   2308	ret = uverbs_get_const(&dv_prt, attrs,
   2309			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
   2310	if (ret)
   2311		return ret;
   2312
   2313	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
   2314		return -EOPNOTSUPP;
   2315
   2316	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
   2317	if (!maction)
   2318		return -ENOMEM;
   2319
   2320	if (dv_prt ==
   2321	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
   2322		maction->flow_action_raw.sub_type =
   2323			MLX5_IB_FLOW_ACTION_DECAP;
   2324		maction->flow_action_raw.dev = mdev;
   2325	} else {
   2326		void *in;
   2327		int len;
   2328
   2329		in = uverbs_attr_get_alloced_ptr(attrs,
   2330			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
   2331		if (IS_ERR(in)) {
   2332			ret = PTR_ERR(in);
   2333			goto free_maction;
   2334		}
   2335
   2336		len = uverbs_attr_get_len(attrs,
   2337			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
   2338
   2339		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
   2340			maction, ft_type, dv_prt, in, len);
   2341		if (ret)
   2342			goto free_maction;
   2343	}
   2344
   2345	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
   2346				       IB_FLOW_ACTION_UNSPECIFIED);
   2347	return 0;
   2348
   2349free_maction:
   2350	kfree(maction);
   2351	return ret;
   2352}
   2353
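/*
 * The uverbs method trees below expose the raw flow API to userspace.
 * As an illustrative sketch (rdma-core usage, not part of this file),
 * applications typically reach these methods through the mlx5dv API
 * along these lines:
 *
 *	struct mlx5dv_flow_matcher_attr attr = {
 *		.type = IBV_FLOW_ATTR_NORMAL,
 *		.match_criteria_enable = 1 << 0,	// outer headers
 *		.match_mask = mask,	// struct mlx5dv_flow_match_parameters
 *	};
 *	matcher = mlx5dv_create_flow_matcher(ctx, &attr);
 *	flow = mlx5dv_create_flow(matcher, value, num_actions, actions);
 *
 * which map onto MLX5_IB_METHOD_FLOW_MATCHER_CREATE and
 * MLX5_IB_METHOD_CREATE_FLOW respectively.
 */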
   2354DECLARE_UVERBS_NAMED_METHOD(
   2355	MLX5_IB_METHOD_CREATE_FLOW,
   2356	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
   2357			UVERBS_OBJECT_FLOW,
   2358			UVERBS_ACCESS_NEW,
   2359			UA_MANDATORY),
   2360	UVERBS_ATTR_PTR_IN(
   2361		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
   2362		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
   2363		UA_MANDATORY,
   2364		UA_ALLOC_AND_COPY),
   2365	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
   2366			MLX5_IB_OBJECT_FLOW_MATCHER,
   2367			UVERBS_ACCESS_READ,
   2368			UA_MANDATORY),
   2369	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
   2370			UVERBS_OBJECT_QP,
   2371			UVERBS_ACCESS_READ),
   2372	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
   2373			MLX5_IB_OBJECT_DEVX_OBJ,
   2374			UVERBS_ACCESS_READ),
   2375	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
   2376			     UVERBS_OBJECT_FLOW_ACTION,
   2377			     UVERBS_ACCESS_READ, 1,
   2378			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
   2379			     UA_OPTIONAL),
   2380	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
   2381			   UVERBS_ATTR_TYPE(u32),
   2382			   UA_OPTIONAL),
   2383	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
   2384			     MLX5_IB_OBJECT_DEVX_OBJ,
   2385			     UVERBS_ACCESS_READ, 1, 1,
   2386			     UA_OPTIONAL),
   2387	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
   2388			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
   2389			   UA_OPTIONAL,
   2390			   UA_ALLOC_AND_COPY),
   2391	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
   2392			     enum mlx5_ib_create_flow_flags,
   2393			     UA_OPTIONAL));
   2394
   2395DECLARE_UVERBS_NAMED_METHOD_DESTROY(
   2396	MLX5_IB_METHOD_DESTROY_FLOW,
   2397	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
   2398			UVERBS_OBJECT_FLOW,
   2399			UVERBS_ACCESS_DESTROY,
   2400			UA_MANDATORY));
   2401
   2402ADD_UVERBS_METHODS(mlx5_ib_fs,
   2403		   UVERBS_OBJECT_FLOW,
   2404		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
   2405		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
   2406
   2407DECLARE_UVERBS_NAMED_METHOD(
   2408	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
   2409	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
   2410			UVERBS_OBJECT_FLOW_ACTION,
   2411			UVERBS_ACCESS_NEW,
   2412			UA_MANDATORY),
   2413	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
   2414			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
   2415				   set_add_copy_action_in_auto)),
   2416			   UA_MANDATORY,
   2417			   UA_ALLOC_AND_COPY),
   2418	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
   2419			     enum mlx5_ib_uapi_flow_table_type,
   2420			     UA_MANDATORY));
   2421
   2422DECLARE_UVERBS_NAMED_METHOD(
   2423	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
   2424	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
   2425			UVERBS_OBJECT_FLOW_ACTION,
   2426			UVERBS_ACCESS_NEW,
   2427			UA_MANDATORY),
   2428	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
   2429			   UVERBS_ATTR_MIN_SIZE(1),
   2430			   UA_ALLOC_AND_COPY,
   2431			   UA_OPTIONAL),
   2432	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
   2433			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
   2434			     UA_MANDATORY),
   2435	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
   2436			     enum mlx5_ib_uapi_flow_table_type,
   2437			     UA_MANDATORY));
   2438
   2439ADD_UVERBS_METHODS(
   2440	mlx5_ib_flow_actions,
   2441	UVERBS_OBJECT_FLOW_ACTION,
   2442	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
   2443	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
   2444
   2445DECLARE_UVERBS_NAMED_METHOD(
   2446	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
   2447	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
   2448			MLX5_IB_OBJECT_FLOW_MATCHER,
   2449			UVERBS_ACCESS_NEW,
   2450			UA_MANDATORY),
   2451	UVERBS_ATTR_PTR_IN(
   2452		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
   2453		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
   2454		UA_MANDATORY),
   2455	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
   2456			    mlx5_ib_flow_type,
   2457			    UA_MANDATORY),
   2458	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
   2459			   UVERBS_ATTR_TYPE(u8),
   2460			   UA_MANDATORY),
   2461	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
   2462			     enum ib_flow_flags,
   2463			     UA_OPTIONAL),
   2464	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
   2465			     enum mlx5_ib_uapi_flow_table_type,
   2466			     UA_OPTIONAL));
   2467
   2468DECLARE_UVERBS_NAMED_METHOD_DESTROY(
   2469	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
   2470	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
   2471			MLX5_IB_OBJECT_FLOW_MATCHER,
   2472			UVERBS_ACCESS_DESTROY,
   2473			UA_MANDATORY));
   2474
   2475DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
   2476			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
   2477			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
   2478			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
   2479
   2480const struct uapi_definition mlx5_ib_flow_defs[] = {
   2481	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
   2482		MLX5_IB_OBJECT_FLOW_MATCHER),
   2483	UAPI_DEF_CHAIN_OBJ_TREE(
   2484		UVERBS_OBJECT_FLOW,
   2485		&mlx5_ib_fs),
   2486	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
   2487				&mlx5_ib_flow_actions),
   2488	{},
   2489};
   2490
   2491static const struct ib_device_ops flow_ops = {
   2492	.create_flow = mlx5_ib_create_flow,
   2493	.destroy_flow = mlx5_ib_destroy_flow,
   2494	.destroy_flow_action = mlx5_ib_destroy_flow_action,
   2495};
   2496
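/*
 * mlx5_ib_fs_init() - allocate the per-device flow database and hook up
 * the legacy create/destroy flow verbs alongside the ioctl-based API
 * declared above. The flow_db itself is freed later on device cleanup
 * (see mlx5_ib_fs_cleanup() in fs.h).
 */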
   2497int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
   2498{
   2499	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
   2500
   2501	if (!dev->flow_db)
   2502		return -ENOMEM;
   2503
   2504	mutex_init(&dev->flow_db->lock);
   2505
   2506	ib_set_device_ops(&dev->ib_dev, &flow_ops);
   2507	return 0;
   2508}