cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bnxt_tc.c (60081B)


/* Broadcom NetXtreme-C/E network driver.
 *
 * Copyright (c) 2017 Broadcom Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.
 */

#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <net/flow_dissector.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/vxlan.h>

#include "bnxt_hsi.h"
#include "bnxt.h"
#include "bnxt_hwrm.h"
#include "bnxt_sriov.h"
#include "bnxt_tc.h"
#include "bnxt_vfr.h"

#define BNXT_FID_INVALID			0xffff
#define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))

#define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000)
#define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
#define is_vlan_pcp_zero(vlan_tci)	\
	((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000)
#define is_vid_exactmatch(vlan_tci_mask)	\
	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)

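/* In an 802.1Q TCI, the PCP occupies the top 3 bits (VLAN_PRIO_MASK,
 * shift 13), DEI 1 bit, and the VID the low 12 bits (VLAN_VID_MASK).
 * For example, VLAN_TCI(100, 5) == (5 << 13) | 100 == 0xa064. The helpers
 * above test only the PCP or VID portion of a TCI (or TCI mask).
 */
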
static bool is_wildcard(void *mask, int len);
static bool is_exactmatch(void *mask, int len);
/* Return the dst fid of the func for flow forwarding
 * For PFs: src_fid is the fid of the PF
 * For VF-reps: src_fid is the fid of the VF
 */
static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
{
	struct bnxt *bp;

	/* check if dev belongs to the same switch */
	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
			    dev->ifindex);
		return BNXT_FID_INVALID;
	}

	/* Is dev a VF-rep? */
	if (bnxt_dev_is_vf_rep(dev))
		return bnxt_vf_rep_get_fid(dev);

	bp = netdev_priv(dev);
	return bp->pf.fw_fid;
}

static int bnxt_tc_parse_redir(struct bnxt *bp,
			       struct bnxt_tc_actions *actions,
			       const struct flow_action_entry *act)
{
	struct net_device *dev = act->dev;

	if (!dev) {
		netdev_info(bp->dev, "no dev in mirred action\n");
		return -EINVAL;
	}

	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
	actions->dst_dev = dev;
	return 0;
}

static int bnxt_tc_parse_vlan(struct bnxt *bp,
			      struct bnxt_tc_actions *actions,
			      const struct flow_action_entry *act)
{
	switch (act->id) {
	case FLOW_ACTION_VLAN_POP:
		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
		break;
	case FLOW_ACTION_VLAN_PUSH:
		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
		actions->push_vlan_tci = htons(act->vlan.vid);
		actions->push_vlan_tpid = act->vlan.proto;
		break;
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
				    struct bnxt_tc_actions *actions,
				    const struct flow_action_entry *act)
{
	const struct ip_tunnel_info *tun_info = act->tunnel;
	const struct ip_tunnel_key *tun_key = &tun_info->key;

	if (ip_tunnel_info_af(tun_info) != AF_INET) {
		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
		return -EOPNOTSUPP;
	}

	actions->tun_encap_key = *tun_key;
	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
	return 0;
}

/* Key & Mask from the stack come unaligned in multiple iterations of 4 bytes
 * each (u32).
 * This routine consolidates such multiple unaligned values into one
 * field each for Key & Mask (for src and dst macs separately)
 * For example,
 *			Mask/Key	Offset	Iteration
 *			==========	======	=========
 *	dst mac		0xffffffff	0	1
 *	dst mac		0x0000ffff	4	2
 *
 *	src mac		0xffff0000	4	1
 *	src mac		0xffffffff	8	2
 *
 * The above combination coming from the stack will be consolidated as
 *			Mask/Key
 *			==============
 *	src mac:	0xffffffffffff
 *	dst mac:	0xffffffffffff
 */
static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
				 u8 *actual_key, u8 *actual_mask)
{
	u32 key = get_unaligned((u32 *)actual_key);
	u32 mask = get_unaligned((u32 *)actual_mask);

	part_key &= part_mask;
	part_key |= key & ~part_mask;

	put_unaligned(mask | part_mask, (u32 *)actual_mask);
	put_unaligned(part_key, (u32 *)actual_key);
}

static int
bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
			    u16 *eth_addr, u16 *eth_addr_mask)
{
	u16 *p;
	int j;

	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
		return -EINVAL;

	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
			return -EINVAL;
		/* FW expects dmac to be in u16 array format */
		p = eth_addr;
		for (j = 0; j < 3; j++)
			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
	}

	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
			return -EINVAL;
		/* FW expects smac to be in u16 array format */
		p = &eth_addr[ETH_ALEN / 2];
		for (j = 0; j < 3; j++)
			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
	}

	return 0;
}

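/* Parse one FLOW_ACTION_MANGLE (pedit) entry. Ethernet-header edits are
 * accumulated into the caller's eth_addr/eth_addr_mask arrays (consolidated
 * later by bnxt_fill_l2_rewrite_fields()); IPv4/IPv6 edits program a NAT
 * src/dst translation; TCP/UDP port edits are accepted only after an L3
 * rewrite has been seen, since the HW cannot rewrite L4 alone.
 */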
static int
bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
		    u8 *eth_addr_mask)
{
	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
	u32 mask, val, offset, idx;
	u8 htype;

	offset = act->mangle.offset;
	htype = act->mangle.htype;
	mask = ~act->mangle.mask;
	val = act->mangle.val;

	switch (htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
			netdev_err(bp->dev,
				   "%s: eth_hdr: Invalid pedit field\n",
				   __func__);
			return -EINVAL;
		}
		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;

		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
				     &eth_addr_mask[offset]);
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
		actions->nat.l3_is_ipv4 = true;
		if (offset == offsetof(struct iphdr, saddr)) {
			actions->nat.src_xlate = true;
			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
		} else if (offset == offsetof(struct iphdr, daddr)) {
			actions->nat.src_xlate = false;
			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
		} else {
			netdev_err(bp->dev,
				   "%s: IPv4_hdr: Invalid pedit field\n",
				   __func__);
			return -EINVAL;
		}

		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
			   &actions->nat.l3.ipv4.daddr);
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
		actions->nat.l3_is_ipv4 = false;
		if (offset >= offsetof(struct ipv6hdr, saddr) &&
		    offset < offset_of_ip6_daddr) {
			/* 16 byte IPv6 address comes in 4 iterations of
			 * 4-byte chunks each
			 */
			actions->nat.src_xlate = true;
			idx = (offset - offset_of_ip6_saddr) / 4;
			/* First 4 bytes will be copied to idx 0 and so on */
			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
		} else if (offset >= offset_of_ip6_daddr &&
			   offset < offset_of_ip6_daddr + 16) {
			actions->nat.src_xlate = false;
			idx = (offset - offset_of_ip6_daddr) / 4;
			actions->nat.l3.ipv6.daddr.s6_addr32[idx] = htonl(val);
		} else {
			netdev_err(bp->dev,
				   "%s: IPv6_hdr: Invalid pedit field\n",
				   __func__);
			return -EINVAL;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		/* HW does not support L4 rewrite alone without L3
		 * rewrite
		 */
		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
			netdev_err(bp->dev,
				   "Need to specify L3 rewrite as well\n");
			return -EINVAL;
		}
		if (actions->nat.src_xlate)
			actions->nat.l4.ports.sport = htons(val);
		else
			actions->nat.l4.ports.dport = htons(val);
		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
			   actions->nat.l4.ports.sport,
			   actions->nat.l4.ports.dport);
		break;
	default:
		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
			   __func__);
		return -EINVAL;
	}
	return 0;
}

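/* Walk the TC action list and translate it into bnxt_tc_actions. A DROP
 * action short-circuits the walk; L2 rewrites collected from pedit entries
 * are consolidated at the end, and for a forward action the destination
 * fid is resolved (the PF's own fid when the packet is tunnel-encapped).
 */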
static int bnxt_tc_parse_actions(struct bnxt *bp,
				 struct bnxt_tc_actions *actions,
				 struct flow_action *flow_action,
				 struct netlink_ext_ack *extack)
{
	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
	 * smac (6 bytes) if rewrite of both is specified, otherwise either
	 * dmac or smac
	 */
	u16 eth_addr_mask[ETH_ALEN] = { 0 };
	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
	 * smac (6 bytes) if rewrite of both is specified, otherwise either
	 * dmac or smac
	 */
	u16 eth_addr[ETH_ALEN] = { 0 };
	struct flow_action_entry *act;
	int i, rc;

	if (!flow_action_has_entries(flow_action)) {
		netdev_info(bp->dev, "no actions\n");
		return -EINVAL;
	}

	if (!flow_action_basic_hw_stats_check(flow_action, extack))
		return -EOPNOTSUPP;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_DROP:
			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
			return 0; /* don't bother with other actions */
		case FLOW_ACTION_REDIRECT:
			rc = bnxt_tc_parse_redir(bp, actions, act);
			if (rc)
				return rc;
			break;
		case FLOW_ACTION_VLAN_POP:
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_MANGLE:
			rc = bnxt_tc_parse_vlan(bp, actions, act);
			if (rc)
				return rc;
			break;
		case FLOW_ACTION_TUNNEL_ENCAP:
			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
			if (rc)
				return rc;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
			break;
		/* Packet edit: L2 rewrite, NAT, NAPT */
		case FLOW_ACTION_MANGLE:
			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
						 (u8 *)eth_addr,
						 (u8 *)eth_addr_mask);
			if (rc)
				return rc;
			break;
		default:
			break;
		}
	}

	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
						 eth_addr_mask);
		if (rc)
			return rc;
	}

	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
			/* dst_fid is PF's fid */
			actions->dst_fid = bp->pf.fw_fid;
		} else {
			/* find the FID from dst_dev */
			actions->dst_fid =
				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
			if (actions->dst_fid == BNXT_FID_INVALID)
				return -EINVAL;
		}
	}

	return 0;
}

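/* Translate a flow_cls_offload classifier into the driver's bnxt_tc_flow
 * representation: each dissector key present in the rule fills in the
 * corresponding key/mask pair (L2, VLAN, L3, L4, ICMP and tunnel fields),
 * then the action list is parsed via bnxt_tc_parse_actions().
 * An illustrative rule that exercises this path could look like:
 *   tc filter add dev <uplink> ingress protocol ip flower \
 *     src_ip 10.0.0.1 ip_proto tcp dst_port 80 action drop
 */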
static int bnxt_tc_parse_flow(struct bnxt *bp,
			      struct flow_cls_offload *tc_flow_cmd,
			      struct bnxt_tc_flow *flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
	struct flow_dissector *dissector = rule->match.dissector;

	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
			    dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);
		flow->l2_key.ether_type = match.key->n_proto;
		flow->l2_mask.ether_type = match.mask->n_proto;

		if (match.key->n_proto == htons(ETH_P_IP) ||
		    match.key->n_proto == htons(ETH_P_IPV6)) {
			flow->l4_key.ip_proto = match.key->ip_proto;
			flow->l4_mask.ip_proto = match.mask->ip_proto;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs match;

		flow_rule_match_eth_addrs(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
		ether_addr_copy(flow->l2_key.smac, match.key->src);
		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan match;

		flow_rule_match_vlan(rule, &match);
		flow->l2_key.inner_vlan_tci =
			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
					     match.key->vlan_priority));
		flow->l2_mask.inner_vlan_tci =
			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
					      match.mask->vlan_priority)));
		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
		flow->l2_key.num_vlans = 1;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
		flow->l3_key.ipv6.daddr = match.key->dst;
		flow->l3_mask.ipv6.daddr = match.mask->dst;
		flow->l3_key.ipv6.saddr = match.key->src;
		flow->l3_mask.ipv6.saddr = match.mask->src;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
		flow->l4_key.ports.dport = match.key->dst;
		flow->l4_mask.ports.dport = match.mask->dst;
		flow->l4_key.ports.sport = match.key->src;
		flow->l4_mask.ports.sport = match.mask->src;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
		struct flow_match_icmp match;

		flow_rule_match_icmp(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
		flow->l4_key.icmp.type = match.key->type;
		flow->l4_key.icmp.code = match.key->code;
		flow->l4_mask.icmp.type = match.mask->type;
		flow->l4_mask.icmp.code = match.mask->code;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_enc_ipv4_addrs(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
		flow->tun_key.u.ipv4.dst = match.key->dst;
		flow->tun_mask.u.ipv4.dst = match.mask->dst;
		flow->tun_key.u.ipv4.src = match.key->src;
		flow->tun_mask.u.ipv4.src = match.mask->src;
	} else if (flow_rule_match_key(rule,
				       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_match_enc_keyid match;

		flow_rule_match_enc_keyid(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_enc_ports(rule, &match);
		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
		flow->tun_key.tp_dst = match.key->dst;
		flow->tun_mask.tp_dst = match.mask->dst;
		flow->tun_key.tp_src = match.key->src;
		flow->tun_mask.tp_src = match.mask->src;
	}

	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
				     tc_flow_cmd->common.extack);
}

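/* Free a flow in FW. Firmware with 64-bit flow handle support is keyed by
 * ext_flow_handle; older firmware uses the 16-bit flow_handle.
 */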
static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
				   struct bnxt_tc_flow_node *flow_node)
{
	struct hwrm_cfa_flow_free_input *req;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_FREE);
	if (!rc) {
		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
			req->ext_flow_handle = flow_node->ext_flow_handle;
		else
			req->flow_handle = flow_node->flow_handle;

		rc = hwrm_req_send(bp, req);
	}
	if (rc)
		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);

	return rc;
}

static int ipv6_mask_len(struct in6_addr *mask)
{
	int mask_len = 0, i;

	for (i = 0; i < 4; i++)
		mask_len += inet_mask_len(mask->s6_addr32[i]);

	return mask_len;
}

static bool is_wildcard(void *mask, int len)
{
	const u8 *p = mask;
	int i;

	for (i = 0; i < len; i++) {
		if (p[i] != 0)
			return false;
	}
	return true;
}

static bool is_exactmatch(void *mask, int len)
{
	const u8 *p = mask;
	int i;

	for (i = 0; i < len; i++)
		if (p[i] != 0xff)
			return false;

	return true;
}

static bool is_vlan_tci_allowed(__be16 vlan_tci_mask,
				__be16 vlan_tci)
{
	/* VLAN priority must be either exactly zero or fully wildcarded and
	 * VLAN id must be exact match.
	 */
	if (is_vid_exactmatch(vlan_tci_mask) &&
	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
	      is_vlan_pcp_zero(vlan_tci)) ||
	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
		return true;

	return false;
}

static bool bits_set(void *key, int len)
{
	const u8 *p = key;
	int i;

	for (i = 0; i < len; i++)
		if (p[i] != 0)
			return true;

	return false;
}

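/* Build and send a CFA_FLOW_ALLOC request: action flags (drop/fwd/NAT/
 * L2 rewrite/tunnel) and match fields (L2/L3/L4) are translated from the
 * parsed flow. On success the returned handles are stored in flow_node;
 * see the response-interpretation comment near the end of this function.
 */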
static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
				    __le16 ref_flow_handle,
				    __le32 tunnel_handle,
				    struct bnxt_tc_flow_node *flow_node)
{
	struct bnxt_tc_actions *actions = &flow->actions;
	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
	struct hwrm_cfa_flow_alloc_output *resp;
	struct hwrm_cfa_flow_alloc_input *req;
	u16 flow_flags = 0, action_flags = 0;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_ALLOC);
	if (rc)
		return rc;

	req->src_fid = cpu_to_le16(flow->src_fid);
	req->ref_flow_handle = ref_flow_handle;

	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
		memcpy(req->l2_rewrite_dmac, actions->l2_rewrite_dmac,
		       ETH_ALEN);
		memcpy(req->l2_rewrite_smac, actions->l2_rewrite_smac,
		       ETH_ALEN);
		action_flags |=
			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
	}

	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
		if (actions->nat.l3_is_ipv4) {
			action_flags |=
				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;

			if (actions->nat.src_xlate) {
				action_flags |=
					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
				/* L3 source rewrite */
				req->nat_ip_address[0] =
					actions->nat.l3.ipv4.saddr.s_addr;
				/* L4 source port */
				if (actions->nat.l4.ports.sport)
					req->nat_port =
						actions->nat.l4.ports.sport;
			} else {
				action_flags |=
					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
				/* L3 destination rewrite */
				req->nat_ip_address[0] =
					actions->nat.l3.ipv4.daddr.s_addr;
				/* L4 destination port */
				if (actions->nat.l4.ports.dport)
					req->nat_port =
						actions->nat.l4.ports.dport;
			}
			netdev_dbg(bp->dev,
				   "req->nat_ip_address: %pI4 src_xlate: %d req->nat_port: %x\n",
				   req->nat_ip_address, actions->nat.src_xlate,
				   req->nat_port);
		} else {
			if (actions->nat.src_xlate) {
				action_flags |=
					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
				/* L3 source rewrite */
				memcpy(req->nat_ip_address,
				       actions->nat.l3.ipv6.saddr.s6_addr32,
				       sizeof(req->nat_ip_address));
				/* L4 source port */
				if (actions->nat.l4.ports.sport)
					req->nat_port =
						actions->nat.l4.ports.sport;
			} else {
				action_flags |=
					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
				/* L3 destination rewrite */
				memcpy(req->nat_ip_address,
				       actions->nat.l3.ipv6.daddr.s6_addr32,
				       sizeof(req->nat_ip_address));
				/* L4 destination port */
				if (actions->nat.l4.ports.dport)
					req->nat_port =
						actions->nat.l4.ports.dport;
			}
			netdev_dbg(bp->dev,
				   "req->nat_ip_address: %pI6 src_xlate: %d req->nat_port: %x\n",
				   req->nat_ip_address, actions->nat.src_xlate,
				   req->nat_port);
		}
	}

	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
		req->tunnel_handle = tunnel_handle;
		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
	}

	req->ethertype = flow->l2_key.ether_type;
	req->ip_proto = flow->l4_key.ip_proto;

	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
		memcpy(req->dmac, flow->l2_key.dmac, ETH_ALEN);
		memcpy(req->smac, flow->l2_key.smac, ETH_ALEN);
	}

	if (flow->l2_key.num_vlans > 0) {
		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
		/* FW expects the inner_vlan_tci value to be set
		 * in outer_vlan_tci when num_vlans is 1 (which is
		 * always the case in TC.)
		 */
		req->outer_vlan_tci = flow->l2_key.inner_vlan_tci;
	}

	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
	} else {
		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;

		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
			req->ip_dst[0] = l3_key->ipv4.daddr.s_addr;
			req->ip_dst_mask_len =
				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
			req->ip_src[0] = l3_key->ipv4.saddr.s_addr;
			req->ip_src_mask_len =
				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
			memcpy(req->ip_dst, l3_key->ipv6.daddr.s6_addr32,
			       sizeof(req->ip_dst));
			req->ip_dst_mask_len =
					ipv6_mask_len(&l3_mask->ipv6.daddr);
			memcpy(req->ip_src, l3_key->ipv6.saddr.s6_addr32,
			       sizeof(req->ip_src));
			req->ip_src_mask_len =
					ipv6_mask_len(&l3_mask->ipv6.saddr);
		}
	}

	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
		req->l4_src_port = flow->l4_key.ports.sport;
		req->l4_src_port_mask = flow->l4_mask.ports.sport;
		req->l4_dst_port = flow->l4_key.ports.dport;
		req->l4_dst_port_mask = flow->l4_mask.ports.dport;
	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
		/* l4 ports serve as type/code when ip_proto is ICMP */
		req->l4_src_port = htons(flow->l4_key.icmp.type);
		req->l4_src_port_mask = htons(flow->l4_mask.icmp.type);
		req->l4_dst_port = htons(flow->l4_key.icmp.code);
		req->l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
	}
	req->flags = cpu_to_le16(flow_flags);

	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
	} else {
		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
			req->dst_fid = cpu_to_le16(actions->dst_fid);
		}
		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
			action_flags |=
			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
			req->l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
			req->l2_rewrite_vlan_tci = actions->push_vlan_tci;
			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
		}
		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
			action_flags |=
			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
			/* Rewrite config with tpid = 0 implies vlan pop */
			req->l2_rewrite_vlan_tpid = 0;
			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
		}
	}
	req->action_flags = cpu_to_le16(action_flags);

	resp = hwrm_req_hold(bp, req);
	rc = hwrm_req_send_silent(bp, req);
	if (!rc) {
		/* CFA_FLOW_ALLOC response interpretation:
		 *		    fw with	     fw with
		 *		    16-bit	     64-bit
		 *		    flow handle      flow handle
		 *		    ===========	     ===========
		 * flow_handle      flow handle      flow context id
		 * ext_flow_handle  INVALID	     flow handle
		 * flow_id	    INVALID	     flow counter id
		 */
		flow_node->flow_handle = resp->flow_handle;
		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
			flow_node->ext_flow_handle = resp->ext_flow_handle;
			flow_node->flow_id = resp->flow_id;
		}
	}
	hwrm_req_drop(bp, req);
	return rc;
}

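/* Allocate a VXLAN decap filter in FW, matching on tunnel ID, outer dst MAC,
 * outer IPv4 addresses and dst UDP port as available in the flow. The
 * returned decap_filter_id is later used as the flow's tunnel_handle.
 */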
static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
				       struct bnxt_tc_flow *flow,
				       struct bnxt_tc_l2_key *l2_info,
				       __le32 ref_decap_handle,
				       __le32 *decap_filter_handle)
{
	struct hwrm_cfa_decap_filter_alloc_output *resp;
	struct ip_tunnel_key *tun_key = &flow->tun_key;
	struct hwrm_cfa_decap_filter_alloc_input *req;
	u32 enables = 0;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_ALLOC);
	if (rc)
		goto exit;

	req->flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
	req->tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
	req->ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;

	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
		req->tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
	}

	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
		ether_addr_copy(req->dst_macaddr, l2_info->dmac);
	}
	if (l2_info->num_vlans) {
		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
		req->t_ivlan_vid = l2_info->inner_vlan_tci;
	}

	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
	req->ethertype = htons(ETH_P_IP);

	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
		req->ip_addr_type =
			CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
		req->dst_ipaddr[0] = tun_key->u.ipv4.dst;
		req->src_ipaddr[0] = tun_key->u.ipv4.src;
	}

	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
		req->dst_port = tun_key->tp_dst;
	}

	/* Even though the decap_handle returned by hwrm_cfa_decap_filter_alloc
	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
	 */
	req->l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
	req->enables = cpu_to_le32(enables);

	resp = hwrm_req_hold(bp, req);
	rc = hwrm_req_send_silent(bp, req);
	if (!rc)
		*decap_filter_handle = resp->decap_filter_id;
	hwrm_req_drop(bp, req);
exit:
	if (rc)
		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);

	return rc;
}

static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
				      __le32 decap_filter_handle)
{
	struct hwrm_cfa_decap_filter_free_input *req;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_FREE);
	if (!rc) {
		req->decap_filter_id = decap_filter_handle;
		rc = hwrm_req_send(bp, req);
	}
	if (rc)
		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);

	return rc;
}

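/* Allocate a VXLAN encap record in FW describing the outer headers to be
 * prepended on encap: outer MACs/VLAN from the resolved l2_info, and an
 * IPv4 header with version 4 and IHL 5 (20 bytes, no options) plus the
 * UDP dst port and VNI from the tunnel key.
 */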
static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
				       struct ip_tunnel_key *encap_key,
				       struct bnxt_tc_l2_key *l2_info,
				       __le32 *encap_record_handle)
{
	struct hwrm_cfa_encap_record_alloc_output *resp;
	struct hwrm_cfa_encap_record_alloc_input *req;
	struct hwrm_cfa_encap_data_vxlan *encap;
	struct hwrm_vxlan_ipv4_hdr *encap_ipv4;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_ALLOC);
	if (rc)
		goto exit;

	encap = (struct hwrm_cfa_encap_data_vxlan *)&req->encap_data;
	req->encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
	if (l2_info->num_vlans) {
		encap->num_vlan_tags = l2_info->num_vlans;
		encap->ovlan_tci = l2_info->inner_vlan_tci;
		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
	}

	encap_ipv4 = (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
	encap_ipv4->ttl = encap_key->ttl;

	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
	encap_ipv4->protocol = IPPROTO_UDP;

	encap->dst_port = encap_key->tp_dst;
	encap->vni = tunnel_id_to_key32(encap_key->tun_id);

	resp = hwrm_req_hold(bp, req);
	rc = hwrm_req_send_silent(bp, req);
	if (!rc)
		*encap_record_handle = resp->encap_record_id;
	hwrm_req_drop(bp, req);
exit:
	if (rc)
		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);

	return rc;
}

static int hwrm_cfa_encap_record_free(struct bnxt *bp,
				      __le32 encap_record_handle)
{
	struct hwrm_cfa_encap_record_free_input *req;
	int rc;

	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_FREE);
	if (!rc) {
		req->encap_record_id = encap_record_handle;
		rc = hwrm_req_send(bp, req);
	}
	if (rc)
		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);

	return rc;
}

static int bnxt_tc_put_l2_node(struct bnxt *bp,
			       struct bnxt_tc_flow_node *flow_node)
{
	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	int rc;

	/* remove flow_node from the L2 shared flow list */
	list_del(&flow_node->l2_list_node);
	if (--l2_node->refcount == 0) {
		rc = rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
					    tc_info->l2_ht_params);
		if (rc)
			netdev_err(bp->dev,
				   "Error: %s: rhashtable_remove_fast: %d\n",
				   __func__, rc);
		kfree_rcu(l2_node, rcu);
	}
	return 0;
}

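/* Look up an L2 node by key, allocating and inserting a new one if it is
 * not found. The node's refcount is not incremented here; callers bump it
 * once they link a flow to the node.
 */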
static struct bnxt_tc_l2_node *
bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
		    struct rhashtable_params ht_params,
		    struct bnxt_tc_l2_key *l2_key)
{
	struct bnxt_tc_l2_node *l2_node;
	int rc;

	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
	if (!l2_node) {
		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
		if (!l2_node) {
			rc = -ENOMEM;
			return NULL;
		}

		l2_node->key = *l2_key;
		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
					    ht_params);
		if (rc) {
			kfree_rcu(l2_node, rcu);
			netdev_err(bp->dev,
				   "Error: %s: rhashtable_insert_fast: %d\n",
				   __func__, rc);
			return NULL;
		}
		INIT_LIST_HEAD(&l2_node->common_l2_flows);
	}
	return l2_node;
}

/* Get the ref_flow_handle for a flow by checking if there are any other
 * flows that share the same L2 key as this flow.
 */
static int
bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
			    struct bnxt_tc_flow_node *flow_node,
			    __le16 *ref_flow_handle)
{
	struct bnxt_tc_info *tc_info = bp->tc_info;
	struct bnxt_tc_flow_node *ref_flow_node;
	struct bnxt_tc_l2_node *l2_node;

	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
				      tc_info->l2_ht_params,
				      &flow->l2_key);
	if (!l2_node)
		return -1;

	/* If any other flow is using this l2_node, use its flow_handle
	 * as the ref_flow_handle
	 */
	if (l2_node->refcount > 0) {
		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
						 struct bnxt_tc_flow_node,
						 l2_list_node);
		*ref_flow_handle = ref_flow_node->flow_handle;
	} else {
		*ref_flow_handle = cpu_to_le16(0xffff);
	}

	/* Insert the l2_node into the flow_node so that subsequent flows
	 * with a matching l2 key can use the flow_handle of this flow
	 * as their ref_flow_handle
	 */
	flow_node->l2_node = l2_node;
	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
	l2_node->refcount++;
	return 0;
}

/* After the flow parsing is done, this routine is used for checking
 * if there are any aspects of the flow that prevent it from being
 * offloaded.
 */
static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
{
	/* If L4 ports are specified then ip_proto must be TCP or UDP */
	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
			    flow->l4_key.ip_proto);
		return false;
	}

	/* Currently source/dest MAC cannot be partial wildcard */
	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
		return false;
	}
	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
		return false;
	}

	/* Currently VLAN fields cannot be partial wildcard */
	if (bits_set(&flow->l2_key.inner_vlan_tci,
		     sizeof(flow->l2_key.inner_vlan_tci)) &&
	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
				 flow->l2_key.inner_vlan_tci)) {
		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
		return false;
	}
	if (bits_set(&flow->l2_key.inner_vlan_tpid,
		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
		return false;
	}

	/* Currently Ethertype must be set */
	if (!is_exactmatch(&flow->l2_mask.ether_type,
			   sizeof(flow->l2_mask.ether_type))) {
		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
		return false;
	}

	return true;
}

/* Returns the final refcount of the node on success
 * or a -ve error code on failure
 */
static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
				   struct rhashtable *tunnel_table,
				   struct rhashtable_params *ht_params,
				   struct bnxt_tc_tunnel_node *tunnel_node)
{
	int rc;

	if (--tunnel_node->refcount == 0) {
		rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
					    *ht_params);
		if (rc) {
			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
			rc = -1;
		}
		kfree_rcu(tunnel_node, rcu);
		return rc;
	} else {
		return tunnel_node->refcount;
	}
}

/* Get (or add) either encap or decap tunnel node from/to the supplied
 * hash table.
 */
static struct bnxt_tc_tunnel_node *
bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
			struct rhashtable_params *ht_params,
			struct ip_tunnel_key *tun_key)
{
	struct bnxt_tc_tunnel_node *tunnel_node;
	int rc;

	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
	if (!tunnel_node) {
		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
		if (!tunnel_node) {
			rc = -ENOMEM;
			goto err;
		}

		tunnel_node->key = *tun_key;
		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
					    *ht_params);
		if (rc) {
			kfree_rcu(tunnel_node, rcu);
			goto err;
		}
	}
	tunnel_node->refcount++;
	return tunnel_node;
err:
	netdev_info(bp->dev, "error rc=%d\n", rc);
	return NULL;
}

static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
					struct bnxt_tc_flow *flow,
					struct bnxt_tc_l2_key *l2_key,
					struct bnxt_tc_flow_node *flow_node,
					__le32 *ref_decap_handle)
{
	struct bnxt_tc_info *tc_info = bp->tc_info;
	struct bnxt_tc_flow_node *ref_flow_node;
	struct bnxt_tc_l2_node *decap_l2_node;

	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
					    tc_info->decap_l2_ht_params,
					    l2_key);
	if (!decap_l2_node)
		return -1;

	/* If any other flow is using this decap_l2_node, use its decap_handle
	 * as the ref_decap_handle
	 */
	if (decap_l2_node->refcount > 0) {
		ref_flow_node =
			list_first_entry(&decap_l2_node->common_l2_flows,
					 struct bnxt_tc_flow_node,
					 decap_l2_list_node);
		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
	} else {
		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
	}

	/* Insert the l2_node into the flow_node so that subsequent flows
	 * with a matching decap l2 key can use the decap_filter_handle of
	 * this flow as their ref_decap_handle
	 */
	flow_node->decap_l2_node = decap_l2_node;
	list_add(&flow_node->decap_l2_list_node,
		 &decap_l2_node->common_l2_flows);
	decap_l2_node->refcount++;
	return 0;
}

static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
				      struct bnxt_tc_flow_node *flow_node)
{
	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	int rc;

	/* remove flow_node from the decap L2 sharing flow list */
	list_del(&flow_node->decap_l2_list_node);
	if (--decap_l2_node->refcount == 0) {
		rc = rhashtable_remove_fast(&tc_info->decap_l2_table,
					    &decap_l2_node->node,
					    tc_info->decap_l2_ht_params);
		if (rc)
			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
		kfree_rcu(decap_l2_node, rcu);
	}
}

static void bnxt_tc_put_decap_handle(struct bnxt *bp,
				     struct bnxt_tc_flow_node *flow_node)
{
	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	int rc;

	if (flow_node->decap_l2_node)
		bnxt_tc_put_decap_l2_node(bp, flow_node);

	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
				     &tc_info->decap_ht_params,
				     flow_node->decap_node);
	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
		hwrm_cfa_decap_filter_free(bp, decap_handle);
}

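/* Resolve the outer L2 header for a tunnel by routing to the tunnel
 * destination: find the egress netdev (which must be the PF uplink or a
 * VLAN device on top of it), the next-hop neighbour's MAC and the source
 * IP/TTL, and fill tun_key/l2_info accordingly.
 */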
static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
				       struct ip_tunnel_key *tun_key,
				       struct bnxt_tc_l2_key *l2_info)
{
#ifdef CONFIG_INET
	struct net_device *real_dst_dev = bp->dev;
	struct flowi4 flow = { {0} };
	struct net_device *dst_dev;
	struct neighbour *nbr;
	struct rtable *rt;
	int rc;

	flow.flowi4_proto = IPPROTO_UDP;
	flow.fl4_dport = tun_key->tp_dst;
	flow.daddr = tun_key->u.ipv4.dst;

	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
	if (IS_ERR(rt)) {
		netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
		return -EOPNOTSUPP;
	}

	/* The route must either point to the real_dst_dev or a dst_dev that
	 * uses the real_dst_dev.
	 */
	dst_dev = rt->dst.dev;
	if (is_vlan_dev(dst_dev)) {
#if IS_ENABLED(CONFIG_VLAN_8021Q)
		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);

		if (vlan->real_dev != real_dst_dev) {
			netdev_info(bp->dev,
				    "dst_dev(%s) doesn't use PF-if(%s)\n",
				    netdev_name(dst_dev),
				    netdev_name(real_dst_dev));
			rc = -EOPNOTSUPP;
			goto put_rt;
		}
		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
		l2_info->inner_vlan_tpid = vlan->vlan_proto;
		l2_info->num_vlans = 1;
#endif
	} else if (dst_dev != real_dst_dev) {
		netdev_info(bp->dev,
			    "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
			    netdev_name(dst_dev), &flow.daddr,
			    netdev_name(real_dst_dev));
		rc = -EOPNOTSUPP;
		goto put_rt;
	}

	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
	if (!nbr) {
		netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
			    &flow.daddr);
		rc = -EOPNOTSUPP;
		goto put_rt;
	}

	tun_key->u.ipv4.src = flow.saddr;
	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
	neigh_release(nbr);
	ip_rt_put(rt);

	return 0;
put_rt:
	ip_rt_put(rt);
	return rc;
#else
	return -EOPNOTSUPP;
#endif
}

static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
				    struct bnxt_tc_flow_node *flow_node,
				    __le32 *decap_filter_handle)
{
	struct ip_tunnel_key *decap_key = &flow->tun_key;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	struct bnxt_tc_l2_key l2_info = { {0} };
	struct bnxt_tc_tunnel_node *decap_node;
	struct ip_tunnel_key tun_key = { 0 };
	struct bnxt_tc_l2_key *decap_l2_info;
	__le32 ref_decap_handle;
	int rc;

	/* Check if there's another flow using the same tunnel decap.
	 * If not, add this tunnel to the table and resolve the other
	 * tunnel header fields. Ignore src_port in the tunnel_key,
	 * since it is not required for decap filters.
	 */
	decap_key->tp_src = 0;
	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
					     &tc_info->decap_ht_params,
					     decap_key);
	if (!decap_node)
		return -ENOMEM;

	flow_node->decap_node = decap_node;

	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
		goto done;

	/* Resolve the L2 fields for tunnel decap:
	 * resolve the route for the remote vtep (saddr) of the decap key
	 * and find its next-hop MAC addresses
	 */
	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
	tun_key.tp_dst = flow->tun_key.tp_dst;
	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
	if (rc)
		goto put_decap;

	decap_l2_info = &decap_node->l2_info;
	/* decap smac is wildcarded */
	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
	if (l2_info.num_vlans) {
		decap_l2_info->num_vlans = l2_info.num_vlans;
		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
	}
	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;

	/* For getting a decap_filter_handle we first need to check if
	 * there are any other decap flows that share the same tunnel L2
	 * key and if so, pass that flow's decap_filter_handle as the
	 * ref_decap_handle for this flow.
	 */
	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
					  &ref_decap_handle);
	if (rc)
		goto put_decap;

	/* Issue the hwrm cmd to allocate a decap filter handle */
	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
					 ref_decap_handle,
					 &decap_node->tunnel_handle);
	if (rc)
		goto put_decap_l2;

done:
	*decap_filter_handle = decap_node->tunnel_handle;
	return 0;

put_decap_l2:
	bnxt_tc_put_decap_l2_node(bp, flow_node);
put_decap:
	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
				&tc_info->decap_ht_params,
				flow_node->decap_node);
	return rc;
}

static void bnxt_tc_put_encap_handle(struct bnxt *bp,
				     struct bnxt_tc_tunnel_node *encap_node)
{
	__le32 encap_handle = encap_node->tunnel_handle;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	int rc;

	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
				     &tc_info->encap_ht_params, encap_node);
	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
		hwrm_cfa_encap_record_free(bp, encap_handle);
}

/* Lookup the tunnel encap table and check if there's an encap_handle
 * alloc'd already.
 * If not, query L2 info via a route lookup and issue an encap_record_alloc
 * cmd to FW.
 */
static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
				    struct bnxt_tc_flow_node *flow_node,
				    __le32 *encap_handle)
{
	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
	struct bnxt_tc_info *tc_info = bp->tc_info;
	struct bnxt_tc_tunnel_node *encap_node;
	int rc;

	/* Check if there's another flow using the same tunnel encap.
	 * If not, add this tunnel to the table and resolve the other
	 * tunnel header fields
	 */
	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
					     &tc_info->encap_ht_params,
					     encap_key);
	if (!encap_node)
		return -ENOMEM;

	flow_node->encap_node = encap_node;

	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
		goto done;

	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
	if (rc)
		goto put_encap;

	/* Allocate a new tunnel encap record */
	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
					 &encap_node->tunnel_handle);
	if (rc)
		goto put_encap;

done:
	*encap_handle = encap_node->tunnel_handle;
	return 0;

put_encap:
	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
				&tc_info->encap_ht_params, encap_node);
	return rc;
}

static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
				      struct bnxt_tc_flow *flow,
				      struct bnxt_tc_flow_node *flow_node)
{
	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
		bnxt_tc_put_decap_handle(bp, flow_node);
	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
}

static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
				     struct bnxt_tc_flow *flow,
				     struct bnxt_tc_flow_node *flow_node,
				     __le32 *tunnel_handle)
{
	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
						tunnel_handle);
	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
						tunnel_handle);
	else
		return 0;
}
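
/* Tear down a flow: free it in FW, then drop the tunnel and L2 node
 * references under tc_info->lock, and finally remove the node from the
 * flow table.
 */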
   1466static int __bnxt_tc_del_flow(struct bnxt *bp,
   1467			      struct bnxt_tc_flow_node *flow_node)
   1468{
   1469	struct bnxt_tc_info *tc_info = bp->tc_info;
   1470	int rc;
   1471
   1472	/* send HWRM cmd to free the flow-id */
   1473	bnxt_hwrm_cfa_flow_free(bp, flow_node);
   1474
   1475	mutex_lock(&tc_info->lock);
   1476
   1477	/* release references to any tunnel encap/decap nodes */
   1478	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
   1479
   1480	/* release reference to l2 node */
   1481	bnxt_tc_put_l2_node(bp, flow_node);
   1482
   1483	mutex_unlock(&tc_info->lock);
   1484
   1485	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
   1486				    tc_info->flow_ht_params);
   1487	if (rc)
   1488		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
   1489			   __func__, rc);
   1490
   1491	kfree_rcu(flow_node, rcu);
   1492	return 0;
   1493}
   1494
   1495static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
   1496				 u16 src_fid)
   1497{
   1498	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
   1499}
   1500
   1501static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
   1502				u16 src_fid)
   1503{
   1504	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
   1505		flow->src_fid = bp->pf.fw_fid;
   1506	else
   1507		flow->src_fid = src_fid;
   1508}
   1509
   1510/* Add a new flow or replace an existing flow.
   1511 * Notes on locking:
   1512 * There are essentially two critical sections here.
   1513 * 1. while adding a new flow
   1514 *    a) lookup l2-key
   1515 *    b) issue HWRM cmd and get flow_handle
   1516 *    c) link l2-key with flow
   1517 * 2. while deleting a flow
   1518 *    a) unlinking l2-key from flow
   1519 * A lock is needed to protect these two critical sections.
   1520 *
   1521 * The hash-tables are already protected by the rhashtable API.
   1522 */
   1523static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
   1524			    struct flow_cls_offload *tc_flow_cmd)
   1525{
   1526	struct bnxt_tc_flow_node *new_node, *old_node;
   1527	struct bnxt_tc_info *tc_info = bp->tc_info;
   1528	struct bnxt_tc_flow *flow;
   1529	__le32 tunnel_handle = 0;
   1530	__le16 ref_flow_handle;
   1531	int rc;
   1532
   1533	/* allocate memory for the new flow and it's node */
   1534	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
   1535	if (!new_node) {
   1536		rc = -ENOMEM;
   1537		goto done;
   1538	}
   1539	new_node->cookie = tc_flow_cmd->cookie;
   1540	flow = &new_node->flow;
   1541
   1542	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
   1543	if (rc)
   1544		goto free_node;
   1545
   1546	bnxt_tc_set_src_fid(bp, flow, src_fid);
   1547	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
   1548
   1549	if (!bnxt_tc_can_offload(bp, flow)) {
   1550		rc = -EOPNOTSUPP;
   1551		kfree_rcu(new_node, rcu);
   1552		return rc;
   1553	}
   1554
   1555	/* If a flow exists with the same cookie, delete it */
   1556	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
   1557					  &tc_flow_cmd->cookie,
   1558					  tc_info->flow_ht_params);
   1559	if (old_node)
   1560		__bnxt_tc_del_flow(bp, old_node);
   1561
   1562	/* Check if the L2 part of the flow has been offloaded already.
    1563	 * If so, bump up its refcnt and get its reference handle.
   1564	 */
   1565	mutex_lock(&tc_info->lock);
   1566	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
   1567	if (rc)
   1568		goto unlock;
   1569
   1570	/* If the flow involves tunnel encap/decap, get tunnel_handle */
   1571	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
   1572	if (rc)
   1573		goto put_l2;
   1574
   1575	/* send HWRM cmd to alloc the flow */
   1576	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
   1577				      tunnel_handle, new_node);
   1578	if (rc)
   1579		goto put_tunnel;
   1580
   1581	flow->lastused = jiffies;
   1582	spin_lock_init(&flow->stats_lock);
   1583	/* add new flow to flow-table */
   1584	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
   1585				    tc_info->flow_ht_params);
   1586	if (rc)
   1587		goto hwrm_flow_free;
   1588
   1589	mutex_unlock(&tc_info->lock);
   1590	return 0;
   1591
   1592hwrm_flow_free:
   1593	bnxt_hwrm_cfa_flow_free(bp, new_node);
   1594put_tunnel:
   1595	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
   1596put_l2:
   1597	bnxt_tc_put_l2_node(bp, new_node);
   1598unlock:
   1599	mutex_unlock(&tc_info->lock);
   1600free_node:
   1601	kfree_rcu(new_node, rcu);
   1602done:
   1603	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
   1604		   __func__, tc_flow_cmd->cookie, rc);
   1605	return rc;
   1606}
   1607
   1608static int bnxt_tc_del_flow(struct bnxt *bp,
   1609			    struct flow_cls_offload *tc_flow_cmd)
   1610{
   1611	struct bnxt_tc_info *tc_info = bp->tc_info;
   1612	struct bnxt_tc_flow_node *flow_node;
   1613
   1614	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
   1615					   &tc_flow_cmd->cookie,
   1616					   tc_info->flow_ht_params);
   1617	if (!flow_node)
   1618		return -EINVAL;
   1619
   1620	return __bnxt_tc_del_flow(bp, flow_node);
   1621}
   1622
   1623static int bnxt_tc_get_flow_stats(struct bnxt *bp,
   1624				  struct flow_cls_offload *tc_flow_cmd)
   1625{
   1626	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
   1627	struct bnxt_tc_info *tc_info = bp->tc_info;
   1628	struct bnxt_tc_flow_node *flow_node;
   1629	struct bnxt_tc_flow *flow;
   1630	unsigned long lastused;
   1631
   1632	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
   1633					   &tc_flow_cmd->cookie,
   1634					   tc_info->flow_ht_params);
   1635	if (!flow_node)
    1636		return -EINVAL;
   1637
   1638	flow = &flow_node->flow;
   1639	curr_stats = &flow->stats;
   1640	prev_stats = &flow->prev_stats;
   1641
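        	/* TC expects incremental counts, so report only the delta since
        	 * the last query and remember the current totals in prev_stats.
        	 */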
   1642	spin_lock(&flow->stats_lock);
   1643	stats.packets = curr_stats->packets - prev_stats->packets;
   1644	stats.bytes = curr_stats->bytes - prev_stats->bytes;
   1645	*prev_stats = *curr_stats;
   1646	lastused = flow->lastused;
   1647	spin_unlock(&flow->stats_lock);
   1648
   1649	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, 0,
   1650			  lastused, FLOW_ACTION_HW_STATS_DELAYED);
   1651	return 0;
   1652}
   1653
   1654static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
   1655				    struct bnxt_tc_flow_node *flow_node,
   1656				    __le16 *flow_handle, __le32 *flow_id)
   1657{
   1658	u16 handle;
   1659
   1660	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
   1661		*flow_id = flow_node->flow_id;
   1662
   1663		/* If flow_id is used to fetch flow stats then:
   1664		 * 1. lower 12 bits of flow_handle must be set to all 1s.
    1665		 * 2. bit 15 of flow_handle must specify the flow
   1666		 *    direction (TX/RX).
   1667		 */
   1668		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
   1669			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
   1670				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
   1671		else
   1672			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
   1673
   1674		*flow_handle = cpu_to_le16(handle);
   1675	} else {
   1676		*flow_handle = flow_node->flow_handle;
   1677	}
   1678}
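        /* Worked example for the 64-bit-handle case, assuming (per the comment
         * above) CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK covers the low 12 bits
         * (0x0fff) and ..._DIR_RX is bit 15 (0x8000): an RX flow is queried
         * with flow_handle = 0x8fff, a TX flow with flow_handle = 0x0fff.
         */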
   1679
   1680static int
   1681bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
   1682			     struct bnxt_tc_stats_batch stats_batch[])
   1683{
   1684	struct hwrm_cfa_flow_stats_output *resp;
   1685	struct hwrm_cfa_flow_stats_input *req;
   1686	__le16 *req_flow_handles;
   1687	__le32 *req_flow_ids;
   1688	int rc, i;
   1689
   1690	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_STATS);
   1691	if (rc)
   1692		goto exit;
   1693
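        	/* The HWRM request lays out flow_handle_0..N and flow_id_0..N as
        	 * consecutive same-type fields, so they can be indexed as arrays
        	 * starting from the _0 member.
        	 */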
   1694	req_flow_handles = &req->flow_handle_0;
   1695	req_flow_ids = &req->flow_id_0;
   1696
   1697	req->num_flows = cpu_to_le16(num_flows);
   1698	for (i = 0; i < num_flows; i++) {
   1699		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
   1700
   1701		bnxt_fill_cfa_stats_req(bp, flow_node,
   1702					&req_flow_handles[i], &req_flow_ids[i]);
   1703	}
   1704
   1705	resp = hwrm_req_hold(bp, req);
   1706	rc = hwrm_req_send(bp, req);
   1707	if (!rc) {
   1708		__le64 *resp_packets;
   1709		__le64 *resp_bytes;
   1710
   1711		resp_packets = &resp->packet_0;
   1712		resp_bytes = &resp->byte_0;
   1713
   1714		for (i = 0; i < num_flows; i++) {
   1715			stats_batch[i].hw_stats.packets =
   1716						le64_to_cpu(resp_packets[i]);
   1717			stats_batch[i].hw_stats.bytes =
   1718						le64_to_cpu(resp_bytes[i]);
   1719		}
   1720	}
   1721	hwrm_req_drop(bp, req);
   1722exit:
   1723	if (rc)
    1724		netdev_info(bp->dev, "%s: error rc=%d\n", __func__, rc);
   1725
   1726	return rc;
   1727}
   1728
   1729/* Add val to accum while handling a possible wraparound
    1730 * of val. Even though val is of type u64, its actual width
    1731 * is denoted by mask and it will wrap around beyond that width.
   1732 */
   1733static void accumulate_val(u64 *accum, u64 val, u64 mask)
   1734{
   1735#define low_bits(x, mask)		((x) & (mask))
   1736#define high_bits(x, mask)		((x) & ~(mask))
   1737	bool wrapped = val < low_bits(*accum, mask);
   1738
   1739	*accum = high_bits(*accum, mask) + val;
   1740	if (wrapped)
   1741		*accum += (mask + 1);
   1742}
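        /* Example with an 8-bit counter (mask = 0xff): if *accum == 0x1f0 and
         * the HW now reports val = 0x10, then val < low_bits(*accum) (0xf0),
         * so the counter wrapped: *accum = 0x100 + 0x10 + 0x100 = 0x210,
         * preserving the 0x20 events counted across the wrap.
         */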
   1743
    1744/* The HW counters' width is much less than 64 bits.
    1745 * Handle a possible wrap-around while updating the stat counters.
   1746 */
   1747static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
   1748				  struct bnxt_tc_flow_stats *acc_stats,
   1749				  struct bnxt_tc_flow_stats *hw_stats)
   1750{
   1751	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
   1752	accumulate_val(&acc_stats->packets, hw_stats->packets,
   1753		       tc_info->packets_mask);
   1754}
   1755
   1756static int
   1757bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
   1758				struct bnxt_tc_stats_batch stats_batch[])
   1759{
   1760	struct bnxt_tc_info *tc_info = bp->tc_info;
   1761	int rc, i;
   1762
   1763	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
   1764	if (rc)
   1765		return rc;
   1766
   1767	for (i = 0; i < num_flows; i++) {
   1768		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
   1769		struct bnxt_tc_flow *flow = &flow_node->flow;
   1770
   1771		spin_lock(&flow->stats_lock);
   1772		bnxt_flow_stats_accum(tc_info, &flow->stats,
   1773				      &stats_batch[i].hw_stats);
   1774		if (flow->stats.packets != flow->prev_stats.packets)
   1775			flow->lastused = jiffies;
   1776		spin_unlock(&flow->stats_lock);
   1777	}
   1778
   1779	return 0;
   1780}
   1781
   1782static int
   1783bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
   1784			      struct bnxt_tc_stats_batch stats_batch[],
   1785			      int *num_flows)
   1786{
   1787	struct bnxt_tc_info *tc_info = bp->tc_info;
   1788	struct rhashtable_iter *iter = &tc_info->iter;
   1789	void *flow_node;
   1790	int rc, i;
   1791
   1792	rhashtable_walk_start(iter);
   1793
   1794	rc = 0;
   1795	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
   1796		flow_node = rhashtable_walk_next(iter);
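        		/* On -EAGAIN a concurrent resize restarted the walk and
        		 * entries may be revisited; restart this batch from
        		 * scratch (i = 0) so no flow is counted twice.
        		 */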
   1797		if (IS_ERR(flow_node)) {
   1798			i = 0;
   1799			if (PTR_ERR(flow_node) == -EAGAIN) {
   1800				continue;
   1801			} else {
   1802				rc = PTR_ERR(flow_node);
   1803				goto done;
   1804			}
   1805		}
   1806
   1807		/* No more flows */
   1808		if (!flow_node)
   1809			goto done;
   1810
   1811		stats_batch[i].flow_node = flow_node;
   1812	}
   1813done:
   1814	rhashtable_walk_stop(iter);
   1815	*num_flows = i;
   1816	return rc;
   1817}
   1818
   1819void bnxt_tc_flow_stats_work(struct bnxt *bp)
   1820{
   1821	struct bnxt_tc_info *tc_info = bp->tc_info;
   1822	int num_flows, rc;
   1823
   1824	num_flows = atomic_read(&tc_info->flow_table.nelems);
   1825	if (!num_flows)
   1826		return;
   1827
   1828	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
   1829
   1830	for (;;) {
   1831		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
   1832						   &num_flows);
   1833		if (rc) {
   1834			if (rc == -EAGAIN)
   1835				continue;
   1836			break;
   1837		}
   1838
   1839		if (!num_flows)
   1840			break;
   1841
   1842		bnxt_tc_flow_stats_batch_update(bp, num_flows,
   1843						tc_info->stats_batch);
   1844	}
   1845
   1846	rhashtable_walk_exit(&tc_info->iter);
   1847}
   1848
   1849int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
   1850			 struct flow_cls_offload *cls_flower)
   1851{
   1852	switch (cls_flower->command) {
   1853	case FLOW_CLS_REPLACE:
   1854		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
   1855	case FLOW_CLS_DESTROY:
   1856		return bnxt_tc_del_flow(bp, cls_flower);
   1857	case FLOW_CLS_STATS:
   1858		return bnxt_tc_get_flow_stats(bp, cls_flower);
   1859	default:
   1860		return -EOPNOTSUPP;
   1861	}
   1862}
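        /* Illustrative userspace trigger (device names are examples only):
         * a rule such as
         *   tc filter add dev $PF ingress protocol ip flower dst_ip 10.0.0.1 \
         *      action mirred egress redirect dev $VFREP
         * reaches this function as FLOW_CLS_REPLACE; deleting the filter
         * arrives as FLOW_CLS_DESTROY, and stats dumps as FLOW_CLS_STATS.
         */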
   1863
   1864static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
   1865				       void *type_data, void *cb_priv)
   1866{
   1867	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
   1868	struct flow_cls_offload *flower = type_data;
   1869	struct bnxt *bp = priv->bp;
   1870
   1871	if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
   1872		return -EOPNOTSUPP;
   1873
   1874	switch (type) {
   1875	case TC_SETUP_CLSFLOWER:
   1876		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
   1877	default:
   1878		return -EOPNOTSUPP;
   1879	}
   1880}
   1881
   1882static struct bnxt_flower_indr_block_cb_priv *
   1883bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
   1884{
   1885	struct bnxt_flower_indr_block_cb_priv *cb_priv;
   1886
   1887	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
   1888		if (cb_priv->tunnel_netdev == netdev)
   1889			return cb_priv;
   1890
   1891	return NULL;
   1892}
   1893
   1894static void bnxt_tc_setup_indr_rel(void *cb_priv)
   1895{
   1896	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
   1897
   1898	list_del(&priv->list);
   1899	kfree(priv);
   1900}
   1901
   1902static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
   1903				    struct flow_block_offload *f, void *data,
   1904				    void (*cleanup)(struct flow_block_cb *block_cb))
   1905{
   1906	struct bnxt_flower_indr_block_cb_priv *cb_priv;
   1907	struct flow_block_cb *block_cb;
   1908
   1909	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
   1910		return -EOPNOTSUPP;
   1911
   1912	switch (f->command) {
   1913	case FLOW_BLOCK_BIND:
   1914		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
   1915		if (!cb_priv)
   1916			return -ENOMEM;
   1917
   1918		cb_priv->tunnel_netdev = netdev;
   1919		cb_priv->bp = bp;
   1920		list_add(&cb_priv->list, &bp->tc_indr_block_list);
   1921
   1922		block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
   1923						    cb_priv, cb_priv,
   1924						    bnxt_tc_setup_indr_rel, f,
   1925						    netdev, sch, data, bp, cleanup);
   1926		if (IS_ERR(block_cb)) {
   1927			list_del(&cb_priv->list);
   1928			kfree(cb_priv);
   1929			return PTR_ERR(block_cb);
   1930		}
   1931
   1932		flow_block_cb_add(block_cb, f);
   1933		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
   1934		break;
   1935	case FLOW_BLOCK_UNBIND:
   1936		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
   1937		if (!cb_priv)
   1938			return -ENOENT;
   1939
   1940		block_cb = flow_block_cb_lookup(f->block,
   1941						bnxt_tc_setup_indr_block_cb,
   1942						cb_priv);
   1943		if (!block_cb)
   1944			return -ENOENT;
   1945
   1946		flow_indr_block_cb_remove(block_cb, f);
   1947		list_del(&block_cb->driver_list);
   1948		break;
   1949	default:
   1950		return -EOPNOTSUPP;
   1951	}
   1952	return 0;
   1953}
   1954
   1955static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
   1956{
   1957	return netif_is_vxlan(netdev);
   1958}
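        /* Only VXLAN tunnel devices are accepted for indirect block offload;
         * rules attached to other netdevs simply stay in software.
         */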
   1959
   1960static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
   1961				 enum tc_setup_type type, void *type_data,
   1962				 void *data,
   1963				 void (*cleanup)(struct flow_block_cb *block_cb))
   1964{
   1965	if (!netdev || !bnxt_is_netdev_indr_offload(netdev))
   1966		return -EOPNOTSUPP;
   1967
   1968	switch (type) {
   1969	case TC_SETUP_BLOCK:
   1970		return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
   1971	default:
   1972		break;
   1973	}
   1974
   1975	return -EOPNOTSUPP;
   1976}
   1977
   1978static const struct rhashtable_params bnxt_tc_flow_ht_params = {
   1979	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
   1980	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
   1981	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
   1982	.automatic_shrinking = true
   1983};
   1984
   1985static const struct rhashtable_params bnxt_tc_l2_ht_params = {
   1986	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
   1987	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
   1988	.key_len = BNXT_TC_L2_KEY_LEN,
   1989	.automatic_shrinking = true
   1990};
   1991
   1992static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
   1993	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
   1994	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
   1995	.key_len = BNXT_TC_L2_KEY_LEN,
   1996	.automatic_shrinking = true
   1997};
   1998
   1999static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
   2000	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
   2001	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
   2002	.key_len = sizeof(struct ip_tunnel_key),
   2003	.automatic_shrinking = true
   2004};
   2005
   2006/* convert counter width in bits to a mask */
   2007#define mask(width)		((u64)~0 >> (64 - (width)))
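        /* e.g. mask(36) = 0xfffffffff (low 36 bits set), mask(28) = 0xfffffff */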
   2008
   2009int bnxt_init_tc(struct bnxt *bp)
   2010{
   2011	struct bnxt_tc_info *tc_info;
   2012	int rc;
   2013
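        	/* TC flower offload needs HWRM spec 1.8.3 (0x10803) or newer;
        	 * on older firmware, skip initialization quietly.
        	 */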
   2014	if (bp->hwrm_spec_code < 0x10803)
   2015		return 0;
   2016
   2017	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
   2018	if (!tc_info)
   2019		return -ENOMEM;
   2020	mutex_init(&tc_info->lock);
   2021
   2022	/* Counter widths are programmed by FW */
   2023	tc_info->bytes_mask = mask(36);
   2024	tc_info->packets_mask = mask(28);
   2025
   2026	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
   2027	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
   2028	if (rc)
   2029		goto free_tc_info;
   2030
   2031	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
   2032	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
   2033	if (rc)
   2034		goto destroy_flow_table;
   2035
   2036	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
   2037	rc = rhashtable_init(&tc_info->decap_l2_table,
   2038			     &tc_info->decap_l2_ht_params);
   2039	if (rc)
   2040		goto destroy_l2_table;
   2041
   2042	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
   2043	rc = rhashtable_init(&tc_info->decap_table,
   2044			     &tc_info->decap_ht_params);
   2045	if (rc)
   2046		goto destroy_decap_l2_table;
   2047
   2048	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
   2049	rc = rhashtable_init(&tc_info->encap_table,
   2050			     &tc_info->encap_ht_params);
   2051	if (rc)
   2052		goto destroy_decap_table;
   2053
   2054	tc_info->enabled = true;
   2055	bp->dev->hw_features |= NETIF_F_HW_TC;
   2056	bp->dev->features |= NETIF_F_HW_TC;
   2057	bp->tc_info = tc_info;
   2058
   2059	/* init indirect block notifications */
   2060	INIT_LIST_HEAD(&bp->tc_indr_block_list);
   2061
   2062	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
   2063	if (!rc)
   2064		return 0;
   2065
   2066	rhashtable_destroy(&tc_info->encap_table);
   2067
   2068destroy_decap_table:
   2069	rhashtable_destroy(&tc_info->decap_table);
   2070destroy_decap_l2_table:
   2071	rhashtable_destroy(&tc_info->decap_l2_table);
   2072destroy_l2_table:
   2073	rhashtable_destroy(&tc_info->l2_table);
   2074destroy_flow_table:
   2075	rhashtable_destroy(&tc_info->flow_table);
   2076free_tc_info:
   2077	kfree(tc_info);
   2078	return rc;
   2079}
   2080
   2081void bnxt_shutdown_tc(struct bnxt *bp)
   2082{
   2083	struct bnxt_tc_info *tc_info = bp->tc_info;
   2084
   2085	if (!bnxt_tc_flower_enabled(bp))
   2086		return;
   2087
   2088	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
   2089				 bnxt_tc_setup_indr_rel);
   2090	rhashtable_destroy(&tc_info->flow_table);
   2091	rhashtable_destroy(&tc_info->l2_table);
   2092	rhashtable_destroy(&tc_info->decap_l2_table);
   2093	rhashtable_destroy(&tc_info->decap_table);
   2094	rhashtable_destroy(&tc_info->encap_table);
   2095	kfree(tc_info);
   2096	bp->tc_info = NULL;
   2097}