cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

vxlan_vnifilter.c (24668B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *	Vxlan vni filter for collect metadata mode
      4 *
      5 *	Authors: Roopa Prabhu <roopa@nvidia.com>
      6 *
      7 */
      8
      9#include <linux/kernel.h>
     10#include <linux/slab.h>
     11#include <linux/etherdevice.h>
     12#include <linux/rhashtable.h>
     13#include <net/rtnetlink.h>
     14#include <net/net_namespace.h>
     15#include <net/sock.h>
     16#include <net/vxlan.h>
     17
     18#include "vxlan_private.h"
     19
     20static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
     21				const void *ptr)
     22{
     23	const struct vxlan_vni_node *vnode = ptr;
     24	__be32 vni = *(__be32 *)arg->key;
     25
     26	return vnode->vni != vni;
     27}
     28
/* Hash table parameters for the per-device VNI filter table: entries are
 * struct vxlan_vni_node, keyed by the big-endian 24-bit VNI.  max_size is
 * bounded by the size of the VNI space, so the table can never need to
 * grow past one bucket per possible VNI.
 */
const struct rhashtable_params vxlan_vni_rht_params = {
	.head_offset = offsetof(struct vxlan_vni_node, vnode),
	.key_offset = offsetof(struct vxlan_vni_node, vni),
	.key_len = sizeof(__be32),
	.nelem_hint = 3,
	.max_size = VXLAN_N_VID,
	.obj_cmpfn = vxlan_vni_cmp,
	.automatic_shrinking = true,
};
     38
/* Hash (del == false) or unhash (del == true) one VNI node in the vni
 * hash of the device's underlying vxlan socket(s), so received packets
 * carrying that VNI can be demuxed to this device.  vn4_sock/vn6_sock are
 * rtnl-protected; vn->sock_lock serializes the hlist updates.
 */
static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
				     struct vxlan_vni_node *v,
				     bool del)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_dev_node *node;
	struct vxlan_sock *vs;

	spin_lock(&vn->sock_lock);
	if (del) {
		/* hlist_unhashed() guards against nodes that were never
		 * added (e.g. the device has no socket of that family).
		 */
		if (!hlist_unhashed(&v->hlist4.hlist))
			hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		if (!hlist_unhashed(&v->hlist6.hlist))
			hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		goto out;
	}

	/* NOTE(review): the "&& v" checks below look redundant — v has no
	 * NULL path from the callers and is dereferenced unconditionally on
	 * the del branch above; confirm before relying on v being optional.
	 */
#if IS_ENABLED(CONFIG_IPV6)
	vs = rtnl_dereference(vxlan->vn6_sock);
	if (vs && v) {
		node = &v->hlist6;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
#endif
	vs = rtnl_dereference(vxlan->vn4_sock);
	if (vs && v) {
		node = &v->hlist4;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
out:
	spin_unlock(&vn->sock_lock);
}
     73
/* Hash every VNI node of the device's vni group into the vni hash of the
 * newly attached socket @vs (one address family per call, selected by
 * @ipv6).  Called when the device is brought up and acquires its socket;
 * vn->sock_lock serializes the hlist insertions.
 */
void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
			 struct vxlan_sock *vs,
			 bool ipv6)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_dev_node *node;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6)
			node = &v->hlist6;
		else
#endif
			node = &v->hlist4;
		node->vxlan = vxlan;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
	spin_unlock(&vn->sock_lock);
}
     99
/* Unhash every VNI node of the device's vni group from the socket vni
 * hash(es), for both address families.  Counterpart of
 * vxlan_vs_add_vnigrp(); called on device down / socket release.
 */
void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_node *v, *tmp;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		/* hlist_del_init_rcu() is a no-op on an unhashed node */
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
	}
	spin_unlock(&vn->sock_lock);
}
    118
/* Sum the per-cpu counters of @vninode into @dest.  Each CPU's snapshot
 * is taken under the u64_stats seqcount retry loop so a 64-bit counter is
 * never read while that CPU is mid-update (matters on 32-bit arches).
 */
static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
				      struct vxlan_vni_stats *dest)
{
	int i;

	memset(dest, 0, sizeof(*dest));
	for_each_possible_cpu(i) {
		struct vxlan_vni_stats_pcpu *pstats;
		struct vxlan_vni_stats temp;
		unsigned int start;

		pstats = per_cpu_ptr(vninode->stats, i);
		do {
			/* retry if the writer touched syncp meanwhile */
			start = u64_stats_fetch_begin_irq(&pstats->syncp);
			memcpy(&temp, &pstats->stats, sizeof(temp));
		} while (u64_stats_fetch_retry_irq(&pstats->syncp, start));

		dest->rx_packets += temp.rx_packets;
		dest->rx_bytes += temp.rx_bytes;
		dest->rx_drops += temp.rx_drops;
		dest->rx_errors += temp.rx_errors;
		dest->tx_packets += temp.tx_packets;
		dest->tx_bytes += temp.tx_bytes;
		dest->tx_drops += temp.tx_drops;
		dest->tx_errors += temp.tx_errors;
	}
}
    146
/* Account one event of @type (VXLAN_VNI_STATS_*) on the current CPU's
 * stats slot; @len is only meaningful for the RX/TX byte counters.
 * The u64_stats_update_{begin,end} pair marks the writer section for
 * readers in vxlan_vnifilter_stats_get().
 */
static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
				      int type, unsigned int len)
{
	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);

	u64_stats_update_begin(&pstats->syncp);
	switch (type) {
	case VXLAN_VNI_STATS_RX:
		pstats->stats.rx_bytes += len;
		pstats->stats.rx_packets++;
		break;
	case VXLAN_VNI_STATS_RX_DROPS:
		pstats->stats.rx_drops++;
		break;
	case VXLAN_VNI_STATS_RX_ERRORS:
		pstats->stats.rx_errors++;
		break;
	case VXLAN_VNI_STATS_TX:
		pstats->stats.tx_bytes += len;
		pstats->stats.tx_packets++;
		break;
	case VXLAN_VNI_STATS_TX_DROPS:
		pstats->stats.tx_drops++;
		break;
	case VXLAN_VNI_STATS_TX_ERRORS:
		pstats->stats.tx_errors++;
		break;
	}
	u64_stats_update_end(&pstats->syncp);
}
    177
    178void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
    179			   struct vxlan_vni_node *vninode,
    180			   int type, unsigned int len)
    181{
    182	struct vxlan_vni_node *vnode;
    183
    184	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
    185		return;
    186
    187	if (vninode) {
    188		vnode = vninode;
    189	} else {
    190		vnode = vxlan_vnifilter_lookup(vxlan, vni);
    191		if (!vnode)
    192			return;
    193	}
    194
    195	vxlan_vnifilter_stats_add(vnode, type, len);
    196}
    197
    198static u32 vnirange(struct vxlan_vni_node *vbegin,
    199		    struct vxlan_vni_node *vend)
    200{
    201	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
    202}
    203
    204static size_t vxlan_vnifilter_entry_nlmsg_size(void)
    205{
    206	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
    207		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
    208		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
    209		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
    210		+ nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
    211}
    212
/* Append a VXLAN_VNIFILTER_ENTRY_STATS nest with the aggregated counters
 * of @vbegin to @skb.  On any failure the whole nest is cancelled so the
 * message stays well-formed.  Returns 0 or -EMSGSIZE.
 */
static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
					const struct vxlan_vni_node *vbegin)
{
	struct vxlan_vni_stats vstats;
	struct nlattr *vstats_attr;

	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
	if (!vstats_attr)
		goto out_stats_err;

	vxlan_vnifilter_stats_get(vbegin, &vstats);
	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
		goto out_stats_err;

	nla_nest_end(skb, vstats_attr);

	return 0;

out_stats_err:
	/* nla_nest_cancel(skb, NULL) is safe if nest_start failed */
	nla_nest_cancel(skb, vstats_attr);
	return -EMSGSIZE;
}
    250
/* Append one VXLAN_VNIFILTER_ENTRY nest describing the VNI range
 * [vbegin, vend] (END omitted for a single vni, i.e. vbegin == vend),
 * the per-range remote/group address when set, and optionally the stats
 * nest.  Returns true on success; on failure the nest is cancelled and
 * false is returned so the caller can stop/resume the dump.
 */
static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
					struct vxlan_vni_node *vbegin,
					struct vxlan_vni_node *vend,
					bool fill_stats)
{
	struct nlattr *ventry;
	u32 vs = be32_to_cpu(vbegin->vni);
	u32 ve = 0;	/* 0 == no END attribute (single-vni entry) */

	if (vbegin != vend)
		ve = be32_to_cpu(vend->vni);

	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
	if (!ventry)
		return false;

	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
		goto out_err;

	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
		goto out_err;

	if (!vxlan_addr_any(&vbegin->remote_ip)) {
		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
					    vbegin->remote_ip.sin.sin_addr.s_addr))
				goto out_err;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
					     &vbegin->remote_ip.sin6.sin6_addr))
				goto out_err;
#endif
		}
	}

	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
		goto out_err;

	nla_nest_end(skb, ventry);

	return true;

out_err:
	nla_nest_cancel(skb, ventry);

	return false;
}
    299
/* Send an RTM_NEWTUNNEL/RTM_DELTUNNEL notification for a single vni to
 * the RTNLGRP_TUNNEL multicast group.  Failures are reported to group
 * listeners via rtnl_set_sk_err() rather than to the caller.
 */
static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode, int cmd)
{
	struct tunnel_msg *tmsg;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net = dev_net(vxlan->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
	if (!nlh)
		goto out_err;
	tmsg = nlmsg_data(nlh);
	memset(tmsg, 0, sizeof(*tmsg));
	tmsg->family = AF_BRIDGE;
	tmsg->ifindex = vxlan->dev->ifindex;

	/* single vni: begin == end, no stats in notifications */
	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);

	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);

	/* kfree_skb(NULL) is a no-op, covers the nlmsg_new() failure path */
	kfree_skb(skb);
}
    335
/* Dump the vni filter table of one vxlan device into @skb.  Consecutive
 * VNIs sharing the same remote ip are coalesced into a single
 * START/END range entry (unless per-vni stats were requested, which
 * forces one entry per vni).  cb->args[1] carries the resume index when
 * the skb fills up (-EMSGSIZE); it is reset to 0 on a complete dump.
 */
static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
				    struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct tunnel_msg *new_tmsg, *tmsg;
	int idx = 0, s_idx = cb->args[1];
	struct vxlan_vni_group *vg;
	struct nlmsghdr *nlh;
	bool dump_stats;
	int err = 0;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EINVAL;

	/* RCU needed because of the vni locking rules (rcu || rtnl) */
	vg = rcu_dereference(vxlan->vnigrp);
	if (!vg || !vg->num_vnis)
		return 0;

	tmsg = nlmsg_data(cb->nlh);
	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	new_tmsg = nlmsg_data(nlh);
	memset(new_tmsg, 0, sizeof(*new_tmsg));
	new_tmsg->family = PF_BRIDGE;
	new_tmsg->ifindex = dev->ifindex;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		/* skip entries already emitted in a previous pass */
		if (idx < s_idx) {
			idx++;
			continue;
		}
		if (!vbegin) {
			/* open a new candidate range */
			vbegin = v;
			vend = v;
			continue;
		}
		/* extend the range while VNIs are consecutive and share
		 * the same remote ip (and no per-vni stats are wanted)
		 */
		if (!dump_stats && vnirange(vend, v) == 1 &&
		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
			goto update_end;
		} else {
			/* flush the accumulated range, start a new one */
			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
							 dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			idx += vnirange(vbegin, vend) + 1;
			vbegin = v;
		}
update_end:
		vend = v;
	}

	/* flush the trailing range, if any */
	if (!err && vbegin) {
		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}
    406
/* RTM_GETTUNNEL dump handler.  With tmsg->ifindex set, dump exactly that
 * device; otherwise walk every vxlan device in the namespace under RCU.
 * cb->args[0] is the device resume cursor across dump invocations
 * (cb->args[1], the per-device cursor, is managed by the per-dev dump).
 */
static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct net_device *dev;

	tmsg = nlmsg_data(cb->nlh);

	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
		return -EINVAL;
	}

	rcu_read_lock();
	if (tmsg->ifindex) {
		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		if (!netif_is_vxlan(dev)) {
			NL_SET_ERR_MSG(cb->extack,
				       "The device is not a vxlan device");
			err = -EINVAL;
			goto out_err;
		}
		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (!netif_is_vxlan(dev))
				continue;
			if (idx < s_idx)
				goto skip;
			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
			/* skb full: stop here, resume at idx next call */
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	/* non-zero length tells netlink there is more to come */
	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}
    461
/* Policy for attributes nested inside one VXLAN_VNIFILTER_ENTRY:
 * a vni range (start/end, host byte order u32) plus an optional
 * IPv4 or IPv6 remote/group address.
 */
static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_GROUP]	= { .type = NLA_BINARY,
					    .len = sizeof_field(struct iphdr, daddr) },
	[VXLAN_VNIFILTER_ENTRY_GROUP6]	= { .type = NLA_BINARY,
					    .len = sizeof(struct in6_addr) },
};
    470
/* Top-level RTM_{NEW,DEL}TUNNEL policy: one or more nested entries. */
static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
};
    474
/* Replace the all-zeros-mac (default destination) fdb entry for @vni:
 * install @remote_ip when given and non-any, then remove the stale
 * @old_remote_ip entry.  Both steps run under the same fdb hash bucket
 * lock.  Returns 0 or the vxlan_fdb_update() error (in which case the
 * old entry is left untouched).
 */
static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	u32 hash_index;
	int err = 0;

	hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err) {
			spin_unlock_bh(&vxlan->hash_lock[hash_index]);
			return err;
		}
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);
	}
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}
    514
/* Apply a (possibly changed) remote/group address to @vninode: refresh
 * the default fdb entry, record the new per-vni remote ip, and when the
 * device is up, leave the old multicast group (if no longer used by any
 * vni/device) and join the new one.  @create distinguishes first-time
 * setup from an update; *@changed is set true only when work was done.
 */
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr old_remote_ip;
	int ret = 0;

	memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group)) {
		newrip = group;
	} else {
		if (!vxlan_addr_any(&dst->remote_ip))
			newrip = &dst->remote_ip;
	}

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!newrip && !vxlan_addr_any(&old_remote_ip))
		oldrip = &old_remote_ip;

	/* nothing to install and nothing to remove */
	if (!newrip && !oldrip)
		return 0;

	/* update with an identical address is a no-op */
	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (ret)
		goto out;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		/* leave the old group unless another user still needs it */
		if (vxlan_addr_multicast(&old_remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &old_remote_ip,
				      vxlan->default_dst.remote_ifindex)) {
			ret = vxlan_igmp_leave(vxlan, &old_remote_ip,
					       0);
			if (ret)
				goto out;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			/* already joined by another vni: not an error */
			if (ret == -EADDRINUSE)
				ret = 0;
			if (ret)
				goto out;
		}
	}

	*changed = true;

	return 0;
out:
	return ret;
}
    586
    587int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
    588			       union vxlan_addr *old_remote_ip,
    589			       union vxlan_addr *new_remote_ip,
    590			       struct netlink_ext_ack *extack)
    591{
    592	struct list_head *headp, *hpos;
    593	struct vxlan_vni_group *vg;
    594	struct vxlan_vni_node *vent;
    595	int ret;
    596
    597	vg = rtnl_dereference(vxlan->vnigrp);
    598
    599	headp = &vg->vni_list;
    600	list_for_each_prev(hpos, headp) {
    601		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
    602		if (vxlan_addr_any(&vent->remote_ip)) {
    603			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
    604							     old_remote_ip,
    605							     new_remote_ip,
    606							     extack);
    607			if (ret)
    608				return ret;
    609		}
    610	}
    611
    612	return 0;
    613}
    614
/* Tear down the default fdb entry for a vni being removed and, when the
 * device is up, leave its multicast group if no other user remains.
 */
static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;

	/* if per vni remote_ip not present, delete the
	 * default dst remote_ip previously added for this vni
	 */
	if (!vxlan_addr_any(&vninode->remote_ip) ||
	    !vxlan_addr_any(&dst->remote_ip))
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   (vxlan_addr_any(&vninode->remote_ip) ?
				   dst->remote_ip : vninode->remote_ip),
				   vxlan->cfg.dst_port,
				   vninode->vni, vninode->vni,
				   dst->remote_ifindex,
				   true);

	if (vxlan->dev->flags & IFF_UP) {
		/* only leave the group when this was its last user */
		if (vxlan_addr_multicast(&vninode->remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &vninode->remote_ip,
				      dst->remote_ifindex)) {
			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
		}
	}
}
    643
    644static int vxlan_vni_update(struct vxlan_dev *vxlan,
    645			    struct vxlan_vni_group *vg,
    646			    __be32 vni, union vxlan_addr *group,
    647			    bool *changed,
    648			    struct netlink_ext_ack *extack)
    649{
    650	struct vxlan_vni_node *vninode;
    651	int ret;
    652
    653	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
    654					 vxlan_vni_rht_params);
    655	if (!vninode)
    656		return 0;
    657
    658	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
    659				     extack);
    660	if (ret)
    661		return ret;
    662
    663	if (changed)
    664		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
    665
    666	return 0;
    667}
    668
    669static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
    670				 struct vxlan_vni_node *v)
    671{
    672	struct list_head *headp, *hpos;
    673	struct vxlan_vni_node *vent;
    674
    675	headp = &vg->vni_list;
    676	list_for_each_prev(hpos, headp) {
    677		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
    678		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
    679			continue;
    680		else
    681			break;
    682	}
    683	list_add_rcu(&v->vlist, hpos);
    684	vg->num_vnis++;
    685}
    686
/* Unlink @v from the sorted vni list (RCU-safe) and drop the count. */
static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	list_del_rcu(&v->vlist);
	vg->num_vnis--;
}
    693
    694static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
    695					      __be32 vni)
    696{
    697	struct vxlan_vni_node *vninode;
    698
    699	vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC);
    700	if (!vninode)
    701		return NULL;
    702	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
    703	if (!vninode->stats) {
    704		kfree(vninode);
    705		return NULL;
    706	}
    707	vninode->vni = vni;
    708	vninode->hlist4.vxlan = vxlan;
    709#if IS_ENABLED(CONFIG_IPV6)
    710	vninode->hlist6.vxlan = vxlan;
    711#endif
    712
    713	return vninode;
    714}
    715
/* Add @vni (host byte order) to the device's vni filter.  If the vni is
 * already present this degrades to an update of its group address.
 * Otherwise: reject VNIs used by another device on the same port,
 * allocate a node, insert it into hash and sorted list, hash it into the
 * live socket(s) when the device is up, and set up the default fdb /
 * multicast state.  An RTM_NEWTUNNEL notification is sent when the group
 * setup reports a change.
 */
static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	bool changed = false;
	int err = 0;

	if (vxlan_vnifilter_lookup(vxlan, v))
		return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	vninode = vxlan_vni_alloc(vxlan, v);
	if (!vninode)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &vninode->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		/* NOTE(review): plain kfree leaks vninode->stats here —
		 * confirm; the rcu-free path uses free_percpu + kfree.
		 */
		kfree(vninode);
		return err;
	}

	__vxlan_vni_add_list(vg, vninode);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, false);

	err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return err;
}
    760
    761static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
    762{
    763	struct vxlan_vni_node *v;
    764
    765	v = container_of(rcu, struct vxlan_vni_node, rcu);
    766	free_percpu(v->stats);
    767	kfree(v);
    768}
    769
/* Remove @vni (host byte order) from the device's vni filter: tear down
 * its fdb/multicast state, unlink it from hash and list, notify
 * RTM_DELTUNNEL, unhash it from the live socket(s), and free the node
 * after a grace period.  Returns -ENOENT for an unknown vni.
 */
static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	int err = 0;

	/* NOTE(review): this re-derives vg, shadowing the parameter the
	 * caller already obtained via rtnl_dereference — looks redundant.
	 */
	vg = rtnl_dereference(vxlan->vnigrp);

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
					 vxlan_vni_rht_params);
	if (!vninode) {
		err = -ENOENT;
		goto out;
	}

	vxlan_vni_delete_group(vxlan, vninode);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &vninode->vnode,
				     vxlan_vni_rht_params);
	if (err)
		goto out;

	__vxlan_vni_del_list(vg, vninode);

	vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, true);

	/* defer the actual free until concurrent RCU readers are done */
	call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);

	return 0;
out:
	return err;
}
    808
    809static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
    810			     __u32 end_vni, union vxlan_addr *group,
    811			     int cmd, struct netlink_ext_ack *extack)
    812{
    813	struct vxlan_vni_group *vg;
    814	int v, err = 0;
    815
    816	vg = rtnl_dereference(vxlan->vnigrp);
    817
    818	for (v = start_vni; v <= end_vni; v++) {
    819		switch (cmd) {
    820		case RTM_NEWTUNNEL:
    821			err = vxlan_vni_add(vxlan, vg, v, group, extack);
    822			break;
    823		case RTM_DELTUNNEL:
    824			err = vxlan_vni_del(vxlan, vg, v, extack);
    825			break;
    826		default:
    827			err = -EOPNOTSUPP;
    828			break;
    829		}
    830		if (err)
    831			goto out;
    832	}
    833
    834	return 0;
    835out:
    836	return err;
    837}
    838
/* Parse one VXLAN_VNIFILTER_ENTRY nest and apply it: extract the vni
 * range (END defaults to START when absent) and the optional v4/v6 group
 * address, validate, then add/delete the range according to @cmd.
 */
static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
				    struct nlattr *nlvnifilter,
				    int cmd, struct netlink_ext_ack *extack)
{
	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
	u32 vni_start = 0, vni_end = 0;
	union vxlan_addr group;
	int err;

	err = nla_parse_nested(vattrs,
			       VXLAN_VNIFILTER_ENTRY_MAX,
			       nlvnifilter, vni_filter_entry_policy,
			       extack);
	if (err)
		return err;

	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
		vni_end = vni_start;	/* single vni unless END follows */
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);

	if (!vni_start && !vni_end) {
		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
				    "vni start nor end found in vni entry");
		return -EINVAL;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
		group.sin.sin_addr.s_addr =
			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
		group.sa.sa_family = AF_INET;
	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
		group.sin6.sin6_addr =
			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
		group.sa.sa_family = AF_INET6;
	} else {
		/* no group attribute: treat as "any" address */
		memset(&group, 0, sizeof(group));
	}

	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
		NL_SET_ERR_MSG(extack,
			       "Local interface required for multicast remote group");

		return -EINVAL;
	}

	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
				extack);
	if (err)
		return err;

	return 0;
}
    895
/* Tear down the whole vni group on device destruction: remove every
 * node from hash, socket hlists and list, notify RTM_DELTUNNEL per vni,
 * RCU-free the nodes, then destroy the table and free the group.
 * Caller holds rtnl.
 */
void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_vni_group *vg;

	vg = rtnl_dereference(vxlan->vnigrp);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
				       vxlan_vni_rht_params);
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		__vxlan_vni_del_list(vg, v);
		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
	}
	rhashtable_destroy(&vg->vni_hash);
	kfree(vg);
}
    916
    917int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
    918{
    919	struct vxlan_vni_group *vg;
    920	int ret;
    921
    922	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
    923	if (!vg)
    924		return -ENOMEM;
    925	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
    926	if (ret) {
    927		kfree(vg);
    928		return ret;
    929	}
    930	INIT_LIST_HEAD(&vg->vni_list);
    931	rcu_assign_pointer(vxlan->vnigrp, vg);
    932
    933	return 0;
    934}
    935
/* RTM_NEWTUNNEL / RTM_DELTUNNEL doit handler: validate the message,
 * resolve the target vxlan device (which must run in vnifilter mode),
 * and process every VXLAN_VNIFILTER_ENTRY attribute in order, stopping
 * at the first error.  Runs under rtnl.
 */
static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct vxlan_dev *vxlan;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vnis = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
			  vni_filter_policy, extack);
	if (err < 0)
		return err;

	tmsg = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, tmsg->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_vxlan(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
		return -EINVAL;
	}

	vxlan = netdev_priv(dev);

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EOPNOTSUPP;

	nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
		switch (nla_type(attr)) {
		case VXLAN_VNIFILTER_ENTRY:
			err = vxlan_process_vni_filter(vxlan, attr,
						       nlh->nlmsg_type, extack);
			break;
		default:
			/* unknown attributes are skipped, not counted */
			continue;
		}
		vnis++;
		if (err)
			break;
	}

	/* a request with no entries is malformed */
	if (!vnis) {
		NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
		err = -EINVAL;
	}

	return err;
}
    989
/* Register the PF_BRIDGE tunnel netlink handlers: GET is dump-only,
 * NEW/DEL share the same doit processor (it dispatches on nlmsg_type).
 */
void vxlan_vnifilter_init(void)
{
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL,
			     vxlan_vnifilter_dump, 0);
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL,
			     vxlan_vnifilter_process, NULL, 0);
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL,
			     vxlan_vnifilter_process, NULL, 0);
}
    999
/* Unregister the handlers added by vxlan_vnifilter_init(). */
void vxlan_vnifilter_uninit(void)
{
	rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL);
	rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL);
	rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL);
}