vxlan_vnifilter.c (24668B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Vxlan vni filter for collect metadata mode
 *
 * Authors: Roopa Prabhu <roopa@nvidia.com>
 *
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <linux/rhashtable.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/vxlan.h>

#include "vxlan_private.h"

static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct vxlan_vni_node *vnode = ptr;
	__be32 vni = *(__be32 *)arg->key;

	return vnode->vni != vni;
}

const struct rhashtable_params vxlan_vni_rht_params = {
	.head_offset = offsetof(struct vxlan_vni_node, vnode),
	.key_offset = offsetof(struct vxlan_vni_node, vni),
	.key_len = sizeof(__be32),
	.nelem_hint = 3,
	.max_size = VXLAN_N_VID,
	.obj_cmpfn = vxlan_vni_cmp,
	.automatic_shrinking = true,
};

static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
				     struct vxlan_vni_node *v,
				     bool del)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_dev_node *node;
	struct vxlan_sock *vs;

	spin_lock(&vn->sock_lock);
	if (del) {
		if (!hlist_unhashed(&v->hlist4.hlist))
			hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		if (!hlist_unhashed(&v->hlist6.hlist))
			hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		goto out;
	}

#if IS_ENABLED(CONFIG_IPV6)
	vs = rtnl_dereference(vxlan->vn6_sock);
	if (vs && v) {
		node = &v->hlist6;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
#endif
	vs = rtnl_dereference(vxlan->vn4_sock);
	if (vs && v) {
		node = &v->hlist4;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
out:
	spin_unlock(&vn->sock_lock);
}

void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
			 struct vxlan_sock *vs,
			 bool ipv6)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_dev_node *node;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6)
			node = &v->hlist6;
		else
#endif
			node = &v->hlist4;
		node->vxlan = vxlan;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
	spin_unlock(&vn->sock_lock);
}

void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_vni_node *v, *tmp;

	if (!vg)
		return;

	spin_lock(&vn->sock_lock);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
	}
	spin_unlock(&vn->sock_lock);
}

static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
				      struct vxlan_vni_stats *dest)
{
	int i;

	memset(dest, 0, sizeof(*dest));
	for_each_possible_cpu(i) {
		struct vxlan_vni_stats_pcpu *pstats;
		struct vxlan_vni_stats temp;
		unsigned int start;

		pstats = per_cpu_ptr(vninode->stats, i);
		do {
			start = u64_stats_fetch_begin_irq(&pstats->syncp);
			memcpy(&temp, &pstats->stats, sizeof(temp));
		} while (u64_stats_fetch_retry_irq(&pstats->syncp, start));

		dest->rx_packets += temp.rx_packets;
		dest->rx_bytes += temp.rx_bytes;
		dest->rx_drops += temp.rx_drops;
		dest->rx_errors += temp.rx_errors;
		dest->tx_packets += temp.tx_packets;
		dest->tx_bytes += temp.tx_bytes;
		dest->tx_drops += temp.tx_drops;
		dest->tx_errors += temp.tx_errors;
	}
}

static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
				      int type, unsigned int len)
{
	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);

	u64_stats_update_begin(&pstats->syncp);
	switch (type) {
	case VXLAN_VNI_STATS_RX:
		pstats->stats.rx_bytes += len;
		pstats->stats.rx_packets++;
		break;
	case VXLAN_VNI_STATS_RX_DROPS:
		pstats->stats.rx_drops++;
		break;
	case VXLAN_VNI_STATS_RX_ERRORS:
		pstats->stats.rx_errors++;
		break;
	case VXLAN_VNI_STATS_TX:
		pstats->stats.tx_bytes += len;
		pstats->stats.tx_packets++;
		break;
	case VXLAN_VNI_STATS_TX_DROPS:
		pstats->stats.tx_drops++;
		break;
	case VXLAN_VNI_STATS_TX_ERRORS:
		pstats->stats.tx_errors++;
		break;
	}
	u64_stats_update_end(&pstats->syncp);
}

void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *vnode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (vninode) {
		vnode = vninode;
	} else {
		vnode = vxlan_vnifilter_lookup(vxlan, vni);
		if (!vnode)
			return;
	}

	vxlan_vnifilter_stats_add(vnode, type, len);
}

static u32 vnirange(struct vxlan_vni_node *vbegin,
		    struct vxlan_vni_node *vend)
{
	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
}

static size_t vxlan_vnifilter_entry_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
		+ nla_total_size(sizeof(struct in6_addr)); /* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
}

static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
					const struct vxlan_vni_node *vbegin)
{
	struct vxlan_vni_stats vstats;
	struct nlattr *vstats_attr;

	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
	if (!vstats_attr)
		goto out_stats_err;

	vxlan_vnifilter_stats_get(vbegin, &vstats);
	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
		goto out_stats_err;

	nla_nest_end(skb, vstats_attr);

	return 0;

out_stats_err:
	nla_nest_cancel(skb, vstats_attr);
	return -EMSGSIZE;
}

static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
					struct vxlan_vni_node *vbegin,
					struct vxlan_vni_node *vend,
					bool fill_stats)
{
	struct nlattr *ventry;
	u32 vs = be32_to_cpu(vbegin->vni);
	u32 ve = 0;

	if (vbegin != vend)
		ve = be32_to_cpu(vend->vni);

	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
	if (!ventry)
		return false;

	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
		goto out_err;

	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
		goto out_err;

	if (!vxlan_addr_any(&vbegin->remote_ip)) {
		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
					    vbegin->remote_ip.sin.sin_addr.s_addr))
				goto out_err;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
					     &vbegin->remote_ip.sin6.sin6_addr))
				goto out_err;
#endif
		}
	}

	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
		goto out_err;

	nla_nest_end(skb, ventry);

	return true;

out_err:
	nla_nest_cancel(skb, ventry);

	return false;
}

static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode, int cmd)
{
	struct tunnel_msg *tmsg;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net = dev_net(vxlan->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
	if (!nlh)
		goto out_err;
	tmsg = nlmsg_data(nlh);
	memset(tmsg, 0, sizeof(*tmsg));
	tmsg->family = AF_BRIDGE;
	tmsg->ifindex = vxlan->dev->ifindex;

	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);

	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);

	kfree_skb(skb);
}

static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
				    struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct tunnel_msg *new_tmsg, *tmsg;
	int idx = 0, s_idx = cb->args[1];
	struct vxlan_vni_group *vg;
	struct nlmsghdr *nlh;
	bool dump_stats;
	int err = 0;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EINVAL;

	/* RCU needed because of the vni locking rules (rcu || rtnl) */
	vg = rcu_dereference(vxlan->vnigrp);
	if (!vg || !vg->num_vnis)
		return 0;

	tmsg = nlmsg_data(cb->nlh);
	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	new_tmsg = nlmsg_data(nlh);
	memset(new_tmsg, 0, sizeof(*new_tmsg));
	new_tmsg->family = PF_BRIDGE;
	new_tmsg->ifindex = dev->ifindex;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		if (idx < s_idx) {
			idx++;
			continue;
		}
		if (!vbegin) {
			vbegin = v;
			vend = v;
			continue;
		}
		if (!dump_stats && vnirange(vend, v) == 1 &&
		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
			goto update_end;
		} else {
			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
							 dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			idx += vnirange(vbegin, vend) + 1;
			vbegin = v;
		}
update_end:
		vend = v;
	}

	if (!err && vbegin) {
		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}

static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct net_device *dev;

	tmsg = nlmsg_data(cb->nlh);

	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
		return -EINVAL;
	}

	rcu_read_lock();
	if (tmsg->ifindex) {
		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		if (!netif_is_vxlan(dev)) {
			NL_SET_ERR_MSG(cb->extack,
				       "The device is not a vxlan device");
			err = -EINVAL;
			goto out_err;
		}
		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (!netif_is_vxlan(dev))
				continue;
			if (idx < s_idx)
				goto skip;
			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}

static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
					  .len = sizeof_field(struct iphdr, daddr) },
	[VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
					   .len = sizeof(struct in6_addr) },
};

static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
};

static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	u32 hash_index;
	int err = 0;

	hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
	spin_lock_bh(&vxlan->hash_lock[hash_index]);
	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err) {
			spin_unlock_bh(&vxlan->hash_lock[hash_index]);
			return err;
		}
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);
	}
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
}

static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr old_remote_ip;
	int ret = 0;

	memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group)) {
		newrip = group;
	} else {
		if (!vxlan_addr_any(&dst->remote_ip))
			newrip = &dst->remote_ip;
	}

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!newrip && !vxlan_addr_any(&old_remote_ip))
		oldrip = &old_remote_ip;

	if (!newrip && !oldrip)
		return 0;

	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (ret)
		goto out;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&old_remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &old_remote_ip,
				      vxlan->default_dst.remote_ifindex)) {
			ret = vxlan_igmp_leave(vxlan, &old_remote_ip,
					       0);
			if (ret)
				goto out;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			if (ret == -EADDRINUSE)
				ret = 0;
			if (ret)
				goto out;
		}
	}

	*changed = true;

	return 0;
out:
	return ret;
}

int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
			       union vxlan_addr *old_remote_ip,
			       union vxlan_addr *new_remote_ip,
			       struct netlink_ext_ack *extack)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_group *vg;
	struct vxlan_vni_node *vent;
	int ret;

	vg = rtnl_dereference(vxlan->vnigrp);

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (vxlan_addr_any(&vent->remote_ip)) {
			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
							     old_remote_ip,
							     new_remote_ip,
							     extack);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;

	/* if per vni remote_ip not present, delete the
	 * default dst remote_ip previously added for this vni
	 */
	if (!vxlan_addr_any(&vninode->remote_ip) ||
	    !vxlan_addr_any(&dst->remote_ip))
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   (vxlan_addr_any(&vninode->remote_ip) ?
				    dst->remote_ip : vninode->remote_ip),
				   vxlan->cfg.dst_port,
				   vninode->vni, vninode->vni,
				   dst->remote_ifindex,
				   true);

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&vninode->remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &vninode->remote_ip,
				      dst->remote_ifindex)) {
			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
		}
	}
}

static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}

static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_node *vent;

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
			continue;
		else
			break;
	}
	list_add_rcu(&v->vlist, hpos);
	vg->num_vnis++;
}

static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	list_del_rcu(&v->vlist);
	vg->num_vnis--;
}

static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
					      __be32 vni)
{
	struct vxlan_vni_node *vninode;

	vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC);
	if (!vninode)
		return NULL;
	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
	if (!vninode->stats) {
		kfree(vninode);
		return NULL;
	}
	vninode->vni = vni;
	vninode->hlist4.vxlan = vxlan;
#if IS_ENABLED(CONFIG_IPV6)
	vninode->hlist6.vxlan = vxlan;
#endif

	return vninode;
}

static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	bool changed = false;
	int err = 0;

	if (vxlan_vnifilter_lookup(vxlan, v))
		return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	vninode = vxlan_vni_alloc(vxlan, v);
	if (!vninode)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &vninode->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		kfree(vninode);
		return err;
	}

	__vxlan_vni_add_list(vg, vninode);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, false);

	err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return err;
}

static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
{
	struct vxlan_vni_node *v;

	v = container_of(rcu, struct vxlan_vni_node, rcu);
	free_percpu(v->stats);
	kfree(v);
}

static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	int err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
					 vxlan_vni_rht_params);
	if (!vninode) {
		err = -ENOENT;
		goto out;
	}

	vxlan_vni_delete_group(vxlan, vninode);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &vninode->vnode,
				     vxlan_vni_rht_params);
	if (err)
		goto out;

	__vxlan_vni_del_list(vg, vninode);

	vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, true);

	call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);

	return 0;
out:
	return err;
}

static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	int v, err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	for (v = start_vni; v <= end_vni; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			goto out;
	}

	return 0;
out:
	return err;
}

static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
				    struct nlattr *nlvnifilter,
				    int cmd, struct netlink_ext_ack *extack)
{
	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
	u32 vni_start = 0, vni_end = 0;
	union vxlan_addr group;
	int err;

	err = nla_parse_nested(vattrs,
			       VXLAN_VNIFILTER_ENTRY_MAX,
			       nlvnifilter, vni_filter_entry_policy,
			       extack);
	if (err)
		return err;

	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
		vni_end = vni_start;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);

	if (!vni_start && !vni_end) {
		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
				    "vni start nor end found in vni entry");
		return -EINVAL;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
		group.sin.sin_addr.s_addr =
			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
		group.sa.sa_family = AF_INET;
	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
		group.sin6.sin6_addr =
			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
		group.sa.sa_family = AF_INET6;
	} else {
		memset(&group, 0, sizeof(group));
	}

	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
		NL_SET_ERR_MSG(extack,
			       "Local interface required for multicast remote group");

		return -EINVAL;
	}

	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
				extack);
	if (err)
		return err;

	return 0;
}

void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_vni_group *vg;

	vg = rtnl_dereference(vxlan->vnigrp);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
				       vxlan_vni_rht_params);
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		__vxlan_vni_del_list(vg, v);
		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
	}
	rhashtable_destroy(&vg->vni_hash);
	kfree(vg);
}

int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg;
	int ret;

	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
	if (!vg)
		return -ENOMEM;
	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
	if (ret) {
		kfree(vg);
		return ret;
	}
	INIT_LIST_HEAD(&vg->vni_list);
	rcu_assign_pointer(vxlan->vnigrp, vg);

	return 0;
}

static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct vxlan_dev *vxlan;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vnis = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
			  vni_filter_policy, extack);
	if (err < 0)
		return err;

	tmsg = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, tmsg->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_vxlan(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
		return -EINVAL;
	}

	vxlan = netdev_priv(dev);

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EOPNOTSUPP;

	nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
		switch (nla_type(attr)) {
		case VXLAN_VNIFILTER_ENTRY:
			err = vxlan_process_vni_filter(vxlan, attr,
						       nlh->nlmsg_type, extack);
			break;
		default:
			continue;
		}
		vnis++;
		if (err)
			break;
	}

	if (!vnis) {
		NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
		err = -EINVAL;
	}

	return err;
}

void vxlan_vnifilter_init(void)
{
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL,
			     vxlan_vnifilter_dump, 0);
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL,
			     vxlan_vnifilter_process, NULL, 0);
	rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL,
			     vxlan_vnifilter_process, NULL, 0);
}

void vxlan_vnifilter_uninit(void)
{
	rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL);
	rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL);
	rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL);
}