seg6_local.c (47329B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * SR-IPv6 implementation 4 * 5 * Authors: 6 * David Lebrun <david.lebrun@uclouvain.be> 7 * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com> 8 */ 9 10#include <linux/filter.h> 11#include <linux/types.h> 12#include <linux/skbuff.h> 13#include <linux/net.h> 14#include <linux/module.h> 15#include <net/ip.h> 16#include <net/lwtunnel.h> 17#include <net/netevent.h> 18#include <net/netns/generic.h> 19#include <net/ip6_fib.h> 20#include <net/route.h> 21#include <net/seg6.h> 22#include <linux/seg6.h> 23#include <linux/seg6_local.h> 24#include <net/addrconf.h> 25#include <net/ip6_route.h> 26#include <net/dst_cache.h> 27#include <net/ip_tunnels.h> 28#ifdef CONFIG_IPV6_SEG6_HMAC 29#include <net/seg6_hmac.h> 30#endif 31#include <net/seg6_local.h> 32#include <linux/etherdevice.h> 33#include <linux/bpf.h> 34#include <linux/netfilter.h> 35 36#define SEG6_F_ATTR(i) BIT(i) 37 38struct seg6_local_lwt; 39 40/* callbacks used for customizing the creation and destruction of a behavior */ 41struct seg6_local_lwtunnel_ops { 42 int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, 43 struct netlink_ext_ack *extack); 44 void (*destroy_state)(struct seg6_local_lwt *slwt); 45}; 46 47struct seg6_action_desc { 48 int action; 49 unsigned long attrs; 50 51 /* The optattrs field is used for specifying all the optional 52 * attributes supported by a specific behavior. 53 * It means that if one of these attributes is not provided in the 54 * netlink message during the behavior creation, no errors will be 55 * returned to the userspace. 56 * 57 * Each attribute can be only of two types (mutually exclusive): 58 * 1) required or 2) optional. 59 * Every user MUST obey to this rule! If you set an attribute as 60 * required the same attribute CANNOT be set as optional and vice 61 * versa. 62 */ 63 unsigned long optattrs; 64 65 int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); 66 int static_headroom; 67 68 struct seg6_local_lwtunnel_ops slwt_ops; 69}; 70 71struct bpf_lwt_prog { 72 struct bpf_prog *prog; 73 char *name; 74}; 75 76enum seg6_end_dt_mode { 77 DT_INVALID_MODE = -EINVAL, 78 DT_LEGACY_MODE = 0, 79 DT_VRF_MODE = 1, 80}; 81 82struct seg6_end_dt_info { 83 enum seg6_end_dt_mode mode; 84 85 struct net *net; 86 /* VRF device associated to the routing table used by the SRv6 87 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. 88 */ 89 int vrf_ifindex; 90 int vrf_table; 91 92 /* tunneled packet family (IPv4 or IPv6). 93 * Protocol and header length are inferred from family. 94 */ 95 u16 family; 96}; 97 98struct pcpu_seg6_local_counters { 99 u64_stats_t packets; 100 u64_stats_t bytes; 101 u64_stats_t errors; 102 103 struct u64_stats_sync syncp; 104}; 105 106/* This struct groups all the SRv6 Behavior counters supported so far. 107 * 108 * put_nla_counters() makes use of this data structure to collect all counter 109 * values after the per-CPU counter evaluation has been performed. 110 * Finally, each counter value (in seg6_local_counters) is stored in the 111 * corresponding netlink attribute and sent to user space. 112 * 113 * NB: we don't want to expose this structure to user space! 114 */ 115struct seg6_local_counters { 116 __u64 packets; 117 __u64 bytes; 118 __u64 errors; 119}; 120 121#define seg6_local_alloc_pcpu_counters(__gfp) \ 122 __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \ 123 ((__gfp) | __GFP_ZERO)) 124 125#define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS) 126 127struct seg6_local_lwt { 128 int action; 129 struct ipv6_sr_hdr *srh; 130 int table; 131 struct in_addr nh4; 132 struct in6_addr nh6; 133 int iif; 134 int oif; 135 struct bpf_lwt_prog bpf; 136#ifdef CONFIG_NET_L3_MASTER_DEV 137 struct seg6_end_dt_info dt_info; 138#endif 139 struct pcpu_seg6_local_counters __percpu *pcpu_counters; 140 141 int headroom; 142 struct seg6_action_desc *desc; 143 /* unlike the required attrs, we have to track the optional attributes 144 * that have been effectively parsed. 145 */ 146 unsigned long parsed_optattrs; 147}; 148 149static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) 150{ 151 return (struct seg6_local_lwt *)lwt->data; 152} 153 154static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) 155{ 156 struct ipv6_sr_hdr *srh; 157 158 srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH); 159 if (!srh) 160 return NULL; 161 162#ifdef CONFIG_IPV6_SEG6_HMAC 163 if (!seg6_hmac_validate_skb(skb)) 164 return NULL; 165#endif 166 167 return srh; 168} 169 170static bool decap_and_validate(struct sk_buff *skb, int proto) 171{ 172 struct ipv6_sr_hdr *srh; 173 unsigned int off = 0; 174 175 srh = seg6_get_srh(skb, 0); 176 if (srh && srh->segments_left > 0) 177 return false; 178 179#ifdef CONFIG_IPV6_SEG6_HMAC 180 if (srh && !seg6_hmac_validate_skb(skb)) 181 return false; 182#endif 183 184 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0) 185 return false; 186 187 if (!pskb_pull(skb, off)) 188 return false; 189 190 skb_postpull_rcsum(skb, skb_network_header(skb), off); 191 192 skb_reset_network_header(skb); 193 skb_reset_transport_header(skb); 194 if (iptunnel_pull_offloads(skb)) 195 return false; 196 197 return true; 198} 199 200static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) 201{ 202 struct in6_addr *addr; 203 204 srh->segments_left--; 205 addr = srh->segments + srh->segments_left; 206 *daddr = *addr; 207} 208 209static int 210seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, 211 u32 tbl_id, bool local_delivery) 212{ 213 struct net *net = dev_net(skb->dev); 214 struct ipv6hdr *hdr = ipv6_hdr(skb); 215 int flags = RT6_LOOKUP_F_HAS_SADDR; 216 struct dst_entry *dst = NULL; 217 struct rt6_info *rt; 218 struct flowi6 fl6; 219 int dev_flags = 0; 220 221 memset(&fl6, 0, sizeof(fl6)); 222 fl6.flowi6_iif = skb->dev->ifindex; 223 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; 224 fl6.saddr = hdr->saddr; 225 fl6.flowlabel = ip6_flowinfo(hdr); 226 fl6.flowi6_mark = skb->mark; 227 fl6.flowi6_proto = hdr->nexthdr; 228 229 if (nhaddr) 230 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; 231 232 if (!tbl_id) { 233 dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); 234 } else { 235 struct fib6_table *table; 236 237 table = fib6_get_table(net, tbl_id); 238 if (!table) 239 goto out; 240 241 rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); 242 dst = &rt->dst; 243 } 244 245 /* we want to discard traffic destined for local packet processing, 246 * if @local_delivery is set to false. 247 */ 248 if (!local_delivery) 249 dev_flags |= IFF_LOOPBACK; 250 251 if (dst && (dst->dev->flags & dev_flags) && !dst->error) { 252 dst_release(dst); 253 dst = NULL; 254 } 255 256out: 257 if (!dst) { 258 rt = net->ipv6.ip6_blk_hole_entry; 259 dst = &rt->dst; 260 dst_hold(dst); 261 } 262 263 skb_dst_drop(skb); 264 skb_dst_set(skb, dst); 265 return dst->error; 266} 267 268int seg6_lookup_nexthop(struct sk_buff *skb, 269 struct in6_addr *nhaddr, u32 tbl_id) 270{ 271 return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); 272} 273 274/* regular endpoint function */ 275static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) 276{ 277 struct ipv6_sr_hdr *srh; 278 279 srh = get_and_validate_srh(skb); 280 if (!srh) 281 goto drop; 282 283 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 284 285 seg6_lookup_nexthop(skb, NULL, 0); 286 287 return dst_input(skb); 288 289drop: 290 kfree_skb(skb); 291 return -EINVAL; 292} 293 294/* regular endpoint, and forward to specified nexthop */ 295static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) 296{ 297 struct ipv6_sr_hdr *srh; 298 299 srh = get_and_validate_srh(skb); 300 if (!srh) 301 goto drop; 302 303 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 304 305 seg6_lookup_nexthop(skb, &slwt->nh6, 0); 306 307 return dst_input(skb); 308 309drop: 310 kfree_skb(skb); 311 return -EINVAL; 312} 313 314static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) 315{ 316 struct ipv6_sr_hdr *srh; 317 318 srh = get_and_validate_srh(skb); 319 if (!srh) 320 goto drop; 321 322 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 323 324 seg6_lookup_nexthop(skb, NULL, slwt->table); 325 326 return dst_input(skb); 327 328drop: 329 kfree_skb(skb); 330 return -EINVAL; 331} 332 333/* decapsulate and forward inner L2 frame on specified interface */ 334static int input_action_end_dx2(struct sk_buff *skb, 335 struct seg6_local_lwt *slwt) 336{ 337 struct net *net = dev_net(skb->dev); 338 struct net_device *odev; 339 struct ethhdr *eth; 340 341 if (!decap_and_validate(skb, IPPROTO_ETHERNET)) 342 goto drop; 343 344 if (!pskb_may_pull(skb, ETH_HLEN)) 345 goto drop; 346 347 skb_reset_mac_header(skb); 348 eth = (struct ethhdr *)skb->data; 349 350 /* To determine the frame's protocol, we assume it is 802.3. This avoids 351 * a call to eth_type_trans(), which is not really relevant for our 352 * use case. 353 */ 354 if (!eth_proto_is_802_3(eth->h_proto)) 355 goto drop; 356 357 odev = dev_get_by_index_rcu(net, slwt->oif); 358 if (!odev) 359 goto drop; 360 361 /* As we accept Ethernet frames, make sure the egress device is of 362 * the correct type. 363 */ 364 if (odev->type != ARPHRD_ETHER) 365 goto drop; 366 367 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev)) 368 goto drop; 369 370 skb_orphan(skb); 371 372 if (skb_warn_if_lro(skb)) 373 goto drop; 374 375 skb_forward_csum(skb); 376 377 if (skb->len - ETH_HLEN > odev->mtu) 378 goto drop; 379 380 skb->dev = odev; 381 skb->protocol = eth->h_proto; 382 383 return dev_queue_xmit(skb); 384 385drop: 386 kfree_skb(skb); 387 return -EINVAL; 388} 389 390static int input_action_end_dx6_finish(struct net *net, struct sock *sk, 391 struct sk_buff *skb) 392{ 393 struct dst_entry *orig_dst = skb_dst(skb); 394 struct in6_addr *nhaddr = NULL; 395 struct seg6_local_lwt *slwt; 396 397 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 398 399 /* The inner packet is not associated to any local interface, 400 * so we do not call netif_rx(). 401 * 402 * If slwt->nh6 is set to ::, then lookup the nexthop for the 403 * inner packet's DA. Otherwise, use the specified nexthop. 404 */ 405 if (!ipv6_addr_any(&slwt->nh6)) 406 nhaddr = &slwt->nh6; 407 408 seg6_lookup_nexthop(skb, nhaddr, 0); 409 410 return dst_input(skb); 411} 412 413/* decapsulate and forward to specified nexthop */ 414static int input_action_end_dx6(struct sk_buff *skb, 415 struct seg6_local_lwt *slwt) 416{ 417 /* this function accepts IPv6 encapsulated packets, with either 418 * an SRH with SL=0, or no SRH. 419 */ 420 421 if (!decap_and_validate(skb, IPPROTO_IPV6)) 422 goto drop; 423 424 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 425 goto drop; 426 427 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 428 nf_reset_ct(skb); 429 430 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 431 return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 432 dev_net(skb->dev), NULL, skb, NULL, 433 skb_dst(skb)->dev, input_action_end_dx6_finish); 434 435 return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); 436drop: 437 kfree_skb(skb); 438 return -EINVAL; 439} 440 441static int input_action_end_dx4_finish(struct net *net, struct sock *sk, 442 struct sk_buff *skb) 443{ 444 struct dst_entry *orig_dst = skb_dst(skb); 445 struct seg6_local_lwt *slwt; 446 struct iphdr *iph; 447 __be32 nhaddr; 448 int err; 449 450 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 451 452 iph = ip_hdr(skb); 453 454 nhaddr = slwt->nh4.s_addr ?: iph->daddr; 455 456 skb_dst_drop(skb); 457 458 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); 459 if (err) { 460 kfree_skb(skb); 461 return -EINVAL; 462 } 463 464 return dst_input(skb); 465} 466 467static int input_action_end_dx4(struct sk_buff *skb, 468 struct seg6_local_lwt *slwt) 469{ 470 if (!decap_and_validate(skb, IPPROTO_IPIP)) 471 goto drop; 472 473 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 474 goto drop; 475 476 skb->protocol = htons(ETH_P_IP); 477 skb_set_transport_header(skb, sizeof(struct iphdr)); 478 nf_reset_ct(skb); 479 480 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 481 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 482 dev_net(skb->dev), NULL, skb, NULL, 483 skb_dst(skb)->dev, input_action_end_dx4_finish); 484 485 return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); 486drop: 487 kfree_skb(skb); 488 return -EINVAL; 489} 490 491#ifdef CONFIG_NET_L3_MASTER_DEV 492static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) 493{ 494 const struct nl_info *nli = &fib6_cfg->fc_nlinfo; 495 496 return nli->nl_net; 497} 498 499static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, 500 u16 family, struct netlink_ext_ack *extack) 501{ 502 struct seg6_end_dt_info *info = &slwt->dt_info; 503 int vrf_ifindex; 504 struct net *net; 505 506 net = fib6_config_get_net(cfg); 507 508 /* note that vrf_table was already set by parse_nla_vrftable() */ 509 vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, 510 info->vrf_table); 511 if (vrf_ifindex < 0) { 512 if (vrf_ifindex == -EPERM) { 513 NL_SET_ERR_MSG(extack, 514 "Strict mode for VRF is disabled"); 515 } else if (vrf_ifindex == -ENODEV) { 516 NL_SET_ERR_MSG(extack, 517 "Table has no associated VRF device"); 518 } else { 519 pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", 520 vrf_ifindex); 521 } 522 523 return vrf_ifindex; 524 } 525 526 info->net = net; 527 info->vrf_ifindex = vrf_ifindex; 528 529 info->family = family; 530 info->mode = DT_VRF_MODE; 531 532 return 0; 533} 534 535/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and 536 * routes the IPv4/IPv6 packet by looking at the configured routing table. 537 * 538 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment 539 * Routing Header packets) from several interfaces and the outer IPv6 540 * destination address (DA) is used for retrieving the specific instance of the 541 * End.DT4/DT6 behavior that should process the packets. 542 * 543 * However, the inner IPv4/IPv6 packet is not really bound to any receiving 544 * interface and thus the End.DT4/DT6 sets the VRF (associated with the 545 * corresponding routing table) as the *receiving* interface. 546 * In other words, the End.DT4/DT6 processes a packet as if it has been received 547 * directly by the VRF (and not by one of its slave devices, if any). 548 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in 549 * according to the routing table configured by the End.DT4/DT6 instance. 550 * 551 * This design allows you to get some interesting features like: 552 * 1) the statistics on rx packets; 553 * 2) the possibility to install a packet sniffer on the receiving interface 554 * (the VRF one) for looking at the incoming packets; 555 * 3) the possibility to leverage the netfilter prerouting hook for the inner 556 * IPv4 packet. 557 * 558 * This function returns: 559 * - the sk_buff* when the VRF rcv handler has processed the packet correctly; 560 * - NULL when the skb is consumed by the VRF rcv handler; 561 * - a pointer which encodes a negative error number in case of error. 562 * Note that in this case, the function takes care of freeing the skb. 563 */ 564static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, 565 struct net_device *dev) 566{ 567 /* based on l3mdev_ip_rcv; we are only interested in the master */ 568 if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) 569 goto drop; 570 571 if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) 572 goto drop; 573 574 /* the decap packet IPv4/IPv6 does not come with any mac header info. 575 * We must unset the mac header to allow the VRF device to rebuild it, 576 * just in case there is a sniffer attached on the device. 577 */ 578 skb_unset_mac_header(skb); 579 580 skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); 581 if (!skb) 582 /* the skb buffer was consumed by the handler */ 583 return NULL; 584 585 /* when a packet is received by a VRF or by one of its slaves, the 586 * master device reference is set into the skb. 587 */ 588 if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) 589 goto drop; 590 591 return skb; 592 593drop: 594 kfree_skb(skb); 595 return ERR_PTR(-EINVAL); 596} 597 598static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, 599 struct seg6_end_dt_info *info) 600{ 601 int vrf_ifindex = info->vrf_ifindex; 602 struct net *net = info->net; 603 604 if (unlikely(vrf_ifindex < 0)) 605 goto error; 606 607 if (unlikely(!net_eq(dev_net(skb->dev), net))) 608 goto error; 609 610 return dev_get_by_index_rcu(net, vrf_ifindex); 611 612error: 613 return NULL; 614} 615 616static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, 617 struct seg6_local_lwt *slwt, u16 family) 618{ 619 struct seg6_end_dt_info *info = &slwt->dt_info; 620 struct net_device *vrf; 621 __be16 protocol; 622 int hdrlen; 623 624 vrf = end_dt_get_vrf_rcu(skb, info); 625 if (unlikely(!vrf)) 626 goto drop; 627 628 switch (family) { 629 case AF_INET: 630 protocol = htons(ETH_P_IP); 631 hdrlen = sizeof(struct iphdr); 632 break; 633 case AF_INET6: 634 protocol = htons(ETH_P_IPV6); 635 hdrlen = sizeof(struct ipv6hdr); 636 break; 637 case AF_UNSPEC: 638 fallthrough; 639 default: 640 goto drop; 641 } 642 643 if (unlikely(info->family != AF_UNSPEC && info->family != family)) { 644 pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); 645 goto drop; 646 } 647 648 skb->protocol = protocol; 649 650 skb_dst_drop(skb); 651 652 skb_set_transport_header(skb, hdrlen); 653 nf_reset_ct(skb); 654 655 return end_dt_vrf_rcv(skb, family, vrf); 656 657drop: 658 kfree_skb(skb); 659 return ERR_PTR(-EINVAL); 660} 661 662static int input_action_end_dt4(struct sk_buff *skb, 663 struct seg6_local_lwt *slwt) 664{ 665 struct iphdr *iph; 666 int err; 667 668 if (!decap_and_validate(skb, IPPROTO_IPIP)) 669 goto drop; 670 671 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 672 goto drop; 673 674 skb = end_dt_vrf_core(skb, slwt, AF_INET); 675 if (!skb) 676 /* packet has been processed and consumed by the VRF */ 677 return 0; 678 679 if (IS_ERR(skb)) 680 return PTR_ERR(skb); 681 682 iph = ip_hdr(skb); 683 684 err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); 685 if (unlikely(err)) 686 goto drop; 687 688 return dst_input(skb); 689 690drop: 691 kfree_skb(skb); 692 return -EINVAL; 693} 694 695static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg, 696 struct netlink_ext_ack *extack) 697{ 698 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack); 699} 700 701static enum 702seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) 703{ 704 unsigned long parsed_optattrs = slwt->parsed_optattrs; 705 bool legacy, vrfmode; 706 707 legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)); 708 vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)); 709 710 if (!(legacy ^ vrfmode)) 711 /* both are absent or present: invalid DT6 mode */ 712 return DT_INVALID_MODE; 713 714 return legacy ? DT_LEGACY_MODE : DT_VRF_MODE; 715} 716 717static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt) 718{ 719 struct seg6_end_dt_info *info = &slwt->dt_info; 720 721 return info->mode; 722} 723 724static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg, 725 struct netlink_ext_ack *extack) 726{ 727 enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt); 728 struct seg6_end_dt_info *info = &slwt->dt_info; 729 730 switch (mode) { 731 case DT_LEGACY_MODE: 732 info->mode = DT_LEGACY_MODE; 733 return 0; 734 case DT_VRF_MODE: 735 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack); 736 default: 737 NL_SET_ERR_MSG(extack, "table or vrftable must be specified"); 738 return -EINVAL; 739 } 740} 741#endif 742 743static int input_action_end_dt6(struct sk_buff *skb, 744 struct seg6_local_lwt *slwt) 745{ 746 if (!decap_and_validate(skb, IPPROTO_IPV6)) 747 goto drop; 748 749 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 750 goto drop; 751 752#ifdef CONFIG_NET_L3_MASTER_DEV 753 if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE) 754 goto legacy_mode; 755 756 /* DT6_VRF_MODE */ 757 skb = end_dt_vrf_core(skb, slwt, AF_INET6); 758 if (!skb) 759 /* packet has been processed and consumed by the VRF */ 760 return 0; 761 762 if (IS_ERR(skb)) 763 return PTR_ERR(skb); 764 765 /* note: this time we do not need to specify the table because the VRF 766 * takes care of selecting the correct table. 767 */ 768 seg6_lookup_any_nexthop(skb, NULL, 0, true); 769 770 return dst_input(skb); 771 772legacy_mode: 773#endif 774 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 775 776 seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); 777 778 return dst_input(skb); 779 780drop: 781 kfree_skb(skb); 782 return -EINVAL; 783} 784 785#ifdef CONFIG_NET_L3_MASTER_DEV 786static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, 787 struct netlink_ext_ack *extack) 788{ 789 return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); 790} 791 792static int input_action_end_dt46(struct sk_buff *skb, 793 struct seg6_local_lwt *slwt) 794{ 795 unsigned int off = 0; 796 int nexthdr; 797 798 nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); 799 if (unlikely(nexthdr < 0)) 800 goto drop; 801 802 switch (nexthdr) { 803 case IPPROTO_IPIP: 804 return input_action_end_dt4(skb, slwt); 805 case IPPROTO_IPV6: 806 return input_action_end_dt6(skb, slwt); 807 } 808 809drop: 810 kfree_skb(skb); 811 return -EINVAL; 812} 813#endif 814 815/* push an SRH on top of the current one */ 816static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) 817{ 818 struct ipv6_sr_hdr *srh; 819 int err = -EINVAL; 820 821 srh = get_and_validate_srh(skb); 822 if (!srh) 823 goto drop; 824 825 err = seg6_do_srh_inline(skb, slwt->srh); 826 if (err) 827 goto drop; 828 829 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 830 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 831 832 seg6_lookup_nexthop(skb, NULL, 0); 833 834 return dst_input(skb); 835 836drop: 837 kfree_skb(skb); 838 return err; 839} 840 841/* encapsulate within an outer IPv6 header and a specified SRH */ 842static int input_action_end_b6_encap(struct sk_buff *skb, 843 struct seg6_local_lwt *slwt) 844{ 845 struct ipv6_sr_hdr *srh; 846 int err = -EINVAL; 847 848 srh = get_and_validate_srh(skb); 849 if (!srh) 850 goto drop; 851 852 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 853 854 skb_reset_inner_headers(skb); 855 skb->encapsulation = 1; 856 857 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6); 858 if (err) 859 goto drop; 860 861 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 862 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 863 864 seg6_lookup_nexthop(skb, NULL, 0); 865 866 return dst_input(skb); 867 868drop: 869 kfree_skb(skb); 870 return err; 871} 872 873DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); 874 875bool seg6_bpf_has_valid_srh(struct sk_buff *skb) 876{ 877 struct seg6_bpf_srh_state *srh_state = 878 this_cpu_ptr(&seg6_bpf_srh_states); 879 struct ipv6_sr_hdr *srh = srh_state->srh; 880 881 if (unlikely(srh == NULL)) 882 return false; 883 884 if (unlikely(!srh_state->valid)) { 885 if ((srh_state->hdrlen & 7) != 0) 886 return false; 887 888 srh->hdrlen = (u8)(srh_state->hdrlen >> 3); 889 if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true)) 890 return false; 891 892 srh_state->valid = true; 893 } 894 895 return true; 896} 897 898static int input_action_end_bpf(struct sk_buff *skb, 899 struct seg6_local_lwt *slwt) 900{ 901 struct seg6_bpf_srh_state *srh_state = 902 this_cpu_ptr(&seg6_bpf_srh_states); 903 struct ipv6_sr_hdr *srh; 904 int ret; 905 906 srh = get_and_validate_srh(skb); 907 if (!srh) { 908 kfree_skb(skb); 909 return -EINVAL; 910 } 911 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 912 913 /* preempt_disable is needed to protect the per-CPU buffer srh_state, 914 * which is also accessed by the bpf_lwt_seg6_* helpers 915 */ 916 preempt_disable(); 917 srh_state->srh = srh; 918 srh_state->hdrlen = srh->hdrlen << 3; 919 srh_state->valid = true; 920 921 rcu_read_lock(); 922 bpf_compute_data_pointers(skb); 923 ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb); 924 rcu_read_unlock(); 925 926 switch (ret) { 927 case BPF_OK: 928 case BPF_REDIRECT: 929 break; 930 case BPF_DROP: 931 goto drop; 932 default: 933 pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret); 934 goto drop; 935 } 936 937 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 938 goto drop; 939 940 preempt_enable(); 941 if (ret != BPF_REDIRECT) 942 seg6_lookup_nexthop(skb, NULL, 0); 943 944 return dst_input(skb); 945 946drop: 947 preempt_enable(); 948 kfree_skb(skb); 949 return -EINVAL; 950} 951 952static struct seg6_action_desc seg6_action_table[] = { 953 { 954 .action = SEG6_LOCAL_ACTION_END, 955 .attrs = 0, 956 .optattrs = SEG6_F_LOCAL_COUNTERS, 957 .input = input_action_end, 958 }, 959 { 960 .action = SEG6_LOCAL_ACTION_END_X, 961 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), 962 .optattrs = SEG6_F_LOCAL_COUNTERS, 963 .input = input_action_end_x, 964 }, 965 { 966 .action = SEG6_LOCAL_ACTION_END_T, 967 .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), 968 .optattrs = SEG6_F_LOCAL_COUNTERS, 969 .input = input_action_end_t, 970 }, 971 { 972 .action = SEG6_LOCAL_ACTION_END_DX2, 973 .attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF), 974 .optattrs = SEG6_F_LOCAL_COUNTERS, 975 .input = input_action_end_dx2, 976 }, 977 { 978 .action = SEG6_LOCAL_ACTION_END_DX6, 979 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), 980 .optattrs = SEG6_F_LOCAL_COUNTERS, 981 .input = input_action_end_dx6, 982 }, 983 { 984 .action = SEG6_LOCAL_ACTION_END_DX4, 985 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4), 986 .optattrs = SEG6_F_LOCAL_COUNTERS, 987 .input = input_action_end_dx4, 988 }, 989 { 990 .action = SEG6_LOCAL_ACTION_END_DT4, 991 .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 992 .optattrs = SEG6_F_LOCAL_COUNTERS, 993#ifdef CONFIG_NET_L3_MASTER_DEV 994 .input = input_action_end_dt4, 995 .slwt_ops = { 996 .build_state = seg6_end_dt4_build, 997 }, 998#endif 999 }, 1000 { 1001 .action = SEG6_LOCAL_ACTION_END_DT6, 1002#ifdef CONFIG_NET_L3_MASTER_DEV 1003 .attrs = 0, 1004 .optattrs = SEG6_F_LOCAL_COUNTERS | 1005 SEG6_F_ATTR(SEG6_LOCAL_TABLE) | 1006 SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 1007 .slwt_ops = { 1008 .build_state = seg6_end_dt6_build, 1009 }, 1010#else 1011 .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), 1012 .optattrs = SEG6_F_LOCAL_COUNTERS, 1013#endif 1014 .input = input_action_end_dt6, 1015 }, 1016 { 1017 .action = SEG6_LOCAL_ACTION_END_DT46, 1018 .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 1019 .optattrs = SEG6_F_LOCAL_COUNTERS, 1020#ifdef CONFIG_NET_L3_MASTER_DEV 1021 .input = input_action_end_dt46, 1022 .slwt_ops = { 1023 .build_state = seg6_end_dt46_build, 1024 }, 1025#endif 1026 }, 1027 { 1028 .action = SEG6_LOCAL_ACTION_END_B6, 1029 .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), 1030 .optattrs = SEG6_F_LOCAL_COUNTERS, 1031 .input = input_action_end_b6, 1032 }, 1033 { 1034 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP, 1035 .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), 1036 .optattrs = SEG6_F_LOCAL_COUNTERS, 1037 .input = input_action_end_b6_encap, 1038 .static_headroom = sizeof(struct ipv6hdr), 1039 }, 1040 { 1041 .action = SEG6_LOCAL_ACTION_END_BPF, 1042 .attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF), 1043 .optattrs = SEG6_F_LOCAL_COUNTERS, 1044 .input = input_action_end_bpf, 1045 }, 1046 1047}; 1048 1049static struct seg6_action_desc *__get_action_desc(int action) 1050{ 1051 struct seg6_action_desc *desc; 1052 int i, count; 1053 1054 count = ARRAY_SIZE(seg6_action_table); 1055 for (i = 0; i < count; i++) { 1056 desc = &seg6_action_table[i]; 1057 if (desc->action == action) 1058 return desc; 1059 } 1060 1061 return NULL; 1062} 1063 1064static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt) 1065{ 1066 return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS; 1067} 1068 1069static void seg6_local_update_counters(struct seg6_local_lwt *slwt, 1070 unsigned int len, int err) 1071{ 1072 struct pcpu_seg6_local_counters *pcounters; 1073 1074 pcounters = this_cpu_ptr(slwt->pcpu_counters); 1075 u64_stats_update_begin(&pcounters->syncp); 1076 1077 if (likely(!err)) { 1078 u64_stats_inc(&pcounters->packets); 1079 u64_stats_add(&pcounters->bytes, len); 1080 } else { 1081 u64_stats_inc(&pcounters->errors); 1082 } 1083 1084 u64_stats_update_end(&pcounters->syncp); 1085} 1086 1087static int seg6_local_input_core(struct net *net, struct sock *sk, 1088 struct sk_buff *skb) 1089{ 1090 struct dst_entry *orig_dst = skb_dst(skb); 1091 struct seg6_action_desc *desc; 1092 struct seg6_local_lwt *slwt; 1093 unsigned int len = skb->len; 1094 int rc; 1095 1096 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 1097 desc = slwt->desc; 1098 1099 rc = desc->input(skb, slwt); 1100 1101 if (!seg6_lwtunnel_counters_enabled(slwt)) 1102 return rc; 1103 1104 seg6_local_update_counters(slwt, len, rc); 1105 1106 return rc; 1107} 1108 1109static int seg6_local_input(struct sk_buff *skb) 1110{ 1111 if (skb->protocol != htons(ETH_P_IPV6)) { 1112 kfree_skb(skb); 1113 return -EINVAL; 1114 } 1115 1116 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 1117 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, 1118 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 1119 seg6_local_input_core); 1120 1121 return seg6_local_input_core(dev_net(skb->dev), NULL, skb); 1122} 1123 1124static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { 1125 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, 1126 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, 1127 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, 1128 [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, 1129 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, 1130 .len = sizeof(struct in_addr) }, 1131 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, 1132 .len = sizeof(struct in6_addr) }, 1133 [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, 1134 [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, 1135 [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, 1136 [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED }, 1137}; 1138 1139static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1140{ 1141 struct ipv6_sr_hdr *srh; 1142 int len; 1143 1144 srh = nla_data(attrs[SEG6_LOCAL_SRH]); 1145 len = nla_len(attrs[SEG6_LOCAL_SRH]); 1146 1147 /* SRH must contain at least one segment */ 1148 if (len < sizeof(*srh) + sizeof(struct in6_addr)) 1149 return -EINVAL; 1150 1151 if (!seg6_validate_srh(srh, len, false)) 1152 return -EINVAL; 1153 1154 slwt->srh = kmemdup(srh, len, GFP_KERNEL); 1155 if (!slwt->srh) 1156 return -ENOMEM; 1157 1158 slwt->headroom += len; 1159 1160 return 0; 1161} 1162 1163static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1164{ 1165 struct ipv6_sr_hdr *srh; 1166 struct nlattr *nla; 1167 int len; 1168 1169 srh = slwt->srh; 1170 len = (srh->hdrlen + 1) << 3; 1171 1172 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len); 1173 if (!nla) 1174 return -EMSGSIZE; 1175 1176 memcpy(nla_data(nla), srh, len); 1177 1178 return 0; 1179} 1180 1181static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1182{ 1183 int len = (a->srh->hdrlen + 1) << 3; 1184 1185 if (len != ((b->srh->hdrlen + 1) << 3)) 1186 return 1; 1187 1188 return memcmp(a->srh, b->srh, len); 1189} 1190 1191static void destroy_attr_srh(struct seg6_local_lwt *slwt) 1192{ 1193 kfree(slwt->srh); 1194} 1195 1196static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1197{ 1198 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); 1199 1200 return 0; 1201} 1202 1203static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1204{ 1205 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table)) 1206 return -EMSGSIZE; 1207 1208 return 0; 1209} 1210 1211static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1212{ 1213 if (a->table != b->table) 1214 return 1; 1215 1216 return 0; 1217} 1218 1219static struct 1220seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) 1221{ 1222#ifdef CONFIG_NET_L3_MASTER_DEV 1223 return &slwt->dt_info; 1224#else 1225 return ERR_PTR(-EOPNOTSUPP); 1226#endif 1227} 1228 1229static int parse_nla_vrftable(struct nlattr **attrs, 1230 struct seg6_local_lwt *slwt) 1231{ 1232 struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); 1233 1234 if (IS_ERR(info)) 1235 return PTR_ERR(info); 1236 1237 info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]); 1238 1239 return 0; 1240} 1241 1242static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1243{ 1244 struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); 1245 1246 if (IS_ERR(info)) 1247 return PTR_ERR(info); 1248 1249 if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table)) 1250 return -EMSGSIZE; 1251 1252 return 0; 1253} 1254 1255static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1256{ 1257 struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a); 1258 struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b); 1259 1260 if (info_a->vrf_table != info_b->vrf_table) 1261 return 1; 1262 1263 return 0; 1264} 1265 1266static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1267{ 1268 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), 1269 sizeof(struct in_addr)); 1270 1271 return 0; 1272} 1273 1274static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1275{ 1276 struct nlattr *nla; 1277 1278 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr)); 1279 if (!nla) 1280 return -EMSGSIZE; 1281 1282 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr)); 1283 1284 return 0; 1285} 1286 1287static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1288{ 1289 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); 1290} 1291 1292static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1293{ 1294 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), 1295 sizeof(struct in6_addr)); 1296 1297 return 0; 1298} 1299 1300static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1301{ 1302 struct nlattr *nla; 1303 1304 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr)); 1305 if (!nla) 1306 return -EMSGSIZE; 1307 1308 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr)); 1309 1310 return 0; 1311} 1312 1313static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1314{ 1315 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); 1316} 1317 1318static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1319{ 1320 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]); 1321 1322 return 0; 1323} 1324 1325static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1326{ 1327 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif)) 1328 return -EMSGSIZE; 1329 1330 return 0; 1331} 1332 1333static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1334{ 1335 if (a->iif != b->iif) 1336 return 1; 1337 1338 return 0; 1339} 1340 1341static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1342{ 1343 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]); 1344 1345 return 0; 1346} 1347 1348static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1349{ 1350 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif)) 1351 return -EMSGSIZE; 1352 1353 return 0; 1354} 1355 1356static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1357{ 1358 if (a->oif != b->oif) 1359 return 1; 1360 1361 return 0; 1362} 1363 1364#define MAX_PROG_NAME 256 1365static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = { 1366 [SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, }, 1367 [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING, 1368 .len = MAX_PROG_NAME }, 1369}; 1370 1371static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1372{ 1373 struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1]; 1374 struct bpf_prog *p; 1375 int ret; 1376 u32 fd; 1377 1378 ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX, 1379 attrs[SEG6_LOCAL_BPF], 1380 bpf_prog_policy, NULL); 1381 if (ret < 0) 1382 return ret; 1383 1384 if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME]) 1385 return -EINVAL; 1386 1387 slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL); 1388 if (!slwt->bpf.name) 1389 return -ENOMEM; 1390 1391 fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]); 1392 p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL); 1393 if (IS_ERR(p)) { 1394 kfree(slwt->bpf.name); 1395 return PTR_ERR(p); 1396 } 1397 1398 slwt->bpf.prog = p; 1399 return 0; 1400} 1401 1402static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1403{ 1404 struct nlattr *nest; 1405 1406 if (!slwt->bpf.prog) 1407 return 0; 1408 1409 nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF); 1410 if (!nest) 1411 return -EMSGSIZE; 1412 1413 if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id)) 1414 return -EMSGSIZE; 1415 1416 if (slwt->bpf.name && 1417 nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name)) 1418 return -EMSGSIZE; 1419 1420 return nla_nest_end(skb, nest); 1421} 1422 1423static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1424{ 1425 if (!a->bpf.name && !b->bpf.name) 1426 return 0; 1427 1428 if (!a->bpf.name || !b->bpf.name) 1429 return 1; 1430 1431 return strcmp(a->bpf.name, b->bpf.name); 1432} 1433 1434static void destroy_attr_bpf(struct seg6_local_lwt *slwt) 1435{ 1436 kfree(slwt->bpf.name); 1437 if (slwt->bpf.prog) 1438 bpf_prog_put(slwt->bpf.prog); 1439} 1440 1441static const struct 1442nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = { 1443 [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 }, 1444 [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 }, 1445 [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 }, 1446}; 1447 1448static int parse_nla_counters(struct nlattr **attrs, 1449 struct seg6_local_lwt *slwt) 1450{ 1451 struct pcpu_seg6_local_counters __percpu *pcounters; 1452 struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1]; 1453 int ret; 1454 1455 ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX, 1456 attrs[SEG6_LOCAL_COUNTERS], 1457 seg6_local_counters_policy, NULL); 1458 if (ret < 0) 1459 return ret; 1460 1461 /* basic support for SRv6 Behavior counters requires at least: 1462 * packets, bytes and errors. 1463 */ 1464 if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] || 1465 !tb[SEG6_LOCAL_CNT_ERRORS]) 1466 return -EINVAL; 1467 1468 /* counters are always zero initialized */ 1469 pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL); 1470 if (!pcounters) 1471 return -ENOMEM; 1472 1473 slwt->pcpu_counters = pcounters; 1474 1475 return 0; 1476} 1477 1478static int seg6_local_fill_nla_counters(struct sk_buff *skb, 1479 struct seg6_local_counters *counters) 1480{ 1481 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets, 1482 SEG6_LOCAL_CNT_PAD)) 1483 return -EMSGSIZE; 1484 1485 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes, 1486 SEG6_LOCAL_CNT_PAD)) 1487 return -EMSGSIZE; 1488 1489 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors, 1490 SEG6_LOCAL_CNT_PAD)) 1491 return -EMSGSIZE; 1492 1493 return 0; 1494} 1495 1496static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1497{ 1498 struct seg6_local_counters counters = { 0, 0, 0 }; 1499 struct nlattr *nest; 1500 int rc, i; 1501 1502 nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS); 1503 if (!nest) 1504 return -EMSGSIZE; 1505 1506 for_each_possible_cpu(i) { 1507 struct pcpu_seg6_local_counters *pcounters; 1508 u64 packets, bytes, errors; 1509 unsigned int start; 1510 1511 pcounters = per_cpu_ptr(slwt->pcpu_counters, i); 1512 do { 1513 start = u64_stats_fetch_begin_irq(&pcounters->syncp); 1514 1515 packets = u64_stats_read(&pcounters->packets); 1516 bytes = u64_stats_read(&pcounters->bytes); 1517 errors = u64_stats_read(&pcounters->errors); 1518 1519 } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); 1520 1521 counters.packets += packets; 1522 counters.bytes += bytes; 1523 counters.errors += errors; 1524 } 1525 1526 rc = seg6_local_fill_nla_counters(skb, &counters); 1527 if (rc < 0) { 1528 nla_nest_cancel(skb, nest); 1529 return rc; 1530 } 1531 1532 return nla_nest_end(skb, nest); 1533} 1534 1535static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1536{ 1537 /* a and b are equal if both have pcpu_counters set or not */ 1538 return (!!((unsigned long)a->pcpu_counters)) ^ 1539 (!!((unsigned long)b->pcpu_counters)); 1540} 1541 1542static void destroy_attr_counters(struct seg6_local_lwt *slwt) 1543{ 1544 free_percpu(slwt->pcpu_counters); 1545} 1546 1547struct seg6_action_param { 1548 int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); 1549 int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); 1550 int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); 1551 1552 /* optional destroy() callback useful for releasing resources which 1553 * have been previously acquired in the corresponding parse() 1554 * function. 1555 */ 1556 void (*destroy)(struct seg6_local_lwt *slwt); 1557}; 1558 1559static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { 1560 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, 1561 .put = put_nla_srh, 1562 .cmp = cmp_nla_srh, 1563 .destroy = destroy_attr_srh }, 1564 1565 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, 1566 .put = put_nla_table, 1567 .cmp = cmp_nla_table }, 1568 1569 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4, 1570 .put = put_nla_nh4, 1571 .cmp = cmp_nla_nh4 }, 1572 1573 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6, 1574 .put = put_nla_nh6, 1575 .cmp = cmp_nla_nh6 }, 1576 1577 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif, 1578 .put = put_nla_iif, 1579 .cmp = cmp_nla_iif }, 1580 1581 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, 1582 .put = put_nla_oif, 1583 .cmp = cmp_nla_oif }, 1584 1585 [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, 1586 .put = put_nla_bpf, 1587 .cmp = cmp_nla_bpf, 1588 .destroy = destroy_attr_bpf }, 1589 1590 [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable, 1591 .put = put_nla_vrftable, 1592 .cmp = cmp_nla_vrftable }, 1593 1594 [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters, 1595 .put = put_nla_counters, 1596 .cmp = cmp_nla_counters, 1597 .destroy = destroy_attr_counters }, 1598}; 1599 1600/* call the destroy() callback (if available) for each set attribute in 1601 * @parsed_attrs, starting from the first attribute up to the @max_parsed 1602 * (excluded) attribute. 1603 */ 1604static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, 1605 struct seg6_local_lwt *slwt) 1606{ 1607 struct seg6_action_param *param; 1608 int i; 1609 1610 /* Every required seg6local attribute is identified by an ID which is 1611 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask; 1612 * 1613 * We scan the 'parsed_attrs' bitmask, starting from the first attribute 1614 * up to the @max_parsed (excluded) attribute. 1615 * For each set attribute, we retrieve the corresponding destroy() 1616 * callback. If the callback is not available, then we skip to the next 1617 * attribute; otherwise, we call the destroy() callback. 1618 */ 1619 for (i = 0; i < max_parsed; ++i) { 1620 if (!(parsed_attrs & SEG6_F_ATTR(i))) 1621 continue; 1622 1623 param = &seg6_action_params[i]; 1624 1625 if (param->destroy) 1626 param->destroy(slwt); 1627 } 1628} 1629 1630/* release all the resources that may have been acquired during parsing 1631 * operations. 1632 */ 1633static void destroy_attrs(struct seg6_local_lwt *slwt) 1634{ 1635 unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1636 1637 __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt); 1638} 1639 1640static int parse_nla_optional_attrs(struct nlattr **attrs, 1641 struct seg6_local_lwt *slwt) 1642{ 1643 struct seg6_action_desc *desc = slwt->desc; 1644 unsigned long parsed_optattrs = 0; 1645 struct seg6_action_param *param; 1646 int err, i; 1647 1648 for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { 1649 if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i]) 1650 continue; 1651 1652 /* once here, the i-th attribute is provided by the 1653 * userspace AND it is identified optional as well. 1654 */ 1655 param = &seg6_action_params[i]; 1656 1657 err = param->parse(attrs, slwt); 1658 if (err < 0) 1659 goto parse_optattrs_err; 1660 1661 /* current attribute has been correctly parsed */ 1662 parsed_optattrs |= SEG6_F_ATTR(i); 1663 } 1664 1665 /* store in the tunnel state all the optional attributed successfully 1666 * parsed. 1667 */ 1668 slwt->parsed_optattrs = parsed_optattrs; 1669 1670 return 0; 1671 1672parse_optattrs_err: 1673 __destroy_attrs(parsed_optattrs, i, slwt); 1674 1675 return err; 1676} 1677 1678/* call the custom constructor of the behavior during its initialization phase 1679 * and after that all its attributes have been parsed successfully. 1680 */ 1681static int 1682seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg, 1683 struct netlink_ext_ack *extack) 1684{ 1685 struct seg6_action_desc *desc = slwt->desc; 1686 struct seg6_local_lwtunnel_ops *ops; 1687 1688 ops = &desc->slwt_ops; 1689 if (!ops->build_state) 1690 return 0; 1691 1692 return ops->build_state(slwt, cfg, extack); 1693} 1694 1695/* call the custom destructor of the behavior which is invoked before the 1696 * tunnel is going to be destroyed. 1697 */ 1698static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) 1699{ 1700 struct seg6_action_desc *desc = slwt->desc; 1701 struct seg6_local_lwtunnel_ops *ops; 1702 1703 ops = &desc->slwt_ops; 1704 if (!ops->destroy_state) 1705 return; 1706 1707 ops->destroy_state(slwt); 1708} 1709 1710static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1711{ 1712 struct seg6_action_param *param; 1713 struct seg6_action_desc *desc; 1714 unsigned long invalid_attrs; 1715 int i, err; 1716 1717 desc = __get_action_desc(slwt->action); 1718 if (!desc) 1719 return -EINVAL; 1720 1721 if (!desc->input) 1722 return -EOPNOTSUPP; 1723 1724 slwt->desc = desc; 1725 slwt->headroom += desc->static_headroom; 1726 1727 /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be 1728 * disjoined, this allow us to release acquired resources by optional 1729 * attributes and by required attributes independently from each other 1730 * without any interference. 1731 * In other terms, we are sure that we do not release some the acquired 1732 * resources twice. 1733 * 1734 * Note that if an attribute is configured both as required and as 1735 * optional, it means that the user has messed something up in the 1736 * seg6_action_table. Therefore, this check is required for SRv6 1737 * behaviors to work properly. 1738 */ 1739 invalid_attrs = desc->attrs & desc->optattrs; 1740 if (invalid_attrs) { 1741 WARN_ONCE(1, 1742 "An attribute cannot be both required AND optional"); 1743 return -EINVAL; 1744 } 1745 1746 /* parse the required attributes */ 1747 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1748 if (desc->attrs & SEG6_F_ATTR(i)) { 1749 if (!attrs[i]) 1750 return -EINVAL; 1751 1752 param = &seg6_action_params[i]; 1753 1754 err = param->parse(attrs, slwt); 1755 if (err < 0) 1756 goto parse_attrs_err; 1757 } 1758 } 1759 1760 /* parse the optional attributes, if any */ 1761 err = parse_nla_optional_attrs(attrs, slwt); 1762 if (err < 0) 1763 goto parse_attrs_err; 1764 1765 return 0; 1766 1767parse_attrs_err: 1768 /* release any resource that may have been acquired during the i-1 1769 * parse() operations. 1770 */ 1771 __destroy_attrs(desc->attrs, i, slwt); 1772 1773 return err; 1774} 1775 1776static int seg6_local_build_state(struct net *net, struct nlattr *nla, 1777 unsigned int family, const void *cfg, 1778 struct lwtunnel_state **ts, 1779 struct netlink_ext_ack *extack) 1780{ 1781 struct nlattr *tb[SEG6_LOCAL_MAX + 1]; 1782 struct lwtunnel_state *newts; 1783 struct seg6_local_lwt *slwt; 1784 int err; 1785 1786 if (family != AF_INET6) 1787 return -EINVAL; 1788 1789 err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla, 1790 seg6_local_policy, extack); 1791 1792 if (err < 0) 1793 return err; 1794 1795 if (!tb[SEG6_LOCAL_ACTION]) 1796 return -EINVAL; 1797 1798 newts = lwtunnel_state_alloc(sizeof(*slwt)); 1799 if (!newts) 1800 return -ENOMEM; 1801 1802 slwt = seg6_local_lwtunnel(newts); 1803 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); 1804 1805 err = parse_nla_action(tb, slwt); 1806 if (err < 0) 1807 goto out_free; 1808 1809 err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); 1810 if (err < 0) 1811 goto out_destroy_attrs; 1812 1813 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; 1814 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; 1815 newts->headroom = slwt->headroom; 1816 1817 *ts = newts; 1818 1819 return 0; 1820 1821out_destroy_attrs: 1822 destroy_attrs(slwt); 1823out_free: 1824 kfree(newts); 1825 return err; 1826} 1827 1828static void seg6_local_destroy_state(struct lwtunnel_state *lwt) 1829{ 1830 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1831 1832 seg6_local_lwtunnel_destroy_state(slwt); 1833 1834 destroy_attrs(slwt); 1835 1836 return; 1837} 1838 1839static int seg6_local_fill_encap(struct sk_buff *skb, 1840 struct lwtunnel_state *lwt) 1841{ 1842 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1843 struct seg6_action_param *param; 1844 unsigned long attrs; 1845 int i, err; 1846 1847 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) 1848 return -EMSGSIZE; 1849 1850 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1851 1852 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1853 if (attrs & SEG6_F_ATTR(i)) { 1854 param = &seg6_action_params[i]; 1855 err = param->put(skb, slwt); 1856 if (err < 0) 1857 return err; 1858 } 1859 } 1860 1861 return 0; 1862} 1863 1864static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) 1865{ 1866 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1867 unsigned long attrs; 1868 int nlsize; 1869 1870 nlsize = nla_total_size(4); /* action */ 1871 1872 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1873 1874 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH)) 1875 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); 1876 1877 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)) 1878 nlsize += nla_total_size(4); 1879 1880 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4)) 1881 nlsize += nla_total_size(4); 1882 1883 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6)) 1884 nlsize += nla_total_size(16); 1885 1886 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF)) 1887 nlsize += nla_total_size(4); 1888 1889 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF)) 1890 nlsize += nla_total_size(4); 1891 1892 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF)) 1893 nlsize += nla_total_size(sizeof(struct nlattr)) + 1894 nla_total_size(MAX_PROG_NAME) + 1895 nla_total_size(4); 1896 1897 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)) 1898 nlsize += nla_total_size(4); 1899 1900 if (attrs & SEG6_F_LOCAL_COUNTERS) 1901 nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */ 1902 /* SEG6_LOCAL_CNT_PACKETS */ 1903 nla_total_size_64bit(sizeof(__u64)) + 1904 /* SEG6_LOCAL_CNT_BYTES */ 1905 nla_total_size_64bit(sizeof(__u64)) + 1906 /* SEG6_LOCAL_CNT_ERRORS */ 1907 nla_total_size_64bit(sizeof(__u64)); 1908 1909 return nlsize; 1910} 1911 1912static int seg6_local_cmp_encap(struct lwtunnel_state *a, 1913 struct lwtunnel_state *b) 1914{ 1915 struct seg6_local_lwt *slwt_a, *slwt_b; 1916 struct seg6_action_param *param; 1917 unsigned long attrs_a, attrs_b; 1918 int i; 1919 1920 slwt_a = seg6_local_lwtunnel(a); 1921 slwt_b = seg6_local_lwtunnel(b); 1922 1923 if (slwt_a->action != slwt_b->action) 1924 return 1; 1925 1926 attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs; 1927 attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs; 1928 1929 if (attrs_a != attrs_b) 1930 return 1; 1931 1932 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1933 if (attrs_a & SEG6_F_ATTR(i)) { 1934 param = &seg6_action_params[i]; 1935 if (param->cmp(slwt_a, slwt_b)) 1936 return 1; 1937 } 1938 } 1939 1940 return 0; 1941} 1942 1943static const struct lwtunnel_encap_ops seg6_local_ops = { 1944 .build_state = seg6_local_build_state, 1945 .destroy_state = seg6_local_destroy_state, 1946 .input = seg6_local_input, 1947 .fill_encap = seg6_local_fill_encap, 1948 .get_encap_size = seg6_local_get_encap_size, 1949 .cmp_encap = seg6_local_cmp_encap, 1950 .owner = THIS_MODULE, 1951}; 1952 1953int __init seg6_local_init(void) 1954{ 1955 /* If the max total number of defined attributes is reached, then your 1956 * kernel build stops here. 1957 * 1958 * This check is required to avoid arithmetic overflows when processing 1959 * behavior attributes and the maximum number of defined attributes 1960 * exceeds the allowed value. 1961 */ 1962 BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long)); 1963 1964 return lwtunnel_encap_add_ops(&seg6_local_ops, 1965 LWTUNNEL_ENCAP_SEG6_LOCAL); 1966} 1967 1968void seg6_local_exit(void) 1969{ 1970 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); 1971}