bpf_flow.c
// SPDX-License-Identifier: GPL-2.0
#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM

/* These are the identifiers of the BPF programs that will be used in tail
 * calls. Kernel program names are limited to 16 characters, including the
 * terminating character; anything beyond the flow_dissector_ prefix above
 * will be cropped in the reported name.
 */
#define IP	0
#define IPV6	1
#define IPV6OP	2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR	3 /* Fragmentation IPv6 Extension Header */
#define MPLS	4
#define VLAN	5
#define MAX_PROG 6

#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct gre_hdr {
	__be16 flags;
	__be16 proto;
};

struct frag_hdr {
	__u8 nexthdr;
	__u8 reserved;
	__be16 frag_off;
	__be32 identification;
};

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, MAX_PROG);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");

static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
					    int ret)
{
	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
	struct bpf_flow_keys val;

	memcpy(&val, keys, sizeof(val));
	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
	return ret;
}

#define IPV6_FLOWLABEL_MASK	__bpf_constant_htonl(0x000FFFFF)
static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
{
	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
}

static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							 __u16 hdr_size,
							 void *buffer)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	__u16 thoff = skb->flow_keys->thoff;
	__u8 *hdr;

	/* Verifies this variable offset does not overflow */
	if (thoff > (USHRT_MAX - hdr_size))
		return NULL;

	hdr = data + thoff;
	if (hdr + hdr_size <= data_end)
		return hdr;

	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
		return NULL;

	return buffer;
}
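
/* The helper above is used by every parser below: it first tries a direct
 * pointer into linear skb data and only falls back to bpf_skb_load_bytes()
 * into the caller's stack buffer when the header is not linearly
 * accessible. A minimal sketch of the calling pattern (illustrative only,
 * mirroring the real callers further down):
 *
 *	struct udphdr *udp, _udp;
 *
 *	udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
 *	if (!udp)
 *		return export_flow_keys(skb->flow_keys, BPF_DROP);
 */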

/* Dispatches on ETHERTYPE */
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (proto) {
	case bpf_htons(ETH_P_IP):
		bpf_tail_call_static(skb, &jmp_table, IP);
		break;
	case bpf_htons(ETH_P_IPV6):
		bpf_tail_call_static(skb, &jmp_table, IPV6);
		break;
	case bpf_htons(ETH_P_MPLS_MC):
	case bpf_htons(ETH_P_MPLS_UC):
		bpf_tail_call_static(skb, &jmp_table, MPLS);
		break;
	case bpf_htons(ETH_P_8021Q):
	case bpf_htons(ETH_P_8021AD):
		bpf_tail_call_static(skb, &jmp_table, VLAN);
		break;
	default:
		/* Protocol not supported */
		return export_flow_keys(keys, BPF_DROP);
	}

	/* Reached only if a tail call above failed */
	return export_flow_keys(keys, BPF_DROP);
}

SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	return parse_eth_proto(skb, keys->n_proto);
}

/* Parses on IPPROTO_* */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data_end = (void *)(long)skb->data_end;
	struct icmphdr *icmp, _icmp;
	struct gre_hdr *gre, _gre;
	struct ethhdr *eth, _eth;
	struct tcphdr *tcp, _tcp;
	struct udphdr *udp, _udp;

	switch (proto) {
	case IPPROTO_ICMP:
		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
		if (!icmp)
			return export_flow_keys(keys, BPF_DROP);
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_IPIP:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
	case IPPROTO_IPV6:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
	case IPPROTO_GRE:
		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
		if (!gre)
			return export_flow_keys(keys, BPF_DROP);

		if (bpf_htons(gre->flags & GRE_VERSION))
			/* Only inspect standard GRE packets with version 0 */
			return export_flow_keys(keys, BPF_OK);

		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
		if (GRE_IS_CSUM(gre->flags))
			keys->thoff += 4; /* Step over checksum and Padding */
		if (GRE_IS_KEY(gre->flags))
			keys->thoff += 4; /* Step over key */
		if (GRE_IS_SEQ(gre->flags))
			keys->thoff += 4; /* Step over sequence number */

		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		if (gre->proto == bpf_htons(ETH_P_TEB)) {
			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
							  &_eth);
			if (!eth)
				return export_flow_keys(keys, BPF_DROP);

			keys->thoff += sizeof(*eth);

			return parse_eth_proto(skb, eth->h_proto);
		} else {
			return parse_eth_proto(skb, gre->proto);
		}
	case IPPROTO_TCP:
		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
		if (!tcp)
			return export_flow_keys(keys, BPF_DROP);

		/* doff is in 4-byte words; 5 is the minimal 20-byte header */
		if (tcp->doff < 5)
			return export_flow_keys(keys, BPF_DROP);

		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = tcp->source;
		keys->dport = tcp->dest;
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
		if (!udp)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = udp->source;
		keys->dport = udp->dest;
		return export_flow_keys(keys, BPF_OK);
	default:
		return export_flow_keys(keys, BPF_DROP);
	}

	return export_flow_keys(keys, BPF_DROP);
}
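
/* Worked example for the GRE branch above: with both the checksum (C) and
 * key (K) bits set, thoff advances by sizeof(struct gre_hdr) + 4 + 4 = 12
 * bytes before the encapsulated protocol is dissected.
 */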

static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (nexthdr) {
	case IPPROTO_HOPOPTS:
	case IPPROTO_DSTOPTS:
		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
		break;
	case IPPROTO_FRAGMENT:
		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
		break;
	default:
		return parse_ip_proto(skb, nexthdr);
	}

	return export_flow_keys(keys, BPF_DROP);
}

PROG(IP)(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data = (void *)(long)skb->data;
	struct iphdr *iph, _iph;
	bool done = false;

	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
	if (!iph)
		return export_flow_keys(keys, BPF_DROP);

	/* IP header cannot be smaller than 20 bytes */
	if (iph->ihl < 5)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IP;
	keys->ipv4_src = iph->saddr;
	keys->ipv4_dst = iph->daddr;
	keys->ip_proto = iph->protocol;

	keys->thoff += iph->ihl << 2;
	if (data + keys->thoff > data_end)
		return export_flow_keys(keys, BPF_DROP);

	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
		keys->is_frag = true;
		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
			/* From second fragment on, packets do not have headers
			 * we can parse.
			 */
			done = true;
		} else {
			keys->is_first_frag = true;
			/* No need to parse fragmented packet unless
			 * explicitly asked for.
			 */
			if (!(keys->flags &
			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
				done = true;
		}
	}

	if (done)
		return export_flow_keys(keys, BPF_OK);

	return parse_ip_proto(skb, iph->protocol);
}

PROG(IPV6)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IPV6;
	/* saddr and daddr are contiguous in the header, so one copy of
	 * 2 * sizeof(saddr) fills both ipv6_src and ipv6_dst.
	 */
	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));

	keys->thoff += sizeof(struct ipv6hdr);
	keys->ip_proto = ip6h->nexthdr;
	keys->flow_label = ip6_flowlabel(ip6h);

	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
		return export_flow_keys(keys, BPF_OK);

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}

PROG(IPV6OP)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6_opt_hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	/* hdrlen is in 8-octet units and does not include the first 8 bytes
	 * of the header
	 */
	keys->thoff += (1 + ip6h->hdrlen) << 3;
	keys->ip_proto = ip6h->nexthdr;

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}
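
/* Worked example for the option header length above: hdrlen == 1 means
 * (1 + 1) << 3 = 16 bytes, i.e. the mandatory first 8 octets plus one
 * additional 8-octet unit.
 */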

PROG(IPV6FR)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct frag_hdr *fragh, _fragh;

	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
	if (!fragh)
		return export_flow_keys(keys, BPF_DROP);

	keys->thoff += sizeof(*fragh);
	keys->is_frag = true;
	keys->ip_proto = fragh->nexthdr;

	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
		keys->is_first_frag = true;

		/* No need to parse fragmented packet unless
		 * explicitly asked for.
		 */
		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
			return export_flow_keys(keys, BPF_OK);
	} else {
		return export_flow_keys(keys, BPF_OK);
	}

	return parse_ipv6_proto(skb, fragh->nexthdr);
}

PROG(MPLS)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct mpls_label *mpls, _mpls;

	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
	if (!mpls)
		return export_flow_keys(keys, BPF_DROP);

	return export_flow_keys(keys, BPF_OK);
}

PROG(VLAN)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct vlan_hdr *vlan, _vlan;

	/* Account for double-tagging */
	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
		if (!vlan)
			return export_flow_keys(keys, BPF_DROP);

		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
			return export_flow_keys(keys, BPF_DROP);

		keys->nhoff += sizeof(*vlan);
		keys->thoff += sizeof(*vlan);
	}

	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
	if (!vlan)
		return export_flow_keys(keys, BPF_DROP);

	keys->nhoff += sizeof(*vlan);
	keys->thoff += sizeof(*vlan);
	/* Only allow 8021AD + 8021Q double tagging and no triple tagging. */
	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
		return export_flow_keys(keys, BPF_DROP);

	keys->n_proto = vlan->h_vlan_encapsulated_proto;
	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}

char __license[] SEC("license") = "GPL";
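
/* A minimal userspace loading sketch (illustrative only; not part of this
 * object file). It assumes libbpf and the program names produced by the
 * PROG macros above (flow_dissector_0 .. flow_dissector_5, since IP..VLAN
 * expand to 0..5 before pasting), populates jmp_table with their fds, and
 * attaches the entry point to the current network namespace. Error
 * handling is omitted for brevity:
 *
 *	struct bpf_object *obj = bpf_object__open_file("bpf_flow.o", NULL);
 *	int jmp_fd, prog_fd;
 *	char name[32];
 *	__u32 key;
 *
 *	bpf_object__load(obj);
 *	jmp_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
 *	for (key = 0; key < MAX_PROG; key++) {
 *		snprintf(name, sizeof(name), "flow_dissector_%u", key);
 *		prog_fd = bpf_program__fd(
 *			bpf_object__find_program_by_name(obj, name));
 *		bpf_map_update_elem(jmp_fd, &key, &prog_fd, BPF_ANY);
 *	}
 *	prog_fd = bpf_program__fd(
 *		bpf_object__find_program_by_name(obj, "_dissect"));
 *	bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
 */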