xdp_redirect_cpu.bpf.c (12262B)
/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");

/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");

char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	return bpf_ntohs(udph->dest);
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;
	u16 eth_type;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hit
 * same CPU.
 */
SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

SEC("xdp/devmap")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
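
The cpu_map, cpus_available and cpus_count maps above are deliberately left for user space to size and populate before any of the XDP programs can redirect. The sample ships its own loader; the sketch below is only an illustration of how the pieces fit together, assuming a bpftool-generated skeleton named xdp_redirect_cpu.skel.h, that nr_cpus is the const volatile declared in xdp_sample.bpf.h (so it appears under the skeleton's rodata), and libbpf >= 0.8 for bpf_xdp_attach().

/* Hypothetical minimal loader sketch, not the sample's own user-space program. */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/if_link.h>
#include <net/if.h>
#include <stdio.h>
#include <unistd.h>
#include "xdp_redirect_cpu.skel.h"

int main(int argc, char **argv)
{
	struct bpf_cpumap_val qval = { .qsize = 2048 }; /* per-CPU queue, no CPUMAP prog */
	struct xdp_redirect_cpu *skel;
	unsigned int ifindex, n_cpus, i;
	__u32 key0 = 0;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
		return 1;
	}
	ifindex = if_nametoindex(argv[1]);
	n_cpus = libbpf_num_possible_cpus();

	skel = xdp_redirect_cpu__open();
	if (!skel)
		return 1;

	/* cpu_map and cpus_available carry no max_entries in the BPF source;
	 * size them to the number of possible CPUs before load.
	 */
	bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus);
	bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus);
	skel->rodata->nr_cpus = n_cpus; /* assumption: nr_cpus lives in rodata */

	if (xdp_redirect_cpu__load(skel))
		return 1;

	/* Create one cpumap entry (kthread + queue) per CPU and mark each CPU
	 * as a selectable redirect target; round-robin wraps at cpus_count.
	 */
	for (i = 0; i < n_cpus; i++) {
		bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), &i, &qval, 0);
		bpf_map_update_elem(bpf_map__fd(skel->maps.cpus_available), &i, &i, 0);
	}
	bpf_map_update_elem(bpf_map__fd(skel->maps.cpus_count), &key0, &n_cpus, 0);

	if (bpf_xdp_attach(ifindex, bpf_program__fd(skel->progs.xdp_prognum2_round_robin),
			   XDP_FLAGS_DRV_MODE, NULL) < 0)
		return 1;

	printf("redirecting RX on %s across %u CPUs\n", argv[1], n_cpus);
	pause();

	xdp_redirect_cpu__destroy(skel);
	return 0;
}

The same setup works for the other programs; only the bpf_program passed to bpf_xdp_attach() changes, and xdp_prognum0/1 additionally ignore cpus_count since they always pick index 0 of cpus_available.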