sockex3_kern.c (6342B)
/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"	/* provides load_byte/half/word (BPF LD_ABS) */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
	__uint(max_entries, 8);
} jmp_table SEC(".maps");

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4

/* Protocol dispatch routine. It tail-calls the next BPF program depending
 * on the eth proto. Note, we could have used ...
 *
 *   bpf_tail_call(skb, &jmp_table, proto);
 *
 * ... but it would need a large prog_array and cannot be optimised given
 * that the map key is not static.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &jmp_table, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
		break;
	}
}

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct flow_key_record {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
	struct flow_key_record flow;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct globals);
	__uint(max_entries, 32);
} percpu_map SEC(".maps");

/* use poor man's per_cpu until native support is ready */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}

/* some simple stats for user space consumption */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct flow_key_record);
	__type(value, struct pair);
	__uint(max_entries, 1024);
} hash_map SEC(".maps");

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
	struct flow_key_record key = g->flow;
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		/* step over the base GRE header plus any optional fields */
		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		g->flow.ports = load_word(skb, nhoff);
		/* fall through */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}

PROG(PARSE_IP)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, verlen, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	if (unlikely(ip_is_fragment(skb, nhoff)))
		return 0;

	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));

	if (ip_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
	nhoff += (verlen & 0xF) << 2;

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_IPV6)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	ip_proto = load_byte(skb,
			     nhoff + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, daddr));
	nhoff += sizeof(struct ipv6hdr);

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_VLAN)(struct __sk_buff *skb)
{
	__u32 nhoff, proto;

	nhoff = skb->cb[0];

	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
						h_vlan_encapsulated_proto));
	nhoff += sizeof(struct vlan_hdr);
	skb->cb[0] = nhoff;

	parse_eth_proto(skb, proto);

	return 0;
}

PROG(PARSE_MPLS)(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		/* bottom of stack: sniff the IP version nibble of the payload */
		__u8 verlen = load_byte(skb, nhoff);

		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

SEC("socket/0")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, 12); /* EtherType at offset 12 */

	skb->cb[0] = nhoff;	/* cb[0] carries the parse offset across tail calls */
	parse_eth_proto(skb, proto);
	return 0;
}

char _license[] SEC("license") = "GPL";
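
The prog_array above does nothing until user space populates it. Below is a minimal, hypothetical loader sketch (the actual companion in samples/bpf is sockex3_user.c; file and variable names here are illustrative). Assuming libbpf and a compiled sockex3_kern.o, it wires the bpf_func_PARSE_* programs into jmp_table at keys 1..4, matching the PARSE_* defines, and attaches main_prog to a raw packet socket via SO_ATTACH_BPF:

/* sockex3_loader.c - illustrative sketch only, assuming libbpf */
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_ether.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>

int main(void)
{
	/* order matches PARSE_VLAN=1 .. PARSE_IPV6=4 in sockex3_kern.c */
	const char *parsers[] = { "bpf_func_PARSE_VLAN", "bpf_func_PARSE_MPLS",
				  "bpf_func_PARSE_IP", "bpf_func_PARSE_IPV6" };
	struct bpf_object *obj;
	int jmp_fd, main_fd, sock;
	__u32 key;

	obj = bpf_object__open_file("sockex3_kern.o", NULL);
	if (!obj || bpf_object__load(obj))
		return 1;

	jmp_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, "jmp_table"));
	main_fd = bpf_program__fd(bpf_object__find_program_by_name(obj, "main_prog"));
	if (jmp_fd < 0 || main_fd < 0)
		return 1;

	/* populate the tail-call table: key N -> parser program fd */
	for (key = 1; key <= 4; key++) {
		struct bpf_program *prog;
		int prog_fd;

		prog = bpf_object__find_program_by_name(obj, parsers[key - 1]);
		prog_fd = bpf_program__fd(prog);
		if (prog_fd < 0 ||
		    bpf_map_update_elem(jmp_fd, &key, &prog_fd, BPF_ANY))
			return 1;
	}

	/* attach the dispatcher; packets on this socket now run main_prog */
	sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (sock < 0 ||
	    setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_fd, sizeof(main_fd)))
		return 1;

	/* user space can now poll hash_map for per-flow packet/byte counts */
	return 0;
}

Note that the kernel cannot verify prog_array indices at load time, so the loader and the BPF programs must agree on the key numbering; keeping the PARSE_* defines as the single source of truth is what makes the tail-call dispatch in parse_eth_proto safe.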