xt_HMARK.c (8948B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * xt_HMARK - Netfilter module to set mark by means of hashing 4 * 5 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com> 6 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> 7 */ 8 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11#include <linux/module.h> 12#include <linux/skbuff.h> 13#include <linux/icmp.h> 14 15#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/xt_HMARK.h> 17 18#include <net/ip.h> 19#if IS_ENABLED(CONFIG_NF_CONNTRACK) 20#include <net/netfilter/nf_conntrack.h> 21#endif 22#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 23#include <net/ipv6.h> 24#include <linux/netfilter_ipv6/ip6_tables.h> 25#endif 26 27MODULE_LICENSE("GPL"); 28MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>"); 29MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); 30MODULE_ALIAS("ipt_HMARK"); 31MODULE_ALIAS("ip6t_HMARK"); 32 33struct hmark_tuple { 34 __be32 src; 35 __be32 dst; 36 union hmark_ports uports; 37 u8 proto; 38}; 39 40static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) 41{ 42 return (addr32[0] & mask[0]) ^ 43 (addr32[1] & mask[1]) ^ 44 (addr32[2] & mask[2]) ^ 45 (addr32[3] & mask[3]); 46} 47 48static inline __be32 49hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) 50{ 51 switch (l3num) { 52 case AF_INET: 53 return *addr32 & *mask; 54 case AF_INET6: 55 return hmark_addr6_mask(addr32, mask); 56 } 57 return 0; 58} 59 60static inline void hmark_swap_ports(union hmark_ports *uports, 61 const struct xt_hmark_info *info) 62{ 63 union hmark_ports hp; 64 u16 src, dst; 65 66 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; 67 src = ntohs(hp.b16.src); 68 dst = ntohs(hp.b16.dst); 69 70 if (dst > src) 71 uports->v32 = (dst << 16) | src; 72 else 73 uports->v32 = (src << 16) | dst; 74} 75 76static int 77hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, 78 const struct xt_hmark_info *info) 79{ 80#if IS_ENABLED(CONFIG_NF_CONNTRACK) 81 enum ip_conntrack_info ctinfo; 82 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 83 struct nf_conntrack_tuple *otuple; 84 struct nf_conntrack_tuple *rtuple; 85 86 if (ct == NULL) 87 return -1; 88 89 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 90 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 91 92 t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, 93 info->src_mask.ip6); 94 t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, 95 info->dst_mask.ip6); 96 97 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 98 return 0; 99 100 t->proto = nf_ct_protonum(ct); 101 if (t->proto != IPPROTO_ICMP) { 102 t->uports.b16.src = otuple->src.u.all; 103 t->uports.b16.dst = rtuple->src.u.all; 104 hmark_swap_ports(&t->uports, info); 105 } 106 107 return 0; 108#else 109 return -1; 110#endif 111} 112 113/* This hash function is endian independent, to ensure consistent hashing if 114 * the cluster is composed of big and little endian systems. */ 115static inline u32 116hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) 117{ 118 u32 hash; 119 u32 src = ntohl(t->src); 120 u32 dst = ntohl(t->dst); 121 122 if (dst < src) 123 swap(src, dst); 124 125 hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); 126 hash = hash ^ (t->proto & info->proto_mask); 127 128 return reciprocal_scale(hash, info->hmodulus) + info->hoffset; 129} 130 131static void 132hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, 133 struct hmark_tuple *t, const struct xt_hmark_info *info) 134{ 135 int protoff; 136 137 protoff = proto_ports_offset(t->proto); 138 if (protoff < 0) 139 return; 140 141 nhoff += protoff; 142 if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) 143 return; 144 145 hmark_swap_ports(&t->uports, info); 146} 147 148#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 149static int get_inner6_hdr(const struct sk_buff *skb, int *offset) 150{ 151 struct icmp6hdr *icmp6h, _ih6; 152 153 icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); 154 if (icmp6h == NULL) 155 return 0; 156 157 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { 158 *offset += sizeof(struct icmp6hdr); 159 return 1; 160 } 161 return 0; 162} 163 164static int 165hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, 166 const struct xt_hmark_info *info) 167{ 168 struct ipv6hdr *ip6, _ip6; 169 int flag = IP6_FH_F_AUTH; 170 unsigned int nhoff = 0; 171 u16 fragoff = 0; 172 int nexthdr; 173 174 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); 175 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 176 if (nexthdr < 0) 177 return 0; 178 /* No need to check for icmp errors on fragments */ 179 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) 180 goto noicmp; 181 /* Use inner header in case of ICMP errors */ 182 if (get_inner6_hdr(skb, &nhoff)) { 183 ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); 184 if (ip6 == NULL) 185 return -1; 186 /* If AH present, use SPI like in ESP. */ 187 flag = IP6_FH_F_AUTH; 188 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 189 if (nexthdr < 0) 190 return -1; 191 } 192noicmp: 193 t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); 194 t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); 195 196 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 197 return 0; 198 199 t->proto = nexthdr; 200 if (t->proto == IPPROTO_ICMPV6) 201 return 0; 202 203 if (flag & IP6_FH_F_FRAG) 204 return 0; 205 206 hmark_set_tuple_ports(skb, nhoff, t, info); 207 return 0; 208} 209 210static unsigned int 211hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) 212{ 213 const struct xt_hmark_info *info = par->targinfo; 214 struct hmark_tuple t; 215 216 memset(&t, 0, sizeof(struct hmark_tuple)); 217 218 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 219 if (hmark_ct_set_htuple(skb, &t, info) < 0) 220 return XT_CONTINUE; 221 } else { 222 if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) 223 return XT_CONTINUE; 224 } 225 226 skb->mark = hmark_hash(&t, info); 227 return XT_CONTINUE; 228} 229#endif 230 231static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) 232{ 233 const struct icmphdr *icmph; 234 struct icmphdr _ih; 235 236 /* Not enough header? */ 237 icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); 238 if (icmph == NULL || icmph->type > NR_ICMP_TYPES) 239 return 0; 240 241 /* Error message? */ 242 if (!icmp_is_err(icmph->type)) 243 return 0; 244 245 *nhoff += iphsz + sizeof(_ih); 246 return 1; 247} 248 249static int 250hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, 251 const struct xt_hmark_info *info) 252{ 253 struct iphdr *ip, _ip; 254 int nhoff = skb_network_offset(skb); 255 256 ip = (struct iphdr *) (skb->data + nhoff); 257 if (ip->protocol == IPPROTO_ICMP) { 258 /* Use inner header in case of ICMP errors */ 259 if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { 260 ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); 261 if (ip == NULL) 262 return -1; 263 } 264 } 265 266 t->src = ip->saddr & info->src_mask.ip; 267 t->dst = ip->daddr & info->dst_mask.ip; 268 269 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) 270 return 0; 271 272 t->proto = ip->protocol; 273 274 /* ICMP has no ports, skip */ 275 if (t->proto == IPPROTO_ICMP) 276 return 0; 277 278 /* follow-up fragments don't contain ports, skip all fragments */ 279 if (ip_is_fragment(ip)) 280 return 0; 281 282 hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); 283 284 return 0; 285} 286 287static unsigned int 288hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) 289{ 290 const struct xt_hmark_info *info = par->targinfo; 291 struct hmark_tuple t; 292 293 memset(&t, 0, sizeof(struct hmark_tuple)); 294 295 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { 296 if (hmark_ct_set_htuple(skb, &t, info) < 0) 297 return XT_CONTINUE; 298 } else { 299 if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) 300 return XT_CONTINUE; 301 } 302 303 skb->mark = hmark_hash(&t, info); 304 return XT_CONTINUE; 305} 306 307static int hmark_tg_check(const struct xt_tgchk_param *par) 308{ 309 const struct xt_hmark_info *info = par->targinfo; 310 const char *errmsg = "proto mask must be zero with L3 mode"; 311 312 if (!info->hmodulus) 313 return -EINVAL; 314 315 if (info->proto_mask && 316 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) 317 goto err; 318 319 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && 320 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | 321 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) 322 return -EINVAL; 323 324 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && 325 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | 326 XT_HMARK_FLAG(XT_HMARK_DPORT)))) { 327 errmsg = "spi-set and port-set can't be combined"; 328 goto err; 329 } 330 return 0; 331err: 332 pr_info_ratelimited("%s\n", errmsg); 333 return -EINVAL; 334} 335 336static struct xt_target hmark_tg_reg[] __read_mostly = { 337 { 338 .name = "HMARK", 339 .family = NFPROTO_IPV4, 340 .target = hmark_tg_v4, 341 .targetsize = sizeof(struct xt_hmark_info), 342 .checkentry = hmark_tg_check, 343 .me = THIS_MODULE, 344 }, 345#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 346 { 347 .name = "HMARK", 348 .family = NFPROTO_IPV6, 349 .target = hmark_tg_v6, 350 .targetsize = sizeof(struct xt_hmark_info), 351 .checkentry = hmark_tg_check, 352 .me = THIS_MODULE, 353 }, 354#endif 355}; 356 357static int __init hmark_tg_init(void) 358{ 359 return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 360} 361 362static void __exit hmark_tg_exit(void) 363{ 364 xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); 365} 366 367module_init(hmark_tg_init); 368module_exit(hmark_tg_exit);