nf_nat_masquerade.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

struct masq_dev_work {
        struct work_struct work;
        struct net *net;
        netns_tracker ns_tracker;
        union nf_inet_addr addr;
        int ifindex;
        int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT 16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
                       const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        struct nf_conn *ct;
        struct nf_conn_nat *nat;
        enum ip_conntrack_info ctinfo;
        struct nf_nat_range2 newrange;
        const struct rtable *rt;
        __be32 newsrc, nh;

        WARN_ON(hooknum != NF_INET_POST_ROUTING);

        ct = nf_ct_get(skb, &ctinfo);

        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        /* Source address is 0.0.0.0 - locally generated packet that is
         * probably not supposed to be masqueraded.
         */
        if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
                return NF_ACCEPT;

        rt = skb_rtable(skb);
        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
        newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
        if (!newsrc) {
                pr_info("%s ate my IP address\n", out->name);
                return NF_DROP;
        }

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        /* Transfer from original range. */
        memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
        memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
        newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.ip = newsrc;
        newrange.max_addr.ip = newsrc;
        newrange.min_proto = range->min_proto;
        newrange.max_proto = range->max_proto;

        /* Hand modified range to generic setup. */
        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

static void iterate_cleanup_work(struct work_struct *work)
{
        struct nf_ct_iter_data iter_data = {};
        struct masq_dev_work *w;

        w = container_of(work, struct masq_dev_work, work);

        iter_data.net = w->net;
        iter_data.data = (void *)w;
        nf_ct_iterate_cleanup_net(w->iter, &iter_data);

        put_net_track(w->net, &w->ns_tracker);
        kfree(w);
        atomic_dec(&masq_worker_count);
        module_put(THIS_MODULE);
}

/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * In case too many work items have been queued already or memory allocation
 * fails, iteration is skipped, conntrack entries will time out eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
                                 int ifindex,
                                 int (*iter)(struct nf_conn *i, void *data),
                                 gfp_t gfp_flags)
{
        struct masq_dev_work *w;

        if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
                return;

        net = maybe_get_net(net);
        if (!net)
                return;

        if (!try_module_get(THIS_MODULE))
                goto err_module;

        w = kzalloc(sizeof(*w), gfp_flags);
        if (w) {
                /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
                atomic_inc(&masq_worker_count);

                INIT_WORK(&w->work, iterate_cleanup_work);
                w->ifindex = ifindex;
                w->net = net;
                netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
                w->iter = iter;
                if (addr)
                        w->addr = *addr;
                schedule_work(&w->work);
                return;
        }

        module_put(THIS_MODULE);
err_module:
        put_net(net);
}

static int device_cmp(struct nf_conn *i, void *arg)
{
        const struct nf_conn_nat *nat = nfct_nat(i);
        const struct masq_dev_work *w = arg;

        if (!nat)
                return 0;
        return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
                             unsigned long event,
                             void *ptr)
{
        const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);

        if (event == NETDEV_DOWN) {
                /* Device was downed. Search entire table for
                 * conntracks which were associated with that device,
                 * and forget them.
                 */

                nf_nat_masq_schedule(net, NULL, dev->ifindex,
                                     device_cmp, GFP_KERNEL);
        }

        return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
        struct nf_conntrack_tuple *tuple;
        struct masq_dev_work *w = ptr;

        if (!device_cmp(ct, ptr))
                return 0;

        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

        return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
                           unsigned long event,
                           void *ptr)
{
        const struct in_ifaddr *ifa = ptr;
        const struct in_device *idev;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        /* The masq_dev_notifier will catch the case of the device going
         * down. So if the inetdev is dead and being destroyed we have
         * no work to do. Otherwise this is an individual address removal
         * and we have to perform the flush.
         */
        idev = ifa->ifa_dev;
        if (idev->dead)
                return NOTIFY_DONE;

        memset(&addr, 0, sizeof(addr));

        addr.ip = ifa->ifa_address;

        dev = idev->dev;
        nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
                             inet_cmp, GFP_KERNEL);

        return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
        .notifier_call = masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
        .notifier_call = masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
                       const struct in6_addr *daddr, unsigned int srcprefs,
                       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
        const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

        if (!v6_ops)
                return -EHOSTUNREACH;

        return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
        return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out)
{
        enum ip_conntrack_info ctinfo;
        struct nf_conn_nat *nat;
        struct in6_addr src;
        struct nf_conn *ct;
        struct nf_nat_range2 newrange;

        ct = nf_ct_get(skb, &ctinfo);
        WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
                         ctinfo == IP_CT_RELATED_REPLY)));

        if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
                                   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
                return NF_DROP;

        nat = nf_ct_nat_ext_add(ct);
        if (nat)
                nat->masq_index = out->ifindex;

        newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
        newrange.min_addr.in6 = src;
        newrange.max_addr.in6 = src;
        newrange.min_proto = range->min_proto;
        newrange.max_proto = range->max_proto;

        return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
                            unsigned long event, void *ptr)
{
        struct inet6_ifaddr *ifa = ptr;
        const struct net_device *dev;
        union nf_inet_addr addr;

        if (event != NETDEV_DOWN)
                return NOTIFY_DONE;

        dev = ifa->idev->dev;

        memset(&addr, 0, sizeof(addr));

        addr.in6 = ifa->addr;

        nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
                             GFP_ATOMIC);
        return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
        .notifier_call = masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
        return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

int nf_nat_masquerade_inet_register_notifiers(void)
{
        int ret = 0;

        mutex_lock(&masq_mutex);
        if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
                ret = -EOVERFLOW;
                goto out_unlock;
        }

        /* check if the notifier was already set */
        if (++masq_refcnt > 1)
                goto out_unlock;

        /* Register for device down reports */
        ret = register_netdevice_notifier(&masq_dev_notifier);
        if (ret)
                goto err_dec;
        /* Register IP address change reports */
        ret = register_inetaddr_notifier(&masq_inet_notifier);
        if (ret)
                goto err_unregister;

        ret = nf_nat_masquerade_ipv6_register_notifier();
        if (ret)
                goto err_unreg_inet;

        mutex_unlock(&masq_mutex);
        return ret;
err_unreg_inet:
        unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
        unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
        masq_refcnt--;
out_unlock:
        mutex_unlock(&masq_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
        mutex_lock(&masq_mutex);
        /* check if the notifiers still have clients */
        if (--masq_refcnt > 0)
                goto out_unlock;

        unregister_netdevice_notifier(&masq_dev_notifier);
        unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
        unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
        mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
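Usage sketch (not part of the file above): the exported helpers are meant to be driven by a masquerade target or expression that runs at NF_INET_POST_ROUTING; in the kernel tree the callers are the MASQUERADE target and the nft masq expression. The fragment below is a minimal, hypothetical illustration of that wiring under simplified assumptions: the my_* names are invented, the nf_nat_range2 is zeroed rather than filled from rule configuration (so no explicit port range is requested), and the hook/target registration boilerplate is omitted.

/* Illustrative sketch only; my_masq_* are hypothetical names. */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <net/netfilter/nf_nat_masquerade.h>

static unsigned int my_masq_tg4(struct sk_buff *skb, unsigned int hooknum,
                                const struct net_device *out)
{
        struct nf_nat_range2 range;

        /* A real target would copy flags/min_proto/max_proto from the rule;
         * an all-zero range simply lets the helper pick the source address
         * (NF_NAT_RANGE_MAP_IPS is added internally) without a port range.
         */
        memset(&range, 0, sizeof(range));

        /* Must be called from POST_ROUTING; the helper WARNs otherwise. */
        return nf_nat_masquerade_ipv4(skb, hooknum, &range, out);
}

static int __init my_masq_init(void)
{
        /* Take a reference on the shared device/address notifiers so that
         * conntrack entries bound to a vanished interface or address get
         * flushed in the background.
         */
        return nf_nat_masquerade_inet_register_notifiers();
}

static void __exit my_masq_exit(void)
{
        nf_nat_masquerade_inet_unregister_notifiers();
}

module_init(my_masq_init);
module_exit(my_masq_exit);
MODULE_LICENSE("GPL");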