cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nf_nat_masquerade.c (9272B)


// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	netns_tracker ns_tracker;
	union nf_inet_addr addr;
	int ifindex;
	int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT	16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
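
/*
 * Illustrative caller sketch (not part of this file): a masquerade
 * target/expression evaluated at NF_INET_POST_ROUTING hands its configured
 * proto range to the helper above and uses the returned verdict directly.
 * The name below is a simplified stand-in for the real xt/nft callers.
 *
 *	static unsigned int example_masq_eval(struct sk_buff *skb,
 *					      unsigned int hooknum,
 *					      const struct nf_nat_range2 *range,
 *					      const struct net_device *out)
 *	{
 *		// Only the flags/proto fields of 'range' matter here; the
 *		// source address is picked from 'out' by the helper itself.
 *		return nf_nat_masquerade_ipv4(skb, hooknum, range, out);
 *	}
 */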

static void iterate_cleanup_work(struct work_struct *work)
{
	struct nf_ct_iter_data iter_data = {};
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	iter_data.net = w->net;
	iter_data.data = (void *)w;
	nf_ct_iterate_cleanup_net(w->iter, &iter_data);

	put_net_track(w->net, &w->ns_tracker);
	kfree(w);
	atomic_dec(&masq_worker_count);
	module_put(THIS_MODULE);
}

/* Iterate the conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * If too many work items have been queued already or memory allocation
 * fails, the iteration is skipped; the conntrack entries will time out
 * eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
				 int ifindex,
				 int (*iter)(struct nf_conn *i, void *data),
				 gfp_t gfp_flags)
{
	struct masq_dev_work *w;

	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
		return;

	net = maybe_get_net(net);
	if (!net)
		return;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kzalloc(sizeof(*w), gfp_flags);
	if (w) {
		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
		atomic_inc(&masq_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = ifindex;
		w->net = net;
		netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
		w->iter = iter;
		if (addr)
			w->addr = *addr;
		schedule_work(&w->work);
		return;
	}

	module_put(THIS_MODULE);
 err_module:
	put_net(net);
}

static int device_cmp(struct nf_conn *i, void *arg)
{
	const struct nf_conn_nat *nat = nfct_nat(i);
	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
	return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_nat_masq_schedule(net, NULL, dev->ifindex,
				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct nf_conntrack_tuple *tuple;
	struct masq_dev_work *w = ptr;

	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	const struct in_ifaddr *ifa = ptr;
	const struct in_device *idev;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

	memset(&addr, 0, sizeof(addr));

	addr.ip = ifa->ifa_address;

	dev = idev->dev;
	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
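
/*
 * Illustrative caller sketch (not part of this file): the IPv6 variant takes
 * no hook number; a POST_ROUTING masquerade caller simply passes its range
 * and the output device. The name below is a simplified stand-in for the
 * real xt/nft callers.
 *
 *	static unsigned int example_masq6_eval(struct sk_buff *skb,
 *					       const struct nf_nat_range2 *range,
 *					       const struct net_device *out)
 *	{
 *		// Source selection happens inside the helper via
 *		// nat_ipv6_dev_get_saddr() against the packet's destination.
 *		return nf_nat_masquerade_ipv6(skb, range, out);
 *	}
 */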

/* Atomic notifier: we can't call nf_ct_iterate_cleanup_net here (it can
 * sleep), so defer the cleanup to the system workqueue.
 *
 * As we can get 'a lot' of inet events (depending on the number of IPv6
 * addresses being deleted), we also need to limit the work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;

	memset(&addr, 0, sizeof(addr));

	addr.in6 = ifa->addr;

	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
			     GFP_ATOMIC);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
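
/*
 * Illustrative usage sketch (not part of this file): users of these helpers
 * (e.g. the xt/nft masquerade modules) typically pair the register/unregister
 * calls in their module init/exit, so the notifiers stay registered as long
 * as at least one masquerade user is loaded; masq_refcnt counts those users.
 * The names below are simplified stand-ins, not the real module code.
 *
 *	static int __init example_masq_module_init(void)
 *	{
 *		int ret = nf_nat_masquerade_inet_register_notifiers();
 *
 *		if (ret)
 *			return ret;
 *		// ... register the actual target/expression here ...
 *		return 0;
 *	}
 *
 *	static void __exit example_masq_module_exit(void)
 *	{
 *		// ... unregister the target/expression first ...
 *		nf_nat_masquerade_inet_unregister_notifiers();
 *	}
 */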