cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nf_conntrack_ecache.c (8774B)


// SPDX-License-Identifier: GPL-2.0-only
/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

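/* Nulls marker for the dying/evicted lists; ECACHE_MAX_JIFFIES is the time
 * budget for one eviction pass, ECACHE_RETRY_JIFFIES the delay before
 * retrying after congested event delivery.
 */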
#define DYING_NULLS_VAL			((1 << 30) + 1)
#define ECACHE_MAX_JIFFIES		msecs_to_jiffies(10)
#define ECACHE_RETRY_JIFFIES		msecs_to_jiffies(10)

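/* Outcome of one eviction pass; ecache_work() uses it to decide whether
 * and when to reschedule itself.
 */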
enum retry_state {
	STATE_CONGESTED,
	STATE_RESTART,
	STATE_DONE,
};

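/* Return the per-netns event cache state; the export below is needed when
 * ctnetlink (CONFIG_NF_CT_NETLINK) is built as a module.
 */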
struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	return &cnet->ecache;
}
#if IS_MODULE(CONFIG_NF_CT_NETLINK)
EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache);
#endif

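/* Walk the per-netns dying list and try to deliver a destroy event for each
 * entry.  Successfully reported entries are moved to a private evicted list
 * so their references can be dropped after the dying_lock has been released.
 */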
static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
{
	unsigned long stop = jiffies + ECACHE_MAX_JIFFIES;
	struct hlist_nulls_head evicted_list;
	enum retry_state ret = STATE_DONE;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int sent;

	INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL);

next:
	sent = 0;
	spin_lock_bh(&cnet->ecache.dying_lock);

	hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

		/* The worker owns all entries, ct remains valid until nf_ct_put
		 * in the loop below.
		 */
		if (nf_conntrack_event(IPCT_DESTROY, ct)) {
			ret = STATE_CONGESTED;
			break;
		}

		hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
		hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list);

		if (time_after(stop, jiffies)) {
			ret = STATE_RESTART;
			break;
		}

		if (sent++ > 16) {
			spin_unlock_bh(&cnet->ecache.dying_lock);
			cond_resched();
			goto next;
		}
	}

	spin_unlock_bh(&cnet->ecache.dying_lock);

	hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

		hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
		nf_ct_put(ct);

		cond_resched();
	}

	return ret;
}

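/* Delayed-work callback: run one eviction pass and reschedule depending on
 * the outcome - after ECACHE_RETRY_JIFFIES if event delivery was congested,
 * immediately if the pass had to be cut short, not at all when done.
 */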
static void ecache_work(struct work_struct *work)
{
	struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
	int ret, delay = -1;

	ret = ecache_work_evict_list(cnet);
	switch (ret) {
	case STATE_CONGESTED:
		delay = ECACHE_RETRY_JIFFIES;
		break;
	case STATE_RESTART:
		delay = 0;
		break;
	case STATE_DONE:
		break;
	}

	if (delay >= 0)
		schedule_delayed_work(&cnet->ecache.dwork, delay);
}

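/* Deliver the given events to the registered notifier.  On failure the event
 * bits are ORed into e->missed, on success the resent missed bits are
 * cleared; the cmpxchg loop is needed because e->missed is updated without
 * a lock.
 */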
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
					   const u32 events,
					   const u32 missed,
					   const struct nf_ct_event *item)
{
	struct net *net = nf_ct_net(item->ct);
	struct nf_ct_event_notifier *notify;
	u32 old, want;
	int ret;

	if (!((events | missed) & e->ctmask))
		return 0;

	rcu_read_lock();

	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify) {
		rcu_read_unlock();
		return 0;
	}

	ret = notify->ct_event(events | missed, item);
	rcu_read_unlock();

	if (likely(ret >= 0 && missed == 0))
		return 0;

	do {
		old = READ_ONCE(e->missed);
		if (ret < 0)
			want = old | events;
		else
			want = old & ~missed;
	} while (cmpxchg(&e->missed, old, want) != old);

	return ret;
}

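/* Report events for a confirmed conntrack entry.  If delivery of a destroy
 * event requested by a process fails, the netlink portid is saved so that
 * the retransmission can be addressed to the same listener.
 */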
int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
				  u32 portid, int report)
{
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	unsigned int missed;
	int ret;

	if (!nf_ct_is_confirmed(ct))
		return 0;

	e = nf_ct_ecache_find(ct);
	if (!e)
		return 0;

	memset(&item, 0, sizeof(item));

	item.ct = ct;
	item.portid = e->portid ? e->portid : portid;
	item.report = report;

	/* Is this a resend of a destroy event? If so, skip the missed bits. */
	missed = e->portid ? 0 : e->missed;

	ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
	if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
		/* This is a destroy event that was triggered by a process;
		 * store the PORTID so it can be included in the retransmission.
		 */
		if (e->portid == 0 && portid != 0)
			e->portid = portid;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);

/* Deliver cached events and clear the cache entry - must be called with
 * softirqs disabled locally.
 */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
	struct nf_conntrack_ecache *e;
	struct nf_ct_event item;
	unsigned int events;

	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
		return;

	e = nf_ct_ecache_find(ct);
	if (e == NULL)
		return;

	events = xchg(&e->cache, 0);

	item.ct = ct;
	item.portid = 0;
	item.report = 0;

	/* We make a copy of the missed event cache without taking
	 * the lock, thus we may send missed events twice. However,
	 * this does not harm and it happens very rarely.
	 */
	__nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

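/* Report an expectation event if the master conntrack has an event cache
 * extension and the event bit is set in its expectation mask.
 */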
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
			       struct nf_conntrack_expect *exp,
			       u32 portid, int report)
{
	struct net *net = nf_ct_exp_net(exp);
	struct nf_ct_event_notifier *notify;
	struct nf_conntrack_ecache *e;

	rcu_read_lock();
	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
	if (!notify)
		goto out_unlock;

	e = nf_ct_ecache_find(exp->master);
	if (!e)
		goto out_unlock;

	if (e->expmask & (1 << event)) {
		struct nf_exp_event item = {
			.exp	= exp,
			.portid	= portid,
			.report = report
		};
		notify->exp_event(1 << event, &item);
	}
out_unlock:
	rcu_read_unlock();
}

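/* Register the (single) event notifier for this netns; only one notifier may
 * be installed at a time.  As a rough illustration (identifiers as used by
 * the ctnetlink code, shown here only as a sketch):
 *
 *	static const struct nf_ct_event_notifier ctnl_notifier = {
 *		.ct_event  = ctnetlink_conntrack_event,
 *		.exp_event = ctnetlink_expect_event,
 *	};
 *	nf_conntrack_register_notifier(net, &ctnl_notifier);
 */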
void nf_conntrack_register_notifier(struct net *net,
				    const struct nf_ct_event_notifier *new)
{
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	WARN_ON_ONCE(notify);
	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
	mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net)
{
	mutex_lock(&nf_ct_ecache_mutex);
	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
	mutex_unlock(&nf_ct_ecache_mutex);
	/* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

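/* Schedule the eviction worker when delivery of a destroy event failed; once
 * a destroy event was sent, flush any remaining dying-list entries
 * immediately or mark the work as idle.
 */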
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	if (state == NFCT_ECACHE_DESTROY_FAIL &&
	    !delayed_work_pending(&cnet->ecache.dwork)) {
		schedule_delayed_work(&cnet->ecache.dwork, HZ);
		net->ct.ecache_dwork_pending = true;
	} else if (state == NFCT_ECACHE_DESTROY_SENT) {
		if (!hlist_nulls_empty(&cnet->ecache.dying_list))
			mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
		else
			net->ct.ecache_dwork_pending = false;
	}
}

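/* Attach the event cache extension according to net->ct.sysctl_events:
 * 0 - events disabled unless a mask was set via template/ruleset,
 * 1 - always allocate the extension,
 * 2 - autodetect: only allocate when a ctnetlink listener is present.
 */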
bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_ecache *e;

	switch (net->ct.sysctl_events) {
	case 0:
		/* assignment via template / ruleset? ignore sysctl. */
		if (ctmask || expmask)
			break;
		return true;
	case 2: /* autodetect: no event listener, don't allocate extension. */
		if (!READ_ONCE(net->ct.ctnetlink_has_listener))
			return true;
		fallthrough;
	case 1:
		/* always allocate an extension. */
		if (!ctmask && !expmask) {
			ctmask = ~0;
			expmask = ~0;
		}
		break;
	default:
		WARN_ON_ONCE(1);
		return true;
	}

	e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
	if (e) {
		e->ctmask  = ctmask;
		e->expmask = expmask;
	}

	return e != NULL;
}
EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add);

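/* The default event mode is 2 (autodetect), see nf_ct_ecache_ext_add(). */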
#define NF_CT_EVENTS_DEFAULT 2
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

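/* Per-netns setup: take over the default event mode and initialize the dying
 * list, its lock and the eviction work.
 */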
void nf_conntrack_ecache_pernet_init(struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	net->ct.sysctl_events = nf_ct_events;

	INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
	INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL);
	spin_lock_init(&cnet->ecache.dying_lock);

	BUILD_BUG_ON(__IPCT_MAX >= 16);	/* e->ctmask is u16 */
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	cancel_delayed_work_sync(&cnet->ecache.dwork);
}