cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

inetpeer.c (8394B)


      1/*
      2 *		INETPEER - A storage for permanent information about peers
      3 *
      4 *  This source is covered by the GNU GPL, the same as all kernel sources.
      5 *
      6 *  Authors:	Andrey V. Savochkin <saw@msu.ru>
      7 */
      8
      9#include <linux/cache.h>
     10#include <linux/module.h>
     11#include <linux/types.h>
     12#include <linux/slab.h>
     13#include <linux/interrupt.h>
     14#include <linux/spinlock.h>
     15#include <linux/random.h>
     16#include <linux/timer.h>
     17#include <linux/time.h>
     18#include <linux/kernel.h>
     19#include <linux/mm.h>
     20#include <linux/net.h>
     21#include <linux/workqueue.h>
     22#include <net/ip.h>
     23#include <net/inetpeer.h>
     24#include <net/secure_seq.h>
     25
     26/*
     27 *  Theory of operations.
 *  We keep one entry for each peer IP address.  The nodes contain long-lived
 *  information about the peer which doesn't depend on routes.
     30 *
 *  Nodes are removed only when the reference counter goes to 0.
 *  Once that has happened, the node may be removed when a sufficient amount
 *  of time has passed since its last use.  A less-recently-used entry can
 *  also be removed if the pool is overloaded, i.e. if the total number of
 *  entries is greater than or equal to the threshold.
     36 *
     37 *  Node pool is organised as an RB tree.
     38 *  Such an implementation has been chosen not just for fun.  It's a way to
     39 *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
     40 *  amount of long living nodes in a single hash slot would significantly delay
     41 *  lookups performed with disabled BHs.
     42 *
     43 *  Serialisation issues.
     44 *  1.  Nodes may appear in the tree only with the pool lock held.
     45 *  2.  Nodes may disappear from the tree only with the pool lock held
     46 *      AND reference count being 0.
 *  3.  The per-base entry count (base->total) is modified under the pool
 *      lock on the insert and garbage-collection paths.
     48 *  4.  struct inet_peer fields modification:
     49 *		rb_node: pool lock
     50 *		refcnt: atomically against modifications on other CPU;
     51 *		   usually under some other lock to prevent node disappearing
     52 *		daddr: unchangeable
     53 */
     54
/* Slab cache that backs every struct inet_peer handled in this file.
 * It is created in inet_initpeers(), used for allocation in inet_getpeer()
 * and for release in inetpeer_free_rcu().
 */
static struct kmem_cache *peer_cachep __ro_after_init;
     56
     57void inet_peer_base_init(struct inet_peer_base *bp)
     58{
     59	bp->rb_root = RB_ROOT;
     60	seqlock_init(&bp->lock);
     61	bp->total = 0;
     62}
     63EXPORT_SYMBOL_GPL(inet_peer_base_init);
     64
/* Maximum number of visited nodes that one locked lookup() records as
 * garbage-collection candidates; also the size of gc_stack[] in inet_getpeer().
 */
#define PEER_MAX_GC 32

/* Exported for sysctl_net_ipv4.
 * inet_peer_gc() derives the eviction TTL from these three values: it uses 0
 * once base->total reaches inet_peer_threshold, and otherwise scales the TTL
 * down from inet_peer_maxttl towards inet_peer_minttl as base->total grows.
 */
int inet_peer_threshold __read_mostly;	/* start to throw entries more
					 * aggressively at this stage */
int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
     72
     73/* Called from ip_output.c:ip_init  */
     74void __init inet_initpeers(void)
     75{
     76	u64 nr_entries;
     77
     78	 /* 1% of physical memory */
     79	nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT,
     80			      100 * L1_CACHE_ALIGN(sizeof(struct inet_peer)));
     81
     82	inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
     83
     84	peer_cachep = kmem_cache_create("inet_peer_cache",
     85			sizeof(struct inet_peer),
     86			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
     87			NULL);
     88}
     89
     90/* Called with rcu_read_lock() or base->lock held */
     91static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
     92				struct inet_peer_base *base,
     93				unsigned int seq,
     94				struct inet_peer *gc_stack[],
     95				unsigned int *gc_cnt,
     96				struct rb_node **parent_p,
     97				struct rb_node ***pp_p)
     98{
     99	struct rb_node **pp, *parent, *next;
    100	struct inet_peer *p;
    101
    102	pp = &base->rb_root.rb_node;
    103	parent = NULL;
    104	while (1) {
    105		int cmp;
    106
    107		next = rcu_dereference_raw(*pp);
    108		if (!next)
    109			break;
    110		parent = next;
    111		p = rb_entry(parent, struct inet_peer, rb_node);
    112		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
    113		if (cmp == 0) {
    114			if (!refcount_inc_not_zero(&p->refcnt))
    115				break;
    116			return p;
    117		}
    118		if (gc_stack) {
    119			if (*gc_cnt < PEER_MAX_GC)
    120				gc_stack[(*gc_cnt)++] = p;
    121		} else if (unlikely(read_seqretry(&base->lock, seq))) {
    122			break;
    123		}
    124		if (cmp == -1)
    125			pp = &next->rb_left;
    126		else
    127			pp = &next->rb_right;
    128	}
    129	*parent_p = parent;
    130	*pp_p = pp;
    131	return NULL;
    132}
    133
    134static void inetpeer_free_rcu(struct rcu_head *head)
    135{
    136	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
    137}
    138
    139/* perform garbage collect on all items stacked during a lookup */
    140static void inet_peer_gc(struct inet_peer_base *base,
    141			 struct inet_peer *gc_stack[],
    142			 unsigned int gc_cnt)
    143{
    144	struct inet_peer *p;
    145	__u32 delta, ttl;
    146	int i;
    147
    148	if (base->total >= inet_peer_threshold)
    149		ttl = 0; /* be aggressive */
    150	else
    151		ttl = inet_peer_maxttl
    152				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
    153					base->total / inet_peer_threshold * HZ;
    154	for (i = 0; i < gc_cnt; i++) {
    155		p = gc_stack[i];
    156
    157		/* The READ_ONCE() pairs with the WRITE_ONCE()
    158		 * in inet_putpeer()
    159		 */
    160		delta = (__u32)jiffies - READ_ONCE(p->dtime);
    161
    162		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
    163			gc_stack[i] = NULL;
    164	}
    165	for (i = 0; i < gc_cnt; i++) {
    166		p = gc_stack[i];
    167		if (p) {
    168			rb_erase(&p->rb_node, &base->rb_root);
    169			base->total--;
    170			call_rcu(&p->rcu, inetpeer_free_rcu);
    171		}
    172	}
    173}
    174
    175struct inet_peer *inet_getpeer(struct inet_peer_base *base,
    176			       const struct inetpeer_addr *daddr,
    177			       int create)
    178{
    179	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
    180	struct rb_node **pp, *parent;
    181	unsigned int gc_cnt, seq;
    182	int invalidated;
    183
    184	/* Attempt a lockless lookup first.
    185	 * Because of a concurrent writer, we might not find an existing entry.
    186	 */
    187	rcu_read_lock();
    188	seq = read_seqbegin(&base->lock);
    189	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
    190	invalidated = read_seqretry(&base->lock, seq);
    191	rcu_read_unlock();
    192
    193	if (p)
    194		return p;
    195
    196	/* If no writer did a change during our lookup, we can return early. */
    197	if (!create && !invalidated)
    198		return NULL;
    199
    200	/* retry an exact lookup, taking the lock before.
    201	 * At least, nodes should be hot in our cache.
    202	 */
    203	parent = NULL;
    204	write_seqlock_bh(&base->lock);
    205
    206	gc_cnt = 0;
    207	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
    208	if (!p && create) {
    209		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
    210		if (p) {
    211			p->daddr = *daddr;
    212			p->dtime = (__u32)jiffies;
    213			refcount_set(&p->refcnt, 2);
    214			atomic_set(&p->rid, 0);
    215			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
    216			p->rate_tokens = 0;
    217			p->n_redirects = 0;
    218			/* 60*HZ is arbitrary, but chosen enough high so that the first
    219			 * calculation of tokens is at its maximum.
    220			 */
    221			p->rate_last = jiffies - 60*HZ;
    222
    223			rb_link_node(&p->rb_node, parent, pp);
    224			rb_insert_color(&p->rb_node, &base->rb_root);
    225			base->total++;
    226		}
    227	}
    228	if (gc_cnt)
    229		inet_peer_gc(base, gc_stack, gc_cnt);
    230	write_sequnlock_bh(&base->lock);
    231
    232	return p;
    233}
    234EXPORT_SYMBOL_GPL(inet_getpeer);
    235
    236void inet_putpeer(struct inet_peer *p)
    237{
    238	/* The WRITE_ONCE() pairs with itself (we run lockless)
    239	 * and the READ_ONCE() in inet_peer_gc()
    240	 */
    241	WRITE_ONCE(p->dtime, (__u32)jiffies);
    242
    243	if (refcount_dec_and_test(&p->refcnt))
    244		call_rcu(&p->rcu, inetpeer_free_rcu);
    245}
    246EXPORT_SYMBOL_GPL(inet_putpeer);
    247
    248/*
    249 *	Check transmit rate limitation for given message.
    250 *	The rate information is held in the inet_peer entries now.
    251 *	This function is generic and could be used for other purposes
    252 *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
    253 *
    254 *	Note that the same inet_peer fields are modified by functions in
    255 *	route.c too, but these work for packet destinations while xrlim_allow
    256 *	works for icmp destinations. This means the rate limiting information
    257 *	for one "ip object" is shared - and these ICMPs are twice limited:
    258 *	by source and by destination.
    259 *
    260 *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
    261 *			  SHOULD allow setting of rate limits
    262 *
    263 * 	Shared between ICMPv4 and ICMPv6.
    264 */
    265#define XRLIM_BURST_FACTOR 6
    266bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
    267{
    268	unsigned long now, token;
    269	bool rc = false;
    270
    271	if (!peer)
    272		return true;
    273
    274	token = peer->rate_tokens;
    275	now = jiffies;
    276	token += now - peer->rate_last;
    277	peer->rate_last = now;
    278	if (token > XRLIM_BURST_FACTOR * timeout)
    279		token = XRLIM_BURST_FACTOR * timeout;
    280	if (token >= timeout) {
    281		token -= timeout;
    282		rc = true;
    283	}
    284	peer->rate_tokens = token;
    285	return rc;
    286}
    287EXPORT_SYMBOL(inet_peer_xrlim_allow);
    288
    289void inetpeer_invalidate_tree(struct inet_peer_base *base)
    290{
    291	struct rb_node *p = rb_first(&base->rb_root);
    292
    293	while (p) {
    294		struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
    295
    296		p = rb_next(p);
    297		rb_erase(&peer->rb_node, &base->rb_root);
    298		inet_putpeer(peer);
    299		cond_resched();
    300	}
    301
    302	base->total = 0;
    303}
    304EXPORT_SYMBOL(inetpeer_invalidate_tree);