cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xfrm_state.c (69993B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * xfrm_state.c
      4 *
      5 * Changes:
      6 *	Mitsuru KANDA @USAGI
      7 * 	Kazunori MIYAZAWA @USAGI
      8 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
      9 * 		IPv6 support
     10 * 	YOSHIFUJI Hideaki @USAGI
     11 * 		Split up af-specific functions
     12 *	Derek Atkins <derek@ihtfp.com>
     13 *		Add UDP Encapsulation
     14 *
     15 */
     16
     17#include <linux/compat.h>
     18#include <linux/workqueue.h>
     19#include <net/xfrm.h>
     20#include <linux/pfkeyv2.h>
     21#include <linux/ipsec.h>
     22#include <linux/module.h>
     23#include <linux/cache.h>
     24#include <linux/audit.h>
     25#include <linux/uaccess.h>
     26#include <linux/ktime.h>
     27#include <linux/slab.h>
     28#include <linux/interrupt.h>
     29#include <linux/kernel.h>
     30
     31#include <crypto/aead.h>
     32
     33#include "xfrm_hash.h"
     34
/*
 * Update-side dereference of one of the RCU-protected state hash table
 * pointers in net->xfrm; lockdep verifies that net->xfrm.xfrm_state_lock
 * is held by the caller.
 */
#define xfrm_state_deref_prot(table, net) \
	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))

/* Work handler that destroys the states queued on xfrm_state_gc_list. */
static void xfrm_state_gc_task(struct work_struct *work);
     39
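/* Each xfrm_state may be linked to four hash tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
      Only states with a non-zero SPI are linked here.
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by (daddr,saddr,family) to find SAs by address pair.
   4. Hash table by key manager sequence number (km.seq). Only states
      with a non-zero km.seq are linked here.
 */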
     46
/* Upper bound on the number of hash buckets: xfrm_hash_grow_check() stops
 * scheduling resizes once state_hmask + 1 reaches this value.
 */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Slab cache used by xfrm_state_alloc() and xfrm_state_free(). */
static struct kmem_cache *xfrm_state_cache __ro_after_init;

/* Deferred destruction: __xfrm_state_destroy() queues states on
 * xfrm_state_gc_list and schedules xfrm_state_gc_work to tear them down.
 */
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
     52
     53static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
     54{
     55	return refcount_inc_not_zero(&x->refcnt);
     56}
     57
     58static inline unsigned int xfrm_dst_hash(struct net *net,
     59					 const xfrm_address_t *daddr,
     60					 const xfrm_address_t *saddr,
     61					 u32 reqid,
     62					 unsigned short family)
     63{
     64	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
     65}
     66
     67static inline unsigned int xfrm_src_hash(struct net *net,
     68					 const xfrm_address_t *daddr,
     69					 const xfrm_address_t *saddr,
     70					 unsigned short family)
     71{
     72	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
     73}
     74
     75static inline unsigned int
     76xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
     77	      __be32 spi, u8 proto, unsigned short family)
     78{
     79	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
     80}
     81
     82static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
     83{
     84	return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
     85}
     86
     87static void xfrm_hash_transfer(struct hlist_head *list,
     88			       struct hlist_head *ndsttable,
     89			       struct hlist_head *nsrctable,
     90			       struct hlist_head *nspitable,
     91			       struct hlist_head *nseqtable,
     92			       unsigned int nhashmask)
     93{
     94	struct hlist_node *tmp;
     95	struct xfrm_state *x;
     96
     97	hlist_for_each_entry_safe(x, tmp, list, bydst) {
     98		unsigned int h;
     99
    100		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
    101				    x->props.reqid, x->props.family,
    102				    nhashmask);
    103		hlist_add_head_rcu(&x->bydst, ndsttable + h);
    104
    105		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
    106				    x->props.family,
    107				    nhashmask);
    108		hlist_add_head_rcu(&x->bysrc, nsrctable + h);
    109
    110		if (x->id.spi) {
    111			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
    112					    x->id.proto, x->props.family,
    113					    nhashmask);
    114			hlist_add_head_rcu(&x->byspi, nspitable + h);
    115		}
    116
    117		if (x->km.seq) {
    118			h = __xfrm_seq_hash(x->km.seq, nhashmask);
    119			hlist_add_head_rcu(&x->byseq, nseqtable + h);
    120		}
    121	}
    122}
    123
    124static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
    125{
    126	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
    127}
    128
/*
 * Work handler (net->xfrm.state_hash_work) that doubles the size of the
 * four state hash tables of a network namespace.
 *
 * The new tables are allocated first; if any allocation fails the ones
 * already obtained are freed and the resize is silently abandoned.  The
 * states are then moved and the table pointers switched under
 * xfrm_state_lock, and the old tables are freed after an RCU grace period.
 */
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
	struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	/* Allocate all four replacement tables before touching anything. */
	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		return;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		return;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		return;
	}
	nseq = xfrm_hash_alloc(nsize);
	if (!nseq) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		xfrm_hash_free(nspi, nsize);
		return;
	}

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	/* Bracket the switch-over with the hash generation seqcount. */
	write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
	/* Walking the old bydst chains visits every state; each one is
	 * re-linked into all of the new tables by xfrm_hash_transfer().
	 */
	for (i = net->xfrm.state_hmask; i >= 0; i--)
		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);

	/* Remember the old tables and mask so they can be freed later. */
	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
	ohashmask = net->xfrm.state_hmask;

	/* Publish the new tables and the matching mask. */
	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
	net->xfrm.state_hmask = nhashmask;

	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);

	/* Wait for a grace period before freeing the old bucket arrays. */
	synchronize_rcu();

	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);
	xfrm_hash_free(oseq, osize);
}
    191
/* NOTE(review): presumably serialises updates to xfrm_state_afinfo[]; the
 * users of this lock are outside this part of the file - confirm.
 */
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
/* Per address family state operations, indexed by family. */
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];

/* Protects xfrm_state_gc_list. */
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Forward declarations for functions used before their definition. */
int __xfrm_state_delete(struct xfrm_state *x);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
    202
    203int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
    204{
    205	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
    206	int err = 0;
    207
    208	if (!afinfo)
    209		return -EAFNOSUPPORT;
    210
    211#define X(afi, T, name) do {			\
    212		WARN_ON((afi)->type_ ## name);	\
    213		(afi)->type_ ## name = (T);	\
    214	} while (0)
    215
    216	switch (type->proto) {
    217	case IPPROTO_COMP:
    218		X(afinfo, type, comp);
    219		break;
    220	case IPPROTO_AH:
    221		X(afinfo, type, ah);
    222		break;
    223	case IPPROTO_ESP:
    224		X(afinfo, type, esp);
    225		break;
    226	case IPPROTO_IPIP:
    227		X(afinfo, type, ipip);
    228		break;
    229	case IPPROTO_DSTOPTS:
    230		X(afinfo, type, dstopts);
    231		break;
    232	case IPPROTO_ROUTING:
    233		X(afinfo, type, routing);
    234		break;
    235	case IPPROTO_IPV6:
    236		X(afinfo, type, ipip6);
    237		break;
    238	default:
    239		WARN_ON(1);
    240		err = -EPROTONOSUPPORT;
    241		break;
    242	}
    243#undef X
    244	rcu_read_unlock();
    245	return err;
    246}
    247EXPORT_SYMBOL(xfrm_register_type);
    248
    249void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
    250{
    251	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
    252
    253	if (unlikely(afinfo == NULL))
    254		return;
    255
    256#define X(afi, T, name) do {				\
    257		WARN_ON((afi)->type_ ## name != (T));	\
    258		(afi)->type_ ## name = NULL;		\
    259	} while (0)
    260
    261	switch (type->proto) {
    262	case IPPROTO_COMP:
    263		X(afinfo, type, comp);
    264		break;
    265	case IPPROTO_AH:
    266		X(afinfo, type, ah);
    267		break;
    268	case IPPROTO_ESP:
    269		X(afinfo, type, esp);
    270		break;
    271	case IPPROTO_IPIP:
    272		X(afinfo, type, ipip);
    273		break;
    274	case IPPROTO_DSTOPTS:
    275		X(afinfo, type, dstopts);
    276		break;
    277	case IPPROTO_ROUTING:
    278		X(afinfo, type, routing);
    279		break;
    280	case IPPROTO_IPV6:
    281		X(afinfo, type, ipip6);
    282		break;
    283	default:
    284		WARN_ON(1);
    285		break;
    286	}
    287#undef X
    288	rcu_read_unlock();
    289}
    290EXPORT_SYMBOL(xfrm_unregister_type);
    291
    292static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
    293{
    294	const struct xfrm_type *type = NULL;
    295	struct xfrm_state_afinfo *afinfo;
    296	int modload_attempted = 0;
    297
    298retry:
    299	afinfo = xfrm_state_get_afinfo(family);
    300	if (unlikely(afinfo == NULL))
    301		return NULL;
    302
    303	switch (proto) {
    304	case IPPROTO_COMP:
    305		type = afinfo->type_comp;
    306		break;
    307	case IPPROTO_AH:
    308		type = afinfo->type_ah;
    309		break;
    310	case IPPROTO_ESP:
    311		type = afinfo->type_esp;
    312		break;
    313	case IPPROTO_IPIP:
    314		type = afinfo->type_ipip;
    315		break;
    316	case IPPROTO_DSTOPTS:
    317		type = afinfo->type_dstopts;
    318		break;
    319	case IPPROTO_ROUTING:
    320		type = afinfo->type_routing;
    321		break;
    322	case IPPROTO_IPV6:
    323		type = afinfo->type_ipip6;
    324		break;
    325	default:
    326		break;
    327	}
    328
    329	if (unlikely(type && !try_module_get(type->owner)))
    330		type = NULL;
    331
    332	rcu_read_unlock();
    333
    334	if (!type && !modload_attempted) {
    335		request_module("xfrm-type-%d-%d", family, proto);
    336		modload_attempted = 1;
    337		goto retry;
    338	}
    339
    340	return type;
    341}
    342
    343static void xfrm_put_type(const struct xfrm_type *type)
    344{
    345	module_put(type->owner);
    346}
    347
    348int xfrm_register_type_offload(const struct xfrm_type_offload *type,
    349			       unsigned short family)
    350{
    351	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
    352	int err = 0;
    353
    354	if (unlikely(afinfo == NULL))
    355		return -EAFNOSUPPORT;
    356
    357	switch (type->proto) {
    358	case IPPROTO_ESP:
    359		WARN_ON(afinfo->type_offload_esp);
    360		afinfo->type_offload_esp = type;
    361		break;
    362	default:
    363		WARN_ON(1);
    364		err = -EPROTONOSUPPORT;
    365		break;
    366	}
    367
    368	rcu_read_unlock();
    369	return err;
    370}
    371EXPORT_SYMBOL(xfrm_register_type_offload);
    372
    373void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
    374				  unsigned short family)
    375{
    376	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
    377
    378	if (unlikely(afinfo == NULL))
    379		return;
    380
    381	switch (type->proto) {
    382	case IPPROTO_ESP:
    383		WARN_ON(afinfo->type_offload_esp != type);
    384		afinfo->type_offload_esp = NULL;
    385		break;
    386	default:
    387		WARN_ON(1);
    388		break;
    389	}
    390	rcu_read_unlock();
    391}
    392EXPORT_SYMBOL(xfrm_unregister_type_offload);
    393
    394static const struct xfrm_type_offload *
    395xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
    396{
    397	const struct xfrm_type_offload *type = NULL;
    398	struct xfrm_state_afinfo *afinfo;
    399
    400retry:
    401	afinfo = xfrm_state_get_afinfo(family);
    402	if (unlikely(afinfo == NULL))
    403		return NULL;
    404
    405	switch (proto) {
    406	case IPPROTO_ESP:
    407		type = afinfo->type_offload_esp;
    408		break;
    409	default:
    410		break;
    411	}
    412
    413	if ((type && !try_module_get(type->owner)))
    414		type = NULL;
    415
    416	rcu_read_unlock();
    417
    418	if (!type && try_load) {
    419		request_module("xfrm-offload-%d-%d", family, proto);
    420		try_load = false;
    421		goto retry;
    422	}
    423
    424	return type;
    425}
    426
    427static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
    428{
    429	module_put(type->owner);
    430}
    431
    432static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
    433	[XFRM_MODE_BEET] = {
    434		.encap = XFRM_MODE_BEET,
    435		.flags = XFRM_MODE_FLAG_TUNNEL,
    436		.family = AF_INET,
    437	},
    438	[XFRM_MODE_TRANSPORT] = {
    439		.encap = XFRM_MODE_TRANSPORT,
    440		.family = AF_INET,
    441	},
    442	[XFRM_MODE_TUNNEL] = {
    443		.encap = XFRM_MODE_TUNNEL,
    444		.flags = XFRM_MODE_FLAG_TUNNEL,
    445		.family = AF_INET,
    446	},
    447};
    448
    449static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
    450	[XFRM_MODE_BEET] = {
    451		.encap = XFRM_MODE_BEET,
    452		.flags = XFRM_MODE_FLAG_TUNNEL,
    453		.family = AF_INET6,
    454	},
    455	[XFRM_MODE_ROUTEOPTIMIZATION] = {
    456		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
    457		.family = AF_INET6,
    458	},
    459	[XFRM_MODE_TRANSPORT] = {
    460		.encap = XFRM_MODE_TRANSPORT,
    461		.family = AF_INET6,
    462	},
    463	[XFRM_MODE_TUNNEL] = {
    464		.encap = XFRM_MODE_TUNNEL,
    465		.flags = XFRM_MODE_FLAG_TUNNEL,
    466		.family = AF_INET6,
    467	},
    468};
    469
    470static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
    471{
    472	const struct xfrm_mode *mode;
    473
    474	if (unlikely(encap >= XFRM_MODE_MAX))
    475		return NULL;
    476
    477	switch (family) {
    478	case AF_INET:
    479		mode = &xfrm4_mode_map[encap];
    480		if (mode->family == family)
    481			return mode;
    482		break;
    483	case AF_INET6:
    484		mode = &xfrm6_mode_map[encap];
    485		if (mode->family == family)
    486			return mode;
    487		break;
    488	default:
    489		break;
    490	}
    491
    492	return NULL;
    493}
    494
    495void xfrm_state_free(struct xfrm_state *x)
    496{
    497	kmem_cache_free(xfrm_state_cache, x);
    498}
    499EXPORT_SYMBOL(xfrm_state_free);
    500
/*
 * Final teardown of a state: stop both timers, free the buffers hanging
 * off the state, drop the type and offload-type module references,
 * release the fragment page and finally free the state itself.
 */
static void ___xfrm_state_destroy(struct xfrm_state *x)
{
	/* Make sure neither timer can run any more. */
	hrtimer_cancel(&x->mtimer);
	del_timer_sync(&x->rtimer);
	kfree(x->aead);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	kfree(x->replay_esn);
	kfree(x->preplay_esn);
	if (x->type_offload)
		xfrm_put_type_offload(x->type_offload);
	if (x->type) {
		/* Run the type's destructor before dropping its module. */
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	if (x->xfrag.page)
		put_page(x->xfrag.page);
	xfrm_dev_state_free(x);
	security_xfrm_state_free(x);
	xfrm_state_free(x);
}
    525
    526static void xfrm_state_gc_task(struct work_struct *work)
    527{
    528	struct xfrm_state *x;
    529	struct hlist_node *tmp;
    530	struct hlist_head gc_list;
    531
    532	spin_lock_bh(&xfrm_state_gc_lock);
    533	hlist_move_list(&xfrm_state_gc_list, &gc_list);
    534	spin_unlock_bh(&xfrm_state_gc_lock);
    535
    536	synchronize_rcu();
    537
    538	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
    539		___xfrm_state_destroy(x);
    540}
    541
    542static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
    543{
    544	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
    545	enum hrtimer_restart ret = HRTIMER_NORESTART;
    546	time64_t now = ktime_get_real_seconds();
    547	time64_t next = TIME64_MAX;
    548	int warn = 0;
    549	int err = 0;
    550
    551	spin_lock(&x->lock);
    552	if (x->km.state == XFRM_STATE_DEAD)
    553		goto out;
    554	if (x->km.state == XFRM_STATE_EXPIRED)
    555		goto expired;
    556	if (x->lft.hard_add_expires_seconds) {
    557		long tmo = x->lft.hard_add_expires_seconds +
    558			x->curlft.add_time - now;
    559		if (tmo <= 0) {
    560			if (x->xflags & XFRM_SOFT_EXPIRE) {
    561				/* enter hard expire without soft expire first?!
    562				 * setting a new date could trigger this.
    563				 * workaround: fix x->curflt.add_time by below:
    564				 */
    565				x->curlft.add_time = now - x->saved_tmo - 1;
    566				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
    567			} else
    568				goto expired;
    569		}
    570		if (tmo < next)
    571			next = tmo;
    572	}
    573	if (x->lft.hard_use_expires_seconds) {
    574		long tmo = x->lft.hard_use_expires_seconds +
    575			(x->curlft.use_time ? : now) - now;
    576		if (tmo <= 0)
    577			goto expired;
    578		if (tmo < next)
    579			next = tmo;
    580	}
    581	if (x->km.dying)
    582		goto resched;
    583	if (x->lft.soft_add_expires_seconds) {
    584		long tmo = x->lft.soft_add_expires_seconds +
    585			x->curlft.add_time - now;
    586		if (tmo <= 0) {
    587			warn = 1;
    588			x->xflags &= ~XFRM_SOFT_EXPIRE;
    589		} else if (tmo < next) {
    590			next = tmo;
    591			x->xflags |= XFRM_SOFT_EXPIRE;
    592			x->saved_tmo = tmo;
    593		}
    594	}
    595	if (x->lft.soft_use_expires_seconds) {
    596		long tmo = x->lft.soft_use_expires_seconds +
    597			(x->curlft.use_time ? : now) - now;
    598		if (tmo <= 0)
    599			warn = 1;
    600		else if (tmo < next)
    601			next = tmo;
    602	}
    603
    604	x->km.dying = warn;
    605	if (warn)
    606		km_state_expired(x, 0, 0);
    607resched:
    608	if (next != TIME64_MAX) {
    609		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
    610		ret = HRTIMER_RESTART;
    611	}
    612
    613	goto out;
    614
    615expired:
    616	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
    617		x->km.state = XFRM_STATE_EXPIRED;
    618
    619	err = __xfrm_state_delete(x);
    620	if (!err)
    621		km_state_expired(x, 1, 0);
    622
    623	xfrm_audit_state_delete(x, err ? 0 : 1, true);
    624
    625out:
    626	spin_unlock(&x->lock);
    627	return ret;
    628}
    629
    630static void xfrm_replay_timer_handler(struct timer_list *t);
    631
    632struct xfrm_state *xfrm_state_alloc(struct net *net)
    633{
    634	struct xfrm_state *x;
    635
    636	x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC);
    637
    638	if (x) {
    639		write_pnet(&x->xs_net, net);
    640		refcount_set(&x->refcnt, 1);
    641		atomic_set(&x->tunnel_users, 0);
    642		INIT_LIST_HEAD(&x->km.all);
    643		INIT_HLIST_NODE(&x->bydst);
    644		INIT_HLIST_NODE(&x->bysrc);
    645		INIT_HLIST_NODE(&x->byspi);
    646		INIT_HLIST_NODE(&x->byseq);
    647		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
    648		x->mtimer.function = xfrm_timer_handler;
    649		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
    650		x->curlft.add_time = ktime_get_real_seconds();
    651		x->lft.soft_byte_limit = XFRM_INF;
    652		x->lft.soft_packet_limit = XFRM_INF;
    653		x->lft.hard_byte_limit = XFRM_INF;
    654		x->lft.hard_packet_limit = XFRM_INF;
    655		x->replay_maxage = 0;
    656		x->replay_maxdiff = 0;
    657		spin_lock_init(&x->lock);
    658	}
    659	return x;
    660}
    661EXPORT_SYMBOL(xfrm_state_alloc);
    662
    663void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
    664{
    665	WARN_ON(x->km.state != XFRM_STATE_DEAD);
    666
    667	if (sync) {
    668		synchronize_rcu();
    669		___xfrm_state_destroy(x);
    670	} else {
    671		spin_lock_bh(&xfrm_state_gc_lock);
    672		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
    673		spin_unlock_bh(&xfrm_state_gc_lock);
    674		schedule_work(&xfrm_state_gc_work);
    675	}
    676}
    677EXPORT_SYMBOL(__xfrm_state_destroy);
    678
    679int __xfrm_state_delete(struct xfrm_state *x)
    680{
    681	struct net *net = xs_net(x);
    682	int err = -ESRCH;
    683
    684	if (x->km.state != XFRM_STATE_DEAD) {
    685		x->km.state = XFRM_STATE_DEAD;
    686		spin_lock(&net->xfrm.xfrm_state_lock);
    687		list_del(&x->km.all);
    688		hlist_del_rcu(&x->bydst);
    689		hlist_del_rcu(&x->bysrc);
    690		if (x->km.seq)
    691			hlist_del_rcu(&x->byseq);
    692		if (x->id.spi)
    693			hlist_del_rcu(&x->byspi);
    694		net->xfrm.state_num--;
    695		spin_unlock(&net->xfrm.xfrm_state_lock);
    696
    697		if (x->encap_sk)
    698			sock_put(rcu_dereference_raw(x->encap_sk));
    699
    700		xfrm_dev_state_delete(x);
    701
    702		/* All xfrm_state objects are created by xfrm_state_alloc.
    703		 * The xfrm_state_alloc call gives a reference, and that
    704		 * is what we are dropping here.
    705		 */
    706		xfrm_state_put(x);
    707		err = 0;
    708	}
    709
    710	return err;
    711}
    712EXPORT_SYMBOL(__xfrm_state_delete);
    713
    714int xfrm_state_delete(struct xfrm_state *x)
    715{
    716	int err;
    717
    718	spin_lock_bh(&x->lock);
    719	err = __xfrm_state_delete(x);
    720	spin_unlock_bh(&x->lock);
    721
    722	return err;
    723}
    724EXPORT_SYMBOL(xfrm_state_delete);
    725
    726#ifdef CONFIG_SECURITY_NETWORK_XFRM
    727static inline int
    728xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
    729{
    730	int i, err = 0;
    731
    732	for (i = 0; i <= net->xfrm.state_hmask; i++) {
    733		struct xfrm_state *x;
    734
    735		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
    736			if (xfrm_id_proto_match(x->id.proto, proto) &&
    737			   (err = security_xfrm_state_delete(x)) != 0) {
    738				xfrm_audit_state_delete(x, 0, task_valid);
    739				return err;
    740			}
    741		}
    742	}
    743
    744	return err;
    745}
    746
    747static inline int
    748xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
    749{
    750	int i, err = 0;
    751
    752	for (i = 0; i <= net->xfrm.state_hmask; i++) {
    753		struct xfrm_state *x;
    754		struct xfrm_dev_offload *xso;
    755
    756		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
    757			xso = &x->xso;
    758
    759			if (xso->dev == dev &&
    760			   (err = security_xfrm_state_delete(x)) != 0) {
    761				xfrm_audit_state_delete(x, 0, task_valid);
    762				return err;
    763			}
    764		}
    765	}
    766
    767	return err;
    768}
    769#else
/* Stub for builds without CONFIG_SECURITY_NETWORK_XFRM: always allows. */
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	return 0;
}
    775
/* Stub for builds without CONFIG_SECURITY_NETWORK_XFRM: always allows. */
static inline int
xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
{
	return 0;
}
    781#endif
    782
/*
 * Delete every state in @net whose protocol matches @proto, skipping the
 * states for which xfrm_state_kern() is true.
 *
 * @task_valid is passed through to the audit calls.  @sync selects
 * xfrm_state_put_sync() instead of xfrm_state_put() when dropping the
 * temporary reference taken on each state.
 *
 * Returns 0 when at least one state was deleted, the error from the
 * security check when that refuses the flush, and otherwise -ESRCH or the
 * error of the last failed delete.
 */
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
{
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	/* Check every candidate first, before anything is deleted. */
	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				/* Pin the state, then drop the table lock
				 * around the delete.
				 */
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				if (sync)
					xfrm_state_put_sync(x);
				else
					xfrm_state_put(x);
				if (!err)
					cnt++;

				/* The lock was released, so rescan this
				 * chain from its head.
				 */
				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	/* Any successful delete turns the overall result into success. */
	if (cnt)
		err = 0;

	return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
    825
/*
 * Delete every state in @net that is offloaded to @dev, skipping the
 * states for which xfrm_state_kern() is true.
 *
 * @task_valid is passed through to the audit calls.
 *
 * Returns 0 when at least one state was deleted, the error from the
 * security check when that refuses the flush, and otherwise -ESRCH or the
 * error of the last failed delete.
 */
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
{
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	/* Check every candidate first, before anything is deleted. */
	err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
		struct xfrm_dev_offload *xso;
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			xso = &x->xso;

			if (!xfrm_state_kern(x) && xso->dev == dev) {
				/* Pin the state, then drop the table lock
				 * around the delete.
				 */
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				xfrm_state_put(x);
				if (!err)
					cnt++;

				/* The lock was released, so rescan this
				 * chain from its head.
				 */
				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
	/* Any successful delete turns the overall result into success. */
	if (cnt)
		err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_dev_state_flush);
    867
    868void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
    869{
    870	spin_lock_bh(&net->xfrm.xfrm_state_lock);
    871	si->sadcnt = net->xfrm.state_num;
    872	si->sadhcnt = net->xfrm.state_hmask + 1;
    873	si->sadhmcnt = xfrm_state_hashmax;
    874	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
    875}
    876EXPORT_SYMBOL(xfrm_sad_getinfo);
    877
    878static void
    879__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
    880{
    881	const struct flowi4 *fl4 = &fl->u.ip4;
    882
    883	sel->daddr.a4 = fl4->daddr;
    884	sel->saddr.a4 = fl4->saddr;
    885	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
    886	sel->dport_mask = htons(0xffff);
    887	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
    888	sel->sport_mask = htons(0xffff);
    889	sel->family = AF_INET;
    890	sel->prefixlen_d = 32;
    891	sel->prefixlen_s = 32;
    892	sel->proto = fl4->flowi4_proto;
    893	sel->ifindex = fl4->flowi4_oif;
    894}
    895
    896static void
    897__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
    898{
    899	const struct flowi6 *fl6 = &fl->u.ip6;
    900
    901	/* Initialize temporary selector matching only to current session. */
    902	*(struct in6_addr *)&sel->daddr = fl6->daddr;
    903	*(struct in6_addr *)&sel->saddr = fl6->saddr;
    904	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
    905	sel->dport_mask = htons(0xffff);
    906	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
    907	sel->sport_mask = htons(0xffff);
    908	sel->family = AF_INET6;
    909	sel->prefixlen_d = 128;
    910	sel->prefixlen_s = 128;
    911	sel->proto = fl6->flowi6_proto;
    912	sel->ifindex = fl6->flowi6_oif;
    913}
    914
    915static void
    916xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
    917		    const struct xfrm_tmpl *tmpl,
    918		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
    919		    unsigned short family)
    920{
    921	switch (family) {
    922	case AF_INET:
    923		__xfrm4_init_tempsel(&x->sel, fl);
    924		break;
    925	case AF_INET6:
    926		__xfrm6_init_tempsel(&x->sel, fl);
    927		break;
    928	}
    929
    930	x->id = tmpl->id;
    931
    932	switch (tmpl->encap_family) {
    933	case AF_INET:
    934		if (x->id.daddr.a4 == 0)
    935			x->id.daddr.a4 = daddr->a4;
    936		x->props.saddr = tmpl->saddr;
    937		if (x->props.saddr.a4 == 0)
    938			x->props.saddr.a4 = saddr->a4;
    939		break;
    940	case AF_INET6:
    941		if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
    942			memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
    943		memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
    944		if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
    945			memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
    946		break;
    947	}
    948
    949	x->props.mode = tmpl->mode;
    950	x->props.reqid = tmpl->reqid;
    951	x->props.family = tmpl->encap_family;
    952}
    953
    954static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
    955					      const xfrm_address_t *daddr,
    956					      __be32 spi, u8 proto,
    957					      unsigned short family)
    958{
    959	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
    960	struct xfrm_state *x;
    961
    962	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
    963		if (x->props.family != family ||
    964		    x->id.spi       != spi ||
    965		    x->id.proto     != proto ||
    966		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
    967			continue;
    968
    969		if ((mark & x->mark.m) != x->mark.v)
    970			continue;
    971		if (!xfrm_state_hold_rcu(x))
    972			continue;
    973		return x;
    974	}
    975
    976	return NULL;
    977}
    978
    979static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
    980						     const xfrm_address_t *daddr,
    981						     const xfrm_address_t *saddr,
    982						     u8 proto, unsigned short family)
    983{
    984	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
    985	struct xfrm_state *x;
    986
    987	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
    988		if (x->props.family != family ||
    989		    x->id.proto     != proto ||
    990		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
    991		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
    992			continue;
    993
    994		if ((mark & x->mark.m) != x->mark.v)
    995			continue;
    996		if (!xfrm_state_hold_rcu(x))
    997			continue;
    998		return x;
    999	}
   1000
   1001	return NULL;
   1002}
   1003
   1004static inline struct xfrm_state *
   1005__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
   1006{
   1007	struct net *net = xs_net(x);
   1008	u32 mark = x->mark.v & x->mark.m;
   1009
   1010	if (use_spi)
   1011		return __xfrm_state_lookup(net, mark, &x->id.daddr,
   1012					   x->id.spi, x->id.proto, family);
   1013	else
   1014		return __xfrm_state_lookup_byaddr(net, mark,
   1015						  &x->id.daddr,
   1016						  &x->props.saddr,
   1017						  x->id.proto, family);
   1018}
   1019
   1020static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
   1021{
   1022	if (have_hash_collision &&
   1023	    (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
   1024	    net->xfrm.state_num > net->xfrm.state_hmask)
   1025		schedule_work(&net->xfrm.state_hash_work);
   1026}
   1027
   1028static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
   1029			       const struct flowi *fl, unsigned short family,
   1030			       struct xfrm_state **best, int *acq_in_progress,
   1031			       int *error)
   1032{
   1033	/* Resolution logic:
   1034	 * 1. There is a valid state with matching selector. Done.
   1035	 * 2. Valid state with inappropriate selector. Skip.
   1036	 *
   1037	 * Entering area of "sysdeps".
   1038	 *
   1039	 * 3. If state is not valid, selector is temporary, it selects
   1040	 *    only session which triggered previous resolution. Key
   1041	 *    manager will do something to install a state with proper
   1042	 *    selector.
   1043	 */
   1044	if (x->km.state == XFRM_STATE_VALID) {
   1045		if ((x->sel.family &&
   1046		     (x->sel.family != family ||
   1047		      !xfrm_selector_match(&x->sel, fl, family))) ||
   1048		    !security_xfrm_state_pol_flow_match(x, pol,
   1049							&fl->u.__fl_common))
   1050			return;
   1051
   1052		if (!*best ||
   1053		    (*best)->km.dying > x->km.dying ||
   1054		    ((*best)->km.dying == x->km.dying &&
   1055		     (*best)->curlft.add_time < x->curlft.add_time))
   1056			*best = x;
   1057	} else if (x->km.state == XFRM_STATE_ACQ) {
   1058		*acq_in_progress = 1;
   1059	} else if (x->km.state == XFRM_STATE_ERROR ||
   1060		   x->km.state == XFRM_STATE_EXPIRED) {
   1061		if ((!x->sel.family ||
   1062		     (x->sel.family == family &&
   1063		      xfrm_selector_match(&x->sel, fl, family))) &&
   1064		    security_xfrm_state_pol_flow_match(x, pol,
   1065						       &fl->u.__fl_common))
   1066			*error = -ESRCH;
   1067	}
   1068}
   1069
   1070struct xfrm_state *
   1071xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
   1072		const struct flowi *fl, struct xfrm_tmpl *tmpl,
   1073		struct xfrm_policy *pol, int *err,
   1074		unsigned short family, u32 if_id)
   1075{
   1076	static xfrm_address_t saddr_wildcard = { };
   1077	struct net *net = xp_net(pol);
   1078	unsigned int h, h_wildcard;
   1079	struct xfrm_state *x, *x0, *to_put;
   1080	int acquire_in_progress = 0;
   1081	int error = 0;
   1082	struct xfrm_state *best = NULL;
   1083	u32 mark = pol->mark.v & pol->mark.m;
   1084	unsigned short encap_family = tmpl->encap_family;
   1085	unsigned int sequence;
   1086	struct km_event c;
   1087
   1088	to_put = NULL;
   1089
   1090	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
   1091
   1092	rcu_read_lock();
   1093	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
   1094	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
   1095		if (x->props.family == encap_family &&
   1096		    x->props.reqid == tmpl->reqid &&
   1097		    (mark & x->mark.m) == x->mark.v &&
   1098		    x->if_id == if_id &&
   1099		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
   1100		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
   1101		    tmpl->mode == x->props.mode &&
   1102		    tmpl->id.proto == x->id.proto &&
   1103		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
   1104			xfrm_state_look_at(pol, x, fl, family,
   1105					   &best, &acquire_in_progress, &error);
   1106	}
   1107	if (best || acquire_in_progress)
   1108		goto found;
   1109
   1110	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
   1111	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
   1112		if (x->props.family == encap_family &&
   1113		    x->props.reqid == tmpl->reqid &&
   1114		    (mark & x->mark.m) == x->mark.v &&
   1115		    x->if_id == if_id &&
   1116		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
   1117		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
   1118		    tmpl->mode == x->props.mode &&
   1119		    tmpl->id.proto == x->id.proto &&
   1120		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
   1121			xfrm_state_look_at(pol, x, fl, family,
   1122					   &best, &acquire_in_progress, &error);
   1123	}
   1124
   1125found:
   1126	x = best;
   1127	if (!x && !error && !acquire_in_progress) {
   1128		if (tmpl->id.spi &&
   1129		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
   1130					      tmpl->id.proto, encap_family)) != NULL) {
   1131			to_put = x0;
   1132			error = -EEXIST;
   1133			goto out;
   1134		}
   1135
   1136		c.net = net;
   1137		/* If the KMs have no listeners (yet...), avoid allocating an SA
   1138		 * for each and every packet - garbage collection might not
   1139		 * handle the flood.
   1140		 */
   1141		if (!km_is_alive(&c)) {
   1142			error = -ESRCH;
   1143			goto out;
   1144		}
   1145
   1146		x = xfrm_state_alloc(net);
   1147		if (x == NULL) {
   1148			error = -ENOMEM;
   1149			goto out;
   1150		}
   1151		/* Initialize temporary state matching only
   1152		 * to current session. */
   1153		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
   1154		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
   1155		x->if_id = if_id;
   1156
   1157		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
   1158		if (error) {
   1159			x->km.state = XFRM_STATE_DEAD;
   1160			to_put = x;
   1161			x = NULL;
   1162			goto out;
   1163		}
   1164
   1165		if (km_query(x, tmpl, pol) == 0) {
   1166			spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1167			x->km.state = XFRM_STATE_ACQ;
   1168			list_add(&x->km.all, &net->xfrm.state_all);
   1169			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
   1170			h = xfrm_src_hash(net, daddr, saddr, encap_family);
   1171			hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
   1172			if (x->id.spi) {
   1173				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
   1174				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
   1175			}
   1176			if (x->km.seq) {
   1177				h = xfrm_seq_hash(net, x->km.seq);
   1178				hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
   1179			}
   1180			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
   1181			hrtimer_start(&x->mtimer,
   1182				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
   1183				      HRTIMER_MODE_REL_SOFT);
   1184			net->xfrm.state_num++;
   1185			xfrm_hash_grow_check(net, x->bydst.next != NULL);
   1186			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1187		} else {
   1188			x->km.state = XFRM_STATE_DEAD;
   1189			to_put = x;
   1190			x = NULL;
   1191			error = -ESRCH;
   1192		}
   1193	}
   1194out:
   1195	if (x) {
   1196		if (!xfrm_state_hold_rcu(x)) {
   1197			*err = -EAGAIN;
   1198			x = NULL;
   1199		}
   1200	} else {
   1201		*err = acquire_in_progress ? -EAGAIN : error;
   1202	}
   1203	rcu_read_unlock();
   1204	if (to_put)
   1205		xfrm_state_put(to_put);
   1206
   1207	if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
   1208		*err = -EAGAIN;
   1209		if (x) {
   1210			xfrm_state_put(x);
   1211			x = NULL;
   1212		}
   1213	}
   1214
   1215	return x;
   1216}
   1217
   1218struct xfrm_state *
   1219xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
   1220		    xfrm_address_t *daddr, xfrm_address_t *saddr,
   1221		    unsigned short family, u8 mode, u8 proto, u32 reqid)
   1222{
   1223	unsigned int h;
   1224	struct xfrm_state *rx = NULL, *x = NULL;
   1225
   1226	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1227	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
   1228	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
   1229		if (x->props.family == family &&
   1230		    x->props.reqid == reqid &&
   1231		    (mark & x->mark.m) == x->mark.v &&
   1232		    x->if_id == if_id &&
   1233		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
   1234		    xfrm_state_addr_check(x, daddr, saddr, family) &&
   1235		    mode == x->props.mode &&
   1236		    proto == x->id.proto &&
   1237		    x->km.state == XFRM_STATE_VALID) {
   1238			rx = x;
   1239			break;
   1240		}
   1241	}
   1242
   1243	if (rx)
   1244		xfrm_state_hold(rx);
   1245	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1246
   1247
   1248	return rx;
   1249}
   1250EXPORT_SYMBOL(xfrm_stateonly_find);
   1251
   1252struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
   1253					      unsigned short family)
   1254{
   1255	struct xfrm_state *x;
   1256	struct xfrm_state_walk *w;
   1257
   1258	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1259	list_for_each_entry(w, &net->xfrm.state_all, all) {
   1260		x = container_of(w, struct xfrm_state, km);
   1261		if (x->props.family != family ||
   1262			x->id.spi != spi)
   1263			continue;
   1264
   1265		xfrm_state_hold(x);
   1266		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1267		return x;
   1268	}
   1269	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1270	return NULL;
   1271}
   1272EXPORT_SYMBOL(xfrm_state_lookup_byspi);
   1273
   1274static void __xfrm_state_insert(struct xfrm_state *x)
   1275{
   1276	struct net *net = xs_net(x);
   1277	unsigned int h;
   1278
   1279	list_add(&x->km.all, &net->xfrm.state_all);
   1280
   1281	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
   1282			  x->props.reqid, x->props.family);
   1283	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
   1284
   1285	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
   1286	hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
   1287
   1288	if (x->id.spi) {
   1289		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
   1290				  x->props.family);
   1291
   1292		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
   1293	}
   1294
   1295	if (x->km.seq) {
   1296		h = xfrm_seq_hash(net, x->km.seq);
   1297
   1298		hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
   1299	}
   1300
   1301	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
   1302	if (x->replay_maxage)
   1303		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
   1304
   1305	net->xfrm.state_num++;
   1306
   1307	xfrm_hash_grow_check(net, x->bydst.next != NULL);
   1308}
   1309
   1310/* net->xfrm.xfrm_state_lock is held */
   1311static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
   1312{
   1313	struct net *net = xs_net(xnew);
   1314	unsigned short family = xnew->props.family;
   1315	u32 reqid = xnew->props.reqid;
   1316	struct xfrm_state *x;
   1317	unsigned int h;
   1318	u32 mark = xnew->mark.v & xnew->mark.m;
   1319	u32 if_id = xnew->if_id;
   1320
   1321	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
   1322	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
   1323		if (x->props.family	== family &&
   1324		    x->props.reqid	== reqid &&
   1325		    x->if_id		== if_id &&
   1326		    (mark & x->mark.m) == x->mark.v &&
   1327		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
   1328		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
   1329			x->genid++;
   1330	}
   1331}
   1332
   1333void xfrm_state_insert(struct xfrm_state *x)
   1334{
   1335	struct net *net = xs_net(x);
   1336
   1337	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1338	__xfrm_state_bump_genids(x);
   1339	__xfrm_state_insert(x);
   1340	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1341}
   1342EXPORT_SYMBOL(xfrm_state_insert);
   1343
   1344/* net->xfrm.xfrm_state_lock is held */
   1345static struct xfrm_state *__find_acq_core(struct net *net,
   1346					  const struct xfrm_mark *m,
   1347					  unsigned short family, u8 mode,
   1348					  u32 reqid, u32 if_id, u8 proto,
   1349					  const xfrm_address_t *daddr,
   1350					  const xfrm_address_t *saddr,
   1351					  int create)
   1352{
   1353	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
   1354	struct xfrm_state *x;
   1355	u32 mark = m->v & m->m;
   1356
   1357	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
   1358		if (x->props.reqid  != reqid ||
   1359		    x->props.mode   != mode ||
   1360		    x->props.family != family ||
   1361		    x->km.state     != XFRM_STATE_ACQ ||
   1362		    x->id.spi       != 0 ||
   1363		    x->id.proto	    != proto ||
   1364		    (mark & x->mark.m) != x->mark.v ||
   1365		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
   1366		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
   1367			continue;
   1368
   1369		xfrm_state_hold(x);
   1370		return x;
   1371	}
   1372
   1373	if (!create)
   1374		return NULL;
   1375
   1376	x = xfrm_state_alloc(net);
   1377	if (likely(x)) {
   1378		switch (family) {
   1379		case AF_INET:
   1380			x->sel.daddr.a4 = daddr->a4;
   1381			x->sel.saddr.a4 = saddr->a4;
   1382			x->sel.prefixlen_d = 32;
   1383			x->sel.prefixlen_s = 32;
   1384			x->props.saddr.a4 = saddr->a4;
   1385			x->id.daddr.a4 = daddr->a4;
   1386			break;
   1387
   1388		case AF_INET6:
   1389			x->sel.daddr.in6 = daddr->in6;
   1390			x->sel.saddr.in6 = saddr->in6;
   1391			x->sel.prefixlen_d = 128;
   1392			x->sel.prefixlen_s = 128;
   1393			x->props.saddr.in6 = saddr->in6;
   1394			x->id.daddr.in6 = daddr->in6;
   1395			break;
   1396		}
   1397
   1398		x->km.state = XFRM_STATE_ACQ;
   1399		x->id.proto = proto;
   1400		x->props.family = family;
   1401		x->props.mode = mode;
   1402		x->props.reqid = reqid;
   1403		x->if_id = if_id;
   1404		x->mark.v = m->v;
   1405		x->mark.m = m->m;
   1406		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
   1407		xfrm_state_hold(x);
   1408		hrtimer_start(&x->mtimer,
   1409			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
   1410			      HRTIMER_MODE_REL_SOFT);
   1411		list_add(&x->km.all, &net->xfrm.state_all);
   1412		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
   1413		h = xfrm_src_hash(net, daddr, saddr, family);
   1414		hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
   1415
   1416		net->xfrm.state_num++;
   1417
   1418		xfrm_hash_grow_check(net, x->bydst.next != NULL);
   1419	}
   1420
   1421	return x;
   1422}
   1423
   1424static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
   1425
   1426int xfrm_state_add(struct xfrm_state *x)
   1427{
   1428	struct net *net = xs_net(x);
   1429	struct xfrm_state *x1, *to_put;
   1430	int family;
   1431	int err;
   1432	u32 mark = x->mark.v & x->mark.m;
   1433	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
   1434
   1435	family = x->props.family;
   1436
   1437	to_put = NULL;
   1438
   1439	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1440
   1441	x1 = __xfrm_state_locate(x, use_spi, family);
   1442	if (x1) {
   1443		to_put = x1;
   1444		x1 = NULL;
   1445		err = -EEXIST;
   1446		goto out;
   1447	}
   1448
   1449	if (use_spi && x->km.seq) {
   1450		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
   1451		if (x1 && ((x1->id.proto != x->id.proto) ||
   1452		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
   1453			to_put = x1;
   1454			x1 = NULL;
   1455		}
   1456	}
   1457
   1458	if (use_spi && !x1)
   1459		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
   1460				     x->props.reqid, x->if_id, x->id.proto,
   1461				     &x->id.daddr, &x->props.saddr, 0);
   1462
   1463	__xfrm_state_bump_genids(x);
   1464	__xfrm_state_insert(x);
   1465	err = 0;
   1466
   1467out:
   1468	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1469
   1470	if (x1) {
   1471		xfrm_state_delete(x1);
   1472		xfrm_state_put(x1);
   1473	}
   1474
   1475	if (to_put)
   1476		xfrm_state_put(to_put);
   1477
   1478	return err;
   1479}
   1480EXPORT_SYMBOL(xfrm_state_add);
   1481
   1482#ifdef CONFIG_XFRM_MIGRATE
   1483static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security)
   1484{
   1485	struct xfrm_user_sec_ctx *uctx;
   1486	int size = sizeof(*uctx) + security->ctx_len;
   1487	int err;
   1488
   1489	uctx = kmalloc(size, GFP_KERNEL);
   1490	if (!uctx)
   1491		return -ENOMEM;
   1492
   1493	uctx->exttype = XFRMA_SEC_CTX;
   1494	uctx->len = size;
   1495	uctx->ctx_doi = security->ctx_doi;
   1496	uctx->ctx_alg = security->ctx_alg;
   1497	uctx->ctx_len = security->ctx_len;
   1498	memcpy(uctx + 1, security->ctx_str, security->ctx_len);
   1499	err = security_xfrm_state_alloc(x, uctx);
   1500	kfree(uctx);
   1501	if (err)
   1502		return err;
   1503
   1504	return 0;
   1505}
   1506
   1507static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
   1508					   struct xfrm_encap_tmpl *encap)
   1509{
   1510	struct net *net = xs_net(orig);
   1511	struct xfrm_state *x = xfrm_state_alloc(net);
   1512	if (!x)
   1513		goto out;
   1514
   1515	memcpy(&x->id, &orig->id, sizeof(x->id));
   1516	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
   1517	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
   1518	x->props.mode = orig->props.mode;
   1519	x->props.replay_window = orig->props.replay_window;
   1520	x->props.reqid = orig->props.reqid;
   1521	x->props.family = orig->props.family;
   1522	x->props.saddr = orig->props.saddr;
   1523
   1524	if (orig->aalg) {
   1525		x->aalg = xfrm_algo_auth_clone(orig->aalg);
   1526		if (!x->aalg)
   1527			goto error;
   1528	}
   1529	x->props.aalgo = orig->props.aalgo;
   1530
   1531	if (orig->aead) {
   1532		x->aead = xfrm_algo_aead_clone(orig->aead);
   1533		x->geniv = orig->geniv;
   1534		if (!x->aead)
   1535			goto error;
   1536	}
   1537	if (orig->ealg) {
   1538		x->ealg = xfrm_algo_clone(orig->ealg);
   1539		if (!x->ealg)
   1540			goto error;
   1541	}
   1542	x->props.ealgo = orig->props.ealgo;
   1543
   1544	if (orig->calg) {
   1545		x->calg = xfrm_algo_clone(orig->calg);
   1546		if (!x->calg)
   1547			goto error;
   1548	}
   1549	x->props.calgo = orig->props.calgo;
   1550
   1551	if (encap || orig->encap) {
   1552		if (encap)
   1553			x->encap = kmemdup(encap, sizeof(*x->encap),
   1554					GFP_KERNEL);
   1555		else
   1556			x->encap = kmemdup(orig->encap, sizeof(*x->encap),
   1557					GFP_KERNEL);
   1558
   1559		if (!x->encap)
   1560			goto error;
   1561	}
   1562
   1563	if (orig->security)
   1564		if (clone_security(x, orig->security))
   1565			goto error;
   1566
   1567	if (orig->coaddr) {
   1568		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
   1569				    GFP_KERNEL);
   1570		if (!x->coaddr)
   1571			goto error;
   1572	}
   1573
   1574	if (orig->replay_esn) {
   1575		if (xfrm_replay_clone(x, orig))
   1576			goto error;
   1577	}
   1578
   1579	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
   1580	memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
   1581
   1582	x->props.flags = orig->props.flags;
   1583	x->props.extra_flags = orig->props.extra_flags;
   1584
   1585	x->if_id = orig->if_id;
   1586	x->tfcpad = orig->tfcpad;
   1587	x->replay_maxdiff = orig->replay_maxdiff;
   1588	x->replay_maxage = orig->replay_maxage;
   1589	memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
   1590	x->km.state = orig->km.state;
   1591	x->km.seq = orig->km.seq;
   1592	x->replay = orig->replay;
   1593	x->preplay = orig->preplay;
   1594	x->mapping_maxage = orig->mapping_maxage;
   1595	x->new_mapping = 0;
   1596	x->new_mapping_sport = 0;
   1597
   1598	return x;
   1599
   1600 error:
   1601	xfrm_state_put(x);
   1602out:
   1603	return NULL;
   1604}
   1605
   1606struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
   1607						u32 if_id)
   1608{
   1609	unsigned int h;
   1610	struct xfrm_state *x = NULL;
   1611
   1612	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1613
   1614	if (m->reqid) {
   1615		h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
   1616				  m->reqid, m->old_family);
   1617		hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
   1618			if (x->props.mode != m->mode ||
   1619			    x->id.proto != m->proto)
   1620				continue;
   1621			if (m->reqid && x->props.reqid != m->reqid)
   1622				continue;
   1623			if (if_id != 0 && x->if_id != if_id)
   1624				continue;
   1625			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
   1626					     m->old_family) ||
   1627			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
   1628					     m->old_family))
   1629				continue;
   1630			xfrm_state_hold(x);
   1631			break;
   1632		}
   1633	} else {
   1634		h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
   1635				  m->old_family);
   1636		hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
   1637			if (x->props.mode != m->mode ||
   1638			    x->id.proto != m->proto)
   1639				continue;
   1640			if (if_id != 0 && x->if_id != if_id)
   1641				continue;
   1642			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
   1643					     m->old_family) ||
   1644			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
   1645					     m->old_family))
   1646				continue;
   1647			xfrm_state_hold(x);
   1648			break;
   1649		}
   1650	}
   1651
   1652	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1653
   1654	return x;
   1655}
   1656EXPORT_SYMBOL(xfrm_migrate_state_find);
   1657
   1658struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
   1659				      struct xfrm_migrate *m,
   1660				      struct xfrm_encap_tmpl *encap)
   1661{
   1662	struct xfrm_state *xc;
   1663
   1664	xc = xfrm_state_clone(x, encap);
   1665	if (!xc)
   1666		return NULL;
   1667
   1668	xc->props.family = m->new_family;
   1669
   1670	if (xfrm_init_state(xc) < 0)
   1671		goto error;
   1672
   1673	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
   1674	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
   1675
   1676	/* add state */
   1677	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
   1678		/* a care is needed when the destination address of the
   1679		   state is to be updated as it is a part of triplet */
   1680		xfrm_state_insert(xc);
   1681	} else {
   1682		if (xfrm_state_add(xc) < 0)
   1683			goto error;
   1684	}
   1685
   1686	return xc;
   1687error:
   1688	xfrm_state_put(xc);
   1689	return NULL;
   1690}
   1691EXPORT_SYMBOL(xfrm_state_migrate);
   1692#endif
   1693
   1694int xfrm_state_update(struct xfrm_state *x)
   1695{
   1696	struct xfrm_state *x1, *to_put;
   1697	int err;
   1698	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
   1699	struct net *net = xs_net(x);
   1700
   1701	to_put = NULL;
   1702
   1703	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1704	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
   1705
   1706	err = -ESRCH;
   1707	if (!x1)
   1708		goto out;
   1709
   1710	if (xfrm_state_kern(x1)) {
   1711		to_put = x1;
   1712		err = -EEXIST;
   1713		goto out;
   1714	}
   1715
   1716	if (x1->km.state == XFRM_STATE_ACQ) {
   1717		__xfrm_state_insert(x);
   1718		x = NULL;
   1719	}
   1720	err = 0;
   1721
   1722out:
   1723	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1724
   1725	if (to_put)
   1726		xfrm_state_put(to_put);
   1727
   1728	if (err)
   1729		return err;
   1730
   1731	if (!x) {
   1732		xfrm_state_delete(x1);
   1733		xfrm_state_put(x1);
   1734		return 0;
   1735	}
   1736
   1737	err = -EINVAL;
   1738	spin_lock_bh(&x1->lock);
   1739	if (likely(x1->km.state == XFRM_STATE_VALID)) {
   1740		if (x->encap && x1->encap &&
   1741		    x->encap->encap_type == x1->encap->encap_type)
   1742			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
   1743		else if (x->encap || x1->encap)
   1744			goto fail;
   1745
   1746		if (x->coaddr && x1->coaddr) {
   1747			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
   1748		}
   1749		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
   1750			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
   1751		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
   1752		x1->km.dying = 0;
   1753
   1754		hrtimer_start(&x1->mtimer, ktime_set(1, 0),
   1755			      HRTIMER_MODE_REL_SOFT);
   1756		if (x1->curlft.use_time)
   1757			xfrm_state_check_expire(x1);
   1758
   1759		if (x->props.smark.m || x->props.smark.v || x->if_id) {
   1760			spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1761
   1762			if (x->props.smark.m || x->props.smark.v)
   1763				x1->props.smark = x->props.smark;
   1764
   1765			if (x->if_id)
   1766				x1->if_id = x->if_id;
   1767
   1768			__xfrm_state_bump_genids(x1);
   1769			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1770		}
   1771
   1772		err = 0;
   1773		x->km.state = XFRM_STATE_DEAD;
   1774		__xfrm_state_put(x);
   1775	}
   1776
   1777fail:
   1778	spin_unlock_bh(&x1->lock);
   1779
   1780	xfrm_state_put(x1);
   1781
   1782	return err;
   1783}
   1784EXPORT_SYMBOL(xfrm_state_update);
   1785
   1786int xfrm_state_check_expire(struct xfrm_state *x)
   1787{
   1788	if (!x->curlft.use_time)
   1789		x->curlft.use_time = ktime_get_real_seconds();
   1790
   1791	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
   1792	    x->curlft.packets >= x->lft.hard_packet_limit) {
   1793		x->km.state = XFRM_STATE_EXPIRED;
   1794		hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT);
   1795		return -EINVAL;
   1796	}
   1797
   1798	if (!x->km.dying &&
   1799	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
   1800	     x->curlft.packets >= x->lft.soft_packet_limit)) {
   1801		x->km.dying = 1;
   1802		km_state_expired(x, 0, 0);
   1803	}
   1804	return 0;
   1805}
   1806EXPORT_SYMBOL(xfrm_state_check_expire);
   1807
   1808struct xfrm_state *
   1809xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
   1810		  u8 proto, unsigned short family)
   1811{
   1812	struct xfrm_state *x;
   1813
   1814	rcu_read_lock();
   1815	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
   1816	rcu_read_unlock();
   1817	return x;
   1818}
   1819EXPORT_SYMBOL(xfrm_state_lookup);
   1820
   1821struct xfrm_state *
   1822xfrm_state_lookup_byaddr(struct net *net, u32 mark,
   1823			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
   1824			 u8 proto, unsigned short family)
   1825{
   1826	struct xfrm_state *x;
   1827
   1828	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1829	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
   1830	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1831	return x;
   1832}
   1833EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
   1834
   1835struct xfrm_state *
   1836xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
   1837	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
   1838	      const xfrm_address_t *saddr, int create, unsigned short family)
   1839{
   1840	struct xfrm_state *x;
   1841
   1842	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   1843	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
   1844	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   1845
   1846	return x;
   1847}
   1848EXPORT_SYMBOL(xfrm_find_acq);
   1849
   1850#ifdef CONFIG_XFRM_SUB_POLICY
   1851#if IS_ENABLED(CONFIG_IPV6)
   1852/* distribution counting sort function for xfrm_state and xfrm_tmpl */
   1853static void
   1854__xfrm6_sort(void **dst, void **src, int n,
   1855	     int (*cmp)(const void *p), int maxclass)
   1856{
   1857	int count[XFRM_MAX_DEPTH] = { };
   1858	int class[XFRM_MAX_DEPTH];
   1859	int i;
   1860
   1861	for (i = 0; i < n; i++) {
   1862		int c = cmp(src[i]);
   1863
   1864		class[i] = c;
   1865		count[c]++;
   1866	}
   1867
   1868	for (i = 2; i < maxclass; i++)
   1869		count[i] += count[i - 1];
   1870
   1871	for (i = 0; i < n; i++) {
   1872		dst[count[class[i] - 1]++] = src[i];
   1873		src[i] = NULL;
   1874	}
   1875}
   1876
   1877/* Rule for xfrm_state:
   1878 *
   1879 * rule 1: select IPsec transport except AH
   1880 * rule 2: select MIPv6 RO or inbound trigger
   1881 * rule 3: select IPsec transport AH
   1882 * rule 4: select IPsec tunnel
   1883 * rule 5: others
   1884 */
   1885static int __xfrm6_state_sort_cmp(const void *p)
   1886{
   1887	const struct xfrm_state *v = p;
   1888
   1889	switch (v->props.mode) {
   1890	case XFRM_MODE_TRANSPORT:
   1891		if (v->id.proto != IPPROTO_AH)
   1892			return 1;
   1893		else
   1894			return 3;
   1895#if IS_ENABLED(CONFIG_IPV6_MIP6)
   1896	case XFRM_MODE_ROUTEOPTIMIZATION:
   1897	case XFRM_MODE_IN_TRIGGER:
   1898		return 2;
   1899#endif
   1900	case XFRM_MODE_TUNNEL:
   1901	case XFRM_MODE_BEET:
   1902		return 4;
   1903	}
   1904	return 5;
   1905}
   1906
   1907/* Rule for xfrm_tmpl:
   1908 *
   1909 * rule 1: select IPsec transport
   1910 * rule 2: select MIPv6 RO or inbound trigger
   1911 * rule 3: select IPsec tunnel
   1912 * rule 4: others
   1913 */
   1914static int __xfrm6_tmpl_sort_cmp(const void *p)
   1915{
   1916	const struct xfrm_tmpl *v = p;
   1917
   1918	switch (v->mode) {
   1919	case XFRM_MODE_TRANSPORT:
   1920		return 1;
   1921#if IS_ENABLED(CONFIG_IPV6_MIP6)
   1922	case XFRM_MODE_ROUTEOPTIMIZATION:
   1923	case XFRM_MODE_IN_TRIGGER:
   1924		return 2;
   1925#endif
   1926	case XFRM_MODE_TUNNEL:
   1927	case XFRM_MODE_BEET:
   1928		return 3;
   1929	}
   1930	return 4;
   1931}
   1932#else
   1933static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; }
   1934static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; }
   1935
   1936static inline void
   1937__xfrm6_sort(void **dst, void **src, int n,
   1938	     int (*cmp)(const void *p), int maxclass)
   1939{
   1940	int i;
   1941
   1942	for (i = 0; i < n; i++)
   1943		dst[i] = src[i];
   1944}
   1945#endif /* CONFIG_IPV6 */
   1946
   1947void
   1948xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
   1949	       unsigned short family)
   1950{
   1951	int i;
   1952
   1953	if (family == AF_INET6)
   1954		__xfrm6_sort((void **)dst, (void **)src, n,
   1955			     __xfrm6_tmpl_sort_cmp, 5);
   1956	else
   1957		for (i = 0; i < n; i++)
   1958			dst[i] = src[i];
   1959}
   1960
   1961void
   1962xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
   1963		unsigned short family)
   1964{
   1965	int i;
   1966
   1967	if (family == AF_INET6)
   1968		__xfrm6_sort((void **)dst, (void **)src, n,
   1969			     __xfrm6_state_sort_cmp, 6);
   1970	else
   1971		for (i = 0; i < n; i++)
   1972			dst[i] = src[i];
   1973}
   1974#endif
   1975
   1976/* Silly enough, but I'm lazy to build resolution list */
   1977
   1978static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
   1979{
   1980	unsigned int h = xfrm_seq_hash(net, seq);
   1981	struct xfrm_state *x;
   1982
   1983	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
   1984		if (x->km.seq == seq &&
   1985		    (mark & x->mark.m) == x->mark.v &&
   1986		    x->km.state == XFRM_STATE_ACQ) {
   1987			xfrm_state_hold(x);
   1988			return x;
   1989		}
   1990	}
   1991
   1992	return NULL;
   1993}
   1994
   1995struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
   1996{
   1997	struct xfrm_state *x;
   1998
   1999	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   2000	x = __xfrm_find_acq_byseq(net, mark, seq);
   2001	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   2002	return x;
   2003}
   2004EXPORT_SYMBOL(xfrm_find_acq_byseq);
   2005
   2006u32 xfrm_get_acqseq(void)
   2007{
   2008	u32 res;
   2009	static atomic_t acqseq;
   2010
   2011	do {
   2012		res = atomic_inc_return(&acqseq);
   2013	} while (!res);
   2014
   2015	return res;
   2016}
   2017EXPORT_SYMBOL(xfrm_get_acqseq);
   2018
   2019int verify_spi_info(u8 proto, u32 min, u32 max)
   2020{
   2021	switch (proto) {
   2022	case IPPROTO_AH:
   2023	case IPPROTO_ESP:
   2024		break;
   2025
   2026	case IPPROTO_COMP:
   2027		/* IPCOMP spi is 16-bits. */
   2028		if (max >= 0x10000)
   2029			return -EINVAL;
   2030		break;
   2031
   2032	default:
   2033		return -EINVAL;
   2034	}
   2035
   2036	if (min > max)
   2037		return -EINVAL;
   2038
   2039	return 0;
   2040}
   2041EXPORT_SYMBOL(verify_spi_info);
   2042
   2043int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
   2044{
   2045	struct net *net = xs_net(x);
   2046	unsigned int h;
   2047	struct xfrm_state *x0;
   2048	int err = -ENOENT;
   2049	__be32 minspi = htonl(low);
   2050	__be32 maxspi = htonl(high);
   2051	__be32 newspi = 0;
   2052	u32 mark = x->mark.v & x->mark.m;
   2053
   2054	spin_lock_bh(&x->lock);
   2055	if (x->km.state == XFRM_STATE_DEAD)
   2056		goto unlock;
   2057
   2058	err = 0;
   2059	if (x->id.spi)
   2060		goto unlock;
   2061
   2062	err = -ENOENT;
   2063
   2064	if (minspi == maxspi) {
   2065		x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
   2066		if (x0) {
   2067			xfrm_state_put(x0);
   2068			goto unlock;
   2069		}
   2070		newspi = minspi;
   2071	} else {
   2072		u32 spi = 0;
   2073		for (h = 0; h < high-low+1; h++) {
   2074			spi = low + prandom_u32()%(high-low+1);
   2075			x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
   2076			if (x0 == NULL) {
   2077				newspi = htonl(spi);
   2078				break;
   2079			}
   2080			xfrm_state_put(x0);
   2081		}
   2082	}
   2083	if (newspi) {
   2084		spin_lock_bh(&net->xfrm.xfrm_state_lock);
   2085		x->id.spi = newspi;
   2086		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
   2087		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
   2088		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   2089
   2090		err = 0;
   2091	}
   2092
   2093unlock:
   2094	spin_unlock_bh(&x->lock);
   2095
   2096	return err;
   2097}
   2098EXPORT_SYMBOL(xfrm_alloc_spi);
   2099
   2100static bool __xfrm_state_filter_match(struct xfrm_state *x,
   2101				      struct xfrm_address_filter *filter)
   2102{
   2103	if (filter) {
   2104		if ((filter->family == AF_INET ||
   2105		     filter->family == AF_INET6) &&
   2106		    x->props.family != filter->family)
   2107			return false;
   2108
   2109		return addr_match(&x->props.saddr, &filter->saddr,
   2110				  filter->splen) &&
   2111		       addr_match(&x->id.daddr, &filter->daddr,
   2112				  filter->dplen);
   2113	}
   2114	return true;
   2115}
   2116
   2117int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
   2118		    int (*func)(struct xfrm_state *, int, void*),
   2119		    void *data)
   2120{
   2121	struct xfrm_state *state;
   2122	struct xfrm_state_walk *x;
   2123	int err = 0;
   2124
   2125	if (walk->seq != 0 && list_empty(&walk->all))
   2126		return 0;
   2127
   2128	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   2129	if (list_empty(&walk->all))
   2130		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
   2131	else
   2132		x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
   2133	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
   2134		if (x->state == XFRM_STATE_DEAD)
   2135			continue;
   2136		state = container_of(x, struct xfrm_state, km);
   2137		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
   2138			continue;
   2139		if (!__xfrm_state_filter_match(state, walk->filter))
   2140			continue;
   2141		err = func(state, walk->seq, data);
   2142		if (err) {
   2143			list_move_tail(&walk->all, &x->all);
   2144			goto out;
   2145		}
   2146		walk->seq++;
   2147	}
   2148	if (walk->seq == 0) {
   2149		err = -ENOENT;
   2150		goto out;
   2151	}
   2152	list_del_init(&walk->all);
   2153out:
   2154	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   2155	return err;
   2156}
   2157EXPORT_SYMBOL(xfrm_state_walk);
   2158
   2159void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
   2160			  struct xfrm_address_filter *filter)
   2161{
   2162	INIT_LIST_HEAD(&walk->all);
   2163	walk->proto = proto;
   2164	walk->state = XFRM_STATE_DEAD;
   2165	walk->seq = 0;
   2166	walk->filter = filter;
   2167}
   2168EXPORT_SYMBOL(xfrm_state_walk_init);
   2169
   2170void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
   2171{
   2172	kfree(walk->filter);
   2173
   2174	if (list_empty(&walk->all))
   2175		return;
   2176
   2177	spin_lock_bh(&net->xfrm.xfrm_state_lock);
   2178	list_del(&walk->all);
   2179	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
   2180}
   2181EXPORT_SYMBOL(xfrm_state_walk_done);
   2182
   2183static void xfrm_replay_timer_handler(struct timer_list *t)
   2184{
   2185	struct xfrm_state *x = from_timer(x, t, rtimer);
   2186
   2187	spin_lock(&x->lock);
   2188
   2189	if (x->km.state == XFRM_STATE_VALID) {
   2190		if (xfrm_aevent_is_on(xs_net(x)))
   2191			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
   2192		else
   2193			x->xflags |= XFRM_TIME_DEFER;
   2194	}
   2195
   2196	spin_unlock(&x->lock);
   2197}
   2198
   2199static LIST_HEAD(xfrm_km_list);
   2200
   2201void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
   2202{
   2203	struct xfrm_mgr *km;
   2204
   2205	rcu_read_lock();
   2206	list_for_each_entry_rcu(km, &xfrm_km_list, list)
   2207		if (km->notify_policy)
   2208			km->notify_policy(xp, dir, c);
   2209	rcu_read_unlock();
   2210}
   2211
   2212void km_state_notify(struct xfrm_state *x, const struct km_event *c)
   2213{
   2214	struct xfrm_mgr *km;
   2215	rcu_read_lock();
   2216	list_for_each_entry_rcu(km, &xfrm_km_list, list)
   2217		if (km->notify)
   2218			km->notify(x, c);
   2219	rcu_read_unlock();
   2220}
   2221
   2222EXPORT_SYMBOL(km_policy_notify);
   2223EXPORT_SYMBOL(km_state_notify);
   2224
   2225void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
   2226{
   2227	struct km_event c;
   2228
   2229	c.data.hard = hard;
   2230	c.portid = portid;
   2231	c.event = XFRM_MSG_EXPIRE;
   2232	km_state_notify(x, &c);
   2233}
   2234
   2235EXPORT_SYMBOL(km_state_expired);
   2236/*
   2237 * We send to all registered managers regardless of failure
   2238 * We are happy with one success
   2239*/
   2240int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
   2241{
   2242	int err = -EINVAL, acqret;
   2243	struct xfrm_mgr *km;
   2244
   2245	rcu_read_lock();
   2246	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2247		acqret = km->acquire(x, t, pol);
   2248		if (!acqret)
   2249			err = acqret;
   2250	}
   2251	rcu_read_unlock();
   2252	return err;
   2253}
   2254EXPORT_SYMBOL(km_query);
   2255
   2256static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
   2257{
   2258	int err = -EINVAL;
   2259	struct xfrm_mgr *km;
   2260
   2261	rcu_read_lock();
   2262	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2263		if (km->new_mapping)
   2264			err = km->new_mapping(x, ipaddr, sport);
   2265		if (!err)
   2266			break;
   2267	}
   2268	rcu_read_unlock();
   2269	return err;
   2270}
   2271
   2272int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
   2273{
   2274	int ret = 0;
   2275
   2276	if (x->mapping_maxage) {
   2277		if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage ||
   2278		    x->new_mapping_sport != sport) {
   2279			x->new_mapping_sport = sport;
   2280			x->new_mapping = jiffies / HZ;
   2281			ret = __km_new_mapping(x, ipaddr, sport);
   2282		}
   2283	} else {
   2284		ret = __km_new_mapping(x, ipaddr, sport);
   2285	}
   2286
   2287	return ret;
   2288}
   2289EXPORT_SYMBOL(km_new_mapping);
   2290
   2291void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
   2292{
   2293	struct km_event c;
   2294
   2295	c.data.hard = hard;
   2296	c.portid = portid;
   2297	c.event = XFRM_MSG_POLEXPIRE;
   2298	km_policy_notify(pol, dir, &c);
   2299}
   2300EXPORT_SYMBOL(km_policy_expired);
   2301
   2302#ifdef CONFIG_XFRM_MIGRATE
   2303int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
   2304	       const struct xfrm_migrate *m, int num_migrate,
   2305	       const struct xfrm_kmaddress *k,
   2306	       const struct xfrm_encap_tmpl *encap)
   2307{
   2308	int err = -EINVAL;
   2309	int ret;
   2310	struct xfrm_mgr *km;
   2311
   2312	rcu_read_lock();
   2313	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2314		if (km->migrate) {
   2315			ret = km->migrate(sel, dir, type, m, num_migrate, k,
   2316					  encap);
   2317			if (!ret)
   2318				err = ret;
   2319		}
   2320	}
   2321	rcu_read_unlock();
   2322	return err;
   2323}
   2324EXPORT_SYMBOL(km_migrate);
   2325#endif
   2326
   2327int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
   2328{
   2329	int err = -EINVAL;
   2330	int ret;
   2331	struct xfrm_mgr *km;
   2332
   2333	rcu_read_lock();
   2334	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2335		if (km->report) {
   2336			ret = km->report(net, proto, sel, addr);
   2337			if (!ret)
   2338				err = ret;
   2339		}
   2340	}
   2341	rcu_read_unlock();
   2342	return err;
   2343}
   2344EXPORT_SYMBOL(km_report);
   2345
   2346static bool km_is_alive(const struct km_event *c)
   2347{
   2348	struct xfrm_mgr *km;
   2349	bool is_alive = false;
   2350
   2351	rcu_read_lock();
   2352	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2353		if (km->is_alive && km->is_alive(c)) {
   2354			is_alive = true;
   2355			break;
   2356		}
   2357	}
   2358	rcu_read_unlock();
   2359
   2360	return is_alive;
   2361}
   2362
   2363#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
   2364static DEFINE_SPINLOCK(xfrm_translator_lock);
   2365static struct xfrm_translator __rcu *xfrm_translator;
   2366
   2367struct xfrm_translator *xfrm_get_translator(void)
   2368{
   2369	struct xfrm_translator *xtr;
   2370
   2371	rcu_read_lock();
   2372	xtr = rcu_dereference(xfrm_translator);
   2373	if (unlikely(!xtr))
   2374		goto out;
   2375	if (!try_module_get(xtr->owner))
   2376		xtr = NULL;
   2377out:
   2378	rcu_read_unlock();
   2379	return xtr;
   2380}
   2381EXPORT_SYMBOL_GPL(xfrm_get_translator);
   2382
   2383void xfrm_put_translator(struct xfrm_translator *xtr)
   2384{
   2385	module_put(xtr->owner);
   2386}
   2387EXPORT_SYMBOL_GPL(xfrm_put_translator);
   2388
   2389int xfrm_register_translator(struct xfrm_translator *xtr)
   2390{
   2391	int err = 0;
   2392
   2393	spin_lock_bh(&xfrm_translator_lock);
   2394	if (unlikely(xfrm_translator != NULL))
   2395		err = -EEXIST;
   2396	else
   2397		rcu_assign_pointer(xfrm_translator, xtr);
   2398	spin_unlock_bh(&xfrm_translator_lock);
   2399
   2400	return err;
   2401}
   2402EXPORT_SYMBOL_GPL(xfrm_register_translator);
   2403
   2404int xfrm_unregister_translator(struct xfrm_translator *xtr)
   2405{
   2406	int err = 0;
   2407
   2408	spin_lock_bh(&xfrm_translator_lock);
   2409	if (likely(xfrm_translator != NULL)) {
   2410		if (rcu_access_pointer(xfrm_translator) != xtr)
   2411			err = -EINVAL;
   2412		else
   2413			RCU_INIT_POINTER(xfrm_translator, NULL);
   2414	}
   2415	spin_unlock_bh(&xfrm_translator_lock);
   2416	synchronize_rcu();
   2417
   2418	return err;
   2419}
   2420EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
   2421#endif
   2422
   2423int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
   2424{
   2425	int err;
   2426	u8 *data;
   2427	struct xfrm_mgr *km;
   2428	struct xfrm_policy *pol = NULL;
   2429
   2430	if (sockptr_is_null(optval) && !optlen) {
   2431		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
   2432		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
   2433		__sk_dst_reset(sk);
   2434		return 0;
   2435	}
   2436
   2437	if (optlen <= 0 || optlen > PAGE_SIZE)
   2438		return -EMSGSIZE;
   2439
   2440	data = memdup_sockptr(optval, optlen);
   2441	if (IS_ERR(data))
   2442		return PTR_ERR(data);
   2443
   2444	if (in_compat_syscall()) {
   2445		struct xfrm_translator *xtr = xfrm_get_translator();
   2446
   2447		if (!xtr) {
   2448			kfree(data);
   2449			return -EOPNOTSUPP;
   2450		}
   2451
   2452		err = xtr->xlate_user_policy_sockptr(&data, optlen);
   2453		xfrm_put_translator(xtr);
   2454		if (err) {
   2455			kfree(data);
   2456			return err;
   2457		}
   2458	}
   2459
   2460	err = -EINVAL;
   2461	rcu_read_lock();
   2462	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
   2463		pol = km->compile_policy(sk, optname, data,
   2464					 optlen, &err);
   2465		if (err >= 0)
   2466			break;
   2467	}
   2468	rcu_read_unlock();
   2469
   2470	if (err >= 0) {
   2471		xfrm_sk_policy_insert(sk, err, pol);
   2472		xfrm_pol_put(pol);
   2473		__sk_dst_reset(sk);
   2474		err = 0;
   2475	}
   2476
   2477	kfree(data);
   2478	return err;
   2479}
   2480EXPORT_SYMBOL(xfrm_user_policy);
   2481
   2482static DEFINE_SPINLOCK(xfrm_km_lock);
   2483
   2484int xfrm_register_km(struct xfrm_mgr *km)
   2485{
   2486	spin_lock_bh(&xfrm_km_lock);
   2487	list_add_tail_rcu(&km->list, &xfrm_km_list);
   2488	spin_unlock_bh(&xfrm_km_lock);
   2489	return 0;
   2490}
   2491EXPORT_SYMBOL(xfrm_register_km);
   2492
   2493int xfrm_unregister_km(struct xfrm_mgr *km)
   2494{
   2495	spin_lock_bh(&xfrm_km_lock);
   2496	list_del_rcu(&km->list);
   2497	spin_unlock_bh(&xfrm_km_lock);
   2498	synchronize_rcu();
   2499	return 0;
   2500}
   2501EXPORT_SYMBOL(xfrm_unregister_km);
   2502
   2503int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
   2504{
   2505	int err = 0;
   2506
   2507	if (WARN_ON(afinfo->family >= NPROTO))
   2508		return -EAFNOSUPPORT;
   2509
   2510	spin_lock_bh(&xfrm_state_afinfo_lock);
   2511	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
   2512		err = -EEXIST;
   2513	else
   2514		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
   2515	spin_unlock_bh(&xfrm_state_afinfo_lock);
   2516	return err;
   2517}
   2518EXPORT_SYMBOL(xfrm_state_register_afinfo);
   2519
   2520int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
   2521{
   2522	int err = 0, family = afinfo->family;
   2523
   2524	if (WARN_ON(family >= NPROTO))
   2525		return -EAFNOSUPPORT;
   2526
   2527	spin_lock_bh(&xfrm_state_afinfo_lock);
   2528	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
   2529		if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
   2530			err = -EINVAL;
   2531		else
   2532			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
   2533	}
   2534	spin_unlock_bh(&xfrm_state_afinfo_lock);
   2535	synchronize_rcu();
   2536	return err;
   2537}
   2538EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
   2539
   2540struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
   2541{
   2542	if (unlikely(family >= NPROTO))
   2543		return NULL;
   2544
   2545	return rcu_dereference(xfrm_state_afinfo[family]);
   2546}
   2547EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
   2548
   2549struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
   2550{
   2551	struct xfrm_state_afinfo *afinfo;
   2552	if (unlikely(family >= NPROTO))
   2553		return NULL;
   2554	rcu_read_lock();
   2555	afinfo = rcu_dereference(xfrm_state_afinfo[family]);
   2556	if (unlikely(!afinfo))
   2557		rcu_read_unlock();
   2558	return afinfo;
   2559}
   2560
   2561void xfrm_flush_gc(void)
   2562{
   2563	flush_work(&xfrm_state_gc_work);
   2564}
   2565EXPORT_SYMBOL(xfrm_flush_gc);
   2566
   2567/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
   2568void xfrm_state_delete_tunnel(struct xfrm_state *x)
   2569{
   2570	if (x->tunnel) {
   2571		struct xfrm_state *t = x->tunnel;
   2572
   2573		if (atomic_read(&t->tunnel_users) == 2)
   2574			xfrm_state_delete(t);
   2575		atomic_dec(&t->tunnel_users);
   2576		xfrm_state_put_sync(t);
   2577		x->tunnel = NULL;
   2578	}
   2579}
   2580EXPORT_SYMBOL(xfrm_state_delete_tunnel);
   2581
   2582u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
   2583{
   2584	const struct xfrm_type *type = READ_ONCE(x->type);
   2585	struct crypto_aead *aead;
   2586	u32 blksize, net_adj = 0;
   2587
   2588	if (x->km.state != XFRM_STATE_VALID ||
   2589	    !type || type->proto != IPPROTO_ESP)
   2590		return mtu - x->props.header_len;
   2591
   2592	aead = x->data;
   2593	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
   2594
   2595	switch (x->props.mode) {
   2596	case XFRM_MODE_TRANSPORT:
   2597	case XFRM_MODE_BEET:
   2598		if (x->props.family == AF_INET)
   2599			net_adj = sizeof(struct iphdr);
   2600		else if (x->props.family == AF_INET6)
   2601			net_adj = sizeof(struct ipv6hdr);
   2602		break;
   2603	case XFRM_MODE_TUNNEL:
   2604		break;
   2605	default:
   2606		WARN_ON_ONCE(1);
   2607		break;
   2608	}
   2609
   2610	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
   2611		 net_adj) & ~(blksize - 1)) + net_adj - 2;
   2612}
   2613EXPORT_SYMBOL_GPL(xfrm_state_mtu);
   2614
   2615int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
   2616{
   2617	const struct xfrm_mode *inner_mode;
   2618	const struct xfrm_mode *outer_mode;
   2619	int family = x->props.family;
   2620	int err;
   2621
   2622	if (family == AF_INET &&
   2623	    xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
   2624		x->props.flags |= XFRM_STATE_NOPMTUDISC;
   2625
   2626	err = -EPROTONOSUPPORT;
   2627
   2628	if (x->sel.family != AF_UNSPEC) {
   2629		inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
   2630		if (inner_mode == NULL)
   2631			goto error;
   2632
   2633		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
   2634		    family != x->sel.family)
   2635			goto error;
   2636
   2637		x->inner_mode = *inner_mode;
   2638	} else {
   2639		const struct xfrm_mode *inner_mode_iaf;
   2640		int iafamily = AF_INET;
   2641
   2642		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
   2643		if (inner_mode == NULL)
   2644			goto error;
   2645
   2646		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
   2647			goto error;
   2648
   2649		x->inner_mode = *inner_mode;
   2650
   2651		if (x->props.family == AF_INET)
   2652			iafamily = AF_INET6;
   2653
   2654		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
   2655		if (inner_mode_iaf) {
   2656			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
   2657				x->inner_mode_iaf = *inner_mode_iaf;
   2658		}
   2659	}
   2660
   2661	x->type = xfrm_get_type(x->id.proto, family);
   2662	if (x->type == NULL)
   2663		goto error;
   2664
   2665	x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
   2666
   2667	err = x->type->init_state(x);
   2668	if (err)
   2669		goto error;
   2670
   2671	outer_mode = xfrm_get_mode(x->props.mode, family);
   2672	if (!outer_mode) {
   2673		err = -EPROTONOSUPPORT;
   2674		goto error;
   2675	}
   2676
   2677	x->outer_mode = *outer_mode;
   2678	if (init_replay) {
   2679		err = xfrm_init_replay(x);
   2680		if (err)
   2681			goto error;
   2682	}
   2683
   2684error:
   2685	return err;
   2686}
   2687
   2688EXPORT_SYMBOL(__xfrm_init_state);
   2689
   2690int xfrm_init_state(struct xfrm_state *x)
   2691{
   2692	int err;
   2693
   2694	err = __xfrm_init_state(x, true, false);
   2695	if (!err)
   2696		x->km.state = XFRM_STATE_VALID;
   2697
   2698	return err;
   2699}
   2700
   2701EXPORT_SYMBOL(xfrm_init_state);
   2702
   2703int __net_init xfrm_state_init(struct net *net)
   2704{
   2705	unsigned int sz;
   2706
   2707	if (net_eq(net, &init_net))
   2708		xfrm_state_cache = KMEM_CACHE(xfrm_state,
   2709					      SLAB_HWCACHE_ALIGN | SLAB_PANIC);
   2710
   2711	INIT_LIST_HEAD(&net->xfrm.state_all);
   2712
   2713	sz = sizeof(struct hlist_head) * 8;
   2714
   2715	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
   2716	if (!net->xfrm.state_bydst)
   2717		goto out_bydst;
   2718	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
   2719	if (!net->xfrm.state_bysrc)
   2720		goto out_bysrc;
   2721	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
   2722	if (!net->xfrm.state_byspi)
   2723		goto out_byspi;
   2724	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
   2725	if (!net->xfrm.state_byseq)
   2726		goto out_byseq;
   2727	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
   2728
   2729	net->xfrm.state_num = 0;
   2730	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
   2731	spin_lock_init(&net->xfrm.xfrm_state_lock);
   2732	seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation,
   2733			       &net->xfrm.xfrm_state_lock);
   2734	return 0;
   2735
   2736out_byseq:
   2737	xfrm_hash_free(net->xfrm.state_byspi, sz);
   2738out_byspi:
   2739	xfrm_hash_free(net->xfrm.state_bysrc, sz);
   2740out_bysrc:
   2741	xfrm_hash_free(net->xfrm.state_bydst, sz);
   2742out_bydst:
   2743	return -ENOMEM;
   2744}
   2745
   2746void xfrm_state_fini(struct net *net)
   2747{
   2748	unsigned int sz;
   2749
   2750	flush_work(&net->xfrm.state_hash_work);
   2751	flush_work(&xfrm_state_gc_work);
   2752	xfrm_state_flush(net, 0, false, true);
   2753
   2754	WARN_ON(!list_empty(&net->xfrm.state_all));
   2755
   2756	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
   2757	WARN_ON(!hlist_empty(net->xfrm.state_byseq));
   2758	xfrm_hash_free(net->xfrm.state_byseq, sz);
   2759	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
   2760	xfrm_hash_free(net->xfrm.state_byspi, sz);
   2761	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
   2762	xfrm_hash_free(net->xfrm.state_bysrc, sz);
   2763	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
   2764	xfrm_hash_free(net->xfrm.state_bydst, sz);
   2765}
   2766
   2767#ifdef CONFIG_AUDITSYSCALL
   2768static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
   2769				     struct audit_buffer *audit_buf)
   2770{
   2771	struct xfrm_sec_ctx *ctx = x->security;
   2772	u32 spi = ntohl(x->id.spi);
   2773
   2774	if (ctx)
   2775		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
   2776				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
   2777
   2778	switch (x->props.family) {
   2779	case AF_INET:
   2780		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
   2781				 &x->props.saddr.a4, &x->id.daddr.a4);
   2782		break;
   2783	case AF_INET6:
   2784		audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
   2785				 x->props.saddr.a6, x->id.daddr.a6);
   2786		break;
   2787	}
   2788
   2789	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
   2790}
   2791
   2792static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
   2793				      struct audit_buffer *audit_buf)
   2794{
   2795	const struct iphdr *iph4;
   2796	const struct ipv6hdr *iph6;
   2797
   2798	switch (family) {
   2799	case AF_INET:
   2800		iph4 = ip_hdr(skb);
   2801		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
   2802				 &iph4->saddr, &iph4->daddr);
   2803		break;
   2804	case AF_INET6:
   2805		iph6 = ipv6_hdr(skb);
   2806		audit_log_format(audit_buf,
   2807				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
   2808				 &iph6->saddr, &iph6->daddr,
   2809				 iph6->flow_lbl[0] & 0x0f,
   2810				 iph6->flow_lbl[1],
   2811				 iph6->flow_lbl[2]);
   2812		break;
   2813	}
   2814}
   2815
   2816void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
   2817{
   2818	struct audit_buffer *audit_buf;
   2819
   2820	audit_buf = xfrm_audit_start("SAD-add");
   2821	if (audit_buf == NULL)
   2822		return;
   2823	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
   2824	xfrm_audit_helper_sainfo(x, audit_buf);
   2825	audit_log_format(audit_buf, " res=%u", result);
   2826	audit_log_end(audit_buf);
   2827}
   2828EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
   2829
   2830void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
   2831{
   2832	struct audit_buffer *audit_buf;
   2833
   2834	audit_buf = xfrm_audit_start("SAD-delete");
   2835	if (audit_buf == NULL)
   2836		return;
   2837	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
   2838	xfrm_audit_helper_sainfo(x, audit_buf);
   2839	audit_log_format(audit_buf, " res=%u", result);
   2840	audit_log_end(audit_buf);
   2841}
   2842EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
   2843
   2844void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
   2845				      struct sk_buff *skb)
   2846{
   2847	struct audit_buffer *audit_buf;
   2848	u32 spi;
   2849
   2850	audit_buf = xfrm_audit_start("SA-replay-overflow");
   2851	if (audit_buf == NULL)
   2852		return;
   2853	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
   2854	/* don't record the sequence number because it's inherent in this kind
   2855	 * of audit message */
   2856	spi = ntohl(x->id.spi);
   2857	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
   2858	audit_log_end(audit_buf);
   2859}
   2860EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
   2861
   2862void xfrm_audit_state_replay(struct xfrm_state *x,
   2863			     struct sk_buff *skb, __be32 net_seq)
   2864{
   2865	struct audit_buffer *audit_buf;
   2866	u32 spi;
   2867
   2868	audit_buf = xfrm_audit_start("SA-replayed-pkt");
   2869	if (audit_buf == NULL)
   2870		return;
   2871	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
   2872	spi = ntohl(x->id.spi);
   2873	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
   2874			 spi, spi, ntohl(net_seq));
   2875	audit_log_end(audit_buf);
   2876}
   2877EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
   2878
   2879void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
   2880{
   2881	struct audit_buffer *audit_buf;
   2882
   2883	audit_buf = xfrm_audit_start("SA-notfound");
   2884	if (audit_buf == NULL)
   2885		return;
   2886	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
   2887	audit_log_end(audit_buf);
   2888}
   2889EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
   2890
   2891void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
   2892			       __be32 net_spi, __be32 net_seq)
   2893{
   2894	struct audit_buffer *audit_buf;
   2895	u32 spi;
   2896
   2897	audit_buf = xfrm_audit_start("SA-notfound");
   2898	if (audit_buf == NULL)
   2899		return;
   2900	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
   2901	spi = ntohl(net_spi);
   2902	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
   2903			 spi, spi, ntohl(net_seq));
   2904	audit_log_end(audit_buf);
   2905}
   2906EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
   2907
   2908void xfrm_audit_state_icvfail(struct xfrm_state *x,
   2909			      struct sk_buff *skb, u8 proto)
   2910{
   2911	struct audit_buffer *audit_buf;
   2912	__be32 net_spi;
   2913	__be32 net_seq;
   2914
   2915	audit_buf = xfrm_audit_start("SA-icv-failure");
   2916	if (audit_buf == NULL)
   2917		return;
   2918	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
   2919	if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
   2920		u32 spi = ntohl(net_spi);
   2921		audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
   2922				 spi, spi, ntohl(net_seq));
   2923	}
   2924	audit_log_end(audit_buf);
   2925}
   2926EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
   2927#endif /* CONFIG_AUDITSYSCALL */