cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ip6_flowlabel.c (20973B)
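
For orientation, here is a minimal userspace sketch of the API this file implements: reserving an exclusive flowlabel through the IPV6_FLOWLABEL_MGR socket option. This sketch is not part of the repository; it assumes reasonably recent kernel and libc headers (so <netinet/in.h> and <linux/in6.h> coexist), and the destination address is a documentation-prefix placeholder.

/* Minimal sketch, not from this file: ask the kernel for an exclusive
 * flowlabel toward a placeholder destination, then read back the label
 * the kernel picked (ipv6_flowlabel_get() writes it into optval). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <linux/in6.h>	/* struct in6_flowlabel_req, IPV6_FLOWLABEL_MGR */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct in6_flowlabel_req freq;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&freq, 0, sizeof(freq));
	freq.flr_label = 0;			/* 0: kernel picks a free label */
	freq.flr_action = IPV6_FL_A_GET;
	freq.flr_flags = IPV6_FL_F_CREATE;	/* create if it does not exist */
	freq.flr_share = IPV6_FL_S_EXCL;	/* exclusive to this socket */
	freq.flr_linger = 6;			/* seconds; cf. FL_MIN_LINGER */
	freq.flr_expires = 60;
	inet_pton(AF_INET6, "2001:db8::1", &freq.flr_dst);

	if (setsockopt(fd, IPPROTO_IPV6, IPV6_FLOWLABEL_MGR,
		       &freq, sizeof(freq)) < 0) {
		perror("IPV6_FLOWLABEL_MGR");
		close(fd);
		return 1;
	}
	printf("got label %05x\n", (unsigned int)ntohl(freq.flr_label));
	close(fd);
	return 0;
}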


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. Set to the 6 sec specified
				   in the old IPv6 RFC; a reasonable value.
				 */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32
#define FL_MAX_SIZE	4096
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

static atomic_t fl_size = ATOMIC_INIT(0);
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/* FL hash table lock: it protects only the GC */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Per-socket flowlabel list lock */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference_bh(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))

#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference_bh(sfl->next))

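/* Look up a label in the global hash table. The caller must hold
 * rcu_read_lock_bh() and take its own reference if it keeps the entry.
 */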
static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

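/* As __fl_lookup(), but takes a user reference on the entry; returns
 * NULL if the label does not exist or is already being freed.
 */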
static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock_bh();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock_bh();
	return fl;
}

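/* True for sharing modes that reserve the label for a subset of tasks
 * (exclusive, per-process or per-user); such labels enable the
 * ipv6_flowlabel_exclusive static branch.
 */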
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}


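/* Release a flowlabel: drop the static-branch count if this entry held
 * one, then free the entry after an RCU grace period.
 */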
static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

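/* Drop a user reference. On the last put, push the expiry out by the
 * linger time and (re)arm the GC timer so the entry is reaped then.
 */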
static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;
			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

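/* GC timer callback: walk every hash bucket, free unreferenced entries
 * whose time-to-die has passed, and reschedule the timer for the
 * earliest remaining expiry.
 */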
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
		mod_timer(&ip6_fl_gc_timer, sched);
	}
	spin_unlock(&ip6_fl_lock);
}

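/* Network-namespace teardown: unconditionally free every unreferenced
 * label that belongs to the dying namespace.
 */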
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

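/* Insert a new entry into the global hash table. A zero label asks the
 * kernel to pick a random unused one. Returns NULL on success, or an
 * existing entry (with a reference taken) when the label is already in
 * use, in which case the caller must resolve the conflict.
 */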
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		for (;;) {
			fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so a new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	return NULL;
}



/* Socket flowlabel lists */

struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock_bh();
			return fl;
		}
	}
	rcu_read_unlock_bh();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

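/* Called on socket destruction: detach and release every flowlabel
 * still attached to the socket.
 */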
void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(np->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */


/*
   This is the only difficult place: a flowlabel enforces equal headers
   up to and including the routing header, but the user may still supply
   options following the rthdr.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

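/* Convert a requested linger time in seconds to jiffies, clamping it
 * up to FL_MIN_LINGER; values above FL_MAX_LINGER require
 * CAP_NET_ADMIN and are otherwise rejected by returning 0.
 */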
static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

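/* Extend an entry's linger and expiry times. Both may only grow, never
 * shrink, so renewing cannot shorten another sharer's lease.
 */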
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

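/* Allocate and initialize a flowlabel from a userspace request,
 * parsing any trailing ancillary data into per-label IPv6 options.
 * Only options up to and including the routing header may be attached
 * to a label (opt_flen options are rejected), and the ownership for
 * per-process/per-user sharing is recorded here.
 */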
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
				CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

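/* Enforce the global and per-socket limits on the number of labels;
 * CAP_NET_ADMIN may exceed the soft limits but never FL_MAX_SIZE.
 */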
static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl)
		count++;
	rcu_read_unlock_bh();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

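/* Link a (socklist entry, flowlabel) pair onto the head of the
 * socket's RCU-protected flowlabel list.
 */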
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (np->repflow) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock_bh();

	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock_bh();
			return 0;
		}
	}
	rcu_read_unlock_bh();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

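/* IPV6_FL_A_PUT: detach a label from the socket (or clear TCP
 * reflection state) and drop the user reference the attachment held.
 */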
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!np->repflow)
			return -ESRCH;
		np->flow_label = 0;
		np->repflow = 0;
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &np->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

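/* IPV6_FL_A_RENEW: extend the lease on a label attached to this
 * socket; CAP_NET_ADMIN may renew any unshared label in the namespace.
 */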
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock_bh();
			return err;
		}
	}
	rcu_read_unlock_bh();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

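/* IPV6_FL_A_GET: attach a label to this socket, creating it if
 * IPV6_FL_F_CREATE is set; an existing label may be joined only when
 * its sharing mode and owner are compatible. Also implements the
 * IPV6_FL_F_REFLECT mode for TCP.
 */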
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
		sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Cannot set IPV6_FL_F_REFLECT if the flowlabel_consistency sysctl is enabled\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		np->repflow = 1;
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock_bh();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock_bh();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(np, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
				sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(np, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

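/* setsockopt(IPV6_FLOWLABEL_MGR) entry point: copy the request from
 * userspace and dispatch on flr_action.
 */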
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;
	int bucket;
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);
	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock_bh();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock_bh();
}

static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;
		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			&ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	del_timer(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}