cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ip6_fib.c (64267B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *	Linux INET6 implementation
      4 *	Forwarding Information Database
      5 *
      6 *	Authors:
      7 *	Pedro Roque		<roque@di.fc.ul.pt>
      8 *
      9 *	Changes:
     10 *	Yuji SEKIYA @USAGI:	Support default route on router node;
     11 *				remove ip6_null_entry from the top of
     12 *				routing table.
     13 *	Ville Nuorvala:		Fixed routing subtrees.
     14 */
     15
     16#define pr_fmt(fmt) "IPv6: " fmt
     17
     18#include <linux/bpf.h>
     19#include <linux/errno.h>
     20#include <linux/types.h>
     21#include <linux/net.h>
     22#include <linux/route.h>
     23#include <linux/netdevice.h>
     24#include <linux/in6.h>
     25#include <linux/init.h>
     26#include <linux/list.h>
     27#include <linux/slab.h>
     28
     29#include <net/ip.h>
     30#include <net/ipv6.h>
     31#include <net/ndisc.h>
     32#include <net/addrconf.h>
     33#include <net/lwtunnel.h>
     34#include <net/fib_notifier.h>
     35
     36#include <net/ip_fib.h>
     37#include <net/ip6_fib.h>
     38#include <net/ip6_route.h>
     39
     40static struct kmem_cache *fib6_node_kmem __read_mostly;
     41
     42struct fib6_cleaner {
     43	struct fib6_walker w;
     44	struct net *net;
     45	int (*func)(struct fib6_info *, void *arg);
     46	int sernum;
     47	void *arg;
     48	bool skip_notify;
     49};
     50
     51#ifdef CONFIG_IPV6_SUBTREES
     52#define FWS_INIT FWS_S
     53#else
     54#define FWS_INIT FWS_L
     55#endif
     56
     57static struct fib6_info *fib6_find_prefix(struct net *net,
     58					 struct fib6_table *table,
     59					 struct fib6_node *fn);
     60static struct fib6_node *fib6_repair_tree(struct net *net,
     61					  struct fib6_table *table,
     62					  struct fib6_node *fn);
     63static int fib6_walk(struct net *net, struct fib6_walker *w);
     64static int fib6_walk_continue(struct fib6_walker *w);
     65
     66/*
     67 *	A routing update causes an increase of the serial number on the
     68 *	affected subtree. This allows for cached routes to be asynchronously
     69 *	tested when modifications are made to the destination cache as a
     70 *	result of redirects, path MTU changes, etc.
     71 */
     72
     73static void fib6_gc_timer_cb(struct timer_list *t);
     74
     75#define FOR_WALKERS(net, w) \
     76	list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
     77
     78static void fib6_walker_link(struct net *net, struct fib6_walker *w)
     79{
     80	write_lock_bh(&net->ipv6.fib6_walker_lock);
     81	list_add(&w->lh, &net->ipv6.fib6_walkers);
     82	write_unlock_bh(&net->ipv6.fib6_walker_lock);
     83}
     84
     85static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
     86{
     87	write_lock_bh(&net->ipv6.fib6_walker_lock);
     88	list_del(&w->lh);
     89	write_unlock_bh(&net->ipv6.fib6_walker_lock);
     90}
     91
     92static int fib6_new_sernum(struct net *net)
     93{
     94	int new, old;
     95
     96	do {
     97		old = atomic_read(&net->ipv6.fib6_sernum);
     98		new = old < INT_MAX ? old + 1 : 1;
     99	} while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
    100				old, new) != old);
    101	return new;
    102}
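/* Note: fib6_new_sernum() wraps from INT_MAX back to 1 and never returns 0,
 * so the value 0 (FIB6_NO_SERNUM_CHANGE below) can safely mean "no change".
 */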
    103
    104enum {
    105	FIB6_NO_SERNUM_CHANGE = 0,
    106};
    107
    108void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
    109{
    110	struct fib6_node *fn;
    111
    112	fn = rcu_dereference_protected(f6i->fib6_node,
    113			lockdep_is_held(&f6i->fib6_table->tb6_lock));
    114	if (fn)
    115		WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
    116}
    117
    118/*
    119 *	Auxiliary address test functions for the radix tree.
    120 *
     121 *	These assume a 32-bit processor (although they will work on
     122 *	64-bit processors)
    123 */
    124
    125/*
    126 *	test bit
    127 */
    128#if defined(__LITTLE_ENDIAN)
    129# define BITOP_BE32_SWIZZLE	(0x1F & ~7)
    130#else
    131# define BITOP_BE32_SWIZZLE	0
    132#endif
    133
    134static __be32 addr_bit_set(const void *token, int fn_bit)
    135{
    136	const __be32 *addr = token;
    137	/*
    138	 * Here,
    139	 *	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
    140	 * is optimized version of
    141	 *	htonl(1 << ((~fn_bit)&0x1F))
    142	 * See include/asm-generic/bitops/le.h.
    143	 */
    144	return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
    145	       addr[fn_bit >> 5];
    146}
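/* Illustrative example (little-endian host): for fn_bit == 0 the shift count
 * is (~0 ^ 0x18) & 0x1f == 7, so the function returns addr[0] & 0x80, i.e. the
 * most significant bit of the first byte of the address, which is the same
 * result the unoptimized htonl(1 << 31) & addr[0] would give.
 */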
    147
    148struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
    149{
    150	struct fib6_info *f6i;
    151	size_t sz = sizeof(*f6i);
    152
    153	if (with_fib6_nh)
    154		sz += sizeof(struct fib6_nh);
    155
    156	f6i = kzalloc(sz, gfp_flags);
    157	if (!f6i)
    158		return NULL;
    159
    160	/* fib6_siblings is a union with nh_list, so this initializes both */
    161	INIT_LIST_HEAD(&f6i->fib6_siblings);
    162	refcount_set(&f6i->fib6_ref, 1);
    163
    164	return f6i;
    165}
    166
    167void fib6_info_destroy_rcu(struct rcu_head *head)
    168{
    169	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
    170
    171	WARN_ON(f6i->fib6_node);
    172
    173	if (f6i->nh)
    174		nexthop_put(f6i->nh);
    175	else
    176		fib6_nh_release(f6i->fib6_nh);
    177
    178	ip_fib_metrics_put(f6i->fib6_metrics);
    179	kfree(f6i);
    180}
    181EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
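/* fib6_info_release() (see include/net/ip6_fib.h) drops fib6_ref and, once
 * the count reaches zero, schedules fib6_info_destroy_rcu() via call_rcu().
 */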
    182
    183static struct fib6_node *node_alloc(struct net *net)
    184{
    185	struct fib6_node *fn;
    186
    187	fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
    188	if (fn)
    189		net->ipv6.rt6_stats->fib_nodes++;
    190
    191	return fn;
    192}
    193
    194static void node_free_immediate(struct net *net, struct fib6_node *fn)
    195{
    196	kmem_cache_free(fib6_node_kmem, fn);
    197	net->ipv6.rt6_stats->fib_nodes--;
    198}
    199
    200static void node_free_rcu(struct rcu_head *head)
    201{
    202	struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
    203
    204	kmem_cache_free(fib6_node_kmem, fn);
    205}
    206
    207static void node_free(struct net *net, struct fib6_node *fn)
    208{
    209	call_rcu(&fn->rcu, node_free_rcu);
    210	net->ipv6.rt6_stats->fib_nodes--;
    211}
    212
    213static void fib6_free_table(struct fib6_table *table)
    214{
    215	inetpeer_invalidate_tree(&table->tb6_peers);
    216	kfree(table);
    217}
    218
    219static void fib6_link_table(struct net *net, struct fib6_table *tb)
    220{
    221	unsigned int h;
    222
    223	/*
     224	 * Initialize the table lock in a single place to give lockdep a key;
     225	 * tables aren't visible prior to being linked to the list.
    226	 */
    227	spin_lock_init(&tb->tb6_lock);
    228	h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
    229
    230	/*
     231	 * No protection necessary, this is the only list mutation
    232	 * operation, tables never disappear once they exist.
    233	 */
    234	hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
    235}
    236
    237#ifdef CONFIG_IPV6_MULTIPLE_TABLES
    238
    239static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
    240{
    241	struct fib6_table *table;
    242
    243	table = kzalloc(sizeof(*table), GFP_ATOMIC);
    244	if (table) {
    245		table->tb6_id = id;
    246		rcu_assign_pointer(table->tb6_root.leaf,
    247				   net->ipv6.fib6_null_entry);
    248		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
    249		inet_peer_base_init(&table->tb6_peers);
    250	}
    251
    252	return table;
    253}
    254
    255struct fib6_table *fib6_new_table(struct net *net, u32 id)
    256{
    257	struct fib6_table *tb;
    258
    259	if (id == 0)
    260		id = RT6_TABLE_MAIN;
    261	tb = fib6_get_table(net, id);
    262	if (tb)
    263		return tb;
    264
    265	tb = fib6_alloc_table(net, id);
    266	if (tb)
    267		fib6_link_table(net, tb);
    268
    269	return tb;
    270}
    271EXPORT_SYMBOL_GPL(fib6_new_table);
    272
    273struct fib6_table *fib6_get_table(struct net *net, u32 id)
    274{
    275	struct fib6_table *tb;
    276	struct hlist_head *head;
    277	unsigned int h;
    278
    279	if (id == 0)
    280		id = RT6_TABLE_MAIN;
    281	h = id & (FIB6_TABLE_HASHSZ - 1);
    282	rcu_read_lock();
    283	head = &net->ipv6.fib_table_hash[h];
    284	hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
    285		if (tb->tb6_id == id) {
    286			rcu_read_unlock();
    287			return tb;
    288		}
    289	}
    290	rcu_read_unlock();
    291
    292	return NULL;
    293}
    294EXPORT_SYMBOL_GPL(fib6_get_table);
    295
    296static void __net_init fib6_tables_init(struct net *net)
    297{
    298	fib6_link_table(net, net->ipv6.fib6_main_tbl);
    299	fib6_link_table(net, net->ipv6.fib6_local_tbl);
    300}
    301#else
    302
    303struct fib6_table *fib6_new_table(struct net *net, u32 id)
    304{
    305	return fib6_get_table(net, id);
    306}
    307
    308struct fib6_table *fib6_get_table(struct net *net, u32 id)
    309{
    310	  return net->ipv6.fib6_main_tbl;
    311}
    312
    313struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
    314				   const struct sk_buff *skb,
    315				   int flags, pol_lookup_t lookup)
    316{
    317	struct rt6_info *rt;
    318
    319	rt = pol_lookup_func(lookup,
    320			net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
    321	if (rt->dst.error == -EAGAIN) {
    322		ip6_rt_put_flags(rt, flags);
    323		rt = net->ipv6.ip6_null_entry;
    324		if (!(flags & RT6_LOOKUP_F_DST_NOREF))
    325			dst_hold(&rt->dst);
    326	}
    327
    328	return &rt->dst;
    329}
    330
    331/* called with rcu lock held; no reference taken on fib6_info */
    332int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
    333		struct fib6_result *res, int flags)
    334{
    335	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
    336				 res, flags);
    337}
    338
    339static void __net_init fib6_tables_init(struct net *net)
    340{
    341	fib6_link_table(net, net->ipv6.fib6_main_tbl);
    342}
    343
    344#endif
    345
    346unsigned int fib6_tables_seq_read(struct net *net)
    347{
    348	unsigned int h, fib_seq = 0;
    349
    350	rcu_read_lock();
    351	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
    352		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
    353		struct fib6_table *tb;
    354
    355		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
    356			fib_seq += tb->fib_seq;
    357	}
    358	rcu_read_unlock();
    359
    360	return fib_seq;
    361}
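/* The per-table fib_seq counter is incremented by the call_fib6_*_notifiers()
 * helpers below; the sum returned here lets the FIB notifier core detect
 * whether routes changed while a newly registered notifier was being brought
 * up to date.
 */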
    362
    363static int call_fib6_entry_notifier(struct notifier_block *nb,
    364				    enum fib_event_type event_type,
    365				    struct fib6_info *rt,
    366				    struct netlink_ext_ack *extack)
    367{
    368	struct fib6_entry_notifier_info info = {
    369		.info.extack = extack,
    370		.rt = rt,
    371	};
    372
    373	return call_fib6_notifier(nb, event_type, &info.info);
    374}
    375
    376static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
    377					      enum fib_event_type event_type,
    378					      struct fib6_info *rt,
    379					      unsigned int nsiblings,
    380					      struct netlink_ext_ack *extack)
    381{
    382	struct fib6_entry_notifier_info info = {
    383		.info.extack = extack,
    384		.rt = rt,
    385		.nsiblings = nsiblings,
    386	};
    387
    388	return call_fib6_notifier(nb, event_type, &info.info);
    389}
    390
    391int call_fib6_entry_notifiers(struct net *net,
    392			      enum fib_event_type event_type,
    393			      struct fib6_info *rt,
    394			      struct netlink_ext_ack *extack)
    395{
    396	struct fib6_entry_notifier_info info = {
    397		.info.extack = extack,
    398		.rt = rt,
    399	};
    400
    401	rt->fib6_table->fib_seq++;
    402	return call_fib6_notifiers(net, event_type, &info.info);
    403}
    404
    405int call_fib6_multipath_entry_notifiers(struct net *net,
    406					enum fib_event_type event_type,
    407					struct fib6_info *rt,
    408					unsigned int nsiblings,
    409					struct netlink_ext_ack *extack)
    410{
    411	struct fib6_entry_notifier_info info = {
    412		.info.extack = extack,
    413		.rt = rt,
    414		.nsiblings = nsiblings,
    415	};
    416
    417	rt->fib6_table->fib_seq++;
    418	return call_fib6_notifiers(net, event_type, &info.info);
    419}
    420
    421int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
    422{
    423	struct fib6_entry_notifier_info info = {
    424		.rt = rt,
    425		.nsiblings = rt->fib6_nsiblings,
    426	};
    427
    428	rt->fib6_table->fib_seq++;
    429	return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
    430}
    431
    432struct fib6_dump_arg {
    433	struct net *net;
    434	struct notifier_block *nb;
    435	struct netlink_ext_ack *extack;
    436};
    437
    438static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
    439{
    440	enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
    441	int err;
    442
    443	if (!rt || rt == arg->net->ipv6.fib6_null_entry)
    444		return 0;
    445
    446	if (rt->fib6_nsiblings)
    447		err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
    448							 rt,
    449							 rt->fib6_nsiblings,
    450							 arg->extack);
    451	else
    452		err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
    453					       arg->extack);
    454
    455	return err;
    456}
    457
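/* Walker callback convention used in this file: w->func() returns a positive
 * value to suspend the walk (it is resumed later via fib6_walk_continue()),
 * a negative value on error, and 0 to keep walking to the next node.
 */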
    458static int fib6_node_dump(struct fib6_walker *w)
    459{
    460	int err;
    461
    462	err = fib6_rt_dump(w->leaf, w->args);
    463	w->leaf = NULL;
    464	return err;
    465}
    466
    467static int fib6_table_dump(struct net *net, struct fib6_table *tb,
    468			   struct fib6_walker *w)
    469{
    470	int err;
    471
    472	w->root = &tb->tb6_root;
    473	spin_lock_bh(&tb->tb6_lock);
    474	err = fib6_walk(net, w);
    475	spin_unlock_bh(&tb->tb6_lock);
    476	return err;
    477}
    478
    479/* Called with rcu_read_lock() */
    480int fib6_tables_dump(struct net *net, struct notifier_block *nb,
    481		     struct netlink_ext_ack *extack)
    482{
    483	struct fib6_dump_arg arg;
    484	struct fib6_walker *w;
    485	unsigned int h;
    486	int err = 0;
    487
    488	w = kzalloc(sizeof(*w), GFP_ATOMIC);
    489	if (!w)
    490		return -ENOMEM;
    491
    492	w->func = fib6_node_dump;
    493	arg.net = net;
    494	arg.nb = nb;
    495	arg.extack = extack;
    496	w->args = &arg;
    497
    498	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
    499		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
    500		struct fib6_table *tb;
    501
    502		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
    503			err = fib6_table_dump(net, tb, w);
    504			if (err)
    505				goto out;
    506		}
    507	}
    508
    509out:
    510	kfree(w);
    511
    512	/* The tree traversal function should never return a positive value. */
    513	return err > 0 ? -EINVAL : err;
    514}
    515
    516static int fib6_dump_node(struct fib6_walker *w)
    517{
    518	int res;
    519	struct fib6_info *rt;
    520
    521	for_each_fib6_walker_rt(w) {
    522		res = rt6_dump_route(rt, w->args, w->skip_in_node);
    523		if (res >= 0) {
    524			/* Frame is full, suspend walking */
    525			w->leaf = rt;
    526
    527			/* We'll restart from this node, so if some routes were
    528			 * already dumped, skip them next time.
    529			 */
    530			w->skip_in_node += res;
    531
    532			return 1;
    533		}
    534		w->skip_in_node = 0;
    535
     536		/* Multipath routes are dumped as one route with the
     537		 * RTA_MULTIPATH attribute. Advance 'rt' to point to the
    538		 * last sibling of this route (no need to dump the
    539		 * sibling routes again)
    540		 */
    541		if (rt->fib6_nsiblings)
    542			rt = list_last_entry(&rt->fib6_siblings,
    543					     struct fib6_info,
    544					     fib6_siblings);
    545	}
    546	w->leaf = NULL;
    547	return 0;
    548}
    549
    550static void fib6_dump_end(struct netlink_callback *cb)
    551{
    552	struct net *net = sock_net(cb->skb->sk);
    553	struct fib6_walker *w = (void *)cb->args[2];
    554
    555	if (w) {
    556		if (cb->args[4]) {
    557			cb->args[4] = 0;
    558			fib6_walker_unlink(net, w);
    559		}
    560		cb->args[2] = 0;
    561		kfree(w);
    562	}
    563	cb->done = (void *)cb->args[3];
    564	cb->args[1] = 3;
    565}
    566
    567static int fib6_dump_done(struct netlink_callback *cb)
    568{
    569	fib6_dump_end(cb);
    570	return cb->done ? cb->done(cb) : 0;
    571}
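/* Netlink dump state kept in cb->args:
 *   args[0], args[1]  hash bucket and table index to resume from (args[0] is
 *                     also used as a completion flag for a single-table dump)
 *   args[2]           pointer to the allocated fib6_walker
 *   args[3]           the original cb->done callback
 *   args[4]           non-zero while a table walk is suspended mid-dump
 *   args[5]           sernum of the walker's root node when the walk started
 */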
    572
    573static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
    574			   struct netlink_callback *cb)
    575{
    576	struct net *net = sock_net(skb->sk);
    577	struct fib6_walker *w;
    578	int res;
    579
    580	w = (void *)cb->args[2];
    581	w->root = &table->tb6_root;
    582
    583	if (cb->args[4] == 0) {
    584		w->count = 0;
    585		w->skip = 0;
    586		w->skip_in_node = 0;
    587
    588		spin_lock_bh(&table->tb6_lock);
    589		res = fib6_walk(net, w);
    590		spin_unlock_bh(&table->tb6_lock);
    591		if (res > 0) {
    592			cb->args[4] = 1;
    593			cb->args[5] = READ_ONCE(w->root->fn_sernum);
    594		}
    595	} else {
    596		int sernum = READ_ONCE(w->root->fn_sernum);
    597		if (cb->args[5] != sernum) {
    598			/* Begin at the root if the tree changed */
    599			cb->args[5] = sernum;
    600			w->state = FWS_INIT;
    601			w->node = w->root;
    602			w->skip = w->count;
    603			w->skip_in_node = 0;
    604		} else
    605			w->skip = 0;
    606
    607		spin_lock_bh(&table->tb6_lock);
    608		res = fib6_walk_continue(w);
    609		spin_unlock_bh(&table->tb6_lock);
    610		if (res <= 0) {
    611			fib6_walker_unlink(net, w);
    612			cb->args[4] = 0;
    613		}
    614	}
    615
    616	return res;
    617}
    618
    619static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
    620{
    621	struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
    622					 .filter.dump_routes = true };
    623	const struct nlmsghdr *nlh = cb->nlh;
    624	struct net *net = sock_net(skb->sk);
    625	unsigned int h, s_h;
    626	unsigned int e = 0, s_e;
    627	struct fib6_walker *w;
    628	struct fib6_table *tb;
    629	struct hlist_head *head;
    630	int res = 0;
    631
    632	if (cb->strict_check) {
    633		int err;
    634
    635		err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
    636		if (err < 0)
    637			return err;
    638	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
    639		struct rtmsg *rtm = nlmsg_data(nlh);
    640
    641		if (rtm->rtm_flags & RTM_F_PREFIX)
    642			arg.filter.flags = RTM_F_PREFIX;
    643	}
    644
    645	w = (void *)cb->args[2];
    646	if (!w) {
    647		/* New dump:
    648		 *
    649		 * 1. hook callback destructor.
    650		 */
    651		cb->args[3] = (long)cb->done;
    652		cb->done = fib6_dump_done;
    653
    654		/*
    655		 * 2. allocate and initialize walker.
    656		 */
    657		w = kzalloc(sizeof(*w), GFP_ATOMIC);
    658		if (!w)
    659			return -ENOMEM;
    660		w->func = fib6_dump_node;
    661		cb->args[2] = (long)w;
    662	}
    663
    664	arg.skb = skb;
    665	arg.cb = cb;
    666	arg.net = net;
    667	w->args = &arg;
    668
    669	if (arg.filter.table_id) {
    670		tb = fib6_get_table(net, arg.filter.table_id);
    671		if (!tb) {
    672			if (rtnl_msg_family(cb->nlh) != PF_INET6)
    673				goto out;
    674
    675			NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
    676			return -ENOENT;
    677		}
    678
    679		if (!cb->args[0]) {
    680			res = fib6_dump_table(tb, skb, cb);
    681			if (!res)
    682				cb->args[0] = 1;
    683		}
    684		goto out;
    685	}
    686
    687	s_h = cb->args[0];
    688	s_e = cb->args[1];
    689
    690	rcu_read_lock();
    691	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
    692		e = 0;
    693		head = &net->ipv6.fib_table_hash[h];
    694		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
    695			if (e < s_e)
    696				goto next;
    697			res = fib6_dump_table(tb, skb, cb);
    698			if (res != 0)
    699				goto out_unlock;
    700next:
    701			e++;
    702		}
    703	}
    704out_unlock:
    705	rcu_read_unlock();
    706	cb->args[1] = e;
    707	cb->args[0] = h;
    708out:
    709	res = res < 0 ? res : skb->len;
    710	if (res <= 0)
    711		fib6_dump_end(cb);
    712	return res;
    713}
    714
    715void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
    716{
    717	if (!f6i)
    718		return;
    719
    720	if (f6i->fib6_metrics == &dst_default_metrics) {
    721		struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
    722
    723		if (!p)
    724			return;
    725
    726		refcount_set(&p->refcnt, 1);
    727		f6i->fib6_metrics = p;
    728	}
    729
    730	f6i->fib6_metrics->metrics[metric - 1] = val;
    731}
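/* 'metric' here is one of the RTAX_* constants, which start at 1
 * (RTAX_UNSPEC is 0 and unused), hence the metrics[metric - 1] indexing above.
 */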
    732
    733/*
    734 *	Routing Table
    735 *
    736 *	return the appropriate node for a routing tree "add" operation
    737 *	by either creating and inserting or by returning an existing
    738 *	node.
    739 */
    740
    741static struct fib6_node *fib6_add_1(struct net *net,
    742				    struct fib6_table *table,
    743				    struct fib6_node *root,
    744				    struct in6_addr *addr, int plen,
    745				    int offset, int allow_create,
    746				    int replace_required,
    747				    struct netlink_ext_ack *extack)
    748{
    749	struct fib6_node *fn, *in, *ln;
    750	struct fib6_node *pn = NULL;
    751	struct rt6key *key;
    752	int	bit;
    753	__be32	dir = 0;
    754
    755	RT6_TRACE("fib6_add_1\n");
    756
    757	/* insert node in tree */
    758
    759	fn = root;
    760
    761	do {
    762		struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
    763					    lockdep_is_held(&table->tb6_lock));
    764		key = (struct rt6key *)((u8 *)leaf + offset);
    765
    766		/*
    767		 *	Prefix match
    768		 */
    769		if (plen < fn->fn_bit ||
    770		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
    771			if (!allow_create) {
    772				if (replace_required) {
    773					NL_SET_ERR_MSG(extack,
    774						       "Can not replace route - no match found");
    775					pr_warn("Can't replace route, no match found\n");
    776					return ERR_PTR(-ENOENT);
    777				}
    778				pr_warn("NLM_F_CREATE should be set when creating new route\n");
    779			}
    780			goto insert_above;
    781		}
    782
    783		/*
    784		 *	Exact match ?
    785		 */
    786
    787		if (plen == fn->fn_bit) {
    788			/* clean up an intermediate node */
    789			if (!(fn->fn_flags & RTN_RTINFO)) {
    790				RCU_INIT_POINTER(fn->leaf, NULL);
    791				fib6_info_release(leaf);
    792			/* remove null_entry in the root node */
    793			} else if (fn->fn_flags & RTN_TL_ROOT &&
    794				   rcu_access_pointer(fn->leaf) ==
    795				   net->ipv6.fib6_null_entry) {
    796				RCU_INIT_POINTER(fn->leaf, NULL);
    797			}
    798
    799			return fn;
    800		}
    801
    802		/*
    803		 *	We have more bits to go
    804		 */
    805
    806		/* Try to walk down on tree. */
    807		dir = addr_bit_set(addr, fn->fn_bit);
    808		pn = fn;
    809		fn = dir ?
    810		     rcu_dereference_protected(fn->right,
    811					lockdep_is_held(&table->tb6_lock)) :
    812		     rcu_dereference_protected(fn->left,
    813					lockdep_is_held(&table->tb6_lock));
    814	} while (fn);
    815
    816	if (!allow_create) {
     817		/* We should not create a new node because
     818		 * NLM_F_REPLACE was specified without NLM_F_CREATE.
     819		 * I assume it is safe to require NLM_F_CREATE when the
     820		 * REPLACE flag is used! Later we may want to remove the
     821		 * check for replace_required, because according
     822		 * to the netlink specification, NLM_F_CREATE
     823		 * MUST be specified if a new route is created.
     824		 * That would keep IPv6 consistent with IPv4.
    825		 */
    826		if (replace_required) {
    827			NL_SET_ERR_MSG(extack,
    828				       "Can not replace route - no match found");
    829			pr_warn("Can't replace route, no match found\n");
    830			return ERR_PTR(-ENOENT);
    831		}
    832		pr_warn("NLM_F_CREATE should be set when creating new route\n");
    833	}
    834	/*
    835	 *	We walked to the bottom of tree.
    836	 *	Create new leaf node without children.
    837	 */
    838
    839	ln = node_alloc(net);
    840
    841	if (!ln)
    842		return ERR_PTR(-ENOMEM);
    843	ln->fn_bit = plen;
    844	RCU_INIT_POINTER(ln->parent, pn);
    845
    846	if (dir)
    847		rcu_assign_pointer(pn->right, ln);
    848	else
    849		rcu_assign_pointer(pn->left, ln);
    850
    851	return ln;
    852
    853
    854insert_above:
    855	/*
     856	 * Split, since we don't have a common prefix anymore or
     857	 * we have a less significant route.
     858	 * We have to insert an intermediate node into the tree;
     859	 * this new node will point to the one we need to create
     860	 * and to the current one.
    861	 */
    862
    863	pn = rcu_dereference_protected(fn->parent,
    864				       lockdep_is_held(&table->tb6_lock));
    865
    866	/* find 1st bit in difference between the 2 addrs.
    867
    868	   See comment in __ipv6_addr_diff: bit may be an invalid value,
    869	   but if it is >= plen, the value is ignored in any case.
    870	 */
    871
    872	bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
    873
    874	/*
    875	 *		(intermediate)[in]
    876	 *	          /	   \
    877	 *	(new leaf node)[ln] (old node)[fn]
    878	 */
    879	if (plen > bit) {
    880		in = node_alloc(net);
    881		ln = node_alloc(net);
    882
    883		if (!in || !ln) {
    884			if (in)
    885				node_free_immediate(net, in);
    886			if (ln)
    887				node_free_immediate(net, ln);
    888			return ERR_PTR(-ENOMEM);
    889		}
    890
    891		/*
    892		 * new intermediate node.
     893		 * RTN_RTINFO will be off,
     894		 * since an address that chooses one of
     895		 * the branches would not match less specific routes
     896		 * in the other branch.
    897		 */
    898
    899		in->fn_bit = bit;
    900
    901		RCU_INIT_POINTER(in->parent, pn);
    902		in->leaf = fn->leaf;
    903		fib6_info_hold(rcu_dereference_protected(in->leaf,
    904				lockdep_is_held(&table->tb6_lock)));
    905
    906		/* update parent pointer */
    907		if (dir)
    908			rcu_assign_pointer(pn->right, in);
    909		else
    910			rcu_assign_pointer(pn->left, in);
    911
    912		ln->fn_bit = plen;
    913
    914		RCU_INIT_POINTER(ln->parent, in);
    915		rcu_assign_pointer(fn->parent, in);
    916
    917		if (addr_bit_set(addr, bit)) {
    918			rcu_assign_pointer(in->right, ln);
    919			rcu_assign_pointer(in->left, fn);
    920		} else {
    921			rcu_assign_pointer(in->left, ln);
    922			rcu_assign_pointer(in->right, fn);
    923		}
    924	} else { /* plen <= bit */
    925
    926		/*
    927		 *		(new leaf node)[ln]
    928		 *	          /	   \
    929		 *	     (old node)[fn] NULL
    930		 */
    931
    932		ln = node_alloc(net);
    933
    934		if (!ln)
    935			return ERR_PTR(-ENOMEM);
    936
    937		ln->fn_bit = plen;
    938
    939		RCU_INIT_POINTER(ln->parent, pn);
    940
    941		if (addr_bit_set(&key->addr, plen))
    942			RCU_INIT_POINTER(ln->right, fn);
    943		else
    944			RCU_INIT_POINTER(ln->left, fn);
    945
    946		rcu_assign_pointer(fn->parent, ln);
    947
    948		if (dir)
    949			rcu_assign_pointer(pn->right, ln);
    950		else
    951			rcu_assign_pointer(pn->left, ln);
    952	}
    953	return ln;
    954}
    955
    956static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
    957				  const struct fib6_info *match,
    958				  const struct fib6_table *table)
    959{
    960	int cpu;
    961
    962	if (!fib6_nh->rt6i_pcpu)
    963		return;
    964
    965	/* release the reference to this fib entry from
    966	 * all of its cached pcpu routes
    967	 */
    968	for_each_possible_cpu(cpu) {
    969		struct rt6_info **ppcpu_rt;
    970		struct rt6_info *pcpu_rt;
    971
    972		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
    973		pcpu_rt = *ppcpu_rt;
    974
    975		/* only dropping the 'from' reference if the cached route
    976		 * is using 'match'. The cached pcpu_rt->from only changes
    977		 * from a fib6_info to NULL (ip6_dst_destroy); it can never
    978		 * change from one fib6_info reference to another
    979		 */
    980		if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
    981			struct fib6_info *from;
    982
    983			from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
    984			fib6_info_release(from);
    985		}
    986	}
    987}
    988
    989struct fib6_nh_pcpu_arg {
    990	struct fib6_info	*from;
    991	const struct fib6_table *table;
    992};
    993
    994static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
    995{
    996	struct fib6_nh_pcpu_arg *arg = _arg;
    997
    998	__fib6_drop_pcpu_from(nh, arg->from, arg->table);
    999	return 0;
   1000}
   1001
   1002static void fib6_drop_pcpu_from(struct fib6_info *f6i,
   1003				const struct fib6_table *table)
   1004{
    1005	/* Make sure rt6_make_pcpu_route() won't add other percpu routes
   1006	 * while we are cleaning them here.
   1007	 */
   1008	f6i->fib6_destroying = 1;
   1009	mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
   1010
   1011	if (f6i->nh) {
   1012		struct fib6_nh_pcpu_arg arg = {
   1013			.from = f6i,
   1014			.table = table
   1015		};
   1016
   1017		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
   1018					 &arg);
   1019	} else {
   1020		struct fib6_nh *fib6_nh;
   1021
   1022		fib6_nh = f6i->fib6_nh;
   1023		__fib6_drop_pcpu_from(fib6_nh, f6i, table);
   1024	}
   1025}
   1026
   1027static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
   1028			  struct net *net)
   1029{
   1030	struct fib6_table *table = rt->fib6_table;
   1031
   1032	/* Flush all cached dst in exception table */
   1033	rt6_flush_exceptions(rt);
   1034	fib6_drop_pcpu_from(rt, table);
   1035
   1036	if (rt->nh && !list_empty(&rt->nh_list))
   1037		list_del_init(&rt->nh_list);
   1038
   1039	if (refcount_read(&rt->fib6_ref) != 1) {
    1040		/* This route is used as a dummy address holder in some split
    1041		 * nodes. It is not leaked, but it still holds other resources,
    1042		 * which must be released in time. So, scan ancestor nodes
    1043		 * and replace dummy references to this route with references
    1044		 * to still-alive ones.
   1045		 */
   1046		while (fn) {
   1047			struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
   1048					    lockdep_is_held(&table->tb6_lock));
   1049			struct fib6_info *new_leaf;
   1050			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
   1051				new_leaf = fib6_find_prefix(net, table, fn);
   1052				fib6_info_hold(new_leaf);
   1053
   1054				rcu_assign_pointer(fn->leaf, new_leaf);
   1055				fib6_info_release(rt);
   1056			}
   1057			fn = rcu_dereference_protected(fn->parent,
   1058				    lockdep_is_held(&table->tb6_lock));
   1059		}
   1060	}
   1061}
   1062
   1063/*
   1064 *	Insert routing information in a node.
   1065 */
   1066
   1067static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
   1068			    struct nl_info *info,
   1069			    struct netlink_ext_ack *extack)
   1070{
   1071	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
   1072				    lockdep_is_held(&rt->fib6_table->tb6_lock));
   1073	struct fib6_info *iter = NULL;
   1074	struct fib6_info __rcu **ins;
   1075	struct fib6_info __rcu **fallback_ins = NULL;
   1076	int replace = (info->nlh &&
   1077		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
   1078	int add = (!info->nlh ||
   1079		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
   1080	int found = 0;
   1081	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
   1082	bool notify_sibling_rt = false;
   1083	u16 nlflags = NLM_F_EXCL;
   1084	int err;
   1085
   1086	if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
   1087		nlflags |= NLM_F_APPEND;
   1088
   1089	ins = &fn->leaf;
   1090
   1091	for (iter = leaf; iter;
   1092	     iter = rcu_dereference_protected(iter->fib6_next,
   1093				lockdep_is_held(&rt->fib6_table->tb6_lock))) {
   1094		/*
   1095		 *	Search for duplicates
   1096		 */
   1097
   1098		if (iter->fib6_metric == rt->fib6_metric) {
   1099			/*
   1100			 *	Same priority level
   1101			 */
   1102			if (info->nlh &&
   1103			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
   1104				return -EEXIST;
   1105
   1106			nlflags &= ~NLM_F_EXCL;
   1107			if (replace) {
   1108				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
   1109					found++;
   1110					break;
   1111				}
   1112				fallback_ins = fallback_ins ?: ins;
   1113				goto next_iter;
   1114			}
   1115
   1116			if (rt6_duplicate_nexthop(iter, rt)) {
   1117				if (rt->fib6_nsiblings)
   1118					rt->fib6_nsiblings = 0;
   1119				if (!(iter->fib6_flags & RTF_EXPIRES))
   1120					return -EEXIST;
   1121				if (!(rt->fib6_flags & RTF_EXPIRES))
   1122					fib6_clean_expires(iter);
   1123				else
   1124					fib6_set_expires(iter, rt->expires);
   1125
   1126				if (rt->fib6_pmtu)
   1127					fib6_metric_set(iter, RTAX_MTU,
   1128							rt->fib6_pmtu);
   1129				return -EEXIST;
   1130			}
    1131			/* If we have the same destination and the same metric,
    1132			 * but not the same gateway, then the route we try to
    1133			 * add is a sibling of this route; increment our counter
    1134			 * of siblings, and later we will add our route to the
    1135			 * list.
    1136			 * Only static routes (which don't have the flag
    1137			 * RTF_EXPIRES) are used for ECMPv6.
    1138			 *
    1139			 * To avoid a long list, we only add siblings if the
    1140			 * route has a gateway.
   1141			 */
   1142			if (rt_can_ecmp &&
   1143			    rt6_qualify_for_ecmp(iter))
   1144				rt->fib6_nsiblings++;
   1145		}
   1146
   1147		if (iter->fib6_metric > rt->fib6_metric)
   1148			break;
   1149
   1150next_iter:
   1151		ins = &iter->fib6_next;
   1152	}
   1153
   1154	if (fallback_ins && !found) {
   1155		/* No matching route with same ecmp-able-ness found, replace
   1156		 * first matching route
   1157		 */
   1158		ins = fallback_ins;
   1159		iter = rcu_dereference_protected(*ins,
   1160				    lockdep_is_held(&rt->fib6_table->tb6_lock));
   1161		found++;
   1162	}
   1163
   1164	/* Reset round-robin state, if necessary */
   1165	if (ins == &fn->leaf)
   1166		fn->rr_ptr = NULL;
   1167
    1168	/* Link this route to its ECMP siblings. */
   1169	if (rt->fib6_nsiblings) {
   1170		unsigned int fib6_nsiblings;
   1171		struct fib6_info *sibling, *temp_sibling;
   1172
    1173		/* Find the first route that has the same metric */
   1174		sibling = leaf;
   1175		notify_sibling_rt = true;
   1176		while (sibling) {
   1177			if (sibling->fib6_metric == rt->fib6_metric &&
   1178			    rt6_qualify_for_ecmp(sibling)) {
   1179				list_add_tail(&rt->fib6_siblings,
   1180					      &sibling->fib6_siblings);
   1181				break;
   1182			}
   1183			sibling = rcu_dereference_protected(sibling->fib6_next,
   1184				    lockdep_is_held(&rt->fib6_table->tb6_lock));
   1185			notify_sibling_rt = false;
   1186		}
   1187		/* For each sibling in the list, increment the counter of
    1188		 * siblings. BUG() if the counters do not match; the list of
    1189		 * siblings is broken!
   1190		 */
   1191		fib6_nsiblings = 0;
   1192		list_for_each_entry_safe(sibling, temp_sibling,
   1193					 &rt->fib6_siblings, fib6_siblings) {
   1194			sibling->fib6_nsiblings++;
   1195			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
   1196			fib6_nsiblings++;
   1197		}
   1198		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
   1199		rt6_multipath_rebalance(temp_sibling);
   1200	}
   1201
   1202	/*
   1203	 *	insert node
   1204	 */
   1205	if (!replace) {
   1206		if (!add)
   1207			pr_warn("NLM_F_CREATE should be set when creating new route\n");
   1208
   1209add:
   1210		nlflags |= NLM_F_CREATE;
   1211
   1212		/* The route should only be notified if it is the first
   1213		 * route in the node or if it is added as a sibling
   1214		 * route to the first route in the node.
   1215		 */
   1216		if (!info->skip_notify_kernel &&
   1217		    (notify_sibling_rt || ins == &fn->leaf)) {
   1218			enum fib_event_type fib_event;
   1219
   1220			if (notify_sibling_rt)
   1221				fib_event = FIB_EVENT_ENTRY_APPEND;
   1222			else
   1223				fib_event = FIB_EVENT_ENTRY_REPLACE;
   1224			err = call_fib6_entry_notifiers(info->nl_net,
   1225							fib_event, rt,
   1226							extack);
   1227			if (err) {
   1228				struct fib6_info *sibling, *next_sibling;
   1229
   1230				/* If the route has siblings, then it first
   1231				 * needs to be unlinked from them.
   1232				 */
   1233				if (!rt->fib6_nsiblings)
   1234					return err;
   1235
   1236				list_for_each_entry_safe(sibling, next_sibling,
   1237							 &rt->fib6_siblings,
   1238							 fib6_siblings)
   1239					sibling->fib6_nsiblings--;
   1240				rt->fib6_nsiblings = 0;
   1241				list_del_init(&rt->fib6_siblings);
   1242				rt6_multipath_rebalance(next_sibling);
   1243				return err;
   1244			}
   1245		}
   1246
   1247		rcu_assign_pointer(rt->fib6_next, iter);
   1248		fib6_info_hold(rt);
   1249		rcu_assign_pointer(rt->fib6_node, fn);
   1250		rcu_assign_pointer(*ins, rt);
   1251		if (!info->skip_notify)
   1252			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
   1253		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
   1254
   1255		if (!(fn->fn_flags & RTN_RTINFO)) {
   1256			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
   1257			fn->fn_flags |= RTN_RTINFO;
   1258		}
   1259
   1260	} else {
   1261		int nsiblings;
   1262
   1263		if (!found) {
   1264			if (add)
   1265				goto add;
   1266			pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
   1267			return -ENOENT;
   1268		}
   1269
   1270		if (!info->skip_notify_kernel && ins == &fn->leaf) {
   1271			err = call_fib6_entry_notifiers(info->nl_net,
   1272							FIB_EVENT_ENTRY_REPLACE,
   1273							rt, extack);
   1274			if (err)
   1275				return err;
   1276		}
   1277
   1278		fib6_info_hold(rt);
   1279		rcu_assign_pointer(rt->fib6_node, fn);
   1280		rt->fib6_next = iter->fib6_next;
   1281		rcu_assign_pointer(*ins, rt);
   1282		if (!info->skip_notify)
   1283			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
   1284		if (!(fn->fn_flags & RTN_RTINFO)) {
   1285			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
   1286			fn->fn_flags |= RTN_RTINFO;
   1287		}
   1288		nsiblings = iter->fib6_nsiblings;
   1289		iter->fib6_node = NULL;
   1290		fib6_purge_rt(iter, fn, info->nl_net);
   1291		if (rcu_access_pointer(fn->rr_ptr) == iter)
   1292			fn->rr_ptr = NULL;
   1293		fib6_info_release(iter);
   1294
   1295		if (nsiblings) {
   1296			/* Replacing an ECMP route, remove all siblings */
   1297			ins = &rt->fib6_next;
   1298			iter = rcu_dereference_protected(*ins,
   1299				    lockdep_is_held(&rt->fib6_table->tb6_lock));
   1300			while (iter) {
   1301				if (iter->fib6_metric > rt->fib6_metric)
   1302					break;
   1303				if (rt6_qualify_for_ecmp(iter)) {
   1304					*ins = iter->fib6_next;
   1305					iter->fib6_node = NULL;
   1306					fib6_purge_rt(iter, fn, info->nl_net);
   1307					if (rcu_access_pointer(fn->rr_ptr) == iter)
   1308						fn->rr_ptr = NULL;
   1309					fib6_info_release(iter);
   1310					nsiblings--;
   1311					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
   1312				} else {
   1313					ins = &iter->fib6_next;
   1314				}
   1315				iter = rcu_dereference_protected(*ins,
   1316					lockdep_is_held(&rt->fib6_table->tb6_lock));
   1317			}
   1318			WARN_ON(nsiblings != 0);
   1319		}
   1320	}
   1321
   1322	return 0;
   1323}
   1324
   1325static void fib6_start_gc(struct net *net, struct fib6_info *rt)
   1326{
   1327	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
   1328	    (rt->fib6_flags & RTF_EXPIRES))
   1329		mod_timer(&net->ipv6.ip6_fib_timer,
   1330			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
   1331}
   1332
   1333void fib6_force_start_gc(struct net *net)
   1334{
   1335	if (!timer_pending(&net->ipv6.ip6_fib_timer))
   1336		mod_timer(&net->ipv6.ip6_fib_timer,
   1337			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
   1338}
   1339
   1340static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
   1341					   int sernum)
   1342{
   1343	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
   1344				lockdep_is_held(&rt->fib6_table->tb6_lock));
   1345
   1346	/* paired with smp_rmb() in fib6_get_cookie_safe() */
   1347	smp_wmb();
   1348	while (fn) {
   1349		WRITE_ONCE(fn->fn_sernum, sernum);
   1350		fn = rcu_dereference_protected(fn->parent,
   1351				lockdep_is_held(&rt->fib6_table->tb6_lock));
   1352	}
   1353}
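/* Bumping fn_sernum on every node up to the root invalidates the cookies of
 * dst entries cached from this subtree (see fib6_get_cookie_safe() and
 * ip6_dst_check()), forcing their users to re-validate the route.
 */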
   1354
   1355void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
   1356{
   1357	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
   1358}
   1359
   1360/* allow ipv4 to update sernum via ipv6_stub */
   1361void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
   1362{
   1363	spin_lock_bh(&f6i->fib6_table->tb6_lock);
   1364	fib6_update_sernum_upto_root(net, f6i);
   1365	spin_unlock_bh(&f6i->fib6_table->tb6_lock);
   1366}
   1367
   1368/*
   1369 *	Add routing information to the routing tree.
   1370 *	<destination addr>/<source addr>
   1371 *	with source addr info in sub-trees
   1372 *	Need to own table->tb6_lock
   1373 */
   1374
   1375int fib6_add(struct fib6_node *root, struct fib6_info *rt,
   1376	     struct nl_info *info, struct netlink_ext_ack *extack)
   1377{
   1378	struct fib6_table *table = rt->fib6_table;
   1379	struct fib6_node *fn, *pn = NULL;
   1380	int err = -ENOMEM;
   1381	int allow_create = 1;
   1382	int replace_required = 0;
   1383
   1384	if (info->nlh) {
   1385		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
   1386			allow_create = 0;
   1387		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
   1388			replace_required = 1;
   1389	}
   1390	if (!allow_create && !replace_required)
   1391		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
   1392
   1393	fn = fib6_add_1(info->nl_net, table, root,
   1394			&rt->fib6_dst.addr, rt->fib6_dst.plen,
   1395			offsetof(struct fib6_info, fib6_dst), allow_create,
   1396			replace_required, extack);
   1397	if (IS_ERR(fn)) {
   1398		err = PTR_ERR(fn);
   1399		fn = NULL;
   1400		goto out;
   1401	}
   1402
   1403	pn = fn;
   1404
   1405#ifdef CONFIG_IPV6_SUBTREES
   1406	if (rt->fib6_src.plen) {
   1407		struct fib6_node *sn;
   1408
   1409		if (!rcu_access_pointer(fn->subtree)) {
   1410			struct fib6_node *sfn;
   1411
   1412			/*
   1413			 * Create subtree.
   1414			 *
   1415			 *		fn[main tree]
   1416			 *		|
   1417			 *		sfn[subtree root]
   1418			 *		   \
   1419			 *		    sn[new leaf node]
   1420			 */
   1421
   1422			/* Create subtree root node */
   1423			sfn = node_alloc(info->nl_net);
   1424			if (!sfn)
   1425				goto failure;
   1426
   1427			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
   1428			rcu_assign_pointer(sfn->leaf,
   1429					   info->nl_net->ipv6.fib6_null_entry);
   1430			sfn->fn_flags = RTN_ROOT;
   1431
   1432			/* Now add the first leaf node to new subtree */
   1433
   1434			sn = fib6_add_1(info->nl_net, table, sfn,
   1435					&rt->fib6_src.addr, rt->fib6_src.plen,
   1436					offsetof(struct fib6_info, fib6_src),
   1437					allow_create, replace_required, extack);
   1438
   1439			if (IS_ERR(sn)) {
    1440				/* If it failed, discard the just-allocated
    1441				   root, and then (at the failure label) the
    1442				   stale node in the main tree.
   1443				 */
   1444				node_free_immediate(info->nl_net, sfn);
   1445				err = PTR_ERR(sn);
   1446				goto failure;
   1447			}
   1448
   1449			/* Now link new subtree to main tree */
   1450			rcu_assign_pointer(sfn->parent, fn);
   1451			rcu_assign_pointer(fn->subtree, sfn);
   1452		} else {
   1453			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
   1454					&rt->fib6_src.addr, rt->fib6_src.plen,
   1455					offsetof(struct fib6_info, fib6_src),
   1456					allow_create, replace_required, extack);
   1457
   1458			if (IS_ERR(sn)) {
   1459				err = PTR_ERR(sn);
   1460				goto failure;
   1461			}
   1462		}
   1463
   1464		if (!rcu_access_pointer(fn->leaf)) {
   1465			if (fn->fn_flags & RTN_TL_ROOT) {
   1466				/* put back null_entry for root node */
   1467				rcu_assign_pointer(fn->leaf,
   1468					    info->nl_net->ipv6.fib6_null_entry);
   1469			} else {
   1470				fib6_info_hold(rt);
   1471				rcu_assign_pointer(fn->leaf, rt);
   1472			}
   1473		}
   1474		fn = sn;
   1475	}
   1476#endif
   1477
   1478	err = fib6_add_rt2node(fn, rt, info, extack);
   1479	if (!err) {
   1480		if (rt->nh)
   1481			list_add(&rt->nh_list, &rt->nh->f6i_list);
   1482		__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
   1483		fib6_start_gc(info->nl_net, rt);
   1484	}
   1485
   1486out:
   1487	if (err) {
   1488#ifdef CONFIG_IPV6_SUBTREES
   1489		/*
   1490		 * If fib6_add_1 has cleared the old leaf pointer in the
   1491		 * super-tree leaf node we have to find a new one for it.
   1492		 */
   1493		if (pn != fn) {
   1494			struct fib6_info *pn_leaf =
   1495				rcu_dereference_protected(pn->leaf,
   1496				    lockdep_is_held(&table->tb6_lock));
   1497			if (pn_leaf == rt) {
   1498				pn_leaf = NULL;
   1499				RCU_INIT_POINTER(pn->leaf, NULL);
   1500				fib6_info_release(rt);
   1501			}
   1502			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
   1503				pn_leaf = fib6_find_prefix(info->nl_net, table,
   1504							   pn);
   1505#if RT6_DEBUG >= 2
   1506				if (!pn_leaf) {
   1507					WARN_ON(!pn_leaf);
   1508					pn_leaf =
   1509					    info->nl_net->ipv6.fib6_null_entry;
   1510				}
   1511#endif
   1512				fib6_info_hold(pn_leaf);
   1513				rcu_assign_pointer(pn->leaf, pn_leaf);
   1514			}
   1515		}
   1516#endif
   1517		goto failure;
   1518	} else if (fib6_requires_src(rt)) {
   1519		fib6_routes_require_src_inc(info->nl_net);
   1520	}
   1521	return err;
   1522
   1523failure:
   1524	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
    1525	 * 1. fn is an intermediate node and we failed to add the new route
    1526	 * to it, in both the subtree-creation and the fib6_add_rt2node()
    1527	 * failure cases.
   1528	 * 2. fn is the root node in the table and we fail to add the first
   1529	 * default route to it.
   1530	 */
   1531	if (fn &&
   1532	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
   1533	     (fn->fn_flags & RTN_TL_ROOT &&
   1534	      !rcu_access_pointer(fn->leaf))))
   1535		fib6_repair_tree(info->nl_net, table, fn);
   1536	return err;
   1537}
   1538
   1539/*
   1540 *	Routing tree lookup
   1541 *
   1542 */
   1543
   1544struct lookup_args {
   1545	int			offset;		/* key offset on fib6_info */
   1546	const struct in6_addr	*addr;		/* search key			*/
   1547};
   1548
   1549static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
   1550					    struct lookup_args *args)
   1551{
   1552	struct fib6_node *fn;
   1553	__be32 dir;
   1554
   1555	if (unlikely(args->offset == 0))
   1556		return NULL;
   1557
   1558	/*
   1559	 *	Descend on a tree
   1560	 */
   1561
   1562	fn = root;
   1563
   1564	for (;;) {
   1565		struct fib6_node *next;
   1566
   1567		dir = addr_bit_set(args->addr, fn->fn_bit);
   1568
   1569		next = dir ? rcu_dereference(fn->right) :
   1570			     rcu_dereference(fn->left);
   1571
   1572		if (next) {
   1573			fn = next;
   1574			continue;
   1575		}
   1576		break;
   1577	}
   1578
   1579	while (fn) {
   1580		struct fib6_node *subtree = FIB6_SUBTREE(fn);
   1581
   1582		if (subtree || fn->fn_flags & RTN_RTINFO) {
   1583			struct fib6_info *leaf = rcu_dereference(fn->leaf);
   1584			struct rt6key *key;
   1585
   1586			if (!leaf)
   1587				goto backtrack;
   1588
   1589			key = (struct rt6key *) ((u8 *)leaf + args->offset);
   1590
   1591			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
   1592#ifdef CONFIG_IPV6_SUBTREES
   1593				if (subtree) {
   1594					struct fib6_node *sfn;
   1595					sfn = fib6_node_lookup_1(subtree,
   1596								 args + 1);
   1597					if (!sfn)
   1598						goto backtrack;
   1599					fn = sfn;
   1600				}
   1601#endif
   1602				if (fn->fn_flags & RTN_RTINFO)
   1603					return fn;
   1604			}
   1605		}
   1606backtrack:
   1607		if (fn->fn_flags & RTN_ROOT)
   1608			break;
   1609
   1610		fn = rcu_dereference(fn->parent);
   1611	}
   1612
   1613	return NULL;
   1614}
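/* The first loop above descends to the deepest candidate node for the
 * address; the second loop then backtracks toward the root until it finds a
 * node whose leaf prefix actually covers the address, yielding a
 * longest-prefix-match lookup.
 */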
   1615
   1616/* called with rcu_read_lock() held
   1617 */
   1618struct fib6_node *fib6_node_lookup(struct fib6_node *root,
   1619				   const struct in6_addr *daddr,
   1620				   const struct in6_addr *saddr)
   1621{
   1622	struct fib6_node *fn;
   1623	struct lookup_args args[] = {
   1624		{
   1625			.offset = offsetof(struct fib6_info, fib6_dst),
   1626			.addr = daddr,
   1627		},
   1628#ifdef CONFIG_IPV6_SUBTREES
   1629		{
   1630			.offset = offsetof(struct fib6_info, fib6_src),
   1631			.addr = saddr,
   1632		},
   1633#endif
   1634		{
   1635			.offset = 0,	/* sentinel */
   1636		}
   1637	};
   1638
   1639	fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
   1640	if (!fn || fn->fn_flags & RTN_TL_ROOT)
   1641		fn = root;
   1642
   1643	return fn;
   1644}
   1645
   1646/*
   1647 *	Get node with specified destination prefix (and source prefix,
   1648 *	if subtrees are used)
    1649 *	exact_match == true means we try to find an fn with an exact match of
    1650 *	the passed-in prefix addr
    1651 *	exact_match == false means we try to find an fn with the longest prefix
    1652 *	match of the passed-in prefix addr. This is useful for finding the fn
    1653 *	for a cached route, as it will be stored in the exception table under
    1654 *	the node with the longest prefix length.
   1655 */
   1656
   1657
   1658static struct fib6_node *fib6_locate_1(struct fib6_node *root,
   1659				       const struct in6_addr *addr,
   1660				       int plen, int offset,
   1661				       bool exact_match)
   1662{
   1663	struct fib6_node *fn, *prev = NULL;
   1664
   1665	for (fn = root; fn ; ) {
   1666		struct fib6_info *leaf = rcu_dereference(fn->leaf);
   1667		struct rt6key *key;
   1668
   1669		/* This node is being deleted */
   1670		if (!leaf) {
   1671			if (plen <= fn->fn_bit)
   1672				goto out;
   1673			else
   1674				goto next;
   1675		}
   1676
   1677		key = (struct rt6key *)((u8 *)leaf + offset);
   1678
   1679		/*
   1680		 *	Prefix match
   1681		 */
   1682		if (plen < fn->fn_bit ||
   1683		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
   1684			goto out;
   1685
   1686		if (plen == fn->fn_bit)
   1687			return fn;
   1688
   1689		if (fn->fn_flags & RTN_RTINFO)
   1690			prev = fn;
   1691
   1692next:
   1693		/*
   1694		 *	We have more bits to go
   1695		 */
   1696		if (addr_bit_set(addr, fn->fn_bit))
   1697			fn = rcu_dereference(fn->right);
   1698		else
   1699			fn = rcu_dereference(fn->left);
   1700	}
   1701out:
   1702	if (exact_match)
   1703		return NULL;
   1704	else
   1705		return prev;
   1706}
   1707
   1708struct fib6_node *fib6_locate(struct fib6_node *root,
   1709			      const struct in6_addr *daddr, int dst_len,
   1710			      const struct in6_addr *saddr, int src_len,
   1711			      bool exact_match)
   1712{
   1713	struct fib6_node *fn;
   1714
   1715	fn = fib6_locate_1(root, daddr, dst_len,
   1716			   offsetof(struct fib6_info, fib6_dst),
   1717			   exact_match);
   1718
   1719#ifdef CONFIG_IPV6_SUBTREES
   1720	if (src_len) {
   1721		WARN_ON(saddr == NULL);
   1722		if (fn) {
   1723			struct fib6_node *subtree = FIB6_SUBTREE(fn);
   1724
   1725			if (subtree) {
   1726				fn = fib6_locate_1(subtree, saddr, src_len,
   1727					   offsetof(struct fib6_info, fib6_src),
   1728					   exact_match);
   1729			}
   1730		}
   1731	}
   1732#endif
   1733
   1734	if (fn && fn->fn_flags & RTN_RTINFO)
   1735		return fn;
   1736
   1737	return NULL;
   1738}
   1739
   1740
   1741/*
   1742 *	Deletion
   1743 *
   1744 */
   1745
   1746static struct fib6_info *fib6_find_prefix(struct net *net,
   1747					 struct fib6_table *table,
   1748					 struct fib6_node *fn)
   1749{
   1750	struct fib6_node *child_left, *child_right;
   1751
   1752	if (fn->fn_flags & RTN_ROOT)
   1753		return net->ipv6.fib6_null_entry;
   1754
   1755	while (fn) {
   1756		child_left = rcu_dereference_protected(fn->left,
   1757				    lockdep_is_held(&table->tb6_lock));
   1758		child_right = rcu_dereference_protected(fn->right,
   1759				    lockdep_is_held(&table->tb6_lock));
   1760		if (child_left)
   1761			return rcu_dereference_protected(child_left->leaf,
   1762					lockdep_is_held(&table->tb6_lock));
   1763		if (child_right)
   1764			return rcu_dereference_protected(child_right->leaf,
   1765					lockdep_is_held(&table->tb6_lock));
   1766
   1767		fn = FIB6_SUBTREE(fn);
   1768	}
   1769	return NULL;
   1770}
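/* fib6_find_prefix() is used when a node loses its own route: it borrows a
 * leaf from a descendant (or, with subtrees, from the subtree root) so that
 * intermediate nodes always keep a non-NULL address holder.
 */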
   1771
   1772/*
   1773 *	Called to trim the tree of intermediate nodes when possible. "fn"
   1774 *	is the node we want to try and remove.
   1775 *	Need to own table->tb6_lock
   1776 */
   1777
   1778static struct fib6_node *fib6_repair_tree(struct net *net,
   1779					  struct fib6_table *table,
   1780					  struct fib6_node *fn)
   1781{
   1782	int children;
   1783	int nstate;
   1784	struct fib6_node *child;
   1785	struct fib6_walker *w;
   1786	int iter = 0;
   1787
   1788	/* Set fn->leaf to null_entry for root node. */
   1789	if (fn->fn_flags & RTN_TL_ROOT) {
   1790		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
   1791		return fn;
   1792	}
   1793
   1794	for (;;) {
   1795		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
   1796					    lockdep_is_held(&table->tb6_lock));
   1797		struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
   1798					    lockdep_is_held(&table->tb6_lock));
   1799		struct fib6_node *pn = rcu_dereference_protected(fn->parent,
   1800					    lockdep_is_held(&table->tb6_lock));
   1801		struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
   1802					    lockdep_is_held(&table->tb6_lock));
   1803		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
   1804					    lockdep_is_held(&table->tb6_lock));
   1805		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
   1806					    lockdep_is_held(&table->tb6_lock));
   1807		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
   1808					    lockdep_is_held(&table->tb6_lock));
   1809		struct fib6_info *new_fn_leaf;
   1810
   1811		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
   1812		iter++;
   1813
   1814		WARN_ON(fn->fn_flags & RTN_RTINFO);
   1815		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
   1816		WARN_ON(fn_leaf);
   1817
   1818		children = 0;
   1819		child = NULL;
   1820		if (fn_r) {
   1821			child = fn_r;
   1822			children |= 1;
   1823		}
   1824		if (fn_l) {
   1825			child = fn_l;
   1826			children |= 2;
   1827		}
   1828
   1829		if (children == 3 || FIB6_SUBTREE(fn)
   1830#ifdef CONFIG_IPV6_SUBTREES
   1831		    /* Subtree root (i.e. fn) may have one child */
   1832		    || (children && fn->fn_flags & RTN_ROOT)
   1833#endif
   1834		    ) {
   1835			new_fn_leaf = fib6_find_prefix(net, table, fn);
   1836#if RT6_DEBUG >= 2
   1837			if (!new_fn_leaf) {
   1838				WARN_ON(!new_fn_leaf);
   1839				new_fn_leaf = net->ipv6.fib6_null_entry;
   1840			}
   1841#endif
   1842			fib6_info_hold(new_fn_leaf);
   1843			rcu_assign_pointer(fn->leaf, new_fn_leaf);
   1844			return pn;
   1845		}
   1846
   1847#ifdef CONFIG_IPV6_SUBTREES
   1848		if (FIB6_SUBTREE(pn) == fn) {
   1849			WARN_ON(!(fn->fn_flags & RTN_ROOT));
   1850			RCU_INIT_POINTER(pn->subtree, NULL);
   1851			nstate = FWS_L;
   1852		} else {
   1853			WARN_ON(fn->fn_flags & RTN_ROOT);
   1854#endif
   1855			if (pn_r == fn)
   1856				rcu_assign_pointer(pn->right, child);
   1857			else if (pn_l == fn)
   1858				rcu_assign_pointer(pn->left, child);
   1859#if RT6_DEBUG >= 2
   1860			else
   1861				WARN_ON(1);
   1862#endif
   1863			if (child)
   1864				rcu_assign_pointer(child->parent, pn);
   1865			nstate = FWS_R;
   1866#ifdef CONFIG_IPV6_SUBTREES
   1867		}
   1868#endif
   1869
   1870		read_lock(&net->ipv6.fib6_walker_lock);
   1871		FOR_WALKERS(net, w) {
   1872			if (!child) {
   1873				if (w->node == fn) {
   1874					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
   1875					w->node = pn;
   1876					w->state = nstate;
   1877				}
   1878			} else {
   1879				if (w->node == fn) {
   1880					w->node = child;
   1881					if (children&2) {
   1882						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
   1883						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
   1884					} else {
   1885						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
   1886						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
   1887					}
   1888				}
   1889			}
   1890		}
   1891		read_unlock(&net->ipv6.fib6_walker_lock);
   1892
   1893		node_free(net, fn);
   1894		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
   1895			return pn;
   1896
   1897		RCU_INIT_POINTER(pn->leaf, NULL);
   1898		fib6_info_release(pn_leaf);
   1899		fn = pn;
   1900	}
   1901}
   1902
   1903static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
   1904			   struct fib6_info __rcu **rtp, struct nl_info *info)
   1905{
   1906	struct fib6_info *leaf, *replace_rt = NULL;
   1907	struct fib6_walker *w;
   1908	struct fib6_info *rt = rcu_dereference_protected(*rtp,
   1909				    lockdep_is_held(&table->tb6_lock));
   1910	struct net *net = info->nl_net;
   1911	bool notify_del = false;
   1912
   1913	RT6_TRACE("fib6_del_route\n");
   1914
   1915	/* If the deleted route is the first in the node and it is not part of
   1916	 * a multipath route, then we need to replace it with the next route
   1917	 * in the node, if one exists.
   1918	 */
   1919	leaf = rcu_dereference_protected(fn->leaf,
   1920					 lockdep_is_held(&table->tb6_lock));
   1921	if (leaf == rt && !rt->fib6_nsiblings) {
   1922		if (rcu_access_pointer(rt->fib6_next))
   1923			replace_rt = rcu_dereference_protected(rt->fib6_next,
   1924					    lockdep_is_held(&table->tb6_lock));
   1925		else
   1926			notify_del = true;
   1927	}
   1928
   1929	/* Unlink it */
   1930	*rtp = rt->fib6_next;
   1931	rt->fib6_node = NULL;
   1932	net->ipv6.rt6_stats->fib_rt_entries--;
   1933	net->ipv6.rt6_stats->fib_discarded_routes++;
   1934
   1935	/* Reset round-robin state, if necessary */
   1936	if (rcu_access_pointer(fn->rr_ptr) == rt)
   1937		fn->rr_ptr = NULL;
   1938
   1939	/* Remove this entry from other siblings */
   1940	if (rt->fib6_nsiblings) {
   1941		struct fib6_info *sibling, *next_sibling;
   1942
   1943		/* The route being deleted is part of a multipath route. If this
   1944		 * multipath route is the first route in the node, then we need
   1945		 * to emit a delete notification. Otherwise, we need to skip
   1946		 * the notification.
   1947		 */
   1948		if (rt->fib6_metric == leaf->fib6_metric &&
   1949		    rt6_qualify_for_ecmp(leaf))
   1950			notify_del = true;
   1951		list_for_each_entry_safe(sibling, next_sibling,
   1952					 &rt->fib6_siblings, fib6_siblings)
   1953			sibling->fib6_nsiblings--;
   1954		rt->fib6_nsiblings = 0;
   1955		list_del_init(&rt->fib6_siblings);
   1956		rt6_multipath_rebalance(next_sibling);
   1957	}
   1958
   1959	/* Adjust walkers */
   1960	read_lock(&net->ipv6.fib6_walker_lock);
   1961	FOR_WALKERS(net, w) {
   1962		if (w->state == FWS_C && w->leaf == rt) {
   1963			RT6_TRACE("walker %p adjusted by delroute\n", w);
   1964			w->leaf = rcu_dereference_protected(rt->fib6_next,
   1965					    lockdep_is_held(&table->tb6_lock));
   1966			if (!w->leaf)
   1967				w->state = FWS_U;
   1968		}
   1969	}
   1970	read_unlock(&net->ipv6.fib6_walker_lock);
   1971
   1972	/* If it was the last route, call fib6_repair_tree() to:
   1973	 * 1. For the root node, put back null_entry, as when the table was created.
   1974	 * 2. For other nodes, expunge the node from the radix tree.
   1975	 */
   1976	if (!rcu_access_pointer(fn->leaf)) {
   1977		if (!(fn->fn_flags & RTN_TL_ROOT)) {
   1978			fn->fn_flags &= ~RTN_RTINFO;
   1979			net->ipv6.rt6_stats->fib_route_nodes--;
   1980		}
   1981		fn = fib6_repair_tree(net, table, fn);
   1982	}
   1983
   1984	fib6_purge_rt(rt, fn, net);
   1985
   1986	if (!info->skip_notify_kernel) {
   1987		if (notify_del)
   1988			call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
   1989						  rt, NULL);
   1990		else if (replace_rt)
   1991			call_fib6_entry_notifiers_replace(net, replace_rt);
   1992	}
   1993	if (!info->skip_notify)
   1994		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
   1995
   1996	fib6_info_release(rt);
   1997}
   1998
   1999/* The caller must hold table->tb6_lock */
   2000int fib6_del(struct fib6_info *rt, struct nl_info *info)
   2001{
   2002	struct net *net = info->nl_net;
   2003	struct fib6_info __rcu **rtp;
   2004	struct fib6_info __rcu **rtp_next;
   2005	struct fib6_table *table;
   2006	struct fib6_node *fn;
   2007
   2008	if (rt == net->ipv6.fib6_null_entry)
   2009		return -ENOENT;
   2010
   2011	table = rt->fib6_table;
   2012	fn = rcu_dereference_protected(rt->fib6_node,
   2013				       lockdep_is_held(&table->tb6_lock));
   2014	if (!fn)
   2015		return -ENOENT;
   2016
   2017	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
   2018
   2019	/*
   2020	 *	Walk the leaf entries looking for ourself
   2021	 */
   2022
   2023	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
   2024		struct fib6_info *cur = rcu_dereference_protected(*rtp,
   2025					lockdep_is_held(&table->tb6_lock));
   2026		if (rt == cur) {
   2027			if (fib6_requires_src(cur))
   2028				fib6_routes_require_src_dec(info->nl_net);
   2029			fib6_del_route(table, fn, rtp, info);
   2030			return 0;
   2031		}
   2032		rtp_next = &cur->fib6_next;
   2033	}
   2034	return -ENOENT;
   2035}
   2036
   2037/*
   2038 *	Tree traversal function.
   2039 *
   2040 *	Certainly, it is not interrupt safe.
   2041 *	However, it is internally reentrant wrt itself and fib6_add/fib6_del.
   2042 *	This means that the tree may be modified while it is being walked,
   2043 *	so the function can be used for garbage collection, clone pruning,
   2044 *	cleaning the tree when a device goes down, and so on.
   2045 *
   2046 *	It guarantees that every node will be traversed,
   2047 *	and that it will be traversed only once.
   2048 *
   2049 *	Callback function w->func may return:
   2050 *	0 -> continue walking.
   2051 *	positive value -> walking is suspended (used by tree dumps,
   2052 *	and possibly by gc, if it is ever split into several slices)
   2053 *	negative value -> terminate walking.
   2054 *
   2055 *	The function itself returns:
   2056 *	0   -> walk is complete.
   2057 *	>0  -> walk is incomplete (i.e. suspended)
   2058 *	<0  -> walk is terminated by an error.
   2059 *
   2060 *	This function is called with tb6_lock held.
   2061 */
   2062
   2063static int fib6_walk_continue(struct fib6_walker *w)
   2064{
   2065	struct fib6_node *fn, *pn, *left, *right;
   2066
   2067	/* w->root should always be table->tb6_root */
   2068	WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
   2069
   2070	for (;;) {
   2071		fn = w->node;
   2072		if (!fn)
   2073			return 0;
   2074
   2075		switch (w->state) {
   2076#ifdef CONFIG_IPV6_SUBTREES
   2077		case FWS_S:
   2078			if (FIB6_SUBTREE(fn)) {
   2079				w->node = FIB6_SUBTREE(fn);
   2080				continue;
   2081			}
   2082			w->state = FWS_L;
   2083			fallthrough;
   2084#endif
   2085		case FWS_L:
   2086			left = rcu_dereference_protected(fn->left, 1);
   2087			if (left) {
   2088				w->node = left;
   2089				w->state = FWS_INIT;
   2090				continue;
   2091			}
   2092			w->state = FWS_R;
   2093			fallthrough;
   2094		case FWS_R:
   2095			right = rcu_dereference_protected(fn->right, 1);
   2096			if (right) {
   2097				w->node = right;
   2098				w->state = FWS_INIT;
   2099				continue;
   2100			}
   2101			w->state = FWS_C;
   2102			w->leaf = rcu_dereference_protected(fn->leaf, 1);
   2103			fallthrough;
   2104		case FWS_C:
   2105			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
   2106				int err;
   2107
   2108				if (w->skip) {
   2109					w->skip--;
   2110					goto skip;
   2111				}
   2112
   2113				err = w->func(w);
   2114				if (err)
   2115					return err;
   2116
   2117				w->count++;
   2118				continue;
   2119			}
   2120skip:
   2121			w->state = FWS_U;
   2122			fallthrough;
   2123		case FWS_U:
   2124			if (fn == w->root)
   2125				return 0;
   2126			pn = rcu_dereference_protected(fn->parent, 1);
   2127			left = rcu_dereference_protected(pn->left, 1);
   2128			right = rcu_dereference_protected(pn->right, 1);
   2129			w->node = pn;
   2130#ifdef CONFIG_IPV6_SUBTREES
   2131			if (FIB6_SUBTREE(pn) == fn) {
   2132				WARN_ON(!(fn->fn_flags & RTN_ROOT));
   2133				w->state = FWS_L;
   2134				continue;
   2135			}
   2136#endif
   2137			if (left == fn) {
   2138				w->state = FWS_R;
   2139				continue;
   2140			}
   2141			if (right == fn) {
   2142				w->state = FWS_C;
   2143				w->leaf = rcu_dereference_protected(w->node->leaf, 1);
   2144				continue;
   2145			}
   2146#if RT6_DEBUG >= 2
   2147			WARN_ON(1);
   2148#endif
   2149		}
   2150	}
   2151}
   2152
   2153static int fib6_walk(struct net *net, struct fib6_walker *w)
   2154{
   2155	int res;
   2156
   2157	w->state = FWS_INIT;
   2158	w->node = w->root;
   2159
   2160	fib6_walker_link(net, w);
   2161	res = fib6_walk_continue(w);
   2162	if (res <= 0)
   2163		fib6_walker_unlink(net, w);
   2164	return res;
   2165}
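
/*
 * Illustrative sketch (not in the upstream kernel source): how a caller
 * can drive the walker machinery above. The function and callback names
 * are hypothetical; in-tree users such as fib6_clean_tree() below follow
 * the same pattern.
 */
#if 0	/* example only */
static int example_walker_func(struct fib6_walker *w)
{
	/* Called with w->leaf pointing at the first fib6_info of a
	 * fib6_node that carries routes (RTN_RTINFO). The callback must
	 * consume the node (here by clearing w->leaf) before returning 0,
	 * otherwise fib6_walk_continue() invokes it again for the same
	 * node. A positive return value suspends the walk, a negative
	 * one aborts it.
	 */
	w->leaf = NULL;
	return 0;
}

static void example_walk_table(struct net *net, struct fib6_table *table)
{
	struct fib6_walker w = {
		.root = &table->tb6_root,
		.func = example_walker_func,
	};

	/* fib6_walk() is called with the table lock held, as in
	 * __fib6_clean_all() below.
	 */
	spin_lock_bh(&table->tb6_lock);
	fib6_walk(net, &w);
	spin_unlock_bh(&table->tb6_lock);
}
#endif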
   2166
   2167static int fib6_clean_node(struct fib6_walker *w)
   2168{
   2169	int res;
   2170	struct fib6_info *rt;
   2171	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
   2172	struct nl_info info = {
   2173		.nl_net = c->net,
   2174		.skip_notify = c->skip_notify,
   2175	};
   2176
   2177	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
   2178	    READ_ONCE(w->node->fn_sernum) != c->sernum)
   2179		WRITE_ONCE(w->node->fn_sernum, c->sernum);
   2180
   2181	if (!c->func) {
   2182		WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
   2183		w->leaf = NULL;
   2184		return 0;
   2185	}
   2186
   2187	for_each_fib6_walker_rt(w) {
   2188		res = c->func(rt, c->arg);
   2189		if (res == -1) {
   2190			w->leaf = rt;
   2191			res = fib6_del(rt, &info);
   2192			if (res) {
   2193#if RT6_DEBUG >= 2
   2194				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
   2195					 __func__, rt,
   2196					 rcu_access_pointer(rt->fib6_node),
   2197					 res);
   2198#endif
   2199				continue;
   2200			}
   2201			return 0;
   2202		} else if (res == -2) {
   2203			if (WARN_ON(!rt->fib6_nsiblings))
   2204				continue;
   2205			rt = list_last_entry(&rt->fib6_siblings,
   2206					     struct fib6_info, fib6_siblings);
   2207			continue;
   2208		}
   2209		WARN_ON(res != 0);
   2210	}
   2211	w->leaf = rt;
   2212	return 0;
   2213}
   2214
   2215/*
   2216 *	Convenient frontend to tree walker.
   2217 *
   2218 *	func is called on each route.
   2219 *		It may return -2 -> skip the rest of this multipath route.
   2220 *			      -1 -> delete this route.
   2221 *			       0 -> continue walking.
   2222 */
   2223
   2224static void fib6_clean_tree(struct net *net, struct fib6_node *root,
   2225			    int (*func)(struct fib6_info *, void *arg),
   2226			    int sernum, void *arg, bool skip_notify)
   2227{
   2228	struct fib6_cleaner c;
   2229
   2230	c.w.root = root;
   2231	c.w.func = fib6_clean_node;
   2232	c.w.count = 0;
   2233	c.w.skip = 0;
   2234	c.w.skip_in_node = 0;
   2235	c.func = func;
   2236	c.sernum = sernum;
   2237	c.arg = arg;
   2238	c.net = net;
   2239	c.skip_notify = skip_notify;
   2240
   2241	fib6_walk(net, &c.w);
   2242}
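
/*
 * Illustrative sketch (not in the upstream kernel source): a minimal
 * cleaner callback of the kind passed to fib6_clean_all(), showing the
 * -1/0 return protocol described above. The example_* names and the
 * device argument are hypothetical; rt6_mtu_change_route() and
 * fib6_ifdown() in net/ipv6/route.c are real users of this interface.
 */
#if 0	/* example only */
static int example_drop_dev_routes(struct fib6_info *rt, void *arg)
{
	const struct net_device *dev = arg;

	/* Leave routes that use a shared nexthop object alone. */
	if (rt->nh)
		return 0;

	/* Returning -1 makes fib6_clean_node() fib6_del() this route;
	 * returning 0 keeps it and continues the walk.
	 */
	if (rt->fib6_nh->fib_nh_dev == dev)
		return -1;

	return 0;
}

static void example_flush_dev(struct net *net, struct net_device *dev)
{
	fib6_clean_all(net, example_drop_dev_routes, dev);
}
#endif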
   2243
   2244static void __fib6_clean_all(struct net *net,
   2245			     int (*func)(struct fib6_info *, void *),
   2246			     int sernum, void *arg, bool skip_notify)
   2247{
   2248	struct fib6_table *table;
   2249	struct hlist_head *head;
   2250	unsigned int h;
   2251
   2252	rcu_read_lock();
   2253	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
   2254		head = &net->ipv6.fib_table_hash[h];
   2255		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
   2256			spin_lock_bh(&table->tb6_lock);
   2257			fib6_clean_tree(net, &table->tb6_root,
   2258					func, sernum, arg, skip_notify);
   2259			spin_unlock_bh(&table->tb6_lock);
   2260		}
   2261	}
   2262	rcu_read_unlock();
   2263}
   2264
   2265void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
   2266		    void *arg)
   2267{
   2268	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
   2269}
   2270
   2271void fib6_clean_all_skip_notify(struct net *net,
   2272				int (*func)(struct fib6_info *, void *),
   2273				void *arg)
   2274{
   2275	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
   2276}
   2277
   2278static void fib6_flush_trees(struct net *net)
   2279{
   2280	int new_sernum = fib6_new_sernum(net);
   2281
   2282	__fib6_clean_all(net, NULL, new_sernum, NULL, false);
   2283}
   2284
   2285/*
   2286 *	Garbage collection
   2287 */
   2288
   2289static int fib6_age(struct fib6_info *rt, void *arg)
   2290{
   2291	struct fib6_gc_args *gc_args = arg;
   2292	unsigned long now = jiffies;
   2293
   2294	/*
   2295	 *	check addrconf expiration here.
   2296	 *	Routes are expired even if they are in use.
   2297	 */
   2298
   2299	if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
   2300		if (time_after(now, rt->expires)) {
   2301			RT6_TRACE("expiring %p\n", rt);
   2302			return -1;
   2303		}
   2304		gc_args->more++;
   2305	}
   2306
   2307	/*	Also age clones in the exception table.
   2308	 *	Note that clones are aged out
   2309	 *	only if they are not currently in use.
   2310	 */
   2311	rt6_age_exceptions(rt, gc_args, now);
   2312
   2313	return 0;
   2314}
   2315
   2316void fib6_run_gc(unsigned long expires, struct net *net, bool force)
   2317{
   2318	struct fib6_gc_args gc_args;
   2319	unsigned long now;
   2320
   2321	if (force) {
   2322		spin_lock_bh(&net->ipv6.fib6_gc_lock);
   2323	} else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
   2324		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
   2325		return;
   2326	}
   2327	gc_args.timeout = expires ? (int)expires :
   2328			  net->ipv6.sysctl.ip6_rt_gc_interval;
   2329	gc_args.more = 0;
   2330
   2331	fib6_clean_all(net, fib6_age, &gc_args);
   2332	now = jiffies;
   2333	net->ipv6.ip6_rt_last_gc = now;
   2334
   2335	if (gc_args.more)
   2336		mod_timer(&net->ipv6.ip6_fib_timer,
   2337			  round_jiffies(now
   2338					+ net->ipv6.sysctl.ip6_rt_gc_interval));
   2339	else
   2340		del_timer(&net->ipv6.ip6_fib_timer);
   2341	spin_unlock_bh(&net->ipv6.fib6_gc_lock);
   2342}
   2343
   2344static void fib6_gc_timer_cb(struct timer_list *t)
   2345{
   2346	struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
   2347
   2348	fib6_run_gc(0, arg, true);
   2349}
   2350
   2351static int __net_init fib6_net_init(struct net *net)
   2352{
   2353	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
   2354	int err;
   2355
   2356	err = fib6_notifier_init(net);
   2357	if (err)
   2358		return err;
   2359
   2360	/* Default to 3-tuple */
   2361	net->ipv6.sysctl.multipath_hash_fields =
   2362		FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
   2363
   2364	spin_lock_init(&net->ipv6.fib6_gc_lock);
   2365	rwlock_init(&net->ipv6.fib6_walker_lock);
   2366	INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
   2367	timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
   2368
   2369	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
   2370	if (!net->ipv6.rt6_stats)
   2371		goto out_notifier;
   2372
   2373	/* Avoid false sharing: use at least a full cache line */
   2374	size = max_t(size_t, size, L1_CACHE_BYTES);
   2375
   2376	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
   2377	if (!net->ipv6.fib_table_hash)
   2378		goto out_rt6_stats;
   2379
   2380	net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
   2381					  GFP_KERNEL);
   2382	if (!net->ipv6.fib6_main_tbl)
   2383		goto out_fib_table_hash;
   2384
   2385	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
   2386	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
   2387			   net->ipv6.fib6_null_entry);
   2388	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
   2389		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
   2390	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
   2391
   2392#ifdef CONFIG_IPV6_MULTIPLE_TABLES
   2393	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
   2394					   GFP_KERNEL);
   2395	if (!net->ipv6.fib6_local_tbl)
   2396		goto out_fib6_main_tbl;
   2397	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
   2398	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
   2399			   net->ipv6.fib6_null_entry);
   2400	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
   2401		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
   2402	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
   2403#endif
   2404	fib6_tables_init(net);
   2405
   2406	return 0;
   2407
   2408#ifdef CONFIG_IPV6_MULTIPLE_TABLES
   2409out_fib6_main_tbl:
   2410	kfree(net->ipv6.fib6_main_tbl);
   2411#endif
   2412out_fib_table_hash:
   2413	kfree(net->ipv6.fib_table_hash);
   2414out_rt6_stats:
   2415	kfree(net->ipv6.rt6_stats);
   2416out_notifier:
   2417	fib6_notifier_exit(net);
   2418	return -ENOMEM;
   2419}
   2420
   2421static void fib6_net_exit(struct net *net)
   2422{
   2423	unsigned int i;
   2424
   2425	del_timer_sync(&net->ipv6.ip6_fib_timer);
   2426
   2427	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
   2428		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
   2429		struct hlist_node *tmp;
   2430		struct fib6_table *tb;
   2431
   2432		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
   2433			hlist_del(&tb->tb6_hlist);
   2434			fib6_free_table(tb);
   2435		}
   2436	}
   2437
   2438	kfree(net->ipv6.fib_table_hash);
   2439	kfree(net->ipv6.rt6_stats);
   2440	fib6_notifier_exit(net);
   2441}
   2442
   2443static struct pernet_operations fib6_net_ops = {
   2444	.init = fib6_net_init,
   2445	.exit = fib6_net_exit,
   2446};
   2447
   2448int __init fib6_init(void)
   2449{
   2450	int ret = -ENOMEM;
   2451
   2452	fib6_node_kmem = kmem_cache_create("fib6_nodes",
   2453					   sizeof(struct fib6_node), 0,
   2454					   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
   2455					   NULL);
   2456	if (!fib6_node_kmem)
   2457		goto out;
   2458
   2459	ret = register_pernet_subsys(&fib6_net_ops);
   2460	if (ret)
   2461		goto out_kmem_cache_create;
   2462
   2463	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
   2464				   inet6_dump_fib, 0);
   2465	if (ret)
   2466		goto out_unregister_subsys;
   2467
   2468	__fib6_flush_trees = fib6_flush_trees;
   2469out:
   2470	return ret;
   2471
   2472out_unregister_subsys:
   2473	unregister_pernet_subsys(&fib6_net_ops);
   2474out_kmem_cache_create:
   2475	kmem_cache_destroy(fib6_node_kmem);
   2476	goto out;
   2477}
   2478
   2479void fib6_gc_cleanup(void)
   2480{
   2481	unregister_pernet_subsys(&fib6_net_ops);
   2482	kmem_cache_destroy(fib6_node_kmem);
   2483}
   2484
   2485#ifdef CONFIG_PROC_FS
   2486static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
   2487{
   2488	struct fib6_info *rt = v;
   2489	struct ipv6_route_iter *iter = seq->private;
   2490	struct fib6_nh *fib6_nh = rt->fib6_nh;
   2491	unsigned int flags = rt->fib6_flags;
   2492	const struct net_device *dev;
   2493
   2494	if (rt->nh)
   2495		fib6_nh = nexthop_fib6_nh_bh(rt->nh);
   2496
   2497	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
   2498
   2499#ifdef CONFIG_IPV6_SUBTREES
   2500	seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
   2501#else
   2502	seq_puts(seq, "00000000000000000000000000000000 00 ");
   2503#endif
   2504	if (fib6_nh->fib_nh_gw_family) {
   2505		flags |= RTF_GATEWAY;
   2506		seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
   2507	} else {
   2508		seq_puts(seq, "00000000000000000000000000000000");
   2509	}
   2510
   2511	dev = fib6_nh->fib_nh_dev;
   2512	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
   2513		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
   2514		   flags, dev ? dev->name : "");
   2515	iter->w.leaf = NULL;
   2516	return 0;
   2517}
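
/*
 * Note for illustration (not in the upstream kernel source): the function
 * above emits one /proc/net/ipv6_route line per route. With field values
 * invented purely as an example, a line looks like
 *
 *   fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001     eth0
 *
 * i.e. destination and prefix length, source and prefix length, next-hop
 * gateway, then metric, reference count, a constant 0 ("use"), flags and
 * the output device name (padded to 8 characters by the %8s above).
 */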
   2518
   2519static int ipv6_route_yield(struct fib6_walker *w)
   2520{
   2521	struct ipv6_route_iter *iter = w->args;
   2522
   2523	if (!iter->skip)
   2524		return 1;
   2525
   2526	do {
   2527		iter->w.leaf = rcu_dereference_protected(
   2528				iter->w.leaf->fib6_next,
   2529				lockdep_is_held(&iter->tbl->tb6_lock));
   2530		iter->skip--;
   2531		if (!iter->skip && iter->w.leaf)
   2532			return 1;
   2533	} while (iter->w.leaf);
   2534
   2535	return 0;
   2536}
   2537
   2538static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
   2539				      struct net *net)
   2540{
   2541	memset(&iter->w, 0, sizeof(iter->w));
   2542	iter->w.func = ipv6_route_yield;
   2543	iter->w.root = &iter->tbl->tb6_root;
   2544	iter->w.state = FWS_INIT;
   2545	iter->w.node = iter->w.root;
   2546	iter->w.args = iter;
   2547	iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
   2548	INIT_LIST_HEAD(&iter->w.lh);
   2549	fib6_walker_link(net, &iter->w);
   2550}
   2551
   2552static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
   2553						    struct net *net)
   2554{
   2555	unsigned int h;
   2556	struct hlist_node *node;
   2557
   2558	if (tbl) {
   2559		h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
   2560		node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
   2561	} else {
   2562		h = 0;
   2563		node = NULL;
   2564	}
   2565
   2566	while (!node && h < FIB6_TABLE_HASHSZ) {
   2567		node = rcu_dereference_bh(
   2568			hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
   2569	}
   2570	return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
   2571}
   2572
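/* If the tree's serial number changed since the walk was set up or last
 * resumed, the tree may have been modified under the dumper: restart the
 * walk from the root and skip the entries that were already dumped
 * (w.count).
 */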
   2573static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
   2574{
   2575	int sernum = READ_ONCE(iter->w.root->fn_sernum);
   2576
   2577	if (iter->sernum != sernum) {
   2578		iter->sernum = sernum;
   2579		iter->w.state = FWS_INIT;
   2580		iter->w.node = iter->w.root;
   2581		WARN_ON(iter->w.skip);
   2582		iter->w.skip = iter->w.count;
   2583	}
   2584}
   2585
   2586static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
   2587{
   2588	int r;
   2589	struct fib6_info *n;
   2590	struct net *net = seq_file_net(seq);
   2591	struct ipv6_route_iter *iter = seq->private;
   2592
   2593	++(*pos);
   2594	if (!v)
   2595		goto iter_table;
   2596
   2597	n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
   2598	if (n)
   2599		return n;
   2600
   2601iter_table:
   2602	ipv6_route_check_sernum(iter);
   2603	spin_lock_bh(&iter->tbl->tb6_lock);
   2604	r = fib6_walk_continue(&iter->w);
   2605	spin_unlock_bh(&iter->tbl->tb6_lock);
   2606	if (r > 0) {
   2607		return iter->w.leaf;
   2608	} else if (r < 0) {
   2609		fib6_walker_unlink(net, &iter->w);
   2610		return NULL;
   2611	}
   2612	fib6_walker_unlink(net, &iter->w);
   2613
   2614	iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
   2615	if (!iter->tbl)
   2616		return NULL;
   2617
   2618	ipv6_route_seq_setup_walk(iter, net);
   2619	goto iter_table;
   2620}
   2621
   2622static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
   2623	__acquires(RCU_BH)
   2624{
   2625	struct net *net = seq_file_net(seq);
   2626	struct ipv6_route_iter *iter = seq->private;
   2627
   2628	rcu_read_lock_bh();
   2629	iter->tbl = ipv6_route_seq_next_table(NULL, net);
   2630	iter->skip = *pos;
   2631
   2632	if (iter->tbl) {
   2633		loff_t p = 0;
   2634
   2635		ipv6_route_seq_setup_walk(iter, net);
   2636		return ipv6_route_seq_next(seq, NULL, &p);
   2637	} else {
   2638		return NULL;
   2639	}
   2640}
   2641
   2642static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
   2643{
   2644	struct fib6_walker *w = &iter->w;
   2645	return w->node && !(w->state == FWS_U && w->node == w->root);
   2646}
   2647
   2648static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
   2649	__releases(RCU_BH)
   2650{
   2651	struct net *net = seq_file_net(seq);
   2652	struct ipv6_route_iter *iter = seq->private;
   2653
   2654	if (ipv6_route_iter_active(iter))
   2655		fib6_walker_unlink(net, &iter->w);
   2656
   2657	rcu_read_unlock_bh();
   2658}
   2659
   2660#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
   2661static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
   2662				    struct bpf_iter_meta *meta,
   2663				    void *v)
   2664{
   2665	struct bpf_iter__ipv6_route ctx;
   2666
   2667	ctx.meta = meta;
   2668	ctx.rt = v;
   2669	return bpf_iter_run_prog(prog, &ctx);
   2670}
   2671
   2672static int ipv6_route_seq_show(struct seq_file *seq, void *v)
   2673{
   2674	struct ipv6_route_iter *iter = seq->private;
   2675	struct bpf_iter_meta meta;
   2676	struct bpf_prog *prog;
   2677	int ret;
   2678
   2679	meta.seq = seq;
   2680	prog = bpf_iter_get_info(&meta, false);
   2681	if (!prog)
   2682		return ipv6_route_native_seq_show(seq, v);
   2683
   2684	ret = ipv6_route_prog_seq_show(prog, &meta, v);
   2685	iter->w.leaf = NULL;
   2686
   2687	return ret;
   2688}
   2689
   2690static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
   2691{
   2692	struct bpf_iter_meta meta;
   2693	struct bpf_prog *prog;
   2694
   2695	if (!v) {
   2696		meta.seq = seq;
   2697		prog = bpf_iter_get_info(&meta, true);
   2698		if (prog)
   2699			(void)ipv6_route_prog_seq_show(prog, &meta, v);
   2700	}
   2701
   2702	ipv6_route_native_seq_stop(seq, v);
   2703}
   2704#else
   2705static int ipv6_route_seq_show(struct seq_file *seq, void *v)
   2706{
   2707	return ipv6_route_native_seq_show(seq, v);
   2708}
   2709
   2710static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
   2711{
   2712	ipv6_route_native_seq_stop(seq, v);
   2713}
   2714#endif
   2715
   2716const struct seq_operations ipv6_route_seq_ops = {
   2717	.start	= ipv6_route_seq_start,
   2718	.next	= ipv6_route_seq_next,
   2719	.stop	= ipv6_route_seq_stop,
   2720	.show	= ipv6_route_seq_show
   2721};
   2722#endif /* CONFIG_PROC_FS */