cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radix-tree.c (44110B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright (C) 2001 Momchil Velikov
      4 * Portions Copyright (C) 2001 Christoph Hellwig
      5 * Copyright (C) 2005 SGI, Christoph Lameter
      6 * Copyright (C) 2006 Nick Piggin
      7 * Copyright (C) 2012 Konstantin Khlebnikov
      8 * Copyright (C) 2016 Intel, Matthew Wilcox
      9 * Copyright (C) 2016 Intel, Ross Zwisler
     10 */
     11
     12#include <linux/bitmap.h>
     13#include <linux/bitops.h>
     14#include <linux/bug.h>
     15#include <linux/cpu.h>
     16#include <linux/errno.h>
     17#include <linux/export.h>
     18#include <linux/idr.h>
     19#include <linux/init.h>
     20#include <linux/kernel.h>
     21#include <linux/kmemleak.h>
     22#include <linux/percpu.h>
     23#include <linux/preempt.h>		/* in_interrupt() */
     24#include <linux/radix-tree.h>
     25#include <linux/rcupdate.h>
     26#include <linux/slab.h>
     27#include <linux/string.h>
     28#include <linux/xarray.h>
     29
     30/*
     31 * Radix tree node cache.
     32 */
     33struct kmem_cache *radix_tree_node_cachep;
     34
     35/*
     36 * The radix tree is variable-height, so an insert operation not only has
     37 * to build the branch to its corresponding item, it also has to build the
     38 * branch to existing items if the size has to be increased (by
     39 * radix_tree_extend).
     40 *
     41 * The worst case is a zero height tree with just a single item at index 0,
     42 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
     43 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
     44 * Hence:
     45 */
     46#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
     47
     48/*
     49 * The IDR does not have to be as high as the radix tree since it uses
     50 * signed integers, not unsigned longs.
     51 */
     52#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
     53#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
     54						RADIX_TREE_MAP_SHIFT))
     55#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
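/*
 * A worked example of the sizing above, assuming the common 64-bit
 * configuration where RADIX_TREE_MAP_SHIFT is 6: RADIX_TREE_MAX_PATH is
 * DIV_ROUND_UP(64, 6) == 11, so RADIX_TREE_PRELOAD_SIZE == 11 * 2 - 1 == 21
 * nodes; likewise IDR_MAX_PATH is DIV_ROUND_UP(31, 6) == 6, so
 * IDR_PRELOAD_SIZE == 11 nodes.
 */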
     56
     57/*
     58 * Per-cpu pool of preloaded nodes
     59 */
     60DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
     61	.lock = INIT_LOCAL_LOCK(lock),
     62};
     63EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
     64
     65static inline struct radix_tree_node *entry_to_node(void *ptr)
     66{
     67	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
     68}
     69
     70static inline void *node_to_entry(void *ptr)
     71{
     72	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
     73}
     74
     75#define RADIX_TREE_RETRY	XA_RETRY_ENTRY
     76
     77static inline unsigned long
     78get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
     79{
     80	return parent ? slot - parent->slots : 0;
     81}
     82
     83static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
     84			struct radix_tree_node **nodep, unsigned long index)
     85{
     86	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
     87	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
     88
     89	*nodep = (void *)entry;
     90	return offset;
     91}
     92
     93static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
     94{
     95	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
     96}
     97
     98static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
     99		int offset)
    100{
    101	__set_bit(offset, node->tags[tag]);
    102}
    103
    104static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
    105		int offset)
    106{
    107	__clear_bit(offset, node->tags[tag]);
    108}
    109
    110static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
    111		int offset)
    112{
    113	return test_bit(offset, node->tags[tag]);
    114}
    115
    116static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
    117{
    118	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
    119}
    120
    121static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
    122{
    123	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
    124}
    125
    126static inline void root_tag_clear_all(struct radix_tree_root *root)
    127{
    128	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
    129}
    130
    131static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
    132{
    133	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
    134}
    135
    136static inline unsigned root_tags_get(const struct radix_tree_root *root)
    137{
    138	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
    139}
    140
    141static inline bool is_idr(const struct radix_tree_root *root)
    142{
    143	return !!(root->xa_flags & ROOT_IS_IDR);
    144}
    145
    146/*
    147 * Returns 1 if any slot in the node has this tag set.
    148 * Otherwise returns 0.
    149 */
    150static inline int any_tag_set(const struct radix_tree_node *node,
    151							unsigned int tag)
    152{
    153	unsigned idx;
    154	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
    155		if (node->tags[tag][idx])
    156			return 1;
    157	}
    158	return 0;
    159}
    160
    161static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
    162{
    163	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
    164}
    165
    166/**
    167 * radix_tree_find_next_bit - find the next set bit in a memory region
    168 *
    169 * @node: where to begin the search
    170 * @tag: the tag index
    171 * @offset: the bitnumber to start searching at
    172 *
    173 * Unrollable variant of find_next_bit() for constant size arrays.
    174 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
    175 * Returns next bit offset, or size if nothing found.
    176 */
    177static __always_inline unsigned long
    178radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
    179			 unsigned long offset)
    180{
    181	const unsigned long *addr = node->tags[tag];
    182
    183	if (offset < RADIX_TREE_MAP_SIZE) {
    184		unsigned long tmp;
    185
    186		addr += offset / BITS_PER_LONG;
    187		tmp = *addr >> (offset % BITS_PER_LONG);
    188		if (tmp)
    189			return __ffs(tmp) + offset;
    190		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
    191		while (offset < RADIX_TREE_MAP_SIZE) {
    192			tmp = *++addr;
    193			if (tmp)
    194				return __ffs(tmp) + offset;
    195			offset += BITS_PER_LONG;
    196		}
    197	}
    198	return RADIX_TREE_MAP_SIZE;
    199}
    200
    201static unsigned int iter_offset(const struct radix_tree_iter *iter)
    202{
    203	return iter->index & RADIX_TREE_MAP_MASK;
    204}
    205
    206/*
    207 * The maximum index which can be stored in a radix tree
    208 */
    209static inline unsigned long shift_maxindex(unsigned int shift)
    210{
    211	return (RADIX_TREE_MAP_SIZE << shift) - 1;
    212}
    213
    214static inline unsigned long node_maxindex(const struct radix_tree_node *node)
    215{
    216	return shift_maxindex(node->shift);
    217}
    218
    219static unsigned long next_index(unsigned long index,
    220				const struct radix_tree_node *node,
    221				unsigned long offset)
    222{
    223	return (index & ~node_maxindex(node)) + (offset << node->shift);
    224}
    225
    226/*
    227 * This assumes that the caller has performed appropriate preallocation, and
    228 * that the caller has pinned this thread of control to the current CPU.
    229 */
    230static struct radix_tree_node *
    231radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
    232			struct radix_tree_root *root,
    233			unsigned int shift, unsigned int offset,
    234			unsigned int count, unsigned int nr_values)
    235{
    236	struct radix_tree_node *ret = NULL;
    237
    238	/*
    239	 * Preload code isn't irq safe and it doesn't make sense to use
    240	 * preloading during an interrupt anyway as all the allocations have
    241	 * to be atomic. So just do normal allocation when in interrupt.
    242	 */
    243	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
    244		struct radix_tree_preload *rtp;
    245
    246		/*
    247		 * Even if the caller has preloaded, try to allocate from the
    248		 * cache first for the new node to get accounted to the memory
    249		 * cgroup.
    250		 */
    251		ret = kmem_cache_alloc(radix_tree_node_cachep,
    252				       gfp_mask | __GFP_NOWARN);
    253		if (ret)
    254			goto out;
    255
    256		/*
    257		 * Provided the caller has preloaded here, we will always
    258		 * succeed in getting a node here (and never reach
    259		 * kmem_cache_alloc)
    260		 */
    261		rtp = this_cpu_ptr(&radix_tree_preloads);
    262		if (rtp->nr) {
    263			ret = rtp->nodes;
    264			rtp->nodes = ret->parent;
    265			rtp->nr--;
    266		}
    267		/*
    268		 * Update the allocation stack trace as this is more useful
    269		 * for debugging.
    270		 */
    271		kmemleak_update_trace(ret);
    272		goto out;
    273	}
    274	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
    275out:
    276	BUG_ON(radix_tree_is_internal_node(ret));
    277	if (ret) {
    278		ret->shift = shift;
    279		ret->offset = offset;
    280		ret->count = count;
    281		ret->nr_values = nr_values;
    282		ret->parent = parent;
    283		ret->array = root;
    284	}
    285	return ret;
    286}
    287
    288void radix_tree_node_rcu_free(struct rcu_head *head)
    289{
    290	struct radix_tree_node *node =
    291			container_of(head, struct radix_tree_node, rcu_head);
    292
    293	/*
    294	 * Must only free zeroed nodes into the slab.  We can be left with
    295	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
    296	 * and tags here.
    297	 */
    298	memset(node->slots, 0, sizeof(node->slots));
    299	memset(node->tags, 0, sizeof(node->tags));
    300	INIT_LIST_HEAD(&node->private_list);
    301
    302	kmem_cache_free(radix_tree_node_cachep, node);
    303}
    304
    305static inline void
    306radix_tree_node_free(struct radix_tree_node *node)
    307{
    308	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
    309}
    310
    311/*
    312 * Load up this CPU's radix_tree_node buffer with sufficient objects to
    313 * ensure that the addition of a single element in the tree cannot fail.  On
    314 * success, return zero, with preemption disabled.  On error, return -ENOMEM
    315 * with preemption not disabled.
    316 *
    317 * To make use of this facility, the radix tree must be initialised without
    318 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
    319 */
    320static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
    321{
    322	struct radix_tree_preload *rtp;
    323	struct radix_tree_node *node;
    324	int ret = -ENOMEM;
    325
    326	/*
    327	 * Nodes preloaded by one cgroup can be used by another cgroup, so
    328	 * they should never be accounted to any particular memory cgroup.
    329	 */
    330	gfp_mask &= ~__GFP_ACCOUNT;
    331
    332	local_lock(&radix_tree_preloads.lock);
    333	rtp = this_cpu_ptr(&radix_tree_preloads);
    334	while (rtp->nr < nr) {
    335		local_unlock(&radix_tree_preloads.lock);
    336		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
    337		if (node == NULL)
    338			goto out;
    339		local_lock(&radix_tree_preloads.lock);
    340		rtp = this_cpu_ptr(&radix_tree_preloads);
    341		if (rtp->nr < nr) {
    342			node->parent = rtp->nodes;
    343			rtp->nodes = node;
    344			rtp->nr++;
    345		} else {
    346			kmem_cache_free(radix_tree_node_cachep, node);
    347		}
    348	}
    349	ret = 0;
    350out:
    351	return ret;
    352}
    353
    354/*
    355 * Load up this CPU's radix_tree_node buffer with sufficient objects to
    356 * ensure that the addition of a single element in the tree cannot fail.  On
    357 * success, return zero, with preemption disabled.  On error, return -ENOMEM
    358 * with preemption not disabled.
    359 *
    360 * To make use of this facility, the radix tree must be initialised without
    361 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
    362 */
    363int radix_tree_preload(gfp_t gfp_mask)
    364{
    365	/* Warn on non-sensical use... */
    366	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
    367	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
    368}
    369EXPORT_SYMBOL(radix_tree_preload);
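/*
 * A minimal usage sketch (mytree, mylock, index and item are illustrative
 * names, not defined in this file): a successful radix_tree_preload()
 * returns with preemption disabled, so it must be paired with
 * radix_tree_preload_end() once the insertion is done:
 *
 *	if (radix_tree_preload(GFP_KERNEL))
 *		return -ENOMEM;
 *	spin_lock(&mylock);
 *	err = radix_tree_insert(&mytree, index, item);
 *	spin_unlock(&mylock);
 *	radix_tree_preload_end();
 */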
    370
    371/*
     372 * Same as radix_tree_preload(), but preloading is not guaranteed: we do
     373 * it only if we decide it helps. On success, return zero with preemption
     374 * disabled. On error, return -ENOMEM with preemption not disabled.
    375 */
    376int radix_tree_maybe_preload(gfp_t gfp_mask)
    377{
    378	if (gfpflags_allow_blocking(gfp_mask))
    379		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
    380	/* Preloading doesn't help anything with this gfp mask, skip it */
    381	local_lock(&radix_tree_preloads.lock);
    382	return 0;
    383}
    384EXPORT_SYMBOL(radix_tree_maybe_preload);
    385
    386static unsigned radix_tree_load_root(const struct radix_tree_root *root,
    387		struct radix_tree_node **nodep, unsigned long *maxindex)
    388{
    389	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
    390
    391	*nodep = node;
    392
    393	if (likely(radix_tree_is_internal_node(node))) {
    394		node = entry_to_node(node);
    395		*maxindex = node_maxindex(node);
    396		return node->shift + RADIX_TREE_MAP_SHIFT;
    397	}
    398
    399	*maxindex = 0;
    400	return 0;
    401}
    402
    403/*
    404 *	Extend a radix tree so it can store key @index.
    405 */
    406static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
    407				unsigned long index, unsigned int shift)
    408{
    409	void *entry;
    410	unsigned int maxshift;
    411	int tag;
    412
    413	/* Figure out what the shift should be.  */
    414	maxshift = shift;
    415	while (index > shift_maxindex(maxshift))
    416		maxshift += RADIX_TREE_MAP_SHIFT;
    417
    418	entry = rcu_dereference_raw(root->xa_head);
    419	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
    420		goto out;
    421
    422	do {
    423		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
    424							root, shift, 0, 1, 0);
    425		if (!node)
    426			return -ENOMEM;
    427
    428		if (is_idr(root)) {
    429			all_tag_set(node, IDR_FREE);
    430			if (!root_tag_get(root, IDR_FREE)) {
    431				tag_clear(node, IDR_FREE, 0);
    432				root_tag_set(root, IDR_FREE);
    433			}
    434		} else {
    435			/* Propagate the aggregated tag info to the new child */
    436			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
    437				if (root_tag_get(root, tag))
    438					tag_set(node, tag, 0);
    439			}
    440		}
    441
    442		BUG_ON(shift > BITS_PER_LONG);
    443		if (radix_tree_is_internal_node(entry)) {
    444			entry_to_node(entry)->parent = node;
    445		} else if (xa_is_value(entry)) {
    446			/* Moving a value entry root->xa_head to a node */
    447			node->nr_values = 1;
    448		}
    449		/*
    450		 * entry was already in the radix tree, so we do not need
    451		 * rcu_assign_pointer here
    452		 */
    453		node->slots[0] = (void __rcu *)entry;
    454		entry = node_to_entry(node);
    455		rcu_assign_pointer(root->xa_head, entry);
    456		shift += RADIX_TREE_MAP_SHIFT;
    457	} while (shift <= maxshift);
    458out:
    459	return maxshift + RADIX_TREE_MAP_SHIFT;
    460}
    461
    462/**
    463 *	radix_tree_shrink    -    shrink radix tree to minimum height
    464 *	@root:		radix tree root
    465 */
    466static inline bool radix_tree_shrink(struct radix_tree_root *root)
    467{
    468	bool shrunk = false;
    469
    470	for (;;) {
    471		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
    472		struct radix_tree_node *child;
    473
    474		if (!radix_tree_is_internal_node(node))
    475			break;
    476		node = entry_to_node(node);
    477
    478		/*
     479		 * If the candidate node has more than one child, or its child
    480		 * is not at the leftmost slot, we cannot shrink.
    481		 */
    482		if (node->count != 1)
    483			break;
    484		child = rcu_dereference_raw(node->slots[0]);
    485		if (!child)
    486			break;
    487
    488		/*
    489		 * For an IDR, we must not shrink entry 0 into the root in
    490		 * case somebody calls idr_replace() with a pointer that
    491		 * appears to be an internal entry
    492		 */
    493		if (!node->shift && is_idr(root))
    494			break;
    495
    496		if (radix_tree_is_internal_node(child))
    497			entry_to_node(child)->parent = NULL;
    498
    499		/*
    500		 * We don't need rcu_assign_pointer(), since we are simply
    501		 * moving the node from one part of the tree to another: if it
    502		 * was safe to dereference the old pointer to it
    503		 * (node->slots[0]), it will be safe to dereference the new
    504		 * one (root->xa_head) as far as dependent read barriers go.
    505		 */
    506		root->xa_head = (void __rcu *)child;
    507		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
    508			root_tag_clear(root, IDR_FREE);
    509
    510		/*
    511		 * We have a dilemma here. The node's slot[0] must not be
    512		 * NULLed in case there are concurrent lookups expecting to
    513		 * find the item. However if this was a bottom-level node,
    514		 * then it may be subject to the slot pointer being visible
     515		 * to callers dereferencing it. If the item corresponding to
    516		 * slot[0] is subsequently deleted, these callers would expect
    517		 * their slot to become empty sooner or later.
    518		 *
    519		 * For example, lockless pagecache will look up a slot, deref
    520		 * the page pointer, and if the page has 0 refcount it means it
    521		 * was concurrently deleted from pagecache so try the deref
    522		 * again. Fortunately there is already a requirement for logic
    523		 * to retry the entire slot lookup -- the indirect pointer
    524		 * problem (replacing direct root node with an indirect pointer
    525		 * also results in a stale slot). So tag the slot as indirect
    526		 * to force callers to retry.
    527		 */
    528		node->count = 0;
    529		if (!radix_tree_is_internal_node(child)) {
    530			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
    531		}
    532
    533		WARN_ON_ONCE(!list_empty(&node->private_list));
    534		radix_tree_node_free(node);
    535		shrunk = true;
    536	}
    537
    538	return shrunk;
    539}
    540
    541static bool delete_node(struct radix_tree_root *root,
    542			struct radix_tree_node *node)
    543{
    544	bool deleted = false;
    545
    546	do {
    547		struct radix_tree_node *parent;
    548
    549		if (node->count) {
    550			if (node_to_entry(node) ==
    551					rcu_dereference_raw(root->xa_head))
    552				deleted |= radix_tree_shrink(root);
    553			return deleted;
    554		}
    555
    556		parent = node->parent;
    557		if (parent) {
    558			parent->slots[node->offset] = NULL;
    559			parent->count--;
    560		} else {
    561			/*
    562			 * Shouldn't the tags already have all been cleared
    563			 * by the caller?
    564			 */
    565			if (!is_idr(root))
    566				root_tag_clear_all(root);
    567			root->xa_head = NULL;
    568		}
    569
    570		WARN_ON_ONCE(!list_empty(&node->private_list));
    571		radix_tree_node_free(node);
    572		deleted = true;
    573
    574		node = parent;
    575	} while (node);
    576
    577	return deleted;
    578}
    579
    580/**
    581 *	__radix_tree_create	-	create a slot in a radix tree
    582 *	@root:		radix tree root
    583 *	@index:		index key
    584 *	@nodep:		returns node
    585 *	@slotp:		returns slot
    586 *
    587 *	Create, if necessary, and return the node and slot for an item
    588 *	at position @index in the radix tree @root.
    589 *
    590 *	Until there is more than one item in the tree, no nodes are
    591 *	allocated and @root->xa_head is used as a direct slot instead of
    592 *	pointing to a node, in which case *@nodep will be NULL.
    593 *
    594 *	Returns -ENOMEM, or 0 for success.
    595 */
    596static int __radix_tree_create(struct radix_tree_root *root,
    597		unsigned long index, struct radix_tree_node **nodep,
    598		void __rcu ***slotp)
    599{
    600	struct radix_tree_node *node = NULL, *child;
    601	void __rcu **slot = (void __rcu **)&root->xa_head;
    602	unsigned long maxindex;
    603	unsigned int shift, offset = 0;
    604	unsigned long max = index;
    605	gfp_t gfp = root_gfp_mask(root);
    606
    607	shift = radix_tree_load_root(root, &child, &maxindex);
    608
    609	/* Make sure the tree is high enough.  */
    610	if (max > maxindex) {
    611		int error = radix_tree_extend(root, gfp, max, shift);
    612		if (error < 0)
    613			return error;
    614		shift = error;
    615		child = rcu_dereference_raw(root->xa_head);
    616	}
    617
    618	while (shift > 0) {
    619		shift -= RADIX_TREE_MAP_SHIFT;
    620		if (child == NULL) {
    621			/* Have to add a child node.  */
    622			child = radix_tree_node_alloc(gfp, node, root, shift,
    623							offset, 0, 0);
    624			if (!child)
    625				return -ENOMEM;
    626			rcu_assign_pointer(*slot, node_to_entry(child));
    627			if (node)
    628				node->count++;
    629		} else if (!radix_tree_is_internal_node(child))
    630			break;
    631
    632		/* Go a level down */
    633		node = entry_to_node(child);
    634		offset = radix_tree_descend(node, &child, index);
    635		slot = &node->slots[offset];
    636	}
    637
    638	if (nodep)
    639		*nodep = node;
    640	if (slotp)
    641		*slotp = slot;
    642	return 0;
    643}
    644
    645/*
    646 * Free any nodes below this node.  The tree is presumed to not need
    647 * shrinking, and any user data in the tree is presumed to not need a
    648 * destructor called on it.  If we need to add a destructor, we can
    649 * add that functionality later.  Note that we may not clear tags or
    650 * slots from the tree as an RCU walker may still have a pointer into
    651 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
    652 * but we'll still have to clear those in rcu_free.
    653 */
    654static void radix_tree_free_nodes(struct radix_tree_node *node)
    655{
    656	unsigned offset = 0;
    657	struct radix_tree_node *child = entry_to_node(node);
    658
    659	for (;;) {
    660		void *entry = rcu_dereference_raw(child->slots[offset]);
    661		if (xa_is_node(entry) && child->shift) {
    662			child = entry_to_node(entry);
    663			offset = 0;
    664			continue;
    665		}
    666		offset++;
    667		while (offset == RADIX_TREE_MAP_SIZE) {
    668			struct radix_tree_node *old = child;
    669			offset = child->offset + 1;
    670			child = child->parent;
    671			WARN_ON_ONCE(!list_empty(&old->private_list));
    672			radix_tree_node_free(old);
    673			if (old == entry_to_node(node))
    674				return;
    675		}
    676	}
    677}
    678
    679static inline int insert_entries(struct radix_tree_node *node,
    680		void __rcu **slot, void *item, bool replace)
    681{
    682	if (*slot)
    683		return -EEXIST;
    684	rcu_assign_pointer(*slot, item);
    685	if (node) {
    686		node->count++;
    687		if (xa_is_value(item))
    688			node->nr_values++;
    689	}
    690	return 1;
    691}
    692
    693/**
    694 *	radix_tree_insert    -    insert into a radix tree
    695 *	@root:		radix tree root
    696 *	@index:		index key
    697 *	@item:		item to insert
    698 *
    699 *	Insert an item into the radix tree at position @index.
    700 */
    701int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
    702			void *item)
    703{
    704	struct radix_tree_node *node;
    705	void __rcu **slot;
    706	int error;
    707
    708	BUG_ON(radix_tree_is_internal_node(item));
    709
    710	error = __radix_tree_create(root, index, &node, &slot);
    711	if (error)
    712		return error;
    713
    714	error = insert_entries(node, slot, item, false);
    715	if (error < 0)
    716		return error;
    717
    718	if (node) {
    719		unsigned offset = get_slot_offset(node, slot);
    720		BUG_ON(tag_get(node, 0, offset));
    721		BUG_ON(tag_get(node, 1, offset));
    722		BUG_ON(tag_get(node, 2, offset));
    723	} else {
    724		BUG_ON(root_tags_get(root));
    725	}
    726
    727	return 0;
    728}
    729EXPORT_SYMBOL(radix_tree_insert);
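/*
 * A small end-to-end sketch (mytree, myitem, err and p are illustrative
 * names); radix_tree_insert() returns -EEXIST if the slot is already
 * occupied and radix_tree_delete() returns the removed entry or NULL:
 *
 *	static RADIX_TREE(mytree, GFP_KERNEL);
 *
 *	err = radix_tree_insert(&mytree, 42, myitem);
 *	rcu_read_lock();
 *	p = radix_tree_lookup(&mytree, 42);
 *	rcu_read_unlock();
 *	radix_tree_delete(&mytree, 42);
 */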
    730
    731/**
    732 *	__radix_tree_lookup	-	lookup an item in a radix tree
    733 *	@root:		radix tree root
    734 *	@index:		index key
    735 *	@nodep:		returns node
    736 *	@slotp:		returns slot
    737 *
    738 *	Lookup and return the item at position @index in the radix
    739 *	tree @root.
    740 *
    741 *	Until there is more than one item in the tree, no nodes are
    742 *	allocated and @root->xa_head is used as a direct slot instead of
    743 *	pointing to a node, in which case *@nodep will be NULL.
    744 */
    745void *__radix_tree_lookup(const struct radix_tree_root *root,
    746			  unsigned long index, struct radix_tree_node **nodep,
    747			  void __rcu ***slotp)
    748{
    749	struct radix_tree_node *node, *parent;
    750	unsigned long maxindex;
    751	void __rcu **slot;
    752
    753 restart:
    754	parent = NULL;
    755	slot = (void __rcu **)&root->xa_head;
    756	radix_tree_load_root(root, &node, &maxindex);
    757	if (index > maxindex)
    758		return NULL;
    759
    760	while (radix_tree_is_internal_node(node)) {
    761		unsigned offset;
    762
    763		parent = entry_to_node(node);
    764		offset = radix_tree_descend(parent, &node, index);
    765		slot = parent->slots + offset;
    766		if (node == RADIX_TREE_RETRY)
    767			goto restart;
    768		if (parent->shift == 0)
    769			break;
    770	}
    771
    772	if (nodep)
    773		*nodep = parent;
    774	if (slotp)
    775		*slotp = slot;
    776	return node;
    777}
    778
    779/**
    780 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
    781 *	@root:		radix tree root
    782 *	@index:		index key
    783 *
    784 *	Returns:  the slot corresponding to the position @index in the
    785 *	radix tree @root. This is useful for update-if-exists operations.
    786 *
    787 *	This function can be called under rcu_read_lock iff the slot is not
    788 *	modified by radix_tree_replace_slot, otherwise it must be called
    789 *	exclusive from other writers. Any dereference of the slot must be done
    790 *	using radix_tree_deref_slot.
    791 */
    792void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
    793				unsigned long index)
    794{
    795	void __rcu **slot;
    796
    797	if (!__radix_tree_lookup(root, index, NULL, &slot))
    798		return NULL;
    799	return slot;
    800}
    801EXPORT_SYMBOL(radix_tree_lookup_slot);
    802
    803/**
    804 *	radix_tree_lookup    -    perform lookup operation on a radix tree
    805 *	@root:		radix tree root
    806 *	@index:		index key
    807 *
    808 *	Lookup the item at the position @index in the radix tree @root.
    809 *
    810 *	This function can be called under rcu_read_lock, however the caller
    811 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
    812 *	them safely). No RCU barriers are required to access or modify the
    813 *	returned item, however.
    814 */
    815void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
    816{
    817	return __radix_tree_lookup(root, index, NULL, NULL);
    818}
    819EXPORT_SYMBOL(radix_tree_lookup);
    820
    821static void replace_slot(void __rcu **slot, void *item,
    822		struct radix_tree_node *node, int count, int values)
    823{
    824	if (node && (count || values)) {
    825		node->count += count;
    826		node->nr_values += values;
    827	}
    828
    829	rcu_assign_pointer(*slot, item);
    830}
    831
    832static bool node_tag_get(const struct radix_tree_root *root,
    833				const struct radix_tree_node *node,
    834				unsigned int tag, unsigned int offset)
    835{
    836	if (node)
    837		return tag_get(node, tag, offset);
    838	return root_tag_get(root, tag);
    839}
    840
    841/*
    842 * IDR users want to be able to store NULL in the tree, so if the slot isn't
    843 * free, don't adjust the count, even if it's transitioning between NULL and
    844 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
    845 * have empty bits, but it only stores NULL in slots when they're being
    846 * deleted.
    847 */
    848static int calculate_count(struct radix_tree_root *root,
    849				struct radix_tree_node *node, void __rcu **slot,
    850				void *item, void *old)
    851{
    852	if (is_idr(root)) {
    853		unsigned offset = get_slot_offset(node, slot);
    854		bool free = node_tag_get(root, node, IDR_FREE, offset);
    855		if (!free)
    856			return 0;
    857		if (!old)
    858			return 1;
    859	}
    860	return !!item - !!old;
    861}
    862
    863/**
    864 * __radix_tree_replace		- replace item in a slot
    865 * @root:		radix tree root
    866 * @node:		pointer to tree node
    867 * @slot:		pointer to slot in @node
    868 * @item:		new item to store in the slot.
    869 *
    870 * For use with __radix_tree_lookup().  Caller must hold tree write locked
    871 * across slot lookup and replacement.
    872 */
    873void __radix_tree_replace(struct radix_tree_root *root,
    874			  struct radix_tree_node *node,
    875			  void __rcu **slot, void *item)
    876{
    877	void *old = rcu_dereference_raw(*slot);
    878	int values = !!xa_is_value(item) - !!xa_is_value(old);
    879	int count = calculate_count(root, node, slot, item, old);
    880
    881	/*
    882	 * This function supports replacing value entries and
    883	 * deleting entries, but that needs accounting against the
    884	 * node unless the slot is root->xa_head.
    885	 */
    886	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
    887			(count || values));
    888	replace_slot(slot, item, node, count, values);
    889
    890	if (!node)
    891		return;
    892
    893	delete_node(root, node);
    894}
    895
    896/**
    897 * radix_tree_replace_slot	- replace item in a slot
    898 * @root:	radix tree root
    899 * @slot:	pointer to slot
    900 * @item:	new item to store in the slot.
    901 *
    902 * For use with radix_tree_lookup_slot() and
    903 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
    904 * across slot lookup and replacement.
    905 *
    906 * NOTE: This cannot be used to switch between non-entries (empty slots),
    907 * regular entries, and value entries, as that requires accounting
    908 * inside the radix tree node.  When switching between entry types, or when
    909 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
    910 * radix_tree_iter_replace().
    911 */
    912void radix_tree_replace_slot(struct radix_tree_root *root,
    913			     void __rcu **slot, void *item)
    914{
    915	__radix_tree_replace(root, NULL, slot, item);
    916}
    917EXPORT_SYMBOL(radix_tree_replace_slot);
    918
    919/**
    920 * radix_tree_iter_replace - replace item in a slot
    921 * @root:	radix tree root
    922 * @iter:	iterator state
    923 * @slot:	pointer to slot
    924 * @item:	new item to store in the slot.
    925 *
    926 * For use with radix_tree_for_each_slot().
    927 * Caller must hold tree write locked.
    928 */
    929void radix_tree_iter_replace(struct radix_tree_root *root,
    930				const struct radix_tree_iter *iter,
    931				void __rcu **slot, void *item)
    932{
    933	__radix_tree_replace(root, iter->node, slot, item);
    934}
    935
    936static void node_tag_set(struct radix_tree_root *root,
    937				struct radix_tree_node *node,
    938				unsigned int tag, unsigned int offset)
    939{
    940	while (node) {
    941		if (tag_get(node, tag, offset))
    942			return;
    943		tag_set(node, tag, offset);
    944		offset = node->offset;
    945		node = node->parent;
    946	}
    947
    948	if (!root_tag_get(root, tag))
    949		root_tag_set(root, tag);
    950}
    951
    952/**
    953 *	radix_tree_tag_set - set a tag on a radix tree node
    954 *	@root:		radix tree root
    955 *	@index:		index key
    956 *	@tag:		tag index
    957 *
    958 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
    959 *	corresponding to @index in the radix tree.  From
     960 *	corresponding to @index in the radix tree, from
     961 *	the root all the way down to the leaf node.
    962 *	Returns the address of the tagged item.  Setting a tag on a not-present
    963 *	item is a bug.
    964 */
    965void *radix_tree_tag_set(struct radix_tree_root *root,
    966			unsigned long index, unsigned int tag)
    967{
    968	struct radix_tree_node *node, *parent;
    969	unsigned long maxindex;
    970
    971	radix_tree_load_root(root, &node, &maxindex);
    972	BUG_ON(index > maxindex);
    973
    974	while (radix_tree_is_internal_node(node)) {
    975		unsigned offset;
    976
    977		parent = entry_to_node(node);
    978		offset = radix_tree_descend(parent, &node, index);
    979		BUG_ON(!node);
    980
    981		if (!tag_get(parent, tag, offset))
    982			tag_set(parent, tag, offset);
    983	}
    984
    985	/* set the root's tag bit */
    986	if (!root_tag_get(root, tag))
    987		root_tag_set(root, tag);
    988
    989	return node;
    990}
    991EXPORT_SYMBOL(radix_tree_tag_set);
    992
    993static void node_tag_clear(struct radix_tree_root *root,
    994				struct radix_tree_node *node,
    995				unsigned int tag, unsigned int offset)
    996{
    997	while (node) {
    998		if (!tag_get(node, tag, offset))
    999			return;
   1000		tag_clear(node, tag, offset);
   1001		if (any_tag_set(node, tag))
   1002			return;
   1003
   1004		offset = node->offset;
   1005		node = node->parent;
   1006	}
   1007
   1008	/* clear the root's tag bit */
   1009	if (root_tag_get(root, tag))
   1010		root_tag_clear(root, tag);
   1011}
   1012
   1013/**
   1014 *	radix_tree_tag_clear - clear a tag on a radix tree node
   1015 *	@root:		radix tree root
   1016 *	@index:		index key
   1017 *	@tag:		tag index
   1018 *
   1019 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
   1020 *	corresponding to @index in the radix tree.  If this causes
   1021 *	the leaf node to have no tags set then clear the tag in the
   1022 *	next-to-leaf node, etc.
   1023 *
    1024 *	Returns the address of the tagged item on success, else NULL, i.e. it
   1025 *	has the same return value and semantics as radix_tree_lookup().
   1026 */
   1027void *radix_tree_tag_clear(struct radix_tree_root *root,
   1028			unsigned long index, unsigned int tag)
   1029{
   1030	struct radix_tree_node *node, *parent;
   1031	unsigned long maxindex;
   1032	int offset;
   1033
   1034	radix_tree_load_root(root, &node, &maxindex);
   1035	if (index > maxindex)
   1036		return NULL;
   1037
   1038	parent = NULL;
   1039
   1040	while (radix_tree_is_internal_node(node)) {
   1041		parent = entry_to_node(node);
   1042		offset = radix_tree_descend(parent, &node, index);
   1043	}
   1044
   1045	if (node)
   1046		node_tag_clear(root, parent, tag, offset);
   1047
   1048	return node;
   1049}
   1050EXPORT_SYMBOL(radix_tree_tag_clear);
   1051
   1052/**
   1053  * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
   1054  * @root: radix tree root
   1055  * @iter: iterator state
   1056  * @tag: tag to clear
   1057  */
   1058void radix_tree_iter_tag_clear(struct radix_tree_root *root,
   1059			const struct radix_tree_iter *iter, unsigned int tag)
   1060{
   1061	node_tag_clear(root, iter->node, tag, iter_offset(iter));
   1062}
   1063
   1064/**
   1065 * radix_tree_tag_get - get a tag on a radix tree node
   1066 * @root:		radix tree root
   1067 * @index:		index key
   1068 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
   1069 *
   1070 * Return values:
   1071 *
   1072 *  0: tag not present or not set
   1073 *  1: tag set
   1074 *
   1075 * Note that the return value of this function may not be relied on, even if
   1076 * the RCU lock is held, unless tag modification and node deletion are excluded
   1077 * from concurrency.
   1078 */
   1079int radix_tree_tag_get(const struct radix_tree_root *root,
   1080			unsigned long index, unsigned int tag)
   1081{
   1082	struct radix_tree_node *node, *parent;
   1083	unsigned long maxindex;
   1084
   1085	if (!root_tag_get(root, tag))
   1086		return 0;
   1087
   1088	radix_tree_load_root(root, &node, &maxindex);
   1089	if (index > maxindex)
   1090		return 0;
   1091
   1092	while (radix_tree_is_internal_node(node)) {
   1093		unsigned offset;
   1094
   1095		parent = entry_to_node(node);
   1096		offset = radix_tree_descend(parent, &node, index);
   1097
   1098		if (!tag_get(parent, tag, offset))
   1099			return 0;
   1100		if (node == RADIX_TREE_RETRY)
   1101			break;
   1102	}
   1103
   1104	return 1;
   1105}
   1106EXPORT_SYMBOL(radix_tree_tag_get);
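/*
 * A brief tag-usage sketch (mytree, index, MY_TAG and do_something() are
 * illustrative names; tag values must be < RADIX_TREE_MAX_TAGS and the
 * entry at @index must already be present):
 *
 *	radix_tree_tag_set(&mytree, index, MY_TAG);
 *	if (radix_tree_tag_get(&mytree, index, MY_TAG))
 *		do_something();
 *	radix_tree_tag_clear(&mytree, index, MY_TAG);
 */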
   1107
   1108/* Construct iter->tags bit-mask from node->tags[tag] array */
   1109static void set_iter_tags(struct radix_tree_iter *iter,
   1110				struct radix_tree_node *node, unsigned offset,
   1111				unsigned tag)
   1112{
   1113	unsigned tag_long = offset / BITS_PER_LONG;
   1114	unsigned tag_bit  = offset % BITS_PER_LONG;
   1115
   1116	if (!node) {
   1117		iter->tags = 1;
   1118		return;
   1119	}
   1120
   1121	iter->tags = node->tags[tag][tag_long] >> tag_bit;
   1122
   1123	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
   1124	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
   1125		/* Pick tags from next element */
   1126		if (tag_bit)
   1127			iter->tags |= node->tags[tag][tag_long + 1] <<
   1128						(BITS_PER_LONG - tag_bit);
   1129		/* Clip chunk size, here only BITS_PER_LONG tags */
   1130		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
   1131	}
   1132}
   1133
   1134void __rcu **radix_tree_iter_resume(void __rcu **slot,
   1135					struct radix_tree_iter *iter)
   1136{
   1137	slot++;
   1138	iter->index = __radix_tree_iter_add(iter, 1);
   1139	iter->next_index = iter->index;
   1140	iter->tags = 0;
   1141	return NULL;
   1142}
   1143EXPORT_SYMBOL(radix_tree_iter_resume);
   1144
   1145/**
   1146 * radix_tree_next_chunk - find next chunk of slots for iteration
   1147 *
   1148 * @root:	radix tree root
   1149 * @iter:	iterator state
   1150 * @flags:	RADIX_TREE_ITER_* flags and tag index
   1151 * Returns:	pointer to chunk first slot, or NULL if iteration is over
   1152 */
   1153void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
   1154			     struct radix_tree_iter *iter, unsigned flags)
   1155{
   1156	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
   1157	struct radix_tree_node *node, *child;
   1158	unsigned long index, offset, maxindex;
   1159
   1160	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
   1161		return NULL;
   1162
   1163	/*
   1164	 * Catch next_index overflow after ~0UL. iter->index never overflows
   1165	 * during iterating; it can be zero only at the beginning.
   1166	 * And we cannot overflow iter->next_index in a single step,
   1167	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
   1168	 *
    1169	 * This condition is also used by radix_tree_next_slot() to stop
   1170	 * contiguous iterating, and forbid switching to the next chunk.
   1171	 */
   1172	index = iter->next_index;
   1173	if (!index && iter->index)
   1174		return NULL;
   1175
   1176 restart:
   1177	radix_tree_load_root(root, &child, &maxindex);
   1178	if (index > maxindex)
   1179		return NULL;
   1180	if (!child)
   1181		return NULL;
   1182
   1183	if (!radix_tree_is_internal_node(child)) {
   1184		/* Single-slot tree */
   1185		iter->index = index;
   1186		iter->next_index = maxindex + 1;
   1187		iter->tags = 1;
   1188		iter->node = NULL;
   1189		return (void __rcu **)&root->xa_head;
   1190	}
   1191
   1192	do {
   1193		node = entry_to_node(child);
   1194		offset = radix_tree_descend(node, &child, index);
   1195
   1196		if ((flags & RADIX_TREE_ITER_TAGGED) ?
   1197				!tag_get(node, tag, offset) : !child) {
   1198			/* Hole detected */
   1199			if (flags & RADIX_TREE_ITER_CONTIG)
   1200				return NULL;
   1201
   1202			if (flags & RADIX_TREE_ITER_TAGGED)
   1203				offset = radix_tree_find_next_bit(node, tag,
   1204						offset + 1);
   1205			else
   1206				while (++offset	< RADIX_TREE_MAP_SIZE) {
   1207					void *slot = rcu_dereference_raw(
   1208							node->slots[offset]);
   1209					if (slot)
   1210						break;
   1211				}
   1212			index &= ~node_maxindex(node);
   1213			index += offset << node->shift;
   1214			/* Overflow after ~0UL */
   1215			if (!index)
   1216				return NULL;
   1217			if (offset == RADIX_TREE_MAP_SIZE)
   1218				goto restart;
   1219			child = rcu_dereference_raw(node->slots[offset]);
   1220		}
   1221
   1222		if (!child)
   1223			goto restart;
   1224		if (child == RADIX_TREE_RETRY)
   1225			break;
   1226	} while (node->shift && radix_tree_is_internal_node(child));
   1227
   1228	/* Update the iterator state */
   1229	iter->index = (index &~ node_maxindex(node)) | offset;
   1230	iter->next_index = (index | node_maxindex(node)) + 1;
   1231	iter->node = node;
   1232
   1233	if (flags & RADIX_TREE_ITER_TAGGED)
   1234		set_iter_tags(iter, node, offset, tag);
   1235
   1236	return node->slots + offset;
   1237}
   1238EXPORT_SYMBOL(radix_tree_next_chunk);
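/*
 * A minimal iteration sketch built on this chunked lookup via the
 * radix_tree_for_each_slot() helper from <linux/radix-tree.h> (mytree and
 * process() are illustrative names):
 *
 *	struct radix_tree_iter iter;
 *	void __rcu **slot;
 *
 *	rcu_read_lock();
 *	radix_tree_for_each_slot(slot, &mytree, &iter, 0) {
 *		void *entry = radix_tree_deref_slot(slot);
 *		if (radix_tree_deref_retry(entry)) {
 *			slot = radix_tree_iter_retry(&iter);
 *			continue;
 *		}
 *		process(iter.index, entry);
 *	}
 *	rcu_read_unlock();
 */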
   1239
   1240/**
   1241 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
   1242 *	@root:		radix tree root
   1243 *	@results:	where the results of the lookup are placed
   1244 *	@first_index:	start the lookup from this key
   1245 *	@max_items:	place up to this many items at *results
   1246 *
   1247 *	Performs an index-ascending scan of the tree for present items.  Places
   1248 *	them at *@results and returns the number of items which were placed at
   1249 *	*@results.
   1250 *
   1251 *	The implementation is naive.
   1252 *
   1253 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
   1254 *	rcu_read_lock. In this case, rather than the returned results being
   1255 *	an atomic snapshot of the tree at a single point in time, the
   1256 *	semantics of an RCU protected gang lookup are as though multiple
    1257 *	radix_tree_lookup() calls had been issued, each under its own lock,
    1258 *	with the results stored in 'results'.
   1259 */
   1260unsigned int
   1261radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
   1262			unsigned long first_index, unsigned int max_items)
   1263{
   1264	struct radix_tree_iter iter;
   1265	void __rcu **slot;
   1266	unsigned int ret = 0;
   1267
   1268	if (unlikely(!max_items))
   1269		return 0;
   1270
   1271	radix_tree_for_each_slot(slot, root, &iter, first_index) {
   1272		results[ret] = rcu_dereference_raw(*slot);
   1273		if (!results[ret])
   1274			continue;
   1275		if (radix_tree_is_internal_node(results[ret])) {
   1276			slot = radix_tree_iter_retry(&iter);
   1277			continue;
   1278		}
   1279		if (++ret == max_items)
   1280			break;
   1281	}
   1282
   1283	return ret;
   1284}
   1285EXPORT_SYMBOL(radix_tree_gang_lookup);
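/*
 * A bounded batch-lookup sketch (mytree, batch and process() are
 * illustrative names):
 *
 *	void *batch[16];
 *	unsigned int i, n;
 *
 *	rcu_read_lock();
 *	n = radix_tree_gang_lookup(&mytree, batch, 0, ARRAY_SIZE(batch));
 *	for (i = 0; i < n; i++)
 *		process(batch[i]);
 *	rcu_read_unlock();
 */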
   1286
   1287/**
   1288 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
   1289 *	                             based on a tag
   1290 *	@root:		radix tree root
   1291 *	@results:	where the results of the lookup are placed
   1292 *	@first_index:	start the lookup from this key
   1293 *	@max_items:	place up to this many items at *results
   1294 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
   1295 *
   1296 *	Performs an index-ascending scan of the tree for present items which
   1297 *	have the tag indexed by @tag set.  Places the items at *@results and
   1298 *	returns the number of items which were placed at *@results.
   1299 */
   1300unsigned int
   1301radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
   1302		unsigned long first_index, unsigned int max_items,
   1303		unsigned int tag)
   1304{
   1305	struct radix_tree_iter iter;
   1306	void __rcu **slot;
   1307	unsigned int ret = 0;
   1308
   1309	if (unlikely(!max_items))
   1310		return 0;
   1311
   1312	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
   1313		results[ret] = rcu_dereference_raw(*slot);
   1314		if (!results[ret])
   1315			continue;
   1316		if (radix_tree_is_internal_node(results[ret])) {
   1317			slot = radix_tree_iter_retry(&iter);
   1318			continue;
   1319		}
   1320		if (++ret == max_items)
   1321			break;
   1322	}
   1323
   1324	return ret;
   1325}
   1326EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
   1327
   1328/**
   1329 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
   1330 *					  radix tree based on a tag
   1331 *	@root:		radix tree root
   1332 *	@results:	where the results of the lookup are placed
   1333 *	@first_index:	start the lookup from this key
   1334 *	@max_items:	place up to this many items at *results
   1335 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
   1336 *
   1337 *	Performs an index-ascending scan of the tree for present items which
   1338 *	have the tag indexed by @tag set.  Places the slots at *@results and
   1339 *	returns the number of slots which were placed at *@results.
   1340 */
   1341unsigned int
   1342radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
   1343		void __rcu ***results, unsigned long first_index,
   1344		unsigned int max_items, unsigned int tag)
   1345{
   1346	struct radix_tree_iter iter;
   1347	void __rcu **slot;
   1348	unsigned int ret = 0;
   1349
   1350	if (unlikely(!max_items))
   1351		return 0;
   1352
   1353	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
   1354		results[ret] = slot;
   1355		if (++ret == max_items)
   1356			break;
   1357	}
   1358
   1359	return ret;
   1360}
   1361EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
   1362
   1363static bool __radix_tree_delete(struct radix_tree_root *root,
   1364				struct radix_tree_node *node, void __rcu **slot)
   1365{
   1366	void *old = rcu_dereference_raw(*slot);
   1367	int values = xa_is_value(old) ? -1 : 0;
   1368	unsigned offset = get_slot_offset(node, slot);
   1369	int tag;
   1370
   1371	if (is_idr(root))
   1372		node_tag_set(root, node, IDR_FREE, offset);
   1373	else
   1374		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
   1375			node_tag_clear(root, node, tag, offset);
   1376
   1377	replace_slot(slot, NULL, node, -1, values);
   1378	return node && delete_node(root, node);
   1379}
   1380
   1381/**
   1382 * radix_tree_iter_delete - delete the entry at this iterator position
   1383 * @root: radix tree root
   1384 * @iter: iterator state
   1385 * @slot: pointer to slot
   1386 *
   1387 * Delete the entry at the position currently pointed to by the iterator.
   1388 * This may result in the current node being freed; if it is, the iterator
   1389 * is advanced so that it will not reference the freed memory.  This
   1390 * function may be called without any locking if there are no other threads
   1391 * which can access this tree.
   1392 */
   1393void radix_tree_iter_delete(struct radix_tree_root *root,
   1394				struct radix_tree_iter *iter, void __rcu **slot)
   1395{
   1396	if (__radix_tree_delete(root, iter->node, slot))
   1397		iter->index = iter->next_index;
   1398}
   1399EXPORT_SYMBOL(radix_tree_iter_delete);
   1400
   1401/**
   1402 * radix_tree_delete_item - delete an item from a radix tree
   1403 * @root: radix tree root
   1404 * @index: index key
   1405 * @item: expected item
   1406 *
   1407 * Remove @item at @index from the radix tree rooted at @root.
   1408 *
   1409 * Return: the deleted entry, or %NULL if it was not present
   1410 * or the entry at the given @index was not @item.
   1411 */
   1412void *radix_tree_delete_item(struct radix_tree_root *root,
   1413			     unsigned long index, void *item)
   1414{
   1415	struct radix_tree_node *node = NULL;
   1416	void __rcu **slot = NULL;
   1417	void *entry;
   1418
   1419	entry = __radix_tree_lookup(root, index, &node, &slot);
   1420	if (!slot)
   1421		return NULL;
   1422	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
   1423						get_slot_offset(node, slot))))
   1424		return NULL;
   1425
   1426	if (item && entry != item)
   1427		return NULL;
   1428
   1429	__radix_tree_delete(root, node, slot);
   1430
   1431	return entry;
   1432}
   1433EXPORT_SYMBOL(radix_tree_delete_item);
   1434
   1435/**
   1436 * radix_tree_delete - delete an entry from a radix tree
   1437 * @root: radix tree root
   1438 * @index: index key
   1439 *
   1440 * Remove the entry at @index from the radix tree rooted at @root.
   1441 *
   1442 * Return: The deleted entry, or %NULL if it was not present.
   1443 */
   1444void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
   1445{
   1446	return radix_tree_delete_item(root, index, NULL);
   1447}
   1448EXPORT_SYMBOL(radix_tree_delete);
   1449
   1450/**
   1451 *	radix_tree_tagged - test whether any items in the tree are tagged
   1452 *	@root:		radix tree root
   1453 *	@tag:		tag to test
   1454 */
   1455int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
   1456{
   1457	return root_tag_get(root, tag);
   1458}
   1459EXPORT_SYMBOL(radix_tree_tagged);
   1460
   1461/**
   1462 * idr_preload - preload for idr_alloc()
   1463 * @gfp_mask: allocation mask to use for preloading
   1464 *
   1465 * Preallocate memory to use for the next call to idr_alloc().  This function
   1466 * returns with preemption disabled.  It will be enabled by idr_preload_end().
   1467 */
   1468void idr_preload(gfp_t gfp_mask)
   1469{
   1470	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
   1471		local_lock(&radix_tree_preloads.lock);
   1472}
   1473EXPORT_SYMBOL(idr_preload);
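/*
 * A typical allocation sketch (my_idr, my_lock, ptr and id are illustrative
 * names); idr_alloc() lives in lib/idr.c and ultimately calls idr_get_free()
 * below:
 *
 *	DEFINE_IDR(my_idr);
 *
 *	idr_preload(GFP_KERNEL);
 *	spin_lock(&my_lock);
 *	id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
 *	spin_unlock(&my_lock);
 *	idr_preload_end();
 */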
   1474
   1475void __rcu **idr_get_free(struct radix_tree_root *root,
   1476			      struct radix_tree_iter *iter, gfp_t gfp,
   1477			      unsigned long max)
   1478{
   1479	struct radix_tree_node *node = NULL, *child;
   1480	void __rcu **slot = (void __rcu **)&root->xa_head;
   1481	unsigned long maxindex, start = iter->next_index;
   1482	unsigned int shift, offset = 0;
   1483
   1484 grow:
   1485	shift = radix_tree_load_root(root, &child, &maxindex);
   1486	if (!radix_tree_tagged(root, IDR_FREE))
   1487		start = max(start, maxindex + 1);
   1488	if (start > max)
   1489		return ERR_PTR(-ENOSPC);
   1490
   1491	if (start > maxindex) {
   1492		int error = radix_tree_extend(root, gfp, start, shift);
   1493		if (error < 0)
   1494			return ERR_PTR(error);
   1495		shift = error;
   1496		child = rcu_dereference_raw(root->xa_head);
   1497	}
   1498	if (start == 0 && shift == 0)
   1499		shift = RADIX_TREE_MAP_SHIFT;
   1500
   1501	while (shift) {
   1502		shift -= RADIX_TREE_MAP_SHIFT;
   1503		if (child == NULL) {
   1504			/* Have to add a child node.  */
   1505			child = radix_tree_node_alloc(gfp, node, root, shift,
   1506							offset, 0, 0);
   1507			if (!child)
   1508				return ERR_PTR(-ENOMEM);
   1509			all_tag_set(child, IDR_FREE);
   1510			rcu_assign_pointer(*slot, node_to_entry(child));
   1511			if (node)
   1512				node->count++;
   1513		} else if (!radix_tree_is_internal_node(child))
   1514			break;
   1515
   1516		node = entry_to_node(child);
   1517		offset = radix_tree_descend(node, &child, start);
   1518		if (!tag_get(node, IDR_FREE, offset)) {
   1519			offset = radix_tree_find_next_bit(node, IDR_FREE,
   1520							offset + 1);
   1521			start = next_index(start, node, offset);
   1522			if (start > max || start == 0)
   1523				return ERR_PTR(-ENOSPC);
   1524			while (offset == RADIX_TREE_MAP_SIZE) {
   1525				offset = node->offset + 1;
   1526				node = node->parent;
   1527				if (!node)
   1528					goto grow;
   1529				shift = node->shift;
   1530			}
   1531			child = rcu_dereference_raw(node->slots[offset]);
   1532		}
   1533		slot = &node->slots[offset];
   1534	}
   1535
   1536	iter->index = start;
   1537	if (node)
   1538		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
   1539	else
   1540		iter->next_index = 1;
   1541	iter->node = node;
   1542	set_iter_tags(iter, node, offset, IDR_FREE);
   1543
   1544	return slot;
   1545}
   1546
   1547/**
   1548 * idr_destroy - release all internal memory from an IDR
   1549 * @idr: idr handle
   1550 *
   1551 * After this function is called, the IDR is empty, and may be reused or
   1552 * the data structure containing it may be freed.
   1553 *
   1554 * A typical clean-up sequence for objects stored in an idr tree will use
   1555 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
   1556 * free the memory used to keep track of those objects.
   1557 */
   1558void idr_destroy(struct idr *idr)
   1559{
   1560	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
   1561	if (radix_tree_is_internal_node(node))
   1562		radix_tree_free_nodes(node);
   1563	idr->idr_rt.xa_head = NULL;
   1564	root_tag_set(&idr->idr_rt, IDR_FREE);
   1565}
   1566EXPORT_SYMBOL(idr_destroy);
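/*
 * A clean-up sketch following the sequence described above (my_idr and
 * free_one() are illustrative names):
 *
 *	static int free_one(int id, void *p, void *data)
 *	{
 *		kfree(p);
 *		return 0;
 *	}
 *
 *	idr_for_each(&my_idr, free_one, NULL);
 *	idr_destroy(&my_idr);
 */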
   1567
   1568static void
   1569radix_tree_node_ctor(void *arg)
   1570{
   1571	struct radix_tree_node *node = arg;
   1572
   1573	memset(node, 0, sizeof(*node));
   1574	INIT_LIST_HEAD(&node->private_list);
   1575}
   1576
   1577static int radix_tree_cpu_dead(unsigned int cpu)
   1578{
   1579	struct radix_tree_preload *rtp;
   1580	struct radix_tree_node *node;
   1581
   1582	/* Free per-cpu pool of preloaded nodes */
   1583	rtp = &per_cpu(radix_tree_preloads, cpu);
   1584	while (rtp->nr) {
   1585		node = rtp->nodes;
   1586		rtp->nodes = node->parent;
   1587		kmem_cache_free(radix_tree_node_cachep, node);
   1588		rtp->nr--;
   1589	}
   1590	return 0;
   1591}
   1592
   1593void __init radix_tree_init(void)
   1594{
   1595	int ret;
   1596
   1597	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
   1598	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
   1599	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
   1600	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
   1601			sizeof(struct radix_tree_node), 0,
   1602			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
   1603			radix_tree_node_ctor);
   1604	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
   1605					NULL, radix_tree_cpu_dead);
   1606	WARN_ON(ret < 0);
   1607}