cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

assoc_array.c (53133B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/* Generic associative array implementation.
      3 *
      4 * See Documentation/core-api/assoc_array.rst for information.
      5 *
      6 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
      7 * Written by David Howells (dhowells@redhat.com)
      8 */
      9//#define DEBUG
     10#include <linux/rcupdate.h>
     11#include <linux/slab.h>
     12#include <linux/err.h>
     13#include <linux/assoc_array_priv.h>
     14
     15/*
     16 * Iterate over an associative array.  The caller must hold the RCU read lock
     17 * or better.
     18 */
     19static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
     20				       const struct assoc_array_ptr *stop,
     21				       int (*iterator)(const void *leaf,
     22						       void *iterator_data),
     23				       void *iterator_data)
     24{
     25	const struct assoc_array_shortcut *shortcut;
     26	const struct assoc_array_node *node;
     27	const struct assoc_array_ptr *cursor, *ptr, *parent;
     28	unsigned long has_meta;
     29	int slot, ret;
     30
     31	cursor = root;
     32
     33begin_node:
     34	if (assoc_array_ptr_is_shortcut(cursor)) {
     35		/* Descend through a shortcut */
     36		shortcut = assoc_array_ptr_to_shortcut(cursor);
     37		cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */
     38	}
     39
     40	node = assoc_array_ptr_to_node(cursor);
     41	slot = 0;
     42
     43	/* We perform two passes of each node.
     44	 *
     45	 * The first pass does all the leaves in this node.  This means we
     46	 * don't miss any leaves if the node is split up by insertion whilst
     47	 * we're iterating over the branches rooted here (we may, however, see
     48	 * some leaves twice).
     49	 */
     50	has_meta = 0;
     51	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
     52		ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */
     53		has_meta |= (unsigned long)ptr;
     54		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
     55			/* We need a barrier between the read of the pointer,
     56			 * which is supplied by the above READ_ONCE().
     57			 */
     58			/* Invoke the callback */
     59			ret = iterator(assoc_array_ptr_to_leaf(ptr),
     60				       iterator_data);
     61			if (ret)
     62				return ret;
     63		}
     64	}
     65
     66	/* The second pass attends to all the metadata pointers.  If we follow
     67	 * one of these we may find that we don't come back here, but rather go
     68	 * back to a replacement node with the leaves in a different layout.
     69	 *
     70	 * We are guaranteed to make progress, however, as the slot number for
     71	 * a particular portion of the key space cannot change - and we
     72	 * continue at the back pointer + 1.
     73	 */
     74	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
     75		goto finished_node;
     76	slot = 0;
     77
     78continue_node:
     79	node = assoc_array_ptr_to_node(cursor);
     80	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
     81		ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */
     82		if (assoc_array_ptr_is_meta(ptr)) {
     83			cursor = ptr;
     84			goto begin_node;
     85		}
     86	}
     87
     88finished_node:
     89	/* Move up to the parent (may need to skip back over a shortcut) */
     90	parent = READ_ONCE(node->back_pointer); /* Address dependency. */
     91	slot = node->parent_slot;
     92	if (parent == stop)
     93		return 0;
     94
     95	if (assoc_array_ptr_is_shortcut(parent)) {
     96		shortcut = assoc_array_ptr_to_shortcut(parent);
     97		cursor = parent;
     98		parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */
     99		slot = shortcut->parent_slot;
    100		if (parent == stop)
    101			return 0;
    102	}
    103
    104	/* Ascend to next slot in parent node */
    105	cursor = parent;
    106	slot++;
    107	goto continue_node;
    108}
    109
    110/**
    111 * assoc_array_iterate - Pass all objects in the array to a callback
    112 * @array: The array to iterate over.
    113 * @iterator: The callback function.
    114 * @iterator_data: Private data for the callback function.
    115 *
    116 * Iterate over all the objects in an associative array.  Each one will be
    117 * presented to the iterator function.
    118 *
    119 * If the array is being modified concurrently with the iteration then it is
    120 * possible that some objects in the array will be passed to the iterator
    121 * callback more than once - though every object should be passed at least
    122 * once.  If this is undesirable then the caller must lock against modification
    123 * for the duration of this function.
    124 *
    125 * The function will return 0 if no objects were in the array or else it will
    126 * return the result of the last iterator function called.  Iteration stops
    127 * immediately if any call to the iteration function results in a non-zero
    128 * return.
    129 *
    130 * The caller should hold the RCU read lock or better if concurrent
    131 * modification is possible.
    132 */
    133int assoc_array_iterate(const struct assoc_array *array,
    134			int (*iterator)(const void *object,
    135					void *iterator_data),
    136			void *iterator_data)
    137{
    138	struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */
    139
    140	if (!root)
    141		return 0;
    142	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
    143}
    144
    145enum assoc_array_walk_status {
    146	assoc_array_walk_tree_empty,
    147	assoc_array_walk_found_terminal_node,
    148	assoc_array_walk_found_wrong_shortcut,
    149};
    150
    151struct assoc_array_walk_result {
    152	struct {
    153		struct assoc_array_node	*node;	/* Node in which leaf might be found */
    154		int		level;
    155		int		slot;
    156	} terminal_node;
    157	struct {
    158		struct assoc_array_shortcut *shortcut;
    159		int		level;
    160		int		sc_level;
    161		unsigned long	sc_segments;
    162		unsigned long	dissimilarity;
    163	} wrong_shortcut;
    164};
    165
    166/*
    167 * Navigate through the internal tree looking for the closest node to the key.
    168 */
    169static enum assoc_array_walk_status
    170assoc_array_walk(const struct assoc_array *array,
    171		 const struct assoc_array_ops *ops,
    172		 const void *index_key,
    173		 struct assoc_array_walk_result *result)
    174{
    175	struct assoc_array_shortcut *shortcut;
    176	struct assoc_array_node *node;
    177	struct assoc_array_ptr *cursor, *ptr;
    178	unsigned long sc_segments, dissimilarity;
    179	unsigned long segments;
    180	int level, sc_level, next_sc_level;
    181	int slot;
    182
    183	pr_devel("-->%s()\n", __func__);
    184
    185	cursor = READ_ONCE(array->root);  /* Address dependency. */
    186	if (!cursor)
    187		return assoc_array_walk_tree_empty;
    188
    189	level = 0;
    190
    191	/* Use segments from the key for the new leaf to navigate through the
    192	 * internal tree, skipping through nodes and shortcuts that are on
    193	 * route to the destination.  Eventually we'll come to a slot that is
    194	 * either empty or contains a leaf at which point we've found a node in
    195	 * which the leaf we're looking for might be found or into which it
    196	 * should be inserted.
    197	 */
    198jumped:
    199	segments = ops->get_key_chunk(index_key, level);
    200	pr_devel("segments[%d]: %lx\n", level, segments);
    201
    202	if (assoc_array_ptr_is_shortcut(cursor))
    203		goto follow_shortcut;
    204
    205consider_node:
    206	node = assoc_array_ptr_to_node(cursor);
    207	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
    208	slot &= ASSOC_ARRAY_FAN_MASK;
    209	ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */
    210
    211	pr_devel("consider slot %x [ix=%d type=%lu]\n",
    212		 slot, level, (unsigned long)ptr & 3);
    213
    214	if (!assoc_array_ptr_is_meta(ptr)) {
    215		/* The node doesn't have a node/shortcut pointer in the slot
    216		 * corresponding to the index key that we have to follow.
    217		 */
    218		result->terminal_node.node = node;
    219		result->terminal_node.level = level;
    220		result->terminal_node.slot = slot;
    221		pr_devel("<--%s() = terminal_node\n", __func__);
    222		return assoc_array_walk_found_terminal_node;
    223	}
    224
    225	if (assoc_array_ptr_is_node(ptr)) {
    226		/* There is a pointer to a node in the slot corresponding to
    227		 * this index key segment, so we need to follow it.
    228		 */
    229		cursor = ptr;
    230		level += ASSOC_ARRAY_LEVEL_STEP;
    231		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
    232			goto consider_node;
    233		goto jumped;
    234	}
    235
    236	/* There is a shortcut in the slot corresponding to the index key
    237	 * segment.  We follow the shortcut if its partial index key matches
    238	 * this leaf's.  Otherwise we need to split the shortcut.
    239	 */
    240	cursor = ptr;
    241follow_shortcut:
    242	shortcut = assoc_array_ptr_to_shortcut(cursor);
    243	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
    244	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
    245	BUG_ON(sc_level > shortcut->skip_to_level);
    246
    247	do {
    248		/* Check the leaf against the shortcut's index key a word at a
    249		 * time, trimming the final word (the shortcut stores the index
    250		 * key completely from the root to the shortcut's target).
    251		 */
    252		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
    253			segments = ops->get_key_chunk(index_key, sc_level);
    254
    255		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
    256		dissimilarity = segments ^ sc_segments;
    257
    258		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
    259			/* Trim segments that are beyond the shortcut */
    260			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
    261			dissimilarity &= ~(ULONG_MAX << shift);
    262			next_sc_level = shortcut->skip_to_level;
    263		} else {
    264			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
    265			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
    266		}
    267
    268		if (dissimilarity != 0) {
    269			/* This shortcut points elsewhere */
    270			result->wrong_shortcut.shortcut = shortcut;
    271			result->wrong_shortcut.level = level;
    272			result->wrong_shortcut.sc_level = sc_level;
    273			result->wrong_shortcut.sc_segments = sc_segments;
    274			result->wrong_shortcut.dissimilarity = dissimilarity;
    275			return assoc_array_walk_found_wrong_shortcut;
    276		}
    277
    278		sc_level = next_sc_level;
    279	} while (sc_level < shortcut->skip_to_level);
    280
    281	/* The shortcut matches the leaf's index to this point. */
    282	cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */
    283	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
    284		level = sc_level;
    285		goto jumped;
    286	} else {
    287		level = sc_level;
    288		goto consider_node;
    289	}
    290}
    291
    292/**
    293 * assoc_array_find - Find an object by index key
    294 * @array: The associative array to search.
    295 * @ops: The operations to use.
    296 * @index_key: The key to the object.
    297 *
    298 * Find an object in an associative array by walking through the internal tree
    299 * to the node that should contain the object and then searching the leaves
    300 * there.  NULL is returned if the requested object was not found in the array.
    301 *
    302 * The caller must hold the RCU read lock or better.
    303 */
    304void *assoc_array_find(const struct assoc_array *array,
    305		       const struct assoc_array_ops *ops,
    306		       const void *index_key)
    307{
    308	struct assoc_array_walk_result result;
    309	const struct assoc_array_node *node;
    310	const struct assoc_array_ptr *ptr;
    311	const void *leaf;
    312	int slot;
    313
    314	if (assoc_array_walk(array, ops, index_key, &result) !=
    315	    assoc_array_walk_found_terminal_node)
    316		return NULL;
    317
    318	node = result.terminal_node.node;
    319
    320	/* If the target key is available to us, it's has to be pointed to by
    321	 * the terminal node.
    322	 */
    323	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
    324		ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */
    325		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
    326			/* We need a barrier between the read of the pointer
    327			 * and dereferencing the pointer - but only if we are
    328			 * actually going to dereference it.
    329			 */
    330			leaf = assoc_array_ptr_to_leaf(ptr);
    331			if (ops->compare_object(leaf, index_key))
    332				return (void *)leaf;
    333		}
    334	}
    335
    336	return NULL;
    337}
    338
    339/*
    340 * Destructively iterate over an associative array.  The caller must prevent
    341 * other simultaneous accesses.
    342 */
    343static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
    344					const struct assoc_array_ops *ops)
    345{
    346	struct assoc_array_shortcut *shortcut;
    347	struct assoc_array_node *node;
    348	struct assoc_array_ptr *cursor, *parent = NULL;
    349	int slot = -1;
    350
    351	pr_devel("-->%s()\n", __func__);
    352
    353	cursor = root;
    354	if (!cursor) {
    355		pr_devel("empty\n");
    356		return;
    357	}
    358
    359move_to_meta:
    360	if (assoc_array_ptr_is_shortcut(cursor)) {
    361		/* Descend through a shortcut */
    362		pr_devel("[%d] shortcut\n", slot);
    363		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
    364		shortcut = assoc_array_ptr_to_shortcut(cursor);
    365		BUG_ON(shortcut->back_pointer != parent);
    366		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
    367		parent = cursor;
    368		cursor = shortcut->next_node;
    369		slot = -1;
    370		BUG_ON(!assoc_array_ptr_is_node(cursor));
    371	}
    372
    373	pr_devel("[%d] node\n", slot);
    374	node = assoc_array_ptr_to_node(cursor);
    375	BUG_ON(node->back_pointer != parent);
    376	BUG_ON(slot != -1 && node->parent_slot != slot);
    377	slot = 0;
    378
    379continue_node:
    380	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
    381	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
    382		struct assoc_array_ptr *ptr = node->slots[slot];
    383		if (!ptr)
    384			continue;
    385		if (assoc_array_ptr_is_meta(ptr)) {
    386			parent = cursor;
    387			cursor = ptr;
    388			goto move_to_meta;
    389		}
    390
    391		if (ops) {
    392			pr_devel("[%d] free leaf\n", slot);
    393			ops->free_object(assoc_array_ptr_to_leaf(ptr));
    394		}
    395	}
    396
    397	parent = node->back_pointer;
    398	slot = node->parent_slot;
    399	pr_devel("free node\n");
    400	kfree(node);
    401	if (!parent)
    402		return; /* Done */
    403
    404	/* Move back up to the parent (may need to free a shortcut on
    405	 * the way up) */
    406	if (assoc_array_ptr_is_shortcut(parent)) {
    407		shortcut = assoc_array_ptr_to_shortcut(parent);
    408		BUG_ON(shortcut->next_node != cursor);
    409		cursor = parent;
    410		parent = shortcut->back_pointer;
    411		slot = shortcut->parent_slot;
    412		pr_devel("free shortcut\n");
    413		kfree(shortcut);
    414		if (!parent)
    415			return;
    416
    417		BUG_ON(!assoc_array_ptr_is_node(parent));
    418	}
    419
    420	/* Ascend to next slot in parent node */
    421	pr_devel("ascend to %p[%d]\n", parent, slot);
    422	cursor = parent;
    423	node = assoc_array_ptr_to_node(cursor);
    424	slot++;
    425	goto continue_node;
    426}
    427
    428/**
    429 * assoc_array_destroy - Destroy an associative array
    430 * @array: The array to destroy.
    431 * @ops: The operations to use.
    432 *
    433 * Discard all metadata and free all objects in an associative array.  The
    434 * array will be empty and ready to use again upon completion.  This function
    435 * cannot fail.
    436 *
    437 * The caller must prevent all other accesses whilst this takes place as no
    438 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
    439 * accesses to continue.  On the other hand, no memory allocation is required.
    440 */
    441void assoc_array_destroy(struct assoc_array *array,
    442			 const struct assoc_array_ops *ops)
    443{
    444	assoc_array_destroy_subtree(array->root, ops);
    445	array->root = NULL;
    446}
    447
    448/*
    449 * Handle insertion into an empty tree.
    450 */
    451static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
    452{
    453	struct assoc_array_node *new_n0;
    454
    455	pr_devel("-->%s()\n", __func__);
    456
    457	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
    458	if (!new_n0)
    459		return false;
    460
    461	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
    462	edit->leaf_p = &new_n0->slots[0];
    463	edit->adjust_count_on = new_n0;
    464	edit->set[0].ptr = &edit->array->root;
    465	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
    466
    467	pr_devel("<--%s() = ok [no root]\n", __func__);
    468	return true;
    469}
    470
    471/*
    472 * Handle insertion into a terminal node.
    473 */
    474static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
    475						  const struct assoc_array_ops *ops,
    476						  const void *index_key,
    477						  struct assoc_array_walk_result *result)
    478{
    479	struct assoc_array_shortcut *shortcut, *new_s0;
    480	struct assoc_array_node *node, *new_n0, *new_n1, *side;
    481	struct assoc_array_ptr *ptr;
    482	unsigned long dissimilarity, base_seg, blank;
    483	size_t keylen;
    484	bool have_meta;
    485	int level, diff;
    486	int slot, next_slot, free_slot, i, j;
    487
    488	node	= result->terminal_node.node;
    489	level	= result->terminal_node.level;
    490	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
    491
    492	pr_devel("-->%s()\n", __func__);
    493
    494	/* We arrived at a node which doesn't have an onward node or shortcut
    495	 * pointer that we have to follow.  This means that (a) the leaf we
    496	 * want must go here (either by insertion or replacement) or (b) we
    497	 * need to split this node and insert in one of the fragments.
    498	 */
    499	free_slot = -1;
    500
    501	/* Firstly, we have to check the leaves in this node to see if there's
    502	 * a matching one we should replace in place.
    503	 */
    504	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    505		ptr = node->slots[i];
    506		if (!ptr) {
    507			free_slot = i;
    508			continue;
    509		}
    510		if (assoc_array_ptr_is_leaf(ptr) &&
    511		    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
    512					index_key)) {
    513			pr_devel("replace in slot %d\n", i);
    514			edit->leaf_p = &node->slots[i];
    515			edit->dead_leaf = node->slots[i];
    516			pr_devel("<--%s() = ok [replace]\n", __func__);
    517			return true;
    518		}
    519	}
    520
    521	/* If there is a free slot in this node then we can just insert the
    522	 * leaf here.
    523	 */
    524	if (free_slot >= 0) {
    525		pr_devel("insert in free slot %d\n", free_slot);
    526		edit->leaf_p = &node->slots[free_slot];
    527		edit->adjust_count_on = node;
    528		pr_devel("<--%s() = ok [insert]\n", __func__);
    529		return true;
    530	}
    531
    532	/* The node has no spare slots - so we're either going to have to split
    533	 * it or insert another node before it.
    534	 *
    535	 * Whatever, we're going to need at least two new nodes - so allocate
    536	 * those now.  We may also need a new shortcut, but we deal with that
    537	 * when we need it.
    538	 */
    539	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
    540	if (!new_n0)
    541		return false;
    542	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
    543	new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
    544	if (!new_n1)
    545		return false;
    546	edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
    547
    548	/* We need to find out how similar the leaves are. */
    549	pr_devel("no spare slots\n");
    550	have_meta = false;
    551	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    552		ptr = node->slots[i];
    553		if (assoc_array_ptr_is_meta(ptr)) {
    554			edit->segment_cache[i] = 0xff;
    555			have_meta = true;
    556			continue;
    557		}
    558		base_seg = ops->get_object_key_chunk(
    559			assoc_array_ptr_to_leaf(ptr), level);
    560		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
    561		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
    562	}
    563
    564	if (have_meta) {
    565		pr_devel("have meta\n");
    566		goto split_node;
    567	}
    568
    569	/* The node contains only leaves */
    570	dissimilarity = 0;
    571	base_seg = edit->segment_cache[0];
    572	for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
    573		dissimilarity |= edit->segment_cache[i] ^ base_seg;
    574
    575	pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
    576
    577	if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
    578		/* The old leaves all cluster in the same slot.  We will need
    579		 * to insert a shortcut if the new node wants to cluster with them.
    580		 */
    581		if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
    582			goto all_leaves_cluster_together;
    583
    584		/* Otherwise all the old leaves cluster in the same slot, but
    585		 * the new leaf wants to go into a different slot - so we
    586		 * create a new node (n0) to hold the new leaf and a pointer to
    587		 * a new node (n1) holding all the old leaves.
    588		 *
    589		 * This can be done by falling through to the node splitting
    590		 * path.
    591		 */
    592		pr_devel("present leaves cluster but not new leaf\n");
    593	}
    594
    595split_node:
    596	pr_devel("split node\n");
    597
    598	/* We need to split the current node.  The node must contain anything
    599	 * from a single leaf (in the one leaf case, this leaf will cluster
    600	 * with the new leaf) and the rest meta-pointers, to all leaves, some
    601	 * of which may cluster.
    602	 *
    603	 * It won't contain the case in which all the current leaves plus the
    604	 * new leaves want to cluster in the same slot.
    605	 *
    606	 * We need to expel at least two leaves out of a set consisting of the
    607	 * leaves in the node and the new leaf.  The current meta pointers can
    608	 * just be copied as they shouldn't cluster with any of the leaves.
    609	 *
    610	 * We need a new node (n0) to replace the current one and a new node to
    611	 * take the expelled nodes (n1).
    612	 */
    613	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
    614	new_n0->back_pointer = node->back_pointer;
    615	new_n0->parent_slot = node->parent_slot;
    616	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
    617	new_n1->parent_slot = -1; /* Need to calculate this */
    618
    619do_split_node:
    620	pr_devel("do_split_node\n");
    621
    622	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
    623	new_n1->nr_leaves_on_branch = 0;
    624
    625	/* Begin by finding two matching leaves.  There have to be at least two
    626	 * that match - even if there are meta pointers - because any leaf that
    627	 * would match a slot with a meta pointer in it must be somewhere
    628	 * behind that meta pointer and cannot be here.  Further, given N
    629	 * remaining leaf slots, we now have N+1 leaves to go in them.
    630	 */
    631	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    632		slot = edit->segment_cache[i];
    633		if (slot != 0xff)
    634			for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
    635				if (edit->segment_cache[j] == slot)
    636					goto found_slot_for_multiple_occupancy;
    637	}
    638found_slot_for_multiple_occupancy:
    639	pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
    640	BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
    641	BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
    642	BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
    643
    644	new_n1->parent_slot = slot;
    645
    646	/* Metadata pointers cannot change slot */
    647	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
    648		if (assoc_array_ptr_is_meta(node->slots[i]))
    649			new_n0->slots[i] = node->slots[i];
    650		else
    651			new_n0->slots[i] = NULL;
    652	BUG_ON(new_n0->slots[slot] != NULL);
    653	new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
    654
    655	/* Filter the leaf pointers between the new nodes */
    656	free_slot = -1;
    657	next_slot = 0;
    658	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    659		if (assoc_array_ptr_is_meta(node->slots[i]))
    660			continue;
    661		if (edit->segment_cache[i] == slot) {
    662			new_n1->slots[next_slot++] = node->slots[i];
    663			new_n1->nr_leaves_on_branch++;
    664		} else {
    665			do {
    666				free_slot++;
    667			} while (new_n0->slots[free_slot] != NULL);
    668			new_n0->slots[free_slot] = node->slots[i];
    669		}
    670	}
    671
    672	pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
    673
    674	if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
    675		do {
    676			free_slot++;
    677		} while (new_n0->slots[free_slot] != NULL);
    678		edit->leaf_p = &new_n0->slots[free_slot];
    679		edit->adjust_count_on = new_n0;
    680	} else {
    681		edit->leaf_p = &new_n1->slots[next_slot++];
    682		edit->adjust_count_on = new_n1;
    683	}
    684
    685	BUG_ON(next_slot <= 1);
    686
    687	edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
    688	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    689		if (edit->segment_cache[i] == 0xff) {
    690			ptr = node->slots[i];
    691			BUG_ON(assoc_array_ptr_is_leaf(ptr));
    692			if (assoc_array_ptr_is_node(ptr)) {
    693				side = assoc_array_ptr_to_node(ptr);
    694				edit->set_backpointers[i] = &side->back_pointer;
    695			} else {
    696				shortcut = assoc_array_ptr_to_shortcut(ptr);
    697				edit->set_backpointers[i] = &shortcut->back_pointer;
    698			}
    699		}
    700	}
    701
    702	ptr = node->back_pointer;
    703	if (!ptr)
    704		edit->set[0].ptr = &edit->array->root;
    705	else if (assoc_array_ptr_is_node(ptr))
    706		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
    707	else
    708		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
    709	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
    710	pr_devel("<--%s() = ok [split node]\n", __func__);
    711	return true;
    712
    713all_leaves_cluster_together:
    714	/* All the leaves, new and old, want to cluster together in this node
    715	 * in the same slot, so we have to replace this node with a shortcut to
    716	 * skip over the identical parts of the key and then place a pair of
    717	 * nodes, one inside the other, at the end of the shortcut and
    718	 * distribute the keys between them.
    719	 *
    720	 * Firstly we need to work out where the leaves start diverging as a
    721	 * bit position into their keys so that we know how big the shortcut
    722	 * needs to be.
    723	 *
    724	 * We only need to make a single pass of N of the N+1 leaves because if
    725	 * any keys differ between themselves at bit X then at least one of
    726	 * them must also differ with the base key at bit X or before.
    727	 */
    728	pr_devel("all leaves cluster together\n");
    729	diff = INT_MAX;
    730	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    731		int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
    732					  index_key);
    733		if (x < diff) {
    734			BUG_ON(x < 0);
    735			diff = x;
    736		}
    737	}
    738	BUG_ON(diff == INT_MAX);
    739	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
    740
    741	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
    742	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
    743
    744	new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL);
    745	if (!new_s0)
    746		return false;
    747	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
    748
    749	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
    750	new_s0->back_pointer = node->back_pointer;
    751	new_s0->parent_slot = node->parent_slot;
    752	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
    753	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
    754	new_n0->parent_slot = 0;
    755	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
    756	new_n1->parent_slot = -1; /* Need to calculate this */
    757
    758	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
    759	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
    760	BUG_ON(level <= 0);
    761
    762	for (i = 0; i < keylen; i++)
    763		new_s0->index_key[i] =
    764			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
    765
    766	if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) {
    767		blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
    768		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
    769		new_s0->index_key[keylen - 1] &= ~blank;
    770	}
    771
    772	/* This now reduces to a node splitting exercise for which we'll need
    773	 * to regenerate the disparity table.
    774	 */
    775	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
    776		ptr = node->slots[i];
    777		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
    778						     level);
    779		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
    780		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
    781	}
    782
    783	base_seg = ops->get_key_chunk(index_key, level);
    784	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
    785	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
    786	goto do_split_node;
    787}
    788
    789/*
    790 * Handle insertion into the middle of a shortcut.
    791 */
    792static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
    793					    const struct assoc_array_ops *ops,
    794					    struct assoc_array_walk_result *result)
    795{
    796	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
    797	struct assoc_array_node *node, *new_n0, *side;
    798	unsigned long sc_segments, dissimilarity, blank;
    799	size_t keylen;
    800	int level, sc_level, diff;
    801	int sc_slot;
    802
    803	shortcut	= result->wrong_shortcut.shortcut;
    804	level		= result->wrong_shortcut.level;
    805	sc_level	= result->wrong_shortcut.sc_level;
    806	sc_segments	= result->wrong_shortcut.sc_segments;
    807	dissimilarity	= result->wrong_shortcut.dissimilarity;
    808
    809	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
    810		 __func__, level, dissimilarity, sc_level);
    811
    812	/* We need to split a shortcut and insert a node between the two
    813	 * pieces.  Zero-length pieces will be dispensed with entirely.
    814	 *
    815	 * First of all, we need to find out in which level the first
    816	 * difference was.
    817	 */
    818	diff = __ffs(dissimilarity);
    819	diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
    820	diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
    821	pr_devel("diff=%d\n", diff);
    822
    823	if (!shortcut->back_pointer) {
    824		edit->set[0].ptr = &edit->array->root;
    825	} else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
    826		node = assoc_array_ptr_to_node(shortcut->back_pointer);
    827		edit->set[0].ptr = &node->slots[shortcut->parent_slot];
    828	} else {
    829		BUG();
    830	}
    831
    832	edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
    833
    834	/* Create a new node now since we're going to need it anyway */
    835	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
    836	if (!new_n0)
    837		return false;
    838	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
    839	edit->adjust_count_on = new_n0;
    840
    841	/* Insert a new shortcut before the new node if this segment isn't of
    842	 * zero length - otherwise we just connect the new node directly to the
    843	 * parent.
    844	 */
    845	level += ASSOC_ARRAY_LEVEL_STEP;
    846	if (diff > level) {
    847		pr_devel("pre-shortcut %d...%d\n", level, diff);
    848		keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
    849		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
    850
    851		new_s0 = kzalloc(struct_size(new_s0, index_key, keylen),
    852				 GFP_KERNEL);
    853		if (!new_s0)
    854			return false;
    855		edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
    856		edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
    857		new_s0->back_pointer = shortcut->back_pointer;
    858		new_s0->parent_slot = shortcut->parent_slot;
    859		new_s0->next_node = assoc_array_node_to_ptr(new_n0);
    860		new_s0->skip_to_level = diff;
    861
    862		new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
    863		new_n0->parent_slot = 0;
    864
    865		memcpy(new_s0->index_key, shortcut->index_key,
    866		       flex_array_size(new_s0, index_key, keylen));
    867
    868		blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
    869		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
    870		new_s0->index_key[keylen - 1] &= ~blank;
    871	} else {
    872		pr_devel("no pre-shortcut\n");
    873		edit->set[0].to = assoc_array_node_to_ptr(new_n0);
    874		new_n0->back_pointer = shortcut->back_pointer;
    875		new_n0->parent_slot = shortcut->parent_slot;
    876	}
    877
    878	side = assoc_array_ptr_to_node(shortcut->next_node);
    879	new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
    880
    881	/* We need to know which slot in the new node is going to take a
    882	 * metadata pointer.
    883	 */
    884	sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
    885	sc_slot &= ASSOC_ARRAY_FAN_MASK;
    886
    887	pr_devel("new slot %lx >> %d -> %d\n",
    888		 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
    889
    890	/* Determine whether we need to follow the new node with a replacement
    891	 * for the current shortcut.  We could in theory reuse the current
    892	 * shortcut if its parent slot number doesn't change - but that's a
    893	 * 1-in-16 chance so not worth expending the code upon.
    894	 */
    895	level = diff + ASSOC_ARRAY_LEVEL_STEP;
    896	if (level < shortcut->skip_to_level) {
    897		pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
    898		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
    899		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
    900
    901		new_s1 = kzalloc(struct_size(new_s1, index_key, keylen),
    902				 GFP_KERNEL);
    903		if (!new_s1)
    904			return false;
    905		edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
    906
    907		new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
    908		new_s1->parent_slot = sc_slot;
    909		new_s1->next_node = shortcut->next_node;
    910		new_s1->skip_to_level = shortcut->skip_to_level;
    911
    912		new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
    913
    914		memcpy(new_s1->index_key, shortcut->index_key,
    915		       flex_array_size(new_s1, index_key, keylen));
    916
    917		edit->set[1].ptr = &side->back_pointer;
    918		edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
    919	} else {
    920		pr_devel("no post-shortcut\n");
    921
    922		/* We don't have to replace the pointed-to node as long as we
    923		 * use memory barriers to make sure the parent slot number is
    924		 * changed before the back pointer (the parent slot number is
    925		 * irrelevant to the old parent shortcut).
    926		 */
    927		new_n0->slots[sc_slot] = shortcut->next_node;
    928		edit->set_parent_slot[0].p = &side->parent_slot;
    929		edit->set_parent_slot[0].to = sc_slot;
    930		edit->set[1].ptr = &side->back_pointer;
    931		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
    932	}
    933
    934	/* Install the new leaf in a spare slot in the new node. */
    935	if (sc_slot == 0)
    936		edit->leaf_p = &new_n0->slots[1];
    937	else
    938		edit->leaf_p = &new_n0->slots[0];
    939
    940	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
    941	return edit;
    942}
    943
    944/**
    945 * assoc_array_insert - Script insertion of an object into an associative array
    946 * @array: The array to insert into.
    947 * @ops: The operations to use.
    948 * @index_key: The key to insert at.
    949 * @object: The object to insert.
    950 *
    951 * Precalculate and preallocate a script for the insertion or replacement of an
    952 * object in an associative array.  This results in an edit script that can
    953 * either be applied or cancelled.
    954 *
    955 * The function returns a pointer to an edit script or -ENOMEM.
    956 *
    957 * The caller should lock against other modifications and must continue to hold
    958 * the lock until assoc_array_apply_edit() has been called.
    959 *
    960 * Accesses to the tree may take place concurrently with this function,
    961 * provided they hold the RCU read lock.
    962 */
    963struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
    964					    const struct assoc_array_ops *ops,
    965					    const void *index_key,
    966					    void *object)
    967{
    968	struct assoc_array_walk_result result;
    969	struct assoc_array_edit *edit;
    970
    971	pr_devel("-->%s()\n", __func__);
    972
    973	/* The leaf pointer we're given must not have the bottom bit set as we
    974	 * use those for type-marking the pointer.  NULL pointers are also not
    975	 * allowed as they indicate an empty slot but we have to allow them
    976	 * here as they can be updated later.
    977	 */
    978	BUG_ON(assoc_array_ptr_is_meta(object));
    979
    980	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
    981	if (!edit)
    982		return ERR_PTR(-ENOMEM);
    983	edit->array = array;
    984	edit->ops = ops;
    985	edit->leaf = assoc_array_leaf_to_ptr(object);
    986	edit->adjust_count_by = 1;
    987
    988	switch (assoc_array_walk(array, ops, index_key, &result)) {
    989	case assoc_array_walk_tree_empty:
    990		/* Allocate a root node if there isn't one yet */
    991		if (!assoc_array_insert_in_empty_tree(edit))
    992			goto enomem;
    993		return edit;
    994
    995	case assoc_array_walk_found_terminal_node:
    996		/* We found a node that doesn't have a node/shortcut pointer in
    997		 * the slot corresponding to the index key that we have to
    998		 * follow.
    999		 */
   1000		if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
   1001							   &result))
   1002			goto enomem;
   1003		return edit;
   1004
   1005	case assoc_array_walk_found_wrong_shortcut:
   1006		/* We found a shortcut that didn't match our key in a slot we
   1007		 * needed to follow.
   1008		 */
   1009		if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
   1010			goto enomem;
   1011		return edit;
   1012	}
   1013
   1014enomem:
   1015	/* Clean up after an out of memory error */
   1016	pr_devel("enomem\n");
   1017	assoc_array_cancel_edit(edit);
   1018	return ERR_PTR(-ENOMEM);
   1019}
   1020
   1021/**
   1022 * assoc_array_insert_set_object - Set the new object pointer in an edit script
   1023 * @edit: The edit script to modify.
   1024 * @object: The object pointer to set.
   1025 *
   1026 * Change the object to be inserted in an edit script.  The object pointed to
   1027 * by the old object is not freed.  This must be done prior to applying the
   1028 * script.
   1029 */
   1030void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
   1031{
   1032	BUG_ON(!object);
   1033	edit->leaf = assoc_array_leaf_to_ptr(object);
   1034}
   1035
   1036struct assoc_array_delete_collapse_context {
   1037	struct assoc_array_node	*node;
   1038	const void		*skip_leaf;
   1039	int			slot;
   1040};
   1041
   1042/*
   1043 * Subtree collapse to node iterator.
   1044 */
   1045static int assoc_array_delete_collapse_iterator(const void *leaf,
   1046						void *iterator_data)
   1047{
   1048	struct assoc_array_delete_collapse_context *collapse = iterator_data;
   1049
   1050	if (leaf == collapse->skip_leaf)
   1051		return 0;
   1052
   1053	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
   1054
   1055	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
   1056	return 0;
   1057}
   1058
   1059/**
   1060 * assoc_array_delete - Script deletion of an object from an associative array
   1061 * @array: The array to search.
   1062 * @ops: The operations to use.
   1063 * @index_key: The key to the object.
   1064 *
   1065 * Precalculate and preallocate a script for the deletion of an object from an
   1066 * associative array.  This results in an edit script that can either be
   1067 * applied or cancelled.
   1068 *
   1069 * The function returns a pointer to an edit script if the object was found,
   1070 * NULL if the object was not found or -ENOMEM.
   1071 *
   1072 * The caller should lock against other modifications and must continue to hold
   1073 * the lock until assoc_array_apply_edit() has been called.
   1074 *
   1075 * Accesses to the tree may take place concurrently with this function,
   1076 * provided they hold the RCU read lock.
   1077 */
   1078struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
   1079					    const struct assoc_array_ops *ops,
   1080					    const void *index_key)
   1081{
   1082	struct assoc_array_delete_collapse_context collapse;
   1083	struct assoc_array_walk_result result;
   1084	struct assoc_array_node *node, *new_n0;
   1085	struct assoc_array_edit *edit;
   1086	struct assoc_array_ptr *ptr;
   1087	bool has_meta;
   1088	int slot, i;
   1089
   1090	pr_devel("-->%s()\n", __func__);
   1091
   1092	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
   1093	if (!edit)
   1094		return ERR_PTR(-ENOMEM);
   1095	edit->array = array;
   1096	edit->ops = ops;
   1097	edit->adjust_count_by = -1;
   1098
   1099	switch (assoc_array_walk(array, ops, index_key, &result)) {
   1100	case assoc_array_walk_found_terminal_node:
   1101		/* We found a node that should contain the leaf we've been
   1102		 * asked to remove - *if* it's in the tree.
   1103		 */
   1104		pr_devel("terminal_node\n");
   1105		node = result.terminal_node.node;
   1106
   1107		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
   1108			ptr = node->slots[slot];
   1109			if (ptr &&
   1110			    assoc_array_ptr_is_leaf(ptr) &&
   1111			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
   1112						index_key))
   1113				goto found_leaf;
   1114		}
   1115		fallthrough;
   1116	case assoc_array_walk_tree_empty:
   1117	case assoc_array_walk_found_wrong_shortcut:
   1118	default:
   1119		assoc_array_cancel_edit(edit);
   1120		pr_devel("not found\n");
   1121		return NULL;
   1122	}
   1123
   1124found_leaf:
   1125	BUG_ON(array->nr_leaves_on_tree <= 0);
   1126
   1127	/* In the simplest form of deletion we just clear the slot and release
   1128	 * the leaf after a suitable interval.
   1129	 */
   1130	edit->dead_leaf = node->slots[slot];
   1131	edit->set[0].ptr = &node->slots[slot];
   1132	edit->set[0].to = NULL;
   1133	edit->adjust_count_on = node;
   1134
   1135	/* If that concludes erasure of the last leaf, then delete the entire
   1136	 * internal array.
   1137	 */
   1138	if (array->nr_leaves_on_tree == 1) {
   1139		edit->set[1].ptr = &array->root;
   1140		edit->set[1].to = NULL;
   1141		edit->adjust_count_on = NULL;
   1142		edit->excised_subtree = array->root;
   1143		pr_devel("all gone\n");
   1144		return edit;
   1145	}
   1146
   1147	/* However, we'd also like to clear up some metadata blocks if we
   1148	 * possibly can.
   1149	 *
   1150	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
   1151	 * leaves in it, then attempt to collapse it - and attempt to
   1152	 * recursively collapse up the tree.
   1153	 *
   1154	 * We could also try and collapse in partially filled subtrees to take
   1155	 * up space in this node.
   1156	 */
   1157	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
   1158		struct assoc_array_node *parent, *grandparent;
   1159		struct assoc_array_ptr *ptr;
   1160
   1161		/* First of all, we need to know if this node has metadata so
   1162		 * that we don't try collapsing if all the leaves are already
   1163		 * here.
   1164		 */
   1165		has_meta = false;
   1166		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
   1167			ptr = node->slots[i];
   1168			if (assoc_array_ptr_is_meta(ptr)) {
   1169				has_meta = true;
   1170				break;
   1171			}
   1172		}
   1173
   1174		pr_devel("leaves: %ld [m=%d]\n",
   1175			 node->nr_leaves_on_branch - 1, has_meta);
   1176
   1177		/* Look further up the tree to see if we can collapse this node
   1178		 * into a more proximal node too.
   1179		 */
   1180		parent = node;
   1181	collapse_up:
   1182		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
   1183
   1184		ptr = parent->back_pointer;
   1185		if (!ptr)
   1186			goto do_collapse;
   1187		if (assoc_array_ptr_is_shortcut(ptr)) {
   1188			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
   1189			ptr = s->back_pointer;
   1190			if (!ptr)
   1191				goto do_collapse;
   1192		}
   1193
   1194		grandparent = assoc_array_ptr_to_node(ptr);
   1195		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
   1196			parent = grandparent;
   1197			goto collapse_up;
   1198		}
   1199
   1200	do_collapse:
   1201		/* There's no point collapsing if the original node has no meta
   1202		 * pointers to discard and if we didn't merge into one of that
   1203		 * node's ancestry.
   1204		 */
   1205		if (has_meta || parent != node) {
   1206			node = parent;
   1207
   1208			/* Create a new node to collapse into */
   1209			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
   1210			if (!new_n0)
   1211				goto enomem;
   1212			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
   1213
   1214			new_n0->back_pointer = node->back_pointer;
   1215			new_n0->parent_slot = node->parent_slot;
   1216			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
   1217			edit->adjust_count_on = new_n0;
   1218
   1219			collapse.node = new_n0;
   1220			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
   1221			collapse.slot = 0;
   1222			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
   1223						    node->back_pointer,
   1224						    assoc_array_delete_collapse_iterator,
   1225						    &collapse);
   1226			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
   1227			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
   1228
   1229			if (!node->back_pointer) {
   1230				edit->set[1].ptr = &array->root;
   1231			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
   1232				BUG();
   1233			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
   1234				struct assoc_array_node *p =
   1235					assoc_array_ptr_to_node(node->back_pointer);
   1236				edit->set[1].ptr = &p->slots[node->parent_slot];
   1237			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
   1238				struct assoc_array_shortcut *s =
   1239					assoc_array_ptr_to_shortcut(node->back_pointer);
   1240				edit->set[1].ptr = &s->next_node;
   1241			}
   1242			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
   1243			edit->excised_subtree = assoc_array_node_to_ptr(node);
   1244		}
   1245	}
   1246
   1247	return edit;
   1248
   1249enomem:
   1250	/* Clean up after an out of memory error */
   1251	pr_devel("enomem\n");
   1252	assoc_array_cancel_edit(edit);
   1253	return ERR_PTR(-ENOMEM);
   1254}
   1255
   1256/**
   1257 * assoc_array_clear - Script deletion of all objects from an associative array
   1258 * @array: The array to clear.
   1259 * @ops: The operations to use.
   1260 *
   1261 * Precalculate and preallocate a script for the deletion of all the objects
   1262 * from an associative array.  This results in an edit script that can either
   1263 * be applied or cancelled.
   1264 *
   1265 * The function returns a pointer to an edit script if there are objects to be
   1266 * deleted, NULL if there are no objects in the array or -ENOMEM.
   1267 *
   1268 * The caller should lock against other modifications and must continue to hold
   1269 * the lock until assoc_array_apply_edit() has been called.
   1270 *
   1271 * Accesses to the tree may take place concurrently with this function,
   1272 * provided they hold the RCU read lock.
   1273 */
   1274struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
   1275					   const struct assoc_array_ops *ops)
   1276{
   1277	struct assoc_array_edit *edit;
   1278
   1279	pr_devel("-->%s()\n", __func__);
   1280
   1281	if (!array->root)
   1282		return NULL;
   1283
   1284	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
   1285	if (!edit)
   1286		return ERR_PTR(-ENOMEM);
   1287	edit->array = array;
   1288	edit->ops = ops;
   1289	edit->set[1].ptr = &array->root;
   1290	edit->set[1].to = NULL;
   1291	edit->excised_subtree = array->root;
   1292	edit->ops_for_excised_subtree = ops;
   1293	pr_devel("all gone\n");
   1294	return edit;
   1295}
   1296
   1297/*
   1298 * Handle the deferred destruction after an applied edit.
   1299 */
   1300static void assoc_array_rcu_cleanup(struct rcu_head *head)
   1301{
   1302	struct assoc_array_edit *edit =
   1303		container_of(head, struct assoc_array_edit, rcu);
   1304	int i;
   1305
   1306	pr_devel("-->%s()\n", __func__);
   1307
   1308	if (edit->dead_leaf)
   1309		edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
   1310	for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
   1311		if (edit->excised_meta[i])
   1312			kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
   1313
   1314	if (edit->excised_subtree) {
   1315		BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
   1316		if (assoc_array_ptr_is_node(edit->excised_subtree)) {
   1317			struct assoc_array_node *n =
   1318				assoc_array_ptr_to_node(edit->excised_subtree);
   1319			n->back_pointer = NULL;
   1320		} else {
   1321			struct assoc_array_shortcut *s =
   1322				assoc_array_ptr_to_shortcut(edit->excised_subtree);
   1323			s->back_pointer = NULL;
   1324		}
   1325		assoc_array_destroy_subtree(edit->excised_subtree,
   1326					    edit->ops_for_excised_subtree);
   1327	}
   1328
   1329	kfree(edit);
   1330}
   1331
   1332/**
   1333 * assoc_array_apply_edit - Apply an edit script to an associative array
   1334 * @edit: The script to apply.
   1335 *
   1336 * Apply an edit script to an associative array to effect an insertion,
   1337 * deletion or clearance.  As the edit script includes preallocated memory,
   1338 * this is guaranteed not to fail.
   1339 *
   1340 * The edit script, dead objects and dead metadata will be scheduled for
   1341 * destruction after an RCU grace period to permit those doing read-only
   1342 * accesses on the array to continue to do so under the RCU read lock whilst
   1343 * the edit is taking place.
   1344 */
   1345void assoc_array_apply_edit(struct assoc_array_edit *edit)
   1346{
   1347	struct assoc_array_shortcut *shortcut;
   1348	struct assoc_array_node *node;
   1349	struct assoc_array_ptr *ptr;
   1350	int i;
   1351
   1352	pr_devel("-->%s()\n", __func__);
   1353
   1354	smp_wmb();
   1355	if (edit->leaf_p)
   1356		*edit->leaf_p = edit->leaf;
   1357
   1358	smp_wmb();
   1359	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
   1360		if (edit->set_parent_slot[i].p)
   1361			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
   1362
   1363	smp_wmb();
   1364	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
   1365		if (edit->set_backpointers[i])
   1366			*edit->set_backpointers[i] = edit->set_backpointers_to;
   1367
   1368	smp_wmb();
   1369	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
   1370		if (edit->set[i].ptr)
   1371			*edit->set[i].ptr = edit->set[i].to;
   1372
   1373	if (edit->array->root == NULL) {
   1374		edit->array->nr_leaves_on_tree = 0;
   1375	} else if (edit->adjust_count_on) {
   1376		node = edit->adjust_count_on;
   1377		for (;;) {
   1378			node->nr_leaves_on_branch += edit->adjust_count_by;
   1379
   1380			ptr = node->back_pointer;
   1381			if (!ptr)
   1382				break;
   1383			if (assoc_array_ptr_is_shortcut(ptr)) {
   1384				shortcut = assoc_array_ptr_to_shortcut(ptr);
   1385				ptr = shortcut->back_pointer;
   1386				if (!ptr)
   1387					break;
   1388			}
   1389			BUG_ON(!assoc_array_ptr_is_node(ptr));
   1390			node = assoc_array_ptr_to_node(ptr);
   1391		}
   1392
   1393		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
   1394	}
   1395
   1396	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
   1397}
   1398
   1399/**
   1400 * assoc_array_cancel_edit - Discard an edit script.
   1401 * @edit: The script to discard.
   1402 *
   1403 * Free an edit script and all the preallocated data it holds without making
   1404 * any changes to the associative array it was intended for.
   1405 *
   1406 * NOTE!  In the case of an insertion script, this does _not_ release the leaf
   1407 * that was to be inserted.  That is left to the caller.
   1408 */
   1409void assoc_array_cancel_edit(struct assoc_array_edit *edit)
   1410{
   1411	struct assoc_array_ptr *ptr;
   1412	int i;
   1413
   1414	pr_devel("-->%s()\n", __func__);
   1415
   1416	/* Clean up after an out of memory error */
   1417	for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
   1418		ptr = edit->new_meta[i];
   1419		if (ptr) {
   1420			if (assoc_array_ptr_is_node(ptr))
   1421				kfree(assoc_array_ptr_to_node(ptr));
   1422			else
   1423				kfree(assoc_array_ptr_to_shortcut(ptr));
   1424		}
   1425	}
   1426	kfree(edit);
   1427}
   1428
   1429/**
   1430 * assoc_array_gc - Garbage collect an associative array.
   1431 * @array: The array to clean.
   1432 * @ops: The operations to use.
   1433 * @iterator: A callback function to pass judgement on each object.
   1434 * @iterator_data: Private data for the callback function.
   1435 *
   1436 * Collect garbage from an associative array and pack down the internal tree to
   1437 * save memory.
   1438 *
   1439 * The iterator function is asked to pass judgement upon each object in the
   1440 * array.  If it returns false, the object is discard and if it returns true,
   1441 * the object is kept.  If it returns true, it must increment the object's
   1442 * usage count (or whatever it needs to do to retain it) before returning.
   1443 *
   1444 * This function returns 0 if successful or -ENOMEM if out of memory.  In the
   1445 * latter case, the array is not changed.
   1446 *
   1447 * The caller should lock against other modifications and must continue to hold
   1448 * the lock until assoc_array_apply_edit() has been called.
   1449 *
   1450 * Accesses to the tree may take place concurrently with this function,
   1451 * provided they hold the RCU read lock.
   1452 */
   1453int assoc_array_gc(struct assoc_array *array,
   1454		   const struct assoc_array_ops *ops,
   1455		   bool (*iterator)(void *object, void *iterator_data),
   1456		   void *iterator_data)
   1457{
   1458	struct assoc_array_shortcut *shortcut, *new_s;
   1459	struct assoc_array_node *node, *new_n;
   1460	struct assoc_array_edit *edit;
   1461	struct assoc_array_ptr *cursor, *ptr;
   1462	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
   1463	unsigned long nr_leaves_on_tree;
   1464	bool retained;
   1465	int keylen, slot, nr_free, next_slot, i;
   1466
   1467	pr_devel("-->%s()\n", __func__);
   1468
   1469	if (!array->root)
   1470		return 0;
   1471
   1472	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
   1473	if (!edit)
   1474		return -ENOMEM;
   1475	edit->array = array;
   1476	edit->ops = ops;
   1477	edit->ops_for_excised_subtree = ops;
   1478	edit->set[0].ptr = &array->root;
   1479	edit->excised_subtree = array->root;
   1480
   1481	new_root = new_parent = NULL;
   1482	new_ptr_pp = &new_root;
   1483	cursor = array->root;
   1484
   1485descend:
   1486	/* If this point is a shortcut, then we need to duplicate it and
   1487	 * advance the target cursor.
   1488	 */
   1489	if (assoc_array_ptr_is_shortcut(cursor)) {
   1490		shortcut = assoc_array_ptr_to_shortcut(cursor);
   1491		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
   1492		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
   1493		new_s = kmalloc(struct_size(new_s, index_key, keylen),
   1494				GFP_KERNEL);
   1495		if (!new_s)
   1496			goto enomem;
   1497		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
   1498		memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen));
   1499		new_s->back_pointer = new_parent;
   1500		new_s->parent_slot = shortcut->parent_slot;
   1501		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
   1502		new_ptr_pp = &new_s->next_node;
   1503		cursor = shortcut->next_node;
   1504	}
   1505
   1506	/* Duplicate the node at this position */
   1507	node = assoc_array_ptr_to_node(cursor);
   1508	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
   1509	if (!new_n)
   1510		goto enomem;
   1511	pr_devel("dup node %p -> %p\n", node, new_n);
   1512	new_n->back_pointer = new_parent;
   1513	new_n->parent_slot = node->parent_slot;
   1514	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
   1515	new_ptr_pp = NULL;
   1516	slot = 0;
   1517
   1518continue_node:
   1519	/* Filter across any leaves and gc any subtrees */
   1520	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
   1521		ptr = node->slots[slot];
   1522		if (!ptr)
   1523			continue;
   1524
   1525		if (assoc_array_ptr_is_leaf(ptr)) {
   1526			if (iterator(assoc_array_ptr_to_leaf(ptr),
   1527				     iterator_data))
   1528				/* The iterator will have done any reference
   1529				 * counting on the object for us.
   1530				 */
   1531				new_n->slots[slot] = ptr;
   1532			continue;
   1533		}
   1534
   1535		new_ptr_pp = &new_n->slots[slot];
   1536		cursor = ptr;
   1537		goto descend;
   1538	}
   1539
   1540retry_compress:
   1541	pr_devel("-- compress node %p --\n", new_n);
   1542
   1543	/* Count up the number of empty slots in this node and work out the
   1544	 * subtree leaf count.
   1545	 */
   1546	new_n->nr_leaves_on_branch = 0;
   1547	nr_free = 0;
   1548	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
   1549		ptr = new_n->slots[slot];
   1550		if (!ptr)
   1551			nr_free++;
   1552		else if (assoc_array_ptr_is_leaf(ptr))
   1553			new_n->nr_leaves_on_branch++;
   1554	}
   1555	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
   1556
   1557	/* See what we can fold in */
   1558	retained = false;
   1559	next_slot = 0;
   1560	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
   1561		struct assoc_array_shortcut *s;
   1562		struct assoc_array_node *child;
   1563
   1564		ptr = new_n->slots[slot];
   1565		if (!ptr || assoc_array_ptr_is_leaf(ptr))
   1566			continue;
   1567
   1568		s = NULL;
   1569		if (assoc_array_ptr_is_shortcut(ptr)) {
   1570			s = assoc_array_ptr_to_shortcut(ptr);
   1571			ptr = s->next_node;
   1572		}
   1573
   1574		child = assoc_array_ptr_to_node(ptr);
   1575		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
   1576
   1577		if (child->nr_leaves_on_branch <= nr_free + 1) {
   1578			/* Fold the child node into this one */
   1579			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
   1580				 slot, child->nr_leaves_on_branch, nr_free + 1,
   1581				 next_slot);
   1582
   1583			/* We would already have reaped an intervening shortcut
   1584			 * on the way back up the tree.
   1585			 */
   1586			BUG_ON(s);
   1587
   1588			new_n->slots[slot] = NULL;
   1589			nr_free++;
   1590			if (slot < next_slot)
   1591				next_slot = slot;
   1592			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
   1593				struct assoc_array_ptr *p = child->slots[i];
   1594				if (!p)
   1595					continue;
   1596				BUG_ON(assoc_array_ptr_is_meta(p));
   1597				while (new_n->slots[next_slot])
   1598					next_slot++;
   1599				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
   1600				new_n->slots[next_slot++] = p;
   1601				nr_free--;
   1602			}
   1603			kfree(child);
   1604		} else {
   1605			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
   1606				 slot, child->nr_leaves_on_branch, nr_free + 1,
   1607				 next_slot);
   1608			retained = true;
   1609		}
   1610	}
   1611
   1612	if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
   1613		pr_devel("internal nodes remain despite enough space, retrying\n");
   1614		goto retry_compress;
   1615	}
   1616	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
   1617
   1618	nr_leaves_on_tree = new_n->nr_leaves_on_branch;
   1619
   1620	/* Excise this node if it is singly occupied by a shortcut */
   1621	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
   1622		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
   1623			if ((ptr = new_n->slots[slot]))
   1624				break;
   1625
   1626		if (assoc_array_ptr_is_meta(ptr) &&
   1627		    assoc_array_ptr_is_shortcut(ptr)) {
   1628			pr_devel("excise node %p with 1 shortcut\n", new_n);
   1629			new_s = assoc_array_ptr_to_shortcut(ptr);
   1630			new_parent = new_n->back_pointer;
   1631			slot = new_n->parent_slot;
   1632			kfree(new_n);
   1633			if (!new_parent) {
   1634				new_s->back_pointer = NULL;
   1635				new_s->parent_slot = 0;
   1636				new_root = ptr;
   1637				goto gc_complete;
   1638			}
   1639
   1640			if (assoc_array_ptr_is_shortcut(new_parent)) {
   1641				/* We can discard any preceding shortcut also */
   1642				struct assoc_array_shortcut *s =
   1643					assoc_array_ptr_to_shortcut(new_parent);
   1644
   1645				pr_devel("excise preceding shortcut\n");
   1646
   1647				new_parent = new_s->back_pointer = s->back_pointer;
   1648				slot = new_s->parent_slot = s->parent_slot;
   1649				kfree(s);
   1650				if (!new_parent) {
   1651					new_s->back_pointer = NULL;
   1652					new_s->parent_slot = 0;
   1653					new_root = ptr;
   1654					goto gc_complete;
   1655				}
   1656			}
   1657
   1658			new_s->back_pointer = new_parent;
   1659			new_s->parent_slot = slot;
   1660			new_n = assoc_array_ptr_to_node(new_parent);
   1661			new_n->slots[slot] = ptr;
   1662			goto ascend_old_tree;
   1663		}
   1664	}
   1665
   1666	/* Excise any shortcuts we might encounter that point to nodes that
   1667	 * only contain leaves.
   1668	 */
   1669	ptr = new_n->back_pointer;
   1670	if (!ptr)
   1671		goto gc_complete;
   1672
   1673	if (assoc_array_ptr_is_shortcut(ptr)) {
   1674		new_s = assoc_array_ptr_to_shortcut(ptr);
   1675		new_parent = new_s->back_pointer;
   1676		slot = new_s->parent_slot;
   1677
   1678		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
   1679			struct assoc_array_node *n;
   1680
   1681			pr_devel("excise shortcut\n");
   1682			new_n->back_pointer = new_parent;
   1683			new_n->parent_slot = slot;
   1684			kfree(new_s);
   1685			if (!new_parent) {
   1686				new_root = assoc_array_node_to_ptr(new_n);
   1687				goto gc_complete;
   1688			}
   1689
   1690			n = assoc_array_ptr_to_node(new_parent);
   1691			n->slots[slot] = assoc_array_node_to_ptr(new_n);
   1692		}
   1693	} else {
   1694		new_parent = ptr;
   1695	}
   1696	new_n = assoc_array_ptr_to_node(new_parent);
   1697
   1698ascend_old_tree:
   1699	ptr = node->back_pointer;
   1700	if (assoc_array_ptr_is_shortcut(ptr)) {
   1701		shortcut = assoc_array_ptr_to_shortcut(ptr);
   1702		slot = shortcut->parent_slot;
   1703		cursor = shortcut->back_pointer;
   1704		if (!cursor)
   1705			goto gc_complete;
   1706	} else {
   1707		slot = node->parent_slot;
   1708		cursor = ptr;
   1709	}
   1710	BUG_ON(!cursor);
   1711	node = assoc_array_ptr_to_node(cursor);
   1712	slot++;
   1713	goto continue_node;
   1714
   1715gc_complete:
   1716	edit->set[0].to = new_root;
   1717	assoc_array_apply_edit(edit);
   1718	array->nr_leaves_on_tree = nr_leaves_on_tree;
   1719	return 0;
   1720
   1721enomem:
   1722	pr_devel("enomem\n");
   1723	assoc_array_destroy_subtree(new_root, edit->ops);
   1724	kfree(edit);
   1725	return -ENOMEM;
   1726}