cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xfs_iext_tree.c (23704B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2017 Christoph Hellwig.
      4 */
      5
      6#include "xfs.h"
      7#include "xfs_shared.h"
      8#include "xfs_format.h"
      9#include "xfs_bit.h"
     10#include "xfs_log_format.h"
     11#include "xfs_trans_resv.h"
     12#include "xfs_mount.h"
     13#include "xfs_inode.h"
     14#include "xfs_trace.h"
     15
     16/*
     17 * In-core extent record layout:
     18 *
     19 * +-------+----------------------------+
     20 * | 00:53 | all 54 bits of startoff    |
     21 * | 54:63 | low 10 bits of startblock  |
     22 * +-------+----------------------------+
     23 * | 00:20 | all 21 bits of length      |
     24 * |    21 | unwritten extent bit       |
     25 * | 22:63 | high 42 bits of startblock |
     26 * +-------+----------------------------+
     27 */
     28#define XFS_IEXT_STARTOFF_MASK		xfs_mask64lo(BMBT_STARTOFF_BITLEN)
     29#define XFS_IEXT_LENGTH_MASK		xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
     30#define XFS_IEXT_STARTBLOCK_MASK	xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
     31
     32struct xfs_iext_rec {
     33	uint64_t			lo;
     34	uint64_t			hi;
     35};
     36
     37/*
     38 * Given that the length can't be a zero, only an empty hi value indicates an
     39 * unused record.
     40 */
     41static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
     42{
     43	return rec->hi == 0;
     44}
     45
     46static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
     47{
     48	rec->lo = 0;
     49	rec->hi = 0;
     50}
     51
     52static void
     53xfs_iext_set(
     54	struct xfs_iext_rec	*rec,
     55	struct xfs_bmbt_irec	*irec)
     56{
     57	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
     58	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
     59	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
     60
     61	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
     62	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
     63
     64	rec->lo |= (irec->br_startblock << 54);
     65	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
     66
     67	if (irec->br_state == XFS_EXT_UNWRITTEN)
     68		rec->hi |= (1 << 21);
     69}
     70
     71static void
     72xfs_iext_get(
     73	struct xfs_bmbt_irec	*irec,
     74	struct xfs_iext_rec	*rec)
     75{
     76	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
     77	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
     78
     79	irec->br_startblock = rec->lo >> 54;
     80	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
     81
     82	if (rec->hi & (1 << 21))
     83		irec->br_state = XFS_EXT_UNWRITTEN;
     84	else
     85		irec->br_state = XFS_EXT_NORM;
     86}
     87
     88enum {
     89	NODE_SIZE	= 256,
     90	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
     91	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
     92				sizeof(struct xfs_iext_rec),
     93};
     94
     95/*
     96 * In-core extent btree block layout:
     97 *
     98 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
     99 *
    100 * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
    101 * contain the startoffset, blockcount, startblock and unwritten extent flag.
    102 * See above for the exact format, followed by pointers to the previous and next
    103 * leaf blocks (if there are any).
    104 *
    105 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
    106 * by an equal number of pointers to the btree blocks at the next lower level.
    107 *
    108 *		+-------+-------+-------+-------+-------+----------+----------+
    109 * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
    110 *		+-------+-------+-------+-------+-------+----------+----------+
    111 *
    112 *		+-------+-------+-------+-------+-------+-------+------+-------+
    113 * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
    114 *		+-------+-------+-------+-------+-------+-------+------+-------+
    115 */
    116struct xfs_iext_node {
    117	uint64_t		keys[KEYS_PER_NODE];
    118#define XFS_IEXT_KEY_INVALID	(1ULL << 63)
    119	void			*ptrs[KEYS_PER_NODE];
    120};
    121
    122struct xfs_iext_leaf {
    123	struct xfs_iext_rec	recs[RECS_PER_LEAF];
    124	struct xfs_iext_leaf	*prev;
    125	struct xfs_iext_leaf	*next;
    126};
    127
    128inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
    129{
    130	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
    131}
    132
    133static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
    134{
    135	if (ifp->if_height == 1)
    136		return xfs_iext_count(ifp);
    137	return RECS_PER_LEAF;
    138}
    139
    140static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
    141{
    142	return &cur->leaf->recs[cur->pos];
    143}
    144
    145static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
    146		struct xfs_iext_cursor *cur)
    147{
    148	if (!cur->leaf)
    149		return false;
    150	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
    151		return false;
    152	if (xfs_iext_rec_is_empty(cur_rec(cur)))
    153		return false;
    154	return true;
    155}
    156
    157static void *
    158xfs_iext_find_first_leaf(
    159	struct xfs_ifork	*ifp)
    160{
    161	struct xfs_iext_node	*node = ifp->if_u1.if_root;
    162	int			height;
    163
    164	if (!ifp->if_height)
    165		return NULL;
    166
    167	for (height = ifp->if_height; height > 1; height--) {
    168		node = node->ptrs[0];
    169		ASSERT(node);
    170	}
    171
    172	return node;
    173}
    174
    175static void *
    176xfs_iext_find_last_leaf(
    177	struct xfs_ifork	*ifp)
    178{
    179	struct xfs_iext_node	*node = ifp->if_u1.if_root;
    180	int			height, i;
    181
    182	if (!ifp->if_height)
    183		return NULL;
    184
    185	for (height = ifp->if_height; height > 1; height--) {
    186		for (i = 1; i < KEYS_PER_NODE; i++)
    187			if (!node->ptrs[i])
    188				break;
    189		node = node->ptrs[i - 1];
    190		ASSERT(node);
    191	}
    192
    193	return node;
    194}
    195
    196void
    197xfs_iext_first(
    198	struct xfs_ifork	*ifp,
    199	struct xfs_iext_cursor	*cur)
    200{
    201	cur->pos = 0;
    202	cur->leaf = xfs_iext_find_first_leaf(ifp);
    203}
    204
    205void
    206xfs_iext_last(
    207	struct xfs_ifork	*ifp,
    208	struct xfs_iext_cursor	*cur)
    209{
    210	int			i;
    211
    212	cur->leaf = xfs_iext_find_last_leaf(ifp);
    213	if (!cur->leaf) {
    214		cur->pos = 0;
    215		return;
    216	}
    217
    218	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
    219		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
    220			break;
    221	}
    222	cur->pos = i - 1;
    223}
    224
    225void
    226xfs_iext_next(
    227	struct xfs_ifork	*ifp,
    228	struct xfs_iext_cursor	*cur)
    229{
    230	if (!cur->leaf) {
    231		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
    232		xfs_iext_first(ifp, cur);
    233		return;
    234	}
    235
    236	ASSERT(cur->pos >= 0);
    237	ASSERT(cur->pos < xfs_iext_max_recs(ifp));
    238
    239	cur->pos++;
    240	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
    241	    cur->leaf->next) {
    242		cur->leaf = cur->leaf->next;
    243		cur->pos = 0;
    244	}
    245}
    246
    247void
    248xfs_iext_prev(
    249	struct xfs_ifork	*ifp,
    250	struct xfs_iext_cursor	*cur)
    251{
    252	if (!cur->leaf) {
    253		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
    254		xfs_iext_last(ifp, cur);
    255		return;
    256	}
    257
    258	ASSERT(cur->pos >= 0);
    259	ASSERT(cur->pos <= RECS_PER_LEAF);
    260
    261recurse:
    262	do {
    263		cur->pos--;
    264		if (xfs_iext_valid(ifp, cur))
    265			return;
    266	} while (cur->pos > 0);
    267
    268	if (ifp->if_height > 1 && cur->leaf->prev) {
    269		cur->leaf = cur->leaf->prev;
    270		cur->pos = RECS_PER_LEAF;
    271		goto recurse;
    272	}
    273}
    274
    275static inline int
    276xfs_iext_key_cmp(
    277	struct xfs_iext_node	*node,
    278	int			n,
    279	xfs_fileoff_t		offset)
    280{
    281	if (node->keys[n] > offset)
    282		return 1;
    283	if (node->keys[n] < offset)
    284		return -1;
    285	return 0;
    286}
    287
    288static inline int
    289xfs_iext_rec_cmp(
    290	struct xfs_iext_rec	*rec,
    291	xfs_fileoff_t		offset)
    292{
    293	uint64_t		rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
    294	uint32_t		rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
    295
    296	if (rec_offset > offset)
    297		return 1;
    298	if (rec_offset + rec_len <= offset)
    299		return -1;
    300	return 0;
    301}
    302
    303static void *
    304xfs_iext_find_level(
    305	struct xfs_ifork	*ifp,
    306	xfs_fileoff_t		offset,
    307	int			level)
    308{
    309	struct xfs_iext_node	*node = ifp->if_u1.if_root;
    310	int			height, i;
    311
    312	if (!ifp->if_height)
    313		return NULL;
    314
    315	for (height = ifp->if_height; height > level; height--) {
    316		for (i = 1; i < KEYS_PER_NODE; i++)
    317			if (xfs_iext_key_cmp(node, i, offset) > 0)
    318				break;
    319
    320		node = node->ptrs[i - 1];
    321		if (!node)
    322			break;
    323	}
    324
    325	return node;
    326}
    327
    328static int
    329xfs_iext_node_pos(
    330	struct xfs_iext_node	*node,
    331	xfs_fileoff_t		offset)
    332{
    333	int			i;
    334
    335	for (i = 1; i < KEYS_PER_NODE; i++) {
    336		if (xfs_iext_key_cmp(node, i, offset) > 0)
    337			break;
    338	}
    339
    340	return i - 1;
    341}
    342
    343static int
    344xfs_iext_node_insert_pos(
    345	struct xfs_iext_node	*node,
    346	xfs_fileoff_t		offset)
    347{
    348	int			i;
    349
    350	for (i = 0; i < KEYS_PER_NODE; i++) {
    351		if (xfs_iext_key_cmp(node, i, offset) > 0)
    352			return i;
    353	}
    354
    355	return KEYS_PER_NODE;
    356}
    357
    358static int
    359xfs_iext_node_nr_entries(
    360	struct xfs_iext_node	*node,
    361	int			start)
    362{
    363	int			i;
    364
    365	for (i = start; i < KEYS_PER_NODE; i++) {
    366		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
    367			break;
    368	}
    369
    370	return i;
    371}
    372
    373static int
    374xfs_iext_leaf_nr_entries(
    375	struct xfs_ifork	*ifp,
    376	struct xfs_iext_leaf	*leaf,
    377	int			start)
    378{
    379	int			i;
    380
    381	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
    382		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
    383			break;
    384	}
    385
    386	return i;
    387}
    388
    389static inline uint64_t
    390xfs_iext_leaf_key(
    391	struct xfs_iext_leaf	*leaf,
    392	int			n)
    393{
    394	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
    395}
    396
    397static void
    398xfs_iext_grow(
    399	struct xfs_ifork	*ifp)
    400{
    401	struct xfs_iext_node	*node = kmem_zalloc(NODE_SIZE, KM_NOFS);
    402	int			i;
    403
    404	if (ifp->if_height == 1) {
    405		struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
    406
    407		node->keys[0] = xfs_iext_leaf_key(prev, 0);
    408		node->ptrs[0] = prev;
    409	} else  {
    410		struct xfs_iext_node *prev = ifp->if_u1.if_root;
    411
    412		ASSERT(ifp->if_height > 1);
    413
    414		node->keys[0] = prev->keys[0];
    415		node->ptrs[0] = prev;
    416	}
    417
    418	for (i = 1; i < KEYS_PER_NODE; i++)
    419		node->keys[i] = XFS_IEXT_KEY_INVALID;
    420
    421	ifp->if_u1.if_root = node;
    422	ifp->if_height++;
    423}
    424
    425static void
    426xfs_iext_update_node(
    427	struct xfs_ifork	*ifp,
    428	xfs_fileoff_t		old_offset,
    429	xfs_fileoff_t		new_offset,
    430	int			level,
    431	void			*ptr)
    432{
    433	struct xfs_iext_node	*node = ifp->if_u1.if_root;
    434	int			height, i;
    435
    436	for (height = ifp->if_height; height > level; height--) {
    437		for (i = 0; i < KEYS_PER_NODE; i++) {
    438			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
    439				break;
    440			if (node->keys[i] == old_offset)
    441				node->keys[i] = new_offset;
    442		}
    443		node = node->ptrs[i - 1];
    444		ASSERT(node);
    445	}
    446
    447	ASSERT(node == ptr);
    448}
    449
    450static struct xfs_iext_node *
    451xfs_iext_split_node(
    452	struct xfs_iext_node	**nodep,
    453	int			*pos,
    454	int			*nr_entries)
    455{
    456	struct xfs_iext_node	*node = *nodep;
    457	struct xfs_iext_node	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
    458	const int		nr_move = KEYS_PER_NODE / 2;
    459	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
    460	int			i = 0;
    461
    462	/* for sequential append operations just spill over into the new node */
    463	if (*pos == KEYS_PER_NODE) {
    464		*nodep = new;
    465		*pos = 0;
    466		*nr_entries = 0;
    467		goto done;
    468	}
    469
    470
    471	for (i = 0; i < nr_move; i++) {
    472		new->keys[i] = node->keys[nr_keep + i];
    473		new->ptrs[i] = node->ptrs[nr_keep + i];
    474
    475		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
    476		node->ptrs[nr_keep + i] = NULL;
    477	}
    478
    479	if (*pos >= nr_keep) {
    480		*nodep = new;
    481		*pos -= nr_keep;
    482		*nr_entries = nr_move;
    483	} else {
    484		*nr_entries = nr_keep;
    485	}
    486done:
    487	for (; i < KEYS_PER_NODE; i++)
    488		new->keys[i] = XFS_IEXT_KEY_INVALID;
    489	return new;
    490}
    491
    492static void
    493xfs_iext_insert_node(
    494	struct xfs_ifork	*ifp,
    495	uint64_t		offset,
    496	void			*ptr,
    497	int			level)
    498{
    499	struct xfs_iext_node	*node, *new;
    500	int			i, pos, nr_entries;
    501
    502again:
    503	if (ifp->if_height < level)
    504		xfs_iext_grow(ifp);
    505
    506	new = NULL;
    507	node = xfs_iext_find_level(ifp, offset, level);
    508	pos = xfs_iext_node_insert_pos(node, offset);
    509	nr_entries = xfs_iext_node_nr_entries(node, pos);
    510
    511	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
    512	ASSERT(nr_entries <= KEYS_PER_NODE);
    513
    514	if (nr_entries == KEYS_PER_NODE)
    515		new = xfs_iext_split_node(&node, &pos, &nr_entries);
    516
    517	/*
    518	 * Update the pointers in higher levels if the first entry changes
    519	 * in an existing node.
    520	 */
    521	if (node != new && pos == 0 && nr_entries > 0)
    522		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
    523
    524	for (i = nr_entries; i > pos; i--) {
    525		node->keys[i] = node->keys[i - 1];
    526		node->ptrs[i] = node->ptrs[i - 1];
    527	}
    528	node->keys[pos] = offset;
    529	node->ptrs[pos] = ptr;
    530
    531	if (new) {
    532		offset = new->keys[0];
    533		ptr = new;
    534		level++;
    535		goto again;
    536	}
    537}
    538
    539static struct xfs_iext_leaf *
    540xfs_iext_split_leaf(
    541	struct xfs_iext_cursor	*cur,
    542	int			*nr_entries)
    543{
    544	struct xfs_iext_leaf	*leaf = cur->leaf;
    545	struct xfs_iext_leaf	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
    546	const int		nr_move = RECS_PER_LEAF / 2;
    547	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
    548	int			i;
    549
    550	/* for sequential append operations just spill over into the new node */
    551	if (cur->pos == RECS_PER_LEAF) {
    552		cur->leaf = new;
    553		cur->pos = 0;
    554		*nr_entries = 0;
    555		goto done;
    556	}
    557
    558	for (i = 0; i < nr_move; i++) {
    559		new->recs[i] = leaf->recs[nr_keep + i];
    560		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
    561	}
    562
    563	if (cur->pos >= nr_keep) {
    564		cur->leaf = new;
    565		cur->pos -= nr_keep;
    566		*nr_entries = nr_move;
    567	} else {
    568		*nr_entries = nr_keep;
    569	}
    570done:
    571	if (leaf->next)
    572		leaf->next->prev = new;
    573	new->next = leaf->next;
    574	new->prev = leaf;
    575	leaf->next = new;
    576	return new;
    577}
    578
    579static void
    580xfs_iext_alloc_root(
    581	struct xfs_ifork	*ifp,
    582	struct xfs_iext_cursor	*cur)
    583{
    584	ASSERT(ifp->if_bytes == 0);
    585
    586	ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
    587	ifp->if_height = 1;
    588
    589	/* now that we have a node step into it */
    590	cur->leaf = ifp->if_u1.if_root;
    591	cur->pos = 0;
    592}
    593
    594static void
    595xfs_iext_realloc_root(
    596	struct xfs_ifork	*ifp,
    597	struct xfs_iext_cursor	*cur)
    598{
    599	int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
    600	void *new;
    601
    602	/* account for the prev/next pointers */
    603	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
    604		new_size = NODE_SIZE;
    605
    606	new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
    607	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
    608	ifp->if_u1.if_root = new;
    609	cur->leaf = new;
    610}
    611
    612/*
    613 * Increment the sequence counter on extent tree changes. If we are on a COW
    614 * fork, this allows the writeback code to skip looking for a COW extent if the
    615 * COW fork hasn't changed. We use WRITE_ONCE here to ensure the update to the
    616 * sequence counter is seen before the modifications to the extent tree itself
    617 * take effect.
    618 */
    619static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
    620{
    621	WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
    622}
    623
    624void
    625xfs_iext_insert(
    626	struct xfs_inode	*ip,
    627	struct xfs_iext_cursor	*cur,
    628	struct xfs_bmbt_irec	*irec,
    629	int			state)
    630{
    631	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
    632	xfs_fileoff_t		offset = irec->br_startoff;
    633	struct xfs_iext_leaf	*new = NULL;
    634	int			nr_entries, i;
    635
    636	xfs_iext_inc_seq(ifp);
    637
    638	if (ifp->if_height == 0)
    639		xfs_iext_alloc_root(ifp, cur);
    640	else if (ifp->if_height == 1)
    641		xfs_iext_realloc_root(ifp, cur);
    642
    643	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
    644	ASSERT(nr_entries <= RECS_PER_LEAF);
    645	ASSERT(cur->pos >= nr_entries ||
    646	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
    647
    648	if (nr_entries == RECS_PER_LEAF)
    649		new = xfs_iext_split_leaf(cur, &nr_entries);
    650
    651	/*
    652	 * Update the pointers in higher levels if the first entry changes
    653	 * in an existing node.
    654	 */
    655	if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
    656		xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
    657				offset, 1, cur->leaf);
    658	}
    659
    660	for (i = nr_entries; i > cur->pos; i--)
    661		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
    662	xfs_iext_set(cur_rec(cur), irec);
    663	ifp->if_bytes += sizeof(struct xfs_iext_rec);
    664
    665	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
    666
    667	if (new)
    668		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
    669}
    670
    671static struct xfs_iext_node *
    672xfs_iext_rebalance_node(
    673	struct xfs_iext_node	*parent,
    674	int			*pos,
    675	struct xfs_iext_node	*node,
    676	int			nr_entries)
    677{
    678	/*
    679	 * If the neighbouring nodes are completely full, or have different
    680	 * parents, we might never be able to merge our node, and will only
    681	 * delete it once the number of entries hits zero.
    682	 */
    683	if (nr_entries == 0)
    684		return node;
    685
    686	if (*pos > 0) {
    687		struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
    688		int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
    689
    690		if (nr_prev + nr_entries <= KEYS_PER_NODE) {
    691			for (i = 0; i < nr_entries; i++) {
    692				prev->keys[nr_prev + i] = node->keys[i];
    693				prev->ptrs[nr_prev + i] = node->ptrs[i];
    694			}
    695			return node;
    696		}
    697	}
    698
    699	if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
    700		struct xfs_iext_node *next = parent->ptrs[*pos + 1];
    701		int nr_next = xfs_iext_node_nr_entries(next, 0), i;
    702
    703		if (nr_entries + nr_next <= KEYS_PER_NODE) {
    704			/*
    705			 * Merge the next node into this node so that we don't
    706			 * have to do an additional update of the keys in the
    707			 * higher levels.
    708			 */
    709			for (i = 0; i < nr_next; i++) {
    710				node->keys[nr_entries + i] = next->keys[i];
    711				node->ptrs[nr_entries + i] = next->ptrs[i];
    712			}
    713
    714			++*pos;
    715			return next;
    716		}
    717	}
    718
    719	return NULL;
    720}
    721
    722static void
    723xfs_iext_remove_node(
    724	struct xfs_ifork	*ifp,
    725	xfs_fileoff_t		offset,
    726	void			*victim)
    727{
    728	struct xfs_iext_node	*node, *parent;
    729	int			level = 2, pos, nr_entries, i;
    730
    731	ASSERT(level <= ifp->if_height);
    732	node = xfs_iext_find_level(ifp, offset, level);
    733	pos = xfs_iext_node_pos(node, offset);
    734again:
    735	ASSERT(node->ptrs[pos]);
    736	ASSERT(node->ptrs[pos] == victim);
    737	kmem_free(victim);
    738
    739	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
    740	offset = node->keys[0];
    741	for (i = pos; i < nr_entries; i++) {
    742		node->keys[i] = node->keys[i + 1];
    743		node->ptrs[i] = node->ptrs[i + 1];
    744	}
    745	node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
    746	node->ptrs[nr_entries] = NULL;
    747
    748	if (pos == 0 && nr_entries > 0) {
    749		xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
    750		offset = node->keys[0];
    751	}
    752
    753	if (nr_entries >= KEYS_PER_NODE / 2)
    754		return;
    755
    756	if (level < ifp->if_height) {
    757		/*
    758		 * If we aren't at the root yet try to find a neighbour node to
    759		 * merge with (or delete the node if it is empty), and then
    760		 * recurse up to the next level.
    761		 */
    762		level++;
    763		parent = xfs_iext_find_level(ifp, offset, level);
    764		pos = xfs_iext_node_pos(parent, offset);
    765
    766		ASSERT(pos != KEYS_PER_NODE);
    767		ASSERT(parent->ptrs[pos] == node);
    768
    769		node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
    770		if (node) {
    771			victim = node;
    772			node = parent;
    773			goto again;
    774		}
    775	} else if (nr_entries == 1) {
    776		/*
    777		 * If we are at the root and only one entry is left we can just
    778		 * free this node and update the root pointer.
    779		 */
    780		ASSERT(node == ifp->if_u1.if_root);
    781		ifp->if_u1.if_root = node->ptrs[0];
    782		ifp->if_height--;
    783		kmem_free(node);
    784	}
    785}
    786
    787static void
    788xfs_iext_rebalance_leaf(
    789	struct xfs_ifork	*ifp,
    790	struct xfs_iext_cursor	*cur,
    791	struct xfs_iext_leaf	*leaf,
    792	xfs_fileoff_t		offset,
    793	int			nr_entries)
    794{
    795	/*
    796	 * If the neighbouring nodes are completely full we might never be able
    797	 * to merge our node, and will only delete it once the number of
    798	 * entries hits zero.
    799	 */
    800	if (nr_entries == 0)
    801		goto remove_node;
    802
    803	if (leaf->prev) {
    804		int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
    805
    806		if (nr_prev + nr_entries <= RECS_PER_LEAF) {
    807			for (i = 0; i < nr_entries; i++)
    808				leaf->prev->recs[nr_prev + i] = leaf->recs[i];
    809
    810			if (cur->leaf == leaf) {
    811				cur->leaf = leaf->prev;
    812				cur->pos += nr_prev;
    813			}
    814			goto remove_node;
    815		}
    816	}
    817
    818	if (leaf->next) {
    819		int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
    820
    821		if (nr_entries + nr_next <= RECS_PER_LEAF) {
    822			/*
    823			 * Merge the next node into this node so that we don't
    824			 * have to do an additional update of the keys in the
    825			 * higher levels.
    826			 */
    827			for (i = 0; i < nr_next; i++) {
    828				leaf->recs[nr_entries + i] =
    829					leaf->next->recs[i];
    830			}
    831
    832			if (cur->leaf == leaf->next) {
    833				cur->leaf = leaf;
    834				cur->pos += nr_entries;
    835			}
    836
    837			offset = xfs_iext_leaf_key(leaf->next, 0);
    838			leaf = leaf->next;
    839			goto remove_node;
    840		}
    841	}
    842
    843	return;
    844remove_node:
    845	if (leaf->prev)
    846		leaf->prev->next = leaf->next;
    847	if (leaf->next)
    848		leaf->next->prev = leaf->prev;
    849	xfs_iext_remove_node(ifp, offset, leaf);
    850}
    851
    852static void
    853xfs_iext_free_last_leaf(
    854	struct xfs_ifork	*ifp)
    855{
    856	ifp->if_height--;
    857	kmem_free(ifp->if_u1.if_root);
    858	ifp->if_u1.if_root = NULL;
    859}
    860
    861void
    862xfs_iext_remove(
    863	struct xfs_inode	*ip,
    864	struct xfs_iext_cursor	*cur,
    865	int			state)
    866{
    867	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
    868	struct xfs_iext_leaf	*leaf = cur->leaf;
    869	xfs_fileoff_t		offset = xfs_iext_leaf_key(leaf, 0);
    870	int			i, nr_entries;
    871
    872	trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
    873
    874	ASSERT(ifp->if_height > 0);
    875	ASSERT(ifp->if_u1.if_root != NULL);
    876	ASSERT(xfs_iext_valid(ifp, cur));
    877
    878	xfs_iext_inc_seq(ifp);
    879
    880	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
    881	for (i = cur->pos; i < nr_entries; i++)
    882		leaf->recs[i] = leaf->recs[i + 1];
    883	xfs_iext_rec_clear(&leaf->recs[nr_entries]);
    884	ifp->if_bytes -= sizeof(struct xfs_iext_rec);
    885
    886	if (cur->pos == 0 && nr_entries > 0) {
    887		xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
    888				leaf);
    889		offset = xfs_iext_leaf_key(leaf, 0);
    890	} else if (cur->pos == nr_entries) {
    891		if (ifp->if_height > 1 && leaf->next)
    892			cur->leaf = leaf->next;
    893		else
    894			cur->leaf = NULL;
    895		cur->pos = 0;
    896	}
    897
    898	if (nr_entries >= RECS_PER_LEAF / 2)
    899		return;
    900
    901	if (ifp->if_height > 1)
    902		xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
    903	else if (nr_entries == 0)
    904		xfs_iext_free_last_leaf(ifp);
    905}
    906
    907/*
    908 * Lookup the extent covering bno.
    909 *
    910 * If there is an extent covering bno return the extent index, and store the
    911 * expanded extent structure in *gotp, and the extent cursor in *cur.
    912 * If there is no extent covering bno, but there is an extent after it (e.g.
    913 * it lies in a hole) return that extent in *gotp and its cursor in *cur
    914 * instead.
    915 * If bno is beyond the last extent return false, and return an invalid
    916 * cursor value.
    917 */
    918bool
    919xfs_iext_lookup_extent(
    920	struct xfs_inode	*ip,
    921	struct xfs_ifork	*ifp,
    922	xfs_fileoff_t		offset,
    923	struct xfs_iext_cursor	*cur,
    924	struct xfs_bmbt_irec	*gotp)
    925{
    926	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
    927
    928	cur->leaf = xfs_iext_find_level(ifp, offset, 1);
    929	if (!cur->leaf) {
    930		cur->pos = 0;
    931		return false;
    932	}
    933
    934	for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
    935		struct xfs_iext_rec *rec = cur_rec(cur);
    936
    937		if (xfs_iext_rec_is_empty(rec))
    938			break;
    939		if (xfs_iext_rec_cmp(rec, offset) >= 0)
    940			goto found;
    941	}
    942
    943	/* Try looking in the next node for an entry > offset */
    944	if (ifp->if_height == 1 || !cur->leaf->next)
    945		return false;
    946	cur->leaf = cur->leaf->next;
    947	cur->pos = 0;
    948	if (!xfs_iext_valid(ifp, cur))
    949		return false;
    950found:
    951	xfs_iext_get(gotp, cur_rec(cur));
    952	return true;
    953}
    954
    955/*
    956 * Returns the last extent before end, and if this extent doesn't cover
    957 * end, update end to the end of the extent.
    958 */
    959bool
    960xfs_iext_lookup_extent_before(
    961	struct xfs_inode	*ip,
    962	struct xfs_ifork	*ifp,
    963	xfs_fileoff_t		*end,
    964	struct xfs_iext_cursor	*cur,
    965	struct xfs_bmbt_irec	*gotp)
    966{
    967	/* could be optimized to not even look up the next on a match.. */
    968	if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
    969	    gotp->br_startoff <= *end - 1)
    970		return true;
    971	if (!xfs_iext_prev_extent(ifp, cur, gotp))
    972		return false;
    973	*end = gotp->br_startoff + gotp->br_blockcount;
    974	return true;
    975}
    976
    977void
    978xfs_iext_update_extent(
    979	struct xfs_inode	*ip,
    980	int			state,
    981	struct xfs_iext_cursor	*cur,
    982	struct xfs_bmbt_irec	*new)
    983{
    984	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
    985
    986	xfs_iext_inc_seq(ifp);
    987
    988	if (cur->pos == 0) {
    989		struct xfs_bmbt_irec	old;
    990
    991		xfs_iext_get(&old, cur_rec(cur));
    992		if (new->br_startoff != old.br_startoff) {
    993			xfs_iext_update_node(ifp, old.br_startoff,
    994					new->br_startoff, 1, cur->leaf);
    995		}
    996	}
    997
    998	trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
    999	xfs_iext_set(cur_rec(cur), new);
   1000	trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
   1001}
   1002
   1003/*
   1004 * Return true if the cursor points at an extent and return the extent structure
   1005 * in gotp.  Else return false.
   1006 */
   1007bool
   1008xfs_iext_get_extent(
   1009	struct xfs_ifork	*ifp,
   1010	struct xfs_iext_cursor	*cur,
   1011	struct xfs_bmbt_irec	*gotp)
   1012{
   1013	if (!xfs_iext_valid(ifp, cur))
   1014		return false;
   1015	xfs_iext_get(gotp, cur_rec(cur));
   1016	return true;
   1017}
   1018
   1019/*
   1020 * This is a recursive function, because of that we need to be extremely
   1021 * careful with stack usage.
   1022 */
   1023static void
   1024xfs_iext_destroy_node(
   1025	struct xfs_iext_node	*node,
   1026	int			level)
   1027{
   1028	int			i;
   1029
   1030	if (level > 1) {
   1031		for (i = 0; i < KEYS_PER_NODE; i++) {
   1032			if (node->keys[i] == XFS_IEXT_KEY_INVALID)
   1033				break;
   1034			xfs_iext_destroy_node(node->ptrs[i], level - 1);
   1035		}
   1036	}
   1037
   1038	kmem_free(node);
   1039}
   1040
   1041void
   1042xfs_iext_destroy(
   1043	struct xfs_ifork	*ifp)
   1044{
   1045	xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
   1046
   1047	ifp->if_bytes = 0;
   1048	ifp->if_height = 0;
   1049	ifp->if_u1.if_root = NULL;
   1050}