cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xfs_rmap_btree.c (18162B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2014 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"

static struct kmem_cache	*xfs_rmapbt_cur_cache;

/*
 * Reverse map btree.
 *
 * This is a per-ag tree used to track the owner(s) of a given extent. With
 * reflink it is possible for there to be multiple owners, which is a departure
 * from classic XFS. Owner records for data extents are inserted when the
 * extent is mapped and removed when an extent is unmapped.  Owner records for
 * all other block types (i.e. metadata) are inserted when an extent is
 * allocated and removed when an extent is freed. There can only be one owner
 * of a metadata extent, usually an inode or some other metadata structure like
 * an AG btree.
 *
 * The rmap btree is part of the free space management, so blocks for the tree
 * are sourced from the agfl. Hence we need transaction reservation support for
 * this tree so that the freelist is always large enough. This also impacts on
 * the minimum space we need to leave free in the AG.
 *
 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
 * but it is the only way to enforce unique keys when a block can be owned by
 * multiple files at any offset. There's no need to order/search by extent
 * size for online updating/management of the tree. It is intended that most
 * reverse lookups will be to find the owner(s) of a particular block, or to
 * try to recover tree and file data from corrupt primary metadata.
 */
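
/*
 * For example (illustrative values, not taken from this source): with
 * reflink enabled, AG block 100 can be mapped by several files at once,
 * so the tree may hold records keyed
 *	[agbno 100, owner inode 131, offset 0]
 *	[agbno 100, owner inode 142, offset 25]
 *	[agbno 101, owner inode 131, offset 1]
 * which sort first by AG block, then by owner, then by file offset.
 */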

static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
				cur->bc_ag.agbp, cur->bc_ag.pag);
}

STATIC void
xfs_rmapbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	int			btnum = cur->bc_btnum;

	ASSERT(ptr->s != 0);

	agf->agf_roots[btnum] = ptr->s;
	be32_add_cpu(&agf->agf_levels[btnum], inc);
	cur->bc_ag.pag->pagf_levels[btnum] += inc;

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

STATIC int
xfs_rmapbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
				       &bno, 1);
	if (error)
		return error;

	trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);

	new->s = cpu_to_be32(bno);
	be32_add_cpu(&agf->agf_rmap_blocks, 1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);

	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);

	*stat = 1;
	return 0;
}

STATIC int
xfs_rmapbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
			bno, 1);
	be32_add_cpu(&agf->agf_rmap_blocks, -1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;

	xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
			      XFS_EXTENT_BUSY_SKIP_DISCARD);

	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
	return 0;
}

STATIC int
xfs_rmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mnr[level != 0];
}

STATIC int
xfs_rmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mxr[level != 0];
}

STATIC void
xfs_rmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
}

/*
 * The high key for a reverse mapping record can be computed by shifting
 * the startblock and offset to the highest value that would still map
 * to that record.  In practice this means that we add blockcount-1 to
 * the startblock for all records, and if the record is for a data/attr
 * fork mapping, we add blockcount-1 to the offset too.
 */
STATIC void
xfs_rmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
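
/*
 * Worked example for the high-key computation above (illustrative
 * values): a record [startblock 64, blockcount 8, owner inode 131,
 * offset 16] yields the high key [startblock 71, owner 131, offset 23],
 * i.e. blockcount - 1 = 7 is added to the startblock and, because this
 * is an inode-owned data fork mapping, to the offset as well.
 */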

STATIC void
xfs_rmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

STATIC void
xfs_rmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	ptr->s = agf->agf_roots[cur->bc_btnum];
}

STATIC int64_t
xfs_rmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
	y = rec->rm_offset;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

STATIC int64_t
xfs_rmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		       be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	x = be64_to_cpu(kp1->rm_owner);
	y = be64_to_cpu(kp2->rm_owner);
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
	y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}
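
/*
 * Both comparison helpers above walk the sort key in order: startblock
 * first, then owner, then the masked offset.  For instance, comparing
 * keys [100, 131, 5] and [100, 142, 0] (illustrative values) returns a
 * negative value from the owner comparison, so the first key sorts
 * lower even though its offset is larger.
 */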

static xfs_failaddr_t
xfs_rmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level or owner as
	 * the perag is not fully initialised and hence not attached to the
	 * buffer.  In this case, check against the maximum tree depth.
	 *
	 * Similarly, during log recovery we will have a perag structure
	 * attached, but the agf information will not yet have been initialised
	 * from the on disk AGF. Again, we can only check against maximum limits
	 * in this case.
	 */
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_sblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (pag && pag->pagf_init) {
		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
			return __this_address;
	} else if (level >= mp->m_rmap_maxlevels)
		return __this_address;

	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}

static void
xfs_rmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_rmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_sblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
	.name			= "xfs_rmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_read		= xfs_rmapbt_read_verify,
	.verify_write		= xfs_rmapbt_write_verify,
	.verify_struct		= xfs_rmapbt_verify,
};

STATIC int
xfs_rmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC int
xfs_rmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

static const struct xfs_btree_ops xfs_rmapbt_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	.key_len		= 2 * sizeof(struct xfs_rmap_key),

	.dup_cursor		= xfs_rmapbt_dup_cursor,
	.set_root		= xfs_rmapbt_set_root,
	.alloc_block		= xfs_rmapbt_alloc_block,
	.free_block		= xfs_rmapbt_free_block,
	.get_minrecs		= xfs_rmapbt_get_minrecs,
	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
};
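
/*
 * Note that .key_len above is twice the size of an rmap key because
 * this is an overlapping btree: each node entry carries both a low and
 * a high key, which lets range queries find every record overlapping a
 * given extent rather than just the first one.
 */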

static struct xfs_btree_cur *
xfs_rmapbt_init_common(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	/* Overlapping btree; 2 keys per pointer. */
	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
			mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
	cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
	cur->bc_ops = &xfs_rmapbt_ops;

	/* take a reference for the cursor */
	atomic_inc(&pag->pag_ref);
	cur->bc_ag.pag = pag;

	return cur;
}

/* Create a new reverse mapping btree cursor. */
struct xfs_btree_cur *
xfs_rmapbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, tp, pag);
	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
	cur->bc_ag.agbp = agbp;
	return cur;
}

/* Create a new reverse mapping btree cursor with a fake root for staging. */
struct xfs_btree_cur *
xfs_rmapbt_stage_cursor(
	struct xfs_mount	*mp,
	struct xbtree_afakeroot	*afake,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, NULL, pag);
	xfs_btree_stage_afakeroot(cur, afake);
	return cur;
}

/*
 * Install a new reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
	agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
				    XFS_AGF_RMAP_BLOCKS);
	xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
}
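
/*
 * A typical staging sequence (e.g. when rebuilding the tree): create a
 * cursor with xfs_rmapbt_stage_cursor(), bulk-load the new records into
 * blocks tracked by the fake root, then call
 * xfs_rmapbt_commit_staged_btree() to point the AGF at the rebuilt tree
 * in one logged update.
 */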

/* Calculate number of records in a reverse mapping btree block. */
static inline unsigned int
xfs_rmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}

/*
 * Calculate number of records in an rmap btree block.
 */
int
xfs_rmapbt_maxrecs(
	int			blocklen,
	int			leaf)
{
	blocklen -= XFS_RMAP_BLOCK_LEN;
	return xfs_rmapbt_block_maxrecs(blocklen, leaf);
}
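
/*
 * For example, assuming 4096-byte blocks, a 56-byte CRC-enabled short
 * block header, 24-byte rmap records, 20-byte rmap keys, and 4-byte
 * block pointers: a leaf holds (4096 - 56) / 24 = 168 records and a
 * node holds (4096 - 56) / (2 * 20 + 4) = 91 key/pointer sets.
 */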

/* Compute the max possible height for reverse mapping btrees. */
unsigned int
xfs_rmapbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;

	minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
	 *
	 * On a reflink filesystem, each AG block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records.  However, we're likely to run
	 * out of blocks in the AG long before that happens, which means that
	 * we must compute the max height based on what the btree will look
	 * like if it consumes almost all the blocks in the AG due to maximal
	 * sharing factor.
	 */
	return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
}
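
/*
 * Plugging in the smallest supported V5 block size (assuming 1024-byte
 * blocks and the 56-byte header, with the record/key sizes noted
 * earlier): minrecs[0] = ((1024 - 56) / 24) / 2 = 20 and minrecs[1] =
 * ((1024 - 56) / 44) / 2 = 11, so the worst-case height is that of a
 * tree of half-full 1k blocks filling the largest possible AG.
 */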

/* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount		*mp)
{
	if (!xfs_has_rmapbt(mp)) {
		mp->m_rmap_maxlevels = 0;
		return;
	}

	if (xfs_has_reflink(mp)) {
		/*
		 * Compute the asymptotic maxlevels for an rmap btree on a
		 * filesystem that supports reflink.
		 *
		 * On a reflink filesystem, each AG block can have up to 2^32
		 * (per the refcount record format) owners, which means that
		 * theoretically we could face up to 2^64 rmap records.
		 * However, we're likely to run out of blocks in the AG long
		 * before that happens, which means that we must compute the
		 * max height based on what the btree will look like if it
		 * consumes almost all the blocks in the AG due to maximal
		 * sharing factor.
		 */
		mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
				mp->m_sb.sb_agblocks);
	} else {
		/*
		 * If there's no block sharing, compute the maximum rmapbt
		 * height assuming one rmap record per AG block.
		 */
		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	}
	ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
}

/* Calculate the rmap btree size for some records. */
xfs_extlen_t
xfs_rmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
xfs_extlen_t
xfs_rmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rmap_mxr[0] == 0)
		return 0;

	return xfs_rmapbt_calc_size(mp, agblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
int
xfs_rmapbt_calc_reserves(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_extlen_t		*ask,
	xfs_extlen_t		*used)
{
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	xfs_agblock_t		agblocks;
	xfs_extlen_t		tree_len;
	int			error;

	if (!xfs_has_rmapbt(mp))
		return 0;

	error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
	if (error)
		return error;

	agf = agbp->b_addr;
	agblocks = be32_to_cpu(agf->agf_length);
	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
	xfs_trans_brelse(tp, agbp);

	/*
	 * The log is permanently allocated, so the space it occupies will
	 * never be available for the kinds of things that would require btree
	 * expansion.  We therefore can pretend the space isn't there.
	 */
	if (mp->m_sb.sb_logstart &&
	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno)
		agblocks -= mp->m_sb.sb_logblocks;

	/* Reserve 1% of the AG or enough for 1 block per record. */
	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
	*used += tree_len;

	return error;
}
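
/*
 * For instance (illustrative numbers): on an AG with 1,000,000 usable
 * blocks the reservation asked for is max(1000000 / 100,
 * xfs_rmapbt_max_size(mp, 1000000)), i.e. at least 10,000 blocks or the
 * size of a maximally populated rmap btree, whichever is larger.
 */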

int __init
xfs_rmapbt_init_cur_cache(void)
{
	xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_rmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rmapbt_cur_cache);
	xfs_rmapbt_cur_cache = NULL;
}