cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xfs_alloc.c (97214B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
      4 * All Rights Reserved.
      5 */
      6#include "xfs.h"
      7#include "xfs_fs.h"
      8#include "xfs_format.h"
      9#include "xfs_log_format.h"
     10#include "xfs_shared.h"
     11#include "xfs_trans_resv.h"
     12#include "xfs_bit.h"
     13#include "xfs_mount.h"
     14#include "xfs_defer.h"
     15#include "xfs_btree.h"
     16#include "xfs_rmap.h"
     17#include "xfs_alloc_btree.h"
     18#include "xfs_alloc.h"
     19#include "xfs_extent_busy.h"
     20#include "xfs_errortag.h"
     21#include "xfs_error.h"
     22#include "xfs_trace.h"
     23#include "xfs_trans.h"
     24#include "xfs_buf_item.h"
     25#include "xfs_log.h"
     26#include "xfs_ag.h"
     27#include "xfs_ag_resv.h"
     28#include "xfs_bmap.h"
     29
     30struct kmem_cache	*xfs_extfree_item_cache;
     31
     32struct workqueue_struct *xfs_alloc_wq;
     33
     34#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
     35
     36#define	XFSA_FIXUP_BNO_OK	1
     37#define	XFSA_FIXUP_CNT_OK	2
     38
     39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
     40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
     41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
     42
     43/*
      44 * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of slots in
     45 * the beginning of the block for a proper header with the location information
     46 * and CRC.
     47 */
     48unsigned int
     49xfs_agfl_size(
     50	struct xfs_mount	*mp)
     51{
     52	unsigned int		size = mp->m_sb.sb_sectsize;
     53
     54	if (xfs_has_crc(mp))
     55		size -= sizeof(struct xfs_agfl);
     56
     57	return size / sizeof(xfs_agblock_t);
     58}
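/*
 * Worked example (illustrative, not in the original source): on a V5
 * (CRC-enabled) filesystem with 512-byte sectors, the packed struct
 * xfs_agfl header (magic, seqno, uuid, lsn, crc) takes 36 bytes, leaving
 * (512 - 36) / sizeof(xfs_agblock_t) = 476 / 4 = 119 AGFL slots; without
 * CRCs the whole sector holds 512 / 4 = 128 slots.
 */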
     59
     60unsigned int
     61xfs_refc_block(
     62	struct xfs_mount	*mp)
     63{
     64	if (xfs_has_rmapbt(mp))
     65		return XFS_RMAP_BLOCK(mp) + 1;
     66	if (xfs_has_finobt(mp))
     67		return XFS_FIBT_BLOCK(mp) + 1;
     68	return XFS_IBT_BLOCK(mp) + 1;
     69}
     70
     71xfs_extlen_t
     72xfs_prealloc_blocks(
     73	struct xfs_mount	*mp)
     74{
     75	if (xfs_has_reflink(mp))
     76		return xfs_refc_block(mp) + 1;
     77	if (xfs_has_rmapbt(mp))
     78		return XFS_RMAP_BLOCK(mp) + 1;
     79	if (xfs_has_finobt(mp))
     80		return XFS_FIBT_BLOCK(mp) + 1;
     81	return XFS_IBT_BLOCK(mp) + 1;
     82}
     83
     84/*
     85 * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
     86 * guarantee that we can refill the AGFL prior to allocating space in a nearly
      87 * full AG.  Although the space described by the free space btrees, the
     88 * blocks used by the freesp btrees themselves, and the blocks owned by the
     89 * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
     90 * free space in the AG drop so low that the free space btrees cannot refill an
     91 * empty AGFL up to the minimum level.  Rather than grind through empty AGs
     92 * until the fs goes down, we subtract this many AG blocks from the incore
     93 * fdblocks to ensure user allocation does not overcommit the space the
     94 * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
     95 * withhold space from xfs_mod_fdblocks, so we do not account for that here.
     96 */
     97#define XFS_ALLOCBT_AGFL_RESERVE	4
     98
     99/*
    100 * Compute the number of blocks that we set aside to guarantee the ability to
    101 * refill the AGFL and handle a full bmap btree split.
    102 *
    103 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
    104 * AGF buffer (PV 947395), we place constraints on the relationship among
    105 * actual allocations for data blocks, freelist blocks, and potential file data
    106 * bmap btree blocks. However, these restrictions may result in no actual space
    107 * allocated for a delayed extent, for example, a data block in a certain AG is
    108 * allocated but there is no additional block for the additional bmap btree
    109 * block due to a split of the bmap btree of the file. The result of this may
    110 * lead to an infinite loop when the file gets flushed to disk and all delayed
    111 * extents need to be actually allocated. To get around this, we explicitly set
    112 * aside a few blocks which will not be reserved in delayed allocation.
    113 *
    114 * For each AG, we need to reserve enough blocks to replenish a totally empty
    115 * AGFL and 4 more to handle a potential split of the file's bmap btree.
    116 */
    117unsigned int
    118xfs_alloc_set_aside(
    119	struct xfs_mount	*mp)
    120{
    121	return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
    122}
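/*
 * Worked example (illustrative): with sb_agcount = 4, the set-aside is
 * 4 * (XFS_ALLOCBT_AGFL_RESERVE + 4) = 4 * 8 = 32 blocks withheld from
 * incore fdblocks, enough to refill each AGFL and absorb one full bmap
 * btree split per AG.
 */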
    123
    124/*
    125 * When deciding how much space to allocate out of an AG, we limit the
     126 * allocation maximum size to the size of the AG. However, we cannot use all the
    127 * blocks in the AG - some are permanently used by metadata. These
    128 * blocks are generally:
    129 *	- the AG superblock, AGF, AGI and AGFL
    130 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
    131 *	  the AGI free inode and rmap btree root blocks.
    132 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
    133 *	- the rmapbt root block
    134 *
    135 * The AG headers are sector sized, so the amount of space they take up is
    136 * dependent on filesystem geometry. The others are all single blocks.
    137 */
    138unsigned int
    139xfs_alloc_ag_max_usable(
    140	struct xfs_mount	*mp)
    141{
    142	unsigned int		blocks;
    143
    144	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
    145	blocks += XFS_ALLOCBT_AGFL_RESERVE;
    146	blocks += 3;			/* AGF, AGI btree root blocks */
    147	if (xfs_has_finobt(mp))
    148		blocks++;		/* finobt root block */
    149	if (xfs_has_rmapbt(mp))
    150		blocks++;		/* rmap root block */
    151	if (xfs_has_reflink(mp))
    152		blocks++;		/* refcount root block */
    153
    154	return mp->m_sb.sb_agblocks - blocks;
    155}
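/*
 * Worked example (illustrative, assuming 4096-byte blocks and 512-byte
 * sectors, so the four sector-sized AG headers round up to one filesystem
 * block, with finobt, rmapbt and reflink all enabled): blocks = 1 + 4 + 3
 * + 1 + 1 + 1 = 11, so at most sb_agblocks - 11 blocks of the AG are
 * usable for allocation.
 */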
    156
    157/*
    158 * Lookup the record equal to [bno, len] in the btree given by cur.
    159 */
    160STATIC int				/* error */
    161xfs_alloc_lookup_eq(
    162	struct xfs_btree_cur	*cur,	/* btree cursor */
    163	xfs_agblock_t		bno,	/* starting block of extent */
    164	xfs_extlen_t		len,	/* length of extent */
    165	int			*stat)	/* success/failure */
    166{
    167	int			error;
    168
    169	cur->bc_rec.a.ar_startblock = bno;
    170	cur->bc_rec.a.ar_blockcount = len;
    171	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
    172	cur->bc_ag.abt.active = (*stat == 1);
    173	return error;
    174}
    175
    176/*
    177 * Lookup the first record greater than or equal to [bno, len]
    178 * in the btree given by cur.
    179 */
    180int				/* error */
    181xfs_alloc_lookup_ge(
    182	struct xfs_btree_cur	*cur,	/* btree cursor */
    183	xfs_agblock_t		bno,	/* starting block of extent */
    184	xfs_extlen_t		len,	/* length of extent */
    185	int			*stat)	/* success/failure */
    186{
    187	int			error;
    188
    189	cur->bc_rec.a.ar_startblock = bno;
    190	cur->bc_rec.a.ar_blockcount = len;
    191	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
    192	cur->bc_ag.abt.active = (*stat == 1);
    193	return error;
    194}
    195
    196/*
    197 * Lookup the first record less than or equal to [bno, len]
    198 * in the btree given by cur.
    199 */
    200int					/* error */
    201xfs_alloc_lookup_le(
    202	struct xfs_btree_cur	*cur,	/* btree cursor */
    203	xfs_agblock_t		bno,	/* starting block of extent */
    204	xfs_extlen_t		len,	/* length of extent */
    205	int			*stat)	/* success/failure */
    206{
    207	int			error;
    208	cur->bc_rec.a.ar_startblock = bno;
    209	cur->bc_rec.a.ar_blockcount = len;
    210	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
    211	cur->bc_ag.abt.active = (*stat == 1);
    212	return error;
    213}
    214
    215static inline bool
    216xfs_alloc_cur_active(
    217	struct xfs_btree_cur	*cur)
    218{
    219	return cur && cur->bc_ag.abt.active;
    220}
    221
    222/*
    223 * Update the record referred to by cur to the value given
    224 * by [bno, len].
    225 * This either works (return 0) or gets an EFSCORRUPTED error.
    226 */
    227STATIC int				/* error */
    228xfs_alloc_update(
    229	struct xfs_btree_cur	*cur,	/* btree cursor */
    230	xfs_agblock_t		bno,	/* starting block of extent */
    231	xfs_extlen_t		len)	/* length of extent */
    232{
    233	union xfs_btree_rec	rec;
    234
    235	rec.alloc.ar_startblock = cpu_to_be32(bno);
    236	rec.alloc.ar_blockcount = cpu_to_be32(len);
    237	return xfs_btree_update(cur, &rec);
    238}
    239
    240/*
    241 * Get the data from the pointed-to record.
    242 */
    243int					/* error */
    244xfs_alloc_get_rec(
    245	struct xfs_btree_cur	*cur,	/* btree cursor */
    246	xfs_agblock_t		*bno,	/* output: starting block of extent */
    247	xfs_extlen_t		*len,	/* output: length of extent */
    248	int			*stat)	/* output: success/failure */
    249{
    250	struct xfs_mount	*mp = cur->bc_mp;
    251	xfs_agnumber_t		agno = cur->bc_ag.pag->pag_agno;
    252	union xfs_btree_rec	*rec;
    253	int			error;
    254
    255	error = xfs_btree_get_rec(cur, &rec, stat);
    256	if (error || !(*stat))
    257		return error;
    258
    259	*bno = be32_to_cpu(rec->alloc.ar_startblock);
    260	*len = be32_to_cpu(rec->alloc.ar_blockcount);
    261
    262	if (*len == 0)
    263		goto out_bad_rec;
    264
    265	/* check for valid extent range, including overflow */
    266	if (!xfs_verify_agbno(mp, agno, *bno))
    267		goto out_bad_rec;
    268	if (*bno > *bno + *len)
    269		goto out_bad_rec;
    270	if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
    271		goto out_bad_rec;
    272
    273	return 0;
    274
    275out_bad_rec:
    276	xfs_warn(mp,
    277		"%s Freespace BTree record corruption in AG %d detected!",
    278		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
    279	xfs_warn(mp,
    280		"start block 0x%x block count 0x%x", *bno, *len);
    281	return -EFSCORRUPTED;
    282}
    283
    284/*
    285 * Compute aligned version of the found extent.
    286 * Takes alignment and min length into account.
    287 */
    288STATIC bool
    289xfs_alloc_compute_aligned(
    290	xfs_alloc_arg_t	*args,		/* allocation argument structure */
    291	xfs_agblock_t	foundbno,	/* starting block in found extent */
    292	xfs_extlen_t	foundlen,	/* length in found extent */
    293	xfs_agblock_t	*resbno,	/* result block number */
    294	xfs_extlen_t	*reslen,	/* result length */
    295	unsigned	*busy_gen)
    296{
    297	xfs_agblock_t	bno = foundbno;
    298	xfs_extlen_t	len = foundlen;
    299	xfs_extlen_t	diff;
    300	bool		busy;
    301
    302	/* Trim busy sections out of found extent */
    303	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
    304
    305	/*
    306	 * If we have a largish extent that happens to start before min_agbno,
    307	 * see if we can shift it into range...
    308	 */
    309	if (bno < args->min_agbno && bno + len > args->min_agbno) {
    310		diff = args->min_agbno - bno;
    311		if (len > diff) {
    312			bno += diff;
    313			len -= diff;
    314		}
    315	}
    316
    317	if (args->alignment > 1 && len >= args->minlen) {
    318		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
    319
    320		diff = aligned_bno - bno;
    321
    322		*resbno = aligned_bno;
    323		*reslen = diff >= len ? 0 : len - diff;
    324	} else {
    325		*resbno = bno;
    326		*reslen = len;
    327	}
    328
    329	return busy;
    330}
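/*
 * Worked example (illustrative): with args->alignment = 4 and a
 * busy-trimmed extent [bno = 7, len = 20], aligned_bno = roundup(7, 4) = 8
 * and diff = 1, so the result is [*resbno = 8, *reslen = 19].  If rounding
 * up consumes the whole extent (diff >= len), *reslen is clamped to 0 and
 * the caller's minlen check rejects the extent.
 */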
    331
    332/*
    333 * Compute best start block and diff for "near" allocations.
    334 * freelen >= wantlen already checked by caller.
    335 */
    336STATIC xfs_extlen_t			/* difference value (absolute) */
    337xfs_alloc_compute_diff(
    338	xfs_agblock_t	wantbno,	/* target starting block */
    339	xfs_extlen_t	wantlen,	/* target length */
    340	xfs_extlen_t	alignment,	/* target alignment */
    341	int		datatype,	/* are we allocating data? */
    342	xfs_agblock_t	freebno,	/* freespace's starting block */
    343	xfs_extlen_t	freelen,	/* freespace's length */
    344	xfs_agblock_t	*newbnop)	/* result: best start block from free */
    345{
    346	xfs_agblock_t	freeend;	/* end of freespace extent */
    347	xfs_agblock_t	newbno1;	/* return block number */
    348	xfs_agblock_t	newbno2;	/* other new block number */
    349	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
    350	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
    351	xfs_agblock_t	wantend;	/* end of target extent */
    352	bool		userdata = datatype & XFS_ALLOC_USERDATA;
    353
    354	ASSERT(freelen >= wantlen);
    355	freeend = freebno + freelen;
    356	wantend = wantbno + wantlen;
    357	/*
    358	 * We want to allocate from the start of a free extent if it is past
    359	 * the desired block or if we are allocating user data and the free
    360	 * extent is before desired block. The second case is there to allow
    361	 * for contiguous allocation from the remaining free space if the file
    362	 * grows in the short term.
    363	 */
    364	if (freebno >= wantbno || (userdata && freeend < wantend)) {
    365		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
    366			newbno1 = NULLAGBLOCK;
    367	} else if (freeend >= wantend && alignment > 1) {
    368		newbno1 = roundup(wantbno, alignment);
    369		newbno2 = newbno1 - alignment;
    370		if (newbno1 >= freeend)
    371			newbno1 = NULLAGBLOCK;
    372		else
    373			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
    374		if (newbno2 < freebno)
    375			newbno2 = NULLAGBLOCK;
    376		else
    377			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
    378		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
    379			if (newlen1 < newlen2 ||
    380			    (newlen1 == newlen2 &&
    381			     XFS_ABSDIFF(newbno1, wantbno) >
    382			     XFS_ABSDIFF(newbno2, wantbno)))
    383				newbno1 = newbno2;
    384		} else if (newbno2 != NULLAGBLOCK)
    385			newbno1 = newbno2;
    386	} else if (freeend >= wantend) {
    387		newbno1 = wantbno;
    388	} else if (alignment > 1) {
    389		newbno1 = roundup(freeend - wantlen, alignment);
    390		if (newbno1 > freeend - wantlen &&
    391		    newbno1 - alignment >= freebno)
    392			newbno1 -= alignment;
    393		else if (newbno1 >= freeend)
    394			newbno1 = NULLAGBLOCK;
    395	} else
    396		newbno1 = freeend - wantlen;
    397	*newbnop = newbno1;
    398	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
    399}
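/*
 * Worked example (illustrative): wantbno = 100, wantlen = 10,
 * alignment = 1, non-userdata.  A free extent [freebno = 90, freelen = 40]
 * has freebno < wantbno and freeend = 130 >= wantend = 110, so the request
 * is carved at *newbnop = wantbno = 100 with diff = 0 (perfect locality).
 * A free extent [110, 40] instead hits the freebno >= wantbno case, giving
 * *newbnop = 110 and diff = 10.
 */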
    400
    401/*
    402 * Fix up the length, based on mod and prod.
    403 * len should be k * prod + mod for some k.
    404 * If len is too small it is returned unchanged.
    405 * If len hits maxlen it is left alone.
    406 */
    407STATIC void
    408xfs_alloc_fix_len(
    409	xfs_alloc_arg_t	*args)		/* allocation argument structure */
    410{
    411	xfs_extlen_t	k;
    412	xfs_extlen_t	rlen;
    413
    414	ASSERT(args->mod < args->prod);
    415	rlen = args->len;
    416	ASSERT(rlen >= args->minlen);
    417	ASSERT(rlen <= args->maxlen);
    418	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
    419	    (args->mod == 0 && rlen < args->prod))
    420		return;
    421	k = rlen % args->prod;
    422	if (k == args->mod)
    423		return;
    424	if (k > args->mod)
    425		rlen = rlen - (k - args->mod);
    426	else
    427		rlen = rlen - args->prod + (args->mod - k);
    428	/* casts to (int) catch length underflows */
    429	if ((int)rlen < (int)args->minlen)
    430		return;
    431	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
    432	ASSERT(rlen % args->prod == args->mod);
    433	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
    434		rlen + args->minleft);
    435	args->len = rlen;
    436}
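/*
 * Worked example (illustrative): prod = 5, mod = 2, rlen = 13 gives
 * k = 13 % 5 = 3 > mod, so rlen becomes 13 - (3 - 2) = 12 and
 * 12 % 5 == 2 == mod as required.  For rlen = 11, k = 1 < mod, so rlen
 * becomes 11 - 5 + (2 - 1) = 7; the length is only ever trimmed, never
 * grown past maxlen.
 */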
    437
    438/*
    439 * Update the two btrees, logically removing from freespace the extent
    440 * starting at rbno, rlen blocks.  The extent is contained within the
    441 * actual (current) free extent fbno for flen blocks.
    442 * Flags are passed in indicating whether the cursors are set to the
    443 * relevant records.
    444 */
    445STATIC int				/* error code */
    446xfs_alloc_fixup_trees(
    447	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
    448	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
    449	xfs_agblock_t	fbno,		/* starting block of free extent */
    450	xfs_extlen_t	flen,		/* length of free extent */
    451	xfs_agblock_t	rbno,		/* starting block of returned extent */
    452	xfs_extlen_t	rlen,		/* length of returned extent */
    453	int		flags)		/* flags, XFSA_FIXUP_... */
    454{
    455	int		error;		/* error code */
    456	int		i;		/* operation results */
    457	xfs_agblock_t	nfbno1;		/* first new free startblock */
    458	xfs_agblock_t	nfbno2;		/* second new free startblock */
    459	xfs_extlen_t	nflen1=0;	/* first new free length */
    460	xfs_extlen_t	nflen2=0;	/* second new free length */
    461	struct xfs_mount *mp;
    462
    463	mp = cnt_cur->bc_mp;
    464
    465	/*
    466	 * Look up the record in the by-size tree if necessary.
    467	 */
    468	if (flags & XFSA_FIXUP_CNT_OK) {
    469#ifdef DEBUG
    470		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
    471			return error;
    472		if (XFS_IS_CORRUPT(mp,
    473				   i != 1 ||
    474				   nfbno1 != fbno ||
    475				   nflen1 != flen))
    476			return -EFSCORRUPTED;
    477#endif
    478	} else {
    479		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
    480			return error;
    481		if (XFS_IS_CORRUPT(mp, i != 1))
    482			return -EFSCORRUPTED;
    483	}
    484	/*
    485	 * Look up the record in the by-block tree if necessary.
    486	 */
    487	if (flags & XFSA_FIXUP_BNO_OK) {
    488#ifdef DEBUG
    489		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
    490			return error;
    491		if (XFS_IS_CORRUPT(mp,
    492				   i != 1 ||
    493				   nfbno1 != fbno ||
    494				   nflen1 != flen))
    495			return -EFSCORRUPTED;
    496#endif
    497	} else {
    498		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
    499			return error;
    500		if (XFS_IS_CORRUPT(mp, i != 1))
    501			return -EFSCORRUPTED;
    502	}
    503
    504#ifdef DEBUG
    505	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
    506		struct xfs_btree_block	*bnoblock;
    507		struct xfs_btree_block	*cntblock;
    508
    509		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
    510		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
    511
    512		if (XFS_IS_CORRUPT(mp,
    513				   bnoblock->bb_numrecs !=
    514				   cntblock->bb_numrecs))
    515			return -EFSCORRUPTED;
    516	}
    517#endif
    518
    519	/*
    520	 * Deal with all four cases: the allocated record is contained
    521	 * within the freespace record, so we can have new freespace
    522	 * at either (or both) end, or no freespace remaining.
    523	 */
    524	if (rbno == fbno && rlen == flen)
    525		nfbno1 = nfbno2 = NULLAGBLOCK;
    526	else if (rbno == fbno) {
    527		nfbno1 = rbno + rlen;
    528		nflen1 = flen - rlen;
    529		nfbno2 = NULLAGBLOCK;
    530	} else if (rbno + rlen == fbno + flen) {
    531		nfbno1 = fbno;
    532		nflen1 = flen - rlen;
    533		nfbno2 = NULLAGBLOCK;
    534	} else {
    535		nfbno1 = fbno;
    536		nflen1 = rbno - fbno;
    537		nfbno2 = rbno + rlen;
    538		nflen2 = (fbno + flen) - nfbno2;
    539	}
    540	/*
    541	 * Delete the entry from the by-size btree.
    542	 */
    543	if ((error = xfs_btree_delete(cnt_cur, &i)))
    544		return error;
    545	if (XFS_IS_CORRUPT(mp, i != 1))
    546		return -EFSCORRUPTED;
    547	/*
    548	 * Add new by-size btree entry(s).
    549	 */
    550	if (nfbno1 != NULLAGBLOCK) {
    551		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
    552			return error;
    553		if (XFS_IS_CORRUPT(mp, i != 0))
    554			return -EFSCORRUPTED;
    555		if ((error = xfs_btree_insert(cnt_cur, &i)))
    556			return error;
    557		if (XFS_IS_CORRUPT(mp, i != 1))
    558			return -EFSCORRUPTED;
    559	}
    560	if (nfbno2 != NULLAGBLOCK) {
    561		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
    562			return error;
    563		if (XFS_IS_CORRUPT(mp, i != 0))
    564			return -EFSCORRUPTED;
    565		if ((error = xfs_btree_insert(cnt_cur, &i)))
    566			return error;
    567		if (XFS_IS_CORRUPT(mp, i != 1))
    568			return -EFSCORRUPTED;
    569	}
    570	/*
    571	 * Fix up the by-block btree entry(s).
    572	 */
    573	if (nfbno1 == NULLAGBLOCK) {
    574		/*
    575		 * No remaining freespace, just delete the by-block tree entry.
    576		 */
    577		if ((error = xfs_btree_delete(bno_cur, &i)))
    578			return error;
    579		if (XFS_IS_CORRUPT(mp, i != 1))
    580			return -EFSCORRUPTED;
    581	} else {
    582		/*
     583		 * Update the by-block entry to start later or be shorter.
    584		 */
    585		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
    586			return error;
    587	}
    588	if (nfbno2 != NULLAGBLOCK) {
    589		/*
    590		 * 2 resulting free entries, need to add one.
    591		 */
    592		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
    593			return error;
    594		if (XFS_IS_CORRUPT(mp, i != 0))
    595			return -EFSCORRUPTED;
    596		if ((error = xfs_btree_insert(bno_cur, &i)))
    597			return error;
    598		if (XFS_IS_CORRUPT(mp, i != 1))
    599			return -EFSCORRUPTED;
    600	}
    601	return 0;
    602}
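/*
 * Worked example (illustrative) of the middle-cut case above: free extent
 * [fbno = 100, flen = 50] and returned extent [rbno = 110, rlen = 20]
 * leave two remainders, nfbno1/nflen1 = [100, 10] and nfbno2/nflen2 =
 * [130, 20], so each btree gets one record updated (or reinserted) and
 * one new record added.
 */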
    603
    604static xfs_failaddr_t
    605xfs_agfl_verify(
    606	struct xfs_buf	*bp)
    607{
    608	struct xfs_mount *mp = bp->b_mount;
    609	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
    610	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
    611	int		i;
    612
    613	/*
    614	 * There is no verification of non-crc AGFLs because mkfs does not
    615	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
    616	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
    617	 * can't verify just those entries are valid.
    618	 */
    619	if (!xfs_has_crc(mp))
    620		return NULL;
    621
    622	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
    623		return __this_address;
    624	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
    625		return __this_address;
    626	/*
    627	 * during growfs operations, the perag is not fully initialised,
    628	 * so we can't use it for any useful checking. growfs ensures we can't
    629	 * use it by using uncached buffers that don't have the perag attached
    630	 * so we can detect and avoid this problem.
    631	 */
    632	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
    633		return __this_address;
    634
    635	for (i = 0; i < xfs_agfl_size(mp); i++) {
    636		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
    637		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
    638			return __this_address;
    639	}
    640
    641	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
    642		return __this_address;
    643	return NULL;
    644}
    645
    646static void
    647xfs_agfl_read_verify(
    648	struct xfs_buf	*bp)
    649{
    650	struct xfs_mount *mp = bp->b_mount;
    651	xfs_failaddr_t	fa;
    652
    653	/*
    654	 * There is no verification of non-crc AGFLs because mkfs does not
    655	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
    656	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
    657	 * can't verify just those entries are valid.
    658	 */
    659	if (!xfs_has_crc(mp))
    660		return;
    661
    662	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
    663		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    664	else {
    665		fa = xfs_agfl_verify(bp);
    666		if (fa)
    667			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    668	}
    669}
    670
    671static void
    672xfs_agfl_write_verify(
    673	struct xfs_buf	*bp)
    674{
    675	struct xfs_mount	*mp = bp->b_mount;
    676	struct xfs_buf_log_item	*bip = bp->b_log_item;
    677	xfs_failaddr_t		fa;
    678
    679	/* no verification of non-crc AGFLs */
    680	if (!xfs_has_crc(mp))
    681		return;
    682
    683	fa = xfs_agfl_verify(bp);
    684	if (fa) {
    685		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    686		return;
    687	}
    688
    689	if (bip)
    690		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    691
    692	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
    693}
    694
    695const struct xfs_buf_ops xfs_agfl_buf_ops = {
    696	.name = "xfs_agfl",
    697	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
    698	.verify_read = xfs_agfl_read_verify,
    699	.verify_write = xfs_agfl_write_verify,
    700	.verify_struct = xfs_agfl_verify,
    701};
    702
    703/*
    704 * Read in the allocation group free block array.
    705 */
    706int					/* error */
    707xfs_alloc_read_agfl(
    708	xfs_mount_t	*mp,		/* mount point structure */
    709	xfs_trans_t	*tp,		/* transaction pointer */
    710	xfs_agnumber_t	agno,		/* allocation group number */
    711	struct xfs_buf	**bpp)		/* buffer for the ag free block array */
    712{
    713	struct xfs_buf	*bp;		/* return value */
    714	int		error;
    715
    716	ASSERT(agno != NULLAGNUMBER);
    717	error = xfs_trans_read_buf(
    718			mp, tp, mp->m_ddev_targp,
    719			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
    720			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
    721	if (error)
    722		return error;
    723	xfs_buf_set_ref(bp, XFS_AGFL_REF);
    724	*bpp = bp;
    725	return 0;
    726}
    727
    728STATIC int
    729xfs_alloc_update_counters(
    730	struct xfs_trans	*tp,
    731	struct xfs_buf		*agbp,
    732	long			len)
    733{
    734	struct xfs_agf		*agf = agbp->b_addr;
    735
    736	agbp->b_pag->pagf_freeblks += len;
    737	be32_add_cpu(&agf->agf_freeblks, len);
    738
    739	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
    740		     be32_to_cpu(agf->agf_length))) {
    741		xfs_buf_mark_corrupt(agbp);
    742		return -EFSCORRUPTED;
    743	}
    744
    745	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
    746	return 0;
    747}
    748
    749/*
    750 * Block allocation algorithm and data structures.
    751 */
    752struct xfs_alloc_cur {
    753	struct xfs_btree_cur		*cnt;	/* btree cursors */
    754	struct xfs_btree_cur		*bnolt;
    755	struct xfs_btree_cur		*bnogt;
    756	xfs_extlen_t			cur_len;/* current search length */
    757	xfs_agblock_t			rec_bno;/* extent startblock */
    758	xfs_extlen_t			rec_len;/* extent length */
    759	xfs_agblock_t			bno;	/* alloc bno */
    760	xfs_extlen_t			len;	/* alloc len */
    761	xfs_extlen_t			diff;	/* diff from search bno */
    762	unsigned int			busy_gen;/* busy state */
    763	bool				busy;
    764};
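/*
 * Illustrative lifecycle sketch (this mirrors what
 * xfs_alloc_ag_vextent_near() below actually does; it is not an addition
 * to the algorithm):
 *
 *	struct xfs_alloc_cur	acur = {};
 *
 *	error = xfs_alloc_cur_setup(args, &acur);
 *	... xfs_alloc_cur_check() / xfs_alloc_cntbt_iter() track the best
 *	    candidate in acur ...
 *	error = xfs_alloc_cur_finish(args, &acur);
 *	xfs_alloc_cur_close(&acur, error);
 */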
    765
    766/*
    767 * Set up cursors, etc. in the extent allocation cursor. This function can be
    768 * called multiple times to reset an initialized structure without having to
    769 * reallocate cursors.
    770 */
    771static int
    772xfs_alloc_cur_setup(
    773	struct xfs_alloc_arg	*args,
    774	struct xfs_alloc_cur	*acur)
    775{
    776	int			error;
    777	int			i;
    778
    779	ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
    780
    781	acur->cur_len = args->maxlen;
    782	acur->rec_bno = 0;
    783	acur->rec_len = 0;
    784	acur->bno = 0;
    785	acur->len = 0;
    786	acur->diff = -1;
    787	acur->busy = false;
    788	acur->busy_gen = 0;
    789
    790	/*
    791	 * Perform an initial cntbt lookup to check for availability of maxlen
    792	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
    793	 * attempt a small allocation.
    794	 */
    795	if (!acur->cnt)
    796		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
    797					args->agbp, args->pag, XFS_BTNUM_CNT);
    798	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
    799	if (error)
    800		return error;
    801
    802	/*
    803	 * Allocate the bnobt left and right search cursors.
    804	 */
    805	if (!acur->bnolt)
    806		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
    807					args->agbp, args->pag, XFS_BTNUM_BNO);
    808	if (!acur->bnogt)
    809		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
    810					args->agbp, args->pag, XFS_BTNUM_BNO);
    811	return i == 1 ? 0 : -ENOSPC;
    812}
    813
    814static void
    815xfs_alloc_cur_close(
    816	struct xfs_alloc_cur	*acur,
    817	bool			error)
    818{
    819	int			cur_error = XFS_BTREE_NOERROR;
    820
    821	if (error)
    822		cur_error = XFS_BTREE_ERROR;
    823
    824	if (acur->cnt)
    825		xfs_btree_del_cursor(acur->cnt, cur_error);
    826	if (acur->bnolt)
    827		xfs_btree_del_cursor(acur->bnolt, cur_error);
    828	if (acur->bnogt)
    829		xfs_btree_del_cursor(acur->bnogt, cur_error);
    830	acur->cnt = acur->bnolt = acur->bnogt = NULL;
    831}
    832
    833/*
    834 * Check an extent for allocation and track the best available candidate in the
    835 * allocation structure. The cursor is deactivated if it has entered an out of
    836 * range state based on allocation arguments. Optionally return the extent
    837 * extent geometry and allocation status if requested by the caller.
    838 */
    839static int
    840xfs_alloc_cur_check(
    841	struct xfs_alloc_arg	*args,
    842	struct xfs_alloc_cur	*acur,
    843	struct xfs_btree_cur	*cur,
    844	int			*new)
    845{
    846	int			error, i;
    847	xfs_agblock_t		bno, bnoa, bnew;
    848	xfs_extlen_t		len, lena, diff = -1;
    849	bool			busy;
    850	unsigned		busy_gen = 0;
    851	bool			deactivate = false;
    852	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
    853
    854	*new = 0;
    855
    856	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    857	if (error)
    858		return error;
    859	if (XFS_IS_CORRUPT(args->mp, i != 1))
    860		return -EFSCORRUPTED;
    861
    862	/*
    863	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
    864	 * range (i.e., walking backwards looking for a minlen extent).
    865	 */
    866	if (len < args->minlen) {
    867		deactivate = !isbnobt;
    868		goto out;
    869	}
    870
    871	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
    872					 &busy_gen);
    873	acur->busy |= busy;
    874	if (busy)
    875		acur->busy_gen = busy_gen;
    876	/* deactivate a bnobt cursor outside of locality range */
    877	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
    878		deactivate = isbnobt;
    879		goto out;
    880	}
    881	if (lena < args->minlen)
    882		goto out;
    883
    884	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
    885	xfs_alloc_fix_len(args);
    886	ASSERT(args->len >= args->minlen);
    887	if (args->len < acur->len)
    888		goto out;
    889
    890	/*
    891	 * We have an aligned record that satisfies minlen and beats or matches
    892	 * the candidate extent size. Compare locality for near allocation mode.
    893	 */
    894	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
    895	diff = xfs_alloc_compute_diff(args->agbno, args->len,
    896				      args->alignment, args->datatype,
    897				      bnoa, lena, &bnew);
    898	if (bnew == NULLAGBLOCK)
    899		goto out;
    900
    901	/*
    902	 * Deactivate a bnobt cursor with worse locality than the current best.
    903	 */
    904	if (diff > acur->diff) {
    905		deactivate = isbnobt;
    906		goto out;
    907	}
    908
    909	ASSERT(args->len > acur->len ||
    910	       (args->len == acur->len && diff <= acur->diff));
    911	acur->rec_bno = bno;
    912	acur->rec_len = len;
    913	acur->bno = bnew;
    914	acur->len = args->len;
    915	acur->diff = diff;
    916	*new = 1;
    917
    918	/*
    919	 * We're done if we found a perfect allocation. This only deactivates
    920	 * the current cursor, but this is just an optimization to terminate a
    921	 * cntbt search that otherwise runs to the edge of the tree.
    922	 */
    923	if (acur->diff == 0 && acur->len == args->maxlen)
    924		deactivate = true;
    925out:
    926	if (deactivate)
    927		cur->bc_ag.abt.active = false;
    928	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
    929				  *new);
    930	return 0;
    931}
    932
    933/*
    934 * Complete an allocation of a candidate extent. Remove the extent from both
    935 * trees and update the args structure.
    936 */
    937STATIC int
    938xfs_alloc_cur_finish(
    939	struct xfs_alloc_arg	*args,
    940	struct xfs_alloc_cur	*acur)
    941{
    942	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    943	int			error;
    944
    945	ASSERT(acur->cnt && acur->bnolt);
    946	ASSERT(acur->bno >= acur->rec_bno);
    947	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
    948	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
    949
    950	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
    951				      acur->rec_len, acur->bno, acur->len, 0);
    952	if (error)
    953		return error;
    954
    955	args->agbno = acur->bno;
    956	args->len = acur->len;
    957	args->wasfromfl = 0;
    958
    959	trace_xfs_alloc_cur(args);
    960	return 0;
    961}
    962
    963/*
    964 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
    965 * bno optimized lookup to search for extents with ideal size and locality.
    966 */
    967STATIC int
    968xfs_alloc_cntbt_iter(
    969	struct xfs_alloc_arg		*args,
    970	struct xfs_alloc_cur		*acur)
    971{
    972	struct xfs_btree_cur	*cur = acur->cnt;
    973	xfs_agblock_t		bno;
    974	xfs_extlen_t		len, cur_len;
    975	int			error;
    976	int			i;
    977
    978	if (!xfs_alloc_cur_active(cur))
    979		return 0;
    980
    981	/* locality optimized lookup */
    982	cur_len = acur->cur_len;
    983	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
    984	if (error)
    985		return error;
    986	if (i == 0)
    987		return 0;
    988	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    989	if (error)
    990		return error;
    991
    992	/* check the current record and update search length from it */
    993	error = xfs_alloc_cur_check(args, acur, cur, &i);
    994	if (error)
    995		return error;
    996	ASSERT(len >= acur->cur_len);
    997	acur->cur_len = len;
    998
    999	/*
   1000	 * We looked up the first record >= [agbno, len] above. The agbno is a
   1001	 * secondary key and so the current record may lie just before or after
   1002	 * agbno. If it is past agbno, check the previous record too so long as
   1003	 * the length matches as it may be closer. Don't check a smaller record
   1004	 * because that could deactivate our cursor.
   1005	 */
   1006	if (bno > args->agbno) {
   1007		error = xfs_btree_decrement(cur, 0, &i);
   1008		if (!error && i) {
   1009			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
   1010			if (!error && i && len == acur->cur_len)
   1011				error = xfs_alloc_cur_check(args, acur, cur,
   1012							    &i);
   1013		}
   1014		if (error)
   1015			return error;
   1016	}
   1017
   1018	/*
    1019	 * Bump the search key for the next pass: with no candidate yet, or if
    1020	 * the extent just found was already at least twice the key, step just
    1021	 * past that extent; otherwise double the search key to optimize the
    1022	 * search. Efficiency matters more here than absolute best locality.
   1023	 */
   1024	cur_len <<= 1;
   1025	if (!acur->len || acur->cur_len >= cur_len)
   1026		acur->cur_len++;
   1027	else
   1028		acur->cur_len = cur_len;
   1029
   1030	return error;
   1031}
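/*
 * Worked example (illustrative) of the key progression above: while no
 * candidate exists, each pass steps the key just past the extent it
 * examined (len + 1); once a candidate is recorded the key doubles each
 * pass, except when the lookup already landed on an extent at least twice
 * the key, which again steps to len + 1.  The cntbt scan thus converges
 * quickly instead of visiting every size class.
 */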
   1032
   1033/*
   1034 * Deal with the case where only small freespaces remain. Either return the
   1035 * contents of the last freespace record, or allocate space from the freelist if
   1036 * there is nothing in the tree.
   1037 */
   1038STATIC int			/* error */
   1039xfs_alloc_ag_vextent_small(
   1040	struct xfs_alloc_arg	*args,	/* allocation argument structure */
   1041	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
   1042	xfs_agblock_t		*fbnop,	/* result block number */
   1043	xfs_extlen_t		*flenp,	/* result length */
   1044	int			*stat)	/* status: 0-freelist, 1-normal/none */
   1045{
   1046	struct xfs_agf		*agf = args->agbp->b_addr;
   1047	int			error = 0;
   1048	xfs_agblock_t		fbno = NULLAGBLOCK;
   1049	xfs_extlen_t		flen = 0;
   1050	int			i = 0;
   1051
   1052	/*
   1053	 * If a cntbt cursor is provided, try to allocate the largest record in
   1054	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
   1055	 * allocation. Make sure to respect minleft even when pulling from the
   1056	 * freelist.
   1057	 */
   1058	if (ccur)
   1059		error = xfs_btree_decrement(ccur, 0, &i);
   1060	if (error)
   1061		goto error;
   1062	if (i) {
   1063		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
   1064		if (error)
   1065			goto error;
   1066		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
   1067			error = -EFSCORRUPTED;
   1068			goto error;
   1069		}
   1070		goto out;
   1071	}
   1072
   1073	if (args->minlen != 1 || args->alignment != 1 ||
   1074	    args->resv == XFS_AG_RESV_AGFL ||
   1075	    be32_to_cpu(agf->agf_flcount) <= args->minleft)
   1076		goto out;
   1077
   1078	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
   1079	if (error)
   1080		goto error;
   1081	if (fbno == NULLAGBLOCK)
   1082		goto out;
   1083
   1084	xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
   1085			      (args->datatype & XFS_ALLOC_NOBUSY));
   1086
   1087	if (args->datatype & XFS_ALLOC_USERDATA) {
   1088		struct xfs_buf	*bp;
   1089
   1090		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
   1091				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
   1092				args->mp->m_bsize, 0, &bp);
   1093		if (error)
   1094			goto error;
   1095		xfs_trans_binval(args->tp, bp);
   1096	}
   1097	*fbnop = args->agbno = fbno;
   1098	*flenp = args->len = 1;
   1099	if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
   1100		error = -EFSCORRUPTED;
   1101		goto error;
   1102	}
   1103	args->wasfromfl = 1;
   1104	trace_xfs_alloc_small_freelist(args);
   1105
   1106	/*
   1107	 * If we're feeding an AGFL block to something that doesn't live in the
   1108	 * free space, we need to clear out the OWN_AG rmap.
   1109	 */
   1110	error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
   1111			      &XFS_RMAP_OINFO_AG);
   1112	if (error)
   1113		goto error;
   1114
   1115	*stat = 0;
   1116	return 0;
   1117
   1118out:
   1119	/*
   1120	 * Can't do the allocation, give up.
   1121	 */
   1122	if (flen < args->minlen) {
   1123		args->agbno = NULLAGBLOCK;
   1124		trace_xfs_alloc_small_notenough(args);
   1125		flen = 0;
   1126	}
   1127	*fbnop = fbno;
   1128	*flenp = flen;
   1129	*stat = 1;
   1130	trace_xfs_alloc_small_done(args);
   1131	return 0;
   1132
   1133error:
   1134	trace_xfs_alloc_small_error(args);
   1135	return error;
   1136}
   1137
   1138/*
   1139 * Allocate a variable extent in the allocation group agno.
   1140 * Type and bno are used to determine where in the allocation group the
   1141 * extent will start.
   1142 * Extent's length (returned in *len) will be between minlen and maxlen,
   1143 * and of the form k * prod + mod unless there's nothing that large.
   1144 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
   1145 */
   1146STATIC int			/* error */
   1147xfs_alloc_ag_vextent(
   1148	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
   1149{
   1150	int		error=0;
   1151
   1152	ASSERT(args->minlen > 0);
   1153	ASSERT(args->maxlen > 0);
   1154	ASSERT(args->minlen <= args->maxlen);
   1155	ASSERT(args->mod < args->prod);
   1156	ASSERT(args->alignment > 0);
   1157
   1158	/*
   1159	 * Branch to correct routine based on the type.
   1160	 */
   1161	args->wasfromfl = 0;
   1162	switch (args->type) {
   1163	case XFS_ALLOCTYPE_THIS_AG:
   1164		error = xfs_alloc_ag_vextent_size(args);
   1165		break;
   1166	case XFS_ALLOCTYPE_NEAR_BNO:
   1167		error = xfs_alloc_ag_vextent_near(args);
   1168		break;
   1169	case XFS_ALLOCTYPE_THIS_BNO:
   1170		error = xfs_alloc_ag_vextent_exact(args);
   1171		break;
   1172	default:
   1173		ASSERT(0);
   1174		/* NOTREACHED */
   1175	}
   1176
   1177	if (error || args->agbno == NULLAGBLOCK)
   1178		return error;
   1179
   1180	ASSERT(args->len >= args->minlen);
   1181	ASSERT(args->len <= args->maxlen);
   1182	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
   1183	ASSERT(args->agbno % args->alignment == 0);
   1184
   1185	/* if not file data, insert new block into the reverse map btree */
   1186	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
   1187		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
   1188				       args->agbno, args->len, &args->oinfo);
   1189		if (error)
   1190			return error;
   1191	}
   1192
   1193	if (!args->wasfromfl) {
   1194		error = xfs_alloc_update_counters(args->tp, args->agbp,
   1195						  -((long)(args->len)));
   1196		if (error)
   1197			return error;
   1198
   1199		ASSERT(!xfs_extent_busy_search(args->mp, args->pag,
   1200					      args->agbno, args->len));
   1201	}
   1202
   1203	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
   1204
   1205	XFS_STATS_INC(args->mp, xs_allocx);
   1206	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
   1207	return error;
   1208}
   1209
   1210/*
   1211 * Allocate a variable extent at exactly agno/bno.
   1212 * Extent's length (returned in *len) will be between minlen and maxlen,
   1213 * and of the form k * prod + mod unless there's nothing that large.
   1214 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
   1215 */
   1216STATIC int			/* error */
   1217xfs_alloc_ag_vextent_exact(
   1218	xfs_alloc_arg_t	*args)	/* allocation argument structure */
   1219{
   1220	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
   1221	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
   1222	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
   1223	int		error;
   1224	xfs_agblock_t	fbno;	/* start block of found extent */
   1225	xfs_extlen_t	flen;	/* length of found extent */
   1226	xfs_agblock_t	tbno;	/* start block of busy extent */
   1227	xfs_extlen_t	tlen;	/* length of busy extent */
   1228	xfs_agblock_t	tend;	/* end block of busy extent */
   1229	int		i;	/* success/failure of operation */
   1230	unsigned	busy_gen;
   1231
   1232	ASSERT(args->alignment == 1);
   1233
   1234	/*
   1235	 * Allocate/initialize a cursor for the by-number freespace btree.
   1236	 */
   1237	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
   1238					  args->pag, XFS_BTNUM_BNO);
   1239
   1240	/*
   1241	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
   1242	 * Look for the closest free block <= bno, it must contain bno
   1243	 * if any free block does.
   1244	 */
   1245	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
   1246	if (error)
   1247		goto error0;
   1248	if (!i)
   1249		goto not_found;
   1250
   1251	/*
   1252	 * Grab the freespace record.
   1253	 */
   1254	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
   1255	if (error)
   1256		goto error0;
   1257	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
   1258		error = -EFSCORRUPTED;
   1259		goto error0;
   1260	}
   1261	ASSERT(fbno <= args->agbno);
   1262
   1263	/*
   1264	 * Check for overlapping busy extents.
   1265	 */
   1266	tbno = fbno;
   1267	tlen = flen;
   1268	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
   1269
   1270	/*
   1271	 * Give up if the start of the extent is busy, or the freespace isn't
   1272	 * long enough for the minimum request.
   1273	 */
   1274	if (tbno > args->agbno)
   1275		goto not_found;
   1276	if (tlen < args->minlen)
   1277		goto not_found;
   1278	tend = tbno + tlen;
   1279	if (tend < args->agbno + args->minlen)
   1280		goto not_found;
   1281
   1282	/*
   1283	 * End of extent will be smaller of the freespace end and the
   1284	 * maximal requested end.
   1285	 *
   1286	 * Fix the length according to mod and prod if given.
   1287	 */
   1288	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
   1289						- args->agbno;
   1290	xfs_alloc_fix_len(args);
   1291	ASSERT(args->agbno + args->len <= tend);
   1292
   1293	/*
   1294	 * We are allocating agbno for args->len
   1295	 * Allocate/initialize a cursor for the by-size btree.
   1296	 */
   1297	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
   1298					args->pag, XFS_BTNUM_CNT);
   1299	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
   1300	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
   1301				      args->len, XFSA_FIXUP_BNO_OK);
   1302	if (error) {
   1303		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
   1304		goto error0;
   1305	}
   1306
   1307	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
   1308	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   1309
   1310	args->wasfromfl = 0;
   1311	trace_xfs_alloc_exact_done(args);
   1312	return 0;
   1313
   1314not_found:
   1315	/* Didn't find it, return null. */
   1316	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
   1317	args->agbno = NULLAGBLOCK;
   1318	trace_xfs_alloc_exact_notfound(args);
   1319	return 0;
   1320
   1321error0:
   1322	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
   1323	trace_xfs_alloc_exact_error(args);
   1324	return error;
   1325}
   1326
   1327/*
   1328 * Search a given number of btree records in a given direction. Check each
   1329 * record against the good extent we've already found.
   1330 */
   1331STATIC int
   1332xfs_alloc_walk_iter(
   1333	struct xfs_alloc_arg	*args,
   1334	struct xfs_alloc_cur	*acur,
   1335	struct xfs_btree_cur	*cur,
   1336	bool			increment,
   1337	bool			find_one, /* quit on first candidate */
   1338	int			count,    /* rec count (-1 for infinite) */
   1339	int			*stat)
   1340{
   1341	int			error;
   1342	int			i;
   1343
   1344	*stat = 0;
   1345
   1346	/*
   1347	 * Search so long as the cursor is active or we find a better extent.
   1348	 * The cursor is deactivated if it extends beyond the range of the
   1349	 * current allocation candidate.
   1350	 */
   1351	while (xfs_alloc_cur_active(cur) && count) {
   1352		error = xfs_alloc_cur_check(args, acur, cur, &i);
   1353		if (error)
   1354			return error;
   1355		if (i == 1) {
   1356			*stat = 1;
   1357			if (find_one)
   1358				break;
   1359		}
   1360		if (!xfs_alloc_cur_active(cur))
   1361			break;
   1362
   1363		if (increment)
   1364			error = xfs_btree_increment(cur, 0, &i);
   1365		else
   1366			error = xfs_btree_decrement(cur, 0, &i);
   1367		if (error)
   1368			return error;
   1369		if (i == 0)
   1370			cur->bc_ag.abt.active = false;
   1371
   1372		if (count > 0)
   1373			count--;
   1374	}
   1375
   1376	return 0;
   1377}
   1378
   1379/*
   1380 * Search the by-bno and by-size btrees in parallel in search of an extent with
   1381 * ideal locality based on the NEAR mode ->agbno locality hint.
   1382 */
   1383STATIC int
   1384xfs_alloc_ag_vextent_locality(
   1385	struct xfs_alloc_arg	*args,
   1386	struct xfs_alloc_cur	*acur,
   1387	int			*stat)
   1388{
   1389	struct xfs_btree_cur	*fbcur = NULL;
   1390	int			error;
   1391	int			i;
   1392	bool			fbinc;
   1393
   1394	ASSERT(acur->len == 0);
   1395	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
   1396
   1397	*stat = 0;
   1398
   1399	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
   1400	if (error)
   1401		return error;
   1402	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
   1403	if (error)
   1404		return error;
   1405	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
   1406	if (error)
   1407		return error;
   1408
   1409	/*
   1410	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
   1411	 * right and lookup the closest extent to the locality hint for each
   1412	 * extent size key in the cntbt. The entire search terminates
   1413	 * immediately on a bnobt hit because that means we've found best case
   1414	 * locality. Otherwise the search continues until the cntbt cursor runs
   1415	 * off the end of the tree. If no allocation candidate is found at this
   1416	 * point, give up on locality, walk backwards from the end of the cntbt
   1417	 * and take the first available extent.
   1418	 *
   1419	 * The parallel tree searches balance each other out to provide fairly
   1420	 * consistent performance for various situations. The bnobt search can
   1421	 * have pathological behavior in the worst case scenario of larger
   1422	 * allocation requests and fragmented free space. On the other hand, the
   1423	 * bnobt is able to satisfy most smaller allocation requests much more
   1424	 * quickly than the cntbt. The cntbt search can sift through fragmented
   1425	 * free space and sets of free extents for larger allocation requests
   1426	 * more quickly than the bnobt. Since the locality hint is just a hint
   1427	 * and we don't want to scan the entire bnobt for perfect locality, the
   1428	 * cntbt search essentially bounds the bnobt search such that we can
   1429	 * find good enough locality at reasonable performance in most cases.
   1430	 */
   1431	while (xfs_alloc_cur_active(acur->bnolt) ||
   1432	       xfs_alloc_cur_active(acur->bnogt) ||
   1433	       xfs_alloc_cur_active(acur->cnt)) {
   1434
   1435		trace_xfs_alloc_cur_lookup(args);
   1436
   1437		/*
   1438		 * Search the bnobt left and right. In the case of a hit, finish
   1439		 * the search in the opposite direction and we're done.
   1440		 */
   1441		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
   1442					    true, 1, &i);
   1443		if (error)
   1444			return error;
   1445		if (i == 1) {
   1446			trace_xfs_alloc_cur_left(args);
   1447			fbcur = acur->bnogt;
   1448			fbinc = true;
   1449			break;
   1450		}
   1451		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
   1452					    1, &i);
   1453		if (error)
   1454			return error;
   1455		if (i == 1) {
   1456			trace_xfs_alloc_cur_right(args);
   1457			fbcur = acur->bnolt;
   1458			fbinc = false;
   1459			break;
   1460		}
   1461
   1462		/*
   1463		 * Check the extent with best locality based on the current
   1464		 * extent size search key and keep track of the best candidate.
   1465		 */
   1466		error = xfs_alloc_cntbt_iter(args, acur);
   1467		if (error)
   1468			return error;
   1469		if (!xfs_alloc_cur_active(acur->cnt)) {
   1470			trace_xfs_alloc_cur_lookup_done(args);
   1471			break;
   1472		}
   1473	}
   1474
   1475	/*
   1476	 * If we failed to find anything due to busy extents, return empty
   1477	 * handed so the caller can flush and retry. If no busy extents were
   1478	 * found, walk backwards from the end of the cntbt as a last resort.
   1479	 */
   1480	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
   1481		error = xfs_btree_decrement(acur->cnt, 0, &i);
   1482		if (error)
   1483			return error;
   1484		if (i) {
   1485			acur->cnt->bc_ag.abt.active = true;
   1486			fbcur = acur->cnt;
   1487			fbinc = false;
   1488		}
   1489	}
   1490
   1491	/*
   1492	 * Search in the opposite direction for a better entry in the case of
   1493	 * a bnobt hit or walk backwards from the end of the cntbt.
   1494	 */
   1495	if (fbcur) {
   1496		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
   1497					    &i);
   1498		if (error)
   1499			return error;
   1500	}
   1501
   1502	if (acur->len)
   1503		*stat = 1;
   1504
   1505	return 0;
   1506}
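/*
 * Worked trace (illustrative): with hint agbno = 1000, a bnogt hit at
 * [1010, len >= minlen] terminates the loop immediately with fbcur =
 * bnolt, and the leftward walk then checks whether anything below 1000 is
 * even closer.  If both bnobt cursors deactivate without a hit and the
 * cntbt runs off the tree with no candidate and no busy extents, the final
 * fallback walks the cntbt backwards from its last (largest) record and
 * takes the first usable extent regardless of locality.
 */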
   1507
   1508/* Check the last block of the cnt btree for allocations. */
   1509static int
   1510xfs_alloc_ag_vextent_lastblock(
   1511	struct xfs_alloc_arg	*args,
   1512	struct xfs_alloc_cur	*acur,
   1513	xfs_agblock_t		*bno,
   1514	xfs_extlen_t		*len,
   1515	bool			*allocated)
   1516{
   1517	int			error;
   1518	int			i;
   1519
   1520#ifdef DEBUG
   1521	/* Randomly don't execute the first algorithm. */
   1522	if (prandom_u32() & 1)
   1523		return 0;
   1524#endif
   1525
   1526	/*
   1527	 * Start from the entry that lookup found, sequence through all larger
   1528	 * free blocks.  If we're actually pointing at a record smaller than
   1529	 * maxlen, go to the start of this block, and skip all those smaller
   1530	 * than minlen.
   1531	 */
   1532	if (*len || args->alignment > 1) {
   1533		acur->cnt->bc_levels[0].ptr = 1;
   1534		do {
   1535			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
   1536			if (error)
   1537				return error;
   1538			if (XFS_IS_CORRUPT(args->mp, i != 1))
   1539				return -EFSCORRUPTED;
   1540			if (*len >= args->minlen)
   1541				break;
   1542			error = xfs_btree_increment(acur->cnt, 0, &i);
   1543			if (error)
   1544				return error;
   1545		} while (i);
   1546		ASSERT(*len >= args->minlen);
   1547		if (!i)
   1548			return 0;
   1549	}
   1550
   1551	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
   1552	if (error)
   1553		return error;
   1554
   1555	/*
   1556	 * It didn't work.  We COULD be in a case where there's a good record
   1557	 * somewhere, so try again.
   1558	 */
   1559	if (acur->len == 0)
   1560		return 0;
   1561
   1562	trace_xfs_alloc_near_first(args);
   1563	*allocated = true;
   1564	return 0;
   1565}
   1566
   1567/*
   1568 * Allocate a variable extent near bno in the allocation group agno.
   1569 * Extent's length (returned in len) will be between minlen and maxlen,
   1570 * and of the form k * prod + mod unless there's nothing that large.
   1571 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
   1572 */
   1573STATIC int
   1574xfs_alloc_ag_vextent_near(
   1575	struct xfs_alloc_arg	*args)
   1576{
   1577	struct xfs_alloc_cur	acur = {};
   1578	int			error;		/* error code */
   1579	int			i;		/* result code, temporary */
   1580	xfs_agblock_t		bno;
   1581	xfs_extlen_t		len;
   1582
   1583	/* handle uninitialized agbno range so caller doesn't have to */
   1584	if (!args->min_agbno && !args->max_agbno)
   1585		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
   1586	ASSERT(args->min_agbno <= args->max_agbno);
   1587
   1588	/* clamp agbno to the range if it's outside */
   1589	if (args->agbno < args->min_agbno)
   1590		args->agbno = args->min_agbno;
   1591	if (args->agbno > args->max_agbno)
   1592		args->agbno = args->max_agbno;
   1593
   1594restart:
   1595	len = 0;
   1596
   1597	/*
   1598	 * Set up cursors and see if there are any free extents as big as
   1599	 * maxlen. If not, pick the last entry in the tree unless the tree is
   1600	 * empty.
   1601	 */
   1602	error = xfs_alloc_cur_setup(args, &acur);
   1603	if (error == -ENOSPC) {
   1604		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
   1605				&len, &i);
   1606		if (error)
   1607			goto out;
   1608		if (i == 0 || len == 0) {
   1609			trace_xfs_alloc_near_noentry(args);
   1610			goto out;
   1611		}
   1612		ASSERT(i == 1);
   1613	} else if (error) {
   1614		goto out;
   1615	}
   1616
   1617	/*
   1618	 * First algorithm.
   1619	 * If the requested extent is large wrt the freespaces available
   1620	 * in this a.g., then the cursor will be pointing to a btree entry
   1621	 * near the right edge of the tree.  If it's in the last btree leaf
   1622	 * block, then we just examine all the entries in that block
   1623	 * that are big enough, and pick the best one.
   1624	 */
   1625	if (xfs_btree_islastblock(acur.cnt, 0)) {
   1626		bool		allocated = false;
   1627
   1628		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
   1629				&allocated);
   1630		if (error)
   1631			goto out;
   1632		if (allocated)
   1633			goto alloc_finish;
   1634	}
   1635
   1636	/*
   1637	 * Second algorithm. Combined cntbt and bnobt search to find ideal
   1638	 * locality.
   1639	 */
   1640	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
   1641	if (error)
   1642		goto out;
   1643
   1644	/*
   1645	 * If we couldn't get anything, give up.
   1646	 */
   1647	if (!acur.len) {
   1648		if (acur.busy) {
   1649			trace_xfs_alloc_near_busy(args);
   1650			xfs_extent_busy_flush(args->mp, args->pag,
   1651					      acur.busy_gen);
   1652			goto restart;
   1653		}
   1654		trace_xfs_alloc_size_neither(args);
   1655		args->agbno = NULLAGBLOCK;
   1656		goto out;
   1657	}
   1658
   1659alloc_finish:
   1660	/* fix up btrees on a successful allocation */
   1661	error = xfs_alloc_cur_finish(args, &acur);
   1662
   1663out:
   1664	xfs_alloc_cur_close(&acur, error);
   1665	return error;
   1666}
   1667
   1668/*
   1669 * Allocate a variable extent anywhere in the allocation group agno.
   1670 * Extent's length (returned in len) will be between minlen and maxlen,
   1671 * and of the form k * prod + mod unless there's nothing that large.
   1672 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
   1673 */
   1674STATIC int				/* error */
   1675xfs_alloc_ag_vextent_size(
   1676	xfs_alloc_arg_t	*args)		/* allocation argument structure */
   1677{
   1678	struct xfs_agf	*agf = args->agbp->b_addr;
   1679	struct xfs_btree_cur *bno_cur;	/* cursor for bno btree */
   1680	struct xfs_btree_cur *cnt_cur;	/* cursor for cnt btree */
   1681	int		error;		/* error result */
   1682	xfs_agblock_t	fbno;		/* start of found freespace */
   1683	xfs_extlen_t	flen;		/* length of found freespace */
   1684	int		i;		/* temp status variable */
   1685	xfs_agblock_t	rbno;		/* returned block number */
   1686	xfs_extlen_t	rlen;		/* length of returned extent */
   1687	bool		busy;
   1688	unsigned	busy_gen;
   1689
   1690restart:
   1691	/*
   1692	 * Allocate and initialize a cursor for the by-size btree.
   1693	 */
   1694	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
   1695					args->pag, XFS_BTNUM_CNT);
   1696	bno_cur = NULL;
   1697
   1698	/*
   1699	 * Look for an entry >= maxlen+alignment-1 blocks.
   1700	 */
   1701	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
   1702			args->maxlen + args->alignment - 1, &i)))
   1703		goto error0;
   1704
   1705	/*
   1706	 * If none then we have to settle for a smaller extent. In the case that
   1707	 * there are no large extents, this will return the last entry in the
   1708	 * tree unless the tree is empty. In the case that there are only busy
   1709	 * large extents, this will return the largest small extent unless there
   1710	 * are no smaller extents available.
   1711	 */
   1712	if (!i) {
   1713		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
   1714						   &fbno, &flen, &i);
   1715		if (error)
   1716			goto error0;
   1717		if (i == 0 || flen == 0) {
   1718			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   1719			trace_xfs_alloc_size_noentry(args);
   1720			return 0;
   1721		}
   1722		ASSERT(i == 1);
   1723		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
   1724				&rlen, &busy_gen);
   1725	} else {
   1726		/*
   1727		 * Search for a non-busy extent that is large enough.
   1728		 */
   1729		for (;;) {
   1730			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
   1731			if (error)
   1732				goto error0;
   1733			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
   1734				error = -EFSCORRUPTED;
   1735				goto error0;
   1736			}
   1737
   1738			busy = xfs_alloc_compute_aligned(args, fbno, flen,
   1739					&rbno, &rlen, &busy_gen);
   1740
   1741			if (rlen >= args->maxlen)
   1742				break;
   1743
   1744			error = xfs_btree_increment(cnt_cur, 0, &i);
   1745			if (error)
   1746				goto error0;
   1747			if (i == 0) {
   1748				/*
   1749				 * Our only valid extents must have been busy.
   1750				 * Make it unbusy by forcing the log out and
   1751				 * retrying.
   1752				 */
   1753				xfs_btree_del_cursor(cnt_cur,
   1754						     XFS_BTREE_NOERROR);
   1755				trace_xfs_alloc_size_busy(args);
   1756				xfs_extent_busy_flush(args->mp,
   1757							args->pag, busy_gen);
   1758				goto restart;
   1759			}
   1760		}
   1761	}
   1762
   1763	/*
   1764	 * In the first case above, we got the last entry in the
   1765	 * by-size btree.  Now we check to see if the space hits maxlen
   1766	 * once aligned; if not, we search left for something better.
   1767	 * This can't happen in the second case above.
   1768	 */
   1769	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
   1770	if (XFS_IS_CORRUPT(args->mp,
   1771			   rlen != 0 &&
   1772			   (rlen > flen ||
   1773			    rbno + rlen > fbno + flen))) {
   1774		error = -EFSCORRUPTED;
   1775		goto error0;
   1776	}
   1777	if (rlen < args->maxlen) {
   1778		xfs_agblock_t	bestfbno;
   1779		xfs_extlen_t	bestflen;
   1780		xfs_agblock_t	bestrbno;
   1781		xfs_extlen_t	bestrlen;
   1782
   1783		bestrlen = rlen;
   1784		bestrbno = rbno;
   1785		bestflen = flen;
   1786		bestfbno = fbno;
   1787		for (;;) {
   1788			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
   1789				goto error0;
   1790			if (i == 0)
   1791				break;
   1792			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
   1793					&i)))
   1794				goto error0;
   1795			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
   1796				error = -EFSCORRUPTED;
   1797				goto error0;
   1798			}
   1799			if (flen < bestrlen)
   1800				break;
   1801			busy = xfs_alloc_compute_aligned(args, fbno, flen,
   1802					&rbno, &rlen, &busy_gen);
   1803			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
   1804			if (XFS_IS_CORRUPT(args->mp,
   1805					   rlen != 0 &&
   1806					   (rlen > flen ||
   1807					    rbno + rlen > fbno + flen))) {
   1808				error = -EFSCORRUPTED;
   1809				goto error0;
   1810			}
   1811			if (rlen > bestrlen) {
   1812				bestrlen = rlen;
   1813				bestrbno = rbno;
   1814				bestflen = flen;
   1815				bestfbno = fbno;
   1816				if (rlen == args->maxlen)
   1817					break;
   1818			}
   1819		}
   1820		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
   1821				&i)))
   1822			goto error0;
   1823		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
   1824			error = -EFSCORRUPTED;
   1825			goto error0;
   1826		}
   1827		rlen = bestrlen;
   1828		rbno = bestrbno;
   1829		flen = bestflen;
   1830		fbno = bestfbno;
   1831	}
   1832	args->wasfromfl = 0;
   1833	/*
   1834	 * Fix up the length.
   1835	 */
   1836	args->len = rlen;
   1837	if (rlen < args->minlen) {
   1838		if (busy) {
   1839			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   1840			trace_xfs_alloc_size_busy(args);
   1841			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
   1842			goto restart;
   1843		}
   1844		goto out_nominleft;
   1845	}
   1846	xfs_alloc_fix_len(args);
   1847
   1848	rlen = args->len;
   1849	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
   1850		error = -EFSCORRUPTED;
   1851		goto error0;
   1852	}
   1853	/*
   1854	 * Allocate and initialize a cursor for the by-block tree.
   1855	 */
   1856	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
   1857					args->pag, XFS_BTNUM_BNO);
   1858	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
   1859			rbno, rlen, XFSA_FIXUP_CNT_OK)))
   1860		goto error0;
   1861	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   1862	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
   1863	cnt_cur = bno_cur = NULL;
   1864	args->len = rlen;
   1865	args->agbno = rbno;
   1866	if (XFS_IS_CORRUPT(args->mp,
   1867			   args->agbno + args->len >
   1868			   be32_to_cpu(agf->agf_length))) {
   1869		error = -EFSCORRUPTED;
   1870		goto error0;
   1871	}
   1872	trace_xfs_alloc_size_done(args);
   1873	return 0;
   1874
   1875error0:
   1876	trace_xfs_alloc_size_error(args);
   1877	if (cnt_cur)
   1878		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
   1879	if (bno_cur)
   1880		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
   1881	return error;
   1882
   1883out_nominleft:
   1884	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   1885	trace_xfs_alloc_size_nominleft(args);
   1886	args->agbno = NULLAGBLOCK;
   1887	return 0;
   1888}
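
/*
 * Worked example for the left-scan above (illustrative numbers):
 * maxlen = 16, alignment = 8.  The initial lookup asks for at least
 * 16 + 8 - 1 = 23 blocks so that 16 aligned blocks always fit.  Suppose
 * it finds nothing and the last cntbt record is (bno 1001, len 20):
 * rounding 1001 up to 1008 leaves only 13 usable blocks.  Walking left
 * can then find e.g. (bno 992, len 18), which is shorter but already
 * aligned and yields the full 16 blocks.
 */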
   1889
   1890/*
   1891 * Free the extent starting at agno/bno for length.
   1892 */
   1893STATIC int
   1894xfs_free_ag_extent(
   1895	struct xfs_trans		*tp,
   1896	struct xfs_buf			*agbp,
   1897	xfs_agnumber_t			agno,
   1898	xfs_agblock_t			bno,
   1899	xfs_extlen_t			len,
   1900	const struct xfs_owner_info	*oinfo,
   1901	enum xfs_ag_resv_type		type)
   1902{
   1903	struct xfs_mount		*mp;
   1904	struct xfs_btree_cur		*bno_cur;
   1905	struct xfs_btree_cur		*cnt_cur;
   1906	xfs_agblock_t			gtbno; /* start of right neighbor */
   1907	xfs_extlen_t			gtlen; /* length of right neighbor */
   1908	xfs_agblock_t			ltbno; /* start of left neighbor */
   1909	xfs_extlen_t			ltlen; /* length of left neighbor */
   1910	xfs_agblock_t			nbno; /* new starting block of freesp */
   1911	xfs_extlen_t			nlen; /* new length of freespace */
   1912	int				haveleft; /* have a left neighbor */
   1913	int				haveright; /* have a right neighbor */
   1914	int				i;
   1915	int				error;
   1916	struct xfs_perag		*pag = agbp->b_pag;
   1917
   1918	bno_cur = cnt_cur = NULL;
   1919	mp = tp->t_mountp;
   1920
   1921	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
   1922		error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
   1923		if (error)
   1924			goto error0;
   1925	}
   1926
   1927	/*
   1928	 * Allocate and initialize a cursor for the by-block btree.
   1929	 */
   1930	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
   1931	/*
   1932	 * Look for a neighboring block on the left (lower block numbers)
   1933	 * that is contiguous with this space.
   1934	 */
   1935	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
   1936		goto error0;
   1937	if (haveleft) {
   1938		/*
   1939		 * There is a block to our left.
   1940		 */
   1941		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
   1942			goto error0;
   1943		if (XFS_IS_CORRUPT(mp, i != 1)) {
   1944			error = -EFSCORRUPTED;
   1945			goto error0;
   1946		}
   1947		/*
   1948		 * It's not contiguous, though.
   1949		 */
   1950		if (ltbno + ltlen < bno)
   1951			haveleft = 0;
   1952		else {
   1953			/*
   1954			 * If this failure happens the request to free this
   1955			 * space was invalid, it's (partly) already free.
   1956			 * Very bad.
   1957			 */
   1958			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
   1959				error = -EFSCORRUPTED;
   1960				goto error0;
   1961			}
   1962		}
   1963	}
   1964	/*
   1965	 * Look for a neighboring block on the right (higher block numbers)
   1966	 * that is contiguous with this space.
   1967	 */
   1968	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
   1969		goto error0;
   1970	if (haveright) {
   1971		/*
   1972		 * There is a block to our right.
   1973		 */
   1974		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
   1975			goto error0;
   1976		if (XFS_IS_CORRUPT(mp, i != 1)) {
   1977			error = -EFSCORRUPTED;
   1978			goto error0;
   1979		}
   1980		/*
   1981		 * It's not contiguous, though.
   1982		 */
   1983		if (bno + len < gtbno)
   1984			haveright = 0;
   1985		else {
   1986			/*
   1987			 * If this failure happens the request to free this
   1988			 * space was invalid, it's (partly) already free.
   1989			 * Very bad.
   1990			 */
   1991			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
   1992				error = -EFSCORRUPTED;
   1993				goto error0;
   1994			}
   1995		}
   1996	}
   1997	/*
   1998	 * Now allocate and initialize a cursor for the by-size tree.
   1999	 */
   2000	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
   2001	/*
   2002	 * Have both left and right contiguous neighbors.
   2003	 * Merge all three into a single free block.
   2004	 */
   2005	if (haveleft && haveright) {
   2006		/*
   2007		 * Delete the old by-size entry on the left.
   2008		 */
   2009		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
   2010			goto error0;
   2011		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2012			error = -EFSCORRUPTED;
   2013			goto error0;
   2014		}
   2015		if ((error = xfs_btree_delete(cnt_cur, &i)))
   2016			goto error0;
   2017		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2018			error = -EFSCORRUPTED;
   2019			goto error0;
   2020		}
   2021		/*
   2022		 * Delete the old by-size entry on the right.
   2023		 */
   2024		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
   2025			goto error0;
   2026		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2027			error = -EFSCORRUPTED;
   2028			goto error0;
   2029		}
   2030		if ((error = xfs_btree_delete(cnt_cur, &i)))
   2031			goto error0;
   2032		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2033			error = -EFSCORRUPTED;
   2034			goto error0;
   2035		}
   2036		/*
   2037		 * Delete the old by-block entry for the right block.
   2038		 */
   2039		if ((error = xfs_btree_delete(bno_cur, &i)))
   2040			goto error0;
   2041		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2042			error = -EFSCORRUPTED;
   2043			goto error0;
   2044		}
   2045		/*
   2046		 * Move the by-block cursor back to the left neighbor.
   2047		 */
   2048		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
   2049			goto error0;
   2050		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2051			error = -EFSCORRUPTED;
   2052			goto error0;
   2053		}
   2054#ifdef DEBUG
   2055		/*
   2056		 * Check that this is the right record: delete didn't
   2057		 * mangle the cursor.
   2058		 */
   2059		{
   2060			xfs_agblock_t	xxbno;
   2061			xfs_extlen_t	xxlen;
   2062
   2063			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
   2064					&i)))
   2065				goto error0;
   2066			if (XFS_IS_CORRUPT(mp,
   2067					   i != 1 ||
   2068					   xxbno != ltbno ||
   2069					   xxlen != ltlen)) {
   2070				error = -EFSCORRUPTED;
   2071				goto error0;
   2072			}
   2073		}
   2074#endif
   2075		/*
   2076		 * Update remaining by-block entry to the new, joined block.
   2077		 */
   2078		nbno = ltbno;
   2079		nlen = len + ltlen + gtlen;
   2080		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
   2081			goto error0;
   2082	}
   2083	/*
   2084	 * Have only a left contiguous neighbor.
   2085	 * Merge it together with the new freespace.
   2086	 */
   2087	else if (haveleft) {
   2088		/*
   2089		 * Delete the old by-size entry on the left.
   2090		 */
   2091		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
   2092			goto error0;
   2093		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2094			error = -EFSCORRUPTED;
   2095			goto error0;
   2096		}
   2097		if ((error = xfs_btree_delete(cnt_cur, &i)))
   2098			goto error0;
   2099		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2100			error = -EFSCORRUPTED;
   2101			goto error0;
   2102		}
   2103		/*
   2104		 * Back up the by-block cursor to the left neighbor, and
   2105		 * update its length.
   2106		 */
   2107		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
   2108			goto error0;
   2109		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2110			error = -EFSCORRUPTED;
   2111			goto error0;
   2112		}
   2113		nbno = ltbno;
   2114		nlen = len + ltlen;
   2115		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
   2116			goto error0;
   2117	}
   2118	/*
   2119	 * Have only a right contiguous neighbor.
   2120	 * Merge it together with the new freespace.
   2121	 */
   2122	else if (haveright) {
   2123		/*
   2124		 * Delete the old by-size entry on the right.
   2125		 */
   2126		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
   2127			goto error0;
   2128		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2129			error = -EFSCORRUPTED;
   2130			goto error0;
   2131		}
   2132		if ((error = xfs_btree_delete(cnt_cur, &i)))
   2133			goto error0;
   2134		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2135			error = -EFSCORRUPTED;
   2136			goto error0;
   2137		}
   2138		/*
   2139		 * Update the starting block and length of the right
   2140		 * neighbor in the by-block tree.
   2141		 */
   2142		nbno = bno;
   2143		nlen = len + gtlen;
   2144		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
   2145			goto error0;
   2146	}
   2147	/*
   2148	 * No contiguous neighbors.
   2149	 * Insert the new freespace into the by-block tree.
   2150	 */
   2151	else {
   2152		nbno = bno;
   2153		nlen = len;
   2154		if ((error = xfs_btree_insert(bno_cur, &i)))
   2155			goto error0;
   2156		if (XFS_IS_CORRUPT(mp, i != 1)) {
   2157			error = -EFSCORRUPTED;
   2158			goto error0;
   2159		}
   2160	}
   2161	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
   2162	bno_cur = NULL;
   2163	/*
   2164	 * In all cases we need to insert the new freespace in the by-size tree.
   2165	 */
   2166	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
   2167		goto error0;
   2168	if (XFS_IS_CORRUPT(mp, i != 0)) {
   2169		error = -EFSCORRUPTED;
   2170		goto error0;
   2171	}
   2172	if ((error = xfs_btree_insert(cnt_cur, &i)))
   2173		goto error0;
   2174	if (XFS_IS_CORRUPT(mp, i != 1)) {
   2175		error = -EFSCORRUPTED;
   2176		goto error0;
   2177	}
   2178	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
   2179	cnt_cur = NULL;
   2180
   2181	/*
   2182	 * Update the freespace totals in the ag and superblock.
   2183	 */
   2184	error = xfs_alloc_update_counters(tp, agbp, len);
   2185	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
   2186	if (error)
   2187		goto error0;
   2188
   2189	XFS_STATS_INC(mp, xs_freex);
   2190	XFS_STATS_ADD(mp, xs_freeb, len);
   2191
   2192	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
   2193
   2194	return 0;
   2195
   2196 error0:
   2197	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
   2198	if (bno_cur)
   2199		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
   2200	if (cnt_cur)
   2201		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
   2202	return error;
   2203}
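
/*
 * The four coalescing cases above, pictured ('F' is the extent being
 * freed, '<'/'>' its free by-block neighbours; sketch only):
 *
 *	haveleft && haveright:	[<<<<][FFFF][>>>>] -> one merged record
 *	haveleft only:		[<<<<][FFFF]...... -> left record grows
 *	haveright only:		......[FFFF][>>>>] -> right record moves to bno
 *	neither:		......[FFFF]...... -> new record inserted
 *
 * In every case the final record is inserted afresh into the by-size
 * tree (after deleting the stale entries of any merged neighbours),
 * since a record's position in the cntbt depends on its length.
 */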
   2204
   2205/*
   2206 * Visible (exported) allocation/free functions.
   2207 * Some of these are used just by xfs_alloc_btree.c and this file.
   2208 */
   2209
   2210/*
   2211 * Compute and fill in value of m_alloc_maxlevels.
   2212 */
   2213void
   2214xfs_alloc_compute_maxlevels(
   2215	xfs_mount_t	*mp)	/* file system mount structure */
   2216{
   2217	mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
   2218			(mp->m_sb.sb_agblocks + 1) / 2);
   2219	ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
   2220}
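
/*
 * Why (sb_agblocks + 1) / 2 above: free extents are never adjacent
 * (adjacent ones get coalesced on free), so the worst case is
 * alternating one-block free/used extents, i.e. at most half the AG,
 * rounded up, can appear as separate free space records.
 */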
   2221
   2222/*
   2223 * Find the length of the longest extent in an AG.  The 'need' parameter
   2224 * specifies how much space we're going to need for the AGFL and the
   2225 * 'reserved' parameter tells us how many blocks in this AG are reserved for
   2226 * other callers.
   2227 */
   2228xfs_extlen_t
   2229xfs_alloc_longest_free_extent(
   2230	struct xfs_perag	*pag,
   2231	xfs_extlen_t		need,
   2232	xfs_extlen_t		reserved)
   2233{
   2234	xfs_extlen_t		delta = 0;
   2235
   2236	/*
   2237	 * If the AGFL needs a recharge, we'll have to subtract that from the
   2238	 * longest extent.
   2239	 */
   2240	if (need > pag->pagf_flcount)
   2241		delta = need - pag->pagf_flcount;
   2242
   2243	/*
   2244	 * If we cannot maintain others' reservations with space from the
   2245	 * not-longest freesp extents, we'll have to subtract /that/ from
   2246	 * the longest extent too.
   2247	 */
   2248	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
   2249		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
   2250
   2251	/*
   2252	 * If the longest extent is long enough to satisfy all the
   2253	 * reservations and AGFL rules in place, we can return this extent.
   2254	 */
   2255	if (pag->pagf_longest > delta)
   2256		return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
   2257				pag->pagf_longest - delta);
   2258
   2259	/* Otherwise, let the caller try for 1 block if there's space. */
   2260	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
   2261}
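
/*
 * Worked example (illustrative numbers): pagf_longest = 100,
 * pagf_freeblks = 120, need = 10, flcount = 4, reserved = 30.  The
 * AGFL is 10 - 4 = 6 blocks short, so delta = 6.  The non-longest free
 * space is 120 - 100 = 20 blocks, 10 short of the reservation, so
 * delta grows to 16.  The caller can count on at most 100 - 16 = 84
 * blocks of the longest extent, further capped by m_ag_max_usable.
 */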
   2262
   2263/*
   2264 * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
   2265 * return the largest possible minimum length.
   2266 */
   2267unsigned int
   2268xfs_alloc_min_freelist(
   2269	struct xfs_mount	*mp,
   2270	struct xfs_perag	*pag)
   2271{
   2272	/* AG btrees have at least 1 level. */
   2273	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
   2274	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
   2275	unsigned int		min_free;
   2276
   2277	ASSERT(mp->m_alloc_maxlevels > 0);
   2278
   2279	/* space needed by-bno freespace btree */
   2280	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
   2281				       mp->m_alloc_maxlevels);
   2282	/* space needed by-size freespace btree */
   2283	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
   2284				       mp->m_alloc_maxlevels);
   2285	/* space needed reverse mapping used space btree */
   2286	if (xfs_has_rmapbt(mp))
   2287		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
   2288						mp->m_rmap_maxlevels);
   2289
   2290	return min_free;
   2291}
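
/*
 * Example (hypothetical level counts, each term capped by the relevant
 * maxlevels): with pagf_levels of 2 for the bnobt and cntbt and 3 for
 * the rmapbt, min_free = (2 + 1) + (2 + 1) + (3 + 1) = 9 blocks --
 * enough for a full split of each btree, including one extra block per
 * tree for a new root.
 */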
   2292
   2293/*
   2294 * Check if the operation we are fixing up the freelist for should go ahead or
   2295 * not. If we are freeing blocks, we always allow it, otherwise the allocation
   2296 * is dependent on whether the size and shape of free space available will
   2297 * permit the requested allocation to take place.
   2298 */
   2299static bool
   2300xfs_alloc_space_available(
   2301	struct xfs_alloc_arg	*args,
   2302	xfs_extlen_t		min_free,
   2303	int			flags)
   2304{
   2305	struct xfs_perag	*pag = args->pag;
   2306	xfs_extlen_t		alloc_len, longest;
   2307	xfs_extlen_t		reservation; /* blocks that are still reserved */
   2308	int			available;
   2309	xfs_extlen_t		agflcount;
   2310
   2311	if (flags & XFS_ALLOC_FLAG_FREEING)
   2312		return true;
   2313
   2314	reservation = xfs_ag_resv_needed(pag, args->resv);
   2315
   2316	/* do we have enough contiguous free space for the allocation? */
   2317	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
   2318	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
   2319	if (longest < alloc_len)
   2320		return false;
   2321
   2322	/*
   2323	 * Do we have enough free space remaining for the allocation? Don't
    2324	 * account extra agfl blocks because we are about to defer freeing them,
   2325	 * making them unavailable until the current transaction commits.
   2326	 */
   2327	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
   2328	available = (int)(pag->pagf_freeblks + agflcount -
   2329			  reservation - min_free - args->minleft);
   2330	if (available < (int)max(args->total, alloc_len))
   2331		return false;
   2332
   2333	/*
   2334	 * Clamp maxlen to the amount of free space available for the actual
   2335	 * extent allocation.
   2336	 */
   2337	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
   2338		args->maxlen = available;
   2339		ASSERT(args->maxlen > 0);
   2340		ASSERT(args->maxlen >= args->minlen);
   2341	}
   2342
   2343	return true;
   2344}
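
/*
 * Worked example (illustrative numbers): minlen = 8, alignment = 4,
 * minalignslop = 0, total = 0, minleft = 0, min_free = 6,
 * pagf_flcount = 2, pagf_freeblks = 40, reservation = 10.  Then
 * alloc_len = 8 + 3 + 0 = 11 must fit in the longest free extent, and
 * available = 40 + min(2, 6) - 10 - 6 - 0 = 26 covers
 * max(total, alloc_len) = 11, so the allocation can go ahead;
 * args->maxlen may still be clamped down to 26.
 */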
   2345
   2346int
   2347xfs_free_agfl_block(
   2348	struct xfs_trans	*tp,
   2349	xfs_agnumber_t		agno,
   2350	xfs_agblock_t		agbno,
   2351	struct xfs_buf		*agbp,
   2352	struct xfs_owner_info	*oinfo)
   2353{
   2354	int			error;
   2355	struct xfs_buf		*bp;
   2356
   2357	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
   2358				   XFS_AG_RESV_AGFL);
   2359	if (error)
   2360		return error;
   2361
   2362	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
   2363			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
   2364			tp->t_mountp->m_bsize, 0, &bp);
   2365	if (error)
   2366		return error;
   2367	xfs_trans_binval(tp, bp);
   2368
   2369	return 0;
   2370}
   2371
   2372/*
   2373 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
   2374 * is to detect an agfl header padding mismatch between current and early v5
   2375 * kernels. This problem manifests as a 1-slot size difference between the
   2376 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
   2377 * may also catch variants of agfl count corruption unrelated to padding. Either
   2378 * way, we'll reset the agfl and warn the user.
   2379 *
   2380 * Return true if a reset is required before the agfl can be used, false
   2381 * otherwise.
   2382 */
   2383static bool
   2384xfs_agfl_needs_reset(
   2385	struct xfs_mount	*mp,
   2386	struct xfs_agf		*agf)
   2387{
   2388	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
   2389	uint32_t		l = be32_to_cpu(agf->agf_fllast);
   2390	uint32_t		c = be32_to_cpu(agf->agf_flcount);
   2391	int			agfl_size = xfs_agfl_size(mp);
   2392	int			active;
   2393
   2394	/* no agfl header on v4 supers */
   2395	if (!xfs_has_crc(mp))
   2396		return false;
   2397
   2398	/*
   2399	 * The agf read verifier catches severe corruption of these fields.
   2400	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
   2401	 * the verifier allows it.
   2402	 */
   2403	if (f >= agfl_size || l >= agfl_size)
   2404		return true;
   2405	if (c > agfl_size)
   2406		return true;
   2407
   2408	/*
   2409	 * Check consistency between the on-disk count and the active range. An
   2410	 * agfl padding mismatch manifests as an inconsistent flcount.
   2411	 */
   2412	if (c && l >= f)
   2413		active = l - f + 1;
   2414	else if (c)
   2415		active = agfl_size - f + l + 1;
   2416	else
   2417		active = 0;
   2418
   2419	return active != c;
   2420}
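
/*
 * Example of the wrapped case above (illustrative): xfs_agfl_size() =
 * 118, flfirst = 116, fllast = 1, flcount = 4.  The active slots are
 * 116, 117, 0, 1, so active = 118 - 116 + 1 + 1 = 4 and the AGFL is
 * consistent.  A padding-mismatched early-v5 AGFL would instead show an
 * off-by-one flcount here and trigger a reset.
 */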
   2421
   2422/*
   2423 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
   2424 * agfl content cannot be trusted. Warn the user that a repair is required to
   2425 * recover leaked blocks.
   2426 *
   2427 * The purpose of this mechanism is to handle filesystems affected by the agfl
   2428 * header padding mismatch problem. A reset keeps the filesystem online with a
   2429 * relatively minor free space accounting inconsistency rather than suffer the
   2430 * inevitable crash from use of an invalid agfl block.
   2431 */
   2432static void
   2433xfs_agfl_reset(
   2434	struct xfs_trans	*tp,
   2435	struct xfs_buf		*agbp,
   2436	struct xfs_perag	*pag)
   2437{
   2438	struct xfs_mount	*mp = tp->t_mountp;
   2439	struct xfs_agf		*agf = agbp->b_addr;
   2440
   2441	ASSERT(pag->pagf_agflreset);
   2442	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
   2443
   2444	xfs_warn(mp,
   2445	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
   2446	       "Please unmount and run xfs_repair.",
   2447	         pag->pag_agno, pag->pagf_flcount);
   2448
   2449	agf->agf_flfirst = 0;
   2450	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
   2451	agf->agf_flcount = 0;
   2452	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
   2453				    XFS_AGF_FLCOUNT);
   2454
   2455	pag->pagf_flcount = 0;
   2456	pag->pagf_agflreset = false;
   2457}
   2458
   2459/*
   2460 * Defer an AGFL block free. This is effectively equivalent to
   2461 * xfs_free_extent_later() with some special handling particular to AGFL blocks.
   2462 *
   2463 * Deferring AGFL frees helps prevent log reservation overruns due to too many
   2464 * allocation operations in a transaction. AGFL frees are prone to this problem
   2465 * because for one they are always freed one at a time. Further, an immediate
    2466	 * because, for one, they are always freed one at a time. Further, an immediate
   2467 * the real allocation can proceed. Deferring the free disconnects freeing up
   2468 * the AGFL slot from freeing the block.
   2469 */
   2470STATIC void
   2471xfs_defer_agfl_block(
   2472	struct xfs_trans		*tp,
   2473	xfs_agnumber_t			agno,
   2474	xfs_fsblock_t			agbno,
   2475	struct xfs_owner_info		*oinfo)
   2476{
   2477	struct xfs_mount		*mp = tp->t_mountp;
   2478	struct xfs_extent_free_item	*new;		/* new element */
   2479
   2480	ASSERT(xfs_extfree_item_cache != NULL);
   2481	ASSERT(oinfo != NULL);
   2482
   2483	new = kmem_cache_zalloc(xfs_extfree_item_cache,
   2484			       GFP_KERNEL | __GFP_NOFAIL);
   2485	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
   2486	new->xefi_blockcount = 1;
   2487	new->xefi_owner = oinfo->oi_owner;
   2488
   2489	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
   2490
   2491	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
   2492}
   2493
   2494/*
   2495 * Add the extent to the list of extents to be free at transaction end.
   2496 * The list is maintained sorted (by block number).
   2497 */
   2498void
   2499__xfs_free_extent_later(
   2500	struct xfs_trans		*tp,
   2501	xfs_fsblock_t			bno,
   2502	xfs_filblks_t			len,
   2503	const struct xfs_owner_info	*oinfo,
   2504	bool				skip_discard)
   2505{
   2506	struct xfs_extent_free_item	*new;		/* new element */
   2507#ifdef DEBUG
   2508	struct xfs_mount		*mp = tp->t_mountp;
   2509	xfs_agnumber_t			agno;
   2510	xfs_agblock_t			agbno;
   2511
   2512	ASSERT(bno != NULLFSBLOCK);
   2513	ASSERT(len > 0);
   2514	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
   2515	ASSERT(!isnullstartblock(bno));
   2516	agno = XFS_FSB_TO_AGNO(mp, bno);
   2517	agbno = XFS_FSB_TO_AGBNO(mp, bno);
   2518	ASSERT(agno < mp->m_sb.sb_agcount);
   2519	ASSERT(agbno < mp->m_sb.sb_agblocks);
   2520	ASSERT(len < mp->m_sb.sb_agblocks);
   2521	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
   2522#endif
   2523	ASSERT(xfs_extfree_item_cache != NULL);
   2524
   2525	new = kmem_cache_zalloc(xfs_extfree_item_cache,
   2526			       GFP_KERNEL | __GFP_NOFAIL);
   2527	new->xefi_startblock = bno;
   2528	new->xefi_blockcount = (xfs_extlen_t)len;
   2529	if (skip_discard)
   2530		new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
   2531	if (oinfo) {
   2532		ASSERT(oinfo->oi_offset == 0);
   2533
   2534		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
   2535			new->xefi_flags |= XFS_EFI_ATTR_FORK;
   2536		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
   2537			new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
   2538		new->xefi_owner = oinfo->oi_owner;
   2539	} else {
   2540		new->xefi_owner = XFS_RMAP_OWN_NULL;
   2541	}
   2542	trace_xfs_bmap_free_defer(tp->t_mountp,
   2543			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
   2544			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
   2545	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
   2546}
   2547
   2548#ifdef DEBUG
   2549/*
   2550 * Check if an AGF has a free extent record whose length is equal to
   2551 * args->minlen.
   2552 */
   2553STATIC int
   2554xfs_exact_minlen_extent_available(
   2555	struct xfs_alloc_arg	*args,
   2556	struct xfs_buf		*agbp,
   2557	int			*stat)
   2558{
   2559	struct xfs_btree_cur	*cnt_cur;
   2560	xfs_agblock_t		fbno;
   2561	xfs_extlen_t		flen;
   2562	int			error = 0;
   2563
   2564	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
   2565					args->pag, XFS_BTNUM_CNT);
   2566	error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
   2567	if (error)
   2568		goto out;
   2569
   2570	if (*stat == 0) {
   2571		error = -EFSCORRUPTED;
   2572		goto out;
   2573	}
   2574
   2575	error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
   2576	if (error)
   2577		goto out;
   2578
   2579	if (*stat == 1 && flen != args->minlen)
   2580		*stat = 0;
   2581
   2582out:
   2583	xfs_btree_del_cursor(cnt_cur, error);
   2584
   2585	return error;
   2586}
   2587#endif
   2588
   2589/*
   2590 * Decide whether to use this allocation group for this allocation.
   2591 * If so, fix up the btree freelist's size.
   2592 */
   2593int			/* error */
   2594xfs_alloc_fix_freelist(
   2595	struct xfs_alloc_arg	*args,	/* allocation argument structure */
   2596	int			flags)	/* XFS_ALLOC_FLAG_... */
   2597{
   2598	struct xfs_mount	*mp = args->mp;
   2599	struct xfs_perag	*pag = args->pag;
   2600	struct xfs_trans	*tp = args->tp;
   2601	struct xfs_buf		*agbp = NULL;
   2602	struct xfs_buf		*agflbp = NULL;
   2603	struct xfs_alloc_arg	targs;	/* local allocation arguments */
   2604	xfs_agblock_t		bno;	/* freelist block */
   2605	xfs_extlen_t		need;	/* total blocks needed in freelist */
   2606	int			error = 0;
   2607
   2608	/* deferred ops (AGFL block frees) require permanent transactions */
   2609	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
   2610
   2611	if (!pag->pagf_init) {
   2612		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
   2613		if (error) {
   2614			/* Couldn't lock the AGF so skip this AG. */
   2615			if (error == -EAGAIN)
   2616				error = 0;
   2617			goto out_no_agbp;
   2618		}
   2619	}
   2620
   2621	/*
    2622	 * If this is a metadata preferred pag and we are allocating user
    2623	 * data, try somewhere else unless we are being asked to try harder
    2624	 * at this point.
   2625	 */
   2626	if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
   2627	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
   2628		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
   2629		goto out_agbp_relse;
   2630	}
   2631
   2632	need = xfs_alloc_min_freelist(mp, pag);
   2633	if (!xfs_alloc_space_available(args, need, flags |
   2634			XFS_ALLOC_FLAG_CHECK))
   2635		goto out_agbp_relse;
   2636
   2637	/*
   2638	 * Get the a.g. freespace buffer.
   2639	 * Can fail if we're not blocking on locks, and it's held.
   2640	 */
   2641	if (!agbp) {
   2642		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
   2643		if (error) {
   2644			/* Couldn't lock the AGF so skip this AG. */
   2645			if (error == -EAGAIN)
   2646				error = 0;
   2647			goto out_no_agbp;
   2648		}
   2649	}
   2650
   2651	/* reset a padding mismatched agfl before final free space check */
   2652	if (pag->pagf_agflreset)
   2653		xfs_agfl_reset(tp, agbp, pag);
   2654
    2655	/* If there isn't enough total space or a large enough single extent, reject it. */
   2656	need = xfs_alloc_min_freelist(mp, pag);
   2657	if (!xfs_alloc_space_available(args, need, flags))
   2658		goto out_agbp_relse;
   2659
   2660#ifdef DEBUG
   2661	if (args->alloc_minlen_only) {
   2662		int stat;
   2663
   2664		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
   2665		if (error || !stat)
   2666			goto out_agbp_relse;
   2667	}
   2668#endif
   2669	/*
   2670	 * Make the freelist shorter if it's too long.
   2671	 *
   2672	 * Note that from this point onwards, we will always release the agf and
   2673	 * agfl buffers on error. This handles the case where we error out and
   2674	 * the buffers are clean or may not have been joined to the transaction
   2675	 * and hence need to be released manually. If they have been joined to
   2676	 * the transaction, then xfs_trans_brelse() will handle them
   2677	 * appropriately based on the recursion count and dirty state of the
   2678	 * buffer.
   2679	 *
   2680	 * XXX (dgc): When we have lots of free space, does this buy us
   2681	 * anything other than extra overhead when we need to put more blocks
   2682	 * back on the free list? Maybe we should only do this when space is
   2683	 * getting low or the AGFL is more than half full?
   2684	 *
   2685	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
   2686	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
   2687	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
   2688	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
   2689	 * both required to ensure that rmaps are correctly recorded for the
   2690	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
   2691	 * repair/rmap.c in xfsprogs for details.
   2692	 */
   2693	memset(&targs, 0, sizeof(targs));
   2694	/* struct copy below */
   2695	if (flags & XFS_ALLOC_FLAG_NORMAP)
   2696		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
   2697	else
   2698		targs.oinfo = XFS_RMAP_OINFO_AG;
   2699	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
   2700		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
   2701		if (error)
   2702			goto out_agbp_relse;
   2703
   2704		/* defer agfl frees */
   2705		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
   2706	}
   2707
   2708	targs.tp = tp;
   2709	targs.mp = mp;
   2710	targs.agbp = agbp;
   2711	targs.agno = args->agno;
   2712	targs.alignment = targs.minlen = targs.prod = 1;
   2713	targs.type = XFS_ALLOCTYPE_THIS_AG;
   2714	targs.pag = pag;
   2715	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
   2716	if (error)
   2717		goto out_agbp_relse;
   2718
   2719	/* Make the freelist longer if it's too short. */
   2720	while (pag->pagf_flcount < need) {
   2721		targs.agbno = 0;
   2722		targs.maxlen = need - pag->pagf_flcount;
   2723		targs.resv = XFS_AG_RESV_AGFL;
   2724
   2725		/* Allocate as many blocks as possible at once. */
   2726		error = xfs_alloc_ag_vextent(&targs);
   2727		if (error)
   2728			goto out_agflbp_relse;
   2729
   2730		/*
   2731		 * Stop if we run out.  Won't happen if callers are obeying
   2732		 * the restrictions correctly.  Can happen for free calls
   2733		 * on a completely full ag.
   2734		 */
   2735		if (targs.agbno == NULLAGBLOCK) {
   2736			if (flags & XFS_ALLOC_FLAG_FREEING)
   2737				break;
   2738			goto out_agflbp_relse;
   2739		}
   2740		/*
   2741		 * Put each allocated block on the list.
   2742		 */
   2743		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
   2744			error = xfs_alloc_put_freelist(tp, agbp,
   2745							agflbp, bno, 0);
   2746			if (error)
   2747				goto out_agflbp_relse;
   2748		}
   2749	}
   2750	xfs_trans_brelse(tp, agflbp);
   2751	args->agbp = agbp;
   2752	return 0;
   2753
   2754out_agflbp_relse:
   2755	xfs_trans_brelse(tp, agflbp);
   2756out_agbp_relse:
   2757	if (agbp)
   2758		xfs_trans_brelse(tp, agbp);
   2759out_no_agbp:
   2760	args->agbp = NULL;
   2761	return error;
   2762}
   2763
   2764/*
   2765 * Get a block from the freelist.
   2766 * Returns with the buffer for the block gotten.
   2767 */
   2768int
   2769xfs_alloc_get_freelist(
   2770	struct xfs_trans	*tp,
   2771	struct xfs_buf		*agbp,
   2772	xfs_agblock_t		*bnop,
   2773	int			btreeblk)
   2774{
   2775	struct xfs_agf		*agf = agbp->b_addr;
   2776	struct xfs_buf		*agflbp;
   2777	xfs_agblock_t		bno;
   2778	__be32			*agfl_bno;
   2779	int			error;
   2780	uint32_t		logflags;
   2781	struct xfs_mount	*mp = tp->t_mountp;
   2782	struct xfs_perag	*pag;
   2783
   2784	/*
   2785	 * Freelist is empty, give up.
   2786	 */
   2787	if (!agf->agf_flcount) {
   2788		*bnop = NULLAGBLOCK;
   2789		return 0;
   2790	}
   2791	/*
   2792	 * Read the array of free blocks.
   2793	 */
   2794	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
   2795				    &agflbp);
   2796	if (error)
   2797		return error;
   2798
   2799
   2800	/*
   2801	 * Get the block number and update the data structures.
   2802	 */
   2803	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
   2804	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
   2805	be32_add_cpu(&agf->agf_flfirst, 1);
   2806	xfs_trans_brelse(tp, agflbp);
   2807	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
   2808		agf->agf_flfirst = 0;
   2809
   2810	pag = agbp->b_pag;
   2811	ASSERT(!pag->pagf_agflreset);
   2812	be32_add_cpu(&agf->agf_flcount, -1);
   2813	pag->pagf_flcount--;
   2814
   2815	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
   2816	if (btreeblk) {
   2817		be32_add_cpu(&agf->agf_btreeblks, 1);
   2818		pag->pagf_btreeblks++;
   2819		logflags |= XFS_AGF_BTREEBLKS;
   2820	}
   2821
   2822	xfs_alloc_log_agf(tp, agbp, logflags);
   2823	*bnop = bno;
   2824
   2825	return 0;
   2826}
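
/*
 * The AGFL is a ring of xfs_agfl_size() slots over the on-disk __be32
 * array: gets consume at agf_flfirst, puts (below) append at
 * agf_fllast, and both indices wrap back to 0 past the end.  In
 * pseudocode (sketch only):
 *
 *	bno = agfl_bno[flfirst]; flfirst++;	(get side, wraps)
 *	agfl_bno[++fllast] = bno;		(put side, wraps)
 */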
   2827
   2828/*
   2829 * Log the given fields from the agf structure.
   2830 */
   2831void
   2832xfs_alloc_log_agf(
   2833	struct xfs_trans	*tp,
   2834	struct xfs_buf		*bp,
   2835	uint32_t		fields)
   2836{
   2837	int	first;		/* first byte offset */
   2838	int	last;		/* last byte offset */
   2839	static const short	offsets[] = {
   2840		offsetof(xfs_agf_t, agf_magicnum),
   2841		offsetof(xfs_agf_t, agf_versionnum),
   2842		offsetof(xfs_agf_t, agf_seqno),
   2843		offsetof(xfs_agf_t, agf_length),
   2844		offsetof(xfs_agf_t, agf_roots[0]),
   2845		offsetof(xfs_agf_t, agf_levels[0]),
   2846		offsetof(xfs_agf_t, agf_flfirst),
   2847		offsetof(xfs_agf_t, agf_fllast),
   2848		offsetof(xfs_agf_t, agf_flcount),
   2849		offsetof(xfs_agf_t, agf_freeblks),
   2850		offsetof(xfs_agf_t, agf_longest),
   2851		offsetof(xfs_agf_t, agf_btreeblks),
   2852		offsetof(xfs_agf_t, agf_uuid),
   2853		offsetof(xfs_agf_t, agf_rmap_blocks),
   2854		offsetof(xfs_agf_t, agf_refcount_blocks),
   2855		offsetof(xfs_agf_t, agf_refcount_root),
   2856		offsetof(xfs_agf_t, agf_refcount_level),
   2857		/* needed so that we don't log the whole rest of the structure: */
   2858		offsetof(xfs_agf_t, agf_spare64),
   2859		sizeof(xfs_agf_t)
   2860	};
   2861
   2862	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
   2863
   2864	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
   2865
   2866	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
   2867	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
   2868}
   2869
   2870/*
   2871 * Interface for inode allocation to force the pag data to be initialized.
   2872 */
   2873int					/* error */
   2874xfs_alloc_pagf_init(
   2875	xfs_mount_t		*mp,	/* file system mount structure */
   2876	xfs_trans_t		*tp,	/* transaction pointer */
   2877	xfs_agnumber_t		agno,	/* allocation group number */
   2878	int			flags)	/* XFS_ALLOC_FLAGS_... */
   2879{
   2880	struct xfs_buf		*bp;
   2881	int			error;
   2882
   2883	error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
   2884	if (!error)
   2885		xfs_trans_brelse(tp, bp);
   2886	return error;
   2887}
   2888
   2889/*
   2890 * Put the block on the freelist for the allocation group.
   2891 */
   2892int
   2893xfs_alloc_put_freelist(
   2894	struct xfs_trans	*tp,
   2895	struct xfs_buf		*agbp,
   2896	struct xfs_buf		*agflbp,
   2897	xfs_agblock_t		bno,
   2898	int			btreeblk)
   2899{
   2900	struct xfs_mount	*mp = tp->t_mountp;
   2901	struct xfs_agf		*agf = agbp->b_addr;
   2902	struct xfs_perag	*pag;
   2903	__be32			*blockp;
   2904	int			error;
   2905	uint32_t		logflags;
   2906	__be32			*agfl_bno;
   2907	int			startoff;
   2908
   2909	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
   2910			be32_to_cpu(agf->agf_seqno), &agflbp)))
   2911		return error;
   2912	be32_add_cpu(&agf->agf_fllast, 1);
   2913	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
   2914		agf->agf_fllast = 0;
   2915
   2916	pag = agbp->b_pag;
   2917	ASSERT(!pag->pagf_agflreset);
   2918	be32_add_cpu(&agf->agf_flcount, 1);
   2919	pag->pagf_flcount++;
   2920
   2921	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
   2922	if (btreeblk) {
   2923		be32_add_cpu(&agf->agf_btreeblks, -1);
   2924		pag->pagf_btreeblks--;
   2925		logflags |= XFS_AGF_BTREEBLKS;
   2926	}
   2927
   2928	xfs_alloc_log_agf(tp, agbp, logflags);
   2929
   2930	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
   2931
   2932	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
   2933	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
   2934	*blockp = cpu_to_be32(bno);
   2935	startoff = (char *)blockp - (char *)agflbp->b_addr;
   2936
   2937	xfs_alloc_log_agf(tp, agbp, logflags);
   2938
   2939	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
   2940	xfs_trans_log_buf(tp, agflbp, startoff,
   2941			  startoff + sizeof(xfs_agblock_t) - 1);
   2942	return 0;
   2943}
   2944
   2945static xfs_failaddr_t
   2946xfs_agf_verify(
   2947	struct xfs_buf		*bp)
   2948{
   2949	struct xfs_mount	*mp = bp->b_mount;
   2950	struct xfs_agf		*agf = bp->b_addr;
   2951
   2952	if (xfs_has_crc(mp)) {
   2953		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
   2954			return __this_address;
   2955		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
   2956			return __this_address;
   2957	}
   2958
   2959	if (!xfs_verify_magic(bp, agf->agf_magicnum))
   2960		return __this_address;
   2961
   2962	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
   2963	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
   2964	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
   2965	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
   2966	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
   2967		return __this_address;
   2968
   2969	if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
   2970		return __this_address;
   2971
   2972	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
   2973	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
   2974		return __this_address;
   2975
   2976	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
   2977	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
   2978	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
   2979						mp->m_alloc_maxlevels ||
   2980	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
   2981						mp->m_alloc_maxlevels)
   2982		return __this_address;
   2983
   2984	if (xfs_has_rmapbt(mp) &&
   2985	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
   2986	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
   2987						mp->m_rmap_maxlevels))
   2988		return __this_address;
   2989
   2990	if (xfs_has_rmapbt(mp) &&
   2991	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
   2992		return __this_address;
   2993
   2994	/*
   2995	 * during growfs operations, the perag is not fully initialised,
   2996	 * so we can't use it for any useful checking. growfs ensures we can't
   2997	 * use it by using uncached buffers that don't have the perag attached
   2998	 * so we can detect and avoid this problem.
   2999	 */
   3000	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
   3001		return __this_address;
   3002
   3003	if (xfs_has_lazysbcount(mp) &&
   3004	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
   3005		return __this_address;
   3006
   3007	if (xfs_has_reflink(mp) &&
   3008	    be32_to_cpu(agf->agf_refcount_blocks) >
   3009	    be32_to_cpu(agf->agf_length))
   3010		return __this_address;
   3011
   3012	if (xfs_has_reflink(mp) &&
   3013	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
   3014	     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
   3015		return __this_address;
   3016
   3017	return NULL;
   3018
   3019}
   3020
   3021static void
   3022xfs_agf_read_verify(
   3023	struct xfs_buf	*bp)
   3024{
   3025	struct xfs_mount *mp = bp->b_mount;
   3026	xfs_failaddr_t	fa;
   3027
   3028	if (xfs_has_crc(mp) &&
   3029	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
   3030		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
   3031	else {
   3032		fa = xfs_agf_verify(bp);
   3033		if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
   3034			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
   3035	}
   3036}
   3037
   3038static void
   3039xfs_agf_write_verify(
   3040	struct xfs_buf	*bp)
   3041{
   3042	struct xfs_mount	*mp = bp->b_mount;
   3043	struct xfs_buf_log_item	*bip = bp->b_log_item;
   3044	struct xfs_agf		*agf = bp->b_addr;
   3045	xfs_failaddr_t		fa;
   3046
   3047	fa = xfs_agf_verify(bp);
   3048	if (fa) {
   3049		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
   3050		return;
   3051	}
   3052
   3053	if (!xfs_has_crc(mp))
   3054		return;
   3055
   3056	if (bip)
   3057		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
   3058
   3059	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
   3060}
   3061
   3062const struct xfs_buf_ops xfs_agf_buf_ops = {
   3063	.name = "xfs_agf",
   3064	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
   3065	.verify_read = xfs_agf_read_verify,
   3066	.verify_write = xfs_agf_write_verify,
   3067	.verify_struct = xfs_agf_verify,
   3068};
   3069
   3070/*
   3071 * Read in the allocation group header (free/alloc section).
   3072 */
   3073int					/* error */
   3074xfs_read_agf(
   3075	struct xfs_mount	*mp,	/* mount point structure */
   3076	struct xfs_trans	*tp,	/* transaction pointer */
   3077	xfs_agnumber_t		agno,	/* allocation group number */
   3078	int			flags,	/* XFS_BUF_ */
   3079	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
   3080{
   3081	int		error;
   3082
   3083	trace_xfs_read_agf(mp, agno);
   3084
   3085	ASSERT(agno != NULLAGNUMBER);
   3086	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
   3087			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
   3088			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
   3089	if (error)
   3090		return error;
   3091
   3092	ASSERT(!(*bpp)->b_error);
   3093	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
   3094	return 0;
   3095}
   3096
   3097/*
   3098 * Read in the allocation group header (free/alloc section).
   3099 */
   3100int					/* error */
   3101xfs_alloc_read_agf(
   3102	struct xfs_mount	*mp,	/* mount point structure */
   3103	struct xfs_trans	*tp,	/* transaction pointer */
   3104	xfs_agnumber_t		agno,	/* allocation group number */
   3105	int			flags,	/* XFS_ALLOC_FLAG_... */
   3106	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
   3107{
   3108	struct xfs_agf		*agf;		/* ag freelist header */
   3109	struct xfs_perag	*pag;		/* per allocation group data */
   3110	int			error;
   3111	int			allocbt_blks;
   3112
   3113	trace_xfs_alloc_read_agf(mp, agno);
   3114
   3115	/* We don't support trylock when freeing. */
   3116	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
   3117			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
   3118	ASSERT(agno != NULLAGNUMBER);
   3119	error = xfs_read_agf(mp, tp, agno,
   3120			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
   3121			bpp);
   3122	if (error)
   3123		return error;
   3124	ASSERT(!(*bpp)->b_error);
   3125
   3126	agf = (*bpp)->b_addr;
   3127	pag = (*bpp)->b_pag;
   3128	if (!pag->pagf_init) {
   3129		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
   3130		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
   3131		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
   3132		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
   3133		pag->pagf_levels[XFS_BTNUM_BNOi] =
   3134			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
   3135		pag->pagf_levels[XFS_BTNUM_CNTi] =
   3136			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
   3137		pag->pagf_levels[XFS_BTNUM_RMAPi] =
   3138			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
   3139		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
   3140		pag->pagf_init = 1;
   3141		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
   3142
   3143		/*
   3144		 * Update the in-core allocbt counter. Filter out the rmapbt
   3145		 * subset of the btreeblks counter because the rmapbt is managed
   3146		 * by perag reservation. Subtract one for the rmapbt root block
   3147		 * because the rmap counter includes it while the btreeblks
   3148		 * counter only tracks non-root blocks.
   3149		 */
   3150		allocbt_blks = pag->pagf_btreeblks;
   3151		if (xfs_has_rmapbt(mp))
   3152			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
   3153		if (allocbt_blks > 0)
   3154			atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
   3155	}
   3156#ifdef DEBUG
   3157	else if (!xfs_is_shutdown(mp)) {
   3158		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
   3159		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
   3160		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
   3161		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
   3162		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
   3163		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
   3164		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
   3165		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
   3166	}
   3167#endif
   3168	return 0;
   3169}
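
/*
 * Example for the allocbt_blks computation above (illustrative):
 * pagf_btreeblks = 10 counts the non-root blocks of the bnobt, cntbt
 * and rmapbt together, while agf_rmap_blocks = 4 counts all rmapbt
 * blocks including the root.  The rmapbt's non-root share is thus
 * 4 - 1 = 3, leaving 10 - 3 = 7 bnobt/cntbt blocks to add to
 * m_allocbt_blks.
 */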
   3170
   3171/*
   3172 * Allocate an extent (variable-size).
   3173 * Depending on the allocation type, we either look in a single allocation
   3174 * group or loop over the allocation groups to find the result.
   3175 */
   3176int				/* error */
   3177xfs_alloc_vextent(
   3178	struct xfs_alloc_arg	*args)	/* allocation argument structure */
   3179{
   3180	xfs_agblock_t		agsize;	/* allocation group size */
   3181	int			error;
   3182	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
   3183	struct xfs_mount	*mp;	/* mount structure pointer */
   3184	xfs_agnumber_t		sagno;	/* starting allocation group number */
   3185	xfs_alloctype_t		type;	/* input allocation type */
   3186	int			bump_rotor = 0;
   3187	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */
   3188
   3189	mp = args->mp;
   3190	type = args->otype = args->type;
   3191	args->agbno = NULLAGBLOCK;
   3192	/*
   3193	 * Just fix this up, for the case where the last a.g. is shorter
   3194	 * (or there's only one a.g.) and the caller couldn't easily figure
   3195	 * that out (xfs_bmap_alloc).
   3196	 */
   3197	agsize = mp->m_sb.sb_agblocks;
   3198	if (args->maxlen > agsize)
   3199		args->maxlen = agsize;
   3200	if (args->alignment == 0)
   3201		args->alignment = 1;
   3202	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
   3203	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
   3204	ASSERT(args->minlen <= args->maxlen);
   3205	ASSERT(args->minlen <= agsize);
   3206	ASSERT(args->mod < args->prod);
   3207	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
   3208	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
   3209	    args->minlen > args->maxlen || args->minlen > agsize ||
   3210	    args->mod >= args->prod) {
   3211		args->fsbno = NULLFSBLOCK;
   3212		trace_xfs_alloc_vextent_badargs(args);
   3213		return 0;
   3214	}
   3215
   3216	switch (type) {
   3217	case XFS_ALLOCTYPE_THIS_AG:
   3218	case XFS_ALLOCTYPE_NEAR_BNO:
   3219	case XFS_ALLOCTYPE_THIS_BNO:
   3220		/*
   3221		 * These three force us into a single a.g.
   3222		 */
   3223		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
   3224		args->pag = xfs_perag_get(mp, args->agno);
   3225		error = xfs_alloc_fix_freelist(args, 0);
   3226		if (error) {
   3227			trace_xfs_alloc_vextent_nofix(args);
   3228			goto error0;
   3229		}
   3230		if (!args->agbp) {
   3231			trace_xfs_alloc_vextent_noagbp(args);
   3232			break;
   3233		}
   3234		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
   3235		if ((error = xfs_alloc_ag_vextent(args)))
   3236			goto error0;
   3237		break;
   3238	case XFS_ALLOCTYPE_START_BNO:
   3239		/*
   3240		 * Try near allocation first, then anywhere-in-ag after
   3241		 * the first a.g. fails.
   3242		 */
   3243		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
   3244		    xfs_is_inode32(mp)) {
   3245			args->fsbno = XFS_AGB_TO_FSB(mp,
   3246					((mp->m_agfrotor / rotorstep) %
   3247					mp->m_sb.sb_agcount), 0);
   3248			bump_rotor = 1;
   3249		}
   3250		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
   3251		args->type = XFS_ALLOCTYPE_NEAR_BNO;
   3252		fallthrough;
   3253	case XFS_ALLOCTYPE_FIRST_AG:
   3254		/*
   3255		 * Rotate through the allocation groups looking for a winner.
   3256		 */
   3257		if (type == XFS_ALLOCTYPE_FIRST_AG) {
   3258			/*
   3259			 * Start with allocation group given by bno.
   3260			 */
   3261			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
   3262			args->type = XFS_ALLOCTYPE_THIS_AG;
   3263			sagno = 0;
   3264			flags = 0;
   3265		} else {
   3266			/*
   3267			 * Start with the given allocation group.
   3268			 */
   3269			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
   3270			flags = XFS_ALLOC_FLAG_TRYLOCK;
   3271		}
   3272		/*
   3273		 * Loop over allocation groups twice; first time with
   3274		 * trylock set, second time without.
   3275		 */
   3276		for (;;) {
   3277			args->pag = xfs_perag_get(mp, args->agno);
   3278			error = xfs_alloc_fix_freelist(args, flags);
   3279			if (error) {
   3280				trace_xfs_alloc_vextent_nofix(args);
   3281				goto error0;
   3282			}
   3283			/*
   3284			 * If we get a buffer back then the allocation will fly.
   3285			 */
   3286			if (args->agbp) {
   3287				if ((error = xfs_alloc_ag_vextent(args)))
   3288					goto error0;
   3289				break;
   3290			}
   3291
   3292			trace_xfs_alloc_vextent_loopfailed(args);
   3293
   3294			/*
   3295			 * Didn't work, figure out the next iteration.
   3296			 */
   3297			if (args->agno == sagno &&
   3298			    type == XFS_ALLOCTYPE_START_BNO)
   3299				args->type = XFS_ALLOCTYPE_THIS_AG;
   3300			/*
    3301			 * For the first allocation, we can try any AG to get
    3302			 * space.  However, if we already have allocated a
    3303			 * block, we don't want to try AGs whose number is below
    3304			 * sagno. Otherwise, we may end up with out-of-order
    3305			 * locking of AGF, which might cause deadlock.
    3306			 */
   3307			if (++(args->agno) == mp->m_sb.sb_agcount) {
   3308				if (args->tp->t_firstblock != NULLFSBLOCK)
   3309					args->agno = sagno;
   3310				else
   3311					args->agno = 0;
   3312			}
   3313			/*
   3314			 * Reached the starting a.g., must either be done
   3315			 * or switch to non-trylock mode.
   3316			 */
   3317			if (args->agno == sagno) {
   3318				if (flags == 0) {
   3319					args->agbno = NULLAGBLOCK;
   3320					trace_xfs_alloc_vextent_allfailed(args);
   3321					break;
   3322				}
   3323
   3324				flags = 0;
   3325				if (type == XFS_ALLOCTYPE_START_BNO) {
   3326					args->agbno = XFS_FSB_TO_AGBNO(mp,
   3327						args->fsbno);
   3328					args->type = XFS_ALLOCTYPE_NEAR_BNO;
   3329				}
   3330			}
   3331			xfs_perag_put(args->pag);
   3332		}
   3333		if (bump_rotor) {
   3334			if (args->agno == sagno)
   3335				mp->m_agfrotor = (mp->m_agfrotor + 1) %
   3336					(mp->m_sb.sb_agcount * rotorstep);
   3337			else
   3338				mp->m_agfrotor = (args->agno * rotorstep + 1) %
   3339					(mp->m_sb.sb_agcount * rotorstep);
   3340		}
   3341		break;
   3342	default:
   3343		ASSERT(0);
   3344		/* NOTREACHED */
   3345	}
   3346	if (args->agbno == NULLAGBLOCK)
   3347		args->fsbno = NULLFSBLOCK;
   3348	else {
   3349		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
   3350#ifdef DEBUG
   3351		ASSERT(args->len >= args->minlen);
   3352		ASSERT(args->len <= args->maxlen);
   3353		ASSERT(args->agbno % args->alignment == 0);
   3354		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
   3355			args->len);
   3356#endif
   3357
   3358	}
   3359	xfs_perag_put(args->pag);
   3360	return 0;
   3361error0:
   3362	xfs_perag_put(args->pag);
   3363	return error;
   3364}
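
/*
 * Illustrative sketch (not part of the allocator; the helper name is
 * hypothetical): the AG search order implemented by the loop above, reduced
 * to plain integer arithmetic.  Starting from AG sagno, the walk visits
 * sagno, sagno + 1, ..., agcount - 1 and then wraps - to AG 0 if nothing has
 * been allocated in this transaction yet, or back to sagno otherwise - so
 * that AGF locks are only ever taken in ascending AG order.
 */
static inline xfs_agnumber_t
xfs_demo_next_agno(
	xfs_agnumber_t		agno,
	xfs_agnumber_t		sagno,
	xfs_agnumber_t		agcount,
	bool			first_allocation)
{
	if (++agno == agcount)
		agno = first_allocation ? 0 : sagno;
	return agno;
}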
   3365
   3366/* Ensure that the freelist is at full capacity. */
   3367int
   3368xfs_free_extent_fix_freelist(
   3369	struct xfs_trans	*tp,
   3370	struct xfs_perag	*pag,
   3371	struct xfs_buf		**agbp)
   3372{
   3373	struct xfs_alloc_arg	args;
   3374	int			error;
   3375
   3376	memset(&args, 0, sizeof(struct xfs_alloc_arg));
   3377	args.tp = tp;
   3378	args.mp = tp->t_mountp;
   3379	args.agno = pag->pag_agno;
   3380	args.pag = pag;
   3381
   3382	/*
	/*
	 * Validate that the block number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		return error;

	*agbp = args.agbp;
	return 0;
}
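
/*
 * Hedged usage sketch (hypothetical helper): a caller that only has an AG
 * number might pair xfs_perag_get()/xfs_perag_put() around the freelist
 * fixup, much as __xfs_free_extent() below does.  Error handling is
 * abbreviated.
 */
static inline int
xfs_demo_fix_freelist_by_agno(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		**agbpp)
{
	struct xfs_perag	*pag = xfs_perag_get(tp->t_mountp, agno);
	int			error;

	error = xfs_free_extent_fix_freelist(tp, pag, agbpp);
	xfs_perag_put(pag);
	return error;
}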

/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 */
int
__xfs_free_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	bool				skip_discard)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_buf			*agbp;
	xfs_agnumber_t			agno = XFS_FSB_TO_AGNO(mp, bno);
	xfs_agblock_t			agbno = XFS_FSB_TO_AGBNO(mp, bno);
	struct xfs_agf			*agf;
	int				error;
	unsigned int			busy_flags = 0;
	struct xfs_perag		*pag;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	pag = xfs_perag_get(mp, agno);
	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error)
		goto err;
	agf = agbp->b_addr;

	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now that we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
	if (error)
		goto err_release;

	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	xfs_perag_put(pag);
	return 0;

err_release:
	xfs_trans_brelse(tp, agbp);
err:
	xfs_perag_put(pag);
	return error;
}
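
/*
 * Illustrative sketch (not an exported interface; the name is hypothetical):
 * freeing a single block with no AG reservation and discard enabled.  The
 * owner info pointer is left to the caller because the rmap owner depends on
 * what the block was used for.
 */
static inline int
xfs_demo_free_one_block(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsbno,
	const struct xfs_owner_info	*oinfo)
{
	/* length 1, XFS_AG_RESV_NONE, and skip_discard == false */
	return __xfs_free_extent(tp, fsbno, 1, oinfo, XFS_AG_RESV_NONE,
			false);
}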

struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;
	void				*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_alloc_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_alloc_query_range_info	*query = priv;
	struct xfs_alloc_rec_incore		irec;

	irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
	return query->fn(cur, &irec, query->priv);
}

/* Find all free space within a given range of blocks. */
int
xfs_alloc_query_range(
	struct xfs_btree_cur			*cur,
	const struct xfs_alloc_rec_incore	*low_rec,
	const struct xfs_alloc_rec_incore	*high_rec,
	xfs_alloc_query_range_fn		fn,
	void					*priv)
{
	union xfs_btree_irec			low_brec;
	union xfs_btree_irec			high_brec;
	struct xfs_alloc_query_range_info	query;

	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
	low_brec.a = *low_rec;
	high_brec.a = *high_rec;
	query.priv = priv;
	query.fn = fn;
	return xfs_btree_query_range(cur, &low_brec, &high_brec,
			xfs_alloc_query_range_helper, &query);
}
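
/*
 * Hedged usage sketch (hypothetical names, not part of this file): counting
 * the free-space records that xfs_alloc_query_range() reports for a range of
 * AG blocks.  The callback shape follows the query->fn() invocation in
 * xfs_alloc_query_range_helper() above; the authoritative typedef lives in
 * xfs_alloc.h.  @priv carries a caller-owned counter.
 */
static int
xfs_demo_count_free_recs(
	struct xfs_btree_cur			*cur,
	const struct xfs_alloc_rec_incore	*rec,
	void					*priv)
{
	unsigned long	*nr = priv;

	(*nr)++;
	return 0;
}

/*
 * Assumed invocation: @cur must be a by-bno btree cursor (the ASSERT above
 * enforces this), and the high key saturates ar_blockcount much as
 * xfs_alloc_has_record() below saturates its high key.
 */
static inline int
xfs_demo_count_free_in_range(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned long		*nr)
{
	struct xfs_alloc_rec_incore	low = {
		.ar_startblock = bno,
	};
	struct xfs_alloc_rec_incore	high = {
		.ar_startblock = bno + len - 1,
		.ar_blockcount = (xfs_extlen_t)-1,
	};

	return xfs_alloc_query_range(cur, &low, &high,
			xfs_demo_count_free_recs, nr);
}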

/* Find all free space records. */
int
xfs_alloc_query_all(
	struct xfs_btree_cur			*cur,
	xfs_alloc_query_range_fn		fn,
	void					*priv)
{
	struct xfs_alloc_query_range_info	query;

	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
	query.priv = priv;
	query.fn = fn;
	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
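
/*
 * Sketch (hypothetical helper, for illustration only): the counter callback
 * sketched above can equally be handed to xfs_alloc_query_all() to tally
 * every free extent tracked by the by-bno btree.
 */
static inline int
xfs_demo_count_all_free(
	struct xfs_btree_cur	*cur,
	unsigned long		*nr)
{
	return xfs_alloc_query_all(cur, xfs_demo_count_free_recs, nr);
}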

/* Is there a record covering a given extent? */
int
xfs_alloc_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.a.ar_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.a.ar_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}
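
/*
 * Sketch (hypothetical wrapper): zeroing the low key and filling the high
 * key with 0xFF bytes constructs the smallest possible record at @bno and
 * the largest possible record at @bno + @len - 1, so the query covers the
 * whole key range in between.  A caller asking "is there a free-space record
 * in this block range?" might use it like so.
 */
static inline int
xfs_demo_range_has_free_space(
	struct xfs_btree_cur	*bno_cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*is_free)
{
	return xfs_alloc_has_record(bno_cur, bno, len, is_free);
}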

/*
 * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
 * error code or XFS_ITER_*.
 */
int
xfs_agfl_walk(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf,
	struct xfs_buf		*agflbp,
	xfs_agfl_walk_fn	walk_fn,
	void			*priv)
{
	__be32			*agfl_bno;
	unsigned int		i;
	int			error;

	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	i = be32_to_cpu(agf->agf_flfirst);

	/* Nothing to walk in an empty AGFL. */
	if (agf->agf_flcount == cpu_to_be32(0))
		return 0;

	/* Otherwise, walk from first to last, wrapping as needed. */
	for (;;) {
		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
		if (error)
			return error;
		if (i == be32_to_cpu(agf->agf_fllast))
			break;
		if (++i == xfs_agfl_size(mp))
			i = 0;
	}

	return 0;
}
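
/*
 * Hedged usage sketch: a minimal AGFL walk callback that counts the blocks
 * on the freelist.  The parameter list mirrors the walk_fn() invocation
 * above; the helper name is hypothetical.
 */
static int
xfs_demo_count_agfl_blocks(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	unsigned int	*nr = priv;

	(*nr)++;
	return 0;		/* keep walking */
}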

int __init
xfs_extfree_intent_init_cache(void)
{
	xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
			sizeof(struct xfs_extent_free_item),
			0, 0, NULL);

	return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_extfree_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_extfree_item_cache);
	xfs_extfree_item_cache = NULL;
}