cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xfs_bmap.c (169145B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
      4 * All Rights Reserved.
      5 */
      6#include "xfs.h"
      7#include "xfs_fs.h"
      8#include "xfs_shared.h"
      9#include "xfs_format.h"
     10#include "xfs_log_format.h"
     11#include "xfs_trans_resv.h"
     12#include "xfs_bit.h"
     13#include "xfs_sb.h"
     14#include "xfs_mount.h"
     15#include "xfs_defer.h"
     16#include "xfs_dir2.h"
     17#include "xfs_inode.h"
     18#include "xfs_btree.h"
     19#include "xfs_trans.h"
     20#include "xfs_alloc.h"
     21#include "xfs_bmap.h"
     22#include "xfs_bmap_util.h"
     23#include "xfs_bmap_btree.h"
     24#include "xfs_rtalloc.h"
     25#include "xfs_errortag.h"
     26#include "xfs_error.h"
     27#include "xfs_quota.h"
     28#include "xfs_trans_space.h"
     29#include "xfs_buf_item.h"
     30#include "xfs_trace.h"
     31#include "xfs_attr_leaf.h"
     32#include "xfs_filestream.h"
     33#include "xfs_rmap.h"
     34#include "xfs_ag.h"
     35#include "xfs_ag_resv.h"
     36#include "xfs_refcount.h"
     37#include "xfs_icache.h"
     38#include "xfs_iomap.h"
     39
     40struct kmem_cache		*xfs_bmap_intent_cache;
     41
     42/*
     43 * Miscellaneous helper functions
     44 */
     45
     46/*
     47 * Compute and fill in the value of the maximum depth of a bmap btree
     48 * in this filesystem.  Done once, during mount.
     49 */
     50void
     51xfs_bmap_compute_maxlevels(
     52	xfs_mount_t	*mp,		/* file system mount structure */
     53	int		whichfork)	/* data or attr fork */
     54{
     55	uint64_t	maxblocks;	/* max blocks at this level */
     56	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
     57	int		level;		/* btree level */
     58	int		maxrootrecs;	/* max records in root block */
     59	int		minleafrecs;	/* min records in leaf block */
     60	int		minnoderecs;	/* min records in node block */
     61	int		sz;		/* root block size */
     62
     63	/*
     64	 * The maximum number of extents in a fork, hence the maximum number of
     65	 * leaf entries, is controlled by the size of the on-disk extent count.
     66	 *
      67 * Note that we can no longer assume that if we are in ATTR1 the
      68 * fork offset of all the inodes will be
      69 * (xfs_default_attroffset(ip) >> 3), because we could have mounted with
      70 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoffs fixed
      71 * but probably at various positions. Therefore, for both ATTR1 and
     72	 * ATTR2 we have to assume the worst case scenario of a minimum size
     73	 * available.
     74	 */
     75	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
     76				whichfork);
     77	if (whichfork == XFS_DATA_FORK)
     78		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
     79	else
     80		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
     81
     82	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
     83	minleafrecs = mp->m_bmap_dmnr[0];
     84	minnoderecs = mp->m_bmap_dmnr[1];
     85	maxblocks = howmany_64(maxleafents, minleafrecs);
     86	for (level = 1; maxblocks > 1; level++) {
     87		if (maxblocks <= maxrootrecs)
     88			maxblocks = 1;
     89		else
     90			maxblocks = howmany_64(maxblocks, minnoderecs);
     91	}
     92	mp->m_bm_maxlevels[whichfork] = level;
     93	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
     94}
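
        /*
         * Worked example with hypothetical geometry: if maxleafents were
         * 1,000,000 and minleafrecs 125, the leaf level would need
         * howmany(1000000, 125) = 8000 blocks, and the next node level
         * howmany(8000, 125) = 64 blocks; assuming maxrootrecs >= 64 those
         * pointers fit in the inode root, so the loop exits with
         * m_bm_maxlevels = 3 (root, one node level, leaves).
         */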
     95
     96unsigned int
     97xfs_bmap_compute_attr_offset(
     98	struct xfs_mount	*mp)
     99{
    100	if (mp->m_sb.sb_inodesize == 256)
    101		return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
    102	return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
    103}
    104
    105STATIC int				/* error */
    106xfs_bmbt_lookup_eq(
    107	struct xfs_btree_cur	*cur,
    108	struct xfs_bmbt_irec	*irec,
    109	int			*stat)	/* success/failure */
    110{
    111	cur->bc_rec.b = *irec;
    112	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
    113}
    114
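        /*
         * Position the cursor at the first record in the btree: a zeroed
         * key sorts before any valid bmbt key, so an XFS_LOOKUP_GE lookup
         * lands on the leftmost record.
         */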
    115STATIC int				/* error */
    116xfs_bmbt_lookup_first(
    117	struct xfs_btree_cur	*cur,
    118	int			*stat)	/* success/failure */
    119{
    120	cur->bc_rec.b.br_startoff = 0;
    121	cur->bc_rec.b.br_startblock = 0;
    122	cur->bc_rec.b.br_blockcount = 0;
    123	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
    124}
    125
    126/*
    127 * Check if the inode needs to be converted to btree format.
    128 */
    129static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
    130{
    131	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
    132
    133	return whichfork != XFS_COW_FORK &&
    134		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
    135		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
    136}
    137
    138/*
    139 * Check if the inode should be converted to extent format.
    140 */
    141static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
    142{
    143	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
    144
    145	return whichfork != XFS_COW_FORK &&
    146		ifp->if_format == XFS_DINODE_FMT_BTREE &&
    147		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
    148}
    149
    150/*
    151 * Update the record referred to by cur to the value given by irec
    152 * This either works (return 0) or gets an EFSCORRUPTED error.
    153 */
    154STATIC int
    155xfs_bmbt_update(
    156	struct xfs_btree_cur	*cur,
    157	struct xfs_bmbt_irec	*irec)
    158{
    159	union xfs_btree_rec	rec;
    160
    161	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
    162	return xfs_btree_update(cur, &rec);
    163}
    164
    165/*
    166 * Compute the worst-case number of indirect blocks that will be used
    167 * for ip's delayed extent of length "len".
    168 */
    169STATIC xfs_filblks_t
    170xfs_bmap_worst_indlen(
    171	xfs_inode_t	*ip,		/* incore inode pointer */
    172	xfs_filblks_t	len)		/* delayed extent length */
    173{
    174	int		level;		/* btree level number */
    175	int		maxrecs;	/* maximum record count at this level */
    176	xfs_mount_t	*mp;		/* mount structure */
    177	xfs_filblks_t	rval;		/* return value */
    178
    179	mp = ip->i_mount;
    180	maxrecs = mp->m_bmap_dmxr[0];
    181	for (level = 0, rval = 0;
    182	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
    183	     level++) {
    184		len += maxrecs - 1;
    185		do_div(len, maxrecs);
    186		rval += len;
    187		if (len == 1)
    188			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
    189				level - 1;
    190		if (level == 0)
    191			maxrecs = mp->m_bmap_dmxr[1];
    192	}
    193	return rval;
    194}
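
        /*
         * Hypothetical numbers: for a delalloc extent of len = 1024 blocks
         * with m_bmap_dmxr[0] = 125 and m_bmap_dmxr[1] = 250, level 0
         * charges howmany(1024, 125) = 9 leaf blocks and level 1 charges
         * howmany(9, 250) = 1 node block; once len reaches 1, one more
         * block is charged for each remaining level up to the root.
         */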
    195
    196/*
    197 * Calculate the default attribute fork offset for newly created inodes.
    198 */
    199uint
    200xfs_default_attroffset(
    201	struct xfs_inode	*ip)
    202{
    203	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
    204		return roundup(sizeof(xfs_dev_t), 8);
    205	return M_IGEO(ip->i_mount)->attr_fork_offset;
    206}
    207
    208/*
    209 * Helper routine to reset inode i_forkoff field when switching attribute fork
    210 * from local to extent format - we reset it where possible to make space
    211 * available for inline data fork extents.
    212 */
    213STATIC void
    214xfs_bmap_forkoff_reset(
    215	xfs_inode_t	*ip,
    216	int		whichfork)
    217{
    218	if (whichfork == XFS_ATTR_FORK &&
    219	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
    220	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
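        		/* i_forkoff is stored in 8-byte units, hence the >> 3 */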
    221		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
    222
    223		if (dfl_forkoff > ip->i_forkoff)
    224			ip->i_forkoff = dfl_forkoff;
    225	}
    226}
    227
    228#ifdef DEBUG
    229STATIC struct xfs_buf *
    230xfs_bmap_get_bp(
    231	struct xfs_btree_cur	*cur,
    232	xfs_fsblock_t		bno)
    233{
    234	struct xfs_log_item	*lip;
    235	int			i;
    236
    237	if (!cur)
    238		return NULL;
    239
    240	for (i = 0; i < cur->bc_maxlevels; i++) {
    241		if (!cur->bc_levels[i].bp)
    242			break;
    243		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
    244			return cur->bc_levels[i].bp;
    245	}
    246
    247	/* Chase down all the log items to see if the bp is there */
    248	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
    249		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
    250
    251		if (bip->bli_item.li_type == XFS_LI_BUF &&
    252		    xfs_buf_daddr(bip->bli_buf) == bno)
    253			return bip->bli_buf;
    254	}
    255
    256	return NULL;
    257}
    258
    259STATIC void
    260xfs_check_block(
    261	struct xfs_btree_block	*block,
    262	xfs_mount_t		*mp,
    263	int			root,
    264	short			sz)
    265{
    266	int			i, j, dmxr;
    267	__be64			*pp, *thispa;	/* pointer to block address */
    268	xfs_bmbt_key_t		*prevp, *keyp;
    269
    270	ASSERT(be16_to_cpu(block->bb_level) > 0);
    271
    272	prevp = NULL;
     273	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
    274		dmxr = mp->m_bmap_dmxr[0];
    275		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
    276
    277		if (prevp) {
    278			ASSERT(be64_to_cpu(prevp->br_startoff) <
    279			       be64_to_cpu(keyp->br_startoff));
    280		}
    281		prevp = keyp;
    282
    283		/*
    284		 * Compare the block numbers to see if there are dups.
    285		 */
    286		if (root)
    287			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
    288		else
    289			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
    290
    291		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
    292			if (root)
    293				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
    294			else
    295				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
    296			if (*thispa == *pp) {
    297				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
    298					__func__, j, i,
    299					(unsigned long long)be64_to_cpu(*thispa));
     300			xfs_err(mp, "%s: ptrs are equal in node",
    301					__func__);
    302				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    303			}
    304		}
    305	}
    306}
    307
    308/*
    309 * Check that the extents for the inode ip are in the right order in all
     310 * btree leaves. This becomes prohibitively expensive for large extent count
     311 * files, so don't bother with inodes that have more than 10,000 extents in
     312 * them. The btree record ordering checks will still be done, so for such large
     313 * bmapbt constructs those checks will catch most corruptions.
    314 */
    315STATIC void
    316xfs_bmap_check_leaf_extents(
    317	struct xfs_btree_cur	*cur,	/* btree cursor or null */
    318	xfs_inode_t		*ip,		/* incore inode pointer */
    319	int			whichfork)	/* data or attr fork */
    320{
    321	struct xfs_mount	*mp = ip->i_mount;
    322	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
    323	struct xfs_btree_block	*block;	/* current btree block */
    324	xfs_fsblock_t		bno;	/* block # of "block" */
    325	struct xfs_buf		*bp;	/* buffer for "block" */
    326	int			error;	/* error return value */
    327	xfs_extnum_t		i=0, j;	/* index into the extents list */
    328	int			level;	/* btree level, for checking */
    329	__be64			*pp;	/* pointer to block address */
    330	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
    331	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
    332	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
    333	int			bp_release = 0;
    334
    335	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
    336		return;
    337
    338	/* skip large extent count inodes */
    339	if (ip->i_df.if_nextents > 10000)
    340		return;
    341
    342	bno = NULLFSBLOCK;
    343	block = ifp->if_broot;
    344	/*
    345	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
    346	 */
    347	level = be16_to_cpu(block->bb_level);
    348	ASSERT(level > 0);
    349	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
    350	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
    351	bno = be64_to_cpu(*pp);
    352
    353	ASSERT(bno != NULLFSBLOCK);
    354	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
    355	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
    356
    357	/*
    358	 * Go down the tree until leaf level is reached, following the first
    359	 * pointer (leftmost) at each level.
    360	 */
    361	while (level-- > 0) {
    362		/* See if buf is in cur first */
    363		bp_release = 0;
    364		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
    365		if (!bp) {
    366			bp_release = 1;
    367			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
    368						XFS_BMAP_BTREE_REF,
    369						&xfs_bmbt_buf_ops);
    370			if (error)
    371				goto error_norelse;
    372		}
    373		block = XFS_BUF_TO_BLOCK(bp);
    374		if (level == 0)
    375			break;
    376
    377		/*
    378		 * Check this block for basic sanity (increasing keys and
    379		 * no duplicate blocks).
    380		 */
    381
    382		xfs_check_block(block, mp, 0, 0);
    383		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
    384		bno = be64_to_cpu(*pp);
    385		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
    386			error = -EFSCORRUPTED;
    387			goto error0;
    388		}
    389		if (bp_release) {
    390			bp_release = 0;
    391			xfs_trans_brelse(NULL, bp);
    392		}
    393	}
    394
    395	/*
    396	 * Here with bp and block set to the leftmost leaf node in the tree.
    397	 */
    398	i = 0;
    399
    400	/*
    401	 * Loop over all leaf nodes checking that all extents are in the right order.
    402	 */
    403	for (;;) {
    404		xfs_fsblock_t	nextbno;
    405		xfs_extnum_t	num_recs;
    406
    407
    408		num_recs = xfs_btree_get_numrecs(block);
    409
    410		/*
    411		 * Read-ahead the next leaf block, if any.
    412		 */
    413
    414		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
    415
    416		/*
    417		 * Check all the extents to make sure they are OK.
    418		 * If we had a previous block, the last entry should
    419		 * conform with the first entry in this one.
    420		 */
    421
    422		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
    423		if (i) {
    424			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
    425			       xfs_bmbt_disk_get_blockcount(&last) <=
    426			       xfs_bmbt_disk_get_startoff(ep));
    427		}
    428		for (j = 1; j < num_recs; j++) {
    429			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
    430			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
    431			       xfs_bmbt_disk_get_blockcount(ep) <=
    432			       xfs_bmbt_disk_get_startoff(nextp));
    433			ep = nextp;
    434		}
    435
    436		last = *ep;
    437		i += num_recs;
    438		if (bp_release) {
    439			bp_release = 0;
    440			xfs_trans_brelse(NULL, bp);
    441		}
    442		bno = nextbno;
    443		/*
    444		 * If we've reached the end, stop.
    445		 */
    446		if (bno == NULLFSBLOCK)
    447			break;
    448
    449		bp_release = 0;
    450		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
    451		if (!bp) {
    452			bp_release = 1;
    453			error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
    454						XFS_BMAP_BTREE_REF,
    455						&xfs_bmbt_buf_ops);
    456			if (error)
    457				goto error_norelse;
    458		}
    459		block = XFS_BUF_TO_BLOCK(bp);
    460	}
    461
    462	return;
    463
    464error0:
    465	xfs_warn(mp, "%s: at error0", __func__);
    466	if (bp_release)
    467		xfs_trans_brelse(NULL, bp);
    468error_norelse:
    469	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
    470		__func__, i);
    471	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
    472	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    473	return;
    474}
    475
    476/*
    477 * Validate that the bmbt_irecs being returned from bmapi are valid
    478 * given the caller's original parameters.  Specifically check the
    479 * ranges of the returned irecs to ensure that they only extend beyond
    480 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
    481 */
    482STATIC void
    483xfs_bmap_validate_ret(
    484	xfs_fileoff_t		bno,
    485	xfs_filblks_t		len,
    486	uint32_t		flags,
    487	xfs_bmbt_irec_t		*mval,
    488	int			nmap,
    489	int			ret_nmap)
    490{
    491	int			i;		/* index to map values */
    492
    493	ASSERT(ret_nmap <= nmap);
    494
    495	for (i = 0; i < ret_nmap; i++) {
    496		ASSERT(mval[i].br_blockcount > 0);
    497		if (!(flags & XFS_BMAPI_ENTIRE)) {
    498			ASSERT(mval[i].br_startoff >= bno);
    499			ASSERT(mval[i].br_blockcount <= len);
    500			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
    501			       bno + len);
    502		} else {
    503			ASSERT(mval[i].br_startoff < bno + len);
    504			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
    505			       bno);
    506		}
    507		ASSERT(i == 0 ||
    508		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
    509		       mval[i].br_startoff);
    510		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
    511		       mval[i].br_startblock != HOLESTARTBLOCK);
    512		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
    513		       mval[i].br_state == XFS_EXT_UNWRITTEN);
    514	}
    515}
    516
    517#else
    518#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
    519#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
    520#endif /* DEBUG */
    521
    522/*
    523 * Inode fork format manipulation functions
    524 */
    525
    526/*
    527 * Convert the inode format to extent format if it currently is in btree format,
    528 * but the extent list is small enough that it fits into the extent format.
    529 *
    530 * Since the extents are already in-core, all we have to do is give up the space
    531 * for the btree root and pitch the leaf block.
    532 */
    533STATIC int				/* error */
    534xfs_bmap_btree_to_extents(
    535	struct xfs_trans	*tp,	/* transaction pointer */
    536	struct xfs_inode	*ip,	/* incore inode pointer */
    537	struct xfs_btree_cur	*cur,	/* btree cursor */
    538	int			*logflagsp, /* inode logging flags */
    539	int			whichfork)  /* data or attr fork */
    540{
    541	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
    542	struct xfs_mount	*mp = ip->i_mount;
    543	struct xfs_btree_block	*rblock = ifp->if_broot;
    544	struct xfs_btree_block	*cblock;/* child btree block */
    545	xfs_fsblock_t		cbno;	/* child block number */
    546	struct xfs_buf		*cbp;	/* child block's buffer */
    547	int			error;	/* error return value */
    548	__be64			*pp;	/* ptr to block address */
    549	struct xfs_owner_info	oinfo;
    550
    551	/* check if we actually need the extent format first: */
    552	if (!xfs_bmap_wants_extents(ip, whichfork))
    553		return 0;
    554
    555	ASSERT(cur);
    556	ASSERT(whichfork != XFS_COW_FORK);
    557	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
    558	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
    559	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
    560	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
    561
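        	/* the root's single pointer addresses the only leaf block */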
    562	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
    563	cbno = be64_to_cpu(*pp);
    564#ifdef DEBUG
    565	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
    566		return -EFSCORRUPTED;
    567#endif
    568	error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
    569				&xfs_bmbt_buf_ops);
    570	if (error)
    571		return error;
    572	cblock = XFS_BUF_TO_BLOCK(cbp);
    573	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
    574		return error;
    575	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
    576	xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
    577	ip->i_nblocks--;
    578	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
    579	xfs_trans_binval(tp, cbp);
    580	if (cur->bc_levels[0].bp == cbp)
    581		cur->bc_levels[0].bp = NULL;
    582	xfs_iroot_realloc(ip, -1, whichfork);
    583	ASSERT(ifp->if_broot == NULL);
    584	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
    585	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
    586	return 0;
    587}
    588
    589/*
    590 * Convert an extents-format file into a btree-format file.
    591 * The new file will have a root block (in the inode) and a single child block.
    592 */
    593STATIC int					/* error */
    594xfs_bmap_extents_to_btree(
    595	struct xfs_trans	*tp,		/* transaction pointer */
    596	struct xfs_inode	*ip,		/* incore inode pointer */
    597	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
    598	int			wasdel,		/* converting a delayed alloc */
    599	int			*logflagsp,	/* inode logging flags */
    600	int			whichfork)	/* data or attr fork */
    601{
    602	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
    603	struct xfs_buf		*abp;		/* buffer for ablock */
    604	struct xfs_alloc_arg	args;		/* allocation arguments */
    605	struct xfs_bmbt_rec	*arp;		/* child record pointer */
    606	struct xfs_btree_block	*block;		/* btree root block */
    607	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
    608	int			error;		/* error return value */
    609	struct xfs_ifork	*ifp;		/* inode fork pointer */
    610	struct xfs_bmbt_key	*kp;		/* root block key pointer */
    611	struct xfs_mount	*mp;		/* mount structure */
    612	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
    613	struct xfs_iext_cursor	icur;
    614	struct xfs_bmbt_irec	rec;
    615	xfs_extnum_t		cnt = 0;
    616
    617	mp = ip->i_mount;
    618	ASSERT(whichfork != XFS_COW_FORK);
    619	ifp = XFS_IFORK_PTR(ip, whichfork);
    620	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
    621
    622	/*
    623	 * Make space in the inode incore. This needs to be undone if we fail
    624	 * to expand the root.
    625	 */
    626	xfs_iroot_realloc(ip, 1, whichfork);
    627
    628	/*
    629	 * Fill in the root.
    630	 */
    631	block = ifp->if_broot;
    632	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
    633				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
    634				 XFS_BTREE_LONG_PTRS);
    635	/*
    636	 * Need a cursor.  Can't allocate until bb_level is filled in.
    637	 */
    638	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
    639	cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
    640	/*
    641	 * Convert to a btree with two levels, one record in root.
    642	 */
    643	ifp->if_format = XFS_DINODE_FMT_BTREE;
    644	memset(&args, 0, sizeof(args));
    645	args.tp = tp;
    646	args.mp = mp;
    647	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
    648	if (tp->t_firstblock == NULLFSBLOCK) {
    649		args.type = XFS_ALLOCTYPE_START_BNO;
    650		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
    651	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
    652		args.type = XFS_ALLOCTYPE_START_BNO;
    653		args.fsbno = tp->t_firstblock;
    654	} else {
    655		args.type = XFS_ALLOCTYPE_NEAR_BNO;
    656		args.fsbno = tp->t_firstblock;
    657	}
    658	args.minlen = args.maxlen = args.prod = 1;
    659	args.wasdel = wasdel;
    660	*logflagsp = 0;
    661	error = xfs_alloc_vextent(&args);
    662	if (error)
    663		goto out_root_realloc;
    664
    665	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
    666		error = -ENOSPC;
    667		goto out_root_realloc;
    668	}
    669
    670	/*
    671	 * Allocation can't fail, the space was reserved.
    672	 */
    673	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
    674	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
    675	tp->t_firstblock = args.fsbno;
    676	cur->bc_ino.allocated++;
    677	ip->i_nblocks++;
    678	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
    679	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
    680			XFS_FSB_TO_DADDR(mp, args.fsbno),
    681			mp->m_bsize, 0, &abp);
    682	if (error)
    683		goto out_unreserve_dquot;
    684
    685	/*
    686	 * Fill in the child block.
    687	 */
    688	abp->b_ops = &xfs_bmbt_buf_ops;
    689	ablock = XFS_BUF_TO_BLOCK(abp);
    690	xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
    691				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
    692				XFS_BTREE_LONG_PTRS);
    693
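        	/* copy the real extents; delalloc records own no blocks yet */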
    694	for_each_xfs_iext(ifp, &icur, &rec) {
    695		if (isnullstartblock(rec.br_startblock))
    696			continue;
    697		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
    698		xfs_bmbt_disk_set_all(arp, &rec);
    699		cnt++;
    700	}
    701	ASSERT(cnt == ifp->if_nextents);
    702	xfs_btree_set_numrecs(ablock, cnt);
    703
    704	/*
    705	 * Fill in the root key and pointer.
    706	 */
    707	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
    708	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
    709	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
    710	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
    711						be16_to_cpu(block->bb_level)));
    712	*pp = cpu_to_be64(args.fsbno);
    713
    714	/*
    715	 * Do all this logging at the end so that
    716	 * the root is at the right level.
    717	 */
    718	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
    719	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
    720	ASSERT(*curp == NULL);
    721	*curp = cur;
    722	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
    723	return 0;
    724
    725out_unreserve_dquot:
    726	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
    727out_root_realloc:
    728	xfs_iroot_realloc(ip, -1, whichfork);
    729	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
    730	ASSERT(ifp->if_broot == NULL);
    731	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    732
    733	return error;
    734}
    735
    736/*
    737 * Convert a local file to an extents file.
    738 * This code is out of bounds for data forks of regular files,
    739 * since the file data needs to get logged so things will stay consistent.
    740 * (The bmap-level manipulations are ok, though).
    741 */
    742void
    743xfs_bmap_local_to_extents_empty(
    744	struct xfs_trans	*tp,
    745	struct xfs_inode	*ip,
    746	int			whichfork)
    747{
    748	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
    749
    750	ASSERT(whichfork != XFS_COW_FORK);
    751	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
    752	ASSERT(ifp->if_bytes == 0);
    753	ASSERT(ifp->if_nextents == 0);
    754
    755	xfs_bmap_forkoff_reset(ip, whichfork);
    756	ifp->if_u1.if_root = NULL;
    757	ifp->if_height = 0;
    758	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
    759	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    760}
    761
    762
    763STATIC int				/* error */
    764xfs_bmap_local_to_extents(
    765	xfs_trans_t	*tp,		/* transaction pointer */
    766	xfs_inode_t	*ip,		/* incore inode pointer */
    767	xfs_extlen_t	total,		/* total blocks needed by transaction */
    768	int		*logflagsp,	/* inode logging flags */
    769	int		whichfork,
    770	void		(*init_fn)(struct xfs_trans *tp,
    771				   struct xfs_buf *bp,
    772				   struct xfs_inode *ip,
    773				   struct xfs_ifork *ifp))
    774{
    775	int		error = 0;
    776	int		flags;		/* logging flags returned */
    777	struct xfs_ifork *ifp;		/* inode fork pointer */
    778	xfs_alloc_arg_t	args;		/* allocation arguments */
    779	struct xfs_buf	*bp;		/* buffer for extent block */
    780	struct xfs_bmbt_irec rec;
    781	struct xfs_iext_cursor icur;
    782
    783	/*
    784	 * We don't want to deal with the case of keeping inode data inline yet.
    785	 * So sending the data fork of a regular inode is invalid.
    786	 */
    787	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
    788	ifp = XFS_IFORK_PTR(ip, whichfork);
    789	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
    790
    791	if (!ifp->if_bytes) {
    792		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
    793		flags = XFS_ILOG_CORE;
    794		goto done;
    795	}
    796
    797	flags = 0;
    798	error = 0;
    799	memset(&args, 0, sizeof(args));
    800	args.tp = tp;
    801	args.mp = ip->i_mount;
    802	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
    803	/*
    804	 * Allocate a block.  We know we need only one, since the
    805	 * file currently fits in an inode.
    806	 */
    807	if (tp->t_firstblock == NULLFSBLOCK) {
    808		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
    809		args.type = XFS_ALLOCTYPE_START_BNO;
    810	} else {
    811		args.fsbno = tp->t_firstblock;
    812		args.type = XFS_ALLOCTYPE_NEAR_BNO;
    813	}
    814	args.total = total;
    815	args.minlen = args.maxlen = args.prod = 1;
    816	error = xfs_alloc_vextent(&args);
    817	if (error)
    818		goto done;
    819
    820	/* Can't fail, the space was reserved. */
    821	ASSERT(args.fsbno != NULLFSBLOCK);
    822	ASSERT(args.len == 1);
    823	tp->t_firstblock = args.fsbno;
    824	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
    825			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
    826			args.mp->m_bsize, 0, &bp);
    827	if (error)
    828		goto done;
    829
    830	/*
    831	 * Initialize the block, copy the data and log the remote buffer.
    832	 *
    833	 * The callout is responsible for logging because the remote format
    834	 * might differ from the local format and thus we don't know how much to
    835	 * log here. Note that init_fn must also set the buffer log item type
    836	 * correctly.
    837	 */
    838	init_fn(tp, bp, ip, ifp);
    839
    840	/* account for the change in fork size */
    841	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
    842	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
    843	flags |= XFS_ILOG_CORE;
    844
    845	ifp->if_u1.if_root = NULL;
    846	ifp->if_height = 0;
    847
    848	rec.br_startoff = 0;
    849	rec.br_startblock = args.fsbno;
    850	rec.br_blockcount = 1;
    851	rec.br_state = XFS_EXT_NORM;
    852	xfs_iext_first(ifp, &icur);
    853	xfs_iext_insert(ip, &icur, &rec, 0);
    854
    855	ifp->if_nextents = 1;
    856	ip->i_nblocks = 1;
    857	xfs_trans_mod_dquot_byino(tp, ip,
    858		XFS_TRANS_DQ_BCOUNT, 1L);
    859	flags |= xfs_ilog_fext(whichfork);
    860
    861done:
    862	*logflagsp = flags;
    863	return error;
    864}
    865
    866/*
    867 * Called from xfs_bmap_add_attrfork to handle btree format files.
    868 */
    869STATIC int					/* error */
    870xfs_bmap_add_attrfork_btree(
    871	xfs_trans_t		*tp,		/* transaction pointer */
    872	xfs_inode_t		*ip,		/* incore inode pointer */
    873	int			*flags)		/* inode logging flags */
    874{
    875	struct xfs_btree_block	*block = ip->i_df.if_broot;
    876	struct xfs_btree_cur	*cur;		/* btree cursor */
    877	int			error;		/* error return value */
    878	xfs_mount_t		*mp;		/* file system mount struct */
    879	int			stat;		/* newroot status */
    880
    881	mp = ip->i_mount;
    882
    883	if (XFS_BMAP_BMDR_SPACE(block) <= XFS_IFORK_DSIZE(ip))
    884		*flags |= XFS_ILOG_DBROOT;
    885	else {
    886		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
    887		error = xfs_bmbt_lookup_first(cur, &stat);
    888		if (error)
    889			goto error0;
    890		/* must be at least one entry */
    891		if (XFS_IS_CORRUPT(mp, stat != 1)) {
    892			error = -EFSCORRUPTED;
    893			goto error0;
    894		}
    895		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
    896			goto error0;
    897		if (stat == 0) {
    898			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    899			return -ENOSPC;
    900		}
    901		cur->bc_ino.allocated = 0;
    902		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    903	}
    904	return 0;
    905error0:
    906	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    907	return error;
    908}
    909
    910/*
    911 * Called from xfs_bmap_add_attrfork to handle extents format files.
    912 */
    913STATIC int					/* error */
    914xfs_bmap_add_attrfork_extents(
    915	struct xfs_trans	*tp,		/* transaction pointer */
    916	struct xfs_inode	*ip,		/* incore inode pointer */
    917	int			*flags)		/* inode logging flags */
    918{
    919	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
    920	int			error;		/* error return value */
    921
    922	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
    923	    XFS_IFORK_DSIZE(ip))
    924		return 0;
    925	cur = NULL;
    926	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
    927					  XFS_DATA_FORK);
    928	if (cur) {
    929		cur->bc_ino.allocated = 0;
    930		xfs_btree_del_cursor(cur, error);
    931	}
    932	return error;
    933}
    934
    935/*
    936 * Called from xfs_bmap_add_attrfork to handle local format files. Each
    937 * different data fork content type needs a different callout to do the
    938 * conversion. Some are basic and only require special block initialisation
     939 * callouts for the data formatting; others (directories) are so specialised they
    940 * handle everything themselves.
    941 *
    942 * XXX (dgc): investigate whether directory conversion can use the generic
    943 * formatting callout. It should be possible - it's just a very complex
    944 * formatter.
    945 */
    946STATIC int					/* error */
    947xfs_bmap_add_attrfork_local(
    948	struct xfs_trans	*tp,		/* transaction pointer */
    949	struct xfs_inode	*ip,		/* incore inode pointer */
    950	int			*flags)		/* inode logging flags */
    951{
    952	struct xfs_da_args	dargs;		/* args for dir/attr code */
    953
    954	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
    955		return 0;
    956
    957	if (S_ISDIR(VFS_I(ip)->i_mode)) {
    958		memset(&dargs, 0, sizeof(dargs));
    959		dargs.geo = ip->i_mount->m_dir_geo;
    960		dargs.dp = ip;
    961		dargs.total = dargs.geo->fsbcount;
    962		dargs.whichfork = XFS_DATA_FORK;
    963		dargs.trans = tp;
    964		return xfs_dir2_sf_to_block(&dargs);
    965	}
    966
    967	if (S_ISLNK(VFS_I(ip)->i_mode))
    968		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
    969						 XFS_DATA_FORK,
    970						 xfs_symlink_local_to_remote);
    971
    972	/* should only be called for types that support local format data */
    973	ASSERT(0);
    974	return -EFSCORRUPTED;
    975}
    976
    977/*
    978 * Set an inode attr fork offset based on the format of the data fork.
    979 */
    980static int
    981xfs_bmap_set_attrforkoff(
    982	struct xfs_inode	*ip,
    983	int			size,
    984	int			*version)
    985{
    986	int			default_size = xfs_default_attroffset(ip) >> 3;
    987
    988	switch (ip->i_df.if_format) {
    989	case XFS_DINODE_FMT_DEV:
    990		ip->i_forkoff = default_size;
    991		break;
    992	case XFS_DINODE_FMT_LOCAL:
    993	case XFS_DINODE_FMT_EXTENTS:
    994	case XFS_DINODE_FMT_BTREE:
    995		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
    996		if (!ip->i_forkoff)
    997			ip->i_forkoff = default_size;
    998		else if (xfs_has_attr2(ip->i_mount) && version)
    999			*version = 2;
   1000		break;
   1001	default:
   1002		ASSERT(0);
   1003		return -EINVAL;
   1004	}
   1005
   1006	return 0;
   1007}
   1008
   1009/*
   1010 * Convert inode from non-attributed to attributed.
   1011 * Must not be in a transaction, ip must not be locked.
   1012 */
   1013int						/* error code */
   1014xfs_bmap_add_attrfork(
   1015	xfs_inode_t		*ip,		/* incore inode pointer */
   1016	int			size,		/* space new attribute needs */
   1017	int			rsvd)		/* xact may use reserved blks */
   1018{
   1019	xfs_mount_t		*mp;		/* mount structure */
   1020	xfs_trans_t		*tp;		/* transaction pointer */
   1021	int			blks;		/* space reservation */
   1022	int			version = 1;	/* superblock attr version */
   1023	int			logflags;	/* logging flags */
   1024	int			error;		/* error return value */
   1025
   1026	ASSERT(XFS_IFORK_Q(ip) == 0);
   1027
   1028	mp = ip->i_mount;
   1029	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
   1030
   1031	blks = XFS_ADDAFORK_SPACE_RES(mp);
   1032
   1033	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
   1034			rsvd, &tp);
   1035	if (error)
   1036		return error;
   1037	if (XFS_IFORK_Q(ip))
   1038		goto trans_cancel;
   1039
   1040	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
   1041	error = xfs_bmap_set_attrforkoff(ip, size, &version);
   1042	if (error)
   1043		goto trans_cancel;
   1044	ASSERT(ip->i_afp == NULL);
   1045
   1046	ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
   1047	logflags = 0;
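        	/* convert the data fork if it no longer fits the space left by the attr fork */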
   1048	switch (ip->i_df.if_format) {
   1049	case XFS_DINODE_FMT_LOCAL:
   1050		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
   1051		break;
   1052	case XFS_DINODE_FMT_EXTENTS:
   1053		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
   1054		break;
   1055	case XFS_DINODE_FMT_BTREE:
   1056		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
   1057		break;
   1058	default:
   1059		error = 0;
   1060		break;
   1061	}
   1062	if (logflags)
   1063		xfs_trans_log_inode(tp, ip, logflags);
   1064	if (error)
   1065		goto trans_cancel;
   1066	if (!xfs_has_attr(mp) ||
   1067	   (!xfs_has_attr2(mp) && version == 2)) {
   1068		bool log_sb = false;
   1069
   1070		spin_lock(&mp->m_sb_lock);
   1071		if (!xfs_has_attr(mp)) {
   1072			xfs_add_attr(mp);
   1073			log_sb = true;
   1074		}
   1075		if (!xfs_has_attr2(mp) && version == 2) {
   1076			xfs_add_attr2(mp);
   1077			log_sb = true;
   1078		}
   1079		spin_unlock(&mp->m_sb_lock);
   1080		if (log_sb)
   1081			xfs_log_sb(tp);
   1082	}
   1083
   1084	error = xfs_trans_commit(tp);
   1085	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   1086	return error;
   1087
   1088trans_cancel:
   1089	xfs_trans_cancel(tp);
   1090	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   1091	return error;
   1092}
   1093
   1094/*
   1095 * Internal and external extent tree search functions.
   1096 */
   1097
   1098struct xfs_iread_state {
   1099	struct xfs_iext_cursor	icur;
   1100	xfs_extnum_t		loaded;
   1101};
   1102
   1103/* Stuff every bmbt record from this block into the incore extent map. */
   1104static int
   1105xfs_iread_bmbt_block(
   1106	struct xfs_btree_cur	*cur,
   1107	int			level,
   1108	void			*priv)
   1109{
   1110	struct xfs_iread_state	*ir = priv;
   1111	struct xfs_mount	*mp = cur->bc_mp;
   1112	struct xfs_inode	*ip = cur->bc_ino.ip;
   1113	struct xfs_btree_block	*block;
   1114	struct xfs_buf		*bp;
   1115	struct xfs_bmbt_rec	*frp;
   1116	xfs_extnum_t		num_recs;
   1117	xfs_extnum_t		j;
   1118	int			whichfork = cur->bc_ino.whichfork;
   1119	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1120
   1121	block = xfs_btree_get_block(cur, level, &bp);
   1122
   1123	/* Abort if we find more records than nextents. */
   1124	num_recs = xfs_btree_get_numrecs(block);
   1125	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
   1126		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
   1127				(unsigned long long)ip->i_ino);
   1128		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
   1129				sizeof(*block), __this_address);
   1130		return -EFSCORRUPTED;
   1131	}
   1132
   1133	/* Copy records into the incore cache. */
   1134	frp = XFS_BMBT_REC_ADDR(mp, block, 1);
   1135	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
   1136		struct xfs_bmbt_irec	new;
   1137		xfs_failaddr_t		fa;
   1138
   1139		xfs_bmbt_disk_get_all(frp, &new);
   1140		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
   1141		if (fa) {
   1142			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
   1143					"xfs_iread_extents(2)", frp,
   1144					sizeof(*frp), fa);
   1145			return -EFSCORRUPTED;
   1146		}
   1147		xfs_iext_insert(ip, &ir->icur, &new,
   1148				xfs_bmap_fork_to_state(whichfork));
   1149		trace_xfs_read_extent(ip, &ir->icur,
   1150				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
   1151		xfs_iext_next(ifp, &ir->icur);
   1152	}
   1153
   1154	return 0;
   1155}
   1156
   1157/*
   1158 * Read in extents from a btree-format inode.
   1159 */
   1160int
   1161xfs_iread_extents(
   1162	struct xfs_trans	*tp,
   1163	struct xfs_inode	*ip,
   1164	int			whichfork)
   1165{
   1166	struct xfs_iread_state	ir;
   1167	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1168	struct xfs_mount	*mp = ip->i_mount;
   1169	struct xfs_btree_cur	*cur;
   1170	int			error;
   1171
   1172	if (!xfs_need_iread_extents(ifp))
   1173		return 0;
   1174
   1175	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
   1176
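        	/*
        	 * Walk the bmbt leaf blocks; xfs_iread_bmbt_block() copies
        	 * each record into the incore extent tree.
        	 */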
   1177	ir.loaded = 0;
   1178	xfs_iext_first(ifp, &ir.icur);
   1179	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   1180	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
   1181			XFS_BTREE_VISIT_RECORDS, &ir);
   1182	xfs_btree_del_cursor(cur, error);
   1183	if (error)
   1184		goto out;
   1185
   1186	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
   1187		error = -EFSCORRUPTED;
   1188		goto out;
   1189	}
   1190	ASSERT(ir.loaded == xfs_iext_count(ifp));
   1191	return 0;
   1192out:
   1193	xfs_iext_destroy(ifp);
   1194	return error;
   1195}
   1196
   1197/*
   1198 * Returns the relative block number of the first unused block(s) in the given
   1199 * fork with at least "len" logically contiguous blocks free.  This is the
    1200 * lowest-address hole if the fork has holes, else the first block past the end
    1201 * of the fork.  Returns 0 if the fork is currently local (in-inode).
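          * For example, starting from offset 0 with len = 2 and extents
          * mapped at [0,5) and [7,10), the hole at [5,7) satisfies the
          * request and *first_unused is set to 5.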
   1202 */
   1203int						/* error */
   1204xfs_bmap_first_unused(
   1205	struct xfs_trans	*tp,		/* transaction pointer */
   1206	struct xfs_inode	*ip,		/* incore inode */
   1207	xfs_extlen_t		len,		/* size of hole to find */
   1208	xfs_fileoff_t		*first_unused,	/* unused block */
   1209	int			whichfork)	/* data or attr fork */
   1210{
   1211	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1212	struct xfs_bmbt_irec	got;
   1213	struct xfs_iext_cursor	icur;
   1214	xfs_fileoff_t		lastaddr = 0;
   1215	xfs_fileoff_t		lowest, max;
   1216	int			error;
   1217
   1218	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
   1219		*first_unused = 0;
   1220		return 0;
   1221	}
   1222
   1223	ASSERT(xfs_ifork_has_extents(ifp));
   1224
   1225	error = xfs_iread_extents(tp, ip, whichfork);
   1226	if (error)
   1227		return error;
   1228
   1229	lowest = max = *first_unused;
   1230	for_each_xfs_iext(ifp, &icur, &got) {
   1231		/*
   1232		 * See if the hole before this extent will work.
   1233		 */
   1234		if (got.br_startoff >= lowest + len &&
   1235		    got.br_startoff - max >= len)
   1236			break;
   1237		lastaddr = got.br_startoff + got.br_blockcount;
   1238		max = XFS_FILEOFF_MAX(lastaddr, lowest);
   1239	}
   1240
   1241	*first_unused = max;
   1242	return 0;
   1243}
   1244
   1245/*
   1246 * Returns the file-relative block number of the last block - 1 before
   1247 * last_block (input value) in the file.
   1248 * This is not based on i_size, it is based on the extent records.
   1249 * Returns 0 for local files, as they do not have extent records.
   1250 */
   1251int						/* error */
   1252xfs_bmap_last_before(
   1253	struct xfs_trans	*tp,		/* transaction pointer */
   1254	struct xfs_inode	*ip,		/* incore inode */
   1255	xfs_fileoff_t		*last_block,	/* last block */
   1256	int			whichfork)	/* data or attr fork */
   1257{
   1258	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1259	struct xfs_bmbt_irec	got;
   1260	struct xfs_iext_cursor	icur;
   1261	int			error;
   1262
   1263	switch (ifp->if_format) {
   1264	case XFS_DINODE_FMT_LOCAL:
   1265		*last_block = 0;
   1266		return 0;
   1267	case XFS_DINODE_FMT_BTREE:
   1268	case XFS_DINODE_FMT_EXTENTS:
   1269		break;
   1270	default:
   1271		ASSERT(0);
   1272		return -EFSCORRUPTED;
   1273	}
   1274
   1275	error = xfs_iread_extents(tp, ip, whichfork);
   1276	if (error)
   1277		return error;
   1278
   1279	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
   1280		*last_block = 0;
   1281	return 0;
   1282}
   1283
   1284int
   1285xfs_bmap_last_extent(
   1286	struct xfs_trans	*tp,
   1287	struct xfs_inode	*ip,
   1288	int			whichfork,
   1289	struct xfs_bmbt_irec	*rec,
   1290	int			*is_empty)
   1291{
   1292	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1293	struct xfs_iext_cursor	icur;
   1294	int			error;
   1295
   1296	error = xfs_iread_extents(tp, ip, whichfork);
   1297	if (error)
   1298		return error;
   1299
   1300	xfs_iext_last(ifp, &icur);
   1301	if (!xfs_iext_get_extent(ifp, &icur, rec))
   1302		*is_empty = 1;
   1303	else
   1304		*is_empty = 0;
   1305	return 0;
   1306}
   1307
   1308/*
   1309 * Check the last inode extent to determine whether this allocation will result
   1310 * in blocks being allocated at the end of the file. When we allocate new data
   1311 * blocks at the end of the file which do not start at the previous data block,
   1312 * we will try to align the new blocks at stripe unit boundaries.
   1313 *
    1314 * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will be
    1315 * at or past the EOF.
   1316 */
   1317STATIC int
   1318xfs_bmap_isaeof(
   1319	struct xfs_bmalloca	*bma,
   1320	int			whichfork)
   1321{
   1322	struct xfs_bmbt_irec	rec;
   1323	int			is_empty;
   1324	int			error;
   1325
   1326	bma->aeof = false;
   1327	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
   1328				     &is_empty);
   1329	if (error)
   1330		return error;
   1331
   1332	if (is_empty) {
   1333		bma->aeof = true;
   1334		return 0;
   1335	}
   1336
   1337	/*
    1338	 * Check if we are allocating at or past the last extent, or at least into
   1339	 * the last delayed allocated extent.
   1340	 */
   1341	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
   1342		(bma->offset >= rec.br_startoff &&
   1343		 isnullstartblock(rec.br_startblock));
   1344	return 0;
   1345}
   1346
   1347/*
   1348 * Returns the file-relative block number of the first block past eof in
   1349 * the file.  This is not based on i_size, it is based on the extent records.
   1350 * Returns 0 for local files, as they do not have extent records.
   1351 */
   1352int
   1353xfs_bmap_last_offset(
   1354	struct xfs_inode	*ip,
   1355	xfs_fileoff_t		*last_block,
   1356	int			whichfork)
   1357{
   1358	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   1359	struct xfs_bmbt_irec	rec;
   1360	int			is_empty;
   1361	int			error;
   1362
   1363	*last_block = 0;
   1364
   1365	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
   1366		return 0;
   1367
   1368	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
   1369		return -EFSCORRUPTED;
   1370
   1371	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
   1372	if (error || is_empty)
   1373		return error;
   1374
   1375	*last_block = rec.br_startoff + rec.br_blockcount;
   1376	return 0;
   1377}
   1378
   1379/*
   1380 * Extent tree manipulation functions used during allocation.
   1381 */
   1382
   1383/*
   1384 * Convert a delayed allocation to a real allocation.
   1385 */
   1386STATIC int				/* error */
   1387xfs_bmap_add_extent_delay_real(
   1388	struct xfs_bmalloca	*bma,
   1389	int			whichfork)
   1390{
   1391	struct xfs_mount	*mp = bma->ip->i_mount;
   1392	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
   1393	struct xfs_bmbt_irec	*new = &bma->got;
   1394	int			error;	/* error return value */
   1395	int			i;	/* temp state */
   1396	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
   1397	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
   1398					/* left is 0, right is 1, prev is 2 */
   1399	int			rval=0;	/* return value (logging flags) */
   1400	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   1401	xfs_filblks_t		da_new; /* new count del alloc blocks used */
   1402	xfs_filblks_t		da_old; /* old count del alloc blocks used */
   1403	xfs_filblks_t		temp=0;	/* value for da_new calculations */
   1404	int			tmp_rval;	/* partial logging flags */
   1405	struct xfs_bmbt_irec	old;
   1406
   1407	ASSERT(whichfork != XFS_ATTR_FORK);
   1408	ASSERT(!isnullstartblock(new->br_startblock));
   1409	ASSERT(!bma->cur ||
   1410	       (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
   1411
   1412	XFS_STATS_INC(mp, xs_add_exlist);
   1413
   1414#define	LEFT		r[0]
   1415#define	RIGHT		r[1]
   1416#define	PREV		r[2]
   1417
   1418	/*
   1419	 * Set up a bunch of variables to make the tests simpler.
   1420	 */
   1421	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
   1422	new_endoff = new->br_startoff + new->br_blockcount;
   1423	ASSERT(isnullstartblock(PREV.br_startblock));
   1424	ASSERT(PREV.br_startoff <= new->br_startoff);
   1425	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
   1426
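        	/*
        	 * A delalloc extent encodes its worst-case indirect block
        	 * reservation in br_startblock; startblockval() extracts it.
        	 */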
   1427	da_old = startblockval(PREV.br_startblock);
   1428	da_new = 0;
   1429
   1430	/*
   1431	 * Set flags determining what part of the previous delayed allocation
   1432	 * extent is being replaced by a real allocation.
   1433	 */
   1434	if (PREV.br_startoff == new->br_startoff)
   1435		state |= BMAP_LEFT_FILLING;
   1436	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
   1437		state |= BMAP_RIGHT_FILLING;
   1438
   1439	/*
   1440	 * Check and set flags if this segment has a left neighbor.
   1441	 * Don't set contiguous if the combined extent would be too large.
   1442	 */
   1443	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
   1444		state |= BMAP_LEFT_VALID;
   1445		if (isnullstartblock(LEFT.br_startblock))
   1446			state |= BMAP_LEFT_DELAY;
   1447	}
   1448
   1449	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
   1450	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
   1451	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
   1452	    LEFT.br_state == new->br_state &&
   1453	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
   1454		state |= BMAP_LEFT_CONTIG;
   1455
   1456	/*
   1457	 * Check and set flags if this segment has a right neighbor.
   1458	 * Don't set contiguous if the combined extent would be too large.
   1459	 * Also check for all-three-contiguous being too large.
   1460	 */
   1461	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
   1462		state |= BMAP_RIGHT_VALID;
   1463		if (isnullstartblock(RIGHT.br_startblock))
   1464			state |= BMAP_RIGHT_DELAY;
   1465	}
   1466
   1467	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
   1468	    new_endoff == RIGHT.br_startoff &&
   1469	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
   1470	    new->br_state == RIGHT.br_state &&
   1471	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
   1472	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
   1473		       BMAP_RIGHT_FILLING)) !=
   1474		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
   1475		       BMAP_RIGHT_FILLING) ||
   1476	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
   1477			<= XFS_MAX_BMBT_EXTLEN))
   1478		state |= BMAP_RIGHT_CONTIG;
   1479
   1480	error = 0;
   1481	/*
   1482	 * Switch out based on the FILLING and CONTIG state bits.
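        	 * The four bits give sixteen combinations; for instance, LEFT and
        	 * RIGHT FILLING plus LEFT and RIGHT CONTIG means the new real
        	 * extent exactly replaces the delalloc record and merges with both
        	 * neighbours, collapsing three incore extents into one.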
   1483	 */
   1484	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
   1485			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
   1486	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
   1487	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   1488		/*
   1489		 * Filling in all of a previously delayed allocation extent.
   1490		 * The left and right neighbors are both contiguous with new.
   1491		 */
   1492		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
   1493
   1494		xfs_iext_remove(bma->ip, &bma->icur, state);
   1495		xfs_iext_remove(bma->ip, &bma->icur, state);
   1496		xfs_iext_prev(ifp, &bma->icur);
   1497		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
   1498		ifp->if_nextents--;
   1499
   1500		if (bma->cur == NULL)
   1501			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   1502		else {
   1503			rval = XFS_ILOG_CORE;
   1504			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
   1505			if (error)
   1506				goto done;
   1507			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1508				error = -EFSCORRUPTED;
   1509				goto done;
   1510			}
   1511			error = xfs_btree_delete(bma->cur, &i);
   1512			if (error)
   1513				goto done;
   1514			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1515				error = -EFSCORRUPTED;
   1516				goto done;
   1517			}
   1518			error = xfs_btree_decrement(bma->cur, 0, &i);
   1519			if (error)
   1520				goto done;
   1521			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1522				error = -EFSCORRUPTED;
   1523				goto done;
   1524			}
   1525			error = xfs_bmbt_update(bma->cur, &LEFT);
   1526			if (error)
   1527				goto done;
   1528		}
   1529		break;
   1530
   1531	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
   1532		/*
   1533		 * Filling in all of a previously delayed allocation extent.
   1534		 * The left neighbor is contiguous, the right is not.
   1535		 */
   1536		old = LEFT;
   1537		LEFT.br_blockcount += PREV.br_blockcount;
   1538
   1539		xfs_iext_remove(bma->ip, &bma->icur, state);
   1540		xfs_iext_prev(ifp, &bma->icur);
   1541		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
   1542
   1543		if (bma->cur == NULL)
   1544			rval = XFS_ILOG_DEXT;
   1545		else {
   1546			rval = 0;
   1547			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
   1548			if (error)
   1549				goto done;
   1550			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1551				error = -EFSCORRUPTED;
   1552				goto done;
   1553			}
   1554			error = xfs_bmbt_update(bma->cur, &LEFT);
   1555			if (error)
   1556				goto done;
   1557		}
   1558		break;
   1559
   1560	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   1561		/*
   1562		 * Filling in all of a previously delayed allocation extent.
   1563		 * The right neighbor is contiguous, the left is not. Take care
   1564		 * with delay -> unwritten extent allocation here because the
   1565		 * delalloc record we are overwriting is always written.
   1566		 */
   1567		PREV.br_startblock = new->br_startblock;
   1568		PREV.br_blockcount += RIGHT.br_blockcount;
   1569		PREV.br_state = new->br_state;
   1570
   1571		xfs_iext_next(ifp, &bma->icur);
   1572		xfs_iext_remove(bma->ip, &bma->icur, state);
   1573		xfs_iext_prev(ifp, &bma->icur);
   1574		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
   1575
   1576		if (bma->cur == NULL)
   1577			rval = XFS_ILOG_DEXT;
   1578		else {
   1579			rval = 0;
   1580			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
   1581			if (error)
   1582				goto done;
   1583			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1584				error = -EFSCORRUPTED;
   1585				goto done;
   1586			}
   1587			error = xfs_bmbt_update(bma->cur, &PREV);
   1588			if (error)
   1589				goto done;
   1590		}
   1591		break;
   1592
   1593	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
   1594		/*
   1595		 * Filling in all of a previously delayed allocation extent.
   1596		 * Neither the left nor right neighbors are contiguous with
   1597		 * the new one.
   1598		 */
   1599		PREV.br_startblock = new->br_startblock;
   1600		PREV.br_state = new->br_state;
   1601		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
   1602		ifp->if_nextents++;
   1603
   1604		if (bma->cur == NULL)
   1605			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   1606		else {
   1607			rval = XFS_ILOG_CORE;
   1608			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
   1609			if (error)
   1610				goto done;
   1611			if (XFS_IS_CORRUPT(mp, i != 0)) {
   1612				error = -EFSCORRUPTED;
   1613				goto done;
   1614			}
   1615			error = xfs_btree_insert(bma->cur, &i);
   1616			if (error)
   1617				goto done;
   1618			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1619				error = -EFSCORRUPTED;
   1620				goto done;
   1621			}
   1622		}
   1623		break;
   1624
   1625	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
   1626		/*
   1627		 * Filling in the first part of a previous delayed allocation.
   1628		 * The left neighbor is contiguous.
   1629		 */
   1630		old = LEFT;
   1631		temp = PREV.br_blockcount - new->br_blockcount;
   1632		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
   1633				startblockval(PREV.br_startblock));
   1634
   1635		LEFT.br_blockcount += new->br_blockcount;
   1636
   1637		PREV.br_blockcount = temp;
   1638		PREV.br_startoff += new->br_blockcount;
   1639		PREV.br_startblock = nullstartblock(da_new);
   1640
   1641		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
   1642		xfs_iext_prev(ifp, &bma->icur);
   1643		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
   1644
   1645		if (bma->cur == NULL)
   1646			rval = XFS_ILOG_DEXT;
   1647		else {
   1648			rval = 0;
   1649			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
   1650			if (error)
   1651				goto done;
   1652			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1653				error = -EFSCORRUPTED;
   1654				goto done;
   1655			}
   1656			error = xfs_bmbt_update(bma->cur, &LEFT);
   1657			if (error)
   1658				goto done;
   1659		}
   1660		break;
   1661
   1662	case BMAP_LEFT_FILLING:
   1663		/*
   1664		 * Filling in the first part of a previous delayed allocation.
   1665		 * The left neighbor is not contiguous.
   1666		 */
   1667		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
   1668		ifp->if_nextents++;
   1669
   1670		if (bma->cur == NULL)
   1671			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   1672		else {
   1673			rval = XFS_ILOG_CORE;
   1674			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
   1675			if (error)
   1676				goto done;
   1677			if (XFS_IS_CORRUPT(mp, i != 0)) {
   1678				error = -EFSCORRUPTED;
   1679				goto done;
   1680			}
   1681			error = xfs_btree_insert(bma->cur, &i);
   1682			if (error)
   1683				goto done;
   1684			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1685				error = -EFSCORRUPTED;
   1686				goto done;
   1687			}
   1688		}
   1689
   1690		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
   1691			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
   1692					&bma->cur, 1, &tmp_rval, whichfork);
   1693			rval |= tmp_rval;
   1694			if (error)
   1695				goto done;
   1696		}
   1697
   1698		temp = PREV.br_blockcount - new->br_blockcount;
   1699		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
   1700			startblockval(PREV.br_startblock) -
   1701			(bma->cur ? bma->cur->bc_ino.allocated : 0));
   1702
   1703		PREV.br_startoff = new_endoff;
   1704		PREV.br_blockcount = temp;
   1705		PREV.br_startblock = nullstartblock(da_new);
   1706		xfs_iext_next(ifp, &bma->icur);
   1707		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
   1708		xfs_iext_prev(ifp, &bma->icur);
   1709		break;
   1710
   1711	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   1712		/*
   1713		 * Filling in the last part of a previous delayed allocation.
   1714		 * The right neighbor is contiguous with the new allocation.
   1715		 */
   1716		old = RIGHT;
   1717		RIGHT.br_startoff = new->br_startoff;
   1718		RIGHT.br_startblock = new->br_startblock;
   1719		RIGHT.br_blockcount += new->br_blockcount;
   1720
   1721		if (bma->cur == NULL)
   1722			rval = XFS_ILOG_DEXT;
   1723		else {
   1724			rval = 0;
   1725			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
   1726			if (error)
   1727				goto done;
   1728			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1729				error = -EFSCORRUPTED;
   1730				goto done;
   1731			}
   1732			error = xfs_bmbt_update(bma->cur, &RIGHT);
   1733			if (error)
   1734				goto done;
   1735		}
   1736
   1737		temp = PREV.br_blockcount - new->br_blockcount;
   1738		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
   1739			startblockval(PREV.br_startblock));
   1740
   1741		PREV.br_blockcount = temp;
   1742		PREV.br_startblock = nullstartblock(da_new);
   1743
   1744		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
   1745		xfs_iext_next(ifp, &bma->icur);
   1746		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
   1747		break;
   1748
   1749	case BMAP_RIGHT_FILLING:
   1750		/*
   1751		 * Filling in the last part of a previous delayed allocation.
   1752		 * The right neighbor is not contiguous.
   1753		 */
   1754		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
   1755		ifp->if_nextents++;
   1756
   1757		if (bma->cur == NULL)
   1758			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   1759		else {
   1760			rval = XFS_ILOG_CORE;
   1761			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
   1762			if (error)
   1763				goto done;
   1764			if (XFS_IS_CORRUPT(mp, i != 0)) {
   1765				error = -EFSCORRUPTED;
   1766				goto done;
   1767			}
   1768			error = xfs_btree_insert(bma->cur, &i);
   1769			if (error)
   1770				goto done;
   1771			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1772				error = -EFSCORRUPTED;
   1773				goto done;
   1774			}
   1775		}
   1776
   1777		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
   1778			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
   1779				&bma->cur, 1, &tmp_rval, whichfork);
   1780			rval |= tmp_rval;
   1781			if (error)
   1782				goto done;
   1783		}
   1784
   1785		temp = PREV.br_blockcount - new->br_blockcount;
   1786		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
   1787			startblockval(PREV.br_startblock) -
   1788			(bma->cur ? bma->cur->bc_ino.allocated : 0));
   1789
   1790		PREV.br_startblock = nullstartblock(da_new);
   1791		PREV.br_blockcount = temp;
   1792		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
   1793		xfs_iext_next(ifp, &bma->icur);
   1794		break;
   1795
   1796	case 0:
   1797		/*
   1798		 * Filling in the middle part of a previous delayed allocation.
   1799		 * Contiguity is impossible here.
   1800		 * This case is avoided almost all the time.
   1801		 *
   1802		 * We start with a delayed allocation:
   1803		 *
   1804		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
   1805		 *  PREV @ idx
   1806		 *
    1807		 * and we are allocating:
   1808		 *                     +rrrrrrrrrrrrrrrrr+
   1809		 *			      new
   1810		 *
   1811		 * and we set it up for insertion as:
   1812		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
   1813		 *                            new
   1814		 *  PREV @ idx          LEFT              RIGHT
   1815		 *                      inserted at idx + 1
   1816		 */
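        		/*
        		 * Worked example with illustrative numbers (not from this
        		 * code): PREV is a delalloc extent covering file blocks
        		 * [0, 100) and new is a real allocation for [30, 50).  After
        		 * the split below, PREV is trimmed to the delalloc range
        		 * [0, 30), LEFT becomes the real extent [30, 50), and RIGHT
        		 * becomes the delalloc range [50, 100).  Each delalloc piece
        		 * gets a fresh worst-case indirect reservation from
        		 * xfs_bmap_worst_indlen(), which is why da_new is recomputed
        		 * from both startblockval() fields at the end of this case.
        		 */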
   1817		old = PREV;
   1818
   1819		/* LEFT is the new middle */
   1820		LEFT = *new;
   1821
   1822		/* RIGHT is the new right */
   1823		RIGHT.br_state = PREV.br_state;
   1824		RIGHT.br_startoff = new_endoff;
   1825		RIGHT.br_blockcount =
   1826			PREV.br_startoff + PREV.br_blockcount - new_endoff;
   1827		RIGHT.br_startblock =
   1828			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
   1829					RIGHT.br_blockcount));
   1830
   1831		/* truncate PREV */
   1832		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
   1833		PREV.br_startblock =
   1834			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
   1835					PREV.br_blockcount));
   1836		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
   1837
   1838		xfs_iext_next(ifp, &bma->icur);
   1839		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
   1840		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
   1841		ifp->if_nextents++;
   1842
   1843		if (bma->cur == NULL)
   1844			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   1845		else {
   1846			rval = XFS_ILOG_CORE;
   1847			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
   1848			if (error)
   1849				goto done;
   1850			if (XFS_IS_CORRUPT(mp, i != 0)) {
   1851				error = -EFSCORRUPTED;
   1852				goto done;
   1853			}
   1854			error = xfs_btree_insert(bma->cur, &i);
   1855			if (error)
   1856				goto done;
   1857			if (XFS_IS_CORRUPT(mp, i != 1)) {
   1858				error = -EFSCORRUPTED;
   1859				goto done;
   1860			}
   1861		}
   1862
   1863		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
   1864			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
   1865					&bma->cur, 1, &tmp_rval, whichfork);
   1866			rval |= tmp_rval;
   1867			if (error)
   1868				goto done;
   1869		}
   1870
   1871		da_new = startblockval(PREV.br_startblock) +
   1872			 startblockval(RIGHT.br_startblock);
   1873		break;
   1874
   1875	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   1876	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   1877	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
   1878	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
   1879	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   1880	case BMAP_LEFT_CONTIG:
   1881	case BMAP_RIGHT_CONTIG:
   1882		/*
   1883		 * These cases are all impossible.
   1884		 */
   1885		ASSERT(0);
   1886	}
   1887
   1888	/* add reverse mapping unless caller opted out */
   1889	if (!(bma->flags & XFS_BMAPI_NORMAP))
   1890		xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
   1891
   1892	/* convert to a btree if necessary */
   1893	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
   1894		int	tmp_logflags;	/* partial log flag return val */
   1895
   1896		ASSERT(bma->cur == NULL);
   1897		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
   1898				&bma->cur, da_old > 0, &tmp_logflags,
   1899				whichfork);
   1900		bma->logflags |= tmp_logflags;
   1901		if (error)
   1902			goto done;
   1903	}
   1904
   1905	if (da_new != da_old)
   1906		xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
   1907
   1908	if (bma->cur) {
   1909		da_new += bma->cur->bc_ino.allocated;
   1910		bma->cur->bc_ino.allocated = 0;
   1911	}
   1912
   1913	/* adjust for changes in reserved delayed indirect blocks */
   1914	if (da_new != da_old) {
   1915		ASSERT(state == 0 || da_new < da_old);
   1916		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
   1917				false);
   1918	}
   1919
   1920	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
   1921done:
   1922	if (whichfork != XFS_COW_FORK)
   1923		bma->logflags |= rval;
   1924	return error;
   1925#undef	LEFT
   1926#undef	RIGHT
   1927#undef	PREV
   1928}
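
        /*
         * The delayed-allocation conversion above and the unwritten-extent
         * conversion below both drive their case analysis off a bitmask of
         * FILLING/CONTIG flags.  A minimal standalone sketch of the technique
         * (illustrative only, not part of this file; the EX_* names are
         * hypothetical):
         */
        #if 0
        #define EX_LEFT_FILLING		(1U << 0)
        #define EX_RIGHT_FILLING	(1U << 1)
        #define EX_LEFT_CONTIG		(1U << 2)
        #define EX_RIGHT_CONTIG		(1U << 3)

        static void ex_classify(unsigned int state)
        {
        	switch (state & (EX_LEFT_FILLING | EX_RIGHT_FILLING |
        			 EX_LEFT_CONTIG | EX_RIGHT_CONTIG)) {
        	case EX_LEFT_FILLING | EX_RIGHT_FILLING |
        	     EX_LEFT_CONTIG | EX_RIGHT_CONTIG:
        		/* new fills PREV and merges with both neighbors: 3 -> 1 */
        		break;
        	case 0:
        		/* new lands in the middle of PREV: 1 -> 3 */
        		break;
        	/* ... one case per legal flag combination, as above ... */
        	}
        }
        #endif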
   1929
   1930/*
   1931 * Convert an unwritten allocation to a real allocation or vice versa.
   1932 */
   1933int					/* error */
   1934xfs_bmap_add_extent_unwritten_real(
   1935	struct xfs_trans	*tp,
   1936	xfs_inode_t		*ip,	/* incore inode pointer */
   1937	int			whichfork,
   1938	struct xfs_iext_cursor	*icur,
   1939	struct xfs_btree_cur	**curp,	/* if *curp is null, not a btree */
   1940	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
   1941	int			*logflagsp) /* inode logging flags */
   1942{
   1943	struct xfs_btree_cur	*cur;	/* btree cursor */
   1944	int			error;	/* error return value */
   1945	int			i;	/* temp state */
   1946	struct xfs_ifork	*ifp;	/* inode fork pointer */
   1947	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
   1948	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
   1949					/* left is 0, right is 1, prev is 2 */
    1950	int			rval = 0;	/* return value (logging flags) */
   1951	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   1952	struct xfs_mount	*mp = ip->i_mount;
   1953	struct xfs_bmbt_irec	old;
   1954
   1955	*logflagsp = 0;
   1956
   1957	cur = *curp;
   1958	ifp = XFS_IFORK_PTR(ip, whichfork);
   1959
   1960	ASSERT(!isnullstartblock(new->br_startblock));
   1961
   1962	XFS_STATS_INC(mp, xs_add_exlist);
   1963
   1964#define	LEFT		r[0]
   1965#define	RIGHT		r[1]
   1966#define	PREV		r[2]
   1967
   1968	/*
   1969	 * Set up a bunch of variables to make the tests simpler.
   1970	 */
   1971	error = 0;
   1972	xfs_iext_get_extent(ifp, icur, &PREV);
   1973	ASSERT(new->br_state != PREV.br_state);
   1974	new_endoff = new->br_startoff + new->br_blockcount;
   1975	ASSERT(PREV.br_startoff <= new->br_startoff);
   1976	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
   1977
   1978	/*
   1979	 * Set flags determining what part of the previous oldext allocation
   1980	 * extent is being replaced by a newext allocation.
   1981	 */
   1982	if (PREV.br_startoff == new->br_startoff)
   1983		state |= BMAP_LEFT_FILLING;
   1984	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
   1985		state |= BMAP_RIGHT_FILLING;
   1986
   1987	/*
   1988	 * Check and set flags if this segment has a left neighbor.
   1989	 * Don't set contiguous if the combined extent would be too large.
   1990	 */
   1991	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
   1992		state |= BMAP_LEFT_VALID;
   1993		if (isnullstartblock(LEFT.br_startblock))
   1994			state |= BMAP_LEFT_DELAY;
   1995	}
   1996
   1997	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
   1998	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
   1999	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
   2000	    LEFT.br_state == new->br_state &&
   2001	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
   2002		state |= BMAP_LEFT_CONTIG;
   2003
   2004	/*
   2005	 * Check and set flags if this segment has a right neighbor.
   2006	 * Don't set contiguous if the combined extent would be too large.
   2007	 * Also check for all-three-contiguous being too large.
   2008	 */
   2009	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
   2010		state |= BMAP_RIGHT_VALID;
   2011		if (isnullstartblock(RIGHT.br_startblock))
   2012			state |= BMAP_RIGHT_DELAY;
   2013	}
   2014
   2015	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
   2016	    new_endoff == RIGHT.br_startoff &&
   2017	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
   2018	    new->br_state == RIGHT.br_state &&
   2019	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
   2020	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
   2021		       BMAP_RIGHT_FILLING)) !=
   2022		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
   2023		       BMAP_RIGHT_FILLING) ||
   2024	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
   2025			<= XFS_MAX_BMBT_EXTLEN))
   2026		state |= BMAP_RIGHT_CONTIG;
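
        	/*
        	 * Reading the compound condition above: RIGHT only counts as
        	 * contiguous if either this is not the three-way merge case (all
        	 * of LEFT_CONTIG, LEFT_FILLING and RIGHT_FILLING already set), or
        	 * the merged LEFT + new + RIGHT extent still fits in
        	 * XFS_MAX_BMBT_EXTLEN.  For example, if LEFT and RIGHT are each
        	 * longer than half of XFS_MAX_BMBT_EXTLEN, the three-way sum
        	 * overflows, RIGHT_CONTIG stays clear, and only the two-way merge
        	 * with LEFT happens.
        	 */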
   2027
   2028	/*
   2029	 * Switch out based on the FILLING and CONTIG state bits.
   2030	 */
   2031	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
   2032			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
   2033	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
   2034	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   2035		/*
   2036		 * Setting all of a previous oldext extent to newext.
   2037		 * The left and right neighbors are both contiguous with new.
   2038		 */
   2039		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
   2040
   2041		xfs_iext_remove(ip, icur, state);
   2042		xfs_iext_remove(ip, icur, state);
   2043		xfs_iext_prev(ifp, icur);
   2044		xfs_iext_update_extent(ip, state, icur, &LEFT);
   2045		ifp->if_nextents -= 2;
   2046		if (cur == NULL)
   2047			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2048		else {
   2049			rval = XFS_ILOG_CORE;
   2050			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
   2051			if (error)
   2052				goto done;
   2053			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2054				error = -EFSCORRUPTED;
   2055				goto done;
   2056			}
    2057			error = xfs_btree_delete(cur, &i);
        			if (error)
   2058				goto done;
   2059			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2060				error = -EFSCORRUPTED;
   2061				goto done;
   2062			}
    2063			error = xfs_btree_decrement(cur, 0, &i);
        			if (error)
   2064				goto done;
   2065			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2066				error = -EFSCORRUPTED;
   2067				goto done;
   2068			}
    2069			error = xfs_btree_delete(cur, &i);
        			if (error)
   2070				goto done;
   2071			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2072				error = -EFSCORRUPTED;
   2073				goto done;
   2074			}
    2075			error = xfs_btree_decrement(cur, 0, &i);
        			if (error)
   2076				goto done;
   2077			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2078				error = -EFSCORRUPTED;
   2079				goto done;
   2080			}
   2081			error = xfs_bmbt_update(cur, &LEFT);
   2082			if (error)
   2083				goto done;
   2084		}
   2085		break;
   2086
   2087	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
   2088		/*
   2089		 * Setting all of a previous oldext extent to newext.
   2090		 * The left neighbor is contiguous, the right is not.
   2091		 */
   2092		LEFT.br_blockcount += PREV.br_blockcount;
   2093
   2094		xfs_iext_remove(ip, icur, state);
   2095		xfs_iext_prev(ifp, icur);
   2096		xfs_iext_update_extent(ip, state, icur, &LEFT);
   2097		ifp->if_nextents--;
   2098		if (cur == NULL)
   2099			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2100		else {
   2101			rval = XFS_ILOG_CORE;
   2102			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
   2103			if (error)
   2104				goto done;
   2105			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2106				error = -EFSCORRUPTED;
   2107				goto done;
   2108			}
    2109			error = xfs_btree_delete(cur, &i);
        			if (error)
   2110				goto done;
   2111			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2112				error = -EFSCORRUPTED;
   2113				goto done;
   2114			}
    2115			error = xfs_btree_decrement(cur, 0, &i);
        			if (error)
   2116				goto done;
   2117			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2118				error = -EFSCORRUPTED;
   2119				goto done;
   2120			}
   2121			error = xfs_bmbt_update(cur, &LEFT);
   2122			if (error)
   2123				goto done;
   2124		}
   2125		break;
   2126
   2127	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   2128		/*
   2129		 * Setting all of a previous oldext extent to newext.
   2130		 * The right neighbor is contiguous, the left is not.
   2131		 */
   2132		PREV.br_blockcount += RIGHT.br_blockcount;
   2133		PREV.br_state = new->br_state;
   2134
   2135		xfs_iext_next(ifp, icur);
   2136		xfs_iext_remove(ip, icur, state);
   2137		xfs_iext_prev(ifp, icur);
   2138		xfs_iext_update_extent(ip, state, icur, &PREV);
   2139		ifp->if_nextents--;
   2140
   2141		if (cur == NULL)
   2142			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2143		else {
   2144			rval = XFS_ILOG_CORE;
   2145			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
   2146			if (error)
   2147				goto done;
   2148			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2149				error = -EFSCORRUPTED;
   2150				goto done;
   2151			}
    2152			error = xfs_btree_delete(cur, &i);
        			if (error)
   2153				goto done;
   2154			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2155				error = -EFSCORRUPTED;
   2156				goto done;
   2157			}
    2158			error = xfs_btree_decrement(cur, 0, &i);
        			if (error)
   2159				goto done;
   2160			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2161				error = -EFSCORRUPTED;
   2162				goto done;
   2163			}
   2164			error = xfs_bmbt_update(cur, &PREV);
   2165			if (error)
   2166				goto done;
   2167		}
   2168		break;
   2169
   2170	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
   2171		/*
   2172		 * Setting all of a previous oldext extent to newext.
   2173		 * Neither the left nor right neighbors are contiguous with
   2174		 * the new one.
   2175		 */
   2176		PREV.br_state = new->br_state;
   2177		xfs_iext_update_extent(ip, state, icur, &PREV);
   2178
   2179		if (cur == NULL)
   2180			rval = XFS_ILOG_DEXT;
   2181		else {
   2182			rval = 0;
   2183			error = xfs_bmbt_lookup_eq(cur, new, &i);
   2184			if (error)
   2185				goto done;
   2186			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2187				error = -EFSCORRUPTED;
   2188				goto done;
   2189			}
   2190			error = xfs_bmbt_update(cur, &PREV);
   2191			if (error)
   2192				goto done;
   2193		}
   2194		break;
   2195
   2196	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
   2197		/*
   2198		 * Setting the first part of a previous oldext extent to newext.
   2199		 * The left neighbor is contiguous.
   2200		 */
   2201		LEFT.br_blockcount += new->br_blockcount;
   2202
   2203		old = PREV;
   2204		PREV.br_startoff += new->br_blockcount;
   2205		PREV.br_startblock += new->br_blockcount;
   2206		PREV.br_blockcount -= new->br_blockcount;
   2207
   2208		xfs_iext_update_extent(ip, state, icur, &PREV);
   2209		xfs_iext_prev(ifp, icur);
   2210		xfs_iext_update_extent(ip, state, icur, &LEFT);
   2211
   2212		if (cur == NULL)
   2213			rval = XFS_ILOG_DEXT;
   2214		else {
   2215			rval = 0;
   2216			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2217			if (error)
   2218				goto done;
   2219			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2220				error = -EFSCORRUPTED;
   2221				goto done;
   2222			}
   2223			error = xfs_bmbt_update(cur, &PREV);
   2224			if (error)
   2225				goto done;
   2226			error = xfs_btree_decrement(cur, 0, &i);
   2227			if (error)
   2228				goto done;
   2229			error = xfs_bmbt_update(cur, &LEFT);
   2230			if (error)
   2231				goto done;
   2232		}
   2233		break;
   2234
   2235	case BMAP_LEFT_FILLING:
   2236		/*
   2237		 * Setting the first part of a previous oldext extent to newext.
   2238		 * The left neighbor is not contiguous.
   2239		 */
   2240		old = PREV;
   2241		PREV.br_startoff += new->br_blockcount;
   2242		PREV.br_startblock += new->br_blockcount;
   2243		PREV.br_blockcount -= new->br_blockcount;
   2244
   2245		xfs_iext_update_extent(ip, state, icur, &PREV);
   2246		xfs_iext_insert(ip, icur, new, state);
   2247		ifp->if_nextents++;
   2248
   2249		if (cur == NULL)
   2250			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2251		else {
   2252			rval = XFS_ILOG_CORE;
   2253			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2254			if (error)
   2255				goto done;
   2256			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2257				error = -EFSCORRUPTED;
   2258				goto done;
   2259			}
   2260			error = xfs_bmbt_update(cur, &PREV);
   2261			if (error)
   2262				goto done;
   2263			cur->bc_rec.b = *new;
    2264			error = xfs_btree_insert(cur, &i);
        			if (error)
   2265				goto done;
   2266			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2267				error = -EFSCORRUPTED;
   2268				goto done;
   2269			}
   2270		}
   2271		break;
   2272
   2273	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
   2274		/*
   2275		 * Setting the last part of a previous oldext extent to newext.
   2276		 * The right neighbor is contiguous with the new allocation.
   2277		 */
   2278		old = PREV;
   2279		PREV.br_blockcount -= new->br_blockcount;
   2280
   2281		RIGHT.br_startoff = new->br_startoff;
   2282		RIGHT.br_startblock = new->br_startblock;
   2283		RIGHT.br_blockcount += new->br_blockcount;
   2284
   2285		xfs_iext_update_extent(ip, state, icur, &PREV);
   2286		xfs_iext_next(ifp, icur);
   2287		xfs_iext_update_extent(ip, state, icur, &RIGHT);
   2288
   2289		if (cur == NULL)
   2290			rval = XFS_ILOG_DEXT;
   2291		else {
   2292			rval = 0;
   2293			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2294			if (error)
   2295				goto done;
   2296			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2297				error = -EFSCORRUPTED;
   2298				goto done;
   2299			}
   2300			error = xfs_bmbt_update(cur, &PREV);
   2301			if (error)
   2302				goto done;
   2303			error = xfs_btree_increment(cur, 0, &i);
   2304			if (error)
   2305				goto done;
   2306			error = xfs_bmbt_update(cur, &RIGHT);
   2307			if (error)
   2308				goto done;
   2309		}
   2310		break;
   2311
   2312	case BMAP_RIGHT_FILLING:
   2313		/*
   2314		 * Setting the last part of a previous oldext extent to newext.
   2315		 * The right neighbor is not contiguous.
   2316		 */
   2317		old = PREV;
   2318		PREV.br_blockcount -= new->br_blockcount;
   2319
   2320		xfs_iext_update_extent(ip, state, icur, &PREV);
   2321		xfs_iext_next(ifp, icur);
   2322		xfs_iext_insert(ip, icur, new, state);
   2323		ifp->if_nextents++;
   2324
   2325		if (cur == NULL)
   2326			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2327		else {
   2328			rval = XFS_ILOG_CORE;
   2329			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2330			if (error)
   2331				goto done;
   2332			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2333				error = -EFSCORRUPTED;
   2334				goto done;
   2335			}
   2336			error = xfs_bmbt_update(cur, &PREV);
   2337			if (error)
   2338				goto done;
   2339			error = xfs_bmbt_lookup_eq(cur, new, &i);
   2340			if (error)
   2341				goto done;
   2342			if (XFS_IS_CORRUPT(mp, i != 0)) {
   2343				error = -EFSCORRUPTED;
   2344				goto done;
   2345			}
    2346			error = xfs_btree_insert(cur, &i);
        			if (error)
   2347				goto done;
   2348			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2349				error = -EFSCORRUPTED;
   2350				goto done;
   2351			}
   2352		}
   2353		break;
   2354
   2355	case 0:
   2356		/*
   2357		 * Setting the middle part of a previous oldext extent to
   2358		 * newext.  Contiguity is impossible here.
   2359		 * One extent becomes three extents.
   2360		 */
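        		/*
        		 * Worked example with illustrative numbers: PREV maps file
        		 * blocks [0, 100) to disk blocks [1000, 1100) as unwritten,
        		 * and new converts [30, 50) to written.  Below, PREV is
        		 * trimmed to [0, 30), r[0] becomes the written middle piece
        		 * [30, 50) at disk block 1030, and r[1] becomes the unwritten
        		 * tail [50, 100) starting at disk block 1050, so every piece
        		 * keeps the original file-offset-to-disk-block mapping.
        		 */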
   2361		old = PREV;
   2362		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
   2363
   2364		r[0] = *new;
   2365		r[1].br_startoff = new_endoff;
   2366		r[1].br_blockcount =
   2367			old.br_startoff + old.br_blockcount - new_endoff;
   2368		r[1].br_startblock = new->br_startblock + new->br_blockcount;
   2369		r[1].br_state = PREV.br_state;
   2370
   2371		xfs_iext_update_extent(ip, state, icur, &PREV);
   2372		xfs_iext_next(ifp, icur);
   2373		xfs_iext_insert(ip, icur, &r[1], state);
   2374		xfs_iext_insert(ip, icur, &r[0], state);
   2375		ifp->if_nextents += 2;
   2376
   2377		if (cur == NULL)
   2378			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
   2379		else {
   2380			rval = XFS_ILOG_CORE;
   2381			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2382			if (error)
   2383				goto done;
   2384			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2385				error = -EFSCORRUPTED;
   2386				goto done;
   2387			}
   2388			/* new right extent - oldext */
   2389			error = xfs_bmbt_update(cur, &r[1]);
   2390			if (error)
   2391				goto done;
   2392			/* new left extent - oldext */
   2393			cur->bc_rec.b = PREV;
    2394			error = xfs_btree_insert(cur, &i);
        			if (error)
   2395				goto done;
   2396			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2397				error = -EFSCORRUPTED;
   2398				goto done;
   2399			}
   2400			/*
   2401			 * Reset the cursor to the position of the new extent
   2402			 * we are about to insert as we can't trust it after
   2403			 * the previous insert.
   2404			 */
   2405			error = xfs_bmbt_lookup_eq(cur, new, &i);
   2406			if (error)
   2407				goto done;
   2408			if (XFS_IS_CORRUPT(mp, i != 0)) {
   2409				error = -EFSCORRUPTED;
   2410				goto done;
   2411			}
   2412			/* new middle extent - newext */
    2413			error = xfs_btree_insert(cur, &i);
        			if (error)
   2414				goto done;
   2415			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2416				error = -EFSCORRUPTED;
   2417				goto done;
   2418			}
   2419		}
   2420		break;
   2421
   2422	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   2423	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   2424	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
   2425	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
   2426	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   2427	case BMAP_LEFT_CONTIG:
   2428	case BMAP_RIGHT_CONTIG:
   2429		/*
   2430		 * These cases are all impossible.
   2431		 */
   2432		ASSERT(0);
   2433	}
   2434
   2435	/* update reverse mappings */
   2436	xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
   2437
   2438	/* convert to a btree if necessary */
   2439	if (xfs_bmap_needs_btree(ip, whichfork)) {
   2440		int	tmp_logflags;	/* partial log flag return val */
   2441
   2442		ASSERT(cur == NULL);
   2443		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
   2444				&tmp_logflags, whichfork);
   2445		*logflagsp |= tmp_logflags;
   2446		if (error)
   2447			goto done;
   2448	}
   2449
    2450	/* clear out the allocated field; we're done with it now in any case. */
   2451	if (cur) {
   2452		cur->bc_ino.allocated = 0;
   2453		*curp = cur;
   2454	}
   2455
   2456	xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
   2457done:
   2458	*logflagsp |= rval;
   2459	return error;
   2460#undef	LEFT
   2461#undef	RIGHT
   2462#undef	PREV
   2463}
   2464
   2465/*
   2466 * Convert a hole to a delayed allocation.
   2467 */
   2468STATIC void
   2469xfs_bmap_add_extent_hole_delay(
   2470	xfs_inode_t		*ip,	/* incore inode pointer */
   2471	int			whichfork,
   2472	struct xfs_iext_cursor	*icur,
   2473	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
   2474{
   2475	struct xfs_ifork	*ifp;	/* inode fork pointer */
   2476	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
    2477	xfs_filblks_t		newlen = 0;	/* new indirect size */
    2478	xfs_filblks_t		oldlen = 0;	/* old indirect size */
   2479	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
   2480	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   2481	xfs_filblks_t		temp;	 /* temp for indirect calculations */
   2482
   2483	ifp = XFS_IFORK_PTR(ip, whichfork);
   2484	ASSERT(isnullstartblock(new->br_startblock));
   2485
   2486	/*
   2487	 * Check and set flags if this segment has a left neighbor
   2488	 */
   2489	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
   2490		state |= BMAP_LEFT_VALID;
   2491		if (isnullstartblock(left.br_startblock))
   2492			state |= BMAP_LEFT_DELAY;
   2493	}
   2494
   2495	/*
   2496	 * Check and set flags if the current (right) segment exists.
   2497	 * If it doesn't exist, we're converting the hole at end-of-file.
   2498	 */
   2499	if (xfs_iext_get_extent(ifp, icur, &right)) {
   2500		state |= BMAP_RIGHT_VALID;
   2501		if (isnullstartblock(right.br_startblock))
   2502			state |= BMAP_RIGHT_DELAY;
   2503	}
   2504
   2505	/*
   2506	 * Set contiguity flags on the left and right neighbors.
   2507	 * Don't let extents get too large, even if the pieces are contiguous.
   2508	 */
   2509	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
   2510	    left.br_startoff + left.br_blockcount == new->br_startoff &&
   2511	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
   2512		state |= BMAP_LEFT_CONTIG;
   2513
   2514	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
   2515	    new->br_startoff + new->br_blockcount == right.br_startoff &&
   2516	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
   2517	    (!(state & BMAP_LEFT_CONTIG) ||
   2518	     (left.br_blockcount + new->br_blockcount +
   2519	      right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
   2520		state |= BMAP_RIGHT_CONTIG;
   2521
   2522	/*
   2523	 * Switch out based on the contiguity flags.
   2524	 */
   2525	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
   2526	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   2527		/*
   2528		 * New allocation is contiguous with delayed allocations
   2529		 * on the left and on the right.
   2530		 * Merge all three into a single extent record.
   2531		 */
   2532		temp = left.br_blockcount + new->br_blockcount +
   2533			right.br_blockcount;
   2534
   2535		oldlen = startblockval(left.br_startblock) +
   2536			startblockval(new->br_startblock) +
   2537			startblockval(right.br_startblock);
   2538		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
   2539					 oldlen);
   2540		left.br_startblock = nullstartblock(newlen);
   2541		left.br_blockcount = temp;
   2542
   2543		xfs_iext_remove(ip, icur, state);
   2544		xfs_iext_prev(ifp, icur);
   2545		xfs_iext_update_extent(ip, state, icur, &left);
   2546		break;
   2547
   2548	case BMAP_LEFT_CONTIG:
   2549		/*
   2550		 * New allocation is contiguous with a delayed allocation
   2551		 * on the left.
   2552		 * Merge the new allocation with the left neighbor.
   2553		 */
   2554		temp = left.br_blockcount + new->br_blockcount;
   2555
   2556		oldlen = startblockval(left.br_startblock) +
   2557			startblockval(new->br_startblock);
   2558		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
   2559					 oldlen);
   2560		left.br_blockcount = temp;
   2561		left.br_startblock = nullstartblock(newlen);
   2562
   2563		xfs_iext_prev(ifp, icur);
   2564		xfs_iext_update_extent(ip, state, icur, &left);
   2565		break;
   2566
   2567	case BMAP_RIGHT_CONTIG:
   2568		/*
   2569		 * New allocation is contiguous with a delayed allocation
   2570		 * on the right.
   2571		 * Merge the new allocation with the right neighbor.
   2572		 */
   2573		temp = new->br_blockcount + right.br_blockcount;
   2574		oldlen = startblockval(new->br_startblock) +
   2575			startblockval(right.br_startblock);
   2576		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
   2577					 oldlen);
   2578		right.br_startoff = new->br_startoff;
   2579		right.br_startblock = nullstartblock(newlen);
   2580		right.br_blockcount = temp;
   2581		xfs_iext_update_extent(ip, state, icur, &right);
   2582		break;
   2583
   2584	case 0:
   2585		/*
   2586		 * New allocation is not contiguous with another
   2587		 * delayed allocation.
   2588		 * Insert a new entry.
   2589		 */
   2590		oldlen = newlen = 0;
   2591		xfs_iext_insert(ip, icur, new, state);
   2592		break;
   2593	}
   2594	if (oldlen != newlen) {
   2595		ASSERT(oldlen > newlen);
   2596		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
   2597				 false);
   2598		/*
   2599		 * Nothing to do for disk quota accounting here.
   2600		 */
   2601		xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
   2602	}
   2603}
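
        /*
         * The merge cases above can only shrink the combined worst-case
         * indirect-block reservation: the merged extent needs at most
         * xfs_bmap_worst_indlen() blocks, capped at the sum of the old
         * reservations, and the difference goes back to the free-block pool.
         * A standalone sketch of that bookkeeping (illustrative only;
         * toy_worst_indlen() is a stand-in for xfs_bmap_worst_indlen()):
         */
        #if 0
        static unsigned long long toy_worst_indlen(unsigned long long len)
        {
        	/* pretend one indirect block per 256 mapped blocks, rounded up */
        	return (len + 255) / 256;
        }

        /* returns the number of reserved blocks handed back to fdblocks */
        static long long merge_delalloc(unsigned long long left_len,
        		unsigned long long left_res, unsigned long long right_len,
        		unsigned long long right_res)
        {
        	unsigned long long oldlen = left_res + right_res;
        	unsigned long long newlen = toy_worst_indlen(left_len + right_len);

        	if (newlen > oldlen)	/* a merge never grows the reservation */
        		newlen = oldlen;
        	return oldlen - newlen;
        }
        #endif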
   2604
   2605/*
   2606 * Convert a hole to a real allocation.
   2607 */
   2608STATIC int				/* error */
   2609xfs_bmap_add_extent_hole_real(
   2610	struct xfs_trans	*tp,
   2611	struct xfs_inode	*ip,
   2612	int			whichfork,
   2613	struct xfs_iext_cursor	*icur,
   2614	struct xfs_btree_cur	**curp,
   2615	struct xfs_bmbt_irec	*new,
   2616	int			*logflagsp,
   2617	uint32_t		flags)
   2618{
   2619	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   2620	struct xfs_mount	*mp = ip->i_mount;
   2621	struct xfs_btree_cur	*cur = *curp;
   2622	int			error;	/* error return value */
   2623	int			i;	/* temp state */
   2624	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
   2625	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
    2626	int			rval = 0;	/* return value (logging flags) */
   2627	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   2628	struct xfs_bmbt_irec	old;
   2629
   2630	ASSERT(!isnullstartblock(new->br_startblock));
   2631	ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
   2632
   2633	XFS_STATS_INC(mp, xs_add_exlist);
   2634
   2635	/*
   2636	 * Check and set flags if this segment has a left neighbor.
   2637	 */
   2638	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
   2639		state |= BMAP_LEFT_VALID;
   2640		if (isnullstartblock(left.br_startblock))
   2641			state |= BMAP_LEFT_DELAY;
   2642	}
   2643
   2644	/*
   2645	 * Check and set flags if this segment has a current value.
   2646	 * Not true if we're inserting into the "hole" at eof.
   2647	 */
   2648	if (xfs_iext_get_extent(ifp, icur, &right)) {
   2649		state |= BMAP_RIGHT_VALID;
   2650		if (isnullstartblock(right.br_startblock))
   2651			state |= BMAP_RIGHT_DELAY;
   2652	}
   2653
   2654	/*
   2655	 * We're inserting a real allocation between "left" and "right".
   2656	 * Set the contiguity flags.  Don't let extents get too large.
   2657	 */
   2658	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
   2659	    left.br_startoff + left.br_blockcount == new->br_startoff &&
   2660	    left.br_startblock + left.br_blockcount == new->br_startblock &&
   2661	    left.br_state == new->br_state &&
   2662	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
   2663		state |= BMAP_LEFT_CONTIG;
   2664
   2665	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
   2666	    new->br_startoff + new->br_blockcount == right.br_startoff &&
   2667	    new->br_startblock + new->br_blockcount == right.br_startblock &&
   2668	    new->br_state == right.br_state &&
   2669	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
   2670	    (!(state & BMAP_LEFT_CONTIG) ||
   2671	     left.br_blockcount + new->br_blockcount +
   2672	     right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
   2673		state |= BMAP_RIGHT_CONTIG;
   2674
   2675	error = 0;
   2676	/*
   2677	 * Select which case we're in here, and implement it.
   2678	 */
   2679	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
   2680	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
   2681		/*
   2682		 * New allocation is contiguous with real allocations on the
   2683		 * left and on the right.
   2684		 * Merge all three into a single extent record.
   2685		 */
   2686		left.br_blockcount += new->br_blockcount + right.br_blockcount;
   2687
   2688		xfs_iext_remove(ip, icur, state);
   2689		xfs_iext_prev(ifp, icur);
   2690		xfs_iext_update_extent(ip, state, icur, &left);
   2691		ifp->if_nextents--;
   2692
   2693		if (cur == NULL) {
   2694			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
   2695		} else {
   2696			rval = XFS_ILOG_CORE;
   2697			error = xfs_bmbt_lookup_eq(cur, &right, &i);
   2698			if (error)
   2699				goto done;
   2700			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2701				error = -EFSCORRUPTED;
   2702				goto done;
   2703			}
   2704			error = xfs_btree_delete(cur, &i);
   2705			if (error)
   2706				goto done;
   2707			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2708				error = -EFSCORRUPTED;
   2709				goto done;
   2710			}
   2711			error = xfs_btree_decrement(cur, 0, &i);
   2712			if (error)
   2713				goto done;
   2714			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2715				error = -EFSCORRUPTED;
   2716				goto done;
   2717			}
   2718			error = xfs_bmbt_update(cur, &left);
   2719			if (error)
   2720				goto done;
   2721		}
   2722		break;
   2723
   2724	case BMAP_LEFT_CONTIG:
   2725		/*
   2726		 * New allocation is contiguous with a real allocation
   2727		 * on the left.
   2728		 * Merge the new allocation with the left neighbor.
   2729		 */
   2730		old = left;
   2731		left.br_blockcount += new->br_blockcount;
   2732
   2733		xfs_iext_prev(ifp, icur);
   2734		xfs_iext_update_extent(ip, state, icur, &left);
   2735
   2736		if (cur == NULL) {
   2737			rval = xfs_ilog_fext(whichfork);
   2738		} else {
   2739			rval = 0;
   2740			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2741			if (error)
   2742				goto done;
   2743			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2744				error = -EFSCORRUPTED;
   2745				goto done;
   2746			}
   2747			error = xfs_bmbt_update(cur, &left);
   2748			if (error)
   2749				goto done;
   2750		}
   2751		break;
   2752
   2753	case BMAP_RIGHT_CONTIG:
   2754		/*
   2755		 * New allocation is contiguous with a real allocation
   2756		 * on the right.
   2757		 * Merge the new allocation with the right neighbor.
   2758		 */
   2759		old = right;
   2760
   2761		right.br_startoff = new->br_startoff;
   2762		right.br_startblock = new->br_startblock;
   2763		right.br_blockcount += new->br_blockcount;
   2764		xfs_iext_update_extent(ip, state, icur, &right);
   2765
   2766		if (cur == NULL) {
   2767			rval = xfs_ilog_fext(whichfork);
   2768		} else {
   2769			rval = 0;
   2770			error = xfs_bmbt_lookup_eq(cur, &old, &i);
   2771			if (error)
   2772				goto done;
   2773			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2774				error = -EFSCORRUPTED;
   2775				goto done;
   2776			}
   2777			error = xfs_bmbt_update(cur, &right);
   2778			if (error)
   2779				goto done;
   2780		}
   2781		break;
   2782
   2783	case 0:
   2784		/*
   2785		 * New allocation is not contiguous with another
   2786		 * real allocation.
   2787		 * Insert a new entry.
   2788		 */
   2789		xfs_iext_insert(ip, icur, new, state);
   2790		ifp->if_nextents++;
   2791
   2792		if (cur == NULL) {
   2793			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
   2794		} else {
   2795			rval = XFS_ILOG_CORE;
   2796			error = xfs_bmbt_lookup_eq(cur, new, &i);
   2797			if (error)
   2798				goto done;
   2799			if (XFS_IS_CORRUPT(mp, i != 0)) {
   2800				error = -EFSCORRUPTED;
   2801				goto done;
   2802			}
   2803			error = xfs_btree_insert(cur, &i);
   2804			if (error)
   2805				goto done;
   2806			if (XFS_IS_CORRUPT(mp, i != 1)) {
   2807				error = -EFSCORRUPTED;
   2808				goto done;
   2809			}
   2810		}
   2811		break;
   2812	}
   2813
   2814	/* add reverse mapping unless caller opted out */
   2815	if (!(flags & XFS_BMAPI_NORMAP))
   2816		xfs_rmap_map_extent(tp, ip, whichfork, new);
   2817
   2818	/* convert to a btree if necessary */
   2819	if (xfs_bmap_needs_btree(ip, whichfork)) {
   2820		int	tmp_logflags;	/* partial log flag return val */
   2821
   2822		ASSERT(cur == NULL);
   2823		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
   2824				&tmp_logflags, whichfork);
   2825		*logflagsp |= tmp_logflags;
   2826		cur = *curp;
   2827		if (error)
   2828			goto done;
   2829	}
   2830
    2831	/* clear out the allocated field; we're done with it now in any case. */
   2832	if (cur)
   2833		cur->bc_ino.allocated = 0;
   2834
   2835	xfs_bmap_check_leaf_extents(cur, ip, whichfork);
   2836done:
   2837	*logflagsp |= rval;
   2838	return error;
   2839}
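
        /*
         * The four cases above change the extent count in a fixed pattern.  A
         * minimal sketch of that bookkeeping (illustrative only, not part of
         * this file):
         */
        #if 0
        static int hole_fill_nextents_delta(bool left_contig, bool right_contig)
        {
        	if (left_contig && right_contig)
        		return -1;	/* left, new and right collapse into one record */
        	if (left_contig || right_contig)
        		return 0;	/* new is absorbed into an existing record */
        	return 1;		/* a new record is inserted */
        }
        #endif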
   2840
   2841/*
   2842 * Functions used in the extent read, allocate and remove paths
   2843 */
   2844
   2845/*
   2846 * Adjust the size of the new extent based on i_extsize and rt extsize.
   2847 */
   2848int
   2849xfs_bmap_extsize_align(
   2850	xfs_mount_t	*mp,
   2851	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
   2852	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
   2853	xfs_extlen_t	extsz,		/* align to this extent size */
   2854	int		rt,		/* is this a realtime inode? */
   2855	int		eof,		/* is extent at end-of-file? */
   2856	int		delay,		/* creating delalloc extent? */
   2857	int		convert,	/* overwriting unwritten extent? */
   2858	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
   2859	xfs_extlen_t	*lenp)		/* in/out: aligned length */
   2860{
   2861	xfs_fileoff_t	orig_off;	/* original offset */
   2862	xfs_extlen_t	orig_alen;	/* original length */
   2863	xfs_fileoff_t	orig_end;	/* original off+len */
   2864	xfs_fileoff_t	nexto;		/* next file offset */
   2865	xfs_fileoff_t	prevo;		/* previous file offset */
   2866	xfs_fileoff_t	align_off;	/* temp for offset */
   2867	xfs_extlen_t	align_alen;	/* temp for length */
   2868	xfs_extlen_t	temp;		/* temp for calculations */
   2869
   2870	if (convert)
   2871		return 0;
   2872
   2873	orig_off = align_off = *offp;
   2874	orig_alen = align_alen = *lenp;
   2875	orig_end = orig_off + orig_alen;
   2876
   2877	/*
   2878	 * If this request overlaps an existing extent, then don't
   2879	 * attempt to perform any additional alignment.
   2880	 */
   2881	if (!delay && !eof &&
   2882	    (orig_off >= gotp->br_startoff) &&
   2883	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
   2884		return 0;
   2885	}
   2886
   2887	/*
   2888	 * If the file offset is unaligned vs. the extent size
   2889	 * we need to align it.  This will be possible unless
   2890	 * the file was previously written with a kernel that didn't
   2891	 * perform this alignment, or if a truncate shot us in the
   2892	 * foot.
   2893	 */
   2894	div_u64_rem(orig_off, extsz, &temp);
   2895	if (temp) {
   2896		align_alen += temp;
   2897		align_off -= temp;
   2898	}
   2899
   2900	/* Same adjustment for the end of the requested area. */
   2901	temp = (align_alen % extsz);
   2902	if (temp)
   2903		align_alen += extsz - temp;
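
        	/*
        	 * Worked example with illustrative numbers: extsz = 16,
        	 * orig_off = 5, orig_alen = 10.  The start rounds down by
        	 * temp = 5 to align_off = 0 and the length grows to 15; the end
        	 * then rounds up by 16 - (15 % 16) = 1, leaving the aligned
        	 * request [0, 16), exactly one extent-size chunk.
        	 */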
   2904
   2905	/*
   2906	 * For large extent hint sizes, the aligned extent might be larger than
    2907	 * XFS_MAX_BMBT_EXTLEN. In that case, reduce the size by an extsz so
    2908	 * that it pulls the length back under XFS_MAX_BMBT_EXTLEN. The outer
   2909	 * allocation loops handle short allocation just fine, so it is safe to
   2910	 * do this. We only want to do it when we are forced to, though, because
   2911	 * it means more allocation operations are required.
   2912	 */
   2913	while (align_alen > XFS_MAX_BMBT_EXTLEN)
   2914		align_alen -= extsz;
   2915	ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
   2916
   2917	/*
   2918	 * If the previous block overlaps with this proposed allocation
   2919	 * then move the start forward without adjusting the length.
   2920	 */
   2921	if (prevp->br_startoff != NULLFILEOFF) {
   2922		if (prevp->br_startblock == HOLESTARTBLOCK)
   2923			prevo = prevp->br_startoff;
   2924		else
   2925			prevo = prevp->br_startoff + prevp->br_blockcount;
   2926	} else
   2927		prevo = 0;
   2928	if (align_off != orig_off && align_off < prevo)
   2929		align_off = prevo;
   2930	/*
   2931	 * If the next block overlaps with this proposed allocation
   2932	 * then move the start back without adjusting the length,
   2933	 * but not before offset 0.
    2934	 * This may of course make the start overlap the previous block,
   2935	 * and if we hit the offset 0 limit then the next block
   2936	 * can still overlap too.
   2937	 */
   2938	if (!eof && gotp->br_startoff != NULLFILEOFF) {
   2939		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
   2940		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
   2941			nexto = gotp->br_startoff + gotp->br_blockcount;
   2942		else
   2943			nexto = gotp->br_startoff;
   2944	} else
   2945		nexto = NULLFILEOFF;
   2946	if (!eof &&
   2947	    align_off + align_alen != orig_end &&
   2948	    align_off + align_alen > nexto)
   2949		align_off = nexto > align_alen ? nexto - align_alen : 0;
   2950	/*
   2951	 * If we're now overlapping the next or previous extent that
   2952	 * means we can't fit an extsz piece in this hole.  Just move
   2953	 * the start forward to the first valid spot and set
   2954	 * the length so we hit the end.
   2955	 */
   2956	if (align_off != orig_off && align_off < prevo)
   2957		align_off = prevo;
   2958	if (align_off + align_alen != orig_end &&
   2959	    align_off + align_alen > nexto &&
   2960	    nexto != NULLFILEOFF) {
   2961		ASSERT(nexto > prevo);
   2962		align_alen = nexto - align_off;
   2963	}
   2964
   2965	/*
   2966	 * If realtime, and the result isn't a multiple of the realtime
   2967	 * extent size we need to remove blocks until it is.
   2968	 */
   2969	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
   2970		/*
   2971		 * We're not covering the original request, or
   2972		 * we won't be able to once we fix the length.
   2973		 */
   2974		if (orig_off < align_off ||
   2975		    orig_end > align_off + align_alen ||
   2976		    align_alen - temp < orig_alen)
   2977			return -EINVAL;
   2978		/*
   2979		 * Try to fix it by moving the start up.
   2980		 */
   2981		if (align_off + temp <= orig_off) {
   2982			align_alen -= temp;
   2983			align_off += temp;
   2984		}
   2985		/*
   2986		 * Try to fix it by moving the end in.
   2987		 */
   2988		else if (align_off + align_alen - temp >= orig_end)
   2989			align_alen -= temp;
   2990		/*
   2991		 * Set the start to the minimum then trim the length.
   2992		 */
   2993		else {
   2994			align_alen -= orig_off - align_off;
   2995			align_off = orig_off;
   2996			align_alen -= align_alen % mp->m_sb.sb_rextsize;
   2997		}
   2998		/*
   2999		 * Result doesn't cover the request, fail it.
   3000		 */
   3001		if (orig_off < align_off || orig_end > align_off + align_alen)
   3002			return -EINVAL;
   3003	} else {
   3004		ASSERT(orig_off >= align_off);
    3005		/* see XFS_MAX_BMBT_EXTLEN handling above */
   3006		ASSERT(orig_end <= align_off + align_alen ||
   3007		       align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
   3008	}
   3009
   3010#ifdef DEBUG
   3011	if (!eof && gotp->br_startoff != NULLFILEOFF)
   3012		ASSERT(align_off + align_alen <= gotp->br_startoff);
   3013	if (prevp->br_startoff != NULLFILEOFF)
   3014		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
   3015#endif
   3016
   3017	*lenp = align_alen;
   3018	*offp = align_off;
   3019	return 0;
   3020}
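
        /*
         * A standalone sketch of the two rounding steps at the top of
         * xfs_bmap_extsize_align() (illustrative only; the real function uses
         * div_u64_rem() for the 64-bit division, clamps the result to
         * XFS_MAX_BMBT_EXTLEN, and trims it against the neighboring extents):
         */
        #if 0
        static void toy_extsize_align(unsigned long long extsz,
        		unsigned long long *off, unsigned long long *len)
        {
        	unsigned long long temp = *off % extsz;

        	*len += temp;			/* grow to cover the rounded start */
        	*off -= temp;			/* round the start down */
        	temp = *len % extsz;
        	if (temp)
        		*len += extsz - temp;	/* round the length up */
        }
        #endif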
   3021
   3022#define XFS_ALLOC_GAP_UNITS	4
   3023
   3024void
   3025xfs_bmap_adjacent(
   3026	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
   3027{
   3028	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
   3029	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
   3030	xfs_mount_t	*mp;		/* mount point structure */
   3031	int		nullfb;		/* true if ap->firstblock isn't set */
   3032	int		rt;		/* true if inode is realtime */
   3033
   3034#define	ISVALID(x,y)	\
   3035	(rt ? \
   3036		(x) < mp->m_sb.sb_rblocks : \
   3037		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
   3038		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
   3039		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
   3040
   3041	mp = ap->ip->i_mount;
   3042	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
   3043	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
   3044		(ap->datatype & XFS_ALLOC_USERDATA);
   3045	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
   3046							ap->tp->t_firstblock);
   3047	/*
   3048	 * If allocating at eof, and there's a previous real block,
   3049	 * try to use its last block as our starting point.
   3050	 */
   3051	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
   3052	    !isnullstartblock(ap->prev.br_startblock) &&
   3053	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
   3054		    ap->prev.br_startblock)) {
   3055		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
   3056		/*
   3057		 * Adjust for the gap between prevp and us.
   3058		 */
   3059		adjust = ap->offset -
   3060			(ap->prev.br_startoff + ap->prev.br_blockcount);
   3061		if (adjust &&
   3062		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
   3063			ap->blkno += adjust;
   3064	}
   3065	/*
   3066	 * If not at eof, then compare the two neighbor blocks.
   3067	 * Figure out whether either one gives us a good starting point,
   3068	 * and pick the better one.
   3069	 */
   3070	else if (!ap->eof) {
   3071		xfs_fsblock_t	gotbno;		/* right side block number */
    3072		xfs_fsblock_t	gotdiff = 0;	/* right side difference */
    3073		xfs_fsblock_t	prevbno;	/* left side block number */
    3074		xfs_fsblock_t	prevdiff = 0;	/* left side difference */
   3075
   3076		/*
   3077		 * If there's a previous (left) block, select a requested
   3078		 * start block based on it.
   3079		 */
   3080		if (ap->prev.br_startoff != NULLFILEOFF &&
   3081		    !isnullstartblock(ap->prev.br_startblock) &&
   3082		    (prevbno = ap->prev.br_startblock +
   3083			       ap->prev.br_blockcount) &&
   3084		    ISVALID(prevbno, ap->prev.br_startblock)) {
   3085			/*
   3086			 * Calculate gap to end of previous block.
   3087			 */
   3088			adjust = prevdiff = ap->offset -
   3089				(ap->prev.br_startoff +
   3090				 ap->prev.br_blockcount);
   3091			/*
   3092			 * Figure the startblock based on the previous block's
   3093			 * end and the gap size.
   3094			 * Heuristic!
   3095			 * If the gap is large relative to the piece we're
   3096			 * allocating, or using it gives us an invalid block
   3097			 * number, then just use the end of the previous block.
   3098			 */
   3099			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
   3100			    ISVALID(prevbno + prevdiff,
   3101				    ap->prev.br_startblock))
   3102				prevbno += adjust;
   3103			else
   3104				prevdiff += adjust;
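        			/*
        			 * Worked example with illustrative numbers: for an
        			 * 8-block allocation, gaps of up to
        			 * XFS_ALLOC_GAP_UNITS * 8 = 32 blocks are bridged, so
        			 * a 20-block gap gives prevbno += 20 and the target
        			 * sits at the exact file-offset distance; a 40-block
        			 * gap instead leaves prevbno at the end of the previous
        			 * extent and inflates prevdiff, penalizing this side in
        			 * the prevdiff <= gotdiff comparison further down.
        			 */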
   3105			/*
   3106			 * If the firstblock forbids it, can't use it,
   3107			 * must use default.
   3108			 */
   3109			if (!rt && !nullfb &&
   3110			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
   3111				prevbno = NULLFSBLOCK;
   3112		}
   3113		/*
   3114		 * No previous block or can't follow it, just default.
   3115		 */
   3116		else
   3117			prevbno = NULLFSBLOCK;
   3118		/*
   3119		 * If there's a following (right) block, select a requested
   3120		 * start block based on it.
   3121		 */
   3122		if (!isnullstartblock(ap->got.br_startblock)) {
   3123			/*
   3124			 * Calculate gap to start of next block.
   3125			 */
   3126			adjust = gotdiff = ap->got.br_startoff - ap->offset;
   3127			/*
   3128			 * Figure the startblock based on the next block's
   3129			 * start and the gap size.
   3130			 */
   3131			gotbno = ap->got.br_startblock;
   3132			/*
   3133			 * Heuristic!
   3134			 * If the gap is large relative to the piece we're
   3135			 * allocating, or using it gives us an invalid block
   3136			 * number, then just use the start of the next block
   3137			 * offset by our length.
   3138			 */
   3139			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
   3140			    ISVALID(gotbno - gotdiff, gotbno))
   3141				gotbno -= adjust;
   3142			else if (ISVALID(gotbno - ap->length, gotbno)) {
   3143				gotbno -= ap->length;
   3144				gotdiff += adjust - ap->length;
   3145			} else
   3146				gotdiff += adjust;
   3147			/*
   3148			 * If the firstblock forbids it, can't use it,
   3149			 * must use default.
   3150			 */
   3151			if (!rt && !nullfb &&
   3152			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
   3153				gotbno = NULLFSBLOCK;
   3154		}
   3155		/*
   3156		 * No next block, just default.
   3157		 */
   3158		else
   3159			gotbno = NULLFSBLOCK;
   3160		/*
   3161		 * If both valid, pick the better one, else the only good
   3162		 * one, else ap->blkno is already set (to 0 or the inode block).
   3163		 */
   3164		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
   3165			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
   3166		else if (prevbno != NULLFSBLOCK)
   3167			ap->blkno = prevbno;
   3168		else if (gotbno != NULLFSBLOCK)
   3169			ap->blkno = gotbno;
   3170	}
   3171#undef ISVALID
   3172}
   3173
   3174static int
   3175xfs_bmap_longest_free_extent(
   3176	struct xfs_trans	*tp,
   3177	xfs_agnumber_t		ag,
   3178	xfs_extlen_t		*blen,
   3179	int			*notinit)
   3180{
   3181	struct xfs_mount	*mp = tp->t_mountp;
   3182	struct xfs_perag	*pag;
   3183	xfs_extlen_t		longest;
   3184	int			error = 0;
   3185
   3186	pag = xfs_perag_get(mp, ag);
   3187	if (!pag->pagf_init) {
   3188		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
   3189		if (error) {
   3190			/* Couldn't lock the AGF, so skip this AG. */
   3191			if (error == -EAGAIN) {
   3192				*notinit = 1;
   3193				error = 0;
   3194			}
   3195			goto out;
   3196		}
   3197	}
   3198
   3199	longest = xfs_alloc_longest_free_extent(pag,
   3200				xfs_alloc_min_freelist(mp, pag),
   3201				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
   3202	if (*blen < longest)
   3203		*blen = longest;
   3204
   3205out:
   3206	xfs_perag_put(pag);
   3207	return error;
   3208}
   3209
   3210static void
   3211xfs_bmap_select_minlen(
   3212	struct xfs_bmalloca	*ap,
   3213	struct xfs_alloc_arg	*args,
   3214	xfs_extlen_t		*blen,
   3215	int			notinit)
   3216{
   3217	if (notinit || *blen < ap->minlen) {
   3218		/*
    3219		 * Since the AGF scan above used a trylock, we may have skipped
    3220		 * locked AGs and there may still be space for this request.
   3221		 */
   3222		args->minlen = ap->minlen;
   3223	} else if (*blen < args->maxlen) {
   3224		/*
   3225		 * If the best seen length is less than the request length,
   3226		 * use the best as the minimum.
   3227		 */
   3228		args->minlen = *blen;
   3229	} else {
   3230		/*
   3231		 * Otherwise we've seen an extent as big as maxlen, use that
   3232		 * as the minimum.
   3233		 */
   3234		args->minlen = args->maxlen;
   3235	}
   3236}
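
        /*
         * When every AGF examined was initialized (notinit == 0), the three
         * branches above collapse to a single clamp of the best length seen
         * into the caller's allowed range; a sketch, assuming the clamp()
         * macro from linux/minmax.h:
         *
         *	args->minlen = clamp(*blen, ap->minlen, args->maxlen);
         */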
   3237
   3238STATIC int
   3239xfs_bmap_btalloc_nullfb(
   3240	struct xfs_bmalloca	*ap,
   3241	struct xfs_alloc_arg	*args,
   3242	xfs_extlen_t		*blen)
   3243{
   3244	struct xfs_mount	*mp = ap->ip->i_mount;
   3245	xfs_agnumber_t		ag, startag;
   3246	int			notinit = 0;
   3247	int			error;
   3248
   3249	args->type = XFS_ALLOCTYPE_START_BNO;
   3250	args->total = ap->total;
   3251
   3252	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
   3253	if (startag == NULLAGNUMBER)
   3254		startag = ag = 0;
   3255
   3256	while (*blen < args->maxlen) {
   3257		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
   3258						     &notinit);
   3259		if (error)
   3260			return error;
   3261
   3262		if (++ag == mp->m_sb.sb_agcount)
   3263			ag = 0;
   3264		if (ag == startag)
   3265			break;
   3266	}
   3267
   3268	xfs_bmap_select_minlen(ap, args, blen, notinit);
   3269	return 0;
   3270}
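
        /*
         * The loop above is a standard circular AG scan: start at the AG that
         * owns args->fsbno, wrap at the end of the filesystem, and stop once a
         * long-enough extent has been seen or the scan returns to the starting
         * AG.  A generic sketch of the shape (illustrative only):
         */
        #if 0
        static void scan_ags_circularly(unsigned int startag, unsigned int agcount)
        {
        	unsigned int ag = startag;

        	do {
        		/* ... examine ag, possibly break out early ... */
        		if (++ag == agcount)
        			ag = 0;
        	} while (ag != startag);
        }
        #endif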
   3271
   3272STATIC int
   3273xfs_bmap_btalloc_filestreams(
   3274	struct xfs_bmalloca	*ap,
   3275	struct xfs_alloc_arg	*args,
   3276	xfs_extlen_t		*blen)
   3277{
   3278	struct xfs_mount	*mp = ap->ip->i_mount;
   3279	xfs_agnumber_t		ag;
   3280	int			notinit = 0;
   3281	int			error;
   3282
   3283	args->type = XFS_ALLOCTYPE_NEAR_BNO;
   3284	args->total = ap->total;
   3285
   3286	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
   3287	if (ag == NULLAGNUMBER)
   3288		ag = 0;
   3289
   3290	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
   3291	if (error)
   3292		return error;
   3293
   3294	if (*blen < args->maxlen) {
   3295		error = xfs_filestream_new_ag(ap, &ag);
   3296		if (error)
   3297			return error;
   3298
   3299		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
   3300						     &notinit);
   3301		if (error)
   3302			return error;
   3303
   3304	}
   3305
   3306	xfs_bmap_select_minlen(ap, args, blen, notinit);
   3307
   3308	/*
    3309	 * Set the failure fallback case to look in the selected AG, as the
    3310	 * stream may have moved.
   3311	 */
   3312	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
   3313	return 0;
   3314}
   3315
   3316/* Update all inode and quota accounting for the allocation we just did. */
   3317static void
   3318xfs_bmap_btalloc_accounting(
   3319	struct xfs_bmalloca	*ap,
   3320	struct xfs_alloc_arg	*args)
   3321{
   3322	if (ap->flags & XFS_BMAPI_COWFORK) {
   3323		/*
   3324		 * COW fork blocks are in-core only and thus are treated as
   3325		 * in-core quota reservation (like delalloc blocks) even when
   3326		 * converted to real blocks. The quota reservation is not
   3327		 * accounted to disk until blocks are remapped to the data
   3328		 * fork. So if these blocks were previously delalloc, we
   3329		 * already have quota reservation and there's nothing to do
   3330		 * yet.
   3331		 */
   3332		if (ap->wasdel) {
   3333			xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
   3334			return;
   3335		}
   3336
   3337		/*
   3338		 * Otherwise, we've allocated blocks in a hole. The transaction
   3339		 * has acquired in-core quota reservation for this extent.
   3340		 * Rather than account these as real blocks, however, we reduce
   3341		 * the transaction quota reservation based on the allocation.
   3342		 * This essentially transfers the transaction quota reservation
   3343		 * to that of a delalloc extent.
   3344		 */
   3345		ap->ip->i_delayed_blks += args->len;
   3346		xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
   3347				-(long)args->len);
   3348		return;
   3349	}
   3350
   3351	/* data/attr fork only */
   3352	ap->ip->i_nblocks += args->len;
   3353	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
   3354	if (ap->wasdel) {
   3355		ap->ip->i_delayed_blks -= args->len;
   3356		xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
   3357	}
   3358	xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
   3359		ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
   3360		args->len);
   3361}
   3362
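        /*
         * Compute the alignment constraints for this allocation: apply any
         * extent size hint to the requested offset and length, set up
         * args->prod and args->mod for the allocator, and return the stripe
         * alignment in filesystem blocks, or zero if none applies.
         */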
   3363static int
   3364xfs_bmap_compute_alignments(
   3365	struct xfs_bmalloca	*ap,
   3366	struct xfs_alloc_arg	*args)
   3367{
   3368	struct xfs_mount	*mp = args->mp;
   3369	xfs_extlen_t		align = 0; /* minimum allocation alignment */
   3370	int			stripe_align = 0;
   3371
   3372	/* stripe alignment for allocation is determined by mount parameters */
   3373	if (mp->m_swidth && xfs_has_swalloc(mp))
   3374		stripe_align = mp->m_swidth;
   3375	else if (mp->m_dalign)
   3376		stripe_align = mp->m_dalign;
   3377
   3378	if (ap->flags & XFS_BMAPI_COWFORK)
   3379		align = xfs_get_cowextsz_hint(ap->ip);
   3380	else if (ap->datatype & XFS_ALLOC_USERDATA)
   3381		align = xfs_get_extsz_hint(ap->ip);
   3382	if (align) {
   3383		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
   3384					ap->eof, 0, ap->conv, &ap->offset,
   3385					&ap->length))
   3386			ASSERT(0);
   3387		ASSERT(ap->length);
   3388	}
   3389
   3390	/* apply extent size hints if obtained earlier */
   3391	if (align) {
   3392		args->prod = align;
   3393		div_u64_rem(ap->offset, args->prod, &args->mod);
   3394		if (args->mod)
   3395			args->mod = args->prod - args->mod;
   3396	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
   3397		args->prod = 1;
   3398		args->mod = 0;
   3399	} else {
   3400		args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
   3401		div_u64_rem(ap->offset, args->prod, &args->mod);
   3402		if (args->mod)
   3403			args->mod = args->prod - args->mod;
   3404	}
   3405
   3406	return stripe_align;
   3407}
   3408
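        /*
         * Post-process a successful allocation: record the new extent in the
         * bmalloca context, remember the transaction's first allocated block,
         * move the mapping back towards the caller's original request if the
         * extent size hint could not be honoured in full, and update the
         * inode and quota accounting.
         */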
   3409static void
   3410xfs_bmap_process_allocated_extent(
   3411	struct xfs_bmalloca	*ap,
   3412	struct xfs_alloc_arg	*args,
   3413	xfs_fileoff_t		orig_offset,
   3414	xfs_extlen_t		orig_length)
   3415{
   3416	int			nullfb;
   3417
   3418	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
   3419
   3420	/*
    3421	 * Check that the allocation happened at the same or a higher AG than
    3422	 * the first block that was allocated.
   3423	 */
   3424	ASSERT(nullfb ||
   3425		XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
   3426		XFS_FSB_TO_AGNO(args->mp, args->fsbno));
   3427
   3428	ap->blkno = args->fsbno;
   3429	if (nullfb)
   3430		ap->tp->t_firstblock = args->fsbno;
   3431	ap->length = args->len;
   3432	/*
   3433	 * If the extent size hint is active, we tried to round the
   3434	 * caller's allocation request offset down to extsz and the
   3435	 * length up to another extsz boundary.  If we found a free
   3436	 * extent we mapped it in starting at this new offset.  If the
   3437	 * newly mapped space isn't long enough to cover any of the
   3438	 * range of offsets that was originally requested, move the
   3439	 * mapping up so that we can fill as much of the caller's
   3440	 * original request as possible.  Free space is apparently
   3441	 * very fragmented so we're unlikely to be able to satisfy the
   3442	 * hints anyway.
   3443	 */
   3444	if (ap->length <= orig_length)
   3445		ap->offset = orig_offset;
   3446	else if (ap->offset + ap->length < orig_offset + orig_length)
   3447		ap->offset = orig_offset + orig_length - ap->length;
   3448	xfs_bmap_btalloc_accounting(ap, args);
   3449}
   3450
   3451#ifdef DEBUG
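        /*
         * Debug-only allocation path, driven by the
         * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT error tag: force the allocation
         * of an extent of exactly ap->minlen blocks (only minlen == 1 is
         * supported) to exercise the minimum-length allocation paths.
         */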
   3452static int
   3453xfs_bmap_exact_minlen_extent_alloc(
   3454	struct xfs_bmalloca	*ap)
   3455{
   3456	struct xfs_mount	*mp = ap->ip->i_mount;
   3457	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
   3458	xfs_fileoff_t		orig_offset;
   3459	xfs_extlen_t		orig_length;
   3460	int			error;
   3461
   3462	ASSERT(ap->length);
   3463
   3464	if (ap->minlen != 1) {
   3465		ap->blkno = NULLFSBLOCK;
   3466		ap->length = 0;
   3467		return 0;
   3468	}
   3469
   3470	orig_offset = ap->offset;
   3471	orig_length = ap->length;
   3472
   3473	args.alloc_minlen_only = 1;
   3474
   3475	xfs_bmap_compute_alignments(ap, &args);
   3476
   3477	if (ap->tp->t_firstblock == NULLFSBLOCK) {
   3478		/*
   3479		 * Unlike the longest extent available in an AG, we don't track
   3480		 * the length of an AG's shortest extent.
   3481		 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
   3482		 * hence we can afford to start traversing from the 0th AG since
   3483		 * we need not be concerned about a drop in performance in
   3484		 * "debug only" code paths.
   3485		 */
   3486		ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
   3487	} else {
   3488		ap->blkno = ap->tp->t_firstblock;
   3489	}
   3490
   3491	args.fsbno = ap->blkno;
   3492	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
   3493	args.type = XFS_ALLOCTYPE_FIRST_AG;
   3494	args.minlen = args.maxlen = ap->minlen;
   3495	args.total = ap->total;
   3496
   3497	args.alignment = 1;
   3498	args.minalignslop = 0;
   3499
   3500	args.minleft = ap->minleft;
   3501	args.wasdel = ap->wasdel;
   3502	args.resv = XFS_AG_RESV_NONE;
   3503	args.datatype = ap->datatype;
   3504
   3505	error = xfs_alloc_vextent(&args);
   3506	if (error)
   3507		return error;
   3508
   3509	if (args.fsbno != NULLFSBLOCK) {
   3510		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
   3511			orig_length);
   3512	} else {
   3513		ap->blkno = NULLFSBLOCK;
   3514		ap->length = 0;
   3515	}
   3516
   3517	return 0;
   3518}
   3519#else
   3520
   3521#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
   3522
   3523#endif
   3524
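        /*
         * Allocate the disk blocks backing a file mapping with the standard
         * btree allocator.  Work out the alignment constraints and target
         * block, then call xfs_alloc_vextent(), retrying with progressively
         * relaxed constraints (alignment, minimum length, any AG) until the
         * allocation succeeds or space is truly exhausted.
         */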
   3525STATIC int
   3526xfs_bmap_btalloc(
   3527	struct xfs_bmalloca	*ap)
   3528{
   3529	struct xfs_mount	*mp = ap->ip->i_mount;
   3530	struct xfs_alloc_arg	args = { .tp = ap->tp, .mp = mp };
   3531	xfs_alloctype_t		atype = 0;
    3532	xfs_agnumber_t		fb_agno;	/* ag number of tp->t_firstblock */
   3533	xfs_agnumber_t		ag;
   3534	xfs_fileoff_t		orig_offset;
   3535	xfs_extlen_t		orig_length;
   3536	xfs_extlen_t		blen;
   3537	xfs_extlen_t		nextminlen = 0;
    3538	int			nullfb; /* true if tp->t_firstblock isn't set */
   3539	int			isaligned;
   3540	int			tryagain;
   3541	int			error;
   3542	int			stripe_align;
   3543
   3544	ASSERT(ap->length);
   3545	orig_offset = ap->offset;
   3546	orig_length = ap->length;
   3547
   3548	stripe_align = xfs_bmap_compute_alignments(ap, &args);
   3549
   3550	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
   3551	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
   3552							ap->tp->t_firstblock);
   3553	if (nullfb) {
   3554		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
   3555		    xfs_inode_is_filestream(ap->ip)) {
   3556			ag = xfs_filestream_lookup_ag(ap->ip);
   3557			ag = (ag != NULLAGNUMBER) ? ag : 0;
   3558			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
   3559		} else {
   3560			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
   3561		}
   3562	} else
   3563		ap->blkno = ap->tp->t_firstblock;
   3564
   3565	xfs_bmap_adjacent(ap);
   3566
    3567	/*
    3568	 * If allowed, use ap->blkno; otherwise must use firstblock since
    3569	 * it's in the right allocation group.
    3570	 */
    3571	if (!nullfb && XFS_FSB_TO_AGNO(mp, ap->blkno) != fb_agno)
    3572		ap->blkno = ap->tp->t_firstblock;
   3575	/*
   3576	 * Normal allocation, done through xfs_alloc_vextent.
   3577	 */
   3578	tryagain = isaligned = 0;
   3579	args.fsbno = ap->blkno;
   3580	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
   3581
   3582	/* Trim the allocation back to the maximum an AG can fit. */
   3583	args.maxlen = min(ap->length, mp->m_ag_max_usable);
   3584	blen = 0;
   3585	if (nullfb) {
   3586		/*
   3587		 * Search for an allocation group with a single extent large
   3588		 * enough for the request.  If one isn't found, then adjust
   3589		 * the minimum allocation size to the largest space found.
   3590		 */
   3591		if ((ap->datatype & XFS_ALLOC_USERDATA) &&
   3592		    xfs_inode_is_filestream(ap->ip))
   3593			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
   3594		else
   3595			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
   3596		if (error)
   3597			return error;
   3598	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
   3599		if (xfs_inode_is_filestream(ap->ip))
   3600			args.type = XFS_ALLOCTYPE_FIRST_AG;
   3601		else
   3602			args.type = XFS_ALLOCTYPE_START_BNO;
   3603		args.total = args.minlen = ap->minlen;
   3604	} else {
   3605		args.type = XFS_ALLOCTYPE_NEAR_BNO;
   3606		args.total = ap->total;
   3607		args.minlen = ap->minlen;
   3608	}
   3609
    3610	/*
    3611	 * If we are not low on available data blocks, the underlying
    3612	 * logical volume manager is striped, and the file offset is zero,
    3613	 * then try to allocate data blocks on a stripe unit boundary.
    3614	 * NOTE: ap->aeof is only set if the allocation length is >= the
    3615	 * stripe unit and the allocation offset is at the end of file.
    3616	 */
   3617	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
   3618		if (!ap->offset) {
   3619			args.alignment = stripe_align;
   3620			atype = args.type;
   3621			isaligned = 1;
   3622			/*
   3623			 * Adjust minlen to try and preserve alignment if we
   3624			 * can't guarantee an aligned maxlen extent.
   3625			 */
   3626			if (blen > args.alignment &&
   3627			    blen <= args.maxlen + args.alignment)
   3628				args.minlen = blen - args.alignment;
   3629			args.minalignslop = 0;
   3630		} else {
   3631			/*
   3632			 * First try an exact bno allocation.
   3633			 * If it fails then do a near or start bno
   3634			 * allocation with alignment turned on.
   3635			 */
   3636			atype = args.type;
   3637			tryagain = 1;
   3638			args.type = XFS_ALLOCTYPE_THIS_BNO;
   3639			args.alignment = 1;
   3640			/*
   3641			 * Compute the minlen+alignment for the
   3642			 * next case.  Set slop so that the value
   3643			 * of minlen+alignment+slop doesn't go up
   3644			 * between the calls.
   3645			 */
   3646			if (blen > stripe_align && blen <= args.maxlen)
   3647				nextminlen = blen - stripe_align;
   3648			else
   3649				nextminlen = args.minlen;
   3650			if (nextminlen + stripe_align > args.minlen + 1)
   3651				args.minalignslop =
   3652					nextminlen + stripe_align -
   3653					args.minlen - 1;
   3654			else
   3655				args.minalignslop = 0;
   3656		}
   3657	} else {
   3658		args.alignment = 1;
   3659		args.minalignslop = 0;
   3660	}
   3661	args.minleft = ap->minleft;
   3662	args.wasdel = ap->wasdel;
   3663	args.resv = XFS_AG_RESV_NONE;
   3664	args.datatype = ap->datatype;
   3665
   3666	error = xfs_alloc_vextent(&args);
   3667	if (error)
   3668		return error;
   3669
   3670	if (tryagain && args.fsbno == NULLFSBLOCK) {
   3671		/*
   3672		 * Exact allocation failed. Now try with alignment
   3673		 * turned on.
   3674		 */
   3675		args.type = atype;
   3676		args.fsbno = ap->blkno;
   3677		args.alignment = stripe_align;
   3678		args.minlen = nextminlen;
   3679		args.minalignslop = 0;
   3680		isaligned = 1;
    3681		error = xfs_alloc_vextent(&args);
    3682		if (error)
        			return error;
   3683	}
   3684	if (isaligned && args.fsbno == NULLFSBLOCK) {
   3685		/*
   3686		 * allocation failed, so turn off alignment and
   3687		 * try again.
   3688		 */
   3689		args.type = atype;
   3690		args.fsbno = ap->blkno;
   3691		args.alignment = 0;
    3692		error = xfs_alloc_vextent(&args);
    3693		if (error)
        			return error;
   3694	}
   3695	if (args.fsbno == NULLFSBLOCK && nullfb &&
   3696	    args.minlen > ap->minlen) {
   3697		args.minlen = ap->minlen;
   3698		args.type = XFS_ALLOCTYPE_START_BNO;
   3699		args.fsbno = ap->blkno;
    3700		error = xfs_alloc_vextent(&args);
    3701		if (error)
        			return error;
   3702	}
   3703	if (args.fsbno == NULLFSBLOCK && nullfb) {
   3704		args.fsbno = 0;
   3705		args.type = XFS_ALLOCTYPE_FIRST_AG;
   3706		args.total = ap->minlen;
    3707		error = xfs_alloc_vextent(&args);
    3708		if (error)
        			return error;
   3709		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
   3710	}
   3711
   3712	if (args.fsbno != NULLFSBLOCK) {
   3713		xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
   3714			orig_length);
   3715	} else {
   3716		ap->blkno = NULLFSBLOCK;
   3717		ap->length = 0;
   3718	}
   3719	return 0;
   3720}
   3721
   3722/* Trim extent to fit a logical block range. */
   3723void
   3724xfs_trim_extent(
   3725	struct xfs_bmbt_irec	*irec,
   3726	xfs_fileoff_t		bno,
   3727	xfs_filblks_t		len)
   3728{
   3729	xfs_fileoff_t		distance;
   3730	xfs_fileoff_t		end = bno + len;
   3731
   3732	if (irec->br_startoff + irec->br_blockcount <= bno ||
   3733	    irec->br_startoff >= end) {
   3734		irec->br_blockcount = 0;
   3735		return;
   3736	}
   3737
   3738	if (irec->br_startoff < bno) {
   3739		distance = bno - irec->br_startoff;
   3740		if (isnullstartblock(irec->br_startblock))
   3741			irec->br_startblock = DELAYSTARTBLOCK;
   3742		if (irec->br_startblock != DELAYSTARTBLOCK &&
   3743		    irec->br_startblock != HOLESTARTBLOCK)
   3744			irec->br_startblock += distance;
   3745		irec->br_startoff += distance;
   3746		irec->br_blockcount -= distance;
   3747	}
   3748
   3749	if (end < irec->br_startoff + irec->br_blockcount) {
   3750		distance = irec->br_startoff + irec->br_blockcount - end;
   3751		irec->br_blockcount -= distance;
   3752	}
   3753}
   3754
   3755/*
   3756 * Trim the returned map to the required bounds
   3757 */
   3758STATIC void
   3759xfs_bmapi_trim_map(
   3760	struct xfs_bmbt_irec	*mval,
   3761	struct xfs_bmbt_irec	*got,
   3762	xfs_fileoff_t		*bno,
   3763	xfs_filblks_t		len,
   3764	xfs_fileoff_t		obno,
   3765	xfs_fileoff_t		end,
   3766	int			n,
   3767	uint32_t		flags)
   3768{
   3769	if ((flags & XFS_BMAPI_ENTIRE) ||
   3770	    got->br_startoff + got->br_blockcount <= obno) {
   3771		*mval = *got;
   3772		if (isnullstartblock(got->br_startblock))
   3773			mval->br_startblock = DELAYSTARTBLOCK;
   3774		return;
   3775	}
   3776
   3777	if (obno > *bno)
   3778		*bno = obno;
   3779	ASSERT((*bno >= obno) || (n == 0));
   3780	ASSERT(*bno < end);
   3781	mval->br_startoff = *bno;
   3782	if (isnullstartblock(got->br_startblock))
   3783		mval->br_startblock = DELAYSTARTBLOCK;
   3784	else
   3785		mval->br_startblock = got->br_startblock +
   3786					(*bno - got->br_startoff);
    3787	/*
    3788	 * Return the minimum of what we got and what we asked for as
    3789	 * the length.  We can use the len variable here because it is
    3790	 * modified below and we could have passed this way before on
    3791	 * an earlier mapping whose first part didn't overlap what was
    3792	 * asked for.
    3793	 */
   3794	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
   3795			got->br_blockcount - (*bno - got->br_startoff));
   3796	mval->br_state = got->br_state;
   3797	ASSERT(mval->br_blockcount <= len);
   3799}
   3800
   3801/*
   3802 * Update and validate the extent map to return
   3803 */
   3804STATIC void
   3805xfs_bmapi_update_map(
   3806	struct xfs_bmbt_irec	**map,
   3807	xfs_fileoff_t		*bno,
   3808	xfs_filblks_t		*len,
   3809	xfs_fileoff_t		obno,
   3810	xfs_fileoff_t		end,
   3811	int			*n,
   3812	uint32_t		flags)
   3813{
   3814	xfs_bmbt_irec_t	*mval = *map;
   3815
   3816	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
   3817	       ((mval->br_startoff + mval->br_blockcount) <= end));
   3818	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
   3819	       (mval->br_startoff < obno));
   3820
   3821	*bno = mval->br_startoff + mval->br_blockcount;
   3822	*len = end - *bno;
   3823	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
   3824		/* update previous map with new information */
   3825		ASSERT(mval->br_startblock == mval[-1].br_startblock);
   3826		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
   3827		ASSERT(mval->br_state == mval[-1].br_state);
   3828		mval[-1].br_blockcount = mval->br_blockcount;
   3829		mval[-1].br_state = mval->br_state;
   3830	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
   3831		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
   3832		   mval[-1].br_startblock != HOLESTARTBLOCK &&
   3833		   mval->br_startblock == mval[-1].br_startblock +
   3834					  mval[-1].br_blockcount &&
   3835		   mval[-1].br_state == mval->br_state) {
   3836		ASSERT(mval->br_startoff ==
   3837		       mval[-1].br_startoff + mval[-1].br_blockcount);
   3838		mval[-1].br_blockcount += mval->br_blockcount;
   3839	} else if (*n > 0 &&
   3840		   mval->br_startblock == DELAYSTARTBLOCK &&
   3841		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
   3842		   mval->br_startoff ==
   3843		   mval[-1].br_startoff + mval[-1].br_blockcount) {
   3844		mval[-1].br_blockcount += mval->br_blockcount;
   3845		mval[-1].br_state = mval->br_state;
   3846	} else if (!((*n == 0) &&
   3847		     ((mval->br_startoff + mval->br_blockcount) <=
   3848		      obno))) {
   3849		mval++;
   3850		(*n)++;
   3851	}
   3852	*map = mval;
   3853}
   3854
   3855/*
   3856 * Map file blocks to filesystem blocks without allocation.
   3857 */
   3858int
   3859xfs_bmapi_read(
   3860	struct xfs_inode	*ip,
   3861	xfs_fileoff_t		bno,
   3862	xfs_filblks_t		len,
   3863	struct xfs_bmbt_irec	*mval,
   3864	int			*nmap,
   3865	uint32_t		flags)
   3866{
   3867	struct xfs_mount	*mp = ip->i_mount;
   3868	int			whichfork = xfs_bmapi_whichfork(flags);
   3869	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   3870	struct xfs_bmbt_irec	got;
   3871	xfs_fileoff_t		obno;
   3872	xfs_fileoff_t		end;
   3873	struct xfs_iext_cursor	icur;
   3874	int			error;
   3875	bool			eof = false;
   3876	int			n = 0;
   3877
   3878	ASSERT(*nmap >= 1);
   3879	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
   3880	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
   3881
   3882	if (WARN_ON_ONCE(!ifp))
   3883		return -EFSCORRUPTED;
   3884
   3885	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   3886	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
   3887		return -EFSCORRUPTED;
   3888
   3889	if (xfs_is_shutdown(mp))
   3890		return -EIO;
   3891
   3892	XFS_STATS_INC(mp, xs_blk_mapr);
   3893
   3894	error = xfs_iread_extents(NULL, ip, whichfork);
   3895	if (error)
   3896		return error;
   3897
   3898	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
   3899		eof = true;
   3900	end = bno + len;
   3901	obno = bno;
   3902
   3903	while (bno < end && n < *nmap) {
   3904		/* Reading past eof, act as though there's a hole up to end. */
   3905		if (eof)
   3906			got.br_startoff = end;
   3907		if (got.br_startoff > bno) {
   3908			/* Reading in a hole.  */
   3909			mval->br_startoff = bno;
   3910			mval->br_startblock = HOLESTARTBLOCK;
   3911			mval->br_blockcount =
   3912				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
   3913			mval->br_state = XFS_EXT_NORM;
   3914			bno += mval->br_blockcount;
   3915			len -= mval->br_blockcount;
   3916			mval++;
   3917			n++;
   3918			continue;
   3919		}
   3920
   3921		/* set up the extent map to return. */
   3922		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
   3923		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
   3924
   3925		/* If we're done, stop now. */
   3926		if (bno >= end || n >= *nmap)
   3927			break;
   3928
   3929		/* Else go on to the next record. */
   3930		if (!xfs_iext_next_extent(ifp, &icur, &got))
   3931			eof = true;
   3932	}
   3933	*nmap = n;
   3934	return 0;
   3935}
   3936
   3937/*
   3938 * Add a delayed allocation extent to an inode. Blocks are reserved from the
   3939 * global pool and the extent inserted into the inode in-core extent tree.
   3940 *
   3941 * On entry, got refers to the first extent beyond the offset of the extent to
   3942 * allocate or eof is specified if no such extent exists. On return, got refers
   3943 * to the extent record that was inserted to the inode fork.
   3944 *
   3945 * Note that the allocated extent may have been merged with contiguous extents
   3946 * during insertion into the inode fork. Thus, got does not reflect the current
   3947 * state of the inode fork on return. If necessary, the caller can use lastx to
   3948 * look up the updated record in the inode fork.
   3949 */
   3950int
   3951xfs_bmapi_reserve_delalloc(
   3952	struct xfs_inode	*ip,
   3953	int			whichfork,
   3954	xfs_fileoff_t		off,
   3955	xfs_filblks_t		len,
   3956	xfs_filblks_t		prealloc,
   3957	struct xfs_bmbt_irec	*got,
   3958	struct xfs_iext_cursor	*icur,
   3959	int			eof)
   3960{
   3961	struct xfs_mount	*mp = ip->i_mount;
   3962	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   3963	xfs_extlen_t		alen;
   3964	xfs_extlen_t		indlen;
   3965	int			error;
   3966	xfs_fileoff_t		aoff = off;
   3967
   3968	/*
   3969	 * Cap the alloc length. Keep track of prealloc so we know whether to
   3970	 * tag the inode before we return.
   3971	 */
   3972	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
   3973	if (!eof)
   3974		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
   3975	if (prealloc && alen >= len)
   3976		prealloc = alen - len;
   3977
   3978	/* Figure out the extent size, adjust alen */
   3979	if (whichfork == XFS_COW_FORK) {
   3980		struct xfs_bmbt_irec	prev;
   3981		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
   3982
   3983		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
   3984			prev.br_startoff = NULLFILEOFF;
   3985
   3986		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
   3987					       1, 0, &aoff, &alen);
   3988		ASSERT(!error);
   3989	}
   3990
    3991	/*
    3992	 * Make a transaction-less quota reservation for delayed allocation
    3993	 * blocks.  This number gets adjusted later.  We can fail here and
    3994	 * return early because nothing has been allocated or reserved yet.
    3995	 */
   3996	error = xfs_quota_reserve_blkres(ip, alen);
   3997	if (error)
   3998		return error;
   3999
   4000	/*
   4001	 * Split changing sb for alen and indlen since they could be coming
   4002	 * from different places.
   4003	 */
   4004	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
   4005	ASSERT(indlen > 0);
   4006
   4007	error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
   4008	if (error)
   4009		goto out_unreserve_quota;
   4010
   4011	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
   4012	if (error)
   4013		goto out_unreserve_blocks;
    4014
   4016	ip->i_delayed_blks += alen;
   4017	xfs_mod_delalloc(ip->i_mount, alen + indlen);
   4018
   4019	got->br_startoff = aoff;
   4020	got->br_startblock = nullstartblock(indlen);
   4021	got->br_blockcount = alen;
   4022	got->br_state = XFS_EXT_NORM;
   4023
   4024	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
   4025
   4026	/*
   4027	 * Tag the inode if blocks were preallocated. Note that COW fork
   4028	 * preallocation can occur at the start or end of the extent, even when
   4029	 * prealloc == 0, so we must also check the aligned offset and length.
   4030	 */
   4031	if (whichfork == XFS_DATA_FORK && prealloc)
   4032		xfs_inode_set_eofblocks_tag(ip);
   4033	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
   4034		xfs_inode_set_cowblocks_tag(ip);
   4035
   4036	return 0;
   4037
   4038out_unreserve_blocks:
   4039	xfs_mod_fdblocks(mp, alen, false);
   4040out_unreserve_quota:
   4041	if (XFS_IS_QUOTA_ON(mp))
   4042		xfs_quota_unreserve_blkres(ip, alen);
   4043	return error;
   4044}
   4045
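        /*
         * Allocate blocks for a non-metadata mapping: select the allocation
         * datatype, route realtime data fork allocations to the realtime
         * allocator, and fall back to the standard btree allocator (or the
         * debug-only minlen allocator if that error tag is set) otherwise.
         */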
   4046static int
   4047xfs_bmap_alloc_userdata(
   4048	struct xfs_bmalloca	*bma)
   4049{
   4050	struct xfs_mount	*mp = bma->ip->i_mount;
   4051	int			whichfork = xfs_bmapi_whichfork(bma->flags);
   4052	int			error;
   4053
   4054	/*
   4055	 * Set the data type being allocated. For the data fork, the first data
   4056	 * in the file is treated differently to all other allocations. For the
   4057	 * attribute fork, we only need to ensure the allocated range is not on
   4058	 * the busy list.
   4059	 */
   4060	bma->datatype = XFS_ALLOC_NOBUSY;
   4061	if (whichfork == XFS_DATA_FORK) {
   4062		bma->datatype |= XFS_ALLOC_USERDATA;
   4063		if (bma->offset == 0)
   4064			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
   4065
   4066		if (mp->m_dalign && bma->length >= mp->m_dalign) {
   4067			error = xfs_bmap_isaeof(bma, whichfork);
   4068			if (error)
   4069				return error;
   4070		}
   4071
   4072		if (XFS_IS_REALTIME_INODE(bma->ip))
   4073			return xfs_bmap_rtalloc(bma);
   4074	}
   4075
   4076	if (unlikely(XFS_TEST_ERROR(false, mp,
   4077			XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
   4078		return xfs_bmap_exact_minlen_extent_alloc(bma);
   4079
   4080	return xfs_bmap_btalloc(bma);
   4081}
   4082
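        /*
         * Allocate the blocks backing the hole or delalloc extent that
         * xfs_bmapi_write() found at bma->offset and install the resulting
         * real extent in the in-core extent tree, where it may be merged
         * with its neighbours.
         */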
   4083static int
   4084xfs_bmapi_allocate(
   4085	struct xfs_bmalloca	*bma)
   4086{
   4087	struct xfs_mount	*mp = bma->ip->i_mount;
   4088	int			whichfork = xfs_bmapi_whichfork(bma->flags);
   4089	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
   4090	int			tmp_logflags = 0;
   4091	int			error;
   4092
   4093	ASSERT(bma->length > 0);
   4094
    4095	/*
    4096	 * For the wasdelay case, we allocate the entire delayed extent
    4097	 * rather than just the range asked for in this call.
    4098	 */
   4099	if (bma->wasdel) {
   4100		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
   4101		bma->offset = bma->got.br_startoff;
   4102		if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
   4103			bma->prev.br_startoff = NULLFILEOFF;
   4104	} else {
   4105		bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
   4106		if (!bma->eof)
   4107			bma->length = XFS_FILBLKS_MIN(bma->length,
   4108					bma->got.br_startoff - bma->offset);
   4109	}
   4110
   4111	if (bma->flags & XFS_BMAPI_CONTIG)
   4112		bma->minlen = bma->length;
   4113	else
   4114		bma->minlen = 1;
   4115
   4116	if (bma->flags & XFS_BMAPI_METADATA) {
   4117		if (unlikely(XFS_TEST_ERROR(false, mp,
   4118				XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
   4119			error = xfs_bmap_exact_minlen_extent_alloc(bma);
   4120		else
   4121			error = xfs_bmap_btalloc(bma);
   4122	} else {
   4123		error = xfs_bmap_alloc_userdata(bma);
   4124	}
   4125	if (error || bma->blkno == NULLFSBLOCK)
   4126		return error;
   4127
   4128	if (bma->flags & XFS_BMAPI_ZERO) {
   4129		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
   4130		if (error)
   4131			return error;
   4132	}
   4133
   4134	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
   4135		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
   4136	/*
   4137	 * Bump the number of extents we've allocated
   4138	 * in this call.
   4139	 */
   4140	bma->nallocs++;
   4141
   4142	if (bma->cur)
   4143		bma->cur->bc_ino.flags =
   4144			bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
   4145
   4146	bma->got.br_startoff = bma->offset;
   4147	bma->got.br_startblock = bma->blkno;
   4148	bma->got.br_blockcount = bma->length;
   4149	bma->got.br_state = XFS_EXT_NORM;
   4150
   4151	if (bma->flags & XFS_BMAPI_PREALLOC)
   4152		bma->got.br_state = XFS_EXT_UNWRITTEN;
   4153
   4154	if (bma->wasdel)
   4155		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
   4156	else
   4157		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
   4158				whichfork, &bma->icur, &bma->cur, &bma->got,
   4159				&bma->logflags, bma->flags);
   4160
   4161	bma->logflags |= tmp_logflags;
   4162	if (error)
   4163		return error;
   4164
   4165	/*
   4166	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
   4167	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
   4168	 * the neighbouring ones.
   4169	 */
   4170	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
   4171
   4172	ASSERT(bma->got.br_startoff <= bma->offset);
   4173	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
   4174	       bma->offset + bma->length);
   4175	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
   4176	       bma->got.br_state == XFS_EXT_UNWRITTEN);
   4177	return 0;
   4178}
   4179
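        /*
         * Convert the state of the extent just mapped: unwritten to real for
         * a write, or real to unwritten when XFS_BMAPI_CONVERT preallocation
         * conversion is requested.  Returns -EAGAIN if the mapping is
         * shorter than the requested length and another pass is needed.
         */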
   4180STATIC int
   4181xfs_bmapi_convert_unwritten(
   4182	struct xfs_bmalloca	*bma,
   4183	struct xfs_bmbt_irec	*mval,
   4184	xfs_filblks_t		len,
   4185	uint32_t		flags)
   4186{
   4187	int			whichfork = xfs_bmapi_whichfork(flags);
   4188	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
   4189	int			tmp_logflags = 0;
   4190	int			error;
   4191
   4192	/* check if we need to do unwritten->real conversion */
   4193	if (mval->br_state == XFS_EXT_UNWRITTEN &&
   4194	    (flags & XFS_BMAPI_PREALLOC))
   4195		return 0;
   4196
   4197	/* check if we need to do real->unwritten conversion */
   4198	if (mval->br_state == XFS_EXT_NORM &&
   4199	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
   4200			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
   4201		return 0;
   4202
   4203	/*
   4204	 * Modify (by adding) the state flag, if writing.
   4205	 */
   4206	ASSERT(mval->br_blockcount <= len);
   4207	if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
   4208		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
   4209					bma->ip, whichfork);
   4210	}
   4211	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
   4212				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
   4213
   4214	/*
   4215	 * Before insertion into the bmbt, zero the range being converted
   4216	 * if required.
   4217	 */
   4218	if (flags & XFS_BMAPI_ZERO) {
   4219		error = xfs_zero_extent(bma->ip, mval->br_startblock,
   4220					mval->br_blockcount);
   4221		if (error)
   4222			return error;
   4223	}
   4224
   4225	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
   4226			&bma->icur, &bma->cur, mval, &tmp_logflags);
   4227	/*
   4228	 * Log the inode core unconditionally in the unwritten extent conversion
   4229	 * path because the conversion might not have done so (e.g., if the
   4230	 * extent count hasn't changed). We need to make sure the inode is dirty
   4231	 * in the transaction for the sake of fsync(), even if nothing has
   4232	 * changed, because fsync() will not force the log for this transaction
   4233	 * unless it sees the inode pinned.
   4234	 *
   4235	 * Note: If we're only converting cow fork extents, there aren't
   4236	 * any on-disk updates to make, so we don't need to log anything.
   4237	 */
   4238	if (whichfork != XFS_COW_FORK)
   4239		bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
   4240	if (error)
   4241		return error;
   4242
   4243	/*
   4244	 * Update our extent pointer, given that
   4245	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
   4246	 * of the neighbouring ones.
   4247	 */
   4248	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
   4249
   4250	/*
   4251	 * We may have combined previously unwritten space with written space,
   4252	 * so generate another request.
   4253	 */
   4254	if (mval->br_blockcount < len)
   4255		return -EAGAIN;
   4256	return 0;
   4257}
   4258
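        /*
         * Compute the minimum number of blocks that must be left free in an
         * AG by this allocation: none once the transaction has allocated its
         * first block, one for a non-btree fork, or enough to split the bmap
         * btree from the root down to the leaves otherwise.
         */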
   4259static inline xfs_extlen_t
   4260xfs_bmapi_minleft(
   4261	struct xfs_trans	*tp,
   4262	struct xfs_inode	*ip,
   4263	int			fork)
   4264{
   4265	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, fork);
   4266
   4267	if (tp && tp->t_firstblock != NULLFSBLOCK)
   4268		return 0;
   4269	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
   4270		return 1;
   4271	return be16_to_cpu(ifp->if_broot->bb_level) + 1;
   4272}
   4273
   4274/*
   4275 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
   4276 * a case where the data is changed, there's an error, and it's not logged so we
   4277 * don't shutdown when we should.  Don't bother logging extents/btree changes if
   4278 * we converted to the other format.
   4279 */
   4280static void
   4281xfs_bmapi_finish(
   4282	struct xfs_bmalloca	*bma,
   4283	int			whichfork,
   4284	int			error)
   4285{
   4286	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
   4287
   4288	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
   4289	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
   4290		bma->logflags &= ~xfs_ilog_fext(whichfork);
   4291	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
   4292		 ifp->if_format != XFS_DINODE_FMT_BTREE)
   4293		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
   4294
   4295	if (bma->logflags)
   4296		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
   4297	if (bma->cur)
   4298		xfs_btree_del_cursor(bma->cur, error);
   4299}
   4300
   4301/*
   4302 * Map file blocks to filesystem blocks, and allocate blocks or convert the
    4303 * extent state if necessary.  Detailed behaviour is controlled by the flags
   4304 * parameter.  Only allocates blocks from a single allocation group, to avoid
   4305 * locking problems.
   4306 */
   4307int
   4308xfs_bmapi_write(
   4309	struct xfs_trans	*tp,		/* transaction pointer */
   4310	struct xfs_inode	*ip,		/* incore inode */
   4311	xfs_fileoff_t		bno,		/* starting file offs. mapped */
   4312	xfs_filblks_t		len,		/* length to map in file */
   4313	uint32_t		flags,		/* XFS_BMAPI_... */
   4314	xfs_extlen_t		total,		/* total blocks needed */
   4315	struct xfs_bmbt_irec	*mval,		/* output: map values */
   4316	int			*nmap)		/* i/o: mval size/count */
   4317{
   4318	struct xfs_bmalloca	bma = {
   4319		.tp		= tp,
   4320		.ip		= ip,
   4321		.total		= total,
   4322	};
   4323	struct xfs_mount	*mp = ip->i_mount;
   4324	int			whichfork = xfs_bmapi_whichfork(flags);
   4325	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   4326	xfs_fileoff_t		end;		/* end of mapped file region */
   4327	bool			eof = false;	/* after the end of extents */
   4328	int			error;		/* error return */
   4329	int			n;		/* current extent index */
   4330	xfs_fileoff_t		obno;		/* old block number (offset) */
   4331
   4332#ifdef DEBUG
   4333	xfs_fileoff_t		orig_bno;	/* original block number value */
   4334	int			orig_flags;	/* original flags arg value */
   4335	xfs_filblks_t		orig_len;	/* original value of len arg */
   4336	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
   4337	int			orig_nmap;	/* original value of *nmap */
   4338
   4339	orig_bno = bno;
   4340	orig_len = len;
   4341	orig_flags = flags;
   4342	orig_mval = mval;
   4343	orig_nmap = *nmap;
   4344#endif
   4345
   4346	ASSERT(*nmap >= 1);
   4347	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
   4348	ASSERT(tp != NULL);
   4349	ASSERT(len > 0);
   4350	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
   4351	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
   4352	ASSERT(!(flags & XFS_BMAPI_REMAP));
   4353
    4354	/* zeroing is currently only for data extents, not metadata */
   4355	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
   4356			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
    4357	/*
    4358	 * We can allocate unwritten extents or pre-zero allocated blocks,
    4359	 * but it makes no sense to do both at once.  That would result in
    4360	 * zeroing the unwritten extent twice while still leaving it an
    4361	 * unwritten extent.
    4362	 */
   4363	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
   4364			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
   4365
   4366	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   4367	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
   4368		return -EFSCORRUPTED;
   4369	}
   4370
   4371	if (xfs_is_shutdown(mp))
   4372		return -EIO;
   4373
   4374	XFS_STATS_INC(mp, xs_blk_mapw);
   4375
   4376	error = xfs_iread_extents(tp, ip, whichfork);
   4377	if (error)
   4378		goto error0;
   4379
   4380	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
   4381		eof = true;
   4382	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
   4383		bma.prev.br_startoff = NULLFILEOFF;
   4384	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
   4385
   4386	n = 0;
   4387	end = bno + len;
   4388	obno = bno;
   4389	while (bno < end && n < *nmap) {
   4390		bool			need_alloc = false, wasdelay = false;
   4391
   4392		/* in hole or beyond EOF? */
   4393		if (eof || bma.got.br_startoff > bno) {
   4394			/*
   4395			 * CoW fork conversions should /never/ hit EOF or
   4396			 * holes.  There should always be something for us
   4397			 * to work on.
   4398			 */
   4399			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
   4400			         (flags & XFS_BMAPI_COWFORK)));
   4401
   4402			need_alloc = true;
   4403		} else if (isnullstartblock(bma.got.br_startblock)) {
   4404			wasdelay = true;
   4405		}
   4406
   4407		/*
   4408		 * First, deal with the hole before the allocated space
   4409		 * that we found, if any.
   4410		 */
   4411		if (need_alloc || wasdelay) {
   4412			bma.eof = eof;
   4413			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
   4414			bma.wasdel = wasdelay;
   4415			bma.offset = bno;
   4416			bma.flags = flags;
   4417
   4418			/*
   4419			 * There's a 32/64 bit type mismatch between the
   4420			 * allocation length request (which can be 64 bits in
   4421			 * length) and the bma length request, which is
   4422			 * xfs_extlen_t and therefore 32 bits. Hence we have to
   4423			 * check for 32-bit overflows and handle them here.
   4424			 */
   4425			if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
   4426				bma.length = XFS_MAX_BMBT_EXTLEN;
   4427			else
   4428				bma.length = len;
   4429
   4430			ASSERT(len > 0);
   4431			ASSERT(bma.length > 0);
   4432			error = xfs_bmapi_allocate(&bma);
   4433			if (error)
   4434				goto error0;
   4435			if (bma.blkno == NULLFSBLOCK)
   4436				break;
   4437
   4438			/*
   4439			 * If this is a CoW allocation, record the data in
   4440			 * the refcount btree for orphan recovery.
   4441			 */
   4442			if (whichfork == XFS_COW_FORK)
   4443				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
   4444						bma.length);
   4445		}
   4446
   4447		/* Deal with the allocated space we found.  */
   4448		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
   4449							end, n, flags);
   4450
   4451		/* Execute unwritten extent conversion if necessary */
   4452		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
   4453		if (error == -EAGAIN)
   4454			continue;
   4455		if (error)
   4456			goto error0;
   4457
   4458		/* update the extent map to return */
   4459		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
   4460
   4461		/*
   4462		 * If we're done, stop now.  Stop when we've allocated
   4463		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
   4464		 * the transaction may get too big.
   4465		 */
   4466		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
   4467			break;
   4468
   4469		/* Else go on to the next record. */
   4470		bma.prev = bma.got;
   4471		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
   4472			eof = true;
   4473	}
   4474	*nmap = n;
   4475
   4476	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
   4477			whichfork);
   4478	if (error)
   4479		goto error0;
   4480
   4481	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
   4482	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
   4483	xfs_bmapi_finish(&bma, whichfork, 0);
   4484	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
   4485		orig_nmap, *nmap);
   4486	return 0;
   4487error0:
   4488	xfs_bmapi_finish(&bma, whichfork, error);
   4489	return error;
   4490}
   4491
   4492/*
   4493 * Convert an existing delalloc extent to real blocks based on file offset. This
   4494 * attempts to allocate the entire delalloc extent and may require multiple
   4495 * invocations to allocate the target offset if a large enough physical extent
   4496 * is not available.
   4497 */
   4498int
   4499xfs_bmapi_convert_delalloc(
   4500	struct xfs_inode	*ip,
   4501	int			whichfork,
   4502	xfs_off_t		offset,
   4503	struct iomap		*iomap,
   4504	unsigned int		*seq)
   4505{
   4506	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   4507	struct xfs_mount	*mp = ip->i_mount;
   4508	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
   4509	struct xfs_bmalloca	bma = { NULL };
   4510	uint16_t		flags = 0;
   4511	struct xfs_trans	*tp;
   4512	int			error;
   4513
   4514	if (whichfork == XFS_COW_FORK)
   4515		flags |= IOMAP_F_SHARED;
   4516
   4517	/*
   4518	 * Space for the extent and indirect blocks was reserved when the
   4519	 * delalloc extent was created so there's no need to do so here.
   4520	 */
   4521	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
   4522				XFS_TRANS_RESERVE, &tp);
   4523	if (error)
   4524		return error;
   4525
   4526	xfs_ilock(ip, XFS_ILOCK_EXCL);
   4527	xfs_trans_ijoin(tp, ip, 0);
   4528
   4529	error = xfs_iext_count_may_overflow(ip, whichfork,
   4530			XFS_IEXT_ADD_NOSPLIT_CNT);
   4531	if (error == -EFBIG)
   4532		error = xfs_iext_count_upgrade(tp, ip,
   4533				XFS_IEXT_ADD_NOSPLIT_CNT);
   4534	if (error)
   4535		goto out_trans_cancel;
   4536
   4537	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
   4538	    bma.got.br_startoff > offset_fsb) {
   4539		/*
   4540		 * No extent found in the range we are trying to convert.  This
   4541		 * should only happen for the COW fork, where another thread
   4542		 * might have moved the extent to the data fork in the meantime.
   4543		 */
   4544		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
   4545		error = -EAGAIN;
   4546		goto out_trans_cancel;
   4547	}
   4548
   4549	/*
   4550	 * If we find a real extent here we raced with another thread converting
   4551	 * the extent.  Just return the real extent at this offset.
   4552	 */
   4553	if (!isnullstartblock(bma.got.br_startblock)) {
   4554		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
   4555		*seq = READ_ONCE(ifp->if_seq);
   4556		goto out_trans_cancel;
   4557	}
   4558
   4559	bma.tp = tp;
   4560	bma.ip = ip;
   4561	bma.wasdel = true;
   4562	bma.offset = bma.got.br_startoff;
   4563	bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
   4564			XFS_MAX_BMBT_EXTLEN);
   4565	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
   4566
   4567	/*
   4568	 * When we're converting the delalloc reservations backing dirty pages
   4569	 * in the page cache, we must be careful about how we create the new
   4570	 * extents:
   4571	 *
   4572	 * New CoW fork extents are created unwritten, turned into real extents
   4573	 * when we're about to write the data to disk, and mapped into the data
   4574	 * fork after the write finishes.  End of story.
   4575	 *
   4576	 * New data fork extents must be mapped in as unwritten and converted
   4577	 * to real extents after the write succeeds to avoid exposing stale
   4578	 * disk contents if we crash.
   4579	 */
   4580	bma.flags = XFS_BMAPI_PREALLOC;
   4581	if (whichfork == XFS_COW_FORK)
   4582		bma.flags |= XFS_BMAPI_COWFORK;
   4583
   4584	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
   4585		bma.prev.br_startoff = NULLFILEOFF;
   4586
   4587	error = xfs_bmapi_allocate(&bma);
   4588	if (error)
   4589		goto out_finish;
   4590
   4591	error = -ENOSPC;
   4592	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
   4593		goto out_finish;
   4594	error = -EFSCORRUPTED;
   4595	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
   4596		goto out_finish;
   4597
   4598	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
   4599	XFS_STATS_INC(mp, xs_xstrat_quick);
   4600
   4601	ASSERT(!isnullstartblock(bma.got.br_startblock));
   4602	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
   4603	*seq = READ_ONCE(ifp->if_seq);
   4604
   4605	if (whichfork == XFS_COW_FORK)
   4606		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
   4607
   4608	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
   4609			whichfork);
   4610	if (error)
   4611		goto out_finish;
   4612
   4613	xfs_bmapi_finish(&bma, whichfork, 0);
   4614	error = xfs_trans_commit(tp);
   4615	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   4616	return error;
   4617
   4618out_finish:
   4619	xfs_bmapi_finish(&bma, whichfork, error);
   4620out_trans_cancel:
   4621	xfs_trans_cancel(tp);
   4622	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   4623	return error;
   4624}
   4625
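        /*
         * Map a pre-existing physical extent at @startblock into a hole in
         * the file at offset @bno without allocating new blocks, e.g. when
         * remapping blocks for reflink.
         */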
   4626int
   4627xfs_bmapi_remap(
   4628	struct xfs_trans	*tp,
   4629	struct xfs_inode	*ip,
   4630	xfs_fileoff_t		bno,
   4631	xfs_filblks_t		len,
   4632	xfs_fsblock_t		startblock,
   4633	uint32_t		flags)
   4634{
   4635	struct xfs_mount	*mp = ip->i_mount;
   4636	struct xfs_ifork	*ifp;
   4637	struct xfs_btree_cur	*cur = NULL;
   4638	struct xfs_bmbt_irec	got;
   4639	struct xfs_iext_cursor	icur;
   4640	int			whichfork = xfs_bmapi_whichfork(flags);
   4641	int			logflags = 0, error;
   4642
   4643	ifp = XFS_IFORK_PTR(ip, whichfork);
   4644	ASSERT(len > 0);
   4645	ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
   4646	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
   4647	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
   4648			   XFS_BMAPI_NORMAP)));
   4649	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
   4650			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
   4651
   4652	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   4653	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
   4654		return -EFSCORRUPTED;
   4655	}
   4656
   4657	if (xfs_is_shutdown(mp))
   4658		return -EIO;
   4659
   4660	error = xfs_iread_extents(tp, ip, whichfork);
   4661	if (error)
   4662		return error;
   4663
   4664	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
   4665		/* make sure we only reflink into a hole. */
   4666		ASSERT(got.br_startoff > bno);
   4667		ASSERT(got.br_startoff - bno >= len);
   4668	}
   4669
   4670	ip->i_nblocks += len;
   4671	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
   4672
   4673	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
   4674		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   4675		cur->bc_ino.flags = 0;
   4676	}
   4677
   4678	got.br_startoff = bno;
   4679	got.br_startblock = startblock;
   4680	got.br_blockcount = len;
   4681	if (flags & XFS_BMAPI_PREALLOC)
   4682		got.br_state = XFS_EXT_UNWRITTEN;
   4683	else
   4684		got.br_state = XFS_EXT_NORM;
   4685
   4686	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
   4687			&cur, &got, &logflags, flags);
   4688	if (error)
   4689		goto error0;
   4690
   4691	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
   4692
   4693error0:
   4694	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
   4695		logflags &= ~XFS_ILOG_DEXT;
   4696	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
   4697		logflags &= ~XFS_ILOG_DBROOT;
   4698
   4699	if (logflags)
   4700		xfs_trans_log_inode(tp, ip, logflags);
   4701	if (cur)
   4702		xfs_btree_del_cursor(cur, error);
   4703	return error;
   4704}
   4705
   4706/*
   4707 * When a delalloc extent is split (e.g., due to a hole punch), the original
   4708 * indlen reservation must be shared across the two new extents that are left
   4709 * behind.
   4710 *
   4711 * Given the original reservation and the worst case indlen for the two new
   4712 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
   4713 * reservation fairly across the two new extents. If necessary, steal available
   4714 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
   4715 * ores == 1). The number of stolen blocks is returned. The availability and
   4716 * subsequent accounting of stolen blocks is the responsibility of the caller.
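         *
         * For example, given ores = 10, worst case indlen values of 8 for
         * both new extents and no stealable blocks: resfactor works out to
         * 62, the scaled reservations are len1 = len2 = 4, and the loop
         * below hands out the remaining two blocks of ores one at a time,
         * leaving each extent with an indlen reservation of 5.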
   4717 */
   4718static xfs_filblks_t
   4719xfs_bmap_split_indlen(
   4720	xfs_filblks_t			ores,		/* original res. */
   4721	xfs_filblks_t			*indlen1,	/* ext1 worst indlen */
   4722	xfs_filblks_t			*indlen2,	/* ext2 worst indlen */
   4723	xfs_filblks_t			avail)		/* stealable blocks */
   4724{
   4725	xfs_filblks_t			len1 = *indlen1;
   4726	xfs_filblks_t			len2 = *indlen2;
   4727	xfs_filblks_t			nres = len1 + len2; /* new total res. */
   4728	xfs_filblks_t			stolen = 0;
   4729	xfs_filblks_t			resfactor;
   4730
   4731	/*
   4732	 * Steal as many blocks as we can to try and satisfy the worst case
   4733	 * indlen for both new extents.
   4734	 */
   4735	if (ores < nres && avail)
   4736		stolen = XFS_FILBLKS_MIN(nres - ores, avail);
   4737	ores += stolen;
   4738
    4739	/* nothing else to do if we've satisfied the new reservation */
   4740	if (ores >= nres)
   4741		return stolen;
   4742
   4743	/*
   4744	 * We can't meet the total required reservation for the two extents.
   4745	 * Calculate the percent of the overall shortage between both extents
   4746	 * and apply this percentage to each of the requested indlen values.
   4747	 * This distributes the shortage fairly and reduces the chances that one
   4748	 * of the two extents is left with nothing when extents are repeatedly
   4749	 * split.
   4750	 */
   4751	resfactor = (ores * 100);
   4752	do_div(resfactor, nres);
   4753	len1 *= resfactor;
   4754	do_div(len1, 100);
   4755	len2 *= resfactor;
   4756	do_div(len2, 100);
   4757	ASSERT(len1 + len2 <= ores);
   4758	ASSERT(len1 < *indlen1 && len2 < *indlen2);
   4759
   4760	/*
   4761	 * Hand out the remainder to each extent. If one of the two reservations
   4762	 * is zero, we want to make sure that one gets a block first. The loop
   4763	 * below starts with len1, so hand len2 a block right off the bat if it
   4764	 * is zero.
   4765	 */
   4766	ores -= (len1 + len2);
   4767	ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
   4768	if (ores && !len2 && *indlen2) {
   4769		len2++;
   4770		ores--;
   4771	}
   4772	while (ores) {
   4773		if (len1 < *indlen1) {
   4774			len1++;
   4775			ores--;
   4776		}
   4777		if (!ores)
   4778			break;
   4779		if (len2 < *indlen2) {
   4780			len2++;
   4781			ores--;
   4782		}
   4783	}
   4784
   4785	*indlen1 = len1;
   4786	*indlen2 = len2;
   4787
   4788	return stolen;
   4789}
   4790
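        /*
         * Remove the range described by @del from the delayed allocation
         * extent @got in the in-core extent tree, trimming or splitting the
         * existing record and giving back the quota, free space and indirect
         * block reservations that are no longer needed.
         */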
   4791int
   4792xfs_bmap_del_extent_delay(
   4793	struct xfs_inode	*ip,
   4794	int			whichfork,
   4795	struct xfs_iext_cursor	*icur,
   4796	struct xfs_bmbt_irec	*got,
   4797	struct xfs_bmbt_irec	*del)
   4798{
   4799	struct xfs_mount	*mp = ip->i_mount;
   4800	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   4801	struct xfs_bmbt_irec	new;
   4802	int64_t			da_old, da_new, da_diff = 0;
   4803	xfs_fileoff_t		del_endoff, got_endoff;
   4804	xfs_filblks_t		got_indlen, new_indlen, stolen;
   4805	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   4806	int			error = 0;
   4807	bool			isrt;
   4808
   4809	XFS_STATS_INC(mp, xs_del_exlist);
   4810
   4811	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
   4812	del_endoff = del->br_startoff + del->br_blockcount;
   4813	got_endoff = got->br_startoff + got->br_blockcount;
   4814	da_old = startblockval(got->br_startblock);
   4815	da_new = 0;
   4816
   4817	ASSERT(del->br_blockcount > 0);
   4818	ASSERT(got->br_startoff <= del->br_startoff);
   4819	ASSERT(got_endoff >= del_endoff);
   4820
   4821	if (isrt) {
   4822		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
   4823
   4824		do_div(rtexts, mp->m_sb.sb_rextsize);
   4825		xfs_mod_frextents(mp, rtexts);
   4826	}
   4827
   4828	/*
   4829	 * Update the inode delalloc counter now and wait to update the
   4830	 * sb counters as we might have to borrow some blocks for the
   4831	 * indirect block accounting.
   4832	 */
   4833	ASSERT(!isrt);
   4834	error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
   4835	if (error)
   4836		return error;
   4837	ip->i_delayed_blks -= del->br_blockcount;
   4838
   4839	if (got->br_startoff == del->br_startoff)
   4840		state |= BMAP_LEFT_FILLING;
   4841	if (got_endoff == del_endoff)
   4842		state |= BMAP_RIGHT_FILLING;
   4843
   4844	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
   4845	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
   4846		/*
   4847		 * Matches the whole extent.  Delete the entry.
   4848		 */
   4849		xfs_iext_remove(ip, icur, state);
   4850		xfs_iext_prev(ifp, icur);
   4851		break;
   4852	case BMAP_LEFT_FILLING:
   4853		/*
   4854		 * Deleting the first part of the extent.
   4855		 */
   4856		got->br_startoff = del_endoff;
   4857		got->br_blockcount -= del->br_blockcount;
   4858		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
   4859				got->br_blockcount), da_old);
   4860		got->br_startblock = nullstartblock((int)da_new);
   4861		xfs_iext_update_extent(ip, state, icur, got);
   4862		break;
   4863	case BMAP_RIGHT_FILLING:
   4864		/*
   4865		 * Deleting the last part of the extent.
   4866		 */
   4867		got->br_blockcount = got->br_blockcount - del->br_blockcount;
   4868		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
   4869				got->br_blockcount), da_old);
   4870		got->br_startblock = nullstartblock((int)da_new);
   4871		xfs_iext_update_extent(ip, state, icur, got);
   4872		break;
   4873	case 0:
   4874		/*
   4875		 * Deleting the middle of the extent.
   4876		 *
   4877		 * Distribute the original indlen reservation across the two new
   4878		 * extents.  Steal blocks from the deleted extent if necessary.
   4879		 * Stealing blocks simply fudges the fdblocks accounting below.
   4880		 * Warn if either of the new indlen reservations is zero as this
   4881		 * can lead to delalloc problems.
   4882		 */
   4883		got->br_blockcount = del->br_startoff - got->br_startoff;
   4884		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
   4885
   4886		new.br_blockcount = got_endoff - del_endoff;
   4887		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
   4888
   4889		WARN_ON_ONCE(!got_indlen || !new_indlen);
   4890		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
   4891						       del->br_blockcount);
   4892
   4893		got->br_startblock = nullstartblock((int)got_indlen);
   4894
   4895		new.br_startoff = del_endoff;
   4896		new.br_state = got->br_state;
   4897		new.br_startblock = nullstartblock((int)new_indlen);
   4898
   4899		xfs_iext_update_extent(ip, state, icur, got);
   4900		xfs_iext_next(ifp, icur);
   4901		xfs_iext_insert(ip, icur, &new, state);
   4902
   4903		da_new = got_indlen + new_indlen - stolen;
   4904		del->br_blockcount -= stolen;
   4905		break;
   4906	}
   4907
   4908	ASSERT(da_old >= da_new);
   4909	da_diff = da_old - da_new;
   4910	if (!isrt)
   4911		da_diff += del->br_blockcount;
   4912	if (da_diff) {
   4913		xfs_mod_fdblocks(mp, da_diff, false);
   4914		xfs_mod_delalloc(mp, -da_diff);
   4915	}
   4916	return error;
   4917}
   4918
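        /*
         * Remove the range described by @del from the real extent @got in the
         * COW fork's in-core extent tree.  The COW fork has no on-disk
         * metadata, so only the in-core records and the inode's count of COW
         * blocks are updated; freeing the physical blocks is up to the
         * caller.
         */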
   4919void
   4920xfs_bmap_del_extent_cow(
   4921	struct xfs_inode	*ip,
   4922	struct xfs_iext_cursor	*icur,
   4923	struct xfs_bmbt_irec	*got,
   4924	struct xfs_bmbt_irec	*del)
   4925{
   4926	struct xfs_mount	*mp = ip->i_mount;
   4927	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
   4928	struct xfs_bmbt_irec	new;
   4929	xfs_fileoff_t		del_endoff, got_endoff;
   4930	uint32_t		state = BMAP_COWFORK;
   4931
   4932	XFS_STATS_INC(mp, xs_del_exlist);
   4933
   4934	del_endoff = del->br_startoff + del->br_blockcount;
   4935	got_endoff = got->br_startoff + got->br_blockcount;
   4936
   4937	ASSERT(del->br_blockcount > 0);
   4938	ASSERT(got->br_startoff <= del->br_startoff);
   4939	ASSERT(got_endoff >= del_endoff);
   4940	ASSERT(!isnullstartblock(got->br_startblock));
   4941
   4942	if (got->br_startoff == del->br_startoff)
   4943		state |= BMAP_LEFT_FILLING;
   4944	if (got_endoff == del_endoff)
   4945		state |= BMAP_RIGHT_FILLING;
   4946
   4947	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
   4948	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
   4949		/*
   4950		 * Matches the whole extent.  Delete the entry.
   4951		 */
   4952		xfs_iext_remove(ip, icur, state);
   4953		xfs_iext_prev(ifp, icur);
   4954		break;
   4955	case BMAP_LEFT_FILLING:
   4956		/*
   4957		 * Deleting the first part of the extent.
   4958		 */
   4959		got->br_startoff = del_endoff;
   4960		got->br_blockcount -= del->br_blockcount;
   4961		got->br_startblock = del->br_startblock + del->br_blockcount;
   4962		xfs_iext_update_extent(ip, state, icur, got);
   4963		break;
   4964	case BMAP_RIGHT_FILLING:
   4965		/*
   4966		 * Deleting the last part of the extent.
   4967		 */
   4968		got->br_blockcount -= del->br_blockcount;
   4969		xfs_iext_update_extent(ip, state, icur, got);
   4970		break;
   4971	case 0:
   4972		/*
   4973		 * Deleting the middle of the extent.
   4974		 */
   4975		got->br_blockcount = del->br_startoff - got->br_startoff;
   4976
   4977		new.br_startoff = del_endoff;
   4978		new.br_blockcount = got_endoff - del_endoff;
   4979		new.br_state = got->br_state;
   4980		new.br_startblock = del->br_startblock + del->br_blockcount;
   4981
   4982		xfs_iext_update_extent(ip, state, icur, got);
   4983		xfs_iext_next(ifp, icur);
   4984		xfs_iext_insert(ip, icur, &new, state);
   4985		break;
   4986	}
   4987	ip->i_delayed_blks -= del->br_blockcount;
   4988}
   4989
   4990/*
   4991 * Called by xfs_bmapi to update file extent records and the btree
   4992 * after removing space.
   4993 */
   4994STATIC int				/* error */
   4995xfs_bmap_del_extent_real(
   4996	xfs_inode_t		*ip,	/* incore inode pointer */
   4997	xfs_trans_t		*tp,	/* current transaction pointer */
   4998	struct xfs_iext_cursor	*icur,
   4999	struct xfs_btree_cur	*cur,	/* if null, not a btree */
   5000	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
   5001	int			*logflagsp, /* inode logging flags */
   5002	int			whichfork, /* data or attr fork */
   5003	uint32_t		bflags)	/* bmapi flags */
   5004{
    5005	xfs_fsblock_t		del_endblock = 0; /* first block past del */
    5006	xfs_fileoff_t		del_endoff;	/* first offset past del */
    5007	int			do_fx;	/* free extent at end of routine */
    5008	int			error;	/* error return value */
    5009	int			flags = 0;	/* inode logging flags */
   5010	struct xfs_bmbt_irec	got;	/* current extent entry */
   5011	xfs_fileoff_t		got_endoff;	/* first offset past got */
   5012	int			i;	/* temp state */
   5013	struct xfs_ifork	*ifp;	/* inode fork pointer */
   5014	xfs_mount_t		*mp;	/* mount structure */
   5015	xfs_filblks_t		nblks;	/* quota/sb block count */
   5016	xfs_bmbt_irec_t		new;	/* new record to be inserted */
   5017	/* REFERENCED */
   5018	uint			qfield;	/* quota field to update */
   5019	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
   5020	struct xfs_bmbt_irec	old;
   5021
   5022	mp = ip->i_mount;
   5023	XFS_STATS_INC(mp, xs_del_exlist);
   5024
   5025	ifp = XFS_IFORK_PTR(ip, whichfork);
   5026	ASSERT(del->br_blockcount > 0);
   5027	xfs_iext_get_extent(ifp, icur, &got);
   5028	ASSERT(got.br_startoff <= del->br_startoff);
   5029	del_endoff = del->br_startoff + del->br_blockcount;
   5030	got_endoff = got.br_startoff + got.br_blockcount;
   5031	ASSERT(got_endoff >= del_endoff);
   5032	ASSERT(!isnullstartblock(got.br_startblock));
   5033	qfield = 0;
   5034	error = 0;
   5035
   5036	/*
    5037	 * If the directory code is running with no block reservation, the
    5038	 * deleted block lies in the middle of its extent, and the resulting
    5039	 * insert of an extent would force a conversion to btree format, then
    5040	 * reject it.  The calling code will then swap blocks around instead.
    5041	 * We have to do this now, rather than waiting for the conversion to
    5042	 * btree format, since the transaction will be dirty then.
   5043	 */
   5044	if (tp->t_blk_res == 0 &&
   5045	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
   5046	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
   5047	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
   5048		return -ENOSPC;
   5049
   5050	flags = XFS_ILOG_CORE;
   5051	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
   5052		xfs_filblks_t	len;
   5053		xfs_extlen_t	mod;
   5054
   5055		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
   5056				  &mod);
   5057		ASSERT(mod == 0);
   5058
   5059		if (!(bflags & XFS_BMAPI_REMAP)) {
   5060			xfs_fsblock_t	bno;
   5061
   5062			bno = div_u64_rem(del->br_startblock,
   5063					mp->m_sb.sb_rextsize, &mod);
   5064			ASSERT(mod == 0);
   5065
   5066			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
   5067			if (error)
   5068				goto done;
   5069		}
   5070
   5071		do_fx = 0;
   5072		nblks = len * mp->m_sb.sb_rextsize;
   5073		qfield = XFS_TRANS_DQ_RTBCOUNT;
   5074	} else {
   5075		do_fx = 1;
   5076		nblks = del->br_blockcount;
   5077		qfield = XFS_TRANS_DQ_BCOUNT;
   5078	}
   5079
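	/*
	 * Note (descriptive): realtime blocks are freed synchronously above
	 * via xfs_rtfree_extent() (unless this is a remap), so do_fx stays
	 * clear; data-device blocks are instead deferred to the free list
	 * near the bottom of this function.
	 */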
   5080	del_endblock = del->br_startblock + del->br_blockcount;
   5081	if (cur) {
   5082		error = xfs_bmbt_lookup_eq(cur, &got, &i);
   5083		if (error)
   5084			goto done;
   5085		if (XFS_IS_CORRUPT(mp, i != 1)) {
   5086			error = -EFSCORRUPTED;
   5087			goto done;
   5088		}
   5089	}
   5090
   5091	if (got.br_startoff == del->br_startoff)
   5092		state |= BMAP_LEFT_FILLING;
   5093	if (got_endoff == del_endoff)
   5094		state |= BMAP_RIGHT_FILLING;
   5095
   5096	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
   5097	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
   5098		/*
   5099		 * Matches the whole extent.  Delete the entry.
   5100		 */
   5101		xfs_iext_remove(ip, icur, state);
   5102		xfs_iext_prev(ifp, icur);
   5103		ifp->if_nextents--;
   5104
   5105		flags |= XFS_ILOG_CORE;
   5106		if (!cur) {
   5107			flags |= xfs_ilog_fext(whichfork);
   5108			break;
   5109		}
    5110		error = xfs_btree_delete(cur, &i);
    5111		if (error)
			goto done;
   5112		if (XFS_IS_CORRUPT(mp, i != 1)) {
   5113			error = -EFSCORRUPTED;
   5114			goto done;
   5115		}
   5116		break;
   5117	case BMAP_LEFT_FILLING:
   5118		/*
   5119		 * Deleting the first part of the extent.
   5120		 */
   5121		got.br_startoff = del_endoff;
   5122		got.br_startblock = del_endblock;
   5123		got.br_blockcount -= del->br_blockcount;
   5124		xfs_iext_update_extent(ip, state, icur, &got);
   5125		if (!cur) {
   5126			flags |= xfs_ilog_fext(whichfork);
   5127			break;
   5128		}
   5129		error = xfs_bmbt_update(cur, &got);
   5130		if (error)
   5131			goto done;
   5132		break;
   5133	case BMAP_RIGHT_FILLING:
   5134		/*
   5135		 * Deleting the last part of the extent.
   5136		 */
   5137		got.br_blockcount -= del->br_blockcount;
   5138		xfs_iext_update_extent(ip, state, icur, &got);
   5139		if (!cur) {
   5140			flags |= xfs_ilog_fext(whichfork);
   5141			break;
   5142		}
   5143		error = xfs_bmbt_update(cur, &got);
   5144		if (error)
   5145			goto done;
   5146		break;
   5147	case 0:
   5148		/*
   5149		 * Deleting the middle of the extent.
   5150		 */
   5151
   5152		old = got;
   5153
   5154		got.br_blockcount = del->br_startoff - got.br_startoff;
   5155		xfs_iext_update_extent(ip, state, icur, &got);
   5156
   5157		new.br_startoff = del_endoff;
   5158		new.br_blockcount = got_endoff - del_endoff;
   5159		new.br_state = got.br_state;
   5160		new.br_startblock = del_endblock;
   5161
   5162		flags |= XFS_ILOG_CORE;
   5163		if (cur) {
   5164			error = xfs_bmbt_update(cur, &got);
   5165			if (error)
   5166				goto done;
   5167			error = xfs_btree_increment(cur, 0, &i);
   5168			if (error)
   5169				goto done;
   5170			cur->bc_rec.b = new;
   5171			error = xfs_btree_insert(cur, &i);
   5172			if (error && error != -ENOSPC)
   5173				goto done;
   5174			/*
    5175			 * If we get no-space back from the btree insert, it
    5176			 * tried a split and we have a zero block reservation.
    5177			 * Fix up our state and return the error.
   5178			 */
   5179			if (error == -ENOSPC) {
   5180				/*
   5181				 * Reset the cursor, don't trust it after any
   5182				 * insert operation.
   5183				 */
   5184				error = xfs_bmbt_lookup_eq(cur, &got, &i);
   5185				if (error)
   5186					goto done;
   5187				if (XFS_IS_CORRUPT(mp, i != 1)) {
   5188					error = -EFSCORRUPTED;
   5189					goto done;
   5190				}
   5191				/*
   5192				 * Update the btree record back
   5193				 * to the original value.
   5194				 */
   5195				error = xfs_bmbt_update(cur, &old);
   5196				if (error)
   5197					goto done;
   5198				/*
   5199				 * Reset the extent record back
   5200				 * to the original value.
   5201				 */
   5202				xfs_iext_update_extent(ip, state, icur, &old);
   5203				flags = 0;
   5204				error = -ENOSPC;
   5205				goto done;
   5206			}
   5207			if (XFS_IS_CORRUPT(mp, i != 1)) {
   5208				error = -EFSCORRUPTED;
   5209				goto done;
   5210			}
   5211		} else
   5212			flags |= xfs_ilog_fext(whichfork);
   5213
   5214		ifp->if_nextents++;
   5215		xfs_iext_next(ifp, icur);
   5216		xfs_iext_insert(ip, icur, &new, state);
   5217		break;
   5218	}
   5219
   5220	/* remove reverse mapping */
   5221	xfs_rmap_unmap_extent(tp, ip, whichfork, del);
   5222
   5223	/*
   5224	 * If we need to, add to list of extents to delete.
   5225	 */
   5226	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
   5227		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
   5228			xfs_refcount_decrease_extent(tp, del);
   5229		} else {
   5230			__xfs_free_extent_later(tp, del->br_startblock,
   5231					del->br_blockcount, NULL,
   5232					(bflags & XFS_BMAPI_NODISCARD) ||
   5233					del->br_state == XFS_EXT_UNWRITTEN);
   5234		}
   5235	}
   5236
   5237	/*
   5238	 * Adjust inode # blocks in the file.
   5239	 */
   5240	if (nblks)
   5241		ip->i_nblocks -= nblks;
   5242	/*
   5243	 * Adjust quota data.
   5244	 */
   5245	if (qfield && !(bflags & XFS_BMAPI_REMAP))
   5246		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
   5247
   5248done:
   5249	*logflagsp = flags;
   5250	return error;
   5251}
   5252
   5253/*
   5254 * Unmap (remove) blocks from a file.
   5255 * If nexts is nonzero then the number of extents to remove is limited to
   5256 * that value.  If not all extents in the block range can be removed then
   5257 * *done is set.
   5258 */
   5259int						/* error */
   5260__xfs_bunmapi(
   5261	struct xfs_trans	*tp,		/* transaction pointer */
   5262	struct xfs_inode	*ip,		/* incore inode */
   5263	xfs_fileoff_t		start,		/* first file offset deleted */
   5264	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
   5265	uint32_t		flags,		/* misc flags */
   5266	xfs_extnum_t		nexts)		/* number of extents max */
   5267{
   5268	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
   5269	struct xfs_bmbt_irec	del;		/* extent being deleted */
   5270	int			error;		/* error return value */
   5271	xfs_extnum_t		extno;		/* extent number in list */
   5272	struct xfs_bmbt_irec	got;		/* current extent record */
   5273	struct xfs_ifork	*ifp;		/* inode fork pointer */
   5274	int			isrt;		/* freeing in rt area */
   5275	int			logflags;	/* transaction logging flags */
   5276	xfs_extlen_t		mod;		/* rt extent offset */
   5277	struct xfs_mount	*mp = ip->i_mount;
   5278	int			tmp_logflags;	/* partial logging flags */
   5279	int			wasdel;		/* was a delayed alloc extent */
   5280	int			whichfork;	/* data or attribute fork */
   5281	xfs_fsblock_t		sum;
   5282	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
   5283	xfs_fileoff_t		end;
   5284	struct xfs_iext_cursor	icur;
   5285	bool			done = false;
   5286
   5287	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
   5288
   5289	whichfork = xfs_bmapi_whichfork(flags);
   5290	ASSERT(whichfork != XFS_COW_FORK);
   5291	ifp = XFS_IFORK_PTR(ip, whichfork);
   5292	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
   5293		return -EFSCORRUPTED;
   5294	if (xfs_is_shutdown(mp))
   5295		return -EIO;
   5296
   5297	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
   5298	ASSERT(len > 0);
   5299	ASSERT(nexts >= 0);
   5300
   5301	error = xfs_iread_extents(tp, ip, whichfork);
   5302	if (error)
   5303		return error;
   5304
   5305	if (xfs_iext_count(ifp) == 0) {
   5306		*rlen = 0;
   5307		return 0;
   5308	}
   5309	XFS_STATS_INC(mp, xs_blk_unmap);
   5310	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
   5311	end = start + len;
   5312
   5313	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
   5314		*rlen = 0;
   5315		return 0;
   5316	}
   5317	end--;
   5318
   5319	logflags = 0;
   5320	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
   5322		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   5323		cur->bc_ino.flags = 0;
   5324	} else
   5325		cur = NULL;
   5326
   5327	if (isrt) {
   5328		/*
   5329		 * Synchronize by locking the bitmap inode.
   5330		 */
   5331		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
   5332		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
   5333		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
   5334		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
   5335	}
   5336
   5337	extno = 0;
   5338	while (end != (xfs_fileoff_t)-1 && end >= start &&
   5339	       (nexts == 0 || extno < nexts)) {
   5340		/*
    5341		 * Does the found extent start after end (i.e. end lies
    5342		 * in a hole)?  If so, just back up to the previous extent.
   5343		 */
   5344		if (got.br_startoff > end &&
   5345		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
   5346			done = true;
   5347			break;
   5348		}
   5349		/*
   5350		 * Is the last block of this extent before the range
   5351		 * we're supposed to delete?  If so, we're done.
   5352		 */
   5353		end = XFS_FILEOFF_MIN(end,
   5354			got.br_startoff + got.br_blockcount - 1);
   5355		if (end < start)
   5356			break;
   5357		/*
   5358		 * Then deal with the (possibly delayed) allocated space
   5359		 * we found.
   5360		 */
   5361		del = got;
   5362		wasdel = isnullstartblock(del.br_startblock);
   5363
   5364		if (got.br_startoff < start) {
   5365			del.br_startoff = start;
   5366			del.br_blockcount -= start - got.br_startoff;
   5367			if (!wasdel)
   5368				del.br_startblock += start - got.br_startoff;
   5369		}
   5370		if (del.br_startoff + del.br_blockcount > end + 1)
   5371			del.br_blockcount = end + 1 - del.br_startoff;
   5372
   5373		if (!isrt)
   5374			goto delete;
   5375
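		/*
		 * Realtime files can only free whole realtime extents of
		 * sb_rextsize blocks.  The checks below trim the candidate
		 * range to rt extent boundaries and convert unaligned
		 * written pieces to unwritten rather than freeing them.
		 */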
   5376		sum = del.br_startblock + del.br_blockcount;
   5377		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
   5378		if (mod) {
   5379			/*
   5380			 * Realtime extent not lined up at the end.
   5381			 * The extent could have been split into written
   5382			 * and unwritten pieces, or we could just be
   5383			 * unmapping part of it.  But we can't really
   5384			 * get rid of part of a realtime extent.
   5385			 */
   5386			if (del.br_state == XFS_EXT_UNWRITTEN) {
   5387				/*
    5388				 * This piece is already unwritten, so there
    5389				 * is nothing to convert.  Skip over it.
   5390				 */
   5391				ASSERT(end >= mod);
   5392				end -= mod > del.br_blockcount ?
   5393					del.br_blockcount : mod;
   5394				if (end < got.br_startoff &&
   5395				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
   5396					done = true;
   5397					break;
   5398				}
   5399				continue;
   5400			}
   5401			/*
   5402			 * It's written, turn it unwritten.
   5403			 * This is better than zeroing it.
   5404			 */
   5405			ASSERT(del.br_state == XFS_EXT_NORM);
   5406			ASSERT(tp->t_blk_res > 0);
   5407			/*
   5408			 * If this spans a realtime extent boundary,
   5409			 * chop it back to the start of the one we end at.
   5410			 */
   5411			if (del.br_blockcount > mod) {
   5412				del.br_startoff += del.br_blockcount - mod;
   5413				del.br_startblock += del.br_blockcount - mod;
   5414				del.br_blockcount = mod;
   5415			}
   5416			del.br_state = XFS_EXT_UNWRITTEN;
   5417			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
   5418					whichfork, &icur, &cur, &del,
   5419					&logflags);
   5420			if (error)
   5421				goto error0;
   5422			goto nodelete;
   5423		}
   5424		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
   5425		if (mod) {
   5426			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
   5427
   5428			/*
   5429			 * Realtime extent is lined up at the end but not
   5430			 * at the front.  We'll get rid of full extents if
   5431			 * we can.
   5432			 */
   5433			if (del.br_blockcount > off) {
   5434				del.br_blockcount -= off;
   5435				del.br_startoff += off;
   5436				del.br_startblock += off;
   5437			} else if (del.br_startoff == start &&
   5438				   (del.br_state == XFS_EXT_UNWRITTEN ||
   5439				    tp->t_blk_res == 0)) {
   5440				/*
   5441				 * Can't make it unwritten.  There isn't
   5442				 * a full extent here so just skip it.
   5443				 */
   5444				ASSERT(end >= del.br_blockcount);
   5445				end -= del.br_blockcount;
   5446				if (got.br_startoff > end &&
   5447				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
   5448					done = true;
   5449					break;
   5450				}
   5451				continue;
   5452			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
   5453				struct xfs_bmbt_irec	prev;
   5454				xfs_fileoff_t		unwrite_start;
   5455
   5456				/*
   5457				 * This one is already unwritten.
   5458				 * It must have a written left neighbor.
   5459				 * Unwrite the killed part of that one and
   5460				 * try again.
   5461				 */
   5462				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
   5463					ASSERT(0);
   5464				ASSERT(prev.br_state == XFS_EXT_NORM);
   5465				ASSERT(!isnullstartblock(prev.br_startblock));
   5466				ASSERT(del.br_startblock ==
   5467				       prev.br_startblock + prev.br_blockcount);
   5468				unwrite_start = max3(start,
   5469						     del.br_startoff - mod,
   5470						     prev.br_startoff);
   5471				mod = unwrite_start - prev.br_startoff;
   5472				prev.br_startoff = unwrite_start;
   5473				prev.br_startblock += mod;
   5474				prev.br_blockcount -= mod;
   5475				prev.br_state = XFS_EXT_UNWRITTEN;
   5476				error = xfs_bmap_add_extent_unwritten_real(tp,
   5477						ip, whichfork, &icur, &cur,
   5478						&prev, &logflags);
   5479				if (error)
   5480					goto error0;
   5481				goto nodelete;
   5482			} else {
   5483				ASSERT(del.br_state == XFS_EXT_NORM);
   5484				del.br_state = XFS_EXT_UNWRITTEN;
   5485				error = xfs_bmap_add_extent_unwritten_real(tp,
   5486						ip, whichfork, &icur, &cur,
   5487						&del, &logflags);
   5488				if (error)
   5489					goto error0;
   5490				goto nodelete;
   5491			}
   5492		}
   5493
   5494delete:
   5495		if (wasdel) {
   5496			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
   5497					&got, &del);
   5498		} else {
   5499			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
   5500					&del, &tmp_logflags, whichfork,
   5501					flags);
   5502			logflags |= tmp_logflags;
   5503		}
   5504
   5505		if (error)
   5506			goto error0;
   5507
   5508		end = del.br_startoff - 1;
   5509nodelete:
   5510		/*
    5511		 * If not done, go on to the next (previous) record.
   5512		 */
   5513		if (end != (xfs_fileoff_t)-1 && end >= start) {
   5514			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
   5515			    (got.br_startoff > end &&
   5516			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
   5517				done = true;
   5518				break;
   5519			}
   5520			extno++;
   5521		}
   5522	}
   5523	if (done || end == (xfs_fileoff_t)-1 || end < start)
   5524		*rlen = 0;
   5525	else
   5526		*rlen = end - start + 1;
   5527
   5528	/*
   5529	 * Convert to a btree if necessary.
   5530	 */
   5531	if (xfs_bmap_needs_btree(ip, whichfork)) {
   5532		ASSERT(cur == NULL);
   5533		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
   5534				&tmp_logflags, whichfork);
   5535		logflags |= tmp_logflags;
   5536	} else {
   5537		error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
   5538			whichfork);
   5539	}
   5540
   5541error0:
   5542	/*
    5543	 * Log everything.  Do this after the conversion; there's no point
    5544	 * in logging the extent records if we've converted to btree format.
   5545	 */
   5546	if ((logflags & xfs_ilog_fext(whichfork)) &&
   5547	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
   5548		logflags &= ~xfs_ilog_fext(whichfork);
   5549	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
   5550		 ifp->if_format != XFS_DINODE_FMT_BTREE)
   5551		logflags &= ~xfs_ilog_fbroot(whichfork);
   5552	/*
    5553	 * Log the inode even in the error case; if the transaction
    5554	 * is dirty we'll need to shut down the filesystem.
   5555	 */
   5556	if (logflags)
   5557		xfs_trans_log_inode(tp, ip, logflags);
   5558	if (cur) {
   5559		if (!error)
   5560			cur->bc_ino.allocated = 0;
   5561		xfs_btree_del_cursor(cur, error);
   5562	}
   5563	return error;
   5564}
   5565
   5566/* Unmap a range of a file. */
   5567int
   5568xfs_bunmapi(
   5569	xfs_trans_t		*tp,
   5570	struct xfs_inode	*ip,
   5571	xfs_fileoff_t		bno,
   5572	xfs_filblks_t		len,
   5573	uint32_t		flags,
   5574	xfs_extnum_t		nexts,
   5575	int			*done)
   5576{
   5577	int			error;
   5578
   5579	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
   5580	*done = (len == 0);
   5581	return error;
   5582}
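
/*
 * Typical caller pattern (an illustrative sketch; the loop below is not
 * code from this file): unmap a range in bounded chunks inside a
 * transaction until everything is gone, e.g.
 *
 *	do {
 *		error = xfs_bunmapi(tp, ip, bno, len, 0, 1, &done);
 *		if (error)
 *			break;
 *		... roll the transaction to replenish its reservation ...
 *	} while (!done);
 */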
   5583
   5584/*
   5585 * Determine whether an extent shift can be accomplished by a merge with the
   5586 * extent that precedes the target hole of the shift.
   5587 */
   5588STATIC bool
   5589xfs_bmse_can_merge(
   5590	struct xfs_bmbt_irec	*left,	/* preceding extent */
   5591	struct xfs_bmbt_irec	*got,	/* current extent to shift */
   5592	xfs_fileoff_t		shift)	/* shift fsb */
   5593{
   5594	xfs_fileoff_t		startoff;
   5595
   5596	startoff = got->br_startoff - shift;
   5597
   5598	/*
   5599	 * The extent, once shifted, must be adjacent in-file and on-disk with
   5600	 * the preceding extent.
   5601	 */
   5602	if ((left->br_startoff + left->br_blockcount != startoff) ||
   5603	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
   5604	    (left->br_state != got->br_state) ||
   5605	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
   5606		return false;
   5607
   5608	return true;
   5609}
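
/*
 * Example (illustrative): with left = [startoff 0, 10 blocks at fsb 100]
 * and got = [startoff 15, 5 blocks at fsb 110], a shift of 5 moves got to
 * startoff 10, making it contiguous with left both in-file and on-disk,
 * so the two extents can merge into one 15-block extent.
 */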
   5610
   5611/*
   5612 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
   5613 * hole in the file. If an extent shift would result in the extent being fully
   5614 * adjacent to the extent that currently precedes the hole, we can merge with
   5615 * the preceding extent rather than do the shift.
   5616 *
   5617 * This function assumes the caller has verified a shift-by-merge is possible
   5618 * with the provided extents via xfs_bmse_can_merge().
   5619 */
   5620STATIC int
   5621xfs_bmse_merge(
   5622	struct xfs_trans		*tp,
   5623	struct xfs_inode		*ip,
   5624	int				whichfork,
   5625	xfs_fileoff_t			shift,		/* shift fsb */
   5626	struct xfs_iext_cursor		*icur,
   5627	struct xfs_bmbt_irec		*got,		/* extent to shift */
   5628	struct xfs_bmbt_irec		*left,		/* preceding extent */
   5629	struct xfs_btree_cur		*cur,
   5630	int				*logflags)	/* output */
   5631{
   5632	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
   5633	struct xfs_bmbt_irec		new;
   5634	xfs_filblks_t			blockcount;
   5635	int				error, i;
   5636	struct xfs_mount		*mp = ip->i_mount;
   5637
   5638	blockcount = left->br_blockcount + got->br_blockcount;
   5639
   5640	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
   5641	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
   5642	ASSERT(xfs_bmse_can_merge(left, got, shift));
   5643
   5644	new = *left;
   5645	new.br_blockcount = blockcount;
   5646
   5647	/*
   5648	 * Update the on-disk extent count, the btree if necessary and log the
   5649	 * inode.
   5650	 */
   5651	ifp->if_nextents--;
   5652	*logflags |= XFS_ILOG_CORE;
   5653	if (!cur) {
   5654		*logflags |= XFS_ILOG_DEXT;
   5655		goto done;
   5656	}
   5657
   5658	/* lookup and remove the extent to merge */
   5659	error = xfs_bmbt_lookup_eq(cur, got, &i);
   5660	if (error)
   5661		return error;
   5662	if (XFS_IS_CORRUPT(mp, i != 1))
   5663		return -EFSCORRUPTED;
   5664
   5665	error = xfs_btree_delete(cur, &i);
   5666	if (error)
   5667		return error;
   5668	if (XFS_IS_CORRUPT(mp, i != 1))
   5669		return -EFSCORRUPTED;
   5670
   5671	/* lookup and update size of the previous extent */
   5672	error = xfs_bmbt_lookup_eq(cur, left, &i);
   5673	if (error)
   5674		return error;
   5675	if (XFS_IS_CORRUPT(mp, i != 1))
   5676		return -EFSCORRUPTED;
   5677
   5678	error = xfs_bmbt_update(cur, &new);
   5679	if (error)
   5680		return error;
   5681
   5682	/* change to extent format if required after extent removal */
   5683	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
   5684	if (error)
   5685		return error;
   5686
   5687done:
   5688	xfs_iext_remove(ip, icur, 0);
   5689	xfs_iext_prev(ifp, icur);
   5690	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
   5691			&new);
   5692
   5693	/* update reverse mapping. rmap functions merge the rmaps for us */
   5694	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
   5695	memcpy(&new, got, sizeof(new));
   5696	new.br_startoff = left->br_startoff + left->br_blockcount;
   5697	xfs_rmap_map_extent(tp, ip, whichfork, &new);
   5698	return 0;
   5699}
   5700
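/*
 * Move a single extent to the new file offset @startoff: update the
 * in-core extent list, the bmap btree record (if the fork is in btree
 * format), and the reverse mapping.
 */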
   5701static int
   5702xfs_bmap_shift_update_extent(
   5703	struct xfs_trans	*tp,
   5704	struct xfs_inode	*ip,
   5705	int			whichfork,
   5706	struct xfs_iext_cursor	*icur,
   5707	struct xfs_bmbt_irec	*got,
   5708	struct xfs_btree_cur	*cur,
   5709	int			*logflags,
   5710	xfs_fileoff_t		startoff)
   5711{
   5712	struct xfs_mount	*mp = ip->i_mount;
   5713	struct xfs_bmbt_irec	prev = *got;
   5714	int			error, i;
   5715
   5716	*logflags |= XFS_ILOG_CORE;
   5717
   5718	got->br_startoff = startoff;
   5719
   5720	if (cur) {
   5721		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
   5722		if (error)
   5723			return error;
   5724		if (XFS_IS_CORRUPT(mp, i != 1))
   5725			return -EFSCORRUPTED;
   5726
   5727		error = xfs_bmbt_update(cur, got);
   5728		if (error)
   5729			return error;
   5730	} else {
   5731		*logflags |= XFS_ILOG_DEXT;
   5732	}
   5733
   5734	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
   5735			got);
   5736
   5737	/* update reverse mapping */
   5738	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
   5739	xfs_rmap_map_extent(tp, ip, whichfork, got);
   5740	return 0;
   5741}
   5742
   5743int
   5744xfs_bmap_collapse_extents(
   5745	struct xfs_trans	*tp,
   5746	struct xfs_inode	*ip,
   5747	xfs_fileoff_t		*next_fsb,
   5748	xfs_fileoff_t		offset_shift_fsb,
   5749	bool			*done)
   5750{
   5751	int			whichfork = XFS_DATA_FORK;
   5752	struct xfs_mount	*mp = ip->i_mount;
   5753	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   5754	struct xfs_btree_cur	*cur = NULL;
   5755	struct xfs_bmbt_irec	got, prev;
   5756	struct xfs_iext_cursor	icur;
   5757	xfs_fileoff_t		new_startoff;
   5758	int			error = 0;
   5759	int			logflags = 0;
   5760
   5761	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   5762	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
   5763		return -EFSCORRUPTED;
   5764	}
   5765
   5766	if (xfs_is_shutdown(mp))
   5767		return -EIO;
   5768
   5769	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
   5770
   5771	error = xfs_iread_extents(tp, ip, whichfork);
   5772	if (error)
   5773		return error;
   5774
   5775	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
   5776		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   5777		cur->bc_ino.flags = 0;
   5778	}
   5779
   5780	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
   5781		*done = true;
   5782		goto del_cursor;
   5783	}
   5784	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
   5785		error = -EFSCORRUPTED;
   5786		goto del_cursor;
   5787	}
   5788
   5789	new_startoff = got.br_startoff - offset_shift_fsb;
   5790	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
   5791		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
   5792			error = -EINVAL;
   5793			goto del_cursor;
   5794		}
   5795
   5796		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
   5797			error = xfs_bmse_merge(tp, ip, whichfork,
   5798					offset_shift_fsb, &icur, &got, &prev,
   5799					cur, &logflags);
   5800			if (error)
   5801				goto del_cursor;
   5802			goto done;
   5803		}
   5804	} else {
   5805		if (got.br_startoff < offset_shift_fsb) {
   5806			error = -EINVAL;
   5807			goto del_cursor;
   5808		}
   5809	}
   5810
   5811	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
   5812			cur, &logflags, new_startoff);
   5813	if (error)
   5814		goto del_cursor;
   5815
   5816done:
   5817	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
   5818		*done = true;
   5819		goto del_cursor;
   5820	}
   5821
   5822	*next_fsb = got.br_startoff;
   5823del_cursor:
   5824	if (cur)
   5825		xfs_btree_del_cursor(cur, error);
   5826	if (logflags)
   5827		xfs_trans_log_inode(tp, ip, logflags);
   5828	return error;
   5829}
   5830
   5831/* Make sure we won't be right-shifting an extent past the maximum bound. */
   5832int
   5833xfs_bmap_can_insert_extents(
   5834	struct xfs_inode	*ip,
   5835	xfs_fileoff_t		off,
   5836	xfs_fileoff_t		shift)
   5837{
   5838	struct xfs_bmbt_irec	got;
   5839	int			is_empty;
   5840	int			error = 0;
   5841
   5842	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
   5843
   5844	if (xfs_is_shutdown(ip->i_mount))
   5845		return -EIO;
   5846
   5847	xfs_ilock(ip, XFS_ILOCK_EXCL);
   5848	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
   5849	if (!error && !is_empty && got.br_startoff >= off &&
   5850	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
   5851		error = -EINVAL;
   5852	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   5853
   5854	return error;
   5855}
   5856
   5857int
   5858xfs_bmap_insert_extents(
   5859	struct xfs_trans	*tp,
   5860	struct xfs_inode	*ip,
   5861	xfs_fileoff_t		*next_fsb,
   5862	xfs_fileoff_t		offset_shift_fsb,
   5863	bool			*done,
   5864	xfs_fileoff_t		stop_fsb)
   5865{
   5866	int			whichfork = XFS_DATA_FORK;
   5867	struct xfs_mount	*mp = ip->i_mount;
   5868	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
   5869	struct xfs_btree_cur	*cur = NULL;
   5870	struct xfs_bmbt_irec	got, next;
   5871	struct xfs_iext_cursor	icur;
   5872	xfs_fileoff_t		new_startoff;
   5873	int			error = 0;
   5874	int			logflags = 0;
   5875
   5876	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   5877	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
   5878		return -EFSCORRUPTED;
   5879	}
   5880
   5881	if (xfs_is_shutdown(mp))
   5882		return -EIO;
   5883
   5884	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
   5885
   5886	error = xfs_iread_extents(tp, ip, whichfork);
   5887	if (error)
   5888		return error;
   5889
   5890	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
   5891		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   5892		cur->bc_ino.flags = 0;
   5893	}
   5894
   5895	if (*next_fsb == NULLFSBLOCK) {
   5896		xfs_iext_last(ifp, &icur);
   5897		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
   5898		    stop_fsb > got.br_startoff) {
   5899			*done = true;
   5900			goto del_cursor;
   5901		}
   5902	} else {
   5903		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
   5904			*done = true;
   5905			goto del_cursor;
   5906		}
   5907	}
   5908	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
   5909		error = -EFSCORRUPTED;
   5910		goto del_cursor;
   5911	}
   5912
   5913	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
   5914		error = -EFSCORRUPTED;
   5915		goto del_cursor;
   5916	}
   5917
   5918	new_startoff = got.br_startoff + offset_shift_fsb;
   5919	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
   5920		if (new_startoff + got.br_blockcount > next.br_startoff) {
   5921			error = -EINVAL;
   5922			goto del_cursor;
   5923		}
   5924
   5925		/*
   5926		 * Unlike a left shift (which involves a hole punch), a right
   5927		 * shift does not modify extent neighbors in any way.  We should
    5928		 * never find mergeable extents in this scenario.  Check anyway
   5929		 * and warn if we encounter two extents that could be one.
   5930		 */
   5931		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
   5932			WARN_ON_ONCE(1);
   5933	}
   5934
   5935	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
   5936			cur, &logflags, new_startoff);
   5937	if (error)
   5938		goto del_cursor;
   5939
   5940	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
   5941	    stop_fsb >= got.br_startoff + got.br_blockcount) {
   5942		*done = true;
   5943		goto del_cursor;
   5944	}
   5945
   5946	*next_fsb = got.br_startoff;
   5947del_cursor:
   5948	if (cur)
   5949		xfs_btree_del_cursor(cur, error);
   5950	if (logflags)
   5951		xfs_trans_log_inode(tp, ip, logflags);
   5952	return error;
   5953}
   5954
   5955/*
    5956 * Split an extent at the file offset @split_fsb, so that @split_fsb
    5957 * becomes the first block of a new extent.  If @split_fsb lies in a
    5958 * hole or at the first block of an existing extent, there is nothing
    5959 * to split and we return 0.
   5960 */
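/*
 * Example (illustrative): splitting got = [startoff 0, 20 blocks at
 * fsb 100] at split_fsb 8 shrinks got to 8 blocks and inserts a new
 * 12-block extent at startoff 8, fsb 108, with the same state.
 */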
   5961int
   5962xfs_bmap_split_extent(
   5963	struct xfs_trans	*tp,
   5964	struct xfs_inode	*ip,
   5965	xfs_fileoff_t		split_fsb)
   5966{
   5967	int				whichfork = XFS_DATA_FORK;
   5968	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
   5969	struct xfs_btree_cur		*cur = NULL;
   5970	struct xfs_bmbt_irec		got;
   5971	struct xfs_bmbt_irec		new; /* split extent */
   5972	struct xfs_mount		*mp = ip->i_mount;
   5973	xfs_fsblock_t			gotblkcnt; /* new block count for got */
   5974	struct xfs_iext_cursor		icur;
   5975	int				error = 0;
   5976	int				logflags = 0;
   5977	int				i = 0;
   5978
   5979	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
   5980	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
   5981		return -EFSCORRUPTED;
   5982	}
   5983
   5984	if (xfs_is_shutdown(mp))
   5985		return -EIO;
   5986
   5987	/* Read in all the extents */
   5988	error = xfs_iread_extents(tp, ip, whichfork);
   5989	if (error)
   5990		return error;
   5991
   5992	/*
    5993	 * If there are no extents, or split_fsb lies in a hole, we are done.
   5994	 */
   5995	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
   5996	    got.br_startoff >= split_fsb)
   5997		return 0;
   5998
   5999	gotblkcnt = split_fsb - got.br_startoff;
   6000	new.br_startoff = split_fsb;
   6001	new.br_startblock = got.br_startblock + gotblkcnt;
   6002	new.br_blockcount = got.br_blockcount - gotblkcnt;
   6003	new.br_state = got.br_state;
   6004
   6005	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
   6006		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
   6007		cur->bc_ino.flags = 0;
   6008		error = xfs_bmbt_lookup_eq(cur, &got, &i);
   6009		if (error)
   6010			goto del_cursor;
   6011		if (XFS_IS_CORRUPT(mp, i != 1)) {
   6012			error = -EFSCORRUPTED;
   6013			goto del_cursor;
   6014		}
   6015	}
   6016
   6017	got.br_blockcount = gotblkcnt;
   6018	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
   6019			&got);
   6020
   6021	logflags = XFS_ILOG_CORE;
   6022	if (cur) {
   6023		error = xfs_bmbt_update(cur, &got);
   6024		if (error)
   6025			goto del_cursor;
   6026	} else
   6027		logflags |= XFS_ILOG_DEXT;
   6028
   6029	/* Add new extent */
   6030	xfs_iext_next(ifp, &icur);
   6031	xfs_iext_insert(ip, &icur, &new, 0);
   6032	ifp->if_nextents++;
   6033
   6034	if (cur) {
   6035		error = xfs_bmbt_lookup_eq(cur, &new, &i);
   6036		if (error)
   6037			goto del_cursor;
   6038		if (XFS_IS_CORRUPT(mp, i != 0)) {
   6039			error = -EFSCORRUPTED;
   6040			goto del_cursor;
   6041		}
   6042		error = xfs_btree_insert(cur, &i);
   6043		if (error)
   6044			goto del_cursor;
   6045		if (XFS_IS_CORRUPT(mp, i != 1)) {
   6046			error = -EFSCORRUPTED;
   6047			goto del_cursor;
   6048		}
   6049	}
   6050
   6051	/*
   6052	 * Convert to a btree if necessary.
   6053	 */
   6054	if (xfs_bmap_needs_btree(ip, whichfork)) {
   6055		int tmp_logflags; /* partial log flag return val */
   6056
   6057		ASSERT(cur == NULL);
   6058		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
   6059				&tmp_logflags, whichfork);
   6060		logflags |= tmp_logflags;
   6061	}
   6062
   6063del_cursor:
   6064	if (cur) {
   6065		cur->bc_ino.allocated = 0;
   6066		xfs_btree_del_cursor(cur, error);
   6067	}
   6068
   6069	if (logflags)
   6070		xfs_trans_log_inode(tp, ip, logflags);
   6071	return error;
   6072}
   6073
   6074/* Deferred mapping is only for real extents in the data fork. */
   6075static bool
   6076xfs_bmap_is_update_needed(
   6077	struct xfs_bmbt_irec	*bmap)
   6078{
   6079	return  bmap->br_startblock != HOLESTARTBLOCK &&
   6080		bmap->br_startblock != DELAYSTARTBLOCK;
   6081}
   6082
   6083/* Record a bmap intent. */
   6084static int
   6085__xfs_bmap_add(
   6086	struct xfs_trans		*tp,
   6087	enum xfs_bmap_intent_type	type,
   6088	struct xfs_inode		*ip,
   6089	int				whichfork,
   6090	struct xfs_bmbt_irec		*bmap)
   6091{
   6092	struct xfs_bmap_intent		*bi;
   6093
   6094	trace_xfs_bmap_defer(tp->t_mountp,
   6095			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
   6096			type,
   6097			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
   6098			ip->i_ino, whichfork,
   6099			bmap->br_startoff,
   6100			bmap->br_blockcount,
   6101			bmap->br_state);
   6102
   6103	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
   6104	INIT_LIST_HEAD(&bi->bi_list);
   6105	bi->bi_type = type;
   6106	bi->bi_owner = ip;
   6107	bi->bi_whichfork = whichfork;
   6108	bi->bi_bmap = *bmap;
   6109
   6110	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
   6111	return 0;
   6112}
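
/*
 * Intents queued here are completed later by xfs_bmap_finish_one() below,
 * once the deferred-ops machinery processes the queued
 * XFS_DEFER_OPS_TYPE_BMAP work items.
 */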
   6113
   6114/* Map an extent into a file. */
   6115void
   6116xfs_bmap_map_extent(
   6117	struct xfs_trans	*tp,
   6118	struct xfs_inode	*ip,
   6119	struct xfs_bmbt_irec	*PREV)
   6120{
   6121	if (!xfs_bmap_is_update_needed(PREV))
   6122		return;
   6123
   6124	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
   6125}
   6126
   6127/* Unmap an extent out of a file. */
   6128void
   6129xfs_bmap_unmap_extent(
   6130	struct xfs_trans	*tp,
   6131	struct xfs_inode	*ip,
   6132	struct xfs_bmbt_irec	*PREV)
   6133{
   6134	if (!xfs_bmap_is_update_needed(PREV))
   6135		return;
   6136
   6137	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
   6138}
   6139
   6140/*
    6141 * Process one of the deferred bmap operations.  The intent describes
    6142 * a single map or unmap of an extent in the data fork.
   6143 */
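/*
 * (Caller note, an assumption from the usual deferred-ops layout: this is
 * invoked from the bmap intent item code, e.g. xfs_bmap_item.c, with the
 * fields unpacked from a struct xfs_bmap_intent.)
 */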
   6144int
   6145xfs_bmap_finish_one(
   6146	struct xfs_trans		*tp,
   6147	struct xfs_inode		*ip,
   6148	enum xfs_bmap_intent_type	type,
   6149	int				whichfork,
   6150	xfs_fileoff_t			startoff,
   6151	xfs_fsblock_t			startblock,
   6152	xfs_filblks_t			*blockcount,
   6153	xfs_exntst_t			state)
   6154{
   6155	int				error = 0;
   6156
   6157	ASSERT(tp->t_firstblock == NULLFSBLOCK);
   6158
   6159	trace_xfs_bmap_deferred(tp->t_mountp,
   6160			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
   6161			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
   6162			ip->i_ino, whichfork, startoff, *blockcount, state);
   6163
   6164	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
   6165		return -EFSCORRUPTED;
   6166
   6167	if (XFS_TEST_ERROR(false, tp->t_mountp,
   6168			XFS_ERRTAG_BMAP_FINISH_ONE))
   6169		return -EIO;
   6170
   6171	switch (type) {
   6172	case XFS_BMAP_MAP:
   6173		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
   6174				startblock, 0);
   6175		*blockcount = 0;
   6176		break;
   6177	case XFS_BMAP_UNMAP:
   6178		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
   6179				XFS_BMAPI_REMAP, 1);
   6180		break;
   6181	default:
   6182		ASSERT(0);
   6183		error = -EFSCORRUPTED;
   6184	}
   6185
   6186	return error;
   6187}
   6188
   6189/* Check that an inode's extent does not have invalid flags or bad ranges. */
   6190xfs_failaddr_t
   6191xfs_bmap_validate_extent(
   6192	struct xfs_inode	*ip,
   6193	int			whichfork,
   6194	struct xfs_bmbt_irec	*irec)
   6195{
   6196	struct xfs_mount	*mp = ip->i_mount;
   6197
   6198	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
   6199		return __this_address;
   6200
   6201	if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
   6202		if (!xfs_verify_rtext(mp, irec->br_startblock,
   6203					  irec->br_blockcount))
   6204			return __this_address;
   6205	} else {
   6206		if (!xfs_verify_fsbext(mp, irec->br_startblock,
   6207					   irec->br_blockcount))
   6208			return __this_address;
   6209	}
   6210	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
   6211		return __this_address;
   6212	return NULL;
   6213}
   6214
   6215int __init
   6216xfs_bmap_intent_init_cache(void)
   6217{
   6218	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
   6219			sizeof(struct xfs_bmap_intent),
   6220			0, 0, NULL);
   6221
   6222	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
   6223}
   6224
   6225void
   6226xfs_bmap_intent_destroy_cache(void)
   6227{
   6228	kmem_cache_destroy(xfs_bmap_intent_cache);
   6229	xfs_bmap_intent_cache = NULL;
   6230}