cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xfs_dir2_leaf.c (51070B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
      4 * Copyright (c) 2013 Red Hat, Inc.
      5 * All Rights Reserved.
      6 */
      7#include "xfs.h"
      8#include "xfs_fs.h"
      9#include "xfs_shared.h"
     10#include "xfs_format.h"
     11#include "xfs_log_format.h"
     12#include "xfs_trans_resv.h"
     13#include "xfs_mount.h"
     14#include "xfs_inode.h"
     15#include "xfs_bmap.h"
     16#include "xfs_dir2.h"
     17#include "xfs_dir2_priv.h"
     18#include "xfs_error.h"
     19#include "xfs_trace.h"
     20#include "xfs_trans.h"
     21#include "xfs_buf_item.h"
     22
     23/*
     24 * Local function declarations.
     25 */
     26static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
     27				    int *indexp, struct xfs_buf **dbpp,
     28				    struct xfs_dir3_icleaf_hdr *leafhdr);
     29static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
     30				    struct xfs_buf *bp, int first, int last);
     31static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
     32				   struct xfs_buf *bp);
     33
     34void
     35xfs_dir2_leaf_hdr_from_disk(
     36	struct xfs_mount		*mp,
     37	struct xfs_dir3_icleaf_hdr	*to,
     38	struct xfs_dir2_leaf		*from)
     39{
     40	if (xfs_has_crc(mp)) {
     41		struct xfs_dir3_leaf *from3 = (struct xfs_dir3_leaf *)from;
     42
     43		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
     44		to->back = be32_to_cpu(from3->hdr.info.hdr.back);
     45		to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
     46		to->count = be16_to_cpu(from3->hdr.count);
     47		to->stale = be16_to_cpu(from3->hdr.stale);
     48		to->ents = from3->__ents;
     49
     50		ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
     51		       to->magic == XFS_DIR3_LEAFN_MAGIC);
     52	} else {
     53		to->forw = be32_to_cpu(from->hdr.info.forw);
     54		to->back = be32_to_cpu(from->hdr.info.back);
     55		to->magic = be16_to_cpu(from->hdr.info.magic);
     56		to->count = be16_to_cpu(from->hdr.count);
     57		to->stale = be16_to_cpu(from->hdr.stale);
     58		to->ents = from->__ents;
     59
     60		ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
     61		       to->magic == XFS_DIR2_LEAFN_MAGIC);
     62	}
     63}
     64
     65void
     66xfs_dir2_leaf_hdr_to_disk(
     67	struct xfs_mount		*mp,
     68	struct xfs_dir2_leaf		*to,
     69	struct xfs_dir3_icleaf_hdr	*from)
     70{
     71	if (xfs_has_crc(mp)) {
     72		struct xfs_dir3_leaf *to3 = (struct xfs_dir3_leaf *)to;
     73
     74		ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
     75		       from->magic == XFS_DIR3_LEAFN_MAGIC);
     76
     77		to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
     78		to3->hdr.info.hdr.back = cpu_to_be32(from->back);
     79		to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
     80		to3->hdr.count = cpu_to_be16(from->count);
     81		to3->hdr.stale = cpu_to_be16(from->stale);
     82	} else {
     83		ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
     84		       from->magic == XFS_DIR2_LEAFN_MAGIC);
     85
     86		to->hdr.info.forw = cpu_to_be32(from->forw);
     87		to->hdr.info.back = cpu_to_be32(from->back);
     88		to->hdr.info.magic = cpu_to_be16(from->magic);
     89		to->hdr.count = cpu_to_be16(from->count);
     90		to->hdr.stale = cpu_to_be16(from->stale);
     91	}
     92}
     93
     94/*
     95 * Check the internal consistency of a leaf1 block.
     96 * Pop an assert if something is wrong.
     97 */
     98#ifdef DEBUG
     99static xfs_failaddr_t
    100xfs_dir3_leaf1_check(
    101	struct xfs_inode	*dp,
    102	struct xfs_buf		*bp)
    103{
    104	struct xfs_dir2_leaf	*leaf = bp->b_addr;
    105	struct xfs_dir3_icleaf_hdr leafhdr;
    106
    107	xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
    108
    109	if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
    110		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
    111		if (be64_to_cpu(leaf3->info.blkno) != xfs_buf_daddr(bp))
    112			return __this_address;
    113	} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
    114		return __this_address;
    115
    116	return xfs_dir3_leaf_check_int(dp->i_mount, &leafhdr, leaf, false);
    117}
    118
    119static inline void
    120xfs_dir3_leaf_check(
    121	struct xfs_inode	*dp,
    122	struct xfs_buf		*bp)
    123{
    124	xfs_failaddr_t		fa;
    125
    126	fa = xfs_dir3_leaf1_check(dp, bp);
    127	if (!fa)
    128		return;
    129	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
    130			bp->b_addr, BBTOB(bp->b_length), __FILE__, __LINE__,
    131			fa);
    132	ASSERT(0);
    133}
    134#else
    135#define	xfs_dir3_leaf_check(dp, bp)
    136#endif
    137
    138xfs_failaddr_t
    139xfs_dir3_leaf_check_int(
    140	struct xfs_mount		*mp,
    141	struct xfs_dir3_icleaf_hdr	*hdr,
    142	struct xfs_dir2_leaf		*leaf,
    143	bool				expensive_checking)
    144{
    145	struct xfs_da_geometry		*geo = mp->m_dir_geo;
    146	xfs_dir2_leaf_tail_t		*ltp;
    147	int				stale;
    148	int				i;
    149
    150	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
    151
    152	/*
    153	 * XXX (dgc): This value is not restrictive enough.
    154	 * Should factor in the size of the bests table as well.
    155	 * We can deduce a value for that from i_disk_size.
    156	 */
    157	if (hdr->count > geo->leaf_max_ents)
    158		return __this_address;
    159
    160	/* Leaves and bests don't overlap in leaf format. */
    161	if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
    162	     hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
    163	    (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
    164		return __this_address;
    165
    166	if (!expensive_checking)
    167		return NULL;
    168
    169	/* Check hash value order, count stale entries.  */
    170	for (i = stale = 0; i < hdr->count; i++) {
    171		if (i + 1 < hdr->count) {
    172			if (be32_to_cpu(hdr->ents[i].hashval) >
    173					be32_to_cpu(hdr->ents[i + 1].hashval))
    174				return __this_address;
    175		}
    176		if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
    177			stale++;
    178	}
    179	if (hdr->stale != stale)
    180		return __this_address;
    181	return NULL;
    182}
    183
    184/*
    185 * We verify the magic numbers before decoding the leaf header so that on debug
    186 * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
    187 * to incorrect magic numbers.
    188 */
    189static xfs_failaddr_t
    190xfs_dir3_leaf_verify(
    191	struct xfs_buf			*bp)
    192{
    193	struct xfs_mount		*mp = bp->b_mount;
    194	struct xfs_dir3_icleaf_hdr	leafhdr;
    195	xfs_failaddr_t			fa;
    196
    197	fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
    198	if (fa)
    199		return fa;
    200
    201	xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, bp->b_addr);
    202	return xfs_dir3_leaf_check_int(mp, &leafhdr, bp->b_addr, true);
    203}
    204
    205static void
    206xfs_dir3_leaf_read_verify(
    207	struct xfs_buf  *bp)
    208{
    209	struct xfs_mount	*mp = bp->b_mount;
    210	xfs_failaddr_t		fa;
    211
    212	if (xfs_has_crc(mp) &&
    213	     !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
    214		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    215	else {
    216		fa = xfs_dir3_leaf_verify(bp);
    217		if (fa)
    218			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    219	}
    220}
    221
    222static void
    223xfs_dir3_leaf_write_verify(
    224	struct xfs_buf  *bp)
    225{
    226	struct xfs_mount	*mp = bp->b_mount;
    227	struct xfs_buf_log_item	*bip = bp->b_log_item;
    228	struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
    229	xfs_failaddr_t		fa;
    230
    231	fa = xfs_dir3_leaf_verify(bp);
    232	if (fa) {
    233		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    234		return;
    235	}
    236
    237	if (!xfs_has_crc(mp))
    238		return;
    239
    240	if (bip)
    241		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
    242
    243	xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
    244}
    245
    246const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
    247	.name = "xfs_dir3_leaf1",
    248	.magic16 = { cpu_to_be16(XFS_DIR2_LEAF1_MAGIC),
    249		     cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) },
    250	.verify_read = xfs_dir3_leaf_read_verify,
    251	.verify_write = xfs_dir3_leaf_write_verify,
    252	.verify_struct = xfs_dir3_leaf_verify,
    253};
    254
    255const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
    256	.name = "xfs_dir3_leafn",
    257	.magic16 = { cpu_to_be16(XFS_DIR2_LEAFN_MAGIC),
    258		     cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) },
    259	.verify_read = xfs_dir3_leaf_read_verify,
    260	.verify_write = xfs_dir3_leaf_write_verify,
    261	.verify_struct = xfs_dir3_leaf_verify,
    262};
    263
    264int
    265xfs_dir3_leaf_read(
    266	struct xfs_trans	*tp,
    267	struct xfs_inode	*dp,
    268	xfs_dablk_t		fbno,
    269	struct xfs_buf		**bpp)
    270{
    271	int			err;
    272
    273	err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
    274			&xfs_dir3_leaf1_buf_ops);
    275	if (!err && tp && *bpp)
    276		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
    277	return err;
    278}
    279
    280int
    281xfs_dir3_leafn_read(
    282	struct xfs_trans	*tp,
    283	struct xfs_inode	*dp,
    284	xfs_dablk_t		fbno,
    285	struct xfs_buf		**bpp)
    286{
    287	int			err;
    288
    289	err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
    290			&xfs_dir3_leafn_buf_ops);
    291	if (!err && tp && *bpp)
    292		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
    293	return err;
    294}
    295
    296/*
    297 * Initialize a new leaf block, leaf1 or leafn magic accepted.
    298 */
    299static void
    300xfs_dir3_leaf_init(
    301	struct xfs_mount	*mp,
    302	struct xfs_trans	*tp,
    303	struct xfs_buf		*bp,
    304	xfs_ino_t		owner,
    305	uint16_t		type)
    306{
    307	struct xfs_dir2_leaf	*leaf = bp->b_addr;
    308
    309	ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
    310
    311	if (xfs_has_crc(mp)) {
    312		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
    313
    314		memset(leaf3, 0, sizeof(*leaf3));
    315
    316		leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
    317					 ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
    318					 : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
    319		leaf3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
    320		leaf3->info.owner = cpu_to_be64(owner);
    321		uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
    322	} else {
    323		memset(leaf, 0, sizeof(*leaf));
    324		leaf->hdr.info.magic = cpu_to_be16(type);
    325	}
    326
    327	/*
    328	 * If it's a leaf-format directory initialize the tail.
    329	 * Caller is responsible for initialising the bests table.
    330	 */
    331	if (type == XFS_DIR2_LEAF1_MAGIC) {
    332		struct xfs_dir2_leaf_tail *ltp;
    333
    334		ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
    335		ltp->bestcount = 0;
    336		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
    337		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
    338	} else {
    339		bp->b_ops = &xfs_dir3_leafn_buf_ops;
    340		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
    341	}
    342}
    343
    344int
    345xfs_dir3_leaf_get_buf(
    346	xfs_da_args_t		*args,
    347	xfs_dir2_db_t		bno,
    348	struct xfs_buf		**bpp,
    349	uint16_t		magic)
    350{
    351	struct xfs_inode	*dp = args->dp;
    352	struct xfs_trans	*tp = args->trans;
    353	struct xfs_mount	*mp = dp->i_mount;
    354	struct xfs_buf		*bp;
    355	int			error;
    356
    357	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
    358	ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
    359	       bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
    360
    361	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
    362			       &bp, XFS_DATA_FORK);
    363	if (error)
    364		return error;
    365
    366	xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
    367	xfs_dir3_leaf_log_header(args, bp);
    368	if (magic == XFS_DIR2_LEAF1_MAGIC)
    369		xfs_dir3_leaf_log_tail(args, bp);
    370	*bpp = bp;
    371	return 0;
    372}
    373
    374/*
    375 * Convert a block form directory to a leaf form directory.
    376 */
    377int						/* error */
    378xfs_dir2_block_to_leaf(
    379	xfs_da_args_t		*args,		/* operation arguments */
    380	struct xfs_buf		*dbp)		/* input block's buffer */
    381{
    382	__be16			*bestsp;	/* leaf's bestsp entries */
    383	xfs_dablk_t		blkno;		/* leaf block's bno */
    384	xfs_dir2_data_hdr_t	*hdr;		/* block header */
    385	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
    386	xfs_dir2_block_tail_t	*btp;		/* block's tail */
    387	xfs_inode_t		*dp;		/* incore directory inode */
    388	int			error;		/* error return code */
    389	struct xfs_buf		*lbp;		/* leaf block's buffer */
    390	xfs_dir2_db_t		ldb;		/* leaf block's bno */
    391	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
    392	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
    393	int			needlog;	/* need to log block header */
    394	int			needscan;	/* need to rescan bestfree */
    395	xfs_trans_t		*tp;		/* transaction pointer */
    396	struct xfs_dir2_data_free *bf;
    397	struct xfs_dir3_icleaf_hdr leafhdr;
    398
    399	trace_xfs_dir2_block_to_leaf(args);
    400
    401	dp = args->dp;
    402	tp = args->trans;
    403	/*
    404	 * Add the leaf block to the inode.
    405	 * This interface will only put blocks in the leaf/node range.
    406	 * Since that's empty now, we'll get the root (block 0 in range).
    407	 */
    408	if ((error = xfs_da_grow_inode(args, &blkno))) {
    409		return error;
    410	}
    411	ldb = xfs_dir2_da_to_db(args->geo, blkno);
    412	ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
    413	/*
    414	 * Initialize the leaf block, get a buffer for it.
    415	 */
    416	error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
    417	if (error)
    418		return error;
    419
    420	leaf = lbp->b_addr;
    421	hdr = dbp->b_addr;
    422	xfs_dir3_data_check(dp, dbp);
    423	btp = xfs_dir2_block_tail_p(args->geo, hdr);
    424	blp = xfs_dir2_block_leaf_p(btp);
    425	bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
    426
    427	/*
    428	 * Set the counts in the leaf header.
    429	 */
    430	xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
    431	leafhdr.count = be32_to_cpu(btp->count);
    432	leafhdr.stale = be32_to_cpu(btp->stale);
    433	xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
    434	xfs_dir3_leaf_log_header(args, lbp);
    435
    436	/*
    437	 * Could compact these but I think we always do the conversion
    438	 * after squeezing out stale entries.
    439	 */
    440	memcpy(leafhdr.ents, blp,
    441		be32_to_cpu(btp->count) * sizeof(struct xfs_dir2_leaf_entry));
    442	xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, 0, leafhdr.count - 1);
    443	needscan = 0;
    444	needlog = 1;
    445	/*
    446	 * Make the space formerly occupied by the leaf entries and block
    447	 * tail be free.
    448	 */
    449	xfs_dir2_data_make_free(args, dbp,
    450		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
    451		(xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
    452				       (char *)blp),
    453		&needlog, &needscan);
    454	/*
    455	 * Fix up the block header, make it a data block.
    456	 */
    457	dbp->b_ops = &xfs_dir3_data_buf_ops;
    458	xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
    459	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
    460		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
    461	else
    462		hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
    463
    464	if (needscan)
    465		xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
    466	/*
    467	 * Set up leaf tail and bests table.
    468	 */
    469	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
    470	ltp->bestcount = cpu_to_be32(1);
    471	bestsp = xfs_dir2_leaf_bests_p(ltp);
    472	bestsp[0] =  bf[0].length;
    473	/*
    474	 * Log the data header and leaf bests table.
    475	 */
    476	if (needlog)
    477		xfs_dir2_data_log_header(args, dbp);
    478	xfs_dir3_leaf_check(dp, lbp);
    479	xfs_dir3_data_check(dp, dbp);
    480	xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
    481	return 0;
    482}
    483
    484STATIC void
    485xfs_dir3_leaf_find_stale(
    486	struct xfs_dir3_icleaf_hdr *leafhdr,
    487	struct xfs_dir2_leaf_entry *ents,
    488	int			index,
    489	int			*lowstale,
    490	int			*highstale)
    491{
    492	/*
    493	 * Find the first stale entry before our index, if any.
    494	 */
    495	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
    496		if (ents[*lowstale].address ==
    497		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
    498			break;
    499	}
    500
    501	/*
    502	 * Find the first stale entry at or after our index, if any.
    503	 * Stop if the result would require moving more entries than using
    504	 * lowstale.
    505	 */
    506	for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
    507		if (ents[*highstale].address ==
    508		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
    509			break;
    510		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
    511			break;
    512	}
    513}
    514
    515struct xfs_dir2_leaf_entry *
    516xfs_dir3_leaf_find_entry(
    517	struct xfs_dir3_icleaf_hdr *leafhdr,
    518	struct xfs_dir2_leaf_entry *ents,
    519	int			index,		/* leaf table position */
    520	int			compact,	/* need to compact leaves */
    521	int			lowstale,	/* index of prev stale leaf */
    522	int			highstale,	/* index of next stale leaf */
    523	int			*lfloglow,	/* low leaf logging index */
    524	int			*lfloghigh)	/* high leaf logging index */
    525{
    526	if (!leafhdr->stale) {
    527		xfs_dir2_leaf_entry_t	*lep;	/* leaf entry table pointer */
    528
    529		/*
    530		 * Now we need to make room to insert the leaf entry.
    531		 *
    532		 * If there are no stale entries, just insert a hole at index.
    533		 */
    534		lep = &ents[index];
    535		if (index < leafhdr->count)
    536			memmove(lep + 1, lep,
    537				(leafhdr->count - index) * sizeof(*lep));
    538
    539		/*
    540		 * Record low and high logging indices for the leaf.
    541		 */
    542		*lfloglow = index;
    543		*lfloghigh = leafhdr->count++;
    544		return lep;
    545	}
    546
    547	/*
    548	 * There are stale entries.
    549	 *
    550	 * We will use one of them for the new entry.  It's probably not at
    551	 * the right location, so we'll have to shift some up or down first.
    552	 *
    553	 * If we didn't compact before, we need to find the nearest stale
    554	 * entries before and after our insertion point.
    555	 */
    556	if (compact == 0)
    557		xfs_dir3_leaf_find_stale(leafhdr, ents, index,
    558					 &lowstale, &highstale);
    559
    560	/*
    561	 * If the low one is better, use it.
    562	 */
    563	if (lowstale >= 0 &&
    564	    (highstale == leafhdr->count ||
    565	     index - lowstale - 1 < highstale - index)) {
    566		ASSERT(index - lowstale - 1 >= 0);
    567		ASSERT(ents[lowstale].address ==
    568		       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
    569
    570		/*
    571		 * Copy entries up to cover the stale entry and make room
    572		 * for the new entry.
    573		 */
    574		if (index - lowstale - 1 > 0) {
    575			memmove(&ents[lowstale], &ents[lowstale + 1],
    576				(index - lowstale - 1) *
    577					sizeof(xfs_dir2_leaf_entry_t));
    578		}
    579		*lfloglow = min(lowstale, *lfloglow);
    580		*lfloghigh = max(index - 1, *lfloghigh);
    581		leafhdr->stale--;
    582		return &ents[index - 1];
    583	}
    584
    585	/*
    586	 * The high one is better, so use that one.
    587	 */
    588	ASSERT(highstale - index >= 0);
    589	ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
    590
    591	/*
    592	 * Copy entries down to cover the stale entry and make room for the
    593	 * new entry.
    594	 */
    595	if (highstale - index > 0) {
    596		memmove(&ents[index + 1], &ents[index],
    597			(highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
    598	}
    599	*lfloglow = min(index, *lfloglow);
    600	*lfloghigh = max(highstale, *lfloghigh);
    601	leafhdr->stale--;
    602	return &ents[index];
    603}
    604
    605/*
    606 * Add an entry to a leaf form directory.
    607 */
    608int						/* error */
    609xfs_dir2_leaf_addname(
    610	struct xfs_da_args	*args)		/* operation arguments */
    611{
    612	struct xfs_dir3_icleaf_hdr leafhdr;
    613	struct xfs_trans	*tp = args->trans;
    614	__be16			*bestsp;	/* freespace table in leaf */
    615	__be16			*tagp;		/* end of data entry */
    616	struct xfs_buf		*dbp;		/* data block buffer */
    617	struct xfs_buf		*lbp;		/* leaf's buffer */
    618	struct xfs_dir2_leaf	*leaf;		/* leaf structure */
    619	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
    620	struct xfs_dir2_data_hdr *hdr;		/* data block header */
    621	struct xfs_dir2_data_entry *dep;	/* data block entry */
    622	struct xfs_dir2_leaf_entry *lep;	/* leaf entry table pointer */
    623	struct xfs_dir2_leaf_entry *ents;
    624	struct xfs_dir2_data_unused *dup;	/* data unused entry */
    625	struct xfs_dir2_leaf_tail *ltp;		/* leaf tail pointer */
    626	struct xfs_dir2_data_free *bf;		/* bestfree table */
    627	int			compact;	/* need to compact leaves */
    628	int			error;		/* error return value */
    629	int			grown;		/* allocated new data block */
    630	int			highstale = 0;	/* index of next stale leaf */
    631	int			i;		/* temporary, index */
    632	int			index;		/* leaf table position */
    633	int			length;		/* length of new entry */
    634	int			lfloglow;	/* low leaf logging index */
    635	int			lfloghigh;	/* high leaf logging index */
    636	int			lowstale = 0;	/* index of prev stale leaf */
    637	int			needbytes;	/* leaf block bytes needed */
    638	int			needlog;	/* need to log data header */
    639	int			needscan;	/* need to rescan data free */
    640	xfs_dir2_db_t		use_block;	/* data block number */
    641
    642	trace_xfs_dir2_leaf_addname(args);
    643
    644	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
    645	if (error)
    646		return error;
    647
    648	/*
    649	 * Look up the entry by hash value and name.
    650	 * We know it's not there, our caller has already done a lookup.
    651	 * So the index is of the entry to insert in front of.
    652	 * But if there are dup hash values the index is of the first of those.
    653	 */
    654	index = xfs_dir2_leaf_search_hash(args, lbp);
    655	leaf = lbp->b_addr;
    656	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
    657	xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
    658	ents = leafhdr.ents;
    659	bestsp = xfs_dir2_leaf_bests_p(ltp);
    660	length = xfs_dir2_data_entsize(dp->i_mount, args->namelen);
    661
    662	/*
    663	 * See if there are any entries with the same hash value
    664	 * and space in their block for the new entry.
    665	 * This is good because it puts multiple same-hash value entries
    666	 * in a data block, improving the lookup of those entries.
    667	 */
    668	for (use_block = -1, lep = &ents[index];
    669	     index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
    670	     index++, lep++) {
    671		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
    672			continue;
    673		i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
    674		ASSERT(i < be32_to_cpu(ltp->bestcount));
    675		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
    676		if (be16_to_cpu(bestsp[i]) >= length) {
    677			use_block = i;
    678			break;
    679		}
    680	}
    681	/*
    682	 * Didn't find a block yet, linear search all the data blocks.
    683	 */
    684	if (use_block == -1) {
    685		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
    686			/*
    687			 * Remember a block we see that's missing.
    688			 */
    689			if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
    690			    use_block == -1)
    691				use_block = i;
    692			else if (be16_to_cpu(bestsp[i]) >= length) {
    693				use_block = i;
    694				break;
    695			}
    696		}
    697	}
    698	/*
    699	 * How many bytes do we need in the leaf block?
    700	 */
    701	needbytes = 0;
    702	if (!leafhdr.stale)
    703		needbytes += sizeof(xfs_dir2_leaf_entry_t);
    704	if (use_block == -1)
    705		needbytes += sizeof(xfs_dir2_data_off_t);
    706
    707	/*
    708	 * Now kill use_block if it refers to a missing block, so we
    709	 * can use it as an indication of allocation needed.
    710	 */
    711	if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
    712		use_block = -1;
    713	/*
    714	 * If we don't have enough free bytes but we can make enough
    715	 * by compacting out stale entries, we'll do that.
    716	 */
    717	if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
    718	    leafhdr.stale > 1)
    719		compact = 1;
    720
    721	/*
    722	 * Otherwise if we don't have enough free bytes we need to
    723	 * convert to node form.
    724	 */
    725	else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
    726		/*
    727		 * Just checking or no space reservation, give up.
    728		 */
    729		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
    730							args->total == 0) {
    731			xfs_trans_brelse(tp, lbp);
    732			return -ENOSPC;
    733		}
    734		/*
    735		 * Convert to node form.
    736		 */
    737		error = xfs_dir2_leaf_to_node(args, lbp);
    738		if (error)
    739			return error;
    740		/*
    741		 * Then add the new entry.
    742		 */
    743		return xfs_dir2_node_addname(args);
    744	}
    745	/*
    746	 * Otherwise it will fit without compaction.
    747	 */
    748	else
    749		compact = 0;
    750	/*
    751	 * If just checking, then it will fit unless we needed to allocate
    752	 * a new data block.
    753	 */
    754	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
    755		xfs_trans_brelse(tp, lbp);
    756		return use_block == -1 ? -ENOSPC : 0;
    757	}
    758	/*
    759	 * If no allocations are allowed, return now before we've
    760	 * changed anything.
    761	 */
    762	if (args->total == 0 && use_block == -1) {
    763		xfs_trans_brelse(tp, lbp);
    764		return -ENOSPC;
    765	}
    766	/*
    767	 * Need to compact the leaf entries, removing stale ones.
    768	 * Leave one stale entry behind - the one closest to our
    769	 * insertion index - and we'll shift that one to our insertion
    770	 * point later.
    771	 */
    772	if (compact) {
    773		xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
    774			&highstale, &lfloglow, &lfloghigh);
    775	}
    776	/*
    777	 * There are stale entries, so we'll need log-low and log-high
    778	 * impossibly bad values later.
    779	 */
    780	else if (leafhdr.stale) {
    781		lfloglow = leafhdr.count;
    782		lfloghigh = -1;
    783	}
    784	/*
    785	 * If there was no data block space found, we need to allocate
    786	 * a new one.
    787	 */
    788	if (use_block == -1) {
    789		/*
    790		 * Add the new data block.
    791		 */
    792		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
    793				&use_block))) {
    794			xfs_trans_brelse(tp, lbp);
    795			return error;
    796		}
    797		/*
    798		 * Initialize the block.
    799		 */
    800		if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
    801			xfs_trans_brelse(tp, lbp);
    802			return error;
    803		}
    804		/*
    805		 * If we're adding a new data block on the end we need to
    806		 * extend the bests table.  Copy it up one entry.
    807		 */
    808		if (use_block >= be32_to_cpu(ltp->bestcount)) {
    809			bestsp--;
    810			memmove(&bestsp[0], &bestsp[1],
    811				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
    812			be32_add_cpu(&ltp->bestcount, 1);
    813			xfs_dir3_leaf_log_tail(args, lbp);
    814			xfs_dir3_leaf_log_bests(args, lbp, 0,
    815						be32_to_cpu(ltp->bestcount) - 1);
    816		}
    817		/*
    818		 * If we're filling in a previously empty block just log it.
    819		 */
    820		else
    821			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
    822		hdr = dbp->b_addr;
    823		bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
    824		bestsp[use_block] = bf[0].length;
    825		grown = 1;
    826	} else {
    827		/*
    828		 * Already had space in some data block.
    829		 * Just read that one in.
    830		 */
    831		error = xfs_dir3_data_read(tp, dp,
    832				   xfs_dir2_db_to_da(args->geo, use_block),
    833				   0, &dbp);
    834		if (error) {
    835			xfs_trans_brelse(tp, lbp);
    836			return error;
    837		}
    838		hdr = dbp->b_addr;
    839		bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
    840		grown = 0;
    841	}
    842	/*
    843	 * Point to the biggest freespace in our data block.
    844	 */
    845	dup = (xfs_dir2_data_unused_t *)
    846	      ((char *)hdr + be16_to_cpu(bf[0].offset));
    847	needscan = needlog = 0;
    848	/*
    849	 * Mark the initial part of our freespace in use for the new entry.
    850	 */
    851	error = xfs_dir2_data_use_free(args, dbp, dup,
    852			(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
    853			length, &needlog, &needscan);
    854	if (error) {
    855		xfs_trans_brelse(tp, lbp);
    856		return error;
    857	}
    858	/*
    859	 * Initialize our new entry (at last).
    860	 */
    861	dep = (xfs_dir2_data_entry_t *)dup;
    862	dep->inumber = cpu_to_be64(args->inumber);
    863	dep->namelen = args->namelen;
    864	memcpy(dep->name, args->name, dep->namelen);
    865	xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
    866	tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep);
    867	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
    868	/*
    869	 * Need to scan fix up the bestfree table.
    870	 */
    871	if (needscan)
    872		xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
    873	/*
    874	 * Need to log the data block's header.
    875	 */
    876	if (needlog)
    877		xfs_dir2_data_log_header(args, dbp);
    878	xfs_dir2_data_log_entry(args, dbp, dep);
    879	/*
    880	 * If the bests table needs to be changed, do it.
    881	 * Log the change unless we've already done that.
    882	 */
    883	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
    884		bestsp[use_block] = bf[0].length;
    885		if (!grown)
    886			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
    887	}
    888
    889	lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
    890				       highstale, &lfloglow, &lfloghigh);
    891
    892	/*
    893	 * Fill in the new leaf entry.
    894	 */
    895	lep->hashval = cpu_to_be32(args->hashval);
    896	lep->address = cpu_to_be32(
    897				xfs_dir2_db_off_to_dataptr(args->geo, use_block,
    898				be16_to_cpu(*tagp)));
    899	/*
    900	 * Log the leaf fields and give up the buffers.
    901	 */
    902	xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
    903	xfs_dir3_leaf_log_header(args, lbp);
    904	xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, lfloglow, lfloghigh);
    905	xfs_dir3_leaf_check(dp, lbp);
    906	xfs_dir3_data_check(dp, dbp);
    907	return 0;
    908}
    909
    910/*
    911 * Compact out any stale entries in the leaf.
    912 * Log the header and changed leaf entries, if any.
    913 */
    914void
    915xfs_dir3_leaf_compact(
    916	xfs_da_args_t	*args,		/* operation arguments */
    917	struct xfs_dir3_icleaf_hdr *leafhdr,
    918	struct xfs_buf	*bp)		/* leaf buffer */
    919{
    920	int		from;		/* source leaf index */
    921	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
    922	int		loglow;		/* first leaf entry to log */
    923	int		to;		/* target leaf index */
    924	struct xfs_inode *dp = args->dp;
    925
    926	leaf = bp->b_addr;
    927	if (!leafhdr->stale)
    928		return;
    929
    930	/*
    931	 * Compress out the stale entries in place.
    932	 */
    933	for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
    934		if (leafhdr->ents[from].address ==
    935		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
    936			continue;
    937		/*
    938		 * Only actually copy the entries that are different.
    939		 */
    940		if (from > to) {
    941			if (loglow == -1)
    942				loglow = to;
    943			leafhdr->ents[to] = leafhdr->ents[from];
    944		}
    945		to++;
    946	}
    947	/*
    948	 * Update and log the header, log the leaf entries.
    949	 */
    950	ASSERT(leafhdr->stale == from - to);
    951	leafhdr->count -= leafhdr->stale;
    952	leafhdr->stale = 0;
    953
    954	xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, leafhdr);
    955	xfs_dir3_leaf_log_header(args, bp);
    956	if (loglow != -1)
    957		xfs_dir3_leaf_log_ents(args, leafhdr, bp, loglow, to - 1);
    958}
    959
    960/*
    961 * Compact the leaf entries, removing stale ones.
    962 * Leave one stale entry behind - the one closest to our
    963 * insertion index - and the caller will shift that one to our insertion
    964 * point later.
    965 * Return new insertion index, where the remaining stale entry is,
    966 * and leaf logging indices.
    967 */
    968void
    969xfs_dir3_leaf_compact_x1(
    970	struct xfs_dir3_icleaf_hdr *leafhdr,
    971	struct xfs_dir2_leaf_entry *ents,
    972	int		*indexp,	/* insertion index */
    973	int		*lowstalep,	/* out: stale entry before us */
    974	int		*highstalep,	/* out: stale entry after us */
    975	int		*lowlogp,	/* out: low log index */
    976	int		*highlogp)	/* out: high log index */
    977{
    978	int		from;		/* source copy index */
    979	int		highstale;	/* stale entry at/after index */
    980	int		index;		/* insertion index */
    981	int		keepstale;	/* source index of kept stale */
    982	int		lowstale;	/* stale entry before index */
    983	int		newindex=0;	/* new insertion index */
    984	int		to;		/* destination copy index */
    985
    986	ASSERT(leafhdr->stale > 1);
    987	index = *indexp;
    988
    989	xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
    990
    991	/*
    992	 * Pick the better of lowstale and highstale.
    993	 */
    994	if (lowstale >= 0 &&
    995	    (highstale == leafhdr->count ||
    996	     index - lowstale <= highstale - index))
    997		keepstale = lowstale;
    998	else
    999		keepstale = highstale;
   1000	/*
   1001	 * Copy the entries in place, removing all the stale entries
   1002	 * except keepstale.
   1003	 */
   1004	for (from = to = 0; from < leafhdr->count; from++) {
   1005		/*
   1006		 * Notice the new value of index.
   1007		 */
   1008		if (index == from)
   1009			newindex = to;
   1010		if (from != keepstale &&
   1011		    ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
   1012			if (from == to)
   1013				*lowlogp = to;
   1014			continue;
   1015		}
   1016		/*
   1017		 * Record the new keepstale value for the insertion.
   1018		 */
   1019		if (from == keepstale)
   1020			lowstale = highstale = to;
   1021		/*
   1022		 * Copy only the entries that have moved.
   1023		 */
   1024		if (from > to)
   1025			ents[to] = ents[from];
   1026		to++;
   1027	}
   1028	ASSERT(from > to);
   1029	/*
   1030	 * If the insertion point was past the last entry,
   1031	 * set the new insertion point accordingly.
   1032	 */
   1033	if (index == from)
   1034		newindex = to;
   1035	*indexp = newindex;
   1036	/*
   1037	 * Adjust the leaf header values.
   1038	 */
   1039	leafhdr->count -= from - to;
   1040	leafhdr->stale = 1;
   1041	/*
   1042	 * Remember the low/high stale value only in the "right"
   1043	 * direction.
   1044	 */
   1045	if (lowstale >= newindex)
   1046		lowstale = -1;
   1047	else
   1048		highstale = leafhdr->count;
   1049	*highlogp = leafhdr->count - 1;
   1050	*lowstalep = lowstale;
   1051	*highstalep = highstale;
   1052}
   1053
   1054/*
   1055 * Log the bests entries indicated from a leaf1 block.
   1056 */
   1057static void
   1058xfs_dir3_leaf_log_bests(
   1059	struct xfs_da_args	*args,
   1060	struct xfs_buf		*bp,		/* leaf buffer */
   1061	int			first,		/* first entry to log */
   1062	int			last)		/* last entry to log */
   1063{
   1064	__be16			*firstb;	/* pointer to first entry */
   1065	__be16			*lastb;		/* pointer to last entry */
   1066	struct xfs_dir2_leaf	*leaf = bp->b_addr;
   1067	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
   1068
   1069	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
   1070	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
   1071
   1072	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
   1073	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
   1074	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
   1075	xfs_trans_log_buf(args->trans, bp,
   1076		(uint)((char *)firstb - (char *)leaf),
   1077		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
   1078}
   1079
   1080/*
   1081 * Log the leaf entries indicated from a leaf1 or leafn block.
   1082 */
   1083void
   1084xfs_dir3_leaf_log_ents(
   1085	struct xfs_da_args	*args,
   1086	struct xfs_dir3_icleaf_hdr *hdr,
   1087	struct xfs_buf		*bp,
   1088	int			first,
   1089	int			last)
   1090{
   1091	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
   1092	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
   1093	struct xfs_dir2_leaf	*leaf = bp->b_addr;
   1094
   1095	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
   1096	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
   1097	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
   1098	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
   1099
   1100	firstlep = &hdr->ents[first];
   1101	lastlep = &hdr->ents[last];
   1102	xfs_trans_log_buf(args->trans, bp,
   1103		(uint)((char *)firstlep - (char *)leaf),
   1104		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
   1105}
   1106
   1107/*
   1108 * Log the header of the leaf1 or leafn block.
   1109 */
   1110void
   1111xfs_dir3_leaf_log_header(
   1112	struct xfs_da_args	*args,
   1113	struct xfs_buf		*bp)
   1114{
   1115	struct xfs_dir2_leaf	*leaf = bp->b_addr;
   1116
   1117	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
   1118	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
   1119	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
   1120	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
   1121
   1122	xfs_trans_log_buf(args->trans, bp,
   1123			  (uint)((char *)&leaf->hdr - (char *)leaf),
   1124			  args->geo->leaf_hdr_size - 1);
   1125}
   1126
   1127/*
   1128 * Log the tail of the leaf1 block.
   1129 */
   1130STATIC void
   1131xfs_dir3_leaf_log_tail(
   1132	struct xfs_da_args	*args,
   1133	struct xfs_buf		*bp)
   1134{
   1135	struct xfs_dir2_leaf	*leaf = bp->b_addr;
   1136	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
   1137
   1138	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
   1139	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
   1140	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
   1141	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
   1142
   1143	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
   1144	xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
   1145		(uint)(args->geo->blksize - 1));
   1146}
   1147
   1148/*
   1149 * Look up the entry referred to by args in the leaf format directory.
   1150 * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
   1151 * is also used by the node-format code.
   1152 */
   1153int
   1154xfs_dir2_leaf_lookup(
   1155	xfs_da_args_t		*args)		/* operation arguments */
   1156{
   1157	struct xfs_buf		*dbp;		/* data block buffer */
   1158	xfs_dir2_data_entry_t	*dep;		/* data block entry */
   1159	xfs_inode_t		*dp;		/* incore directory inode */
   1160	int			error;		/* error return code */
   1161	int			index;		/* found entry index */
   1162	struct xfs_buf		*lbp;		/* leaf buffer */
   1163	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
   1164	xfs_trans_t		*tp;		/* transaction pointer */
   1165	struct xfs_dir3_icleaf_hdr leafhdr;
   1166
   1167	trace_xfs_dir2_leaf_lookup(args);
   1168
   1169	/*
   1170	 * Look up name in the leaf block, returning both buffers and index.
   1171	 */
   1172	error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
   1173	if (error)
   1174		return error;
   1175
   1176	tp = args->trans;
   1177	dp = args->dp;
   1178	xfs_dir3_leaf_check(dp, lbp);
   1179
   1180	/*
   1181	 * Get to the leaf entry and contained data entry address.
   1182	 */
   1183	lep = &leafhdr.ents[index];
   1184
   1185	/*
   1186	 * Point to the data entry.
   1187	 */
   1188	dep = (xfs_dir2_data_entry_t *)
   1189	      ((char *)dbp->b_addr +
   1190	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
   1191	/*
   1192	 * Return the found inode number & CI name if appropriate
   1193	 */
   1194	args->inumber = be64_to_cpu(dep->inumber);
   1195	args->filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep);
   1196	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
   1197	xfs_trans_brelse(tp, dbp);
   1198	xfs_trans_brelse(tp, lbp);
   1199	return error;
   1200}
   1201
   1202/*
   1203 * Look up name/hash in the leaf block.
   1204 * Fill in indexp with the found index, and dbpp with the data buffer.
   1205 * If not found dbpp will be NULL, and ENOENT comes back.
   1206 * lbpp will always be filled in with the leaf buffer unless there's an error.
   1207 */
   1208static int					/* error */
   1209xfs_dir2_leaf_lookup_int(
   1210	xfs_da_args_t		*args,		/* operation arguments */
   1211	struct xfs_buf		**lbpp,		/* out: leaf buffer */
   1212	int			*indexp,	/* out: index in leaf block */
   1213	struct xfs_buf		**dbpp,		/* out: data buffer */
   1214	struct xfs_dir3_icleaf_hdr *leafhdr)
   1215{
   1216	xfs_dir2_db_t		curdb = -1;	/* current data block number */
   1217	struct xfs_buf		*dbp = NULL;	/* data buffer */
   1218	xfs_dir2_data_entry_t	*dep;		/* data entry */
   1219	xfs_inode_t		*dp;		/* incore directory inode */
   1220	int			error;		/* error return code */
   1221	int			index;		/* index in leaf block */
   1222	struct xfs_buf		*lbp;		/* leaf buffer */
   1223	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
   1224	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
   1225	xfs_mount_t		*mp;		/* filesystem mount point */
   1226	xfs_dir2_db_t		newdb;		/* new data block number */
   1227	xfs_trans_t		*tp;		/* transaction pointer */
   1228	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
   1229	enum xfs_dacmp		cmp;		/* name compare result */
   1230
   1231	dp = args->dp;
   1232	tp = args->trans;
   1233	mp = dp->i_mount;
   1234
   1235	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
   1236	if (error)
   1237		return error;
   1238
   1239	*lbpp = lbp;
   1240	leaf = lbp->b_addr;
   1241	xfs_dir3_leaf_check(dp, lbp);
   1242	xfs_dir2_leaf_hdr_from_disk(mp, leafhdr, leaf);
   1243
   1244	/*
   1245	 * Look for the first leaf entry with our hash value.
   1246	 */
   1247	index = xfs_dir2_leaf_search_hash(args, lbp);
   1248	/*
   1249	 * Loop over all the entries with the right hash value
   1250	 * looking to match the name.
   1251	 */
   1252	for (lep = &leafhdr->ents[index];
   1253	     index < leafhdr->count &&
   1254			be32_to_cpu(lep->hashval) == args->hashval;
   1255	     lep++, index++) {
   1256		/*
   1257		 * Skip over stale leaf entries.
   1258		 */
   1259		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
   1260			continue;
   1261		/*
   1262		 * Get the new data block number.
   1263		 */
   1264		newdb = xfs_dir2_dataptr_to_db(args->geo,
   1265					       be32_to_cpu(lep->address));
   1266		/*
   1267		 * If it's not the same as the old data block number,
   1268		 * need to pitch the old one and read the new one.
   1269		 */
   1270		if (newdb != curdb) {
   1271			if (dbp)
   1272				xfs_trans_brelse(tp, dbp);
   1273			error = xfs_dir3_data_read(tp, dp,
   1274					   xfs_dir2_db_to_da(args->geo, newdb),
   1275					   0, &dbp);
   1276			if (error) {
   1277				xfs_trans_brelse(tp, lbp);
   1278				return error;
   1279			}
   1280			curdb = newdb;
   1281		}
   1282		/*
   1283		 * Point to the data entry.
   1284		 */
   1285		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
   1286			xfs_dir2_dataptr_to_off(args->geo,
   1287						be32_to_cpu(lep->address)));
   1288		/*
   1289		 * Compare name and if it's an exact match, return the index
   1290		 * and buffer. If it's the first case-insensitive match, store
   1291		 * the index and buffer and continue looking for an exact match.
   1292		 */
   1293		cmp = xfs_dir2_compname(args, dep->name, dep->namelen);
   1294		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
   1295			args->cmpresult = cmp;
   1296			*indexp = index;
   1297			/* case exact match: return the current buffer. */
   1298			if (cmp == XFS_CMP_EXACT) {
   1299				*dbpp = dbp;
   1300				return 0;
   1301			}
   1302			cidb = curdb;
   1303		}
   1304	}
   1305	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
   1306	/*
   1307	 * Here, we can only be doing a lookup (not a rename or remove).
   1308	 * If a case-insensitive match was found earlier, re-read the
   1309	 * appropriate data block if required and return it.
   1310	 */
   1311	if (args->cmpresult == XFS_CMP_CASE) {
   1312		ASSERT(cidb != -1);
   1313		if (cidb != curdb) {
   1314			xfs_trans_brelse(tp, dbp);
   1315			error = xfs_dir3_data_read(tp, dp,
   1316					   xfs_dir2_db_to_da(args->geo, cidb),
   1317					   0, &dbp);
   1318			if (error) {
   1319				xfs_trans_brelse(tp, lbp);
   1320				return error;
   1321			}
   1322		}
   1323		*dbpp = dbp;
   1324		return 0;
   1325	}
   1326	/*
   1327	 * No match found, return -ENOENT.
   1328	 */
   1329	ASSERT(cidb == -1);
   1330	if (dbp)
   1331		xfs_trans_brelse(tp, dbp);
   1332	xfs_trans_brelse(tp, lbp);
   1333	return -ENOENT;
   1334}
   1335
   1336/*
   1337 * Remove an entry from a leaf format directory.
   1338 */
   1339int						/* error */
   1340xfs_dir2_leaf_removename(
   1341	xfs_da_args_t		*args)		/* operation arguments */
   1342{
   1343	struct xfs_da_geometry	*geo = args->geo;
   1344	__be16			*bestsp;	/* leaf block best freespace */
   1345	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
   1346	xfs_dir2_db_t		db;		/* data block number */
   1347	struct xfs_buf		*dbp;		/* data block buffer */
   1348	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
   1349	xfs_inode_t		*dp;		/* incore directory inode */
   1350	int			error;		/* error return code */
   1351	xfs_dir2_db_t		i;		/* temporary data block # */
   1352	int			index;		/* index into leaf entries */
   1353	struct xfs_buf		*lbp;		/* leaf buffer */
   1354	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
   1355	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
   1356	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
   1357	int			needlog;	/* need to log data header */
   1358	int			needscan;	/* need to rescan data frees */
   1359	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
   1360	struct xfs_dir2_data_free *bf;		/* bestfree table */
   1361	struct xfs_dir3_icleaf_hdr leafhdr;
   1362
   1363	trace_xfs_dir2_leaf_removename(args);
   1364
   1365	/*
   1366	 * Lookup the leaf entry, get the leaf and data blocks read in.
   1367	 */
   1368	error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
   1369	if (error)
   1370		return error;
   1371
   1372	dp = args->dp;
   1373	leaf = lbp->b_addr;
   1374	hdr = dbp->b_addr;
   1375	xfs_dir3_data_check(dp, dbp);
   1376	bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
   1377
   1378	/*
   1379	 * Point to the leaf entry, use that to point to the data entry.
   1380	 */
   1381	lep = &leafhdr.ents[index];
   1382	db = xfs_dir2_dataptr_to_db(geo, be32_to_cpu(lep->address));
   1383	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
   1384		xfs_dir2_dataptr_to_off(geo, be32_to_cpu(lep->address)));
   1385	needscan = needlog = 0;
   1386	oldbest = be16_to_cpu(bf[0].length);
   1387	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
   1388	bestsp = xfs_dir2_leaf_bests_p(ltp);
   1389	if (be16_to_cpu(bestsp[db]) != oldbest) {
   1390		xfs_buf_mark_corrupt(lbp);
   1391		return -EFSCORRUPTED;
   1392	}
   1393	/*
   1394	 * Mark the former data entry unused.
   1395	 */
   1396	xfs_dir2_data_make_free(args, dbp,
   1397		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
   1398		xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog,
   1399		&needscan);
   1400	/*
   1401	 * We just mark the leaf entry stale by putting a null in it.
   1402	 */
   1403	leafhdr.stale++;
   1404	xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
   1405	xfs_dir3_leaf_log_header(args, lbp);
   1406
   1407	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
   1408	xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, index, index);
   1409
   1410	/*
   1411	 * Scan the freespace in the data block again if necessary,
   1412	 * log the data block header if necessary.
   1413	 */
   1414	if (needscan)
   1415		xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
   1416	if (needlog)
   1417		xfs_dir2_data_log_header(args, dbp);
   1418	/*
   1419	 * If the longest freespace in the data block has changed,
   1420	 * put the new value in the bests table and log that.
   1421	 */
   1422	if (be16_to_cpu(bf[0].length) != oldbest) {
   1423		bestsp[db] = bf[0].length;
   1424		xfs_dir3_leaf_log_bests(args, lbp, db, db);
   1425	}
   1426	xfs_dir3_data_check(dp, dbp);
   1427	/*
   1428	 * If the data block is now empty then get rid of the data block.
   1429	 */
   1430	if (be16_to_cpu(bf[0].length) ==
   1431	    geo->blksize - geo->data_entry_offset) {
   1432		ASSERT(db != geo->datablk);
   1433		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
   1434			/*
   1435			 * Nope, can't get rid of it because it caused
   1436			 * allocation of a bmap btree block to do so.
   1437			 * Just go on, returning success, leaving the
   1438			 * empty block in place.
   1439			 */
   1440			if (error == -ENOSPC && args->total == 0)
   1441				error = 0;
   1442			xfs_dir3_leaf_check(dp, lbp);
   1443			return error;
   1444		}
   1445		dbp = NULL;
   1446		/*
   1447		 * If this is the last data block then compact the
   1448		 * bests table by getting rid of entries.
   1449		 */
   1450		if (db == be32_to_cpu(ltp->bestcount) - 1) {
   1451			/*
   1452			 * Look for the last active entry (i).
   1453			 */
   1454			for (i = db - 1; i > 0; i--) {
   1455				if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
   1456					break;
   1457			}
   1458			/*
   1459			 * Copy the table down so inactive entries at the
   1460			 * end are removed.
   1461			 */
   1462			memmove(&bestsp[db - i], bestsp,
   1463				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
   1464			be32_add_cpu(&ltp->bestcount, -(db - i));
   1465			xfs_dir3_leaf_log_tail(args, lbp);
   1466			xfs_dir3_leaf_log_bests(args, lbp, 0,
   1467						be32_to_cpu(ltp->bestcount) - 1);
   1468		} else
   1469			bestsp[db] = cpu_to_be16(NULLDATAOFF);
   1470	}
   1471	/*
   1472	 * If the data block was not the first one, drop it.
   1473	 */
   1474	else if (db != geo->datablk)
   1475		dbp = NULL;
   1476
   1477	xfs_dir3_leaf_check(dp, lbp);
   1478	/*
   1479	 * See if we can convert to block form.
   1480	 */
   1481	return xfs_dir2_leaf_to_block(args, lbp, dbp);
   1482}
   1483
   1484/*
   1485 * Replace the inode number in a leaf format directory entry.
   1486 */
   1487int						/* error */
   1488xfs_dir2_leaf_replace(
   1489	xfs_da_args_t		*args)		/* operation arguments */
   1490{
   1491	struct xfs_buf		*dbp;		/* data block buffer */
   1492	xfs_dir2_data_entry_t	*dep;		/* data block entry */
   1493	xfs_inode_t		*dp;		/* incore directory inode */
   1494	int			error;		/* error return code */
   1495	int			index;		/* index of leaf entry */
   1496	struct xfs_buf		*lbp;		/* leaf buffer */
   1497	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
   1498	xfs_trans_t		*tp;		/* transaction pointer */
   1499	struct xfs_dir3_icleaf_hdr leafhdr;
   1500
   1501	trace_xfs_dir2_leaf_replace(args);
   1502
   1503	/*
   1504	 * Look up the entry.
   1505	 */
   1506	error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
   1507	if (error)
   1508		return error;
   1509
   1510	dp = args->dp;
   1511	/*
   1512	 * Point to the leaf entry, get data address from it.
   1513	 */
   1514	lep = &leafhdr.ents[index];
   1515	/*
   1516	 * Point to the data entry.
   1517	 */
   1518	dep = (xfs_dir2_data_entry_t *)
   1519	      ((char *)dbp->b_addr +
   1520	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
   1521	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
   1522	/*
   1523	 * Put the new inode number in, log it.
   1524	 */
   1525	dep->inumber = cpu_to_be64(args->inumber);
   1526	xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
   1527	tp = args->trans;
   1528	xfs_dir2_data_log_entry(args, dbp, dep);
   1529	xfs_dir3_leaf_check(dp, lbp);
   1530	xfs_trans_brelse(tp, lbp);
   1531	return 0;
   1532}
   1533
   1534/*
   1535 * Return index in the leaf block (lbp) which is either the first
   1536 * one with this hash value, or if there are none, the insert point
   1537 * for that hash value.
   1538 */
   1539int						/* index value */
   1540xfs_dir2_leaf_search_hash(
   1541	xfs_da_args_t		*args,		/* operation arguments */
   1542	struct xfs_buf		*lbp)		/* leaf buffer */
   1543{
   1544	xfs_dahash_t		hash=0;		/* hash from this entry */
   1545	xfs_dahash_t		hashwant;	/* hash value looking for */
   1546	int			high;		/* high leaf index */
   1547	int			low;		/* low leaf index */
   1548	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
   1549	int			mid=0;		/* current leaf index */
   1550	struct xfs_dir3_icleaf_hdr leafhdr;
   1551
   1552	xfs_dir2_leaf_hdr_from_disk(args->dp->i_mount, &leafhdr, lbp->b_addr);
   1553
   1554	/*
   1555	 * Note, the table cannot be empty, so we have to go through the loop.
   1556	 * Binary search the leaf entries looking for our hash value.
   1557	 */
   1558	for (lep = leafhdr.ents, low = 0, high = leafhdr.count - 1,
   1559		hashwant = args->hashval;
   1560	     low <= high; ) {
   1561		mid = (low + high) >> 1;
   1562		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
   1563			break;
   1564		if (hash < hashwant)
   1565			low = mid + 1;
   1566		else
   1567			high = mid - 1;
   1568	}
   1569	/*
   1570	 * Found one, back up through all the equal hash values.
   1571	 */
   1572	if (hash == hashwant) {
   1573		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
   1574			mid--;
   1575		}
   1576	}
   1577	/*
   1578	 * Need to point to an entry higher than ours.
   1579	 */
   1580	else if (hash < hashwant)
   1581		mid++;
   1582	return mid;
   1583}
   1584
   1585/*
   1586 * Trim off a trailing data block.  We know it's empty since the leaf
   1587 * freespace table says so.
   1588 */
   1589int						/* error */
   1590xfs_dir2_leaf_trim_data(
   1591	xfs_da_args_t		*args,		/* operation arguments */
   1592	struct xfs_buf		*lbp,		/* leaf buffer */
   1593	xfs_dir2_db_t		db)		/* data block number */
   1594{
   1595	struct xfs_da_geometry	*geo = args->geo;
   1596	__be16			*bestsp;	/* leaf bests table */
   1597	struct xfs_buf		*dbp;		/* data block buffer */
   1598	xfs_inode_t		*dp;		/* incore directory inode */
   1599	int			error;		/* error return value */
   1600	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
   1601	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
   1602	xfs_trans_t		*tp;		/* transaction pointer */
   1603
   1604	dp = args->dp;
   1605	tp = args->trans;
   1606	/*
   1607	 * Read the offending data block.  We need its buffer.
   1608	 */
   1609	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(geo, db), 0, &dbp);
   1610	if (error)
   1611		return error;
   1612
   1613	leaf = lbp->b_addr;
   1614	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
   1615
   1616#ifdef DEBUG
   1617{
   1618	struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
   1619	struct xfs_dir2_data_free *bf =
   1620		xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
   1621
   1622	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
   1623	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
   1624	ASSERT(be16_to_cpu(bf[0].length) ==
   1625	       geo->blksize - geo->data_entry_offset);
   1626	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
   1627}
   1628#endif
   1629
   1630	/*
   1631	 * Get rid of the data block.
   1632	 */
   1633	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
   1634		ASSERT(error != -ENOSPC);
   1635		xfs_trans_brelse(tp, dbp);
   1636		return error;
   1637	}
   1638	/*
   1639	 * Eliminate the last bests entry from the table.
   1640	 */
   1641	bestsp = xfs_dir2_leaf_bests_p(ltp);
   1642	be32_add_cpu(&ltp->bestcount, -1);
   1643	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
   1644	xfs_dir3_leaf_log_tail(args, lbp);
   1645	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
   1646	return 0;
   1647}
   1648
   1649static inline size_t
   1650xfs_dir3_leaf_size(
   1651	struct xfs_dir3_icleaf_hdr	*hdr,
   1652	int				counts)
   1653{
   1654	int	entries;
   1655	int	hdrsize;
   1656
   1657	entries = hdr->count - hdr->stale;
   1658	if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
   1659	    hdr->magic == XFS_DIR2_LEAFN_MAGIC)
   1660		hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
   1661	else
   1662		hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
   1663
   1664	return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
   1665	               + counts * sizeof(xfs_dir2_data_off_t)
   1666		       + sizeof(xfs_dir2_leaf_tail_t);
   1667}
   1668
   1669/*
   1670 * Convert node form directory to leaf form directory.
   1671 * The root of the node form dir needs to already be a LEAFN block.
   1672 * Just return if we can't do anything.
   1673 */
   1674int						/* error */
   1675xfs_dir2_node_to_leaf(
   1676	xfs_da_state_t		*state)		/* directory operation state */
   1677{
   1678	xfs_da_args_t		*args;		/* operation arguments */
   1679	xfs_inode_t		*dp;		/* incore directory inode */
   1680	int			error;		/* error return code */
   1681	struct xfs_buf		*fbp;		/* buffer for freespace block */
   1682	xfs_fileoff_t		fo;		/* freespace file offset */
   1683	struct xfs_buf		*lbp;		/* buffer for leaf block */
   1684	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
   1685	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
   1686	xfs_mount_t		*mp;		/* filesystem mount point */
   1687	int			rval;		/* successful free trim? */
   1688	xfs_trans_t		*tp;		/* transaction pointer */
   1689	struct xfs_dir3_icleaf_hdr leafhdr;
   1690	struct xfs_dir3_icfree_hdr freehdr;
   1691
   1692	/*
   1693	 * There's more than a leaf level in the btree, so there must
   1694	 * be multiple leafn blocks.  Give up.
   1695	 */
   1696	if (state->path.active > 1)
   1697		return 0;
   1698	args = state->args;
   1699
   1700	trace_xfs_dir2_node_to_leaf(args);
   1701
   1702	mp = state->mp;
   1703	dp = args->dp;
   1704	tp = args->trans;
   1705	/*
   1706	 * Get the last offset in the file.
   1707	 */
   1708	if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
   1709		return error;
   1710	}
   1711	fo -= args->geo->fsbcount;
   1712	/*
   1713	 * If there are freespace blocks other than the first one,
   1714	 * take this opportunity to remove trailing empty freespace blocks
   1715	 * that may have been left behind during no-space-reservation
   1716	 * operations.
   1717	 */
   1718	while (fo > args->geo->freeblk) {
   1719		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
   1720			return error;
   1721		}
   1722		if (rval)
   1723			fo -= args->geo->fsbcount;
   1724		else
   1725			return 0;
   1726	}
   1727	/*
   1728	 * Now find the block just before the freespace block.
   1729	 */
   1730	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
   1731		return error;
   1732	}
   1733	/*
   1734	 * If it's not the single leaf block, give up.
   1735	 */
   1736	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
   1737		return 0;
   1738	lbp = state->path.blk[0].bp;
   1739	leaf = lbp->b_addr;
   1740	xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
   1741
   1742	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
   1743	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
   1744
   1745	/*
   1746	 * Read the freespace block.
   1747	 */
   1748	error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);
   1749	if (error)
   1750		return error;
   1751	xfs_dir2_free_hdr_from_disk(mp, &freehdr, fbp->b_addr);
   1752
   1753	ASSERT(!freehdr.firstdb);
   1754
   1755	/*
   1756	 * Now see if the leafn and free data will fit in a leaf1.
   1757	 * If not, release the buffer and give up.
   1758	 */
   1759	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
   1760		xfs_trans_brelse(tp, fbp);
   1761		return 0;
   1762	}
   1763
   1764	/*
   1765	 * If the leaf has any stale entries in it, compress them out.
   1766	 */
   1767	if (leafhdr.stale)
   1768		xfs_dir3_leaf_compact(args, &leafhdr, lbp);
   1769
   1770	lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
   1771	xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
   1772	leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
   1773					? XFS_DIR2_LEAF1_MAGIC
   1774					: XFS_DIR3_LEAF1_MAGIC;
   1775
   1776	/*
   1777	 * Set up the leaf tail from the freespace block.
   1778	 */
   1779	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
   1780	ltp->bestcount = cpu_to_be32(freehdr.nvalid);
   1781
   1782	/*
   1783	 * Set up the leaf bests table.
   1784	 */
   1785	memcpy(xfs_dir2_leaf_bests_p(ltp), freehdr.bests,
   1786		freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
   1787
   1788	xfs_dir2_leaf_hdr_to_disk(mp, leaf, &leafhdr);
   1789	xfs_dir3_leaf_log_header(args, lbp);
   1790	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
   1791	xfs_dir3_leaf_log_tail(args, lbp);
   1792	xfs_dir3_leaf_check(dp, lbp);
   1793
   1794	/*
   1795	 * Get rid of the freespace block.
   1796	 */
   1797	error = xfs_dir2_shrink_inode(args,
   1798			xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
   1799			fbp);
   1800	if (error) {
   1801		/*
   1802		 * This can't fail here because it can only happen when
   1803		 * punching out the middle of an extent, and this is an
   1804		 * isolated block.
   1805		 */
   1806		ASSERT(error != -ENOSPC);
   1807		return error;
   1808	}
   1809	fbp = NULL;
   1810	/*
   1811	 * Now see if we can convert the single-leaf directory
   1812	 * down to a block form directory.
   1813	 * This routine always kills the dabuf for the leaf, so
   1814	 * eliminate it from the path.
   1815	 */
   1816	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
   1817	state->path.blk[0].bp = NULL;
   1818	return error;
   1819}