cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

jfs_dtree.c (95183B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) International Business Machines Corp., 2000-2004
      4 */
      5
      6/*
      7 *	jfs_dtree.c: directory B+-tree manager
      8 *
      9 * B+-tree with variable length key directory:
     10 *
     11 * each directory page is structured as an array of 32-byte
     12 * directory entry slots initialized as a freelist
     13 * to avoid search/compaction of free space at insertion.
     14 * when an entry is inserted, a number of slots are allocated
     15 * from the freelist as required to store variable length data
     16 * of the entry; when the entry is deleted, slots of the entry
     17 * are returned to freelist.
     18 *
     19 * leaf entry stores full name as key and file serial number
     20 * (aka inode number) as data.
     21 * internal/router entry stores sufffix compressed name
     22 * as key and simple extent descriptor as data.
     23 *
     24 * each directory page maintains a sorted entry index table
     25 * which stores the start slot index of sorted entries
     26 * to allow binary search on the table.
     27 *
     28 * directory starts as a root/leaf page in on-disk inode
     29 * inline data area.
     30 * when it becomes full, it starts a leaf of a external extent
     31 * of length of 1 block. each time the first leaf becomes full,
     32 * it is extended rather than split (its size is doubled),
     33 * until its length becoms 4 KBytes, from then the extent is split
     34 * with new 4 Kbyte extent when it becomes full
     35 * to reduce external fragmentation of small directories.
     36 *
     37 * blah, blah, blah, for linear scan of directory in pieces by
     38 * readdir().
     39 *
     40 *
     41 *	case-insensitive directory file system
     42 *
     43 * names are stored in case-sensitive way in leaf entry.
     44 * but stored, searched and compared in case-insensitive (uppercase) order
     45 * (i.e., both search key and entry key are folded for search/compare):
     46 * (note that case-sensitive order is BROKEN in storage, e.g.,
     47 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
     48 *
     49 *  entries which folds to the same key makes up a equivalent class
     50 *  whose members are stored as contiguous cluster (may cross page boundary)
     51 *  but whose order is arbitrary and acts as duplicate, e.g.,
     52 *  abc, Abc, aBc, abC)
     53 *
     54 * once match is found at leaf, requires scan forward/backward
     55 * either for, in case-insensitive search, duplicate
     56 * or for, in case-sensitive search, for exact match
     57 *
     58 * router entry must be created/stored in case-insensitive way
     59 * in internal entry:
     60 * (right most key of left page and left most key of right page
     61 * are folded, and its suffix compression is propagated as router
     62 * key in parent)
     63 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
     64 * should be made the router key for the split)
     65 *
     66 * case-insensitive search:
     67 *
     68 *	fold search key;
     69 *
     70 *	case-insensitive search of B-tree:
     71 *	for internal entry, router key is already folded;
     72 *	for leaf entry, fold the entry key before comparison.
     73 *
     74 *	if (leaf entry case-insensitive match found)
     75 *		if (next entry satisfies case-insensitive match)
     76 *			return EDUPLICATE;
     77 *		if (prev entry satisfies case-insensitive match)
     78 *			return EDUPLICATE;
     79 *		return match;
     80 *	else
     81 *		return no match;
     82 *
     83 *	serialization:
     84 * target directory inode lock is being held on entry/exit
     85 * of all main directory service routines.
     86 *
     87 *	log based recovery:
     88 */
     89
     90#include <linux/fs.h>
     91#include <linux/quotaops.h>
     92#include <linux/slab.h>
     93#include "jfs_incore.h"
     94#include "jfs_superblock.h"
     95#include "jfs_filsys.h"
     96#include "jfs_metapage.h"
     97#include "jfs_dmap.h"
     98#include "jfs_unicode.h"
     99#include "jfs_debug.h"
    100
    101/* dtree split parameter */
    102struct dtsplit {
    103	struct metapage *mp;
    104	s16 index;
    105	s16 nslot;
    106	struct component_name *key;
    107	ddata_t *data;
    108	struct pxdlist *pxdlist;
    109};
    110
    111#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
    112
    113/* get page buffer for specified block address */
    114#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
    115do {									\
    116	BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);	\
    117	if (!(RC)) {							\
    118		if (((P)->header.nextindex >				\
    119		     (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
    120		    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {	\
    121			BT_PUTPAGE(MP);					\
    122			jfs_error((IP)->i_sb,				\
    123				  "DT_GETPAGE: dtree page corrupt\n");	\
    124			MP = NULL;					\
    125			RC = -EIO;					\
    126		}							\
    127	}								\
    128} while (0)
    129
    130/* for consistency */
    131#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
    132
    133#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
    134	BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
    135
    136/*
    137 * forward references
    138 */
    139static int dtSplitUp(tid_t tid, struct inode *ip,
    140		     struct dtsplit * split, struct btstack * btstack);
    141
    142static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
    143		       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
    144
    145static int dtExtendPage(tid_t tid, struct inode *ip,
    146			struct dtsplit * split, struct btstack * btstack);
    147
    148static int dtSplitRoot(tid_t tid, struct inode *ip,
    149		       struct dtsplit * split, struct metapage ** rmpp);
    150
    151static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
    152		      dtpage_t * fp, struct btstack * btstack);
    153
    154static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
    155
    156static int dtReadFirst(struct inode *ip, struct btstack * btstack);
    157
    158static int dtReadNext(struct inode *ip,
    159		      loff_t * offset, struct btstack * btstack);
    160
    161static int dtCompare(struct component_name * key, dtpage_t * p, int si);
    162
    163static int ciCompare(struct component_name * key, dtpage_t * p, int si,
    164		     int flag);
    165
    166static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
    167		     int flag);
    168
    169static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
    170			      int ri, struct component_name * key, int flag);
    171
    172static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
    173			  ddata_t * data, struct dt_lock **);
    174
    175static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
    176			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
    177			int do_index);
    178
    179static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
    180
    181static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
    182
    183static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
    184
    185#define ciToUpper(c)	UniStrupr((c)->name)
    186
    187/*
    188 *	read_index_page()
    189 *
    190 *	Reads a page of a directory's index table.
    191 *	Having metadata mapped into the directory inode's address space
    192 *	presents a multitude of problems.  We avoid this by mapping to
    193 *	the absolute address space outside of the *_metapage routines
    194 */
    195static struct metapage *read_index_page(struct inode *inode, s64 blkno)
    196{
    197	int rc;
    198	s64 xaddr;
    199	int xflag;
    200	s32 xlen;
    201
    202	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
    203	if (rc || (xaddr == 0))
    204		return NULL;
    205
    206	return read_metapage(inode, xaddr, PSIZE, 1);
    207}
    208
    209/*
    210 *	get_index_page()
    211 *
    212 *	Same as get_index_page(), but get's a new page without reading
    213 */
    214static struct metapage *get_index_page(struct inode *inode, s64 blkno)
    215{
    216	int rc;
    217	s64 xaddr;
    218	int xflag;
    219	s32 xlen;
    220
    221	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
    222	if (rc || (xaddr == 0))
    223		return NULL;
    224
    225	return get_metapage(inode, xaddr, PSIZE, 1);
    226}
    227
    228/*
    229 *	find_index()
    230 *
    231 *	Returns dtree page containing directory table entry for specified
    232 *	index and pointer to its entry.
    233 *
    234 *	mp must be released by caller.
    235 */
    236static struct dir_table_slot *find_index(struct inode *ip, u32 index,
    237					 struct metapage ** mp, s64 *lblock)
    238{
    239	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    240	s64 blkno;
    241	s64 offset;
    242	int page_offset;
    243	struct dir_table_slot *slot;
    244	static int maxWarnings = 10;
    245
    246	if (index < 2) {
    247		if (maxWarnings) {
    248			jfs_warn("find_entry called with index = %d", index);
    249			maxWarnings--;
    250		}
    251		return NULL;
    252	}
    253
    254	if (index >= jfs_ip->next_index) {
    255		jfs_warn("find_entry called with index >= next_index");
    256		return NULL;
    257	}
    258
    259	if (jfs_dirtable_inline(ip)) {
    260		/*
    261		 * Inline directory table
    262		 */
    263		*mp = NULL;
    264		slot = &jfs_ip->i_dirtable[index - 2];
    265	} else {
    266		offset = (index - 2) * sizeof(struct dir_table_slot);
    267		page_offset = offset & (PSIZE - 1);
    268		blkno = ((offset + 1) >> L2PSIZE) <<
    269		    JFS_SBI(ip->i_sb)->l2nbperpage;
    270
    271		if (*mp && (*lblock != blkno)) {
    272			release_metapage(*mp);
    273			*mp = NULL;
    274		}
    275		if (!(*mp)) {
    276			*lblock = blkno;
    277			*mp = read_index_page(ip, blkno);
    278		}
    279		if (!(*mp)) {
    280			jfs_err("free_index: error reading directory table");
    281			return NULL;
    282		}
    283
    284		slot =
    285		    (struct dir_table_slot *) ((char *) (*mp)->data +
    286					       page_offset);
    287	}
    288	return slot;
    289}
    290
    291static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
    292			      u32 index)
    293{
    294	struct tlock *tlck;
    295	struct linelock *llck;
    296	struct lv *lv;
    297
    298	tlck = txLock(tid, ip, mp, tlckDATA);
    299	llck = (struct linelock *) tlck->lock;
    300
    301	if (llck->index >= llck->maxcnt)
    302		llck = txLinelock(llck);
    303	lv = &llck->lv[llck->index];
    304
    305	/*
    306	 *	Linelock slot size is twice the size of directory table
    307	 *	slot size.  512 entries per page.
    308	 */
    309	lv->offset = ((index - 2) & 511) >> 1;
    310	lv->length = 1;
    311	llck->index++;
    312}
    313
    314/*
    315 *	add_index()
    316 *
    317 *	Adds an entry to the directory index table.  This is used to provide
    318 *	each directory entry with a persistent index in which to resume
    319 *	directory traversals
    320 */
    321static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
    322{
    323	struct super_block *sb = ip->i_sb;
    324	struct jfs_sb_info *sbi = JFS_SBI(sb);
    325	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    326	u64 blkno;
    327	struct dir_table_slot *dirtab_slot;
    328	u32 index;
    329	struct linelock *llck;
    330	struct lv *lv;
    331	struct metapage *mp;
    332	s64 offset;
    333	uint page_offset;
    334	struct tlock *tlck;
    335	s64 xaddr;
    336
    337	ASSERT(DO_INDEX(ip));
    338
    339	if (jfs_ip->next_index < 2) {
    340		jfs_warn("add_index: next_index = %d.  Resetting!",
    341			   jfs_ip->next_index);
    342		jfs_ip->next_index = 2;
    343	}
    344
    345	index = jfs_ip->next_index++;
    346
    347	if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
    348		/*
    349		 * i_size reflects size of index table, or 8 bytes per entry.
    350		 */
    351		ip->i_size = (loff_t) (index - 1) << 3;
    352
    353		/*
    354		 * dir table fits inline within inode
    355		 */
    356		dirtab_slot = &jfs_ip->i_dirtable[index-2];
    357		dirtab_slot->flag = DIR_INDEX_VALID;
    358		dirtab_slot->slot = slot;
    359		DTSaddress(dirtab_slot, bn);
    360
    361		set_cflag(COMMIT_Dirtable, ip);
    362
    363		return index;
    364	}
    365	if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
    366		struct dir_table_slot temp_table[12];
    367
    368		/*
    369		 * It's time to move the inline table to an external
    370		 * page and begin to build the xtree
    371		 */
    372		if (dquot_alloc_block(ip, sbi->nbperpage))
    373			goto clean_up;
    374		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
    375			dquot_free_block(ip, sbi->nbperpage);
    376			goto clean_up;
    377		}
    378
    379		/*
    380		 * Save the table, we're going to overwrite it with the
    381		 * xtree root
    382		 */
    383		memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
    384
    385		/*
    386		 * Initialize empty x-tree
    387		 */
    388		xtInitRoot(tid, ip);
    389
    390		/*
    391		 * Add the first block to the xtree
    392		 */
    393		if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
    394			/* This really shouldn't fail */
    395			jfs_warn("add_index: xtInsert failed!");
    396			memcpy(&jfs_ip->i_dirtable, temp_table,
    397			       sizeof (temp_table));
    398			dbFree(ip, xaddr, sbi->nbperpage);
    399			dquot_free_block(ip, sbi->nbperpage);
    400			goto clean_up;
    401		}
    402		ip->i_size = PSIZE;
    403
    404		mp = get_index_page(ip, 0);
    405		if (!mp) {
    406			jfs_err("add_index: get_metapage failed!");
    407			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
    408			memcpy(&jfs_ip->i_dirtable, temp_table,
    409			       sizeof (temp_table));
    410			goto clean_up;
    411		}
    412		tlck = txLock(tid, ip, mp, tlckDATA);
    413		llck = (struct linelock *) & tlck->lock;
    414		ASSERT(llck->index == 0);
    415		lv = &llck->lv[0];
    416
    417		lv->offset = 0;
    418		lv->length = 6;	/* tlckDATA slot size is 16 bytes */
    419		llck->index++;
    420
    421		memcpy(mp->data, temp_table, sizeof(temp_table));
    422
    423		mark_metapage_dirty(mp);
    424		release_metapage(mp);
    425
    426		/*
    427		 * Logging is now directed by xtree tlocks
    428		 */
    429		clear_cflag(COMMIT_Dirtable, ip);
    430	}
    431
    432	offset = (index - 2) * sizeof(struct dir_table_slot);
    433	page_offset = offset & (PSIZE - 1);
    434	blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
    435	if (page_offset == 0) {
    436		/*
    437		 * This will be the beginning of a new page
    438		 */
    439		xaddr = 0;
    440		if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
    441			jfs_warn("add_index: xtInsert failed!");
    442			goto clean_up;
    443		}
    444		ip->i_size += PSIZE;
    445
    446		if ((mp = get_index_page(ip, blkno)))
    447			memset(mp->data, 0, PSIZE);	/* Just looks better */
    448		else
    449			xtTruncate(tid, ip, offset, COMMIT_PWMAP);
    450	} else
    451		mp = read_index_page(ip, blkno);
    452
    453	if (!mp) {
    454		jfs_err("add_index: get/read_metapage failed!");
    455		goto clean_up;
    456	}
    457
    458	lock_index(tid, ip, mp, index);
    459
    460	dirtab_slot =
    461	    (struct dir_table_slot *) ((char *) mp->data + page_offset);
    462	dirtab_slot->flag = DIR_INDEX_VALID;
    463	dirtab_slot->slot = slot;
    464	DTSaddress(dirtab_slot, bn);
    465
    466	mark_metapage_dirty(mp);
    467	release_metapage(mp);
    468
    469	return index;
    470
    471      clean_up:
    472
    473	jfs_ip->next_index--;
    474
    475	return 0;
    476}
    477
    478/*
    479 *	free_index()
    480 *
    481 *	Marks an entry to the directory index table as free.
    482 */
    483static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
    484{
    485	struct dir_table_slot *dirtab_slot;
    486	s64 lblock;
    487	struct metapage *mp = NULL;
    488
    489	dirtab_slot = find_index(ip, index, &mp, &lblock);
    490
    491	if (!dirtab_slot)
    492		return;
    493
    494	dirtab_slot->flag = DIR_INDEX_FREE;
    495	dirtab_slot->slot = dirtab_slot->addr1 = 0;
    496	dirtab_slot->addr2 = cpu_to_le32(next);
    497
    498	if (mp) {
    499		lock_index(tid, ip, mp, index);
    500		mark_metapage_dirty(mp);
    501		release_metapage(mp);
    502	} else
    503		set_cflag(COMMIT_Dirtable, ip);
    504}
    505
    506/*
    507 *	modify_index()
    508 *
    509 *	Changes an entry in the directory index table
    510 */
    511static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
    512			 int slot, struct metapage ** mp, s64 *lblock)
    513{
    514	struct dir_table_slot *dirtab_slot;
    515
    516	dirtab_slot = find_index(ip, index, mp, lblock);
    517
    518	if (!dirtab_slot)
    519		return;
    520
    521	DTSaddress(dirtab_slot, bn);
    522	dirtab_slot->slot = slot;
    523
    524	if (*mp) {
    525		lock_index(tid, ip, *mp, index);
    526		mark_metapage_dirty(*mp);
    527	} else
    528		set_cflag(COMMIT_Dirtable, ip);
    529}
    530
    531/*
    532 *	read_index()
    533 *
    534 *	reads a directory table slot
    535 */
    536static int read_index(struct inode *ip, u32 index,
    537		     struct dir_table_slot * dirtab_slot)
    538{
    539	s64 lblock;
    540	struct metapage *mp = NULL;
    541	struct dir_table_slot *slot;
    542
    543	slot = find_index(ip, index, &mp, &lblock);
    544	if (!slot) {
    545		return -EIO;
    546	}
    547
    548	memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
    549
    550	if (mp)
    551		release_metapage(mp);
    552
    553	return 0;
    554}
    555
    556/*
    557 *	dtSearch()
    558 *
    559 * function:
    560 *	Search for the entry with specified key
    561 *
    562 * parameter:
    563 *
    564 * return: 0 - search result on stack, leaf page pinned;
    565 *	   errno - I/O error
    566 */
    567int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
    568	     struct btstack * btstack, int flag)
    569{
    570	int rc = 0;
    571	int cmp = 1;		/* init for empty page */
    572	s64 bn;
    573	struct metapage *mp;
    574	dtpage_t *p;
    575	s8 *stbl;
    576	int base, index, lim;
    577	struct btframe *btsp;
    578	pxd_t *pxd;
    579	int psize = 288;	/* initial in-line directory */
    580	ino_t inumber;
    581	struct component_name ciKey;
    582	struct super_block *sb = ip->i_sb;
    583
    584	ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
    585				   GFP_NOFS);
    586	if (!ciKey.name) {
    587		rc = -ENOMEM;
    588		goto dtSearch_Exit2;
    589	}
    590
    591
    592	/* uppercase search key for c-i directory */
    593	UniStrcpy(ciKey.name, key->name);
    594	ciKey.namlen = key->namlen;
    595
    596	/* only uppercase if case-insensitive support is on */
    597	if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
    598		ciToUpper(&ciKey);
    599	}
    600	BT_CLR(btstack);	/* reset stack */
    601
    602	/* init level count for max pages to split */
    603	btstack->nsplit = 1;
    604
    605	/*
    606	 *	search down tree from root:
    607	 *
    608	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
    609	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
    610	 *
    611	 * if entry with search key K is not found
    612	 * internal page search find the entry with largest key Ki
    613	 * less than K which point to the child page to search;
    614	 * leaf page search find the entry with smallest key Kj
    615	 * greater than K so that the returned index is the position of
    616	 * the entry to be shifted right for insertion of new entry.
    617	 * for empty tree, search key is greater than any key of the tree.
    618	 *
    619	 * by convention, root bn = 0.
    620	 */
    621	for (bn = 0;;) {
    622		/* get/pin the page to search */
    623		DT_GETPAGE(ip, bn, mp, psize, p, rc);
    624		if (rc)
    625			goto dtSearch_Exit1;
    626
    627		/* get sorted entry table of the page */
    628		stbl = DT_GETSTBL(p);
    629
    630		/*
    631		 * binary search with search key K on the current page.
    632		 */
    633		for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
    634			index = base + (lim >> 1);
    635
    636			if (p->header.flag & BT_LEAF) {
    637				/* uppercase leaf name to compare */
    638				cmp =
    639				    ciCompare(&ciKey, p, stbl[index],
    640					      JFS_SBI(sb)->mntflag);
    641			} else {
    642				/* router key is in uppercase */
    643
    644				cmp = dtCompare(&ciKey, p, stbl[index]);
    645
    646
    647			}
    648			if (cmp == 0) {
    649				/*
    650				 *	search hit
    651				 */
    652				/* search hit - leaf page:
    653				 * return the entry found
    654				 */
    655				if (p->header.flag & BT_LEAF) {
    656					inumber = le32_to_cpu(
    657			((struct ldtentry *) & p->slot[stbl[index]])->inumber);
    658
    659					/*
    660					 * search for JFS_LOOKUP
    661					 */
    662					if (flag == JFS_LOOKUP) {
    663						*data = inumber;
    664						rc = 0;
    665						goto out;
    666					}
    667
    668					/*
    669					 * search for JFS_CREATE
    670					 */
    671					if (flag == JFS_CREATE) {
    672						*data = inumber;
    673						rc = -EEXIST;
    674						goto out;
    675					}
    676
    677					/*
    678					 * search for JFS_REMOVE or JFS_RENAME
    679					 */
    680					if ((flag == JFS_REMOVE ||
    681					     flag == JFS_RENAME) &&
    682					    *data != inumber) {
    683						rc = -ESTALE;
    684						goto out;
    685					}
    686
    687					/*
    688					 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
    689					 */
    690					/* save search result */
    691					*data = inumber;
    692					btsp = btstack->top;
    693					btsp->bn = bn;
    694					btsp->index = index;
    695					btsp->mp = mp;
    696
    697					rc = 0;
    698					goto dtSearch_Exit1;
    699				}
    700
    701				/* search hit - internal page:
    702				 * descend/search its child page
    703				 */
    704				goto getChild;
    705			}
    706
    707			if (cmp > 0) {
    708				base = index + 1;
    709				--lim;
    710			}
    711		}
    712
    713		/*
    714		 *	search miss
    715		 *
    716		 * base is the smallest index with key (Kj) greater than
    717		 * search key (K) and may be zero or (maxindex + 1) index.
    718		 */
    719		/*
    720		 * search miss - leaf page
    721		 *
    722		 * return location of entry (base) where new entry with
    723		 * search key K is to be inserted.
    724		 */
    725		if (p->header.flag & BT_LEAF) {
    726			/*
    727			 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
    728			 */
    729			if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
    730			    flag == JFS_RENAME) {
    731				rc = -ENOENT;
    732				goto out;
    733			}
    734
    735			/*
    736			 * search for JFS_CREATE|JFS_FINDDIR:
    737			 *
    738			 * save search result
    739			 */
    740			*data = 0;
    741			btsp = btstack->top;
    742			btsp->bn = bn;
    743			btsp->index = base;
    744			btsp->mp = mp;
    745
    746			rc = 0;
    747			goto dtSearch_Exit1;
    748		}
    749
    750		/*
    751		 * search miss - internal page
    752		 *
    753		 * if base is non-zero, decrement base by one to get the parent
    754		 * entry of the child page to search.
    755		 */
    756		index = base ? base - 1 : base;
    757
    758		/*
    759		 * go down to child page
    760		 */
    761	      getChild:
    762		/* update max. number of pages to split */
    763		if (BT_STACK_FULL(btstack)) {
    764			/* Something's corrupted, mark filesystem dirty so
    765			 * chkdsk will fix it.
    766			 */
    767			jfs_error(sb, "stack overrun!\n");
    768			BT_STACK_DUMP(btstack);
    769			rc = -EIO;
    770			goto out;
    771		}
    772		btstack->nsplit++;
    773
    774		/* push (bn, index) of the parent page/entry */
    775		BT_PUSH(btstack, bn, index);
    776
    777		/* get the child page block number */
    778		pxd = (pxd_t *) & p->slot[stbl[index]];
    779		bn = addressPXD(pxd);
    780		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
    781
    782		/* unpin the parent page */
    783		DT_PUTPAGE(mp);
    784	}
    785
    786      out:
    787	DT_PUTPAGE(mp);
    788
    789      dtSearch_Exit1:
    790
    791	kfree(ciKey.name);
    792
    793      dtSearch_Exit2:
    794
    795	return rc;
    796}
    797
    798
    799/*
    800 *	dtInsert()
    801 *
    802 * function: insert an entry to directory tree
    803 *
    804 * parameter:
    805 *
    806 * return: 0 - success;
    807 *	   errno - failure;
    808 */
    809int dtInsert(tid_t tid, struct inode *ip,
    810	 struct component_name * name, ino_t * fsn, struct btstack * btstack)
    811{
    812	int rc = 0;
    813	struct metapage *mp;	/* meta-page buffer */
    814	dtpage_t *p;		/* base B+-tree index page */
    815	s64 bn;
    816	int index;
    817	struct dtsplit split;	/* split information */
    818	ddata_t data;
    819	struct dt_lock *dtlck;
    820	int n;
    821	struct tlock *tlck;
    822	struct lv *lv;
    823
    824	/*
    825	 *	retrieve search result
    826	 *
    827	 * dtSearch() returns (leaf page pinned, index at which to insert).
    828	 * n.b. dtSearch() may return index of (maxindex + 1) of
    829	 * the full page.
    830	 */
    831	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
    832
    833	/*
    834	 *	insert entry for new key
    835	 */
    836	if (DO_INDEX(ip)) {
    837		if (JFS_IP(ip)->next_index == DIREND) {
    838			DT_PUTPAGE(mp);
    839			return -EMLINK;
    840		}
    841		n = NDTLEAF(name->namlen);
    842		data.leaf.tid = tid;
    843		data.leaf.ip = ip;
    844	} else {
    845		n = NDTLEAF_LEGACY(name->namlen);
    846		data.leaf.ip = NULL;	/* signifies legacy directory format */
    847	}
    848	data.leaf.ino = *fsn;
    849
    850	/*
    851	 *	leaf page does not have enough room for new entry:
    852	 *
    853	 *	extend/split the leaf page;
    854	 *
    855	 * dtSplitUp() will insert the entry and unpin the leaf page.
    856	 */
    857	if (n > p->header.freecnt) {
    858		split.mp = mp;
    859		split.index = index;
    860		split.nslot = n;
    861		split.key = name;
    862		split.data = &data;
    863		rc = dtSplitUp(tid, ip, &split, btstack);
    864		return rc;
    865	}
    866
    867	/*
    868	 *	leaf page does have enough room for new entry:
    869	 *
    870	 *	insert the new data entry into the leaf page;
    871	 */
    872	BT_MARK_DIRTY(mp, ip);
    873	/*
    874	 * acquire a transaction lock on the leaf page
    875	 */
    876	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
    877	dtlck = (struct dt_lock *) & tlck->lock;
    878	ASSERT(dtlck->index == 0);
    879	lv = & dtlck->lv[0];
    880
    881	/* linelock header */
    882	lv->offset = 0;
    883	lv->length = 1;
    884	dtlck->index++;
    885
    886	dtInsertEntry(p, index, name, &data, &dtlck);
    887
    888	/* linelock stbl of non-root leaf page */
    889	if (!(p->header.flag & BT_ROOT)) {
    890		if (dtlck->index >= dtlck->maxcnt)
    891			dtlck = (struct dt_lock *) txLinelock(dtlck);
    892		lv = & dtlck->lv[dtlck->index];
    893		n = index >> L2DTSLOTSIZE;
    894		lv->offset = p->header.stblindex + n;
    895		lv->length =
    896		    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
    897		dtlck->index++;
    898	}
    899
    900	/* unpin the leaf page */
    901	DT_PUTPAGE(mp);
    902
    903	return 0;
    904}
    905
    906
    907/*
    908 *	dtSplitUp()
    909 *
    910 * function: propagate insertion bottom up;
    911 *
    912 * parameter:
    913 *
    914 * return: 0 - success;
    915 *	   errno - failure;
    916 *	leaf page unpinned;
    917 */
    918static int dtSplitUp(tid_t tid,
    919	  struct inode *ip, struct dtsplit * split, struct btstack * btstack)
    920{
    921	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
    922	int rc = 0;
    923	struct metapage *smp;
    924	dtpage_t *sp;		/* split page */
    925	struct metapage *rmp;
    926	dtpage_t *rp;		/* new right page split from sp */
    927	pxd_t rpxd;		/* new right page extent descriptor */
    928	struct metapage *lmp;
    929	dtpage_t *lp;		/* left child page */
    930	int skip;		/* index of entry of insertion */
    931	struct btframe *parent;	/* parent page entry on traverse stack */
    932	s64 xaddr, nxaddr;
    933	int xlen, xsize;
    934	struct pxdlist pxdlist;
    935	pxd_t *pxd;
    936	struct component_name key = { 0, NULL };
    937	ddata_t *data = split->data;
    938	int n;
    939	struct dt_lock *dtlck;
    940	struct tlock *tlck;
    941	struct lv *lv;
    942	int quota_allocation = 0;
    943
    944	/* get split page */
    945	smp = split->mp;
    946	sp = DT_PAGE(ip, smp);
    947
    948	key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS);
    949	if (!key.name) {
    950		DT_PUTPAGE(smp);
    951		rc = -ENOMEM;
    952		goto dtSplitUp_Exit;
    953	}
    954
    955	/*
    956	 *	split leaf page
    957	 *
    958	 * The split routines insert the new entry, and
    959	 * acquire txLock as appropriate.
    960	 */
    961	/*
    962	 *	split root leaf page:
    963	 */
    964	if (sp->header.flag & BT_ROOT) {
    965		/*
    966		 * allocate a single extent child page
    967		 */
    968		xlen = 1;
    969		n = sbi->bsize >> L2DTSLOTSIZE;
    970		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
    971		n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
    972		if (n <= split->nslot)
    973			xlen++;
    974		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
    975			DT_PUTPAGE(smp);
    976			goto freeKeyName;
    977		}
    978
    979		pxdlist.maxnpxd = 1;
    980		pxdlist.npxd = 0;
    981		pxd = &pxdlist.pxd[0];
    982		PXDaddress(pxd, xaddr);
    983		PXDlength(pxd, xlen);
    984		split->pxdlist = &pxdlist;
    985		rc = dtSplitRoot(tid, ip, split, &rmp);
    986
    987		if (rc)
    988			dbFree(ip, xaddr, xlen);
    989		else
    990			DT_PUTPAGE(rmp);
    991
    992		DT_PUTPAGE(smp);
    993
    994		if (!DO_INDEX(ip))
    995			ip->i_size = xlen << sbi->l2bsize;
    996
    997		goto freeKeyName;
    998	}
    999
   1000	/*
   1001	 *	extend first leaf page
   1002	 *
   1003	 * extend the 1st extent if less than buffer page size
   1004	 * (dtExtendPage() reurns leaf page unpinned)
   1005	 */
   1006	pxd = &sp->header.self;
   1007	xlen = lengthPXD(pxd);
   1008	xsize = xlen << sbi->l2bsize;
   1009	if (xsize < PSIZE) {
   1010		xaddr = addressPXD(pxd);
   1011		n = xsize >> L2DTSLOTSIZE;
   1012		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
   1013		if ((n + sp->header.freecnt) <= split->nslot)
   1014			n = xlen + (xlen << 1);
   1015		else
   1016			n = xlen;
   1017
   1018		/* Allocate blocks to quota. */
   1019		rc = dquot_alloc_block(ip, n);
   1020		if (rc)
   1021			goto extendOut;
   1022		quota_allocation += n;
   1023
   1024		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
   1025				    (s64) n, &nxaddr)))
   1026			goto extendOut;
   1027
   1028		pxdlist.maxnpxd = 1;
   1029		pxdlist.npxd = 0;
   1030		pxd = &pxdlist.pxd[0];
   1031		PXDaddress(pxd, nxaddr);
   1032		PXDlength(pxd, xlen + n);
   1033		split->pxdlist = &pxdlist;
   1034		if ((rc = dtExtendPage(tid, ip, split, btstack))) {
   1035			nxaddr = addressPXD(pxd);
   1036			if (xaddr != nxaddr) {
   1037				/* free relocated extent */
   1038				xlen = lengthPXD(pxd);
   1039				dbFree(ip, nxaddr, (s64) xlen);
   1040			} else {
   1041				/* free extended delta */
   1042				xlen = lengthPXD(pxd) - n;
   1043				xaddr = addressPXD(pxd) + xlen;
   1044				dbFree(ip, xaddr, (s64) n);
   1045			}
   1046		} else if (!DO_INDEX(ip))
   1047			ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
   1048
   1049
   1050	      extendOut:
   1051		DT_PUTPAGE(smp);
   1052		goto freeKeyName;
   1053	}
   1054
   1055	/*
   1056	 *	split leaf page <sp> into <sp> and a new right page <rp>.
   1057	 *
   1058	 * return <rp> pinned and its extent descriptor <rpxd>
   1059	 */
   1060	/*
   1061	 * allocate new directory page extent and
   1062	 * new index page(s) to cover page split(s)
   1063	 *
   1064	 * allocation hint: ?
   1065	 */
   1066	n = btstack->nsplit;
   1067	pxdlist.maxnpxd = pxdlist.npxd = 0;
   1068	xlen = sbi->nbperpage;
   1069	for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
   1070		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
   1071			PXDaddress(pxd, xaddr);
   1072			PXDlength(pxd, xlen);
   1073			pxdlist.maxnpxd++;
   1074			continue;
   1075		}
   1076
   1077		DT_PUTPAGE(smp);
   1078
   1079		/* undo allocation */
   1080		goto splitOut;
   1081	}
   1082
   1083	split->pxdlist = &pxdlist;
   1084	if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
   1085		DT_PUTPAGE(smp);
   1086
   1087		/* undo allocation */
   1088		goto splitOut;
   1089	}
   1090
   1091	if (!DO_INDEX(ip))
   1092		ip->i_size += PSIZE;
   1093
   1094	/*
   1095	 * propagate up the router entry for the leaf page just split
   1096	 *
   1097	 * insert a router entry for the new page into the parent page,
   1098	 * propagate the insert/split up the tree by walking back the stack
   1099	 * of (bn of parent page, index of child page entry in parent page)
   1100	 * that were traversed during the search for the page that split.
   1101	 *
   1102	 * the propagation of insert/split up the tree stops if the root
   1103	 * splits or the page inserted into doesn't have to split to hold
   1104	 * the new entry.
   1105	 *
   1106	 * the parent entry for the split page remains the same, and
   1107	 * a new entry is inserted at its right with the first key and
   1108	 * block number of the new right page.
   1109	 *
   1110	 * There are a maximum of 4 pages pinned at any time:
   1111	 * two children, left parent and right parent (when the parent splits).
   1112	 * keep the child pages pinned while working on the parent.
   1113	 * make sure that all pins are released at exit.
   1114	 */
   1115	while ((parent = BT_POP(btstack)) != NULL) {
   1116		/* parent page specified by stack frame <parent> */
   1117
   1118		/* keep current child pages (<lp>, <rp>) pinned */
   1119		lmp = smp;
   1120		lp = sp;
   1121
   1122		/*
   1123		 * insert router entry in parent for new right child page <rp>
   1124		 */
   1125		/* get the parent page <sp> */
   1126		DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
   1127		if (rc) {
   1128			DT_PUTPAGE(lmp);
   1129			DT_PUTPAGE(rmp);
   1130			goto splitOut;
   1131		}
   1132
   1133		/*
   1134		 * The new key entry goes ONE AFTER the index of parent entry,
   1135		 * because the split was to the right.
   1136		 */
   1137		skip = parent->index + 1;
   1138
   1139		/*
   1140		 * compute the key for the router entry
   1141		 *
   1142		 * key suffix compression:
   1143		 * for internal pages that have leaf pages as children,
   1144		 * retain only what's needed to distinguish between
   1145		 * the new entry and the entry on the page to its left.
   1146		 * If the keys compare equal, retain the entire key.
   1147		 *
   1148		 * note that compression is performed only at computing
   1149		 * router key at the lowest internal level.
   1150		 * further compression of the key between pairs of higher
   1151		 * level internal pages loses too much information and
   1152		 * the search may fail.
   1153		 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
   1154		 * results in two adjacent parent entries (a)(xx).
   1155		 * if split occurs between these two entries, and
   1156		 * if compression is applied, the router key of parent entry
   1157		 * of right page (x) will divert search for x into right
   1158		 * subtree and miss x in the left subtree.)
   1159		 *
   1160		 * the entire key must be retained for the next-to-leftmost
   1161		 * internal key at any level of the tree, or search may fail
   1162		 * (e.g., ?)
   1163		 */
   1164		switch (rp->header.flag & BT_TYPE) {
   1165		case BT_LEAF:
   1166			/*
   1167			 * compute the length of prefix for suffix compression
   1168			 * between last entry of left page and first entry
   1169			 * of right page
   1170			 */
   1171			if ((sp->header.flag & BT_ROOT && skip > 1) ||
   1172			    sp->header.prev != 0 || skip > 1) {
   1173				/* compute uppercase router prefix key */
   1174				rc = ciGetLeafPrefixKey(lp,
   1175							lp->header.nextindex-1,
   1176							rp, 0, &key,
   1177							sbi->mntflag);
   1178				if (rc) {
   1179					DT_PUTPAGE(lmp);
   1180					DT_PUTPAGE(rmp);
   1181					DT_PUTPAGE(smp);
   1182					goto splitOut;
   1183				}
   1184			} else {
   1185				/* next to leftmost entry of
   1186				   lowest internal level */
   1187
   1188				/* compute uppercase router key */
   1189				dtGetKey(rp, 0, &key, sbi->mntflag);
   1190				key.name[key.namlen] = 0;
   1191
   1192				if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
   1193					ciToUpper(&key);
   1194			}
   1195
   1196			n = NDTINTERNAL(key.namlen);
   1197			break;
   1198
   1199		case BT_INTERNAL:
   1200			dtGetKey(rp, 0, &key, sbi->mntflag);
   1201			n = NDTINTERNAL(key.namlen);
   1202			break;
   1203
   1204		default:
   1205			jfs_err("dtSplitUp(): UFO!");
   1206			break;
   1207		}
   1208
   1209		/* unpin left child page */
   1210		DT_PUTPAGE(lmp);
   1211
   1212		/*
   1213		 * compute the data for the router entry
   1214		 */
   1215		data->xd = rpxd;	/* child page xd */
   1216
   1217		/*
   1218		 * parent page is full - split the parent page
   1219		 */
   1220		if (n > sp->header.freecnt) {
   1221			/* init for parent page split */
   1222			split->mp = smp;
   1223			split->index = skip;	/* index at insert */
   1224			split->nslot = n;
   1225			split->key = &key;
   1226			/* split->data = data; */
   1227
   1228			/* unpin right child page */
   1229			DT_PUTPAGE(rmp);
   1230
   1231			/* The split routines insert the new entry,
   1232			 * acquire txLock as appropriate.
   1233			 * return <rp> pinned and its block number <rbn>.
   1234			 */
   1235			rc = (sp->header.flag & BT_ROOT) ?
   1236			    dtSplitRoot(tid, ip, split, &rmp) :
   1237			    dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
   1238			if (rc) {
   1239				DT_PUTPAGE(smp);
   1240				goto splitOut;
   1241			}
   1242
   1243			/* smp and rmp are pinned */
   1244		}
   1245		/*
   1246		 * parent page is not full - insert router entry in parent page
   1247		 */
   1248		else {
   1249			BT_MARK_DIRTY(smp, ip);
   1250			/*
   1251			 * acquire a transaction lock on the parent page
   1252			 */
   1253			tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
   1254			dtlck = (struct dt_lock *) & tlck->lock;
   1255			ASSERT(dtlck->index == 0);
   1256			lv = & dtlck->lv[0];
   1257
   1258			/* linelock header */
   1259			lv->offset = 0;
   1260			lv->length = 1;
   1261			dtlck->index++;
   1262
   1263			/* linelock stbl of non-root parent page */
   1264			if (!(sp->header.flag & BT_ROOT)) {
   1265				lv++;
   1266				n = skip >> L2DTSLOTSIZE;
   1267				lv->offset = sp->header.stblindex + n;
   1268				lv->length =
   1269				    ((sp->header.nextindex -
   1270				      1) >> L2DTSLOTSIZE) - n + 1;
   1271				dtlck->index++;
   1272			}
   1273
   1274			dtInsertEntry(sp, skip, &key, data, &dtlck);
   1275
   1276			/* exit propagate up */
   1277			break;
   1278		}
   1279	}
   1280
   1281	/* unpin current split and its right page */
   1282	DT_PUTPAGE(smp);
   1283	DT_PUTPAGE(rmp);
   1284
   1285	/*
   1286	 * free remaining extents allocated for split
   1287	 */
   1288      splitOut:
   1289	n = pxdlist.npxd;
   1290	pxd = &pxdlist.pxd[n];
   1291	for (; n < pxdlist.maxnpxd; n++, pxd++)
   1292		dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
   1293
   1294      freeKeyName:
   1295	kfree(key.name);
   1296
   1297	/* Rollback quota allocation */
   1298	if (rc && quota_allocation)
   1299		dquot_free_block(ip, quota_allocation);
   1300
   1301      dtSplitUp_Exit:
   1302
   1303	return rc;
   1304}
   1305
   1306
   1307/*
   1308 *	dtSplitPage()
   1309 *
   1310 * function: Split a non-root page of a btree.
   1311 *
   1312 * parameter:
   1313 *
   1314 * return: 0 - success;
   1315 *	   errno - failure;
   1316 *	return split and new page pinned;
   1317 */
   1318static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
   1319	    struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
   1320{
   1321	int rc = 0;
   1322	struct metapage *smp;
   1323	dtpage_t *sp;
   1324	struct metapage *rmp;
   1325	dtpage_t *rp;		/* new right page allocated */
   1326	s64 rbn;		/* new right page block number */
   1327	struct metapage *mp;
   1328	dtpage_t *p;
   1329	s64 nextbn;
   1330	struct pxdlist *pxdlist;
   1331	pxd_t *pxd;
   1332	int skip, nextindex, half, left, nxt, off, si;
   1333	struct ldtentry *ldtentry;
   1334	struct idtentry *idtentry;
   1335	u8 *stbl;
   1336	struct dtslot *f;
   1337	int fsi, stblsize;
   1338	int n;
   1339	struct dt_lock *sdtlck, *rdtlck;
   1340	struct tlock *tlck;
   1341	struct dt_lock *dtlck;
   1342	struct lv *slv, *rlv, *lv;
   1343
   1344	/* get split page */
   1345	smp = split->mp;
   1346	sp = DT_PAGE(ip, smp);
   1347
   1348	/*
   1349	 * allocate the new right page for the split
   1350	 */
   1351	pxdlist = split->pxdlist;
   1352	pxd = &pxdlist->pxd[pxdlist->npxd];
   1353	pxdlist->npxd++;
   1354	rbn = addressPXD(pxd);
   1355	rmp = get_metapage(ip, rbn, PSIZE, 1);
   1356	if (rmp == NULL)
   1357		return -EIO;
   1358
   1359	/* Allocate blocks to quota. */
   1360	rc = dquot_alloc_block(ip, lengthPXD(pxd));
   1361	if (rc) {
   1362		release_metapage(rmp);
   1363		return rc;
   1364	}
   1365
   1366	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
   1367
   1368	BT_MARK_DIRTY(rmp, ip);
   1369	/*
   1370	 * acquire a transaction lock on the new right page
   1371	 */
   1372	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
   1373	rdtlck = (struct dt_lock *) & tlck->lock;
   1374
   1375	rp = (dtpage_t *) rmp->data;
   1376	*rpp = rp;
   1377	rp->header.self = *pxd;
   1378
   1379	BT_MARK_DIRTY(smp, ip);
   1380	/*
   1381	 * acquire a transaction lock on the split page
   1382	 *
   1383	 * action:
   1384	 */
   1385	tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
   1386	sdtlck = (struct dt_lock *) & tlck->lock;
   1387
   1388	/* linelock header of split page */
   1389	ASSERT(sdtlck->index == 0);
   1390	slv = & sdtlck->lv[0];
   1391	slv->offset = 0;
   1392	slv->length = 1;
   1393	sdtlck->index++;
   1394
   1395	/*
   1396	 * initialize/update sibling pointers between sp and rp
   1397	 */
   1398	nextbn = le64_to_cpu(sp->header.next);
   1399	rp->header.next = cpu_to_le64(nextbn);
   1400	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
   1401	sp->header.next = cpu_to_le64(rbn);
   1402
   1403	/*
   1404	 * initialize new right page
   1405	 */
   1406	rp->header.flag = sp->header.flag;
   1407
   1408	/* compute sorted entry table at start of extent data area */
   1409	rp->header.nextindex = 0;
   1410	rp->header.stblindex = 1;
   1411
   1412	n = PSIZE >> L2DTSLOTSIZE;
   1413	rp->header.maxslot = n;
   1414	stblsize = (n + 31) >> L2DTSLOTSIZE;	/* in unit of slot */
   1415
   1416	/* init freelist */
   1417	fsi = rp->header.stblindex + stblsize;
   1418	rp->header.freelist = fsi;
   1419	rp->header.freecnt = rp->header.maxslot - fsi;
   1420
   1421	/*
   1422	 *	sequential append at tail: append without split
   1423	 *
   1424	 * If splitting the last page on a level because of appending
   1425	 * a entry to it (skip is maxentry), it's likely that the access is
   1426	 * sequential. Adding an empty page on the side of the level is less
   1427	 * work and can push the fill factor much higher than normal.
   1428	 * If we're wrong it's no big deal, we'll just do the split the right
   1429	 * way next time.
   1430	 * (It may look like it's equally easy to do a similar hack for
   1431	 * reverse sorted data, that is, split the tree left,
   1432	 * but it's not. Be my guest.)
   1433	 */
   1434	if (nextbn == 0 && split->index == sp->header.nextindex) {
   1435		/* linelock header + stbl (first slot) of new page */
   1436		rlv = & rdtlck->lv[rdtlck->index];
   1437		rlv->offset = 0;
   1438		rlv->length = 2;
   1439		rdtlck->index++;
   1440
   1441		/*
   1442		 * initialize freelist of new right page
   1443		 */
   1444		f = &rp->slot[fsi];
   1445		for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
   1446			f->next = fsi;
   1447		f->next = -1;
   1448
   1449		/* insert entry at the first entry of the new right page */
   1450		dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
   1451
   1452		goto out;
   1453	}
   1454
   1455	/*
   1456	 *	non-sequential insert (at possibly middle page)
   1457	 */
   1458
   1459	/*
   1460	 * update prev pointer of previous right sibling page;
   1461	 */
   1462	if (nextbn != 0) {
   1463		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
   1464		if (rc) {
   1465			discard_metapage(rmp);
   1466			return rc;
   1467		}
   1468
   1469		BT_MARK_DIRTY(mp, ip);
   1470		/*
   1471		 * acquire a transaction lock on the next page
   1472		 */
   1473		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
   1474		jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
   1475			tlck, ip, mp);
   1476		dtlck = (struct dt_lock *) & tlck->lock;
   1477
   1478		/* linelock header of previous right sibling page */
   1479		lv = & dtlck->lv[dtlck->index];
   1480		lv->offset = 0;
   1481		lv->length = 1;
   1482		dtlck->index++;
   1483
   1484		p->header.prev = cpu_to_le64(rbn);
   1485
   1486		DT_PUTPAGE(mp);
   1487	}
   1488
   1489	/*
   1490	 * split the data between the split and right pages.
   1491	 */
   1492	skip = split->index;
   1493	half = (PSIZE >> L2DTSLOTSIZE) >> 1;	/* swag */
   1494	left = 0;
   1495
   1496	/*
   1497	 *	compute fill factor for split pages
   1498	 *
   1499	 * <nxt> traces the next entry to move to rp
   1500	 * <off> traces the next entry to stay in sp
   1501	 */
   1502	stbl = (u8 *) & sp->slot[sp->header.stblindex];
   1503	nextindex = sp->header.nextindex;
   1504	for (nxt = off = 0; nxt < nextindex; ++off) {
   1505		if (off == skip)
   1506			/* check for fill factor with new entry size */
   1507			n = split->nslot;
   1508		else {
   1509			si = stbl[nxt];
   1510			switch (sp->header.flag & BT_TYPE) {
   1511			case BT_LEAF:
   1512				ldtentry = (struct ldtentry *) & sp->slot[si];
   1513				if (DO_INDEX(ip))
   1514					n = NDTLEAF(ldtentry->namlen);
   1515				else
   1516					n = NDTLEAF_LEGACY(ldtentry->
   1517							   namlen);
   1518				break;
   1519
   1520			case BT_INTERNAL:
   1521				idtentry = (struct idtentry *) & sp->slot[si];
   1522				n = NDTINTERNAL(idtentry->namlen);
   1523				break;
   1524
   1525			default:
   1526				break;
   1527			}
   1528
   1529			++nxt;	/* advance to next entry to move in sp */
   1530		}
   1531
   1532		left += n;
   1533		if (left >= half)
   1534			break;
   1535	}
   1536
   1537	/* <nxt> poins to the 1st entry to move */
   1538
   1539	/*
   1540	 *	move entries to right page
   1541	 *
   1542	 * dtMoveEntry() initializes rp and reserves entry for insertion
   1543	 *
   1544	 * split page moved out entries are linelocked;
   1545	 * new/right page moved in entries are linelocked;
   1546	 */
   1547	/* linelock header + stbl of new right page */
   1548	rlv = & rdtlck->lv[rdtlck->index];
   1549	rlv->offset = 0;
   1550	rlv->length = 5;
   1551	rdtlck->index++;
   1552
   1553	dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
   1554
   1555	sp->header.nextindex = nxt;
   1556
   1557	/*
   1558	 * finalize freelist of new right page
   1559	 */
   1560	fsi = rp->header.freelist;
   1561	f = &rp->slot[fsi];
   1562	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
   1563		f->next = fsi;
   1564	f->next = -1;
   1565
   1566	/*
   1567	 * Update directory index table for entries now in right page
   1568	 */
   1569	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
   1570		s64 lblock;
   1571
   1572		mp = NULL;
   1573		stbl = DT_GETSTBL(rp);
   1574		for (n = 0; n < rp->header.nextindex; n++) {
   1575			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
   1576			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
   1577				     rbn, n, &mp, &lblock);
   1578		}
   1579		if (mp)
   1580			release_metapage(mp);
   1581	}
   1582
   1583	/*
   1584	 * the skipped index was on the left page,
   1585	 */
   1586	if (skip <= off) {
   1587		/* insert the new entry in the split page */
   1588		dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
   1589
   1590		/* linelock stbl of split page */
   1591		if (sdtlck->index >= sdtlck->maxcnt)
   1592			sdtlck = (struct dt_lock *) txLinelock(sdtlck);
   1593		slv = & sdtlck->lv[sdtlck->index];
   1594		n = skip >> L2DTSLOTSIZE;
   1595		slv->offset = sp->header.stblindex + n;
   1596		slv->length =
   1597		    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
   1598		sdtlck->index++;
   1599	}
   1600	/*
   1601	 * the skipped index was on the right page,
   1602	 */
   1603	else {
   1604		/* adjust the skip index to reflect the new position */
   1605		skip -= nxt;
   1606
   1607		/* insert the new entry in the right page */
   1608		dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
   1609	}
   1610
   1611      out:
   1612	*rmpp = rmp;
   1613	*rpxdp = *pxd;
   1614
   1615	return rc;
   1616}
   1617
   1618
   1619/*
   1620 *	dtExtendPage()
   1621 *
   1622 * function: extend 1st/only directory leaf page
   1623 *
   1624 * parameter:
   1625 *
   1626 * return: 0 - success;
   1627 *	   errno - failure;
   1628 *	return extended page pinned;
   1629 */
   1630static int dtExtendPage(tid_t tid,
   1631	     struct inode *ip, struct dtsplit * split, struct btstack * btstack)
   1632{
   1633	struct super_block *sb = ip->i_sb;
   1634	int rc;
   1635	struct metapage *smp, *pmp, *mp;
   1636	dtpage_t *sp, *pp;
   1637	struct pxdlist *pxdlist;
   1638	pxd_t *pxd, *tpxd;
   1639	int xlen, xsize;
   1640	int newstblindex, newstblsize;
   1641	int oldstblindex, oldstblsize;
   1642	int fsi, last;
   1643	struct dtslot *f;
   1644	struct btframe *parent;
   1645	int n;
   1646	struct dt_lock *dtlck;
   1647	s64 xaddr, txaddr;
   1648	struct tlock *tlck;
   1649	struct pxd_lock *pxdlock;
   1650	struct lv *lv;
   1651	uint type;
   1652	struct ldtentry *ldtentry;
   1653	u8 *stbl;
   1654
   1655	/* get page to extend */
   1656	smp = split->mp;
   1657	sp = DT_PAGE(ip, smp);
   1658
   1659	/* get parent/root page */
   1660	parent = BT_POP(btstack);
   1661	DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
   1662	if (rc)
   1663		return (rc);
   1664
   1665	/*
   1666	 *	extend the extent
   1667	 */
   1668	pxdlist = split->pxdlist;
   1669	pxd = &pxdlist->pxd[pxdlist->npxd];
   1670	pxdlist->npxd++;
   1671
   1672	xaddr = addressPXD(pxd);
   1673	tpxd = &sp->header.self;
   1674	txaddr = addressPXD(tpxd);
   1675	/* in-place extension */
   1676	if (xaddr == txaddr) {
   1677		type = tlckEXTEND;
   1678	}
   1679	/* relocation */
   1680	else {
   1681		type = tlckNEW;
   1682
   1683		/* save moved extent descriptor for later free */
   1684		tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
   1685		pxdlock = (struct pxd_lock *) & tlck->lock;
   1686		pxdlock->flag = mlckFREEPXD;
   1687		pxdlock->pxd = sp->header.self;
   1688		pxdlock->index = 1;
   1689
   1690		/*
   1691		 * Update directory index table to reflect new page address
   1692		 */
   1693		if (DO_INDEX(ip)) {
   1694			s64 lblock;
   1695
   1696			mp = NULL;
   1697			stbl = DT_GETSTBL(sp);
   1698			for (n = 0; n < sp->header.nextindex; n++) {
   1699				ldtentry =
   1700				    (struct ldtentry *) & sp->slot[stbl[n]];
   1701				modify_index(tid, ip,
   1702					     le32_to_cpu(ldtentry->index),
   1703					     xaddr, n, &mp, &lblock);
   1704			}
   1705			if (mp)
   1706				release_metapage(mp);
   1707		}
   1708	}
   1709
   1710	/*
   1711	 *	extend the page
   1712	 */
   1713	sp->header.self = *pxd;
   1714
   1715	jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
   1716
   1717	BT_MARK_DIRTY(smp, ip);
   1718	/*
   1719	 * acquire a transaction lock on the extended/leaf page
   1720	 */
   1721	tlck = txLock(tid, ip, smp, tlckDTREE | type);
   1722	dtlck = (struct dt_lock *) & tlck->lock;
   1723	lv = & dtlck->lv[0];
   1724
   1725	/* update buffer extent descriptor of extended page */
   1726	xlen = lengthPXD(pxd);
   1727	xsize = xlen << JFS_SBI(sb)->l2bsize;
   1728
   1729	/*
   1730	 * copy old stbl to new stbl at start of extended area
   1731	 */
   1732	oldstblindex = sp->header.stblindex;
   1733	oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
   1734	newstblindex = sp->header.maxslot;
   1735	n = xsize >> L2DTSLOTSIZE;
   1736	newstblsize = (n + 31) >> L2DTSLOTSIZE;
   1737	memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
   1738	       sp->header.nextindex);
   1739
   1740	/*
   1741	 * in-line extension: linelock old area of extended page
   1742	 */
   1743	if (type == tlckEXTEND) {
   1744		/* linelock header */
   1745		lv->offset = 0;
   1746		lv->length = 1;
   1747		dtlck->index++;
   1748		lv++;
   1749
   1750		/* linelock new stbl of extended page */
   1751		lv->offset = newstblindex;
   1752		lv->length = newstblsize;
   1753	}
   1754	/*
   1755	 * relocation: linelock whole relocated area
   1756	 */
   1757	else {
   1758		lv->offset = 0;
   1759		lv->length = sp->header.maxslot + newstblsize;
   1760	}
   1761
   1762	dtlck->index++;
   1763
   1764	sp->header.maxslot = n;
   1765	sp->header.stblindex = newstblindex;
   1766	/* sp->header.nextindex remains the same */
   1767
   1768	/*
   1769	 * add old stbl region at head of freelist
   1770	 */
   1771	fsi = oldstblindex;
   1772	f = &sp->slot[fsi];
   1773	last = sp->header.freelist;
   1774	for (n = 0; n < oldstblsize; n++, fsi++, f++) {
   1775		f->next = last;
   1776		last = fsi;
   1777	}
   1778	sp->header.freelist = last;
   1779	sp->header.freecnt += oldstblsize;
   1780
   1781	/*
   1782	 * append free region of newly extended area at tail of freelist
   1783	 */
   1784	/* init free region of newly extended area */
   1785	fsi = n = newstblindex + newstblsize;
   1786	f = &sp->slot[fsi];
   1787	for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
   1788		f->next = fsi;
   1789	f->next = -1;
   1790
   1791	/* append new free region at tail of old freelist */
   1792	fsi = sp->header.freelist;
   1793	if (fsi == -1)
   1794		sp->header.freelist = n;
   1795	else {
   1796		do {
   1797			f = &sp->slot[fsi];
   1798			fsi = f->next;
   1799		} while (fsi != -1);
   1800
   1801		f->next = n;
   1802	}
   1803
   1804	sp->header.freecnt += sp->header.maxslot - n;
   1805
   1806	/*
   1807	 * insert the new entry
   1808	 */
   1809	dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
   1810
   1811	BT_MARK_DIRTY(pmp, ip);
   1812	/*
   1813	 * linelock any freeslots residing in old extent
   1814	 */
   1815	if (type == tlckEXTEND) {
   1816		n = sp->header.maxslot >> 2;
   1817		if (sp->header.freelist < n)
   1818			dtLinelockFreelist(sp, n, &dtlck);
   1819	}
   1820
   1821	/*
   1822	 *	update parent entry on the parent/root page
   1823	 */
   1824	/*
   1825	 * acquire a transaction lock on the parent/root page
   1826	 */
   1827	tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
   1828	dtlck = (struct dt_lock *) & tlck->lock;
   1829	lv = & dtlck->lv[dtlck->index];
   1830
   1831	/* linelock parent entry - 1st slot */
   1832	lv->offset = 1;
   1833	lv->length = 1;
   1834	dtlck->index++;
   1835
   1836	/* update the parent pxd for page extension */
   1837	tpxd = (pxd_t *) & pp->slot[1];
   1838	*tpxd = *pxd;
   1839
   1840	DT_PUTPAGE(pmp);
   1841	return 0;
   1842}
   1843
   1844
   1845/*
   1846 *	dtSplitRoot()
   1847 *
   1848 * function:
   1849 *	split the full root page into
   1850 *	original/root/split page and new right page
   1851 *	i.e., root remains fixed in tree anchor (inode) and
   1852 *	the root is copied to a single new right child page
   1853 *	since root page << non-root page, and
   1854 *	the split root page contains a single entry for the
   1855 *	new right child page.
   1856 *
   1857 * parameter:
   1858 *
   1859 * return: 0 - success;
   1860 *	   errno - failure;
   1861 *	return new page pinned;
   1862 */
   1863static int dtSplitRoot(tid_t tid,
   1864	    struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
   1865{
   1866	struct super_block *sb = ip->i_sb;
   1867	struct metapage *smp;
   1868	dtroot_t *sp;
   1869	struct metapage *rmp;
   1870	dtpage_t *rp;
   1871	s64 rbn;
   1872	int xlen;
   1873	int xsize;
   1874	struct dtslot *f;
   1875	s8 *stbl;
   1876	int fsi, stblsize, n;
   1877	struct idtentry *s;
   1878	pxd_t *ppxd;
   1879	struct pxdlist *pxdlist;
   1880	pxd_t *pxd;
   1881	struct dt_lock *dtlck;
   1882	struct tlock *tlck;
   1883	struct lv *lv;
   1884	int rc;
   1885
   1886	/* get split root page */
   1887	smp = split->mp;
   1888	sp = &JFS_IP(ip)->i_dtroot;
   1889
   1890	/*
   1891	 *	allocate/initialize a single (right) child page
   1892	 *
   1893	 * N.B. at first split, a one (or two) block to fit new entry
   1894	 * is allocated; at subsequent split, a full page is allocated;
   1895	 */
   1896	pxdlist = split->pxdlist;
   1897	pxd = &pxdlist->pxd[pxdlist->npxd];
   1898	pxdlist->npxd++;
   1899	rbn = addressPXD(pxd);
   1900	xlen = lengthPXD(pxd);
   1901	xsize = xlen << JFS_SBI(sb)->l2bsize;
   1902	rmp = get_metapage(ip, rbn, xsize, 1);
   1903	if (!rmp)
   1904		return -EIO;
   1905
   1906	rp = rmp->data;
   1907
   1908	/* Allocate blocks to quota. */
   1909	rc = dquot_alloc_block(ip, lengthPXD(pxd));
   1910	if (rc) {
   1911		release_metapage(rmp);
   1912		return rc;
   1913	}
   1914
   1915	BT_MARK_DIRTY(rmp, ip);
   1916	/*
   1917	 * acquire a transaction lock on the new right page
   1918	 */
   1919	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
   1920	dtlck = (struct dt_lock *) & tlck->lock;
   1921
   1922	rp->header.flag =
   1923	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
   1924	rp->header.self = *pxd;
   1925
   1926	/* initialize sibling pointers */
   1927	rp->header.next = 0;
   1928	rp->header.prev = 0;
   1929
   1930	/*
   1931	 *	move in-line root page into new right page extent
   1932	 */
   1933	/* linelock header + copied entries + new stbl (1st slot) in new page */
   1934	ASSERT(dtlck->index == 0);
   1935	lv = & dtlck->lv[0];
   1936	lv->offset = 0;
   1937	lv->length = 10;	/* 1 + 8 + 1 */
   1938	dtlck->index++;
   1939
   1940	n = xsize >> L2DTSLOTSIZE;
   1941	rp->header.maxslot = n;
   1942	stblsize = (n + 31) >> L2DTSLOTSIZE;
   1943
   1944	/* copy old stbl to new stbl at start of extended area */
   1945	rp->header.stblindex = DTROOTMAXSLOT;
   1946	stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
   1947	memcpy(stbl, sp->header.stbl, sp->header.nextindex);
   1948	rp->header.nextindex = sp->header.nextindex;
   1949
   1950	/* copy old data area to start of new data area */
   1951	memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
   1952
   1953	/*
   1954	 * append free region of newly extended area at tail of freelist
   1955	 */
   1956	/* init free region of newly extended area */
   1957	fsi = n = DTROOTMAXSLOT + stblsize;
   1958	f = &rp->slot[fsi];
   1959	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
   1960		f->next = fsi;
   1961	f->next = -1;
   1962
   1963	/* append new free region at tail of old freelist */
   1964	fsi = sp->header.freelist;
   1965	if (fsi == -1)
   1966		rp->header.freelist = n;
   1967	else {
   1968		rp->header.freelist = fsi;
   1969
   1970		do {
   1971			f = &rp->slot[fsi];
   1972			fsi = f->next;
   1973		} while (fsi != -1);
   1974
   1975		f->next = n;
   1976	}
   1977
   1978	rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
   1979
   1980	/*
   1981	 * Update directory index table for entries now in right page
   1982	 */
   1983	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
   1984		s64 lblock;
   1985		struct metapage *mp = NULL;
   1986		struct ldtentry *ldtentry;
   1987
   1988		stbl = DT_GETSTBL(rp);
   1989		for (n = 0; n < rp->header.nextindex; n++) {
   1990			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
   1991			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
   1992				     rbn, n, &mp, &lblock);
   1993		}
   1994		if (mp)
   1995			release_metapage(mp);
   1996	}
   1997	/*
   1998	 * insert the new entry into the new right/child page
   1999	 * (skip index in the new right page will not change)
   2000	 */
   2001	dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
   2002
   2003	/*
   2004	 *	reset parent/root page
   2005	 *
   2006	 * set the 1st entry offset to 0, which force the left-most key
   2007	 * at any level of the tree to be less than any search key.
   2008	 *
   2009	 * The btree comparison code guarantees that the left-most key on any
   2010	 * level of the tree is never used, so it doesn't need to be filled in.
   2011	 */
   2012	BT_MARK_DIRTY(smp, ip);
   2013	/*
   2014	 * acquire a transaction lock on the root page (in-memory inode)
   2015	 */
   2016	tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
   2017	dtlck = (struct dt_lock *) & tlck->lock;
   2018
   2019	/* linelock root */
   2020	ASSERT(dtlck->index == 0);
   2021	lv = & dtlck->lv[0];
   2022	lv->offset = 0;
   2023	lv->length = DTROOTMAXSLOT;
   2024	dtlck->index++;
   2025
   2026	/* update page header of root */
   2027	if (sp->header.flag & BT_LEAF) {
   2028		sp->header.flag &= ~BT_LEAF;
   2029		sp->header.flag |= BT_INTERNAL;
   2030	}
   2031
   2032	/* init the first entry */
   2033	s = (struct idtentry *) & sp->slot[DTENTRYSTART];
   2034	ppxd = (pxd_t *) s;
   2035	*ppxd = *pxd;
   2036	s->next = -1;
   2037	s->namlen = 0;
   2038
   2039	stbl = sp->header.stbl;
   2040	stbl[0] = DTENTRYSTART;
   2041	sp->header.nextindex = 1;
   2042
   2043	/* init freelist */
   2044	fsi = DTENTRYSTART + 1;
   2045	f = &sp->slot[fsi];
   2046
   2047	/* init free region of remaining area */
   2048	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
   2049		f->next = fsi;
   2050	f->next = -1;
   2051
   2052	sp->header.freelist = DTENTRYSTART + 1;
   2053	sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
   2054
   2055	*rmpp = rmp;
   2056
   2057	return 0;
   2058}
   2059
   2060
   2061/*
   2062 *	dtDelete()
   2063 *
   2064 * function: delete the entry(s) referenced by a key.
   2065 *
   2066 * parameter:
   2067 *
   2068 * return:
   2069 */
   2070int dtDelete(tid_t tid,
   2071	 struct inode *ip, struct component_name * key, ino_t * ino, int flag)
   2072{
   2073	int rc = 0;
   2074	s64 bn;
   2075	struct metapage *mp, *imp;
   2076	dtpage_t *p;
   2077	int index;
   2078	struct btstack btstack;
   2079	struct dt_lock *dtlck;
   2080	struct tlock *tlck;
   2081	struct lv *lv;
   2082	int i;
   2083	struct ldtentry *ldtentry;
   2084	u8 *stbl;
   2085	u32 table_index, next_index;
   2086	struct metapage *nmp;
   2087	dtpage_t *np;
   2088
   2089	/*
   2090	 *	search for the entry to delete:
   2091	 *
   2092	 * dtSearch() returns (leaf page pinned, index at which to delete).
   2093	 */
   2094	if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
   2095		return rc;
   2096
   2097	/* retrieve search result */
   2098	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
   2099
   2100	/*
   2101	 * We need to find put the index of the next entry into the
   2102	 * directory index table in order to resume a readdir from this
   2103	 * entry.
   2104	 */
   2105	if (DO_INDEX(ip)) {
   2106		stbl = DT_GETSTBL(p);
   2107		ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
   2108		table_index = le32_to_cpu(ldtentry->index);
   2109		if (index == (p->header.nextindex - 1)) {
   2110			/*
   2111			 * Last entry in this leaf page
   2112			 */
   2113			if ((p->header.flag & BT_ROOT)
   2114			    || (p->header.next == 0))
   2115				next_index = -1;
   2116			else {
   2117				/* Read next leaf page */
   2118				DT_GETPAGE(ip, le64_to_cpu(p->header.next),
   2119					   nmp, PSIZE, np, rc);
   2120				if (rc)
   2121					next_index = -1;
   2122				else {
   2123					stbl = DT_GETSTBL(np);
   2124					ldtentry =
   2125					    (struct ldtentry *) & np->
   2126					    slot[stbl[0]];
   2127					next_index =
   2128					    le32_to_cpu(ldtentry->index);
   2129					DT_PUTPAGE(nmp);
   2130				}
   2131			}
   2132		} else {
   2133			ldtentry =
   2134			    (struct ldtentry *) & p->slot[stbl[index + 1]];
   2135			next_index = le32_to_cpu(ldtentry->index);
   2136		}
   2137		free_index(tid, ip, table_index, next_index);
   2138	}
   2139	/*
   2140	 * the leaf page becomes empty, delete the page
   2141	 */
   2142	if (p->header.nextindex == 1) {
   2143		/* delete empty page */
   2144		rc = dtDeleteUp(tid, ip, mp, p, &btstack);
   2145	}
   2146	/*
   2147	 * the leaf page has other entries remaining:
   2148	 *
   2149	 * delete the entry from the leaf page.
   2150	 */
   2151	else {
   2152		BT_MARK_DIRTY(mp, ip);
   2153		/*
   2154		 * acquire a transaction lock on the leaf page
   2155		 */
   2156		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
   2157		dtlck = (struct dt_lock *) & tlck->lock;
   2158
   2159		/*
   2160		 * Do not assume that dtlck->index will be zero.  During a
   2161		 * rename within a directory, this transaction may have
   2162		 * modified this page already when adding the new entry.
   2163		 */
   2164
   2165		/* linelock header */
   2166		if (dtlck->index >= dtlck->maxcnt)
   2167			dtlck = (struct dt_lock *) txLinelock(dtlck);
   2168		lv = & dtlck->lv[dtlck->index];
   2169		lv->offset = 0;
   2170		lv->length = 1;
   2171		dtlck->index++;
   2172
   2173		/* linelock stbl of non-root leaf page */
   2174		if (!(p->header.flag & BT_ROOT)) {
   2175			if (dtlck->index >= dtlck->maxcnt)
   2176				dtlck = (struct dt_lock *) txLinelock(dtlck);
   2177			lv = & dtlck->lv[dtlck->index];
   2178			i = index >> L2DTSLOTSIZE;
   2179			lv->offset = p->header.stblindex + i;
   2180			lv->length =
   2181			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
   2182			    i + 1;
   2183			dtlck->index++;
   2184		}
   2185
   2186		/* free the leaf entry */
   2187		dtDeleteEntry(p, index, &dtlck);
   2188
   2189		/*
   2190		 * Update directory index table for entries moved in stbl
   2191		 */
   2192		if (DO_INDEX(ip) && index < p->header.nextindex) {
   2193			s64 lblock;
   2194
   2195			imp = NULL;
   2196			stbl = DT_GETSTBL(p);
   2197			for (i = index; i < p->header.nextindex; i++) {
   2198				ldtentry =
   2199				    (struct ldtentry *) & p->slot[stbl[i]];
   2200				modify_index(tid, ip,
   2201					     le32_to_cpu(ldtentry->index),
   2202					     bn, i, &imp, &lblock);
   2203			}
   2204			if (imp)
   2205				release_metapage(imp);
   2206		}
   2207
   2208		DT_PUTPAGE(mp);
   2209	}
   2210
   2211	return rc;
   2212}
   2213
   2214
   2215/*
   2216 *	dtDeleteUp()
   2217 *
   2218 * function:
   2219 *	free empty pages as propagating deletion up the tree
   2220 *
   2221 * parameter:
   2222 *
   2223 * return:
   2224 */
   2225static int dtDeleteUp(tid_t tid, struct inode *ip,
   2226	   struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
   2227{
   2228	int rc = 0;
   2229	struct metapage *mp;
   2230	dtpage_t *p;
   2231	int index, nextindex;
   2232	int xlen;
   2233	struct btframe *parent;
   2234	struct dt_lock *dtlck;
   2235	struct tlock *tlck;
   2236	struct lv *lv;
   2237	struct pxd_lock *pxdlock;
   2238	int i;
   2239
   2240	/*
   2241	 *	keep the root leaf page which has become empty
   2242	 */
   2243	if (BT_IS_ROOT(fmp)) {
   2244		/*
   2245		 * reset the root
   2246		 *
   2247		 * dtInitRoot() acquires txlock on the root
   2248		 */
   2249		dtInitRoot(tid, ip, PARENT(ip));
   2250
   2251		DT_PUTPAGE(fmp);
   2252
   2253		return 0;
   2254	}
   2255
   2256	/*
   2257	 *	free the non-root leaf page
   2258	 */
   2259	/*
   2260	 * acquire a transaction lock on the page
   2261	 *
   2262	 * write FREEXTENT|NOREDOPAGE log record
   2263	 * N.B. linelock is overlaid as freed extent descriptor, and
   2264	 * the buffer page is freed;
   2265	 */
   2266	tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
   2267	pxdlock = (struct pxd_lock *) & tlck->lock;
   2268	pxdlock->flag = mlckFREEPXD;
   2269	pxdlock->pxd = fp->header.self;
   2270	pxdlock->index = 1;
   2271
   2272	/* update sibling pointers */
   2273	if ((rc = dtRelink(tid, ip, fp))) {
   2274		BT_PUTPAGE(fmp);
   2275		return rc;
   2276	}
   2277
   2278	xlen = lengthPXD(&fp->header.self);
   2279
   2280	/* Free quota allocation. */
   2281	dquot_free_block(ip, xlen);
   2282
   2283	/* free/invalidate its buffer page */
   2284	discard_metapage(fmp);
   2285
   2286	/*
   2287	 *	propagate page deletion up the directory tree
   2288	 *
   2289	 * If the delete from the parent page makes it empty,
   2290	 * continue all the way up the tree.
   2291	 * stop if the root page is reached (which is never deleted) or
   2292	 * if the entry deletion does not empty the page.
   2293	 */
   2294	while ((parent = BT_POP(btstack)) != NULL) {
   2295		/* pin the parent page <sp> */
   2296		DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
   2297		if (rc)
   2298			return rc;
   2299
   2300		/*
   2301		 * free the extent of the child page deleted
   2302		 */
   2303		index = parent->index;
   2304
   2305		/*
   2306		 * delete the entry for the child page from parent
   2307		 */
   2308		nextindex = p->header.nextindex;
   2309
   2310		/*
   2311		 * the parent has the single entry being deleted:
   2312		 *
   2313		 * free the parent page which has become empty.
   2314		 */
   2315		if (nextindex == 1) {
   2316			/*
   2317			 * keep the root internal page which has become empty
   2318			 */
   2319			if (p->header.flag & BT_ROOT) {
   2320				/*
   2321				 * reset the root
   2322				 *
   2323				 * dtInitRoot() acquires txlock on the root
   2324				 */
   2325				dtInitRoot(tid, ip, PARENT(ip));
   2326
   2327				DT_PUTPAGE(mp);
   2328
   2329				return 0;
   2330			}
   2331			/*
   2332			 * free the parent page
   2333			 */
   2334			else {
   2335				/*
   2336				 * acquire a transaction lock on the page
   2337				 *
   2338				 * write FREEXTENT|NOREDOPAGE log record
   2339				 */
   2340				tlck =
   2341				    txMaplock(tid, ip,
   2342					      tlckDTREE | tlckFREE);
   2343				pxdlock = (struct pxd_lock *) & tlck->lock;
   2344				pxdlock->flag = mlckFREEPXD;
   2345				pxdlock->pxd = p->header.self;
   2346				pxdlock->index = 1;
   2347
   2348				/* update sibling pointers */
   2349				if ((rc = dtRelink(tid, ip, p))) {
   2350					DT_PUTPAGE(mp);
   2351					return rc;
   2352				}
   2353
   2354				xlen = lengthPXD(&p->header.self);
   2355
   2356				/* Free quota allocation */
   2357				dquot_free_block(ip, xlen);
   2358
   2359				/* free/invalidate its buffer page */
   2360				discard_metapage(mp);
   2361
   2362				/* propagate up */
   2363				continue;
   2364			}
   2365		}
   2366
   2367		/*
   2368		 * the parent has other entries remaining:
   2369		 *
   2370		 * delete the router entry from the parent page.
   2371		 */
   2372		BT_MARK_DIRTY(mp, ip);
   2373		/*
   2374		 * acquire a transaction lock on the page
   2375		 *
   2376		 * action: router entry deletion
   2377		 */
   2378		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
   2379		dtlck = (struct dt_lock *) & tlck->lock;
   2380
   2381		/* linelock header */
   2382		if (dtlck->index >= dtlck->maxcnt)
   2383			dtlck = (struct dt_lock *) txLinelock(dtlck);
   2384		lv = & dtlck->lv[dtlck->index];
   2385		lv->offset = 0;
   2386		lv->length = 1;
   2387		dtlck->index++;
   2388
   2389		/* linelock stbl of non-root leaf page */
   2390		if (!(p->header.flag & BT_ROOT)) {
   2391			if (dtlck->index < dtlck->maxcnt)
   2392				lv++;
   2393			else {
   2394				dtlck = (struct dt_lock *) txLinelock(dtlck);
   2395				lv = & dtlck->lv[0];
   2396			}
   2397			i = index >> L2DTSLOTSIZE;
   2398			lv->offset = p->header.stblindex + i;
   2399			lv->length =
   2400			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
   2401			    i + 1;
   2402			dtlck->index++;
   2403		}
   2404
   2405		/* free the router entry */
   2406		dtDeleteEntry(p, index, &dtlck);
   2407
   2408		/* reset key of new leftmost entry of level (for consistency) */
   2409		if (index == 0 &&
   2410		    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
   2411			dtTruncateEntry(p, 0, &dtlck);
   2412
   2413		/* unpin the parent page */
   2414		DT_PUTPAGE(mp);
   2415
   2416		/* exit propagation up */
   2417		break;
   2418	}
   2419
   2420	if (!DO_INDEX(ip))
   2421		ip->i_size -= PSIZE;
   2422
   2423	return 0;
   2424}
   2425
   2426/*
   2427 *	dtRelink()
   2428 *
   2429 * function:
   2430 *	link around a freed page.
   2431 *
   2432 * parameter:
   2433 *	fp:	page to be freed
   2434 *
   2435 * return:
   2436 */
   2437static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
   2438{
   2439	int rc;
   2440	struct metapage *mp;
   2441	s64 nextbn, prevbn;
   2442	struct tlock *tlck;
   2443	struct dt_lock *dtlck;
   2444	struct lv *lv;
   2445
   2446	nextbn = le64_to_cpu(p->header.next);
   2447	prevbn = le64_to_cpu(p->header.prev);
   2448
   2449	/* update prev pointer of the next page */
   2450	if (nextbn != 0) {
   2451		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
   2452		if (rc)
   2453			return rc;
   2454
   2455		BT_MARK_DIRTY(mp, ip);
   2456		/*
   2457		 * acquire a transaction lock on the next page
   2458		 *
   2459		 * action: update prev pointer;
   2460		 */
   2461		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
   2462		jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
   2463			tlck, ip, mp);
   2464		dtlck = (struct dt_lock *) & tlck->lock;
   2465
   2466		/* linelock header */
   2467		if (dtlck->index >= dtlck->maxcnt)
   2468			dtlck = (struct dt_lock *) txLinelock(dtlck);
   2469		lv = & dtlck->lv[dtlck->index];
   2470		lv->offset = 0;
   2471		lv->length = 1;
   2472		dtlck->index++;
   2473
   2474		p->header.prev = cpu_to_le64(prevbn);
   2475		DT_PUTPAGE(mp);
   2476	}
   2477
   2478	/* update next pointer of the previous page */
   2479	if (prevbn != 0) {
   2480		DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
   2481		if (rc)
   2482			return rc;
   2483
   2484		BT_MARK_DIRTY(mp, ip);
   2485		/*
   2486		 * acquire a transaction lock on the prev page
   2487		 *
   2488		 * action: update next pointer;
   2489		 */
   2490		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
   2491		jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
   2492			tlck, ip, mp);
   2493		dtlck = (struct dt_lock *) & tlck->lock;
   2494
   2495		/* linelock header */
   2496		if (dtlck->index >= dtlck->maxcnt)
   2497			dtlck = (struct dt_lock *) txLinelock(dtlck);
   2498		lv = & dtlck->lv[dtlck->index];
   2499		lv->offset = 0;
   2500		lv->length = 1;
   2501		dtlck->index++;
   2502
   2503		p->header.next = cpu_to_le64(nextbn);
   2504		DT_PUTPAGE(mp);
   2505	}
   2506
   2507	return 0;
   2508}
   2509
   2510
   2511/*
   2512 *	dtInitRoot()
   2513 *
   2514 * initialize directory root (inline in inode)
   2515 */
   2516void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
   2517{
   2518	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
   2519	dtroot_t *p;
   2520	int fsi;
   2521	struct dtslot *f;
   2522	struct tlock *tlck;
   2523	struct dt_lock *dtlck;
   2524	struct lv *lv;
   2525	u16 xflag_save;
   2526
   2527	/*
   2528	 * If this was previously an non-empty directory, we need to remove
   2529	 * the old directory table.
   2530	 */
   2531	if (DO_INDEX(ip)) {
   2532		if (!jfs_dirtable_inline(ip)) {
   2533			struct tblock *tblk = tid_to_tblock(tid);
   2534			/*
   2535			 * We're playing games with the tid's xflag.  If
   2536			 * we're removing a regular file, the file's xtree
   2537			 * is committed with COMMIT_PMAP, but we always
   2538			 * commit the directories xtree with COMMIT_PWMAP.
   2539			 */
   2540			xflag_save = tblk->xflag;
   2541			tblk->xflag = 0;
   2542			/*
   2543			 * xtTruncate isn't guaranteed to fully truncate
   2544			 * the xtree.  The caller needs to check i_size
   2545			 * after committing the transaction to see if
   2546			 * additional truncation is needed.  The
   2547			 * COMMIT_Stale flag tells caller that we
   2548			 * initiated the truncation.
   2549			 */
   2550			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
   2551			set_cflag(COMMIT_Stale, ip);
   2552
   2553			tblk->xflag = xflag_save;
   2554		} else
   2555			ip->i_size = 1;
   2556
   2557		jfs_ip->next_index = 2;
   2558	} else
   2559		ip->i_size = IDATASIZE;
   2560
   2561	/*
   2562	 * acquire a transaction lock on the root
   2563	 *
   2564	 * action: directory initialization;
   2565	 */
   2566	tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
   2567		      tlckDTREE | tlckENTRY | tlckBTROOT);
   2568	dtlck = (struct dt_lock *) & tlck->lock;
   2569
   2570	/* linelock root */
   2571	ASSERT(dtlck->index == 0);
   2572	lv = & dtlck->lv[0];
   2573	lv->offset = 0;
   2574	lv->length = DTROOTMAXSLOT;
   2575	dtlck->index++;
   2576
   2577	p = &jfs_ip->i_dtroot;
   2578
   2579	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
   2580
   2581	p->header.nextindex = 0;
   2582
   2583	/* init freelist */
   2584	fsi = 1;
   2585	f = &p->slot[fsi];
   2586
   2587	/* init data area of root */
   2588	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
   2589		f->next = fsi;
   2590	f->next = -1;
   2591
   2592	p->header.freelist = 1;
   2593	p->header.freecnt = 8;
   2594
   2595	/* init '..' entry */
   2596	p->header.idotdot = cpu_to_le32(idotdot);
   2597
   2598	return;
   2599}
   2600
   2601/*
   2602 *	add_missing_indices()
   2603 *
   2604 * function: Fix dtree page in which one or more entries has an invalid index.
   2605 *	     fsck.jfs should really fix this, but it currently does not.
   2606 *	     Called from jfs_readdir when bad index is detected.
   2607 */
   2608static void add_missing_indices(struct inode *inode, s64 bn)
   2609{
   2610	struct ldtentry *d;
   2611	struct dt_lock *dtlck;
   2612	int i;
   2613	uint index;
   2614	struct lv *lv;
   2615	struct metapage *mp;
   2616	dtpage_t *p;
   2617	int rc;
   2618	s8 *stbl;
   2619	tid_t tid;
   2620	struct tlock *tlck;
   2621
   2622	tid = txBegin(inode->i_sb, 0);
   2623
   2624	DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
   2625
   2626	if (rc) {
   2627		printk(KERN_ERR "DT_GETPAGE failed!\n");
   2628		goto end;
   2629	}
   2630	BT_MARK_DIRTY(mp, inode);
   2631
   2632	ASSERT(p->header.flag & BT_LEAF);
   2633
   2634	tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
   2635	if (BT_IS_ROOT(mp))
   2636		tlck->type |= tlckBTROOT;
   2637
   2638	dtlck = (struct dt_lock *) &tlck->lock;
   2639
   2640	stbl = DT_GETSTBL(p);
   2641	for (i = 0; i < p->header.nextindex; i++) {
   2642		d = (struct ldtentry *) &p->slot[stbl[i]];
   2643		index = le32_to_cpu(d->index);
   2644		if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
   2645			d->index = cpu_to_le32(add_index(tid, inode, bn, i));
   2646			if (dtlck->index >= dtlck->maxcnt)
   2647				dtlck = (struct dt_lock *) txLinelock(dtlck);
   2648			lv = &dtlck->lv[dtlck->index];
   2649			lv->offset = stbl[i];
   2650			lv->length = 1;
   2651			dtlck->index++;
   2652		}
   2653	}
   2654
   2655	DT_PUTPAGE(mp);
   2656	(void) txCommit(tid, 1, &inode, 0);
   2657end:
   2658	txEnd(tid);
   2659}
   2660
   2661/*
   2662 * Buffer to hold directory entry info while traversing a dtree page
   2663 * before being fed to the filldir function
   2664 */
   2665struct jfs_dirent {
   2666	loff_t position;
   2667	int ino;
   2668	u16 name_len;
   2669	char name[];
   2670};
   2671
   2672/*
   2673 * function to determine next variable-sized jfs_dirent in buffer
   2674 */
   2675static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
   2676{
   2677	return (struct jfs_dirent *)
   2678		((char *)dirent +
   2679		 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
   2680		   sizeof (loff_t) - 1) &
   2681		  ~(sizeof (loff_t) - 1)));
   2682}
   2683
   2684/*
   2685 *	jfs_readdir()
   2686 *
   2687 * function: read directory entries sequentially
   2688 *	from the specified entry offset
   2689 *
   2690 * parameter:
   2691 *
   2692 * return: offset = (pn, index) of start entry
   2693 *	of next jfs_readdir()/dtRead()
   2694 */
   2695int jfs_readdir(struct file *file, struct dir_context *ctx)
   2696{
   2697	struct inode *ip = file_inode(file);
   2698	struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
   2699	int rc = 0;
   2700	loff_t dtpos;	/* legacy OS/2 style position */
   2701	struct dtoffset {
   2702		s16 pn;
   2703		s16 index;
   2704		s32 unused;
   2705	} *dtoffset = (struct dtoffset *) &dtpos;
   2706	s64 bn;
   2707	struct metapage *mp;
   2708	dtpage_t *p;
   2709	int index;
   2710	s8 *stbl;
   2711	struct btstack btstack;
   2712	int i, next;
   2713	struct ldtentry *d;
   2714	struct dtslot *t;
   2715	int d_namleft, len, outlen;
   2716	unsigned long dirent_buf;
   2717	char *name_ptr;
   2718	u32 dir_index;
   2719	int do_index = 0;
   2720	uint loop_count = 0;
   2721	struct jfs_dirent *jfs_dirent;
   2722	int jfs_dirents;
   2723	int overflow, fix_page, page_fixed = 0;
   2724	static int unique_pos = 2;	/* If we can't fix broken index */
   2725
   2726	if (ctx->pos == DIREND)
   2727		return 0;
   2728
   2729	if (DO_INDEX(ip)) {
   2730		/*
   2731		 * persistent index is stored in directory entries.
   2732		 * Special cases:	 0 = .
   2733		 *			 1 = ..
   2734		 *			-1 = End of directory
   2735		 */
   2736		do_index = 1;
   2737
   2738		dir_index = (u32) ctx->pos;
   2739
   2740		/*
   2741		 * NFSv4 reserves cookies 1 and 2 for . and .. so the value
   2742		 * we return to the vfs is one greater than the one we use
   2743		 * internally.
   2744		 */
   2745		if (dir_index)
   2746			dir_index--;
   2747
   2748		if (dir_index > 1) {
   2749			struct dir_table_slot dirtab_slot;
   2750
   2751			if (dtEmpty(ip) ||
   2752			    (dir_index >= JFS_IP(ip)->next_index)) {
   2753				/* Stale position.  Directory has shrunk */
   2754				ctx->pos = DIREND;
   2755				return 0;
   2756			}
   2757		      repeat:
   2758			rc = read_index(ip, dir_index, &dirtab_slot);
   2759			if (rc) {
   2760				ctx->pos = DIREND;
   2761				return rc;
   2762			}
   2763			if (dirtab_slot.flag == DIR_INDEX_FREE) {
   2764				if (loop_count++ > JFS_IP(ip)->next_index) {
   2765					jfs_err("jfs_readdir detected infinite loop!");
   2766					ctx->pos = DIREND;
   2767					return 0;
   2768				}
   2769				dir_index = le32_to_cpu(dirtab_slot.addr2);
   2770				if (dir_index == -1) {
   2771					ctx->pos = DIREND;
   2772					return 0;
   2773				}
   2774				goto repeat;
   2775			}
   2776			bn = addressDTS(&dirtab_slot);
   2777			index = dirtab_slot.slot;
   2778			DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   2779			if (rc) {
   2780				ctx->pos = DIREND;
   2781				return 0;
   2782			}
   2783			if (p->header.flag & BT_INTERNAL) {
   2784				jfs_err("jfs_readdir: bad index table");
   2785				DT_PUTPAGE(mp);
   2786				ctx->pos = DIREND;
   2787				return 0;
   2788			}
   2789		} else {
   2790			if (dir_index == 0) {
   2791				/*
   2792				 * self "."
   2793				 */
   2794				ctx->pos = 1;
   2795				if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
   2796					return 0;
   2797			}
   2798			/*
   2799			 * parent ".."
   2800			 */
   2801			ctx->pos = 2;
   2802			if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
   2803				return 0;
   2804
   2805			/*
   2806			 * Find first entry of left-most leaf
   2807			 */
   2808			if (dtEmpty(ip)) {
   2809				ctx->pos = DIREND;
   2810				return 0;
   2811			}
   2812
   2813			if ((rc = dtReadFirst(ip, &btstack)))
   2814				return rc;
   2815
   2816			DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
   2817		}
   2818	} else {
   2819		/*
   2820		 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
   2821		 *
   2822		 * pn = 0; index = 1:	First entry "."
   2823		 * pn = 0; index = 2:	Second entry ".."
   2824		 * pn > 0:		Real entries, pn=1 -> leftmost page
   2825		 * pn = index = -1:	No more entries
   2826		 */
   2827		dtpos = ctx->pos;
   2828		if (dtpos < 2) {
   2829			/* build "." entry */
   2830			ctx->pos = 1;
   2831			if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
   2832				return 0;
   2833			dtoffset->index = 2;
   2834			ctx->pos = dtpos;
   2835		}
   2836
   2837		if (dtoffset->pn == 0) {
   2838			if (dtoffset->index == 2) {
   2839				/* build ".." entry */
   2840				if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
   2841					return 0;
   2842			} else {
   2843				jfs_err("jfs_readdir called with invalid offset!");
   2844			}
   2845			dtoffset->pn = 1;
   2846			dtoffset->index = 0;
   2847			ctx->pos = dtpos;
   2848		}
   2849
   2850		if (dtEmpty(ip)) {
   2851			ctx->pos = DIREND;
   2852			return 0;
   2853		}
   2854
   2855		if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
   2856			jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext",
   2857				rc);
   2858			ctx->pos = DIREND;
   2859			return 0;
   2860		}
   2861		/* get start leaf page and index */
   2862		DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
   2863
   2864		/* offset beyond directory eof ? */
   2865		if (bn < 0) {
   2866			ctx->pos = DIREND;
   2867			return 0;
   2868		}
   2869	}
   2870
   2871	dirent_buf = __get_free_page(GFP_KERNEL);
   2872	if (dirent_buf == 0) {
   2873		DT_PUTPAGE(mp);
   2874		jfs_warn("jfs_readdir: __get_free_page failed!");
   2875		ctx->pos = DIREND;
   2876		return -ENOMEM;
   2877	}
   2878
   2879	while (1) {
   2880		jfs_dirent = (struct jfs_dirent *) dirent_buf;
   2881		jfs_dirents = 0;
   2882		overflow = fix_page = 0;
   2883
   2884		stbl = DT_GETSTBL(p);
   2885
   2886		for (i = index; i < p->header.nextindex; i++) {
   2887			d = (struct ldtentry *) & p->slot[stbl[i]];
   2888
   2889			if (((long) jfs_dirent + d->namlen + 1) >
   2890			    (dirent_buf + PAGE_SIZE)) {
   2891				/* DBCS codepages could overrun dirent_buf */
   2892				index = i;
   2893				overflow = 1;
   2894				break;
   2895			}
   2896
   2897			d_namleft = d->namlen;
   2898			name_ptr = jfs_dirent->name;
   2899			jfs_dirent->ino = le32_to_cpu(d->inumber);
   2900
   2901			if (do_index) {
   2902				len = min(d_namleft, DTLHDRDATALEN);
   2903				jfs_dirent->position = le32_to_cpu(d->index);
   2904				/*
   2905				 * d->index should always be valid, but it
   2906				 * isn't.  fsck.jfs doesn't create the
   2907				 * directory index for the lost+found
   2908				 * directory.  Rather than let it go,
   2909				 * we can try to fix it.
   2910				 */
   2911				if ((jfs_dirent->position < 2) ||
   2912				    (jfs_dirent->position >=
   2913				     JFS_IP(ip)->next_index)) {
   2914					if (!page_fixed && !isReadOnly(ip)) {
   2915						fix_page = 1;
   2916						/*
   2917						 * setting overflow and setting
   2918						 * index to i will cause the
   2919						 * same page to be processed
   2920						 * again starting here
   2921						 */
   2922						overflow = 1;
   2923						index = i;
   2924						break;
   2925					}
   2926					jfs_dirent->position = unique_pos++;
   2927				}
   2928				/*
   2929				 * We add 1 to the index because we may
   2930				 * use a value of 2 internally, and NFSv4
   2931				 * doesn't like that.
   2932				 */
   2933				jfs_dirent->position++;
   2934			} else {
   2935				jfs_dirent->position = dtpos;
   2936				len = min(d_namleft, DTLHDRDATALEN_LEGACY);
   2937			}
   2938
   2939			/* copy the name of head/only segment */
   2940			outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
   2941						   codepage);
   2942			jfs_dirent->name_len = outlen;
   2943
   2944			/* copy name in the additional segment(s) */
   2945			next = d->next;
   2946			while (next >= 0) {
   2947				t = (struct dtslot *) & p->slot[next];
   2948				name_ptr += outlen;
   2949				d_namleft -= len;
   2950				/* Sanity Check */
   2951				if (d_namleft == 0) {
   2952					jfs_error(ip->i_sb,
   2953						  "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
   2954						  (long)ip->i_ino,
   2955						  (long long)bn,
   2956						  i);
   2957					goto skip_one;
   2958				}
   2959				len = min(d_namleft, DTSLOTDATALEN);
   2960				outlen = jfs_strfromUCS_le(name_ptr, t->name,
   2961							   len, codepage);
   2962				jfs_dirent->name_len += outlen;
   2963
   2964				next = t->next;
   2965			}
   2966
   2967			jfs_dirents++;
   2968			jfs_dirent = next_jfs_dirent(jfs_dirent);
   2969skip_one:
   2970			if (!do_index)
   2971				dtoffset->index++;
   2972		}
   2973
   2974		if (!overflow) {
   2975			/* Point to next leaf page */
   2976			if (p->header.flag & BT_ROOT)
   2977				bn = 0;
   2978			else {
   2979				bn = le64_to_cpu(p->header.next);
   2980				index = 0;
   2981				/* update offset (pn:index) for new page */
   2982				if (!do_index) {
   2983					dtoffset->pn++;
   2984					dtoffset->index = 0;
   2985				}
   2986			}
   2987			page_fixed = 0;
   2988		}
   2989
   2990		/* unpin previous leaf page */
   2991		DT_PUTPAGE(mp);
   2992
   2993		jfs_dirent = (struct jfs_dirent *) dirent_buf;
   2994		while (jfs_dirents--) {
   2995			ctx->pos = jfs_dirent->position;
   2996			if (!dir_emit(ctx, jfs_dirent->name,
   2997				    jfs_dirent->name_len,
   2998				    jfs_dirent->ino, DT_UNKNOWN))
   2999				goto out;
   3000			jfs_dirent = next_jfs_dirent(jfs_dirent);
   3001		}
   3002
   3003		if (fix_page) {
   3004			add_missing_indices(ip, bn);
   3005			page_fixed = 1;
   3006		}
   3007
   3008		if (!overflow && (bn == 0)) {
   3009			ctx->pos = DIREND;
   3010			break;
   3011		}
   3012
   3013		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   3014		if (rc) {
   3015			free_page(dirent_buf);
   3016			return rc;
   3017		}
   3018	}
   3019
   3020      out:
   3021	free_page(dirent_buf);
   3022
   3023	return rc;
   3024}
   3025
   3026
   3027/*
   3028 *	dtReadFirst()
   3029 *
   3030 * function: get the leftmost page of the directory
   3031 */
   3032static int dtReadFirst(struct inode *ip, struct btstack * btstack)
   3033{
   3034	int rc = 0;
   3035	s64 bn;
   3036	int psize = 288;	/* initial in-line directory */
   3037	struct metapage *mp;
   3038	dtpage_t *p;
   3039	s8 *stbl;
   3040	struct btframe *btsp;
   3041	pxd_t *xd;
   3042
   3043	BT_CLR(btstack);	/* reset stack */
   3044
   3045	/*
   3046	 *	descend leftmost path of the tree
   3047	 *
   3048	 * by convention, root bn = 0.
   3049	 */
   3050	for (bn = 0;;) {
   3051		DT_GETPAGE(ip, bn, mp, psize, p, rc);
   3052		if (rc)
   3053			return rc;
   3054
   3055		/*
   3056		 * leftmost leaf page
   3057		 */
   3058		if (p->header.flag & BT_LEAF) {
   3059			/* return leftmost entry */
   3060			btsp = btstack->top;
   3061			btsp->bn = bn;
   3062			btsp->index = 0;
   3063			btsp->mp = mp;
   3064
   3065			return 0;
   3066		}
   3067
   3068		/*
   3069		 * descend down to leftmost child page
   3070		 */
   3071		if (BT_STACK_FULL(btstack)) {
   3072			DT_PUTPAGE(mp);
   3073			jfs_error(ip->i_sb, "btstack overrun\n");
   3074			BT_STACK_DUMP(btstack);
   3075			return -EIO;
   3076		}
   3077		/* push (bn, index) of the parent page/entry */
   3078		BT_PUSH(btstack, bn, 0);
   3079
   3080		/* get the leftmost entry */
   3081		stbl = DT_GETSTBL(p);
   3082		xd = (pxd_t *) & p->slot[stbl[0]];
   3083
   3084		/* get the child page block address */
   3085		bn = addressPXD(xd);
   3086		psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
   3087
   3088		/* unpin the parent page */
   3089		DT_PUTPAGE(mp);
   3090	}
   3091}
   3092
   3093
   3094/*
   3095 *	dtReadNext()
   3096 *
   3097 * function: get the page of the specified offset (pn:index)
   3098 *
   3099 * return: if (offset > eof), bn = -1;
   3100 *
   3101 * note: if index > nextindex of the target leaf page,
   3102 * start with 1st entry of next leaf page;
   3103 */
   3104static int dtReadNext(struct inode *ip, loff_t * offset,
   3105		      struct btstack * btstack)
   3106{
   3107	int rc = 0;
   3108	struct dtoffset {
   3109		s16 pn;
   3110		s16 index;
   3111		s32 unused;
   3112	} *dtoffset = (struct dtoffset *) offset;
   3113	s64 bn;
   3114	struct metapage *mp;
   3115	dtpage_t *p;
   3116	int index;
   3117	int pn;
   3118	s8 *stbl;
   3119	struct btframe *btsp, *parent;
   3120	pxd_t *xd;
   3121
   3122	/*
   3123	 * get leftmost leaf page pinned
   3124	 */
   3125	if ((rc = dtReadFirst(ip, btstack)))
   3126		return rc;
   3127
   3128	/* get leaf page */
   3129	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
   3130
   3131	/* get the start offset (pn:index) */
   3132	pn = dtoffset->pn - 1;	/* Now pn = 0 represents leftmost leaf */
   3133	index = dtoffset->index;
   3134
   3135	/* start at leftmost page ? */
   3136	if (pn == 0) {
   3137		/* offset beyond eof ? */
   3138		if (index < p->header.nextindex)
   3139			goto out;
   3140
   3141		if (p->header.flag & BT_ROOT) {
   3142			bn = -1;
   3143			goto out;
   3144		}
   3145
   3146		/* start with 1st entry of next leaf page */
   3147		dtoffset->pn++;
   3148		dtoffset->index = index = 0;
   3149		goto a;
   3150	}
   3151
   3152	/* start at non-leftmost page: scan parent pages for large pn */
   3153	if (p->header.flag & BT_ROOT) {
   3154		bn = -1;
   3155		goto out;
   3156	}
   3157
   3158	/* start after next leaf page ? */
   3159	if (pn > 1)
   3160		goto b;
   3161
   3162	/* get leaf page pn = 1 */
   3163      a:
   3164	bn = le64_to_cpu(p->header.next);
   3165
   3166	/* unpin leaf page */
   3167	DT_PUTPAGE(mp);
   3168
   3169	/* offset beyond eof ? */
   3170	if (bn == 0) {
   3171		bn = -1;
   3172		goto out;
   3173	}
   3174
   3175	goto c;
   3176
   3177	/*
   3178	 * scan last internal page level to get target leaf page
   3179	 */
   3180      b:
   3181	/* unpin leftmost leaf page */
   3182	DT_PUTPAGE(mp);
   3183
   3184	/* get left most parent page */
   3185	btsp = btstack->top;
   3186	parent = btsp - 1;
   3187	bn = parent->bn;
   3188	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   3189	if (rc)
   3190		return rc;
   3191
   3192	/* scan parent pages at last internal page level */
   3193	while (pn >= p->header.nextindex) {
   3194		pn -= p->header.nextindex;
   3195
   3196		/* get next parent page address */
   3197		bn = le64_to_cpu(p->header.next);
   3198
   3199		/* unpin current parent page */
   3200		DT_PUTPAGE(mp);
   3201
   3202		/* offset beyond eof ? */
   3203		if (bn == 0) {
   3204			bn = -1;
   3205			goto out;
   3206		}
   3207
   3208		/* get next parent page */
   3209		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   3210		if (rc)
   3211			return rc;
   3212
   3213		/* update parent page stack frame */
   3214		parent->bn = bn;
   3215	}
   3216
   3217	/* get leaf page address */
   3218	stbl = DT_GETSTBL(p);
   3219	xd = (pxd_t *) & p->slot[stbl[pn]];
   3220	bn = addressPXD(xd);
   3221
   3222	/* unpin parent page */
   3223	DT_PUTPAGE(mp);
   3224
   3225	/*
   3226	 * get target leaf page
   3227	 */
   3228      c:
   3229	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   3230	if (rc)
   3231		return rc;
   3232
   3233	/*
   3234	 * leaf page has been completed:
   3235	 * start with 1st entry of next leaf page
   3236	 */
   3237	if (index >= p->header.nextindex) {
   3238		bn = le64_to_cpu(p->header.next);
   3239
   3240		/* unpin leaf page */
   3241		DT_PUTPAGE(mp);
   3242
   3243		/* offset beyond eof ? */
   3244		if (bn == 0) {
   3245			bn = -1;
   3246			goto out;
   3247		}
   3248
   3249		/* get next leaf page */
   3250		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
   3251		if (rc)
   3252			return rc;
   3253
   3254		/* start with 1st entry of next leaf page */
   3255		dtoffset->pn++;
   3256		dtoffset->index = 0;
   3257	}
   3258
   3259      out:
   3260	/* return target leaf page pinned */
   3261	btsp = btstack->top;
   3262	btsp->bn = bn;
   3263	btsp->index = dtoffset->index;
   3264	btsp->mp = mp;
   3265
   3266	return 0;
   3267}
   3268
   3269
   3270/*
   3271 *	dtCompare()
   3272 *
   3273 * function: compare search key with an internal entry
   3274 *
   3275 * return:
   3276 *	< 0 if k is < record
   3277 *	= 0 if k is = record
   3278 *	> 0 if k is > record
   3279 */
   3280static int dtCompare(struct component_name * key,	/* search key */
   3281		     dtpage_t * p,	/* directory page */
   3282		     int si)
   3283{				/* entry slot index */
   3284	wchar_t *kname;
   3285	__le16 *name;
   3286	int klen, namlen, len, rc;
   3287	struct idtentry *ih;
   3288	struct dtslot *t;
   3289
   3290	/*
   3291	 * force the left-most key on internal pages, at any level of
   3292	 * the tree, to be less than any search key.
   3293	 * this obviates having to update the leftmost key on an internal
   3294	 * page when the user inserts a new key in the tree smaller than
   3295	 * anything that has been stored.
   3296	 *
   3297	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
   3298	 * at any internal page at any level of the tree,
   3299	 * it descends to child of the entry anyway -
   3300	 * ? make the entry as min size dummy entry)
   3301	 *
   3302	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
   3303	 * return (1);
   3304	 */
   3305
   3306	kname = key->name;
   3307	klen = key->namlen;
   3308
   3309	ih = (struct idtentry *) & p->slot[si];
   3310	si = ih->next;
   3311	name = ih->name;
   3312	namlen = ih->namlen;
   3313	len = min(namlen, DTIHDRDATALEN);
   3314
   3315	/* compare with head/only segment */
   3316	len = min(klen, len);
   3317	if ((rc = UniStrncmp_le(kname, name, len)))
   3318		return rc;
   3319
   3320	klen -= len;
   3321	namlen -= len;
   3322
   3323	/* compare with additional segment(s) */
   3324	kname += len;
   3325	while (klen > 0 && namlen > 0) {
   3326		/* compare with next name segment */
   3327		t = (struct dtslot *) & p->slot[si];
   3328		len = min(namlen, DTSLOTDATALEN);
   3329		len = min(klen, len);
   3330		name = t->name;
   3331		if ((rc = UniStrncmp_le(kname, name, len)))
   3332			return rc;
   3333
   3334		klen -= len;
   3335		namlen -= len;
   3336		kname += len;
   3337		si = t->next;
   3338	}
   3339
   3340	return (klen - namlen);
   3341}
   3342
   3343
   3344
   3345
   3346/*
   3347 *	ciCompare()
   3348 *
   3349 * function: compare search key with an (leaf/internal) entry
   3350 *
   3351 * return:
   3352 *	< 0 if k is < record
   3353 *	= 0 if k is = record
   3354 *	> 0 if k is > record
   3355 */
   3356static int ciCompare(struct component_name * key,	/* search key */
   3357		     dtpage_t * p,	/* directory page */
   3358		     int si,	/* entry slot index */
   3359		     int flag)
   3360{
   3361	wchar_t *kname, x;
   3362	__le16 *name;
   3363	int klen, namlen, len, rc;
   3364	struct ldtentry *lh;
   3365	struct idtentry *ih;
   3366	struct dtslot *t;
   3367	int i;
   3368
   3369	/*
   3370	 * force the left-most key on internal pages, at any level of
   3371	 * the tree, to be less than any search key.
   3372	 * this obviates having to update the leftmost key on an internal
   3373	 * page when the user inserts a new key in the tree smaller than
   3374	 * anything that has been stored.
   3375	 *
   3376	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
   3377	 * at any internal page at any level of the tree,
   3378	 * it descends to child of the entry anyway -
   3379	 * ? make the entry as min size dummy entry)
   3380	 *
   3381	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
   3382	 * return (1);
   3383	 */
   3384
   3385	kname = key->name;
   3386	klen = key->namlen;
   3387
   3388	/*
   3389	 * leaf page entry
   3390	 */
   3391	if (p->header.flag & BT_LEAF) {
   3392		lh = (struct ldtentry *) & p->slot[si];
   3393		si = lh->next;
   3394		name = lh->name;
   3395		namlen = lh->namlen;
   3396		if (flag & JFS_DIR_INDEX)
   3397			len = min(namlen, DTLHDRDATALEN);
   3398		else
   3399			len = min(namlen, DTLHDRDATALEN_LEGACY);
   3400	}
   3401	/*
   3402	 * internal page entry
   3403	 */
   3404	else {
   3405		ih = (struct idtentry *) & p->slot[si];
   3406		si = ih->next;
   3407		name = ih->name;
   3408		namlen = ih->namlen;
   3409		len = min(namlen, DTIHDRDATALEN);
   3410	}
   3411
   3412	/* compare with head/only segment */
   3413	len = min(klen, len);
   3414	for (i = 0; i < len; i++, kname++, name++) {
   3415		/* only uppercase if case-insensitive support is on */
   3416		if ((flag & JFS_OS2) == JFS_OS2)
   3417			x = UniToupper(le16_to_cpu(*name));
   3418		else
   3419			x = le16_to_cpu(*name);
   3420		if ((rc = *kname - x))
   3421			return rc;
   3422	}
   3423
   3424	klen -= len;
   3425	namlen -= len;
   3426
   3427	/* compare with additional segment(s) */
   3428	while (klen > 0 && namlen > 0) {
   3429		/* compare with next name segment */
   3430		t = (struct dtslot *) & p->slot[si];
   3431		len = min(namlen, DTSLOTDATALEN);
   3432		len = min(klen, len);
   3433		name = t->name;
   3434		for (i = 0; i < len; i++, kname++, name++) {
   3435			/* only uppercase if case-insensitive support is on */
   3436			if ((flag & JFS_OS2) == JFS_OS2)
   3437				x = UniToupper(le16_to_cpu(*name));
   3438			else
   3439				x = le16_to_cpu(*name);
   3440
   3441			if ((rc = *kname - x))
   3442				return rc;
   3443		}
   3444
   3445		klen -= len;
   3446		namlen -= len;
   3447		si = t->next;
   3448	}
   3449
   3450	return (klen - namlen);
   3451}
   3452
   3453
   3454/*
   3455 *	ciGetLeafPrefixKey()
   3456 *
   3457 * function: compute prefix of suffix compression
   3458 *	     from two adjacent leaf entries
   3459 *	     across page boundary
   3460 *
   3461 * return: non-zero on error
   3462 *
   3463 */
   3464static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
   3465			       int ri, struct component_name * key, int flag)
   3466{
   3467	int klen, namlen;
   3468	wchar_t *pl, *pr, *kname;
   3469	struct component_name lkey;
   3470	struct component_name rkey;
   3471
   3472	lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
   3473					GFP_KERNEL);
   3474	if (lkey.name == NULL)
   3475		return -ENOMEM;
   3476
   3477	rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t),
   3478					GFP_KERNEL);
   3479	if (rkey.name == NULL) {
   3480		kfree(lkey.name);
   3481		return -ENOMEM;
   3482	}
   3483
   3484	/* get left and right key */
   3485	dtGetKey(lp, li, &lkey, flag);
   3486	lkey.name[lkey.namlen] = 0;
   3487
   3488	if ((flag & JFS_OS2) == JFS_OS2)
   3489		ciToUpper(&lkey);
   3490
   3491	dtGetKey(rp, ri, &rkey, flag);
   3492	rkey.name[rkey.namlen] = 0;
   3493
   3494
   3495	if ((flag & JFS_OS2) == JFS_OS2)
   3496		ciToUpper(&rkey);
   3497
   3498	/* compute prefix */
   3499	klen = 0;
   3500	kname = key->name;
   3501	namlen = min(lkey.namlen, rkey.namlen);
   3502	for (pl = lkey.name, pr = rkey.name;
   3503	     namlen; pl++, pr++, namlen--, klen++, kname++) {
   3504		*kname = *pr;
   3505		if (*pl != *pr) {
   3506			key->namlen = klen + 1;
   3507			goto free_names;
   3508		}
   3509	}
   3510
   3511	/* l->namlen <= r->namlen since l <= r */
   3512	if (lkey.namlen < rkey.namlen) {
   3513		*kname = *pr;
   3514		key->namlen = klen + 1;
   3515	} else			/* l->namelen == r->namelen */
   3516		key->namlen = klen;
   3517
   3518free_names:
   3519	kfree(lkey.name);
   3520	kfree(rkey.name);
   3521	return 0;
   3522}
   3523
   3524
   3525
   3526/*
   3527 *	dtGetKey()
   3528 *
   3529 * function: get key of the entry
   3530 */
   3531static void dtGetKey(dtpage_t * p, int i,	/* entry index */
   3532		     struct component_name * key, int flag)
   3533{
   3534	int si;
   3535	s8 *stbl;
   3536	struct ldtentry *lh;
   3537	struct idtentry *ih;
   3538	struct dtslot *t;
   3539	int namlen, len;
   3540	wchar_t *kname;
   3541	__le16 *name;
   3542
   3543	/* get entry */
   3544	stbl = DT_GETSTBL(p);
   3545	si = stbl[i];
   3546	if (p->header.flag & BT_LEAF) {
   3547		lh = (struct ldtentry *) & p->slot[si];
   3548		si = lh->next;
   3549		namlen = lh->namlen;
   3550		name = lh->name;
   3551		if (flag & JFS_DIR_INDEX)
   3552			len = min(namlen, DTLHDRDATALEN);
   3553		else
   3554			len = min(namlen, DTLHDRDATALEN_LEGACY);
   3555	} else {
   3556		ih = (struct idtentry *) & p->slot[si];
   3557		si = ih->next;
   3558		namlen = ih->namlen;
   3559		name = ih->name;
   3560		len = min(namlen, DTIHDRDATALEN);
   3561	}
   3562
   3563	key->namlen = namlen;
   3564	kname = key->name;
   3565
   3566	/*
   3567	 * move head/only segment
   3568	 */
   3569	UniStrncpy_from_le(kname, name, len);
   3570
   3571	/*
   3572	 * move additional segment(s)
   3573	 */
   3574	while (si >= 0) {
   3575		/* get next segment */
   3576		t = &p->slot[si];
   3577		kname += len;
   3578		namlen -= len;
   3579		len = min(namlen, DTSLOTDATALEN);
   3580		UniStrncpy_from_le(kname, t->name, len);
   3581
   3582		si = t->next;
   3583	}
   3584}
   3585
   3586
   3587/*
   3588 *	dtInsertEntry()
   3589 *
   3590 * function: allocate free slot(s) and
   3591 *	     write a leaf/internal entry
   3592 *
   3593 * return: entry slot index
   3594 */
   3595static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
   3596			  ddata_t * data, struct dt_lock ** dtlock)
   3597{
   3598	struct dtslot *h, *t;
   3599	struct ldtentry *lh = NULL;
   3600	struct idtentry *ih = NULL;
   3601	int hsi, fsi, klen, len, nextindex;
   3602	wchar_t *kname;
   3603	__le16 *name;
   3604	s8 *stbl;
   3605	pxd_t *xd;
   3606	struct dt_lock *dtlck = *dtlock;
   3607	struct lv *lv;
   3608	int xsi, n;
   3609	s64 bn = 0;
   3610	struct metapage *mp = NULL;
   3611
   3612	klen = key->namlen;
   3613	kname = key->name;
   3614
   3615	/* allocate a free slot */
   3616	hsi = fsi = p->header.freelist;
   3617	h = &p->slot[fsi];
   3618	p->header.freelist = h->next;
   3619	--p->header.freecnt;
   3620
   3621	/* open new linelock */
   3622	if (dtlck->index >= dtlck->maxcnt)
   3623		dtlck = (struct dt_lock *) txLinelock(dtlck);
   3624
   3625	lv = & dtlck->lv[dtlck->index];
   3626	lv->offset = hsi;
   3627
   3628	/* write head/only segment */
   3629	if (p->header.flag & BT_LEAF) {
   3630		lh = (struct ldtentry *) h;
   3631		lh->next = h->next;
   3632		lh->inumber = cpu_to_le32(data->leaf.ino);
   3633		lh->namlen = klen;
   3634		name = lh->name;
   3635		if (data->leaf.ip) {
   3636			len = min(klen, DTLHDRDATALEN);
   3637			if (!(p->header.flag & BT_ROOT))
   3638				bn = addressPXD(&p->header.self);
   3639			lh->index = cpu_to_le32(add_index(data->leaf.tid,
   3640							  data->leaf.ip,
   3641							  bn, index));
   3642		} else
   3643			len = min(klen, DTLHDRDATALEN_LEGACY);
   3644	} else {
   3645		ih = (struct idtentry *) h;
   3646		ih->next = h->next;
   3647		xd = (pxd_t *) ih;
   3648		*xd = data->xd;
   3649		ih->namlen = klen;
   3650		name = ih->name;
   3651		len = min(klen, DTIHDRDATALEN);
   3652	}
   3653
   3654	UniStrncpy_to_le(name, kname, len);
   3655
   3656	n = 1;
   3657	xsi = hsi;
   3658
   3659	/* write additional segment(s) */
   3660	t = h;
   3661	klen -= len;
   3662	while (klen) {
   3663		/* get free slot */
   3664		fsi = p->header.freelist;
   3665		t = &p->slot[fsi];
   3666		p->header.freelist = t->next;
   3667		--p->header.freecnt;
   3668
   3669		/* is next slot contiguous ? */
   3670		if (fsi != xsi + 1) {
   3671			/* close current linelock */
   3672			lv->length = n;
   3673			dtlck->index++;
   3674
   3675			/* open new linelock */
   3676			if (dtlck->index < dtlck->maxcnt)
   3677				lv++;
   3678			else {
   3679				dtlck = (struct dt_lock *) txLinelock(dtlck);
   3680				lv = & dtlck->lv[0];
   3681			}
   3682
   3683			lv->offset = fsi;
   3684			n = 0;
   3685		}
   3686
   3687		kname += len;
   3688		len = min(klen, DTSLOTDATALEN);
   3689		UniStrncpy_to_le(t->name, kname, len);
   3690
   3691		n++;
   3692		xsi = fsi;
   3693		klen -= len;
   3694	}
   3695
   3696	/* close current linelock */
   3697	lv->length = n;
   3698	dtlck->index++;
   3699
   3700	*dtlock = dtlck;
   3701
   3702	/* terminate last/only segment */
   3703	if (h == t) {
   3704		/* single segment entry */
   3705		if (p->header.flag & BT_LEAF)
   3706			lh->next = -1;
   3707		else
   3708			ih->next = -1;
   3709	} else
   3710		/* multi-segment entry */
   3711		t->next = -1;
   3712
   3713	/* if insert into middle, shift right succeeding entries in stbl */
   3714	stbl = DT_GETSTBL(p);
   3715	nextindex = p->header.nextindex;
   3716	if (index < nextindex) {
   3717		memmove(stbl + index + 1, stbl + index, nextindex - index);
   3718
   3719		if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
   3720			s64 lblock;
   3721
   3722			/*
   3723			 * Need to update slot number for entries that moved
   3724			 * in the stbl
   3725			 */
   3726			mp = NULL;
   3727			for (n = index + 1; n <= nextindex; n++) {
   3728				lh = (struct ldtentry *) & (p->slot[stbl[n]]);
   3729				modify_index(data->leaf.tid, data->leaf.ip,
   3730					     le32_to_cpu(lh->index), bn, n,
   3731					     &mp, &lblock);
   3732			}
   3733			if (mp)
   3734				release_metapage(mp);
   3735		}
   3736	}
   3737
   3738	stbl[index] = hsi;
   3739
   3740	/* advance next available entry index of stbl */
   3741	++p->header.nextindex;
   3742}
   3743
   3744
   3745/*
   3746 *	dtMoveEntry()
   3747 *
   3748 * function: move entries from split/left page to new/right page
   3749 *
   3750 *	nextindex of dst page and freelist/freecnt of both pages
   3751 *	are updated.
   3752 */
   3753static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
   3754			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
   3755			int do_index)
   3756{
   3757	int ssi, next;		/* src slot index */
   3758	int di;			/* dst entry index */
   3759	int dsi;		/* dst slot index */
   3760	s8 *sstbl, *dstbl;	/* sorted entry table */
   3761	int snamlen, len;
   3762	struct ldtentry *slh, *dlh = NULL;
   3763	struct idtentry *sih, *dih = NULL;
   3764	struct dtslot *h, *s, *d;
   3765	struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
   3766	struct lv *slv, *dlv;
   3767	int xssi, ns, nd;
   3768	int sfsi;
   3769
   3770	sstbl = (s8 *) & sp->slot[sp->header.stblindex];
   3771	dstbl = (s8 *) & dp->slot[dp->header.stblindex];
   3772
   3773	dsi = dp->header.freelist;	/* first (whole page) free slot */
   3774	sfsi = sp->header.freelist;
   3775
   3776	/* linelock destination entry slot */
   3777	dlv = & ddtlck->lv[ddtlck->index];
   3778	dlv->offset = dsi;
   3779
   3780	/* linelock source entry slot */
   3781	slv = & sdtlck->lv[sdtlck->index];
   3782	slv->offset = sstbl[si];
   3783	xssi = slv->offset - 1;
   3784
   3785	/*
   3786	 * move entries
   3787	 */
   3788	ns = nd = 0;
   3789	for (di = 0; si < sp->header.nextindex; si++, di++) {
   3790		ssi = sstbl[si];
   3791		dstbl[di] = dsi;
   3792
   3793		/* is next slot contiguous ? */
   3794		if (ssi != xssi + 1) {
   3795			/* close current linelock */
   3796			slv->length = ns;
   3797			sdtlck->index++;
   3798
   3799			/* open new linelock */
   3800			if (sdtlck->index < sdtlck->maxcnt)
   3801				slv++;
   3802			else {
   3803				sdtlck = (struct dt_lock *) txLinelock(sdtlck);
   3804				slv = & sdtlck->lv[0];
   3805			}
   3806
   3807			slv->offset = ssi;
   3808			ns = 0;
   3809		}
   3810
   3811		/*
   3812		 * move head/only segment of an entry
   3813		 */
   3814		/* get dst slot */
   3815		h = d = &dp->slot[dsi];
   3816
   3817		/* get src slot and move */
   3818		s = &sp->slot[ssi];
   3819		if (sp->header.flag & BT_LEAF) {
   3820			/* get source entry */
   3821			slh = (struct ldtentry *) s;
   3822			dlh = (struct ldtentry *) h;
   3823			snamlen = slh->namlen;
   3824
   3825			if (do_index) {
   3826				len = min(snamlen, DTLHDRDATALEN);
   3827				dlh->index = slh->index; /* little-endian */
   3828			} else
   3829				len = min(snamlen, DTLHDRDATALEN_LEGACY);
   3830
   3831			memcpy(dlh, slh, 6 + len * 2);
   3832
   3833			next = slh->next;
   3834
   3835			/* update dst head/only segment next field */
   3836			dsi++;
   3837			dlh->next = dsi;
   3838		} else {
   3839			sih = (struct idtentry *) s;
   3840			snamlen = sih->namlen;
   3841
   3842			len = min(snamlen, DTIHDRDATALEN);
   3843			dih = (struct idtentry *) h;
   3844			memcpy(dih, sih, 10 + len * 2);
   3845			next = sih->next;
   3846
   3847			dsi++;
   3848			dih->next = dsi;
   3849		}
   3850
   3851		/* free src head/only segment */
   3852		s->next = sfsi;
   3853		s->cnt = 1;
   3854		sfsi = ssi;
   3855
   3856		ns++;
   3857		nd++;
   3858		xssi = ssi;
   3859
   3860		/*
   3861		 * move additional segment(s) of the entry
   3862		 */
   3863		snamlen -= len;
   3864		while ((ssi = next) >= 0) {
   3865			/* is next slot contiguous ? */
   3866			if (ssi != xssi + 1) {
   3867				/* close current linelock */
   3868				slv->length = ns;
   3869				sdtlck->index++;
   3870
   3871				/* open new linelock */
   3872				if (sdtlck->index < sdtlck->maxcnt)
   3873					slv++;
   3874				else {
   3875					sdtlck =
   3876					    (struct dt_lock *)
   3877					    txLinelock(sdtlck);
   3878					slv = & sdtlck->lv[0];
   3879				}
   3880
   3881				slv->offset = ssi;
   3882				ns = 0;
   3883			}
   3884
   3885			/* get next source segment */
   3886			s = &sp->slot[ssi];
   3887
   3888			/* get next destination free slot */
   3889			d++;
   3890
   3891			len = min(snamlen, DTSLOTDATALEN);
   3892			UniStrncpy_le(d->name, s->name, len);
   3893
   3894			ns++;
   3895			nd++;
   3896			xssi = ssi;
   3897
   3898			dsi++;
   3899			d->next = dsi;
   3900
   3901			/* free source segment */
   3902			next = s->next;
   3903			s->next = sfsi;
   3904			s->cnt = 1;
   3905			sfsi = ssi;
   3906
   3907			snamlen -= len;
   3908		}		/* end while */
   3909
   3910		/* terminate dst last/only segment */
   3911		if (h == d) {
   3912			/* single segment entry */
   3913			if (dp->header.flag & BT_LEAF)
   3914				dlh->next = -1;
   3915			else
   3916				dih->next = -1;
   3917		} else
   3918			/* multi-segment entry */
   3919			d->next = -1;
   3920	}			/* end for */
   3921
   3922	/* close current linelock */
   3923	slv->length = ns;
   3924	sdtlck->index++;
   3925	*sdtlock = sdtlck;
   3926
   3927	dlv->length = nd;
   3928	ddtlck->index++;
   3929	*ddtlock = ddtlck;
   3930
   3931	/* update source header */
   3932	sp->header.freelist = sfsi;
   3933	sp->header.freecnt += nd;
   3934
   3935	/* update destination header */
   3936	dp->header.nextindex = di;
   3937
   3938	dp->header.freelist = dsi;
   3939	dp->header.freecnt -= nd;
   3940}
   3941
   3942
   3943/*
   3944 *	dtDeleteEntry()
   3945 *
   3946 * function: free a (leaf/internal) entry
   3947 *
   3948 * log freelist header, stbl, and each segment slot of entry
   3949 * (even though last/only segment next field is modified,
   3950 * physical image logging requires all segment slots of
   3951 * the entry logged to avoid applying previous updates
   3952 * to the same slots)
   3953 */
   3954static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
   3955{
   3956	int fsi;		/* free entry slot index */
   3957	s8 *stbl;
   3958	struct dtslot *t;
   3959	int si, freecnt;
   3960	struct dt_lock *dtlck = *dtlock;
   3961	struct lv *lv;
   3962	int xsi, n;
   3963
   3964	/* get free entry slot index */
   3965	stbl = DT_GETSTBL(p);
   3966	fsi = stbl[fi];
   3967
   3968	/* open new linelock */
   3969	if (dtlck->index >= dtlck->maxcnt)
   3970		dtlck = (struct dt_lock *) txLinelock(dtlck);
   3971	lv = & dtlck->lv[dtlck->index];
   3972
   3973	lv->offset = fsi;
   3974
   3975	/* get the head/only segment */
   3976	t = &p->slot[fsi];
   3977	if (p->header.flag & BT_LEAF)
   3978		si = ((struct ldtentry *) t)->next;
   3979	else
   3980		si = ((struct idtentry *) t)->next;
   3981	t->next = si;
   3982	t->cnt = 1;
   3983
   3984	n = freecnt = 1;
   3985	xsi = fsi;
   3986
   3987	/* find the last/only segment */
   3988	while (si >= 0) {
   3989		/* is next slot contiguous ? */
   3990		if (si != xsi + 1) {
   3991			/* close current linelock */
   3992			lv->length = n;
   3993			dtlck->index++;
   3994
   3995			/* open new linelock */
   3996			if (dtlck->index < dtlck->maxcnt)
   3997				lv++;
   3998			else {
   3999				dtlck = (struct dt_lock *) txLinelock(dtlck);
   4000				lv = & dtlck->lv[0];
   4001			}
   4002
   4003			lv->offset = si;
   4004			n = 0;
   4005		}
   4006
   4007		n++;
   4008		xsi = si;
   4009		freecnt++;
   4010
   4011		t = &p->slot[si];
   4012		t->cnt = 1;
   4013		si = t->next;
   4014	}
   4015
   4016	/* close current linelock */
   4017	lv->length = n;
   4018	dtlck->index++;
   4019
   4020	*dtlock = dtlck;
   4021
   4022	/* update freelist */
   4023	t->next = p->header.freelist;
   4024	p->header.freelist = fsi;
   4025	p->header.freecnt += freecnt;
   4026
   4027	/* if delete from middle,
   4028	 * shift left the succedding entries in the stbl
   4029	 */
   4030	si = p->header.nextindex;
   4031	if (fi < si - 1)
   4032		memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
   4033
   4034	p->header.nextindex--;
   4035}
   4036
   4037
   4038/*
   4039 *	dtTruncateEntry()
   4040 *
   4041 * function: truncate a (leaf/internal) entry
   4042 *
   4043 * log freelist header, stbl, and each segment slot of entry
   4044 * (even though last/only segment next field is modified,
   4045 * physical image logging requires all segment slots of
   4046 * the entry logged to avoid applying previous updates
   4047 * to the same slots)
   4048 */
   4049static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
   4050{
   4051	int tsi;		/* truncate entry slot index */
   4052	s8 *stbl;
   4053	struct dtslot *t;
   4054	int si, freecnt;
   4055	struct dt_lock *dtlck = *dtlock;
   4056	struct lv *lv;
   4057	int fsi, xsi, n;
   4058
   4059	/* get free entry slot index */
   4060	stbl = DT_GETSTBL(p);
   4061	tsi = stbl[ti];
   4062
   4063	/* open new linelock */
   4064	if (dtlck->index >= dtlck->maxcnt)
   4065		dtlck = (struct dt_lock *) txLinelock(dtlck);
   4066	lv = & dtlck->lv[dtlck->index];
   4067
   4068	lv->offset = tsi;
   4069
   4070	/* get the head/only segment */
   4071	t = &p->slot[tsi];
   4072	ASSERT(p->header.flag & BT_INTERNAL);
   4073	((struct idtentry *) t)->namlen = 0;
   4074	si = ((struct idtentry *) t)->next;
   4075	((struct idtentry *) t)->next = -1;
   4076
   4077	n = 1;
   4078	freecnt = 0;
   4079	fsi = si;
   4080	xsi = tsi;
   4081
   4082	/* find the last/only segment */
   4083	while (si >= 0) {
   4084		/* is next slot contiguous ? */
   4085		if (si != xsi + 1) {
   4086			/* close current linelock */
   4087			lv->length = n;
   4088			dtlck->index++;
   4089
   4090			/* open new linelock */
   4091			if (dtlck->index < dtlck->maxcnt)
   4092				lv++;
   4093			else {
   4094				dtlck = (struct dt_lock *) txLinelock(dtlck);
   4095				lv = & dtlck->lv[0];
   4096			}
   4097
   4098			lv->offset = si;
   4099			n = 0;
   4100		}
   4101
   4102		n++;
   4103		xsi = si;
   4104		freecnt++;
   4105
   4106		t = &p->slot[si];
   4107		t->cnt = 1;
   4108		si = t->next;
   4109	}
   4110
   4111	/* close current linelock */
   4112	lv->length = n;
   4113	dtlck->index++;
   4114
   4115	*dtlock = dtlck;
   4116
   4117	/* update freelist */
   4118	if (freecnt == 0)
   4119		return;
   4120	t->next = p->header.freelist;
   4121	p->header.freelist = fsi;
   4122	p->header.freecnt += freecnt;
   4123}
   4124
   4125
   4126/*
   4127 *	dtLinelockFreelist()
   4128 */
   4129static void dtLinelockFreelist(dtpage_t * p,	/* directory page */
   4130			       int m,	/* max slot index */
   4131			       struct dt_lock ** dtlock)
   4132{
   4133	int fsi;		/* free entry slot index */
   4134	struct dtslot *t;
   4135	int si;
   4136	struct dt_lock *dtlck = *dtlock;
   4137	struct lv *lv;
   4138	int xsi, n;
   4139
   4140	/* get free entry slot index */
   4141	fsi = p->header.freelist;
   4142
   4143	/* open new linelock */
   4144	if (dtlck->index >= dtlck->maxcnt)
   4145		dtlck = (struct dt_lock *) txLinelock(dtlck);
   4146	lv = & dtlck->lv[dtlck->index];
   4147
   4148	lv->offset = fsi;
   4149
   4150	n = 1;
   4151	xsi = fsi;
   4152
   4153	t = &p->slot[fsi];
   4154	si = t->next;
   4155
   4156	/* find the last/only segment */
   4157	while (si < m && si >= 0) {
   4158		/* is next slot contiguous ? */
   4159		if (si != xsi + 1) {
   4160			/* close current linelock */
   4161			lv->length = n;
   4162			dtlck->index++;
   4163
   4164			/* open new linelock */
   4165			if (dtlck->index < dtlck->maxcnt)
   4166				lv++;
   4167			else {
   4168				dtlck = (struct dt_lock *) txLinelock(dtlck);
   4169				lv = & dtlck->lv[0];
   4170			}
   4171
   4172			lv->offset = si;
   4173			n = 0;
   4174		}
   4175
   4176		n++;
   4177		xsi = si;
   4178
   4179		t = &p->slot[si];
   4180		si = t->next;
   4181	}
   4182
   4183	/* close current linelock */
   4184	lv->length = n;
   4185	dtlck->index++;
   4186
   4187	*dtlock = dtlck;
   4188}
   4189
   4190
   4191/*
   4192 * NAME: dtModify
   4193 *
   4194 * FUNCTION: Modify the inode number part of a directory entry
   4195 *
   4196 * PARAMETERS:
   4197 *	tid	- Transaction id
   4198 *	ip	- Inode of parent directory
   4199 *	key	- Name of entry to be modified
   4200 *	orig_ino	- Original inode number expected in entry
   4201 *	new_ino	- New inode number to put into entry
   4202 *	flag	- JFS_RENAME
   4203 *
   4204 * RETURNS:
   4205 *	-ESTALE	- If entry found does not match orig_ino passed in
   4206 *	-ENOENT	- If no entry can be found to match key
   4207 *	0	- If successfully modified entry
   4208 */
   4209int dtModify(tid_t tid, struct inode *ip,
   4210	 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
   4211{
   4212	int rc;
   4213	s64 bn;
   4214	struct metapage *mp;
   4215	dtpage_t *p;
   4216	int index;
   4217	struct btstack btstack;
   4218	struct tlock *tlck;
   4219	struct dt_lock *dtlck;
   4220	struct lv *lv;
   4221	s8 *stbl;
   4222	int entry_si;		/* entry slot index */
   4223	struct ldtentry *entry;
   4224
   4225	/*
   4226	 *	search for the entry to modify:
   4227	 *
   4228	 * dtSearch() returns (leaf page pinned, index at which to modify).
   4229	 */
   4230	if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
   4231		return rc;
   4232
   4233	/* retrieve search result */
   4234	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
   4235
   4236	BT_MARK_DIRTY(mp, ip);
   4237	/*
   4238	 * acquire a transaction lock on the leaf page of named entry
   4239	 */
   4240	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
   4241	dtlck = (struct dt_lock *) & tlck->lock;
   4242
   4243	/* get slot index of the entry */
   4244	stbl = DT_GETSTBL(p);
   4245	entry_si = stbl[index];
   4246
   4247	/* linelock entry */
   4248	ASSERT(dtlck->index == 0);
   4249	lv = & dtlck->lv[0];
   4250	lv->offset = entry_si;
   4251	lv->length = 1;
   4252	dtlck->index++;
   4253
   4254	/* get the head/only segment */
   4255	entry = (struct ldtentry *) & p->slot[entry_si];
   4256
   4257	/* substitute the inode number of the entry */
   4258	entry->inumber = cpu_to_le32(new_ino);
   4259
   4260	/* unpin the leaf page */
   4261	DT_PUTPAGE(mp);
   4262
   4263	return 0;
   4264}