cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

jfs_xtree.c (69560B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) International Business Machines Corp., 2000-2005
      4 */
      5/*
      6 *	jfs_xtree.c: extent allocation descriptor B+-tree manager
      7 */
      8
      9#include <linux/fs.h>
     10#include <linux/module.h>
     11#include <linux/quotaops.h>
     12#include <linux/seq_file.h>
     13#include "jfs_incore.h"
     14#include "jfs_filsys.h"
     15#include "jfs_metapage.h"
     16#include "jfs_dmap.h"
     17#include "jfs_dinode.h"
     18#include "jfs_superblock.h"
     19#include "jfs_debug.h"
     20
     21/*
     22 * xtree local flag
     23 */
     24#define XT_INSERT	0x00000001
     25
     26/*
     27 *	xtree key/entry comparison: extent offset
     28 *
     29 * return:
     30 *	-1: k < start of extent
     31 *	 0: start_of_extent <= k <= end_of_extent
     32 *	 1: k > end_of_extent
     33 */
     34#define XT_CMP(CMP, K, X, OFFSET64)\
     35{\
     36	OFFSET64 = offsetXAD(X);\
     37	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
     38		((K) < OFFSET64) ? -1 : 0;\
     39}
     40
     41/* write a xad entry */
     42#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
     43{\
     44	(XAD)->flag = (FLAG);\
     45	XADoffset((XAD), (OFF));\
     46	XADlength((XAD), (LEN));\
     47	XADaddress((XAD), (ADDR));\
     48}
     49
     50#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
     51
     52/* get page buffer for specified block address */
     53/* ToDo: Replace this ugly macro with a function */
     54#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
     55do {									\
     56	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);	\
     57	if (!(RC)) {							\
     58		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
     59		    (le16_to_cpu((P)->header.nextindex) >		\
     60		     le16_to_cpu((P)->header.maxentry)) ||		\
     61		    (le16_to_cpu((P)->header.maxentry) >		\
     62		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
     63			jfs_error((IP)->i_sb,				\
     64				  "XT_GETPAGE: xtree page corrupt\n");	\
     65			BT_PUTPAGE(MP);					\
     66			MP = NULL;					\
     67			RC = -EIO;					\
     68		}							\
     69	}								\
     70} while (0)
     71
     72/* for consistency */
     73#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
     74
     75#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
     76	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor
 *
 * Describes the entry to insert and where to insert it; passed from
 * xtInsert() (and peers) into the split routines.
 */
struct xtsplit {
	struct metapage *mp;	/* pinned page at which insertion splits */
	s16 index;		/* insertion index within that page */
	u8 flag;		/* xad flag for the new entry */
	s64 off;		/* extent offset of the new entry */
	s64 addr;		/* extent block address of the new entry */
	int len;		/* extent length of the new entry */
	struct pxdlist *pxdlist;	/* preallocated blocks for split pages;
					 * NULL: xtSplitUp() allocates them */
};
     87
     88
     89/*
     90 *	statistics
     91 */
     92#ifdef CONFIG_JFS_STATISTICS
     93static struct {
     94	uint search;
     95	uint fastSearch;
     96	uint split;
     97} xtStat;
     98#endif
     99
    100
    101/*
    102 * forward references
    103 */
    104static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
    105		    struct btstack * btstack, int flag);
    106
    107static int xtSplitUp(tid_t tid,
    108		     struct inode *ip,
    109		     struct xtsplit * split, struct btstack * btstack);
    110
    111static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
    112		       struct metapage ** rmpp, s64 * rbnp);
    113
    114static int xtSplitRoot(tid_t tid, struct inode *ip,
    115		       struct xtsplit * split, struct metapage ** rmpp);
    116
    117/*
    118 *	xtLookup()
    119 *
    120 * function: map a single page into a physical extent;
    121 */
    122int xtLookup(struct inode *ip, s64 lstart,
    123	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
    124{
    125	int rc = 0;
    126	struct btstack btstack;
    127	int cmp;
    128	s64 bn;
    129	struct metapage *mp;
    130	xtpage_t *p;
    131	int index;
    132	xad_t *xad;
    133	s64 next, size, xoff, xend;
    134	int xlen;
    135	s64 xaddr;
    136
    137	*paddr = 0;
    138	*plen = llen;
    139
    140	if (!no_check) {
    141		/* is lookup offset beyond eof ? */
    142		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
    143		    JFS_SBI(ip->i_sb)->l2bsize;
    144		if (lstart >= size)
    145			return 0;
    146	}
    147
    148	/*
    149	 * search for the xad entry covering the logical extent
    150	 */
    151//search:
    152	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
    153		jfs_err("xtLookup: xtSearch returned %d", rc);
    154		return rc;
    155	}
    156
    157	/*
    158	 *	compute the physical extent covering logical extent
    159	 *
    160	 * N.B. search may have failed (e.g., hole in sparse file),
    161	 * and returned the index of the next entry.
    162	 */
    163	/* retrieve search result */
    164	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
    165
    166	/* is xad found covering start of logical extent ?
    167	 * lstart is a page start address,
    168	 * i.e., lstart cannot start in a hole;
    169	 */
    170	if (cmp) {
    171		if (next)
    172			*plen = min(next - lstart, llen);
    173		goto out;
    174	}
    175
    176	/*
    177	 * lxd covered by xad
    178	 */
    179	xad = &p->xad[index];
    180	xoff = offsetXAD(xad);
    181	xlen = lengthXAD(xad);
    182	xend = xoff + xlen;
    183	xaddr = addressXAD(xad);
    184
    185	/* initialize new pxd */
    186	*pflag = xad->flag;
    187	*paddr = xaddr + (lstart - xoff);
    188	/* a page must be fully covered by an xad */
    189	*plen = min(xend - lstart, llen);
    190
    191      out:
    192	XT_PUTPAGE(mp);
    193
    194	return rc;
    195}
    196
    197/*
    198 *	xtSearch()
    199 *
    200 * function:	search for the xad entry covering specified offset.
    201 *
    202 * parameters:
    203 *	ip	- file object;
    204 *	xoff	- extent offset;
    205 *	nextp	- address of next extent (if any) for search miss
    206 *	cmpp	- comparison result:
    207 *	btstack - traverse stack;
    208 *	flag	- search process flag (XT_INSERT);
    209 *
    210 * returns:
    211 *	btstack contains (bn, index) of search path traversed to the entry.
    212 *	*cmpp is set to result of comparison with the entry returned.
    213 *	the page containing the entry is pinned at exit.
    214 */
    215static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
    216		    int *cmpp, struct btstack * btstack, int flag)
    217{
    218	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    219	int rc = 0;
    220	int cmp = 1;		/* init for empty page */
    221	s64 bn;			/* block number */
    222	struct metapage *mp;	/* page buffer */
    223	xtpage_t *p;		/* page */
    224	xad_t *xad;
    225	int base, index, lim, btindex;
    226	struct btframe *btsp;
    227	int nsplit = 0;		/* number of pages to split */
    228	s64 t64;
    229	s64 next = 0;
    230
    231	INCREMENT(xtStat.search);
    232
    233	BT_CLR(btstack);
    234
    235	btstack->nsplit = 0;
    236
    237	/*
    238	 *	search down tree from root:
    239	 *
    240	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
    241	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
    242	 *
    243	 * if entry with search key K is not found
    244	 * internal page search find the entry with largest key Ki
    245	 * less than K which point to the child page to search;
    246	 * leaf page search find the entry with smallest key Kj
    247	 * greater than K so that the returned index is the position of
    248	 * the entry to be shifted right for insertion of new entry.
    249	 * for empty tree, search key is greater than any key of the tree.
    250	 *
    251	 * by convention, root bn = 0.
    252	 */
    253	for (bn = 0;;) {
    254		/* get/pin the page to search */
    255		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
    256		if (rc)
    257			return rc;
    258
    259		/* try sequential access heuristics with the previous
    260		 * access entry in target leaf page:
    261		 * once search narrowed down into the target leaf,
    262		 * key must either match an entry in the leaf or
    263		 * key entry does not exist in the tree;
    264		 */
    265//fastSearch:
    266		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
    267		    (p->header.flag & BT_LEAF) &&
    268		    (index = jfs_ip->btindex) <
    269		    le16_to_cpu(p->header.nextindex)) {
    270			xad = &p->xad[index];
    271			t64 = offsetXAD(xad);
    272			if (xoff < t64 + lengthXAD(xad)) {
    273				if (xoff >= t64) {
    274					*cmpp = 0;
    275					goto out;
    276				}
    277
    278				/* stop sequential access heuristics */
    279				goto binarySearch;
    280			} else {	/* (t64 + lengthXAD(xad)) <= xoff */
    281
    282				/* try next sequential entry */
    283				index++;
    284				if (index <
    285				    le16_to_cpu(p->header.nextindex)) {
    286					xad++;
    287					t64 = offsetXAD(xad);
    288					if (xoff < t64 + lengthXAD(xad)) {
    289						if (xoff >= t64) {
    290							*cmpp = 0;
    291							goto out;
    292						}
    293
    294						/* miss: key falls between
    295						 * previous and this entry
    296						 */
    297						*cmpp = 1;
    298						next = t64;
    299						goto out;
    300					}
    301
    302					/* (xoff >= t64 + lengthXAD(xad));
    303					 * matching entry may be further out:
    304					 * stop heuristic search
    305					 */
    306					/* stop sequential access heuristics */
    307					goto binarySearch;
    308				}
    309
    310				/* (index == p->header.nextindex);
    311				 * miss: key entry does not exist in
    312				 * the target leaf/tree
    313				 */
    314				*cmpp = 1;
    315				goto out;
    316			}
    317
    318			/*
    319			 * if hit, return index of the entry found, and
    320			 * if miss, where new entry with search key is
    321			 * to be inserted;
    322			 */
    323		      out:
    324			/* compute number of pages to split */
    325			if (flag & XT_INSERT) {
    326				if (p->header.nextindex ==	/* little-endian */
    327				    p->header.maxentry)
    328					nsplit++;
    329				else
    330					nsplit = 0;
    331				btstack->nsplit = nsplit;
    332			}
    333
    334			/* save search result */
    335			btsp = btstack->top;
    336			btsp->bn = bn;
    337			btsp->index = index;
    338			btsp->mp = mp;
    339
    340			/* update sequential access heuristics */
    341			jfs_ip->btindex = index;
    342
    343			if (nextp)
    344				*nextp = next;
    345
    346			INCREMENT(xtStat.fastSearch);
    347			return 0;
    348		}
    349
    350		/* well, ... full search now */
    351	      binarySearch:
    352		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
    353
    354		/*
    355		 * binary search with search key K on the current page
    356		 */
    357		for (base = XTENTRYSTART; lim; lim >>= 1) {
    358			index = base + (lim >> 1);
    359
    360			XT_CMP(cmp, xoff, &p->xad[index], t64);
    361			if (cmp == 0) {
    362				/*
    363				 *	search hit
    364				 */
    365				/* search hit - leaf page:
    366				 * return the entry found
    367				 */
    368				if (p->header.flag & BT_LEAF) {
    369					*cmpp = cmp;
    370
    371					/* compute number of pages to split */
    372					if (flag & XT_INSERT) {
    373						if (p->header.nextindex ==
    374						    p->header.maxentry)
    375							nsplit++;
    376						else
    377							nsplit = 0;
    378						btstack->nsplit = nsplit;
    379					}
    380
    381					/* save search result */
    382					btsp = btstack->top;
    383					btsp->bn = bn;
    384					btsp->index = index;
    385					btsp->mp = mp;
    386
    387					/* init sequential access heuristics */
    388					btindex = jfs_ip->btindex;
    389					if (index == btindex ||
    390					    index == btindex + 1)
    391						jfs_ip->btorder = BT_SEQUENTIAL;
    392					else
    393						jfs_ip->btorder = BT_RANDOM;
    394					jfs_ip->btindex = index;
    395
    396					return 0;
    397				}
    398				/* search hit - internal page:
    399				 * descend/search its child page
    400				 */
    401				if (index < le16_to_cpu(p->header.nextindex)-1)
    402					next = offsetXAD(&p->xad[index + 1]);
    403				goto next;
    404			}
    405
    406			if (cmp > 0) {
    407				base = index + 1;
    408				--lim;
    409			}
    410		}
    411
    412		/*
    413		 *	search miss
    414		 *
    415		 * base is the smallest index with key (Kj) greater than
    416		 * search key (K) and may be zero or maxentry index.
    417		 */
    418		if (base < le16_to_cpu(p->header.nextindex))
    419			next = offsetXAD(&p->xad[base]);
    420		/*
    421		 * search miss - leaf page:
    422		 *
    423		 * return location of entry (base) where new entry with
    424		 * search key K is to be inserted.
    425		 */
    426		if (p->header.flag & BT_LEAF) {
    427			*cmpp = cmp;
    428
    429			/* compute number of pages to split */
    430			if (flag & XT_INSERT) {
    431				if (p->header.nextindex ==
    432				    p->header.maxentry)
    433					nsplit++;
    434				else
    435					nsplit = 0;
    436				btstack->nsplit = nsplit;
    437			}
    438
    439			/* save search result */
    440			btsp = btstack->top;
    441			btsp->bn = bn;
    442			btsp->index = base;
    443			btsp->mp = mp;
    444
    445			/* init sequential access heuristics */
    446			btindex = jfs_ip->btindex;
    447			if (base == btindex || base == btindex + 1)
    448				jfs_ip->btorder = BT_SEQUENTIAL;
    449			else
    450				jfs_ip->btorder = BT_RANDOM;
    451			jfs_ip->btindex = base;
    452
    453			if (nextp)
    454				*nextp = next;
    455
    456			return 0;
    457		}
    458
    459		/*
    460		 * search miss - non-leaf page:
    461		 *
    462		 * if base is non-zero, decrement base by one to get the parent
    463		 * entry of the child page to search.
    464		 */
    465		index = base ? base - 1 : base;
    466
    467		/*
    468		 * go down to child page
    469		 */
    470	      next:
    471		/* update number of pages to split */
    472		if (p->header.nextindex == p->header.maxentry)
    473			nsplit++;
    474		else
    475			nsplit = 0;
    476
    477		/* push (bn, index) of the parent page/entry */
    478		if (BT_STACK_FULL(btstack)) {
    479			jfs_error(ip->i_sb, "stack overrun!\n");
    480			XT_PUTPAGE(mp);
    481			return -EIO;
    482		}
    483		BT_PUSH(btstack, bn, index);
    484
    485		/* get the child page block number */
    486		bn = addressXAD(&p->xad[index]);
    487
    488		/* unpin the parent page */
    489		XT_PUTPAGE(mp);
    490	}
    491}
    492
    493/*
    494 *	xtInsert()
    495 *
    496 * function:
    497 *
    498 * parameter:
    499 *	tid	- transaction id;
    500 *	ip	- file object;
    501 *	xflag	- extent flag (XAD_NOTRECORDED):
    502 *	xoff	- extent offset;
    503 *	xlen	- extent length;
    504 *	xaddrp	- extent address pointer (in/out):
    505 *		if (*xaddrp)
    506 *			caller allocated data extent at *xaddrp;
    507 *		else
    508 *			allocate data extent and return its xaddr;
    509 *	flag	-
    510 *
    511 * return:
    512 */
    513int xtInsert(tid_t tid,		/* transaction id */
    514	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
    515	     int flag)
    516{
    517	int rc = 0;
    518	s64 xaddr, hint;
    519	struct metapage *mp;	/* meta-page buffer */
    520	xtpage_t *p;		/* base B+-tree index page */
    521	s64 bn;
    522	int index, nextindex;
    523	struct btstack btstack;	/* traverse stack */
    524	struct xtsplit split;	/* split information */
    525	xad_t *xad;
    526	int cmp;
    527	s64 next;
    528	struct tlock *tlck;
    529	struct xtlock *xtlck;
    530
    531	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
    532
    533	/*
    534	 *	search for the entry location at which to insert:
    535	 *
    536	 * xtFastSearch() and xtSearch() both returns (leaf page
    537	 * pinned, index at which to insert).
    538	 * n.b. xtSearch() may return index of maxentry of
    539	 * the full page.
    540	 */
    541	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
    542		return rc;
    543
    544	/* retrieve search result */
    545	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
    546
    547	/* This test must follow XT_GETSEARCH since mp must be valid if
    548	 * we branch to out: */
    549	if ((cmp == 0) || (next && (xlen > next - xoff))) {
    550		rc = -EEXIST;
    551		goto out;
    552	}
    553
    554	/*
    555	 * allocate data extent requested
    556	 *
    557	 * allocation hint: last xad
    558	 */
    559	if ((xaddr = *xaddrp) == 0) {
    560		if (index > XTENTRYSTART) {
    561			xad = &p->xad[index - 1];
    562			hint = addressXAD(xad) + lengthXAD(xad) - 1;
    563		} else
    564			hint = 0;
    565		if ((rc = dquot_alloc_block(ip, xlen)))
    566			goto out;
    567		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
    568			dquot_free_block(ip, xlen);
    569			goto out;
    570		}
    571	}
    572
    573	/*
    574	 *	insert entry for new extent
    575	 */
    576	xflag |= XAD_NEW;
    577
    578	/*
    579	 *	if the leaf page is full, split the page and
    580	 *	propagate up the router entry for the new page from split
    581	 *
    582	 * The xtSplitUp() will insert the entry and unpin the leaf page.
    583	 */
    584	nextindex = le16_to_cpu(p->header.nextindex);
    585	if (nextindex == le16_to_cpu(p->header.maxentry)) {
    586		split.mp = mp;
    587		split.index = index;
    588		split.flag = xflag;
    589		split.off = xoff;
    590		split.len = xlen;
    591		split.addr = xaddr;
    592		split.pxdlist = NULL;
    593		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
    594			/* undo data extent allocation */
    595			if (*xaddrp == 0) {
    596				dbFree(ip, xaddr, (s64) xlen);
    597				dquot_free_block(ip, xlen);
    598			}
    599			return rc;
    600		}
    601
    602		*xaddrp = xaddr;
    603		return 0;
    604	}
    605
    606	/*
    607	 *	insert the new entry into the leaf page
    608	 */
    609	/*
    610	 * acquire a transaction lock on the leaf page;
    611	 *
    612	 * action: xad insertion/extension;
    613	 */
    614	BT_MARK_DIRTY(mp, ip);
    615
    616	/* if insert into middle, shift right remaining entries. */
    617	if (index < nextindex)
    618		memmove(&p->xad[index + 1], &p->xad[index],
    619			(nextindex - index) * sizeof(xad_t));
    620
    621	/* insert the new entry: mark the entry NEW */
    622	xad = &p->xad[index];
    623	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
    624
    625	/* advance next available entry index */
    626	le16_add_cpu(&p->header.nextindex, 1);
    627
    628	/* Don't log it if there are no links to the file */
    629	if (!test_cflag(COMMIT_Nolink, ip)) {
    630		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
    631		xtlck = (struct xtlock *) & tlck->lock;
    632		xtlck->lwm.offset =
    633		    (xtlck->lwm.offset) ? min(index,
    634					      (int)xtlck->lwm.offset) : index;
    635		xtlck->lwm.length =
    636		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
    637	}
    638
    639	*xaddrp = xaddr;
    640
    641      out:
    642	/* unpin the leaf page */
    643	XT_PUTPAGE(mp);
    644
    645	return rc;
    646}
    647
    648
    649/*
    650 *	xtSplitUp()
    651 *
    652 * function:
    653 *	split full pages as propagating insertion up the tree
    654 *
    655 * parameter:
    656 *	tid	- transaction id;
    657 *	ip	- file object;
    658 *	split	- entry parameter descriptor;
    659 *	btstack - traverse stack from xtSearch()
    660 *
    661 * return:
    662 */
    663static int
    664xtSplitUp(tid_t tid,
    665	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
    666{
    667	int rc = 0;
    668	struct metapage *smp;
    669	xtpage_t *sp;		/* split page */
    670	struct metapage *rmp;
    671	s64 rbn;		/* new right page block number */
    672	struct metapage *rcmp;
    673	xtpage_t *rcp;		/* right child page */
    674	s64 rcbn;		/* right child page block number */
    675	int skip;		/* index of entry of insertion */
    676	int nextindex;		/* next available entry index of p */
    677	struct btframe *parent;	/* parent page entry on traverse stack */
    678	xad_t *xad;
    679	s64 xaddr;
    680	int xlen;
    681	int nsplit;		/* number of pages split */
    682	struct pxdlist pxdlist;
    683	pxd_t *pxd;
    684	struct tlock *tlck;
    685	struct xtlock *xtlck;
    686
    687	smp = split->mp;
    688	sp = XT_PAGE(ip, smp);
    689
    690	/* is inode xtree root extension/inline EA area free ? */
    691	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
    692	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
    693	    (JFS_IP(ip)->mode2 & INLINEEA)) {
    694		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
    695		JFS_IP(ip)->mode2 &= ~INLINEEA;
    696
    697		BT_MARK_DIRTY(smp, ip);
    698		/*
    699		 * acquire a transaction lock on the leaf page;
    700		 *
    701		 * action: xad insertion/extension;
    702		 */
    703
    704		/* if insert into middle, shift right remaining entries. */
    705		skip = split->index;
    706		nextindex = le16_to_cpu(sp->header.nextindex);
    707		if (skip < nextindex)
    708			memmove(&sp->xad[skip + 1], &sp->xad[skip],
    709				(nextindex - skip) * sizeof(xad_t));
    710
    711		/* insert the new entry: mark the entry NEW */
    712		xad = &sp->xad[skip];
    713		XT_PUTENTRY(xad, split->flag, split->off, split->len,
    714			    split->addr);
    715
    716		/* advance next available entry index */
    717		le16_add_cpu(&sp->header.nextindex, 1);
    718
    719		/* Don't log it if there are no links to the file */
    720		if (!test_cflag(COMMIT_Nolink, ip)) {
    721			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
    722			xtlck = (struct xtlock *) & tlck->lock;
    723			xtlck->lwm.offset = (xtlck->lwm.offset) ?
    724			    min(skip, (int)xtlck->lwm.offset) : skip;
    725			xtlck->lwm.length =
    726			    le16_to_cpu(sp->header.nextindex) -
    727			    xtlck->lwm.offset;
    728		}
    729
    730		return 0;
    731	}
    732
    733	/*
    734	 * allocate new index blocks to cover index page split(s)
    735	 *
    736	 * allocation hint: ?
    737	 */
    738	if (split->pxdlist == NULL) {
    739		nsplit = btstack->nsplit;
    740		split->pxdlist = &pxdlist;
    741		pxdlist.maxnpxd = pxdlist.npxd = 0;
    742		pxd = &pxdlist.pxd[0];
    743		xlen = JFS_SBI(ip->i_sb)->nbperpage;
    744		for (; nsplit > 0; nsplit--, pxd++) {
    745			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
    746			    == 0) {
    747				PXDaddress(pxd, xaddr);
    748				PXDlength(pxd, xlen);
    749
    750				pxdlist.maxnpxd++;
    751
    752				continue;
    753			}
    754
    755			/* undo allocation */
    756
    757			XT_PUTPAGE(smp);
    758			return rc;
    759		}
    760	}
    761
    762	/*
    763	 * Split leaf page <sp> into <sp> and a new right page <rp>.
    764	 *
    765	 * The split routines insert the new entry into the leaf page,
    766	 * and acquire txLock as appropriate.
    767	 * return <rp> pinned and its block number <rpbn>.
    768	 */
    769	rc = (sp->header.flag & BT_ROOT) ?
    770	    xtSplitRoot(tid, ip, split, &rmp) :
    771	    xtSplitPage(tid, ip, split, &rmp, &rbn);
    772
    773	XT_PUTPAGE(smp);
    774
    775	if (rc)
    776		return -EIO;
    777	/*
    778	 * propagate up the router entry for the leaf page just split
    779	 *
    780	 * insert a router entry for the new page into the parent page,
    781	 * propagate the insert/split up the tree by walking back the stack
    782	 * of (bn of parent page, index of child page entry in parent page)
    783	 * that were traversed during the search for the page that split.
    784	 *
    785	 * the propagation of insert/split up the tree stops if the root
    786	 * splits or the page inserted into doesn't have to split to hold
    787	 * the new entry.
    788	 *
    789	 * the parent entry for the split page remains the same, and
    790	 * a new entry is inserted at its right with the first key and
    791	 * block number of the new right page.
    792	 *
    793	 * There are a maximum of 3 pages pinned at any time:
    794	 * right child, left parent and right parent (when the parent splits)
    795	 * to keep the child page pinned while working on the parent.
    796	 * make sure that all pins are released at exit.
    797	 */
    798	while ((parent = BT_POP(btstack)) != NULL) {
    799		/* parent page specified by stack frame <parent> */
    800
    801		/* keep current child pages <rcp> pinned */
    802		rcmp = rmp;
    803		rcbn = rbn;
    804		rcp = XT_PAGE(ip, rcmp);
    805
    806		/*
    807		 * insert router entry in parent for new right child page <rp>
    808		 */
    809		/* get/pin the parent page <sp> */
    810		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
    811		if (rc) {
    812			XT_PUTPAGE(rcmp);
    813			return rc;
    814		}
    815
    816		/*
    817		 * The new key entry goes ONE AFTER the index of parent entry,
    818		 * because the split was to the right.
    819		 */
    820		skip = parent->index + 1;
    821
    822		/*
    823		 * split or shift right remaining entries of the parent page
    824		 */
    825		nextindex = le16_to_cpu(sp->header.nextindex);
    826		/*
    827		 * parent page is full - split the parent page
    828		 */
    829		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
    830			/* init for parent page split */
    831			split->mp = smp;
    832			split->index = skip;	/* index at insert */
    833			split->flag = XAD_NEW;
    834			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
    835			split->len = JFS_SBI(ip->i_sb)->nbperpage;
    836			split->addr = rcbn;
    837
    838			/* unpin previous right child page */
    839			XT_PUTPAGE(rcmp);
    840
    841			/* The split routines insert the new entry,
    842			 * and acquire txLock as appropriate.
    843			 * return <rp> pinned and its block number <rpbn>.
    844			 */
    845			rc = (sp->header.flag & BT_ROOT) ?
    846			    xtSplitRoot(tid, ip, split, &rmp) :
    847			    xtSplitPage(tid, ip, split, &rmp, &rbn);
    848			if (rc) {
    849				XT_PUTPAGE(smp);
    850				return rc;
    851			}
    852
    853			XT_PUTPAGE(smp);
    854			/* keep new child page <rp> pinned */
    855		}
    856		/*
    857		 * parent page is not full - insert in parent page
    858		 */
    859		else {
    860			/*
    861			 * insert router entry in parent for the right child
    862			 * page from the first entry of the right child page:
    863			 */
    864			/*
    865			 * acquire a transaction lock on the parent page;
    866			 *
    867			 * action: router xad insertion;
    868			 */
    869			BT_MARK_DIRTY(smp, ip);
    870
    871			/*
    872			 * if insert into middle, shift right remaining entries
    873			 */
    874			if (skip < nextindex)
    875				memmove(&sp->xad[skip + 1], &sp->xad[skip],
    876					(nextindex -
    877					 skip) << L2XTSLOTSIZE);
    878
    879			/* insert the router entry */
    880			xad = &sp->xad[skip];
    881			XT_PUTENTRY(xad, XAD_NEW,
    882				    offsetXAD(&rcp->xad[XTENTRYSTART]),
    883				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
    884
    885			/* advance next available entry index. */
    886			le16_add_cpu(&sp->header.nextindex, 1);
    887
    888			/* Don't log it if there are no links to the file */
    889			if (!test_cflag(COMMIT_Nolink, ip)) {
    890				tlck = txLock(tid, ip, smp,
    891					      tlckXTREE | tlckGROW);
    892				xtlck = (struct xtlock *) & tlck->lock;
    893				xtlck->lwm.offset = (xtlck->lwm.offset) ?
    894				    min(skip, (int)xtlck->lwm.offset) : skip;
    895				xtlck->lwm.length =
    896				    le16_to_cpu(sp->header.nextindex) -
    897				    xtlck->lwm.offset;
    898			}
    899
    900			/* unpin parent page */
    901			XT_PUTPAGE(smp);
    902
    903			/* exit propagate up */
    904			break;
    905		}
    906	}
    907
    908	/* unpin current right page */
    909	XT_PUTPAGE(rmp);
    910
    911	return 0;
    912}
    913
    914
    915/*
    916 *	xtSplitPage()
    917 *
    918 * function:
    919 *	split a full non-root page into
    920 *	original/split/left page and new right page
    921 *	i.e., the original/split page remains as left page.
    922 *
    923 * parameter:
    924 *	int		tid,
    925 *	struct inode	*ip,
    926 *	struct xtsplit	*split,
    927 *	struct metapage	**rmpp,
    928 *	u64		*rbnp,
    929 *
    930 * return:
    931 *	Pointer to page in which to insert or NULL on error.
    932 */
    933static int
    934xtSplitPage(tid_t tid, struct inode *ip,
    935	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
    936{
    937	int rc = 0;
    938	struct metapage *smp;
    939	xtpage_t *sp;
    940	struct metapage *rmp;
    941	xtpage_t *rp;		/* new right page allocated */
    942	s64 rbn;		/* new right page block number */
    943	struct metapage *mp;
    944	xtpage_t *p;
    945	s64 nextbn;
    946	int skip, maxentry, middle, righthalf, n;
    947	xad_t *xad;
    948	struct pxdlist *pxdlist;
    949	pxd_t *pxd;
    950	struct tlock *tlck;
    951	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
    952	int quota_allocation = 0;
    953
    954	smp = split->mp;
    955	sp = XT_PAGE(ip, smp);
    956
    957	INCREMENT(xtStat.split);
    958
    959	pxdlist = split->pxdlist;
    960	pxd = &pxdlist->pxd[pxdlist->npxd];
    961	pxdlist->npxd++;
    962	rbn = addressPXD(pxd);
    963
    964	/* Allocate blocks to quota. */
    965	rc = dquot_alloc_block(ip, lengthPXD(pxd));
    966	if (rc)
    967		goto clean_up;
    968
    969	quota_allocation += lengthPXD(pxd);
    970
    971	/*
    972	 * allocate the new right page for the split
    973	 */
    974	rmp = get_metapage(ip, rbn, PSIZE, 1);
    975	if (rmp == NULL) {
    976		rc = -EIO;
    977		goto clean_up;
    978	}
    979
    980	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
    981
    982	BT_MARK_DIRTY(rmp, ip);
    983	/*
    984	 * action: new page;
    985	 */
    986
    987	rp = (xtpage_t *) rmp->data;
    988	rp->header.self = *pxd;
    989	rp->header.flag = sp->header.flag & BT_TYPE;
    990	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
    991	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
    992
    993	BT_MARK_DIRTY(smp, ip);
    994	/* Don't log it if there are no links to the file */
    995	if (!test_cflag(COMMIT_Nolink, ip)) {
    996		/*
    997		 * acquire a transaction lock on the new right page;
    998		 */
    999		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
   1000		rxtlck = (struct xtlock *) & tlck->lock;
   1001		rxtlck->lwm.offset = XTENTRYSTART;
   1002		/*
   1003		 * acquire a transaction lock on the split page
   1004		 */
   1005		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
   1006		sxtlck = (struct xtlock *) & tlck->lock;
   1007	}
   1008
   1009	/*
   1010	 * initialize/update sibling pointers of <sp> and <rp>
   1011	 */
   1012	nextbn = le64_to_cpu(sp->header.next);
   1013	rp->header.next = cpu_to_le64(nextbn);
   1014	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
   1015	sp->header.next = cpu_to_le64(rbn);
   1016
   1017	skip = split->index;
   1018
   1019	/*
   1020	 *	sequential append at tail (after last entry of last page)
   1021	 *
   1022	 * if splitting the last page on a level because of appending
   1023	 * a entry to it (skip is maxentry), it's likely that the access is
   1024	 * sequential. adding an empty page on the side of the level is less
   1025	 * work and can push the fill factor much higher than normal.
   1026	 * if we're wrong it's no big deal -  we will do the split the right
   1027	 * way next time.
   1028	 * (it may look like it's equally easy to do a similar hack for
   1029	 * reverse sorted data, that is, split the tree left, but it's not.
   1030	 * Be my guest.)
   1031	 */
   1032	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
   1033		/*
   1034		 * acquire a transaction lock on the new/right page;
   1035		 *
   1036		 * action: xad insertion;
   1037		 */
   1038		/* insert entry at the first entry of the new right page */
   1039		xad = &rp->xad[XTENTRYSTART];
   1040		XT_PUTENTRY(xad, split->flag, split->off, split->len,
   1041			    split->addr);
   1042
   1043		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
   1044
   1045		if (!test_cflag(COMMIT_Nolink, ip)) {
   1046			/* rxtlck->lwm.offset = XTENTRYSTART; */
   1047			rxtlck->lwm.length = 1;
   1048		}
   1049
   1050		*rmpp = rmp;
   1051		*rbnp = rbn;
   1052
   1053		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
   1054		return 0;
   1055	}
   1056
   1057	/*
   1058	 *	non-sequential insert (at possibly middle page)
   1059	 */
   1060
   1061	/*
   1062	 * update previous pointer of old next/right page of <sp>
   1063	 */
   1064	if (nextbn != 0) {
   1065		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
   1066		if (rc) {
   1067			XT_PUTPAGE(rmp);
   1068			goto clean_up;
   1069		}
   1070
   1071		BT_MARK_DIRTY(mp, ip);
   1072		/*
   1073		 * acquire a transaction lock on the next page;
   1074		 *
   1075		 * action:sibling pointer update;
   1076		 */
   1077		if (!test_cflag(COMMIT_Nolink, ip))
   1078			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
   1079
   1080		p->header.prev = cpu_to_le64(rbn);
   1081
   1082		/* sibling page may have been updated previously, or
   1083		 * it may be updated later;
   1084		 */
   1085
   1086		XT_PUTPAGE(mp);
   1087	}
   1088
   1089	/*
   1090	 * split the data between the split and new/right pages
   1091	 */
   1092	maxentry = le16_to_cpu(sp->header.maxentry);
   1093	middle = maxentry >> 1;
   1094	righthalf = maxentry - middle;
   1095
   1096	/*
   1097	 * skip index in old split/left page - insert into left page:
   1098	 */
   1099	if (skip <= middle) {
   1100		/* move right half of split page to the new right page */
   1101		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
   1102			righthalf << L2XTSLOTSIZE);
   1103
   1104		/* shift right tail of left half to make room for new entry */
   1105		if (skip < middle)
   1106			memmove(&sp->xad[skip + 1], &sp->xad[skip],
   1107				(middle - skip) << L2XTSLOTSIZE);
   1108
   1109		/* insert new entry */
   1110		xad = &sp->xad[skip];
   1111		XT_PUTENTRY(xad, split->flag, split->off, split->len,
   1112			    split->addr);
   1113
   1114		/* update page header */
   1115		sp->header.nextindex = cpu_to_le16(middle + 1);
   1116		if (!test_cflag(COMMIT_Nolink, ip)) {
   1117			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
   1118			    min(skip, (int)sxtlck->lwm.offset) : skip;
   1119		}
   1120
   1121		rp->header.nextindex =
   1122		    cpu_to_le16(XTENTRYSTART + righthalf);
   1123	}
   1124	/*
   1125	 * skip index in new right page - insert into right page:
   1126	 */
   1127	else {
   1128		/* move left head of right half to right page */
   1129		n = skip - middle;
   1130		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
   1131			n << L2XTSLOTSIZE);
   1132
   1133		/* insert new entry */
   1134		n += XTENTRYSTART;
   1135		xad = &rp->xad[n];
   1136		XT_PUTENTRY(xad, split->flag, split->off, split->len,
   1137			    split->addr);
   1138
   1139		/* move right tail of right half to right page */
   1140		if (skip < maxentry)
   1141			memmove(&rp->xad[n + 1], &sp->xad[skip],
   1142				(maxentry - skip) << L2XTSLOTSIZE);
   1143
   1144		/* update page header */
   1145		sp->header.nextindex = cpu_to_le16(middle);
   1146		if (!test_cflag(COMMIT_Nolink, ip)) {
   1147			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
   1148			    min(middle, (int)sxtlck->lwm.offset) : middle;
   1149		}
   1150
   1151		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
   1152						   righthalf + 1);
   1153	}
   1154
   1155	if (!test_cflag(COMMIT_Nolink, ip)) {
   1156		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
   1157		    sxtlck->lwm.offset;
   1158
   1159		/* rxtlck->lwm.offset = XTENTRYSTART; */
   1160		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
   1161		    XTENTRYSTART;
   1162	}
   1163
   1164	*rmpp = rmp;
   1165	*rbnp = rbn;
   1166
   1167	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
   1168	return rc;
   1169
   1170      clean_up:
   1171
   1172	/* Rollback quota allocation. */
   1173	if (quota_allocation)
   1174		dquot_free_block(ip, quota_allocation);
   1175
   1176	return (rc);
   1177}
   1178
   1179
   1180/*
   1181 *	xtSplitRoot()
   1182 *
   1183 * function:
   1184 *	split the full root page into original/root/split page and new
   1185 *	right page
   1186 *	i.e., root remains fixed in tree anchor (inode) and the root is
   1187 *	copied to a single new right child page since root page <<
   1188 *	non-root page, and the split root page contains a single entry
   1189 *	for the new right child page.
   1190 *
   1191 * parameter:
   1192 *	int		tid,
   1193 *	struct inode	*ip,
   1194 *	struct xtsplit	*split,
   1195 *	struct metapage	**rmpp)
   1196 *
 * return:
 *	0 on success, or a negative errno (e.g. -EIO) on failure;
 *	on success, *rmpp is set to the new right child page.
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;		/* split page: the in-inode root */
	struct metapage *rmp;	/* buffer for the new right child page */
	xtpage_t *rp;		/* new right child page */
	s64 rbn;		/* block number of the new child page */
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int rc;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 *	allocate a single (right) child page
	 *
	 * the disk extent was preallocated by the caller and is consumed
	 * from split->pxdlist.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc) {
		release_metapage(rmp);
		return rc;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	/* the child keeps the leaf/internal type of the old root */
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	/* a full non-root page holds PSIZE >> L2XTSLOTSIZE slots */
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 *	reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
   1324
   1325
   1326/*
   1327 *	xtExtend()
   1328 *
   1329 * function: extend in-place;
   1330 *
   1331 * note: existing extent may or may not have been committed.
   1332 * caller is responsible for pager buffer cache update, and
   1333 * working block allocation map update;
   1334 * update pmap: alloc whole extended extent;
   1335 */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended */
	/* search at xoff - 1: the extension starts right after the
	 * last block of the existing extent */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "extension is not contiguous\n");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	/* xlen becomes the total length; len is the amount that exceeds
	 * the maximum single-XAD length and must go into a new entry */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 *	extent overflow: insert entry for new extent
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	/* the old entry is grown to the maximum single-XAD length */
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
   1484
   1485/*
   1486 *	xtUpdate()
   1487 *
   1488 * function: update XAD;
   1489 *
   1490 *	update extent for allocated_but_not_recorded or
   1491 *	compressed extent;
   1492 *
   1493 * parameter:
   1494 *	nxad	- new XAD;
   1495 *		logical extent of the specified XAD must be completely
   1496 *		contained by an existing XAD;
   1497 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* there must exist extent to be tailgated */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "Could not find extent\n");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* snapshot the existing XAD that contains nXAD */
	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "nXAD in not completely contained within XAD\n");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

	/* nXAD does not start at the start of XAD: the left remainder
	 * stays, so there is nothing to merge on the left */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 * coalesce with left XAD
	 */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ? */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure the
		 * right extent gets logged.  If the left one is marked XAD_NEW,
		 * then we know it will be logged.  Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			/* the merged lXAD is now the working XAD; retry
			 * coalescing it with the right neighbour */
			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 * coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged.  If the right one is marked
		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(rxad->flag & XAD_NEW))
			rxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen)
			/* truncate XAD */
			XADlength(xad, xlen - nxlen);
		else {		/* (xlen == nxlen) */

			/* remove XAD */
			memmove(&p->xad[index], &p->xad[index + 1],
				(nextindex - index - 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
		}

		goto out;
	} else if (xoff == nxoff)
		goto out;

	/* sanity: updateRight below requires a left remainder */
	if (xoff >= nxoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xoff >= nxoff\n");
		return -EIO;
	}

	/*
	 * split XAD into (lXAD, nXAD):
	 *
	 *          |---nXAD--->
	 * --|----------XAD----------|--
	 *   |-lXAD-|
	 */
      updateRight:		/* (xoff < nxoff) */
	/* truncate old XAD as lXAD:not_recorded */
	xad = &p->xad[index];
	XADlength(xad, nxoff - xoff);

	/* insert nXAD:recorded */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {

		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag & ~XAD_NOTRECORDED;
		split.off = nxoff;
		split.len = nxlen;
		split.addr = nxaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		} else {
			/* is nXAD on new page ? */
			/* after a non-root split, entries past the midpoint
			 * moved to the new right sibling; translate newindex
			 * into that page's index space */
			if (newindex >
			    (le16_to_cpu(p->header.maxentry) >> 1)) {
				newindex =
				    newindex -
				    le16_to_cpu(p->header.nextindex) +
				    XTENTRYSTART;
				newpage = 1;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/*
	 * does nXAD force 3-way split ?
	 *
	 *          |---nXAD--->|
	 * --|----------XAD-------------|--
	 *   |-lXAD-|           |-rXAD -|
	 */
	if (nxoff + nxlen == xoff + xlen)
		goto out;

	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
	if (newpage) {
		/* close out old page */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(index0, (int)xtlck->lwm.offset) : index0;
			xtlck->lwm.length =
			    le16_to_cpu(p->header.nextindex) -
			    xtlck->lwm.offset;
		}

		bn = le64_to_cpu(p->header.next);
		XT_PUTPAGE(mp);

		/* get new right page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		BT_MARK_DIRTY(mp, ip);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
		}

		index0 = index = newindex;
	} else
		index++;

	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);
	/* the remainder past nXAD becomes the working XAD */
	xlen = xlen - (nxoff - xoff);
	xoff = nxoff;
	xaddr = nxaddr;

	/* recompute split pages */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		XT_PUTPAGE(mp);

		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "xtSearch failed\n");
			return -EIO;
		}

		if (index0 != index) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "unexpected value of index\n");
			return -EIO;
		}
	}

	/*
	 * split XAD into (nXAD, rXAD)
	 *
	 *          ---nXAD---|
	 * --|----------XAD----------|--
	 *                    |-rXAD-|
	 */
      updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
	/* update old XAD with nXAD:recorded */
	xad = &p->xad[index];
	*xad = *nxad;
	xad->flag = xflag & ~XAD_NOTRECORDED;

	/* insert rXAD:not_recorded */
	xoff = xoff + nxlen;
	xlen = xlen - nxlen;
	xaddr = xaddr + nxlen;
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
/*
printf("xtUpdate.updateLeft.split p:0x%p\n", p);
*/
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

      out:
	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index0, (int)xtlck->lwm.offset) : index0;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
   1922
   1923
   1924/*
   1925 *	xtAppend()
   1926 *
   1927 * function: grow in append mode from contiguous region specified ;
   1928 *
   1929 * parameter:
   1930 *	tid		- transaction id;
   1931 *	ip		- file object;
   1932 *	xflag		- extent flag:
   1933 *	xoff		- extent offset;
   1934 *	maxblocks	- max extent length;
   1935 *	xlen		- extent length (in/out);
   1936 *	xaddrp		- extent address pointer (in/out):
   1937 *	flag		-
   1938 *
   1939 * return:
   1940 */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* an extent already covering xoff means nothing to append */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/* clamp the request so it does not run into the next extent */
	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * index blocks are carved off the front of the contiguous region
	 * [xaddr, xaddr + maxblocks); the data extent follows them.
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation */
		/* NOTE(review): blocks allocated in earlier iterations are
		 * not freed here before bailing out -- verify whether the
		 * transaction abort path reclaims them */

		goto out;
	}

	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *	insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
   2093
   2094/*
   2095 *	xtInitRoot()
   2096 *
   2097 * initialize file root (inline in inode)
   2098 */
   2099void xtInitRoot(tid_t tid, struct inode *ip)
   2100{
   2101	xtpage_t *p;
   2102
   2103	/*
   2104	 * acquire a transaction lock on the root
   2105	 *
   2106	 * action:
   2107	 */
   2108	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
   2109		      tlckXTREE | tlckNEW);
   2110	p = &JFS_IP(ip)->i_xtroot;
   2111
   2112	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
   2113	p->header.nextindex = cpu_to_le16(XTENTRYSTART);
   2114
   2115	if (S_ISDIR(ip->i_mode))
   2116		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
   2117	else {
   2118		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
   2119		ip->i_size = 0;
   2120	}
   2121
   2122
   2123	return;
   2124}
   2125
   2126
   2127/*
   2128 * We can run into a deadlock truncating a file with a large number of
   2129 * xtree pages (large fragmented file).  A robust fix would entail a
   2130 * reservation system where we would reserve a number of metadata pages
   2131 * and tlocks which we would be guaranteed without a deadlock.  Without
   2132 * this, a partial fix is to limit number of metadata pages we will lock
   2133 * in a single transaction.  Currently we will truncate the file so that
   2134 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
   2135 * will be responsible for ensuring that the current transaction gets
   2136 * committed, and that subsequent transactions are created to truncate
   2137 * the file further if needed.
   2138 */
   2139#define MAX_TRUNCATE_LEAVES 50
   2140
   2141/*
   2142 *	xtTruncate()
   2143 *
   2144 * function:
   2145 *	traverse for truncation logging backward bottom up;
   2146 *	terminate at the last extent entry at the current subtree
   2147 *	root page covering new down size.
   2148 *	truncation may occur within the last extent entry.
   2149 *
   2150 * parameter:
   2151 *	int		tid,
   2152 *	struct inode	*ip,
   2153 *	s64		newsize,
   2154 *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
   2155 *
   2156 * return:
   2157 *
   2158 * note:
   2159 *	PWMAP:
   2160 *	 1. truncate (non-COMMIT_NOLINK file)
   2161 *	    by jfs_truncate() or jfs_open(O_TRUNC):
   2162 *	    xtree is updated;
   2163 *	 2. truncate index table of directory when last entry removed
   2164 *	map update via tlock at commit time;
   2165 *	PMAP:
   2166 *	 Call xtTruncate_pmap instead
   2167 *	WMAP:
   2168 *	 1. remove (free zero link count) on last reference release
   2169 *	    (pmap has been freed at commit zero link count);
   2170 *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
   2171 *	    xtree is updated;
   2172 *	 map update directly at truncation time;
   2173 *
   2174 *	if (DELETE)
   2175 *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
   2176 *	else if (TRUNCATE)
   2177 *		must write LOG_NOREDOPAGE for deleted index page;
   2178 *
   2179 * pages may already have been tlocked by anonymous transactions
   2180 * during file growth (i.e., write) before truncation;
   2181 *
   2182 * except last truncated entry, deleted entries remains as is
   2183 * in the page (nextindex is updated) for other use
   2184 * (e.g., log/update allocation map): this avoid copying the page
   2185 * info but delay free of pages;
   2186 *
   2187 */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;		/* truncation eof: first fs block past new size */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;	/* stack of parent entries for the descent */
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;		/* maplock for COMMIT_WMAP */
	s64 nfreed;
	int freed, log;
	int locked_leaves = 0;

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;	/* running count of freed blocks, reported to quota at out: */

	flag &= COMMIT_MAP;
	assert(flag != COMMIT_PMAP);	/* PMAP truncation is xtTruncate_pmap()'s job */

	if (flag == COMMIT_PWMAP)
		log = 1;	/* log via tlocks; map updated at commit time */
	else {
		log = 0;	/* COMMIT_WMAP: update working map directly */
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;


	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *	leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ? */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks
			 */
			XT_PUTPAGE(mp);
			/* report partial progress; caller commits and re-invokes */
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 * scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space.  This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* whole leaf deleted: handled after the loop */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		/* some entries beyond nextindex were deleted: trim the page */
		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WMAP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WMAP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* empty root: reset it to an empty leaf and finish */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's.  This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo:  tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		/* unlink the parent's tlock from the singly-linked list */
		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		/* re-link it at the tail so it is processed last */
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	dquot_free_block(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	return newsize;
}
   2719
   2720
   2721/*
   2722 *	xtTruncate_pmap()
   2723 *
   2724 * function:
   2725 *	Perform truncate to zero length for deleted file, leaving the
   2726 *	xtree and working map untouched.  This allows the file to
   2727 *	be accessed via open file handles, while the delete of the file
   2728 *	is committed to disk.
   2729 *
   2730 * parameter:
   2731 *	tid_t		tid,
   2732 *	struct inode	*ip,
   2733 *	s64		committed_size)
   2734 *
   2735 * return: new committed size
   2736 *
   2737 * note:
   2738 *
   2739 *	To avoid deadlock by holding too many transaction locks, the
   2740 *	truncation may be broken up into multiple transactions.
   2741 *	The committed_size keeps track of part of the file has been
   2742 *	freed from the pmaps.
   2743 */
s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
{
	s64 bn;
	struct btstack btstack;	/* stack of parent entries for the descent */
	int cmp;
	int index;
	int locked_leaves = 0;	/* leaves tlocked so far in this transaction */
	struct metapage *mp;
	xtpage_t *p;
	struct btframe *parent;
	int rc;
	struct tblock *tblk;
	struct tlock *tlck = NULL;
	xad_t *xad;
	int xlen;
	s64 xoff;
	struct xtlock *xtlck = NULL;

	/* save object truncation type */
	tblk = tid_to_tblock(tid);
	tblk->xflag |= COMMIT_PMAP;

	/* clear stack */
	BT_CLR(&btstack);

	if (committed_size) {
		/* resuming a broken-up truncation: seek directly to the
		 * extent containing the last block already committed
		 */
		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
		if (rc)
			return rc;

		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "did not find extent\n");
			return -EIO;
		}
	} else {
		/*
		 * start with root
		 *
		 * root resides in the inode
		 */
		bn = 0;

		/*
		 * first access of each page:
		 * (also re-entered from getChild below with bn set
		 * to the child page)
		 */
      getPage:
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* process entries backward from last index */
		index = le16_to_cpu(p->header.nextindex) - 1;

		if (p->header.flag & BT_INTERNAL)
			goto getChild;
	}

	/*
	 *	leaf page
	 */

	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
		/*
		 * We need to limit the size of the transaction
		 * to avoid exhausting pagecache & tlocks
		 */
		/* return size at end of this extent; caller commits
		 * and calls again with it as committed_size
		 */
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		XT_PUTPAGE(mp);
		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
	}
	tlck = txLock(tid, ip, mp, tlckXTREE);
	tlck->type = tlckXTREE | tlckFREE;
	xtlck = (struct xtlock *) & tlck->lock;
	xtlck->hwm.offset = index;


	XT_PUTPAGE(mp);

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		/* txCommit() with tlckFREE:
		 * free child extents covered by parent;
		 * invalidate parent if COMMIT_PWMAP;
		 */
		tlck = txLock(tid, ip, mp, tlckXTREE);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
		tlck->type = tlckXTREE | tlckFREE;

		XT_PUTPAGE(mp);

		/* NOTE(review): p->header.flag is read after XT_PUTPAGE
		 * released the page; presumably the tlocked metapage
		 * stays valid here -- confirm
		 */
		if (p->header.flag & BT_ROOT) {

			goto out;
		} else {
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else
		index--;
	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:

	return 0;
}
   2901
   2902#ifdef CONFIG_JFS_STATISTICS
   2903int jfs_xtstat_proc_show(struct seq_file *m, void *v)
   2904{
   2905	seq_printf(m,
   2906		       "JFS Xtree statistics\n"
   2907		       "====================\n"
   2908		       "searches = %d\n"
   2909		       "fast searches = %d\n"
   2910		       "splits = %d\n",
   2911		       xtStat.search,
   2912		       xtStat.fastSearch,
   2913		       xtStat.split);
   2914	return 0;
   2915}
   2916#endif