cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

jfs_imap.c (85090B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) International Business Machines Corp., 2000-2004
      4 */
      5
      6/*
      7 *	jfs_imap.c: inode allocation map manager
      8 *
      9 * Serialization:
     10 *   Each AG has a simple lock which is used to control the serialization of
     11 *	the AG level lists.  This lock should be taken first whenever an AG
     12 *	level list will be modified or accessed.
     13 *
     14 *   Each IAG is locked by obtaining the buffer for the IAG page.
     15 *
     16 *   There is also an inode lock for the inode map inode.  A read lock needs to
     17 *	be taken whenever an IAG is read from the map or the global level
     18 *	information is read.  A write lock needs to be taken whenever the global
     19 *	level information is modified or an atomic operation needs to be used.
     20 *
     21 *	If more than one IAG is read at one time, the read lock may not
     22 *	be given up until all of the IAG's are read.  Otherwise, a deadlock
     23 *	may occur when trying to obtain the read lock while another thread
     24 *	holding the read lock is waiting on the IAG already being held.
     25 *
     26 *   The control page of the inode map is read into memory by diMount().
     27 *	Thereafter it should only be modified in memory and then it will be
     28 *	written out when the filesystem is unmounted by diUnmount().
     29 */
     30
     31#include <linux/fs.h>
     32#include <linux/buffer_head.h>
     33#include <linux/pagemap.h>
     34#include <linux/quotaops.h>
     35#include <linux/slab.h>
     36
     37#include "jfs_incore.h"
     38#include "jfs_inode.h"
     39#include "jfs_filsys.h"
     40#include "jfs_dinode.h"
     41#include "jfs_dmap.h"
     42#include "jfs_imap.h"
     43#include "jfs_metapage.h"
     44#include "jfs_superblock.h"
     45#include "jfs_debug.h"
     46
     47/*
     48 * imap locks
     49 */
     50/* iag free list lock */
     51#define IAGFREE_LOCK_INIT(imap)		mutex_init(&imap->im_freelock)
     52#define IAGFREE_LOCK(imap)		mutex_lock(&imap->im_freelock)
     53#define IAGFREE_UNLOCK(imap)		mutex_unlock(&imap->im_freelock)
     54
     55/* per ag iag list locks */
     56#define AG_LOCK_INIT(imap,index)	mutex_init(&(imap->im_aglock[index]))
     57#define AG_LOCK(imap,agno)		mutex_lock(&imap->im_aglock[agno])
     58#define AG_UNLOCK(imap,agno)		mutex_unlock(&imap->im_aglock[agno])
     59
     60/*
     61 * forward references: prototypes for file-local (static) helpers that are used before they are defined
     62 */
     63static int diAllocAG(struct inomap *, int, bool, struct inode *);
     64static int diAllocAny(struct inomap *, int, bool, struct inode *);
     65static int diAllocBit(struct inomap *, struct iag *, int);
     66static int diAllocExt(struct inomap *, int, struct inode *);
     67static int diAllocIno(struct inomap *, int, struct inode *);
     68static int diFindFree(u32, int);
     69static int diNewExt(struct inomap *, struct iag *, int);
     70static int diNewIAG(struct inomap *, int *, int, struct metapage **);
     71static void duplicateIXtree(struct super_block *, s64, int, s64 *);
     72
     73static int diIAGRead(struct inomap * imap, int, struct metapage **);	/* called as (imap, iagno, &mp); callers treat non-zero as an error */
     74static int copy_from_dinode(struct dinode *, struct inode *);	/* disk inode -> in-memory inode; callers treat non-zero as failure */
     75static void copy_to_dinode(struct dinode *, struct inode *);	/* in-memory inode -> disk inode */
     76
     77/*
     78 * NAME:	diMount()
     79 *
     80 * FUNCTION:	initialize the incore inode map control structures for
     81 *		a fileset or aggregate init time.
     82 *
     83 *		the inode map's control structure (dinomap) is
     84 *		brought in from disk and placed in virtual memory.
     85 *
     86 * PARAMETERS:
     87 *	ipimap	- pointer to inode map inode for the aggregate or fileset.
     88 *
     89 * RETURN VALUES:
     90 *	0	- success
     91 *	-ENOMEM	- insufficient free virtual memory.
     92 *	-EIO	- i/o error.
     93 */
     94int diMount(struct inode *ipimap)
     95{
     96	struct inomap *imap;
     97	struct metapage *mp;
     98	int index;
     99	struct dinomap_disk *dinom_le;
    100
    101	/*
    102	 * allocate/initialize the in-memory inode map control structure
    103	 */
    104	/* allocate the in-memory inode map control structure. */
    105	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
    106	if (imap == NULL)
    107		return -ENOMEM;
    108
    109	/* read the on-disk inode map control structure. */
    110
    111	mp = read_metapage(ipimap,
    112			   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
    113			   PSIZE, 0);
    114	if (mp == NULL) {
    115		kfree(imap);
    116		return -EIO;
    117	}
    118
    119	/* copy the on-disk version to the in-memory version. */
    120	dinom_le = (struct dinomap_disk *) mp->data;
    121	imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
    122	imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
    123	atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
    124	atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
    125	imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
    126	imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
    127	for (index = 0; index < MAXAG; index++) {
    128		imap->im_agctl[index].inofree =
    129		    le32_to_cpu(dinom_le->in_agctl[index].inofree);
    130		imap->im_agctl[index].extfree =
    131		    le32_to_cpu(dinom_le->in_agctl[index].extfree);
    132		imap->im_agctl[index].numinos =
    133		    le32_to_cpu(dinom_le->in_agctl[index].numinos);
    134		imap->im_agctl[index].numfree =
    135		    le32_to_cpu(dinom_le->in_agctl[index].numfree);
    136	}
    137
    138	/* release the buffer. */
    139	release_metapage(mp);
    140
    141	/*
    142	 * allocate/initialize inode allocation map locks
    143	 */
    144	/* allocate and init iag free list lock */
    145	IAGFREE_LOCK_INIT(imap);
    146
    147	/* allocate and init ag list locks */
    148	for (index = 0; index < MAXAG; index++) {
    149		AG_LOCK_INIT(imap, index);
    150	}
    151
    152	/* bind the inode map inode and inode map control structure
    153	 * to each other.
    154	 */
    155	imap->im_ipimap = ipimap;
    156	JFS_IP(ipimap)->i_imap = imap;
    157
    158	return (0);
    159}
    160
    161
    162/*
    163 * NAME:	diUnmount()
    164 *
    165 * FUNCTION:	write to disk the incore inode map control structures for
    166 *		a fileset or aggregate at unmount time.
    167 *
    168 * PARAMETERS:
    169 *	ipimap	- pointer to inode map inode for the aggregate or fileset.
    170 *
    171 * RETURN VALUES:
    172 *	0	- success
    173 *	-ENOMEM	- insufficient free virtual memory.
    174 *	-EIO	- i/o error.
    175 */
    176int diUnmount(struct inode *ipimap, int mounterror)
    177{
    178	struct inomap *imap = JFS_IP(ipimap)->i_imap;
    179
    180	/*
    181	 * update the on-disk inode map control structure
    182	 */
    183
    184	if (!(mounterror || isReadOnly(ipimap)))
    185		diSync(ipimap);
    186
    187	/*
    188	 * Invalidate the page cache buffers
    189	 */
    190	truncate_inode_pages(ipimap->i_mapping, 0);
    191
    192	/*
    193	 * free in-memory control structure
    194	 */
    195	kfree(imap);
    196
    197	return (0);
    198}
    199
    200
    201/*
    202 *	diSync()
    203 */
    204int diSync(struct inode *ipimap)
    205{
    206	struct dinomap_disk *dinom_le;
    207	struct inomap *imp = JFS_IP(ipimap)->i_imap;
    208	struct metapage *mp;
    209	int index;
    210
    211	/*
    212	 * write imap global conrol page
    213	 */
    214	/* read the on-disk inode map control structure */
    215	mp = get_metapage(ipimap,
    216			  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
    217			  PSIZE, 0);
    218	if (mp == NULL) {
    219		jfs_err("diSync: get_metapage failed!");
    220		return -EIO;
    221	}
    222
    223	/* copy the in-memory version to the on-disk version */
    224	dinom_le = (struct dinomap_disk *) mp->data;
    225	dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
    226	dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
    227	dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
    228	dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
    229	dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
    230	dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
    231	for (index = 0; index < MAXAG; index++) {
    232		dinom_le->in_agctl[index].inofree =
    233		    cpu_to_le32(imp->im_agctl[index].inofree);
    234		dinom_le->in_agctl[index].extfree =
    235		    cpu_to_le32(imp->im_agctl[index].extfree);
    236		dinom_le->in_agctl[index].numinos =
    237		    cpu_to_le32(imp->im_agctl[index].numinos);
    238		dinom_le->in_agctl[index].numfree =
    239		    cpu_to_le32(imp->im_agctl[index].numfree);
    240	}
    241
    242	/* write out the control structure */
    243	write_metapage(mp);
    244
    245	/*
    246	 * write out dirty pages of imap
    247	 */
    248	filemap_write_and_wait(ipimap->i_mapping);
    249
    250	diWriteSpecial(ipimap, 0);
    251
    252	return (0);
    253}
    254
    255
    256/*
    257 * NAME:	diRead()
    258 *
    259 * FUNCTION:	initialize an incore inode from disk.
    260 *
    261 *		on entry, the specifed incore inode should itself
    262 *		specify the disk inode number corresponding to the
    263 *		incore inode (i.e. i_number should be initialized).
    264 *
    265 *		this routine handles incore inode initialization for
    266 *		both "special" and "regular" inodes.  special inodes
    267 *		are those required early in the mount process and
    268 *		require special handling since much of the file system
    269 *		is not yet initialized.  these "special" inodes are
    270 *		identified by a NULL inode map inode pointer and are
    271 *		actually initialized by a call to diReadSpecial().
    272 *
    273 *		for regular inodes, the iag describing the disk inode
    274 *		is read from disk to determine the inode extent address
    275 *		for the disk inode.  with the inode extent address in
    276 *		hand, the page of the extent that contains the disk
    277 *		inode is read and the disk inode is copied to the
    278 *		incore inode.
    279 *
    280 * PARAMETERS:
    281 *	ip	-  pointer to incore inode to be initialized from disk.
    282 *
    283 * RETURN VALUES:
    284 *	0	- success
    285 *	-EIO	- i/o error.
    286 *	-ENOMEM	- insufficient memory
    287 *
    288 */
    289int diRead(struct inode *ip)
    290{
    291	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
    292	int iagno, ino, extno, rc;
    293	struct inode *ipimap;
    294	struct dinode *dp;
    295	struct iag *iagp;
    296	struct metapage *mp;
    297	s64 blkno, agstart;
    298	struct inomap *imap;
    299	int block_offset;
    300	int inodes_left;
    301	unsigned long pageno;
    302	int rel_inode;
    303
    304	jfs_info("diRead: ino = %ld", ip->i_ino);
    305
    306	ipimap = sbi->ipimap;
    307	JFS_IP(ip)->ipimap = ipimap;
    308
    309	/* determine the iag number for this inode (number) */
    310	iagno = INOTOIAG(ip->i_ino);
    311
    312	/* read the iag */
    313	imap = JFS_IP(ipimap)->i_imap;
    314	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
    315	rc = diIAGRead(imap, iagno, &mp);
    316	IREAD_UNLOCK(ipimap);
    317	if (rc) {
    318		jfs_err("diRead: diIAGRead returned %d", rc);
    319		return (rc);
    320	}
    321
    322	iagp = (struct iag *) mp->data;
    323
    324	/* determine inode extent that holds the disk inode */
    325	ino = ip->i_ino & (INOSPERIAG - 1);
    326	extno = ino >> L2INOSPEREXT;
    327
    328	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
    329	    (addressPXD(&iagp->inoext[extno]) == 0)) {
    330		release_metapage(mp);
    331		return -ESTALE;
    332	}
    333
    334	/* get disk block number of the page within the inode extent
    335	 * that holds the disk inode.
    336	 */
    337	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
    338
    339	/* get the ag for the iag */
    340	agstart = le64_to_cpu(iagp->agstart);
    341
    342	release_metapage(mp);
    343
    344	rel_inode = (ino & (INOSPERPAGE - 1));
    345	pageno = blkno >> sbi->l2nbperpage;
    346
    347	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
    348		/*
    349		 * OS/2 didn't always align inode extents on page boundaries
    350		 */
    351		inodes_left =
    352		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;
    353
    354		if (rel_inode < inodes_left)
    355			rel_inode += block_offset << sbi->l2niperblk;
    356		else {
    357			pageno += 1;
    358			rel_inode -= inodes_left;
    359		}
    360	}
    361
    362	/* read the page of disk inode */
    363	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
    364	if (!mp) {
    365		jfs_err("diRead: read_metapage failed");
    366		return -EIO;
    367	}
    368
    369	/* locate the disk inode requested */
    370	dp = (struct dinode *) mp->data;
    371	dp += rel_inode;
    372
    373	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
    374		jfs_error(ip->i_sb, "i_ino != di_number\n");
    375		rc = -EIO;
    376	} else if (le32_to_cpu(dp->di_nlink) == 0)
    377		rc = -ESTALE;
    378	else
    379		/* copy the disk inode to the in-memory inode */
    380		rc = copy_from_dinode(dp, ip);
    381
    382	release_metapage(mp);
    383
    384	/* set the ag for the inode */
    385	JFS_IP(ip)->agstart = agstart;
    386	JFS_IP(ip)->active_ag = -1;
    387
    388	return (rc);
    389}
    390
    391
    392/*
    393 * NAME:	diReadSpecial()
    394 *
    395 * FUNCTION:	initialize a 'special' inode from disk.
    396 *
    397 *		this routines handles aggregate level inodes.  The
    398 *		inode cache cannot differentiate between the
    399 *		aggregate inodes and the filesystem inodes, so we
    400 *		handle these here.  We don't actually use the aggregate
    401 *		inode map, since these inodes are at a fixed location
    402 *		and in some cases the aggregate inode map isn't initialized
    403 *		yet.
    404 *
    405 * PARAMETERS:
    406 *	sb - filesystem superblock
    407 *	inum - aggregate inode number
    408 *	secondary - 1 if secondary aggregate inode table
    409 *
    410 * RETURN VALUES:
    411 *	new inode	- success
    412 *	NULL		- i/o error.
    413 */
    414struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
    415{
    416	struct jfs_sb_info *sbi = JFS_SBI(sb);
    417	uint address;
    418	struct dinode *dp;
    419	struct inode *ip;
    420	struct metapage *mp;
    421
    422	ip = new_inode(sb);
    423	if (ip == NULL) {
    424		jfs_err("diReadSpecial: new_inode returned NULL!");
    425		return ip;
    426	}
    427
    428	if (secondary) {
    429		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
    430		JFS_IP(ip)->ipimap = sbi->ipaimap2;
    431	} else {
    432		address = AITBL_OFF >> L2PSIZE;
    433		JFS_IP(ip)->ipimap = sbi->ipaimap;
    434	}
    435
    436	ASSERT(inum < INOSPEREXT);
    437
    438	ip->i_ino = inum;
    439
    440	address += inum >> 3;	/* 8 inodes per 4K page */
    441
    442	/* read the page of fixed disk inode (AIT) in raw mode */
    443	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
    444	if (mp == NULL) {
    445		set_nlink(ip, 1);	/* Don't want iput() deleting it */
    446		iput(ip);
    447		return (NULL);
    448	}
    449
    450	/* get the pointer to the disk inode of interest */
    451	dp = (struct dinode *) (mp->data);
    452	dp += inum % 8;		/* 8 inodes per 4K page */
    453
    454	/* copy on-disk inode to in-memory inode */
    455	if ((copy_from_dinode(dp, ip)) != 0) {
    456		/* handle bad return by returning NULL for ip */
    457		set_nlink(ip, 1);	/* Don't want iput() deleting it */
    458		iput(ip);
    459		/* release the page */
    460		release_metapage(mp);
    461		return (NULL);
    462
    463	}
    464
    465	ip->i_mapping->a_ops = &jfs_metapage_aops;
    466	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
    467
    468	/* Allocations to metadata inodes should not affect quotas */
    469	ip->i_flags |= S_NOQUOTA;
    470
    471	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
    472		sbi->gengen = le32_to_cpu(dp->di_gengen);
    473		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
    474	}
    475
    476	/* release the page */
    477	release_metapage(mp);
    478
    479	inode_fake_hash(ip);
    480
    481	return (ip);
    482}
    483
    484/*
    485 * NAME:	diWriteSpecial()
    486 *
    487 * FUNCTION:	Write the special inode to disk
    488 *
    489 * PARAMETERS:
    490 *	ip - special inode
    491 *	secondary - 1 if secondary aggregate inode table
    492 *
    493 * RETURN VALUES: none
    494 */
    495
    496void diWriteSpecial(struct inode *ip, int secondary)
    497{
    498	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
    499	uint address;
    500	struct dinode *dp;
    501	ino_t inum = ip->i_ino;
    502	struct metapage *mp;
    503
    504	if (secondary)
    505		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
    506	else
    507		address = AITBL_OFF >> L2PSIZE;
    508
    509	ASSERT(inum < INOSPEREXT);
    510
    511	address += inum >> 3;	/* 8 inodes per 4K page */
    512
    513	/* read the page of fixed disk inode (AIT) in raw mode */
    514	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
    515	if (mp == NULL) {
    516		jfs_err("diWriteSpecial: failed to read aggregate inode extent!");
    517		return;
    518	}
    519
    520	/* get the pointer to the disk inode of interest */
    521	dp = (struct dinode *) (mp->data);
    522	dp += inum % 8;		/* 8 inodes per 4K page */
    523
    524	/* copy on-disk inode to in-memory inode */
    525	copy_to_dinode(dp, ip);
    526	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
    527
    528	if (inum == FILESYSTEM_I)
    529		dp->di_gengen = cpu_to_le32(sbi->gengen);
    530
    531	/* write the page */
    532	write_metapage(mp);
    533}
    534
    535/*
    536 * NAME:	diFreeSpecial()
    537 *
    538 * FUNCTION:	Free allocated space for special inode
    539 */
    540void diFreeSpecial(struct inode *ip)
    541{
    542	if (ip == NULL) {
    543		jfs_err("diFreeSpecial called with NULL ip!");
    544		return;
    545	}
    546	filemap_write_and_wait(ip->i_mapping);
    547	truncate_inode_pages(ip->i_mapping, 0);
    548	iput(ip);
    549}
    550
    551
    552
    553/*
    554 * NAME:	diWrite()
    555 *
    556 * FUNCTION:	write the on-disk inode portion of the in-memory inode
    557 *		to its corresponding on-disk inode.
    558 *
    559 *		on entry, the specified incore inode should itself
    560 *		specify the disk inode number corresponding to the
    561 *		incore inode (i.e. i_number should be initialized).
    562 *
    563 *		the inode contains the inode extent address for the disk
    564 *		inode.  with the inode extent address in hand, the
    565 *		page of the extent that contains the disk inode is
    566 *		read and the disk inode portion of the incore inode
    567 *		is copied to the disk inode.
    568 *
    569 * PARAMETERS:
    570 *	tid -  transaction id
    571 *	ip  -  pointer to incore inode to be written to the inode extent.
    572 *
    573 * RETURN VALUES:
    574 *	0	- success
    575 *	-EIO	- i/o error, or the inode's extent descriptor (ixpxd) is invalid.
    576 */
    577int diWrite(tid_t tid, struct inode *ip)
    578{
    579	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
    580	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    581	int rc = 0;	/* never reassigned below: the final return is always 0 */
    582	s32 ino;
    583	struct dinode *dp;
    584	s64 blkno;
    585	int block_offset;
    586	int inodes_left;
    587	struct metapage *mp;
    588	unsigned long pageno;
    589	int rel_inode;
    590	int dioffset;
    591	struct inode *ipimap;
    592	uint type;
    593	lid_t lid;
    594	struct tlock *ditlck, *tlck;
    595	struct linelock *dilinelock, *ilinelock;
    596	struct lv *lv;
    597	int n;
    598
    599	ipimap = jfs_ip->ipimap;
    600
    601	ino = ip->i_ino & (INOSPERIAG - 1);	/* inode number relative to its iag */
    602
    603	if (!addressPXD(&(jfs_ip->ixpxd)) ||
    604	    (lengthPXD(&(jfs_ip->ixpxd)) !=
    605	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
    606		jfs_error(ip->i_sb, "ixpxd invalid\n");
    607		return -EIO;
    608	}
    609
    610	/*
    611	 * read the page of disk inode containing the specified inode:
    612	 */
    613	/* compute the block address of the page */
    614	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
    615
    616	rel_inode = (ino & (INOSPERPAGE - 1));
    617	pageno = blkno >> sbi->l2nbperpage;
    618
    619	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
    620		/*
    621		 * OS/2 didn't always align inode extents on page boundaries
    622		 */
    623		inodes_left =
    624		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
    625
    626		if (rel_inode < inodes_left)
    627			rel_inode += block_offset << sbi->l2niperblk;
    628		else {
    629			pageno += 1;
    630			rel_inode -= inodes_left;
    631		}
    632	}
    633	/* read the page of disk inode */
    634      retry:	/* re-entered whenever txLock() below returns NULL */
    635	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
    636	if (!mp)
    637		return -EIO;
    638
    639	/* get the pointer to the disk inode */
    640	dp = (struct dinode *) mp->data;
    641	dp += rel_inode;
    642
    643	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;	/* derived from ino, not from the OS/2-adjusted rel_inode */
    644
    645	/*
    646	 * acquire transaction lock on the on-disk inode;
    647	 * N.B. tlock is acquired on ipimap not ip;
    648	 */
    649	if ((ditlck =
    650	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
    651		goto retry;
    652	dilinelock = (struct linelock *) & ditlck->lock;
    653
    654	/*
    655	 * copy btree root from in-memory inode to on-disk inode
    656	 *
    657	 * (tlock is taken from inline B+-tree root in in-memory
    658	 * inode when the B+-tree root is updated, which is pointed
    659	 * by jfs_ip->blid as well as being on tx tlock list)
    660	 *
    661	 * further processing of btree root is based on the copy
    662	 * in in-memory inode, where txLog() will log from, and,
    663	 * for xtree root, txUpdateMap() will update map and reset
    664	 * XAD_NEW bit;
    665	 */
    666
    667	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
    668		/*
    669		 * This is the special xtree inside the directory for storing
    670		 * the directory table
    671		 */
    672		xtpage_t *p, *xp;
    673		xad_t *xad;
    674
    675		jfs_ip->xtlid = 0;
    676		tlck = lid_to_tlock(lid);
    677		assert(tlck->type & tlckXTREE);
    678		tlck->type |= tlckBTROOT;
    679		tlck->mp = mp;
    680		ilinelock = (struct linelock *) & tlck->lock;
    681
    682		/*
    683		 * copy xtree root from inode to dinode: only the ranges recorded in the linelock's lv[] are copied
    684		 */
    685		p = &jfs_ip->i_xtroot;
    686		xp = (xtpage_t *) &dp->di_dirtable;
    687		lv = ilinelock->lv;
    688		for (n = 0; n < ilinelock->index; n++, lv++) {
    689			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
    690			       lv->length << L2XTSLOTSIZE);
    691		}
    692
    693		/* reset on-disk (metadata page) xtree XAD_NEW bit */
    694		xad = &xp->xad[XTENTRYSTART];
    695		for (n = XTENTRYSTART;
    696		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
    697			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
    698				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
    699	}
    700
    701	if ((lid = jfs_ip->blid) == 0)	/* no btree-root tlock pending: skip the root copy */
    702		goto inlineData;
    703	jfs_ip->blid = 0;
    704
    705	tlck = lid_to_tlock(lid);
    706	type = tlck->type;	/* sampled before tlckBTROOT is OR-ed in */
    707	tlck->type |= tlckBTROOT;
    708	tlck->mp = mp;
    709	ilinelock = (struct linelock *) & tlck->lock;
    710
    711	/*
    712	 *	regular file: 16 byte (XAD slot) granularity
    713	 */
    714	if (type & tlckXTREE) {
    715		xtpage_t *p, *xp;
    716		xad_t *xad;
    717
    718		/*
    719		 * copy xtree root from inode to dinode: only the ranges recorded in the linelock's lv[] are copied
    720		 */
    721		p = &jfs_ip->i_xtroot;
    722		xp = &dp->di_xtroot;
    723		lv = ilinelock->lv;
    724		for (n = 0; n < ilinelock->index; n++, lv++) {
    725			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
    726			       lv->length << L2XTSLOTSIZE);
    727		}
    728
    729		/* reset on-disk (metadata page) xtree XAD_NEW bit */
    730		xad = &xp->xad[XTENTRYSTART];
    731		for (n = XTENTRYSTART;
    732		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
    733			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
    734				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
    735	}
    736	/*
    737	 *	directory: 32 byte (directory entry slot) granularity
    738	 */
    739	else if (type & tlckDTREE) {
    740		dtpage_t *p, *xp;
    741
    742		/*
    743		 * copy dtree root from inode to dinode:
    744		 */
    745		p = (dtpage_t *) &jfs_ip->i_dtroot;
    746		xp = (dtpage_t *) & dp->di_dtroot;
    747		lv = ilinelock->lv;
    748		for (n = 0; n < ilinelock->index; n++, lv++) {
    749			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
    750			       lv->length << L2DTSLOTSIZE);
    751		}
    752	} else {
    753		jfs_err("diWrite: UFO tlock");	/* neither xtree nor dtree: logged only, processing continues */
    754	}
    755
    756      inlineData:
    757	/*
    758	 * copy inline symlink from in-memory inode to on-disk inode (recorded as 2 slots in the inode's linelock)
    759	 */
    760	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
    761		lv = & dilinelock->lv[dilinelock->index];
    762		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
    763		lv->length = 2;
    764		memcpy(&dp->di_inline_all, jfs_ip->i_inline_all, IDATASIZE);
    765		dilinelock->index++;
    766	}
    767	/*
    768	 * copy inline extended attribute data (COMMIT_Inlineea) from in-memory inode to on-disk inode:
    769	 * 128 byte slot granularity
    770	 */
    771	if (test_cflag(COMMIT_Inlineea, ip)) {
    772		lv = & dilinelock->lv[dilinelock->index];
    773		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
    774		lv->length = 1;
    775		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
    776		dilinelock->index++;
    777
    778		clear_cflag(COMMIT_Inlineea, ip);
    779	}
    780
    781	/*
    782	 *	lock/copy inode base: 128 byte slot granularity
    783	 */
    784	lv = & dilinelock->lv[dilinelock->index];
    785	lv->offset = dioffset >> L2INODESLOTSIZE;
    786	copy_to_dinode(dp, ip);
    787	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
    788		lv->length = 2;	/* second slot covers the directory table copied on the next line */
    789		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
    790	} else
    791		lv->length = 1;
    792	dilinelock->index++;
    793
    794	/* release the buffer holding the updated on-disk inode.
    795	 * the buffer will be later written by commit processing.
    796	 */
    797	write_metapage(mp);
    798
    799	return (rc);
    800}
    801
    802
    803/*
    804 * NAME:	diFree(ip)
    805 *
    806 * FUNCTION:	free a specified inode from the inode working map
    807 *		for a fileset or aggregate.
    808 *
    809 *		if the inode to be freed represents the first (only)
    810 *		free inode within the iag, the iag will be placed on
    811 *		the ag free inode list.
    812 *
    813 *		freeing the inode will cause the inode extent to be
    814 *		freed if the inode is the only allocated inode within
    815 *		the extent.  in this case all the disk resource backing
    816 *		up the inode extent will be freed. in addition, the iag
    817 *		will be placed on the ag extent free list if the extent
    818 *		is the first free extent in the iag.  if freeing the
    819 *		extent also means that no free inodes will exist for
    820 *		the iag, the iag will also be removed from the ag free
    821 *		inode list.
    822 *
    823 *		the iag describing the inode will be freed if the extent
    824 *		is to be freed and it is the only backed extent within
    825 *		the iag.  in this case, the iag will be removed from the
    826 *		ag free extent list and ag free inode list and placed on
    827 *		the inode map's free iag list.
    828 *
    829 *		a careful update approach is used to provide consistency
    830 *		in the face of updates to multiple buffers.  under this
    831 *		approach, all required buffers are obtained before making
    832 *		any updates and are held until all updates are complete.
    833 *
    834 * PARAMETERS:
    835 *	ip	- inode to be freed.
    836 *
    837 * RETURN VALUES:
    838 *	0	- success
    839 *	-EIO	- i/o error.
    840 */
    841int diFree(struct inode *ip)
    842{
    843	int rc;
    844	ino_t inum = ip->i_ino;
    845	struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
    846	struct metapage *mp, *amp, *bmp, *cmp, *dmp;
    847	int iagno, ino, extno, bitno, sword, agno;
    848	int back, fwd;
    849	u32 bitmap, mask;
    850	struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
    851	struct inomap *imap = JFS_IP(ipimap)->i_imap;
    852	pxd_t freepxd;
    853	tid_t tid;
    854	struct inode *iplist[3];
    855	struct tlock *tlck;
    856	struct pxd_lock *pxdlock;
    857
    858	/*
    859	 * This is just to suppress compiler warnings.  The same logic that
    860	 * references these variables is used to initialize them.
    861	 */
    862	aiagp = biagp = ciagp = diagp = NULL;
    863
    864	/* get the iag number containing the inode.
    865	 */
    866	iagno = INOTOIAG(inum);
    867
    868	/* make sure that the iag is contained within
    869	 * the map.
    870	 */
    871	if (iagno >= imap->im_nextiag) {
    872		print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
    873			       imap, 32, 0);
    874		jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n",
    875			  (uint) inum, iagno, imap->im_nextiag);
    876		return -EIO;
    877	}
    878
    879	/* get the allocation group for this ino.
    880	 */
    881	agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
    882
    883	/* Lock the AG specific inode map information
    884	 */
    885	AG_LOCK(imap, agno);
    886
    887	/* Obtain read lock in imap inode.  Don't release it until we have
    888	 * read all of the IAG's that we are going to.
    889	 */
    890	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
    891
    892	/* read the iag.
    893	 */
    894	if ((rc = diIAGRead(imap, iagno, &mp))) {
    895		IREAD_UNLOCK(ipimap);
    896		AG_UNLOCK(imap, agno);
    897		return (rc);
    898	}
    899	iagp = (struct iag *) mp->data;
    900
    901	/* get the inode number and extent number of the inode within
    902	 * the iag and the inode number within the extent.
    903	 */
    904	ino = inum & (INOSPERIAG - 1);
    905	extno = ino >> L2INOSPEREXT;
    906	bitno = ino & (INOSPEREXT - 1);
    907	mask = HIGHORDER >> bitno;
    908
    909	if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
    910		jfs_error(ip->i_sb, "wmap shows inode already free\n");
    911	}
    912
    913	if (!addressPXD(&iagp->inoext[extno])) {
    914		release_metapage(mp);
    915		IREAD_UNLOCK(ipimap);
    916		AG_UNLOCK(imap, agno);
    917		jfs_error(ip->i_sb, "invalid inoext\n");
    918		return -EIO;
    919	}
    920
    921	/* compute the bitmap for the extent reflecting the freed inode.
    922	 */
    923	bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
    924
    925	if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
    926		release_metapage(mp);
    927		IREAD_UNLOCK(ipimap);
    928		AG_UNLOCK(imap, agno);
    929		jfs_error(ip->i_sb, "numfree > numinos\n");
    930		return -EIO;
    931	}
    932	/*
    933	 *	inode extent still has some inodes or below low water mark:
    934	 *	keep the inode extent;
    935	 */
    936	if (bitmap ||
    937	    imap->im_agctl[agno].numfree < 96 ||
    938	    (imap->im_agctl[agno].numfree < 288 &&
    939	     (((imap->im_agctl[agno].numfree * 100) /
    940	       imap->im_agctl[agno].numinos) <= 25))) {
    941		/* if the iag currently has no free inodes (i.e.,
    942		 * the inode being freed is the first free inode of iag),
    943		 * insert the iag at head of the inode free list for the ag.
    944		 */
    945		if (iagp->nfreeinos == 0) {
    946			/* check if there are any iags on the ag inode
    947			 * free list.  if so, read the first one so that
    948			 * we can link the current iag onto the list at
    949			 * the head.
    950			 */
    951			if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
    952				/* read the iag that currently is the head
    953				 * of the list.
    954				 */
    955				if ((rc = diIAGRead(imap, fwd, &amp))) {
    956					IREAD_UNLOCK(ipimap);
    957					AG_UNLOCK(imap, agno);
    958					release_metapage(mp);
    959					return (rc);
    960				}
    961				aiagp = (struct iag *) amp->data;
    962
    963				/* make current head point back to the iag.
    964				 */
    965				aiagp->inofreeback = cpu_to_le32(iagno);
    966
    967				write_metapage(amp);
    968			}
    969
    970			/* iag points forward to current head and iag
    971			 * becomes the new head of the list.
    972			 */
    973			iagp->inofreefwd =
    974			    cpu_to_le32(imap->im_agctl[agno].inofree);
    975			iagp->inofreeback = cpu_to_le32(-1);
    976			imap->im_agctl[agno].inofree = iagno;
    977		}
    978		IREAD_UNLOCK(ipimap);
    979
    980		/* update the free inode summary map for the extent if
    981		 * freeing the inode means the extent will now have free
    982		 * inodes (i.e., the inode being freed is the first free
    983		 * inode of extent),
    984		 */
    985		if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
    986			sword = extno >> L2EXTSPERSUM;
    987			bitno = extno & (EXTSPERSUM - 1);
    988			iagp->inosmap[sword] &=
    989			    cpu_to_le32(~(HIGHORDER >> bitno));
    990		}
    991
    992		/* update the bitmap.
    993		 */
    994		iagp->wmap[extno] = cpu_to_le32(bitmap);
    995
    996		/* update the free inode counts at the iag, ag and
    997		 * map level.
    998		 */
    999		le32_add_cpu(&iagp->nfreeinos, 1);
   1000		imap->im_agctl[agno].numfree += 1;
   1001		atomic_inc(&imap->im_numfree);
   1002
   1003		/* release the AG inode map lock
   1004		 */
   1005		AG_UNLOCK(imap, agno);
   1006
   1007		/* write the iag */
   1008		write_metapage(mp);
   1009
   1010		return (0);
   1011	}
   1012
   1013
   1014	/*
   1015	 *	inode extent has become free and above low water mark:
   1016	 *	free the inode extent;
   1017	 */
   1018
   1019	/*
   1020	 *	prepare to update iag list(s) (careful update step 1)
   1021	 */
   1022	amp = bmp = cmp = dmp = NULL;
   1023	fwd = back = -1;
   1024
   1025	/* check if the iag currently has no free extents.  if so,
   1026	 * it will be placed on the head of the ag extent free list.
   1027	 */
   1028	if (iagp->nfreeexts == 0) {
   1029		/* check if the ag extent free list has any iags.
   1030		 * if so, read the iag at the head of the list now.
   1031		 * this (head) iag will be updated later to reflect
   1032		 * the addition of the current iag at the head of
   1033		 * the list.
   1034		 */
   1035		if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
   1036			if ((rc = diIAGRead(imap, fwd, &amp)))
   1037				goto error_out;
   1038			aiagp = (struct iag *) amp->data;
   1039		}
   1040	} else {
   1041		/* iag has free extents. check if the addition of a free
   1042		 * extent will cause all extents to be free within this
   1043		 * iag.  if so, the iag will be removed from the ag extent
   1044		 * free list and placed on the inode map's free iag list.
   1045		 */
   1046		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
   1047			/* in preparation for removing the iag from the
   1048			 * ag extent free list, read the iags preceding
   1049			 * and following the iag on the ag extent free
   1050			 * list.
   1051			 */
   1052			if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
   1053				if ((rc = diIAGRead(imap, fwd, &amp)))
   1054					goto error_out;
   1055				aiagp = (struct iag *) amp->data;
   1056			}
   1057
   1058			if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
   1059				if ((rc = diIAGRead(imap, back, &bmp)))
   1060					goto error_out;
   1061				biagp = (struct iag *) bmp->data;
   1062			}
   1063		}
   1064	}
   1065
   1066	/* remove the iag from the ag inode free list if freeing
   1067	 * this extent cause the iag to have no free inodes.
   1068	 */
   1069	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
   1070		int inofreeback = le32_to_cpu(iagp->inofreeback);
   1071		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
   1072
   1073		/* in preparation for removing the iag from the
   1074		 * ag inode free list, read the iags preceding
   1075		 * and following the iag on the ag inode free
   1076		 * list.  before reading these iags, we must make
   1077		 * sure that we already don't have them in hand
   1078		 * from up above, since re-reading an iag (buffer)
   1079		 * we are currently holding would cause a deadlock.
   1080		 */
   1081		if (inofreefwd >= 0) {
   1082
   1083			if (inofreefwd == fwd)
   1084				ciagp = (struct iag *) amp->data;
   1085			else if (inofreefwd == back)
   1086				ciagp = (struct iag *) bmp->data;
   1087			else {
   1088				if ((rc =
   1089				     diIAGRead(imap, inofreefwd, &cmp)))
   1090					goto error_out;
   1091				ciagp = (struct iag *) cmp->data;
   1092			}
   1093			assert(ciagp != NULL);
   1094		}
   1095
   1096		if (inofreeback >= 0) {
   1097			if (inofreeback == fwd)
   1098				diagp = (struct iag *) amp->data;
   1099			else if (inofreeback == back)
   1100				diagp = (struct iag *) bmp->data;
   1101			else {
   1102				if ((rc =
   1103				     diIAGRead(imap, inofreeback, &dmp)))
   1104					goto error_out;
   1105				diagp = (struct iag *) dmp->data;
   1106			}
   1107			assert(diagp != NULL);
   1108		}
   1109	}
   1110
   1111	IREAD_UNLOCK(ipimap);
   1112
   1113	/*
   1114	 * invalidate any page of the inode extent freed from buffer cache;
   1115	 */
   1116	freepxd = iagp->inoext[extno];
   1117	invalidate_pxd_metapages(ip, freepxd);
   1118
   1119	/*
   1120	 *	update iag list(s) (careful update step 2)
   1121	 */
   1122	/* add the iag to the ag extent free list if this is the
   1123	 * first free extent for the iag.
   1124	 */
   1125	if (iagp->nfreeexts == 0) {
   1126		if (fwd >= 0)
   1127			aiagp->extfreeback = cpu_to_le32(iagno);
   1128
   1129		iagp->extfreefwd =
   1130		    cpu_to_le32(imap->im_agctl[agno].extfree);
   1131		iagp->extfreeback = cpu_to_le32(-1);
   1132		imap->im_agctl[agno].extfree = iagno;
   1133	} else {
   1134		/* remove the iag from the ag extent list if all extents
   1135		 * are now free and place it on the inode map iag free list.
   1136		 */
   1137		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
   1138			if (fwd >= 0)
   1139				aiagp->extfreeback = iagp->extfreeback;
   1140
   1141			if (back >= 0)
   1142				biagp->extfreefwd = iagp->extfreefwd;
   1143			else
   1144				imap->im_agctl[agno].extfree =
   1145				    le32_to_cpu(iagp->extfreefwd);
   1146
   1147			iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
   1148
   1149			IAGFREE_LOCK(imap);
   1150			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
   1151			imap->im_freeiag = iagno;
   1152			IAGFREE_UNLOCK(imap);
   1153		}
   1154	}
   1155
   1156	/* remove the iag from the ag inode free list if freeing
   1157	 * this extent causes the iag to have no free inodes.
   1158	 */
   1159	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
   1160		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
   1161			ciagp->inofreeback = iagp->inofreeback;
   1162
   1163		if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
   1164			diagp->inofreefwd = iagp->inofreefwd;
   1165		else
   1166			imap->im_agctl[agno].inofree =
   1167			    le32_to_cpu(iagp->inofreefwd);
   1168
   1169		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
   1170	}
   1171
   1172	/* update the inode extent address and working map
   1173	 * to reflect the free extent.
   1174	 * the permanent map should have been updated already
   1175	 * for the inode being freed.
   1176	 */
   1177	if (iagp->pmap[extno] != 0) {
   1178		jfs_error(ip->i_sb, "the pmap does not show inode free\n");
   1179	}
   1180	iagp->wmap[extno] = 0;
   1181	PXDlength(&iagp->inoext[extno], 0);
   1182	PXDaddress(&iagp->inoext[extno], 0);
   1183
   1184	/* update the free extent and free inode summary maps
   1185	 * to reflect the freed extent.
   1186	 * the inode summary map is marked to indicate no inodes
   1187	 * available for the freed extent.
   1188	 */
   1189	sword = extno >> L2EXTSPERSUM;
   1190	bitno = extno & (EXTSPERSUM - 1);
   1191	mask = HIGHORDER >> bitno;
   1192	iagp->inosmap[sword] |= cpu_to_le32(mask);
   1193	iagp->extsmap[sword] &= cpu_to_le32(~mask);
   1194
   1195	/* update the number of free inodes and number of free extents
   1196	 * for the iag.
   1197	 */
   1198	le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
   1199	le32_add_cpu(&iagp->nfreeexts, 1);
   1200
   1201	/* update the number of free inodes and backed inodes
   1202	 * at the ag and inode map level.
   1203	 */
   1204	imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
   1205	imap->im_agctl[agno].numinos -= INOSPEREXT;
   1206	atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
   1207	atomic_sub(INOSPEREXT, &imap->im_numinos);
   1208
   1209	if (amp)
   1210		write_metapage(amp);
   1211	if (bmp)
   1212		write_metapage(bmp);
   1213	if (cmp)
   1214		write_metapage(cmp);
   1215	if (dmp)
   1216		write_metapage(dmp);
   1217
   1218	/*
   1219	 * start transaction to update block allocation map
   1220	 * for the inode extent freed;
   1221	 *
   1222	 * N.B. AG_LOCK is released and iag will be released below, and
   1223	 * other thread may allocate inode from/reusing the ixad freed
   1224	 * BUT with new/different backing inode extent from the extent
   1225	 * to be freed by the transaction;
   1226	 */
   1227	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
   1228	mutex_lock(&JFS_IP(ipimap)->commit_mutex);
   1229
   1230	/* acquire tlock of the iag page of the freed ixad
   1231	 * to force the page NOHOMEOK (even though no data is
   1232	 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
   1233	 * for the free of the extent is committed;
   1234	 * write FREEXTENT|NOREDOPAGE log record
   1235	 * N.B. linelock is overlaid as freed extent descriptor;
   1236	 */
   1237	tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
   1238	pxdlock = (struct pxd_lock *) & tlck->lock;
   1239	pxdlock->flag = mlckFREEPXD;
   1240	pxdlock->pxd = freepxd;
   1241	pxdlock->index = 1;
   1242
   1243	write_metapage(mp);
   1244
   1245	iplist[0] = ipimap;
   1246
   1247	/*
   1248	 * logredo needs the IAG number and IAG extent index in order
   1249	 * to ensure that the IMap is consistent.  The least disruptive
   1250	 * way to pass these values through  to the transaction manager
   1251	 * is in the iplist array.
   1252	 *
   1253	 * It's not pretty, but it works.
   1254	 */
   1255	iplist[1] = (struct inode *) (size_t)iagno;
   1256	iplist[2] = (struct inode *) (size_t)extno;
   1257
   1258	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
   1259
   1260	txEnd(tid);
   1261	mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
   1262
   1263	/* unlock the AG inode map information */
   1264	AG_UNLOCK(imap, agno);
   1265
   1266	return (0);
   1267
   1268      error_out:
   1269	IREAD_UNLOCK(ipimap);
   1270
   1271	if (amp)
   1272		release_metapage(amp);
   1273	if (bmp)
   1274		release_metapage(bmp);
   1275	if (cmp)
   1276		release_metapage(cmp);
   1277	if (dmp)
   1278		release_metapage(dmp);
   1279
   1280	AG_UNLOCK(imap, agno);
   1281
   1282	release_metapage(mp);
   1283
   1284	return (rc);
   1285}
   1286
   1287/*
   1288 * There are several places in the diAlloc* routines where we initialize
   1289 * the inode.
   1290 */
   1291static inline void
   1292diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
   1293{
   1294	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
   1295
   1296	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
   1297	jfs_ip->ixpxd = iagp->inoext[extno];
   1298	jfs_ip->agstart = le64_to_cpu(iagp->agstart);
   1299	jfs_ip->active_ag = -1;
   1300}
   1301
   1302
   1303/*
   1304 * NAME:	diAlloc(pip,dir,ip)
   1305 *
   1306 * FUNCTION:	allocate a disk inode from the inode working map
   1307 *		for a fileset or aggregate.
   1308 *
   1309 * PARAMETERS:
   1310 *	pip	- pointer to incore inode for the parent inode.
   1311 *	dir	- 'true' if the new disk inode is for a directory.
   1312 *	ip	- pointer to a new inode
   1313 *
   1314 * RETURN VALUES:
   1315 *	0	- success.
   1316 *	-ENOSPC	- insufficient disk resources.
   1317 *	-EIO	- i/o error.
   1318 */
   1319int diAlloc(struct inode *pip, bool dir, struct inode *ip)
   1320{
   1321	int rc, ino, iagno, addext, extno, bitno, sword;
   1322	int nwords, rem, i, agno;
   1323	u32 mask, inosmap, extsmap;
   1324	struct inode *ipimap;
   1325	struct metapage *mp;
   1326	ino_t inum;
   1327	struct iag *iagp;
   1328	struct inomap *imap;
   1329
   1330	/* get the pointers to the inode map inode and the
   1331	 * corresponding imap control structure.
   1332	 */
   1333	ipimap = JFS_SBI(pip->i_sb)->ipimap;
   1334	imap = JFS_IP(ipimap)->i_imap;
   1335	JFS_IP(ip)->ipimap = ipimap;
   1336	JFS_IP(ip)->fileset = FILESYSTEM_I;
   1337
   1338	/* for a directory, the allocation policy is to start
   1339	 * at the ag level using the preferred ag.
   1340	 */
   1341	if (dir) {
   1342		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
   1343		AG_LOCK(imap, agno);
   1344		goto tryag;
   1345	}
   1346
   1347	/* for files, the policy starts off by trying to allocate from
   1348	 * the same iag containing the parent disk inode:
   1349	 * try to allocate the new disk inode close to the parent disk
   1350	 * inode, using parent disk inode number + 1 as the allocation
   1351	 * hint.  (we use a left-to-right policy to attempt to avoid
   1352	 * moving backward on the disk.)  compute the hint within the
   1353	 * file system and the iag.
   1354	 */
   1355
   1356	/* get the ag number of this iag */
   1357	agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
   1358
   1359	if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
   1360		/*
   1361		 * There is an open file actively growing.  We want to
   1362		 * allocate new inodes from a different ag to avoid
   1363		 * fragmentation problems.
   1364		 */
   1365		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
   1366		AG_LOCK(imap, agno);
   1367		goto tryag;
   1368	}
   1369
   1370	inum = pip->i_ino + 1;
   1371	ino = inum & (INOSPERIAG - 1);
   1372
   1373	/* back off the hint if it is outside of the iag */
   1374	if (ino == 0)
   1375		inum = pip->i_ino;
   1376
   1377	/* lock the AG inode map information */
   1378	AG_LOCK(imap, agno);
   1379
   1380	/* Get read lock on imap inode */
   1381	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
   1382
   1383	/* get the iag number and read the iag */
   1384	iagno = INOTOIAG(inum);
   1385	if ((rc = diIAGRead(imap, iagno, &mp))) {
   1386		IREAD_UNLOCK(ipimap);
   1387		AG_UNLOCK(imap, agno);
   1388		return (rc);
   1389	}
   1390	iagp = (struct iag *) mp->data;
   1391
   1392	/* determine if new inode extent is allowed to be added to the iag.
   1393	 * new inode extent can be added to the iag if the ag
   1394	 * has less than 32 free disk inodes and the iag has free extents.
   1395	 */
   1396	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
   1397
   1398	/*
   1399	 *	try to allocate from the IAG
   1400	 */
   1401	/* check if the inode may be allocated from the iag
   1402	 * (i.e. the inode has free inodes or new extent can be added).
   1403	 */
   1404	if (iagp->nfreeinos || addext) {
   1405		/* determine the extent number of the hint.
   1406		 */
   1407		extno = ino >> L2INOSPEREXT;
   1408
   1409		/* check if the extent containing the hint has backed
   1410		 * inodes.  if so, try to allocate within this extent.
   1411		 */
   1412		if (addressPXD(&iagp->inoext[extno])) {
   1413			bitno = ino & (INOSPEREXT - 1);
   1414			if ((bitno =
   1415			     diFindFree(le32_to_cpu(iagp->wmap[extno]),
   1416					bitno))
   1417			    < INOSPEREXT) {
   1418				ino = (extno << L2INOSPEREXT) + bitno;
   1419
   1420				/* a free inode (bit) was found within this
   1421				 * extent, so allocate it.
   1422				 */
   1423				rc = diAllocBit(imap, iagp, ino);
   1424				IREAD_UNLOCK(ipimap);
   1425				if (rc) {
   1426					assert(rc == -EIO);
   1427				} else {
   1428					/* set the results of the allocation
   1429					 * and write the iag.
   1430					 */
   1431					diInitInode(ip, iagno, ino, extno,
   1432						    iagp);
   1433					mark_metapage_dirty(mp);
   1434				}
   1435				release_metapage(mp);
   1436
   1437				/* free the AG lock and return.
   1438				 */
   1439				AG_UNLOCK(imap, agno);
   1440				return (rc);
   1441			}
   1442
   1443			if (!addext)
   1444				extno =
   1445				    (extno ==
   1446				     EXTSPERIAG - 1) ? 0 : extno + 1;
   1447		}
   1448
   1449		/*
   1450		 * no free inodes within the extent containing the hint.
   1451		 *
   1452		 * try to allocate from the backed extents following
   1453		 * hint or, if appropriate (i.e. addext is true), allocate
   1454		 * an extent of free inodes at or following the extent
   1455		 * containing the hint.
   1456		 *
   1457		 * the free inode and free extent summary maps are used
   1458		 * here, so determine the starting summary map position
   1459		 * and the number of words we'll have to examine.  again,
   1460		 * the approach is to allocate following the hint, so we
   1461		 * might have to initially ignore prior bits of the summary
   1462		 * map that represent extents prior to the extent containing
   1463		 * the hint and later revisit these bits.
   1464		 */
   1465		bitno = extno & (EXTSPERSUM - 1);
   1466		nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
   1467		sword = extno >> L2EXTSPERSUM;
   1468
   1469		/* mask any prior bits for the starting words of the
   1470		 * summary map.
   1471		 */
   1472		mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno));
   1473		inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
   1474		extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
   1475
   1476		/* scan the free inode and free extent summary maps for
   1477		 * free resources.
   1478		 */
   1479		for (i = 0; i < nwords; i++) {
   1480			/* check if this word of the free inode summary
   1481			 * map describes an extent with free inodes.
   1482			 */
   1483			if (~inosmap) {
   1484				/* an extent with free inodes has been
   1485				 * found. determine the extent number
   1486				 * and the inode number within the extent.
   1487				 */
   1488				rem = diFindFree(inosmap, 0);
   1489				extno = (sword << L2EXTSPERSUM) + rem;
   1490				rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
   1491						 0);
   1492				if (rem >= INOSPEREXT) {
   1493					IREAD_UNLOCK(ipimap);
   1494					release_metapage(mp);
   1495					AG_UNLOCK(imap, agno);
   1496					jfs_error(ip->i_sb,
   1497						  "can't find free bit in wmap\n");
   1498					return -EIO;
   1499				}
   1500
   1501				/* determine the inode number within the
   1502				 * iag and allocate the inode from the
   1503				 * map.
   1504				 */
   1505				ino = (extno << L2INOSPEREXT) + rem;
   1506				rc = diAllocBit(imap, iagp, ino);
   1507				IREAD_UNLOCK(ipimap);
   1508				if (rc)
   1509					assert(rc == -EIO);
   1510				else {
   1511					/* set the results of the allocation
   1512					 * and write the iag.
   1513					 */
   1514					diInitInode(ip, iagno, ino, extno,
   1515						    iagp);
   1516					mark_metapage_dirty(mp);
   1517				}
   1518				release_metapage(mp);
   1519
   1520				/* free the AG lock and return.
   1521				 */
   1522				AG_UNLOCK(imap, agno);
   1523				return (rc);
   1524
   1525			}
   1526
   1527			/* check if we may allocate an extent of free
   1528			 * inodes and whether this word of the free
   1529			 * extents summary map describes a free extent.
   1530			 */
   1531			if (addext && ~extsmap) {
   1532				/* a free extent has been found.  determine
   1533				 * the extent number.
   1534				 */
   1535				rem = diFindFree(extsmap, 0);
   1536				extno = (sword << L2EXTSPERSUM) + rem;
   1537
   1538				/* allocate an extent of free inodes.
   1539				 */
   1540				if ((rc = diNewExt(imap, iagp, extno))) {
   1541					/* if there is no disk space for a
   1542					 * new extent, try to allocate the
   1543					 * disk inode from somewhere else.
   1544					 */
   1545					if (rc == -ENOSPC)
   1546						break;
   1547
   1548					assert(rc == -EIO);
   1549				} else {
   1550					/* set the results of the allocation
   1551					 * and write the iag.
   1552					 */
   1553					diInitInode(ip, iagno,
   1554						    extno << L2INOSPEREXT,
   1555						    extno, iagp);
   1556					mark_metapage_dirty(mp);
   1557				}
   1558				release_metapage(mp);
   1559				/* free the imap inode & the AG lock & return.
   1560				 */
   1561				IREAD_UNLOCK(ipimap);
   1562				AG_UNLOCK(imap, agno);
   1563				return (rc);
   1564			}
   1565
   1566			/* move on to the next set of summary map words.
   1567			 */
   1568			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
   1569			inosmap = le32_to_cpu(iagp->inosmap[sword]);
   1570			extsmap = le32_to_cpu(iagp->extsmap[sword]);
   1571		}
   1572	}
   1573	/* unlock imap inode */
   1574	IREAD_UNLOCK(ipimap);
   1575
   1576	/* nothing doing in this iag, so release it. */
   1577	release_metapage(mp);
   1578
   1579      tryag:
   1580	/*
   1581	 * try to allocate anywhere within the same AG as the parent inode.
   1582	 */
   1583	rc = diAllocAG(imap, agno, dir, ip);
   1584
   1585	AG_UNLOCK(imap, agno);
   1586
   1587	if (rc != -ENOSPC)
   1588		return (rc);
   1589
   1590	/*
   1591	 * try to allocate in any AG.
   1592	 */
   1593	return (diAllocAny(imap, agno, dir, ip));
   1594}
   1595
   1596
   1597/*
   1598 * NAME:	diAllocAG(imap,agno,dir,ip)
   1599 *
   1600 * FUNCTION:	allocate a disk inode from the allocation group.
   1601 *
   1602 *		this routine first determines if a new extent of free
   1603 *		inodes should be added for the allocation group, with
   1604 *		the current request satisfied from this extent. if this
   1605 *		is the case, an attempt will be made to do just that.  if
   1606 *		this attempt fails or it has been determined that a new
   1607 *		extent should not be added, an attempt is made to satisfy
   1608 *		the request by allocating an existing (backed) free inode
   1609 *		from the allocation group.
   1610 *
   1611 * PRE CONDITION: Already have the AG lock for this AG.
   1612 *
   1613 * PARAMETERS:
   1614 *	imap	- pointer to inode map control structure.
   1615 *	agno	- allocation group to allocate from.
   1616 *	dir	- 'true' if the new disk inode is for a directory.
   1617 *	ip	- pointer to the new inode to be filled in on successful return
   1618 *		  with the disk inode number allocated, its extent address
   1619 *		  and the start of the ag.
   1620 *
   1621 * RETURN VALUES:
   1622 *	0	- success.
   1623 *	-ENOSPC	- insufficient disk resources.
   1624 *	-EIO	- i/o error.
   1625 */
   1626static int
   1627diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
   1628{
   1629	int rc, addext, numfree, numinos;
   1630
   1631	/* get the number of free and the number of backed disk
   1632	 * inodes currently within the ag.
   1633	 */
   1634	numfree = imap->im_agctl[agno].numfree;
   1635	numinos = imap->im_agctl[agno].numinos;
   1636
   1637	if (numfree > numinos) {
   1638		jfs_error(ip->i_sb, "numfree > numinos\n");
   1639		return -EIO;
   1640	}
   1641
   1642	/* determine if we should allocate a new extent of free inodes
   1643	 * within the ag: for directory inodes, add a new extent
   1644	 * if there are a small number of free inodes or number of free
   1645	 * inodes is a small percentage of the number of backed inodes.
   1646	 */
   1647	if (dir)
   1648		addext = (numfree < 64 ||
   1649			  (numfree < 256
   1650			   && ((numfree * 100) / numinos) <= 20));
   1651	else
   1652		addext = (numfree == 0);
   1653
   1654	/*
   1655	 * try to allocate a new extent of free inodes.
   1656	 */
   1657	if (addext) {
   1658		/* if free space is not available for this new extent, try
   1659		 * below to allocate a free and existing (already backed)
   1660		 * inode from the ag.
   1661		 */
   1662		if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
   1663			return (rc);
   1664	}
   1665
   1666	/*
   1667	 * try to allocate an existing free inode from the ag.
   1668	 */
   1669	return (diAllocIno(imap, agno, ip));
   1670}
   1671
   1672
   1673/*
   1674 * NAME:	diAllocAny(imap,agno,dir,iap)
   1675 *
   1676 * FUNCTION:	allocate a disk inode from any other allocation group.
   1677 *
   1678 *		this routine is called when an allocation attempt within
   1679 *		the primary allocation group has failed. if attempts to
   1680 *		allocate an inode from any allocation group other than the
   1681 *		specified primary group.
   1682 *
   1683 * PARAMETERS:
   1684 *	imap	- pointer to inode map control structure.
   1685 *	agno	- primary allocation group (to avoid).
   1686 *	dir	- 'true' if the new disk inode is for a directory.
   1687 *	ip	- pointer to a new inode to be filled in on successful return
   1688 *		  with the disk inode number allocated, its extent address
   1689 *		  and the start of the ag.
   1690 *
   1691 * RETURN VALUES:
   1692 *	0	- success.
   1693 *	-ENOSPC	- insufficient disk resources.
   1694 *	-EIO	- i/o error.
   1695 */
   1696static int
   1697diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
   1698{
   1699	int ag, rc;
   1700	int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
   1701
   1702
   1703	/* try to allocate from the ags following agno up to
   1704	 * the maximum ag number.
   1705	 */
   1706	for (ag = agno + 1; ag <= maxag; ag++) {
   1707		AG_LOCK(imap, ag);
   1708
   1709		rc = diAllocAG(imap, ag, dir, ip);
   1710
   1711		AG_UNLOCK(imap, ag);
   1712
   1713		if (rc != -ENOSPC)
   1714			return (rc);
   1715	}
   1716
   1717	/* try to allocate from the ags in front of agno.
   1718	 */
   1719	for (ag = 0; ag < agno; ag++) {
   1720		AG_LOCK(imap, ag);
   1721
   1722		rc = diAllocAG(imap, ag, dir, ip);
   1723
   1724		AG_UNLOCK(imap, ag);
   1725
   1726		if (rc != -ENOSPC)
   1727			return (rc);
   1728	}
   1729
   1730	/* no free disk inodes.
   1731	 */
   1732	return -ENOSPC;
   1733}
   1734
   1735
   1736/*
   1737 * NAME:	diAllocIno(imap,agno,ip)
   1738 *
   1739 * FUNCTION:	allocate a disk inode from the allocation group's free
   1740 *		inode list, returning an error if this free list is
   1741 *		empty (i.e. no iags on the list).
   1742 *
   1743 *		allocation occurs from the first iag on the list using
   1744 *		the iag's free inode summary map to find the leftmost
   1745 *		free inode in the iag.
   1746 *
   1747 * PRE CONDITION: Already have AG lock for this AG.
   1748 *
   1749 * PARAMETERS:
   1750 *	imap	- pointer to inode map control structure.
   1751 *	agno	- allocation group.
   1752 *	ip	- pointer to new inode to be filled in on successful return
   1753 *		  with the disk inode number allocated, its extent address
   1754 *		  and the start of the ag.
   1755 *
   1756 * RETURN VALUES:
   1757 *	0	- success.
   1758 *	-ENOSPC	- insufficient disk resources.
   1759 *	-EIO	- i/o error.
   1760 */
   1761static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
   1762{
   1763	int iagno, ino, rc, rem, extno, sword;
   1764	struct metapage *mp;
   1765	struct iag *iagp;
   1766
   1767	/* check if there are iags on the ag's free inode list.
   1768	 */
   1769	if ((iagno = imap->im_agctl[agno].inofree) < 0)
   1770		return -ENOSPC;
   1771
   1772	/* obtain read lock on imap inode */
   1773	IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
   1774
   1775	/* read the iag at the head of the list.
   1776	 */
   1777	if ((rc = diIAGRead(imap, iagno, &mp))) {
   1778		IREAD_UNLOCK(imap->im_ipimap);
   1779		return (rc);
   1780	}
   1781	iagp = (struct iag *) mp->data;
   1782
   1783	/* better be free inodes in this iag if it is on the
   1784	 * list.
   1785	 */
   1786	if (!iagp->nfreeinos) {
   1787		IREAD_UNLOCK(imap->im_ipimap);
   1788		release_metapage(mp);
   1789		jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n");
   1790		return -EIO;
   1791	}
   1792
   1793	/* scan the free inode summary map to find an extent
   1794	 * with free inodes.
   1795	 */
   1796	for (sword = 0;; sword++) {
   1797		if (sword >= SMAPSZ) {
   1798			IREAD_UNLOCK(imap->im_ipimap);
   1799			release_metapage(mp);
   1800			jfs_error(ip->i_sb,
   1801				  "free inode not found in summary map\n");
   1802			return -EIO;
   1803		}
   1804
   1805		if (~iagp->inosmap[sword])
   1806			break;
   1807	}
   1808
   1809	/* found a extent with free inodes. determine
   1810	 * the extent number.
   1811	 */
   1812	rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
   1813	if (rem >= EXTSPERSUM) {
   1814		IREAD_UNLOCK(imap->im_ipimap);
   1815		release_metapage(mp);
   1816		jfs_error(ip->i_sb, "no free extent found\n");
   1817		return -EIO;
   1818	}
   1819	extno = (sword << L2EXTSPERSUM) + rem;
   1820
   1821	/* find the first free inode in the extent.
   1822	 */
   1823	rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
   1824	if (rem >= INOSPEREXT) {
   1825		IREAD_UNLOCK(imap->im_ipimap);
   1826		release_metapage(mp);
   1827		jfs_error(ip->i_sb, "free inode not found\n");
   1828		return -EIO;
   1829	}
   1830
   1831	/* compute the inode number within the iag.
   1832	 */
   1833	ino = (extno << L2INOSPEREXT) + rem;
   1834
   1835	/* allocate the inode.
   1836	 */
   1837	rc = diAllocBit(imap, iagp, ino);
   1838	IREAD_UNLOCK(imap->im_ipimap);
   1839	if (rc) {
   1840		release_metapage(mp);
   1841		return (rc);
   1842	}
   1843
   1844	/* set the results of the allocation and write the iag.
   1845	 */
   1846	diInitInode(ip, iagno, ino, extno, iagp);
   1847	write_metapage(mp);
   1848
   1849	return (0);
   1850}
   1851
   1852
   1853/*
   1854 * NAME:	diAllocExt(imap,agno,ip)
   1855 *
   1856 * FUNCTION:	add a new extent of free inodes to an iag, allocating
   1857 *		an inode from this extent to satisfy the current allocation
   1858 *		request.
   1859 *
   1860 *		this routine first tries to find an existing iag with free
   1861 *		extents through the ag free extent list.  if list is not
   1862 *		empty, the head of the list will be selected as the home
   1863 *		of the new extent of free inodes.  otherwise (the list is
   1864 *		empty), a new iag will be allocated for the ag to contain
   1865 *		the extent.
   1866 *
   1867 *		once an iag has been selected, the free extent summary map
   1868 *		is used to locate a free extent within the iag and diNewExt()
   1869 *		is called to initialize the extent, with initialization
   1870 *		including the allocation of the first inode of the extent
   1871 *		for the purpose of satisfying this request.
   1872 *
   1873 * PARAMETERS:
   1874 *	imap	- pointer to inode map control structure.
   1875 *	agno	- allocation group number.
   1876 *	ip	- pointer to new inode to be filled in on successful return
   1877 *		  with the disk inode number allocated, its extent address
   1878 *		  and the start of the ag.
   1879 *
   1880 * RETURN VALUES:
   1881 *	0	- success.
   1882 *	-ENOSPC	- insufficient disk resources.
   1883 *	-EIO	- i/o error.
   1884 */
   1885static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
   1886{
   1887	int rem, iagno, sword, extno, rc;
   1888	struct metapage *mp;
   1889	struct iag *iagp;
   1890
   1891	/* check if the ag has any iags with free extents.  if not,
   1892	 * allocate a new iag for the ag.
   1893	 */
   1894	if ((iagno = imap->im_agctl[agno].extfree) < 0) {
   1895		/* If successful, diNewIAG will obtain the read lock on the
   1896		 * imap inode.
   1897		 */
   1898		if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
   1899			return (rc);
   1900		}
   1901		iagp = (struct iag *) mp->data;
   1902
   1903		/* set the ag number if this a brand new iag
   1904		 */
   1905		iagp->agstart =
   1906		    cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
   1907	} else {
   1908		/* read the iag.
   1909		 */
   1910		IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
   1911		if ((rc = diIAGRead(imap, iagno, &mp))) {
   1912			IREAD_UNLOCK(imap->im_ipimap);
   1913			jfs_error(ip->i_sb, "error reading iag\n");
   1914			return rc;
   1915		}
   1916		iagp = (struct iag *) mp->data;
   1917	}
   1918
   1919	/* using the free extent summary map, find a free extent.
   1920	 */
   1921	for (sword = 0;; sword++) {
   1922		if (sword >= SMAPSZ) {
   1923			release_metapage(mp);
   1924			IREAD_UNLOCK(imap->im_ipimap);
   1925			jfs_error(ip->i_sb, "free ext summary map not found\n");
   1926			return -EIO;
   1927		}
   1928		if (~iagp->extsmap[sword])
   1929			break;
   1930	}
   1931
   1932	/* determine the extent number of the free extent.
   1933	 */
   1934	rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
   1935	if (rem >= EXTSPERSUM) {
   1936		release_metapage(mp);
   1937		IREAD_UNLOCK(imap->im_ipimap);
   1938		jfs_error(ip->i_sb, "free extent not found\n");
   1939		return -EIO;
   1940	}
   1941	extno = (sword << L2EXTSPERSUM) + rem;
   1942
   1943	/* initialize the new extent.
   1944	 */
   1945	rc = diNewExt(imap, iagp, extno);
   1946	IREAD_UNLOCK(imap->im_ipimap);
   1947	if (rc) {
   1948		/* something bad happened.  if a new iag was allocated,
   1949		 * place it back on the inode map's iag free list, and
   1950		 * clear the ag number information.
   1951		 */
   1952		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
   1953			IAGFREE_LOCK(imap);
   1954			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
   1955			imap->im_freeiag = iagno;
   1956			IAGFREE_UNLOCK(imap);
   1957		}
   1958		write_metapage(mp);
   1959		return (rc);
   1960	}
   1961
   1962	/* set the results of the allocation and write the iag.
   1963	 */
   1964	diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
   1965
   1966	write_metapage(mp);
   1967
   1968	return (0);
   1969}
   1970
   1971
   1972/*
   1973 * NAME:	diAllocBit(imap,iagp,ino)
   1974 *
   1975 * FUNCTION:	allocate a backed inode from an iag.
   1976 *
   1977 *		this routine performs the mechanics of allocating a
   1978 *		specified inode from a backed extent.
   1979 *
   1980 *		if the inode to be allocated represents the last free
   1981 *		inode within the iag, the iag will be removed from the
   1982 *		ag free inode list.
   1983 *
   1984 *		a careful update approach is used to provide consistency
   1985 *		in the face of updates to multiple buffers.  under this
   1986 *		approach, all required buffers are obtained before making
   1987 *		any updates and are held all are updates are complete.
   1988 *
   1989 * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
   1990 *	this AG.  Must have read lock on imap inode.
   1991 *
   1992 * PARAMETERS:
   1993 *	imap	- pointer to inode map control structure.
   1994 *	iagp	- pointer to iag.
   1995 *	ino	- inode number to be allocated within the iag.
   1996 *
   1997 * RETURN VALUES:
   1998 *	0	- success.
   1999 *	-ENOSPC	- insufficient disk resources.
   2000 *	-EIO	- i/o error.
   2001 */
   2002static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
   2003{
   2004	int extno, bitno, agno, sword, rc;
   2005	struct metapage *amp = NULL, *bmp = NULL;
   2006	struct iag *aiagp = NULL, *biagp = NULL;
   2007	u32 mask;
   2008
   2009	/* check if this is the last free inode within the iag.
   2010	 * if so, it will have to be removed from the ag free
   2011	 * inode list, so get the iags preceding and following
   2012	 * it on the list.
   2013	 */
   2014	if (iagp->nfreeinos == cpu_to_le32(1)) {
   2015		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
   2016			if ((rc =
   2017			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
   2018				       &amp)))
   2019				return (rc);
   2020			aiagp = (struct iag *) amp->data;
   2021		}
   2022
   2023		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
   2024			if ((rc =
   2025			     diIAGRead(imap,
   2026				       le32_to_cpu(iagp->inofreeback),
   2027				       &bmp))) {
   2028				if (amp)
   2029					release_metapage(amp);
   2030				return (rc);
   2031			}
   2032			biagp = (struct iag *) bmp->data;
   2033		}
   2034	}
   2035
   2036	/* get the ag number, extent number, inode number within
   2037	 * the extent.
   2038	 */
   2039	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
   2040	extno = ino >> L2INOSPEREXT;
   2041	bitno = ino & (INOSPEREXT - 1);
   2042
   2043	/* compute the mask for setting the map.
   2044	 */
   2045	mask = HIGHORDER >> bitno;
   2046
   2047	/* the inode should be free and backed.
   2048	 */
   2049	if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
   2050	    ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
   2051	    (addressPXD(&iagp->inoext[extno]) == 0)) {
   2052		if (amp)
   2053			release_metapage(amp);
   2054		if (bmp)
   2055			release_metapage(bmp);
   2056
   2057		jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n");
   2058		return -EIO;
   2059	}
   2060
   2061	/* mark the inode as allocated in the working map.
   2062	 */
   2063	iagp->wmap[extno] |= cpu_to_le32(mask);
   2064
   2065	/* check if all inodes within the extent are now
   2066	 * allocated.  if so, update the free inode summary
   2067	 * map to reflect this.
   2068	 */
   2069	if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
   2070		sword = extno >> L2EXTSPERSUM;
   2071		bitno = extno & (EXTSPERSUM - 1);
   2072		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
   2073	}
   2074
   2075	/* if this was the last free inode in the iag, remove the
   2076	 * iag from the ag free inode list.
   2077	 */
   2078	if (iagp->nfreeinos == cpu_to_le32(1)) {
   2079		if (amp) {
   2080			aiagp->inofreeback = iagp->inofreeback;
   2081			write_metapage(amp);
   2082		}
   2083
   2084		if (bmp) {
   2085			biagp->inofreefwd = iagp->inofreefwd;
   2086			write_metapage(bmp);
   2087		} else {
   2088			imap->im_agctl[agno].inofree =
   2089			    le32_to_cpu(iagp->inofreefwd);
   2090		}
   2091		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
   2092	}
   2093
   2094	/* update the free inode count at the iag, ag, inode
   2095	 * map levels.
   2096	 */
   2097	le32_add_cpu(&iagp->nfreeinos, -1);
   2098	imap->im_agctl[agno].numfree -= 1;
   2099	atomic_dec(&imap->im_numfree);
   2100
   2101	return (0);
   2102}
   2103
   2104
   2105/*
   2106 * NAME:	diNewExt(imap,iagp,extno)
   2107 *
   2108 * FUNCTION:	initialize a new extent of inodes for an iag, allocating
   2109 *		the first inode of the extent for use for the current
   2110 *		allocation request.
   2111 *
   2112 *		disk resources are allocated for the new extent of inodes
   2113 *		and the inodes themselves are initialized to reflect their
   2114 *		existence within the extent (i.e. their inode numbers and
   2115 *		inode extent addresses are set) and their initial state
   2116 *		(mode and link count are set to zero).
   2117 *
   2118 *		if the iag is new, it is not yet on an ag extent free list
   2119 *		but will now be placed on this list.
   2120 *
   2121 *		if the allocation of the new extent causes the iag to
   2122 *		have no free extent, the iag will be removed from the
   2123 *		ag extent free list.
   2124 *
   2125 *		if the iag has no free backed inodes, it will be placed
   2126 *		on the ag free inode list, since the addition of the new
   2127 *		extent will now cause it to have free inodes.
   2128 *
   2129 *		a careful update approach is used to provide consistency
   2130 *		(i.e. list consistency) in the face of updates to multiple
   2131 *		buffers.  under this approach, all required buffers are
   2132 *		obtained before making any updates and are held until all
   2133 *		updates are complete.
   2134 *
   2135 * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
   2136 *	this AG.  Must have read lock on imap inode.
   2137 *
   2138 * PARAMETERS:
   2139 *	imap	- pointer to inode map control structure.
   2140 *	iagp	- pointer to iag.
   2141 *	extno	- extent number.
   2142 *
   2143 * RETURN VALUES:
   2144 *	0	- success.
   2145 *	-ENOSPC	- insufficient disk resources.
   2146 *	-EIO	- i/o error.
   2147 */
   2148static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
   2149{
   2150	int agno, iagno, fwd, back, freei = 0, sword, rc;
   2151	struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
   2152	struct metapage *amp, *bmp, *cmp, *dmp;
   2153	struct inode *ipimap;
   2154	s64 blkno, hint;
   2155	int i, j;
   2156	u32 mask;
   2157	ino_t ino;
   2158	struct dinode *dp;
   2159	struct jfs_sb_info *sbi;
   2160
   2161	/* better have free extents.
   2162	 */
   2163	if (!iagp->nfreeexts) {
   2164		jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
   2165		return -EIO;
   2166	}
   2167
   2168	/* get the inode map inode.
   2169	 */
   2170	ipimap = imap->im_ipimap;
   2171	sbi = JFS_SBI(ipimap->i_sb);
   2172
   2173	amp = bmp = cmp = NULL;
   2174
   2175	/* get the ag and iag numbers for this iag.
   2176	 */
   2177	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
   2178	iagno = le32_to_cpu(iagp->iagnum);
   2179
   2180	/* check if this is the last free extent within the
   2181	 * iag.  if so, the iag must be removed from the ag
   2182	 * free extent list, so get the iags preceding and
   2183	 * following the iag on this list.
   2184	 */
   2185	if (iagp->nfreeexts == cpu_to_le32(1)) {
   2186		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
   2187			if ((rc = diIAGRead(imap, fwd, &amp)))
   2188				return (rc);
   2189			aiagp = (struct iag *) amp->data;
   2190		}
   2191
   2192		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
   2193			if ((rc = diIAGRead(imap, back, &bmp)))
   2194				goto error_out;
   2195			biagp = (struct iag *) bmp->data;
   2196		}
   2197	} else {
   2198		/* the iag has free extents.  if all extents are free
   2199		 * (as is the case for a newly allocated iag), the iag
   2200		 * must be added to the ag free extent list, so get
   2201		 * the iag at the head of the list in preparation for
   2202		 * adding this iag to this list.
   2203		 */
   2204		fwd = back = -1;
   2205		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
   2206			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
   2207				if ((rc = diIAGRead(imap, fwd, &amp)))
   2208					goto error_out;
   2209				aiagp = (struct iag *) amp->data;
   2210			}
   2211		}
   2212	}
   2213
   2214	/* check if the iag has no free inodes.  if so, the iag
   2215	 * will have to be added to the ag free inode list, so get
   2216	 * the iag at the head of the list in preparation for
   2217	 * adding this iag to this list.  in doing this, we must
   2218	 * check if we already have the iag at the head of
   2219	 * the list in hand.
   2220	 */
   2221	if (iagp->nfreeinos == 0) {
   2222		freei = imap->im_agctl[agno].inofree;
   2223
   2224		if (freei >= 0) {
   2225			if (freei == fwd) {
   2226				ciagp = aiagp;
   2227			} else if (freei == back) {
   2228				ciagp = biagp;
   2229			} else {
   2230				if ((rc = diIAGRead(imap, freei, &cmp)))
   2231					goto error_out;
   2232				ciagp = (struct iag *) cmp->data;
   2233			}
   2234			if (ciagp == NULL) {
   2235				jfs_error(imap->im_ipimap->i_sb,
   2236					  "ciagp == NULL\n");
   2237				rc = -EIO;
   2238				goto error_out;
   2239			}
   2240		}
   2241	}
   2242
   2243	/* allocate disk space for the inode extent.
   2244	 */
   2245	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
   2246		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
   2247	else
   2248		hint = addressPXD(&iagp->inoext[extno - 1]) +
   2249		    lengthPXD(&iagp->inoext[extno - 1]) - 1;
   2250
   2251	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
   2252		goto error_out;
   2253
   2254	/* compute the inode number of the first inode within the
   2255	 * extent.
   2256	 */
   2257	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
   2258
   2259	/* initialize the inodes within the newly allocated extent a
   2260	 * page at a time.
   2261	 */
   2262	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
   2263		/* get a buffer for this page of disk inodes.
   2264		 */
   2265		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
   2266		if (dmp == NULL) {
   2267			rc = -EIO;
   2268			goto error_out;
   2269		}
   2270		dp = (struct dinode *) dmp->data;
   2271
   2272		/* initialize the inode number, mode, link count and
   2273		 * inode extent address.
   2274		 */
   2275		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
   2276			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
   2277			dp->di_number = cpu_to_le32(ino);
   2278			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
   2279			dp->di_mode = 0;
   2280			dp->di_nlink = 0;
   2281			PXDaddress(&(dp->di_ixpxd), blkno);
   2282			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
   2283		}
   2284		write_metapage(dmp);
   2285	}
   2286
   2287	/* if this is the last free extent within the iag, remove the
   2288	 * iag from the ag free extent list.
   2289	 */
   2290	if (iagp->nfreeexts == cpu_to_le32(1)) {
   2291		if (fwd >= 0)
   2292			aiagp->extfreeback = iagp->extfreeback;
   2293
   2294		if (back >= 0)
   2295			biagp->extfreefwd = iagp->extfreefwd;
   2296		else
   2297			imap->im_agctl[agno].extfree =
   2298			    le32_to_cpu(iagp->extfreefwd);
   2299
   2300		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
   2301	} else {
   2302		/* if the iag has all free extents (newly allocated iag),
   2303		 * add the iag to the ag free extent list.
   2304		 */
   2305		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
   2306			if (fwd >= 0)
   2307				aiagp->extfreeback = cpu_to_le32(iagno);
   2308
   2309			iagp->extfreefwd = cpu_to_le32(fwd);
   2310			iagp->extfreeback = cpu_to_le32(-1);
   2311			imap->im_agctl[agno].extfree = iagno;
   2312		}
   2313	}
   2314
   2315	/* if the iag has no free inodes, add the iag to the
   2316	 * ag free inode list.
   2317	 */
   2318	if (iagp->nfreeinos == 0) {
   2319		if (freei >= 0)
   2320			ciagp->inofreeback = cpu_to_le32(iagno);
   2321
   2322		iagp->inofreefwd =
   2323		    cpu_to_le32(imap->im_agctl[agno].inofree);
   2324		iagp->inofreeback = cpu_to_le32(-1);
   2325		imap->im_agctl[agno].inofree = iagno;
   2326	}
   2327
   2328	/* initialize the extent descriptor of the extent. */
   2329	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
   2330	PXDaddress(&iagp->inoext[extno], blkno);
   2331
   2332	/* initialize the working and persistent map of the extent.
   2333	 * the working map will be initialized such that
   2334	 * it indicates the first inode of the extent is allocated.
   2335	 */
   2336	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
   2337	iagp->pmap[extno] = 0;
   2338
   2339	/* update the free inode and free extent summary maps
   2340	 * for the extent to indicate the extent has free inodes
   2341	 * and no longer represents a free extent.
   2342	 */
   2343	sword = extno >> L2EXTSPERSUM;
   2344	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
   2345	iagp->extsmap[sword] |= cpu_to_le32(mask);
   2346	iagp->inosmap[sword] &= cpu_to_le32(~mask);
   2347
   2348	/* update the free inode and free extent counts for the
   2349	 * iag.
   2350	 */
   2351	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
   2352	le32_add_cpu(&iagp->nfreeexts, -1);
   2353
   2354	/* update the free and backed inode counts for the ag.
   2355	 */
   2356	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
   2357	imap->im_agctl[agno].numinos += INOSPEREXT;
   2358
   2359	/* update the free and backed inode counts for the inode map.
   2360	 */
   2361	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
   2362	atomic_add(INOSPEREXT, &imap->im_numinos);
   2363
   2364	/* write the iags.
   2365	 */
   2366	if (amp)
   2367		write_metapage(amp);
   2368	if (bmp)
   2369		write_metapage(bmp);
   2370	if (cmp)
   2371		write_metapage(cmp);
   2372
   2373	return (0);
   2374
   2375      error_out:
   2376
   2377	/* release the iags.
   2378	 */
   2379	if (amp)
   2380		release_metapage(amp);
   2381	if (bmp)
   2382		release_metapage(bmp);
   2383	if (cmp)
   2384		release_metapage(cmp);
   2385
   2386	return (rc);
   2387}
   2388
   2389
/*
 * NAME:	diNewIAG(imap,iagnop,agno,mpp)
 *
 * FUNCTION:	allocate a new iag for an allocation group.
 *
 *		first tries to allocate the iag from the inode map
 *		iagfree list:
 *		if the list has free iags, the head of the list is removed
 *		and returned to satisfy the request.
 *		if the inode map's iag free list is empty, the inode map
 *		is extended to hold a new iag. this new iag is initialized,
 *		placed at the head of the iag free list, and then removed
 *		from it again by the common tail of the routine to satisfy
 *		the request.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagnop	- pointer to an iag number set with the number of the
 *		  newly allocated iag upon successful return.
 *	agno	- allocation group number (not referenced in the body of
 *		  this routine).
 *	mpp	- pointer to a metapage pointer, set to the metapage of
 *		  the new iag upon successful return.
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 *
 * serialization:
 *	AG lock held on entry/exit;
 *	write lock on the map is held inside;
 *	read lock on the map is held on successful completion;
 *
 * note: new iag transaction:
 * . synchronously write iag;
 * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
 * . at end of logredo(): re-read imap inode to determine
 *   new imap size;
 */
static int
diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
{
	int rc;
	int iagno, i, xlen;
	struct inode *ipimap;
	struct super_block *sb;
	struct jfs_sb_info *sbi;
	struct metapage *mp;
	struct iag *iagp;
	s64 xaddr = 0;
	s64 blkno;
	tid_t tid;
	struct inode *iplist[1];

	/* pick up pointers to the inode map and mount inodes */
	ipimap = imap->im_ipimap;
	sb = ipimap->i_sb;
	sbi = JFS_SBI(sb);

	/* acquire the free iag lock; it is held until the 'out' label
	 * (or the early return below).
	 */
	IAGFREE_LOCK(imap);

	/* if there are any iags on the inode map free iag list,
	 * allocate the iag from the head of the list.
	 */
	if (imap->im_freeiag >= 0) {
		/* pick up the iag number at the head of the list */
		iagno = imap->im_freeiag;

		/* determine the logical block number of the iag.
		 * NOTE(review): blkno is not used again on this path;
		 * diIAGRead() below recomputes it from iagno.
		 */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
	} else {
		/* no free iags. the inode map will have to be extended
		 * to include a new iag.
		 */

		/* acquire inode map lock */
		IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);

		/* the size of the map, in pages, must be exactly one more
		 * than the next iag number (presumably the extra page is
		 * the control page -- confirm); anything else is treated
		 * as corruption.  this path unlocks by hand and returns
		 * directly instead of going through 'out'.
		 */
		if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
			IWRITE_UNLOCK(ipimap);
			IAGFREE_UNLOCK(imap);
			jfs_error(imap->im_ipimap->i_sb,
				  "ipimap->i_size is wrong\n");
			return -EIO;
		}


		/* get the next available iag number */
		iagno = imap->im_nextiag;

		/* make sure that we have not exceeded the maximum inode
		 * number limit.
		 */
		if (iagno > (MAXIAGS - 1)) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -ENOSPC;
			goto out;
		}

		/*
		 * synchronously append new iag page.
		 */
		/* determine the logical address of iag page to append */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);

		/* Allocate extent for new iag page */
		xlen = sbi->nbperpage;
		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/*
		 * start transaction of update of the inode map
		 * addressing structure pointing to the new iag page;
		 */
		tid = txBegin(sb, COMMIT_FORCE);
		mutex_lock(&JFS_IP(ipimap)->commit_mutex);

		/* update the inode map addressing structure to point to it */
		if ((rc =
		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
			/* Free the blocks allocated for the iag since it was
			 * not successfully added to the inode map
			 */
			dbFree(ipimap, xaddr, (s64) xlen);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/* update the inode map's inode to reflect the extension */
		ipimap->i_size += PSIZE;
		inode_add_bytes(ipimap, PSIZE);

		/* assign a buffer for the page */
		mp = get_metapage(ipimap, blkno, PSIZE, 0);
		if (!mp) {
			/*
			 * This is very unlikely since we just created the
			 * extent, but let's try to handle it correctly:
			 * truncate the map back by the page just added,
			 * then abort and end the transaction.
			 */
			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
				   COMMIT_PWMAP);

			txAbort(tid, 0);
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -EIO;
			goto out;
		}
		iagp = (struct iag *) mp->data;

		/* init the iag: not on any list, no free inodes, and
		 * every extent free.
		 */
		memset(iagp, 0, sizeof(struct iag));
		iagp->iagnum = cpu_to_le32(iagno);
		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
		iagp->iagfree = cpu_to_le32(-1);
		iagp->nfreeinos = 0;
		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);

		/* initialize the free inode summary map to all ones (the
		 * free extent summary map is left all zero by the memset
		 * above).
		 */
		for (i = 0; i < SMAPSZ; i++)
			iagp->inosmap[i] = cpu_to_le32(ONES);

		/*
		 * Write and sync the metapage
		 */
		flush_metapage(mp);

		/*
		 * txCommit(COMMIT_FORCE) will synchronously write address
		 * index pages and inode after commit in careful update order
		 * of address index pages (right to left, bottom up);
		 *
		 * NOTE(review): the value stored in rc here is never
		 * examined; it is overwritten by the diIAGRead() call
		 * below, so a commit failure is not reported to the caller.
		 */
		iplist[0] = ipimap;
		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);

		txEnd(tid);
		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

		duplicateIXtree(sb, blkno, xlen, &xaddr);

		/* update the next available iag number */
		imap->im_nextiag += 1;

		/* Add the iag to the iag free list so we don't lose the iag
		 * if a failure happens now.
		 */
		imap->im_freeiag = iagno;

		/* Until we have logredo working, we want the imap inode &
		 * control page to be up to date.
		 */
		diSync(ipimap);

		/* release the inode map lock */
		IWRITE_UNLOCK(ipimap);
	}

	/* obtain read lock on map; it stays held on successful return */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

	/* read the iag; any failure is reported as -EIO */
	if ((rc = diIAGRead(imap, iagno, &mp))) {
		IREAD_UNLOCK(ipimap);
		rc = -EIO;
		goto out;
	}
	iagp = (struct iag *) mp->data;

	/* remove the iag from the iag free list: the list head becomes
	 * the iag's successor and the iag's own link is cleared.
	 */
	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
	iagp->iagfree = cpu_to_le32(-1);

	/* set the return iag number and buffer pointer */
	*iagnop = iagno;
	*mpp = mp;

      out:
	/* release the iag free lock */
	IAGFREE_UNLOCK(imap);

	return (rc);
}
   2631
   2632/*
   2633 * NAME:	diIAGRead()
   2634 *
   2635 * FUNCTION:	get the buffer for the specified iag within a fileset
   2636 *		or aggregate inode map.
   2637 *
   2638 * PARAMETERS:
   2639 *	imap	- pointer to inode map control structure.
   2640 *	iagno	- iag number.
   2641 *	bpp	- point to buffer pointer to be filled in on successful
   2642 *		  exit.
   2643 *
   2644 * SERIALIZATION:
   2645 *	must have read lock on imap inode
   2646 *	(When called by diExtendFS, the filesystem is quiesced, therefore
   2647 *	 the read lock is unnecessary.)
   2648 *
   2649 * RETURN VALUES:
   2650 *	0	- success.
   2651 *	-EIO	- i/o error.
   2652 */
   2653static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
   2654{
   2655	struct inode *ipimap = imap->im_ipimap;
   2656	s64 blkno;
   2657
   2658	/* compute the logical block number of the iag. */
   2659	blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
   2660
   2661	/* read the iag. */
   2662	*mpp = read_metapage(ipimap, blkno, PSIZE, 0);
   2663	if (*mpp == NULL) {
   2664		return -EIO;
   2665	}
   2666
   2667	return (0);
   2668}
   2669
   2670/*
   2671 * NAME:	diFindFree()
   2672 *
   2673 * FUNCTION:	find the first free bit in a word starting at
   2674 *		the specified bit position.
   2675 *
   2676 * PARAMETERS:
   2677 *	word	- word to be examined.
   2678 *	start	- starting bit position.
   2679 *
   2680 * RETURN VALUES:
   2681 *	bit position of first free bit in the word or 32 if
   2682 *	no free bits were found.
   2683 */
   2684static int diFindFree(u32 word, int start)
   2685{
   2686	int bitno;
   2687	assert(start < 32);
   2688	/* scan the word for the first free bit. */
   2689	for (word <<= start, bitno = start; bitno < 32;
   2690	     bitno++, word <<= 1) {
   2691		if ((word & HIGHORDER) == 0)
   2692			break;
   2693	}
   2694	return (bitno);
   2695}
   2696
/*
 * NAME:	diUpdatePMap()
 *
 * FUNCTION: Update the persistent map in an IAG for the allocation or
 *	freeing of the specified inode, and tie the IAG's metapage to the
 *	transaction block in the log sync list.
 *
 * PRE CONDITIONS: Working map has already been updated for allocate.
 *
 * PARAMETERS:
 *	ipimap	- Incore inode map inode
 *	inum	- Number of inode to mark in permanent map
 *	is_free	- If 'true' indicates inode should be marked freed, otherwise
 *		  indicates inode should be marked allocated.
 *	tblk	- transaction block whose lsn, clsn, sb and synclist are
 *		  used to update the log sync state of the IAG's metapage.
 *
 * RETURN VALUES:
 *		0 for success
 *		-EIO if the iag lies outside the map, or (allocate case
 *		only) the working/persistent map state is not as expected
 *		other non-zero value returned by diIAGRead()
 */
int
diUpdatePMap(struct inode *ipimap,
	     unsigned long inum, bool is_free, struct tblock * tblk)
{
	int rc;
	struct iag *iagp;
	struct metapage *mp;
	int iagno, ino, extno, bitno;
	struct inomap *imap;
	u32 mask;
	struct jfs_log *log;
	int lsn, difft, diffp;
	unsigned long flags;

	imap = JFS_IP(ipimap)->i_imap;
	/* get the iag number containing the inode */
	iagno = INOTOIAG(inum);
	/* make sure that the iag is contained within the map */
	if (iagno >= imap->im_nextiag) {
		jfs_error(ipimap->i_sb, "the iag is outside the map\n");
		return -EIO;
	}
	/* read the iag; the read lock is only held across the read */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc)
		return (rc);
	metapage_wait_for_io(mp);
	iagp = (struct iag *) mp->data;
	/* get the inode number and extent number of the inode within
	 * the iag and the inode number within the extent.
	 */
	ino = inum & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;
	bitno = ino & (INOSPEREXT - 1);
	mask = HIGHORDER >> bitno;
	/*
	 * mark the inode free in persistent map:
	 */
	if (is_free) {
		/* The inode should have been allocated both in working
		 * map and in persistent map;
		 * the inode will be freed from working map at the release
		 * of last reference release;
		 *
		 * Unlike the allocate case below, a mismatch is only
		 * reported through jfs_error(); execution continues and
		 * the persistent map bit is cleared regardless.
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "inode %ld not marked as allocated in wmap!\n",
				  inum);
		}
		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "inode %ld not marked as allocated in pmap!\n",
				  inum);
		}
		/* update the bitmap for the extent of the freed inode */
		iagp->pmap[extno] &= cpu_to_le32(~mask);
	}
	/*
	 * mark the inode allocated in persistent map:
	 */
	else {
		/* The inode should be already allocated in the working map
		 * and should be free in persistent map;
		 * otherwise the metapage is released and -EIO returned.
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "the inode is not allocated in the working map\n");
			return -EIO;
		}
		if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "the inode is not free in the persistent map\n");
			return -EIO;
		}
		/* update the bitmap for the extent of the allocated inode */
		iagp->pmap[extno] |= cpu_to_le32(mask);
	}
	/*
	 * update iag lsn (done under the log sync lock)
	 */
	lsn = tblk->lsn;
	log = JFS_SBI(tblk->sb)->log;
	LOGSYNC_LOCK(log, flags);
	if (mp->lsn != 0) {
		/* the metapage already carries an lsn:
		 * inherit older/smaller lsn
		 */
		logdiff(difft, lsn, log);
		logdiff(diffp, mp->lsn, log);
		if (difft < diffp) {
			mp->lsn = lsn;
			/* move mp after tblock in logsync list */
			list_move(&mp->synclist, &tblk->synclist);
		}
		/* inherit younger/larger clsn */
		assert(mp->clsn);
		logdiff(difft, tblk->clsn, log);
		logdiff(diffp, mp->clsn, log);
		if (difft > diffp)
			mp->clsn = tblk->clsn;
	} else {
		/* first lsn for this metapage: take everything from the
		 * transaction block and account for it in the log.
		 */
		mp->log = log;
		mp->lsn = lsn;
		/* insert mp after tblock in logsync list */
		log->count++;
		list_add(&mp->synclist, &tblk->synclist);
		mp->clsn = tblk->clsn;
	}
	LOGSYNC_UNLOCK(log, flags);
	write_metapage(mp);
	return (0);
}
   2828
   2829/*
   2830 *	diExtendFS()
   2831 *
   2832 * function: update imap for extendfs();
   2833 *
   2834 * note: AG size has been increased s.t. each k old contiguous AGs are
   2835 * coalesced into a new AG;
   2836 */
   2837int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
   2838{
   2839	int rc, rcx = 0;
   2840	struct inomap *imap = JFS_IP(ipimap)->i_imap;
   2841	struct iag *iagp = NULL, *hiagp = NULL;
   2842	struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
   2843	struct metapage *bp, *hbp;
   2844	int i, n, head;
   2845	int numinos, xnuminos = 0, xnumfree = 0;
   2846	s64 agstart;
   2847
   2848	jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
   2849		   imap->im_nextiag, atomic_read(&imap->im_numinos),
   2850		   atomic_read(&imap->im_numfree));
   2851
   2852	/*
   2853	 *	reconstruct imap
   2854	 *
   2855	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
   2856	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
   2857	 * note: new AG size = old AG size * (2**x).
   2858	 */
   2859
   2860	/* init per AG control information im_agctl[] */
   2861	for (i = 0; i < MAXAG; i++) {
   2862		imap->im_agctl[i].inofree = -1;
   2863		imap->im_agctl[i].extfree = -1;
   2864		imap->im_agctl[i].numinos = 0;	/* number of backed inodes */
   2865		imap->im_agctl[i].numfree = 0;	/* number of free backed inodes */
   2866	}
   2867
   2868	/*
   2869	 *	process each iag page of the map.
   2870	 *
   2871	 * rebuild AG Free Inode List, AG Free Inode Extent List;
   2872	 */
   2873	for (i = 0; i < imap->im_nextiag; i++) {
   2874		if ((rc = diIAGRead(imap, i, &bp))) {
   2875			rcx = rc;
   2876			continue;
   2877		}
   2878		iagp = (struct iag *) bp->data;
   2879		if (le32_to_cpu(iagp->iagnum) != i) {
   2880			release_metapage(bp);
   2881			jfs_error(ipimap->i_sb, "unexpected value of iagnum\n");
   2882			return -EIO;
   2883		}
   2884
   2885		/* leave free iag in the free iag list */
   2886		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
   2887			release_metapage(bp);
   2888			continue;
   2889		}
   2890
   2891		agstart = le64_to_cpu(iagp->agstart);
   2892		n = agstart >> mp->db_agl2size;
   2893		iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
   2894
   2895		/* compute backed inodes */
   2896		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
   2897		    << L2INOSPEREXT;
   2898		if (numinos > 0) {
   2899			/* merge AG backed inodes */
   2900			imap->im_agctl[n].numinos += numinos;
   2901			xnuminos += numinos;
   2902		}
   2903
   2904		/* if any backed free inodes, insert at AG free inode list */
   2905		if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
   2906			if ((head = imap->im_agctl[n].inofree) == -1) {
   2907				iagp->inofreefwd = cpu_to_le32(-1);
   2908				iagp->inofreeback = cpu_to_le32(-1);
   2909			} else {
   2910				if ((rc = diIAGRead(imap, head, &hbp))) {
   2911					rcx = rc;
   2912					goto nextiag;
   2913				}
   2914				hiagp = (struct iag *) hbp->data;
   2915				hiagp->inofreeback = iagp->iagnum;
   2916				iagp->inofreefwd = cpu_to_le32(head);
   2917				iagp->inofreeback = cpu_to_le32(-1);
   2918				write_metapage(hbp);
   2919			}
   2920
   2921			imap->im_agctl[n].inofree =
   2922			    le32_to_cpu(iagp->iagnum);
   2923
   2924			/* merge AG backed free inodes */
   2925			imap->im_agctl[n].numfree +=
   2926			    le32_to_cpu(iagp->nfreeinos);
   2927			xnumfree += le32_to_cpu(iagp->nfreeinos);
   2928		}
   2929
   2930		/* if any free extents, insert at AG free extent list */
   2931		if (le32_to_cpu(iagp->nfreeexts) > 0) {
   2932			if ((head = imap->im_agctl[n].extfree) == -1) {
   2933				iagp->extfreefwd = cpu_to_le32(-1);
   2934				iagp->extfreeback = cpu_to_le32(-1);
   2935			} else {
   2936				if ((rc = diIAGRead(imap, head, &hbp))) {
   2937					rcx = rc;
   2938					goto nextiag;
   2939				}
   2940				hiagp = (struct iag *) hbp->data;
   2941				hiagp->extfreeback = iagp->iagnum;
   2942				iagp->extfreefwd = cpu_to_le32(head);
   2943				iagp->extfreeback = cpu_to_le32(-1);
   2944				write_metapage(hbp);
   2945			}
   2946
   2947			imap->im_agctl[n].extfree =
   2948			    le32_to_cpu(iagp->iagnum);
   2949		}
   2950
   2951	      nextiag:
   2952		write_metapage(bp);
   2953	}
   2954
   2955	if (xnuminos != atomic_read(&imap->im_numinos) ||
   2956	    xnumfree != atomic_read(&imap->im_numfree)) {
   2957		jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n");
   2958		return -EIO;
   2959	}
   2960
   2961	return rcx;
   2962}
   2963
   2964
   2965/*
   2966 *	duplicateIXtree()
   2967 *
   2968 * serialization: IWRITE_LOCK held on entry/exit
   2969 *
   2970 * note: shadow page with regular inode (rel.2);
   2971 */
   2972static void duplicateIXtree(struct super_block *sb, s64 blkno,
   2973			    int xlen, s64 *xaddr)
   2974{
   2975	struct jfs_superblock *j_sb;
   2976	struct buffer_head *bh;
   2977	struct inode *ip;
   2978	tid_t tid;
   2979
   2980	/* if AIT2 ipmap2 is bad, do not try to update it */
   2981	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
   2982		return;
   2983	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
   2984	if (ip == NULL) {
   2985		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
   2986		if (readSuper(sb, &bh))
   2987			return;
   2988		j_sb = (struct jfs_superblock *)bh->b_data;
   2989		j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
   2990
   2991		mark_buffer_dirty(bh);
   2992		sync_dirty_buffer(bh);
   2993		brelse(bh);
   2994		return;
   2995	}
   2996
   2997	/* start transaction */
   2998	tid = txBegin(sb, COMMIT_FORCE);
   2999	/* update the inode map addressing structure to point to it */
   3000	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
   3001		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
   3002		txAbort(tid, 1);
   3003		goto cleanup;
   3004
   3005	}
   3006	/* update the inode map's inode to reflect the extension */
   3007	ip->i_size += PSIZE;
   3008	inode_add_bytes(ip, PSIZE);
   3009	txCommit(tid, 1, &ip, COMMIT_FORCE);
   3010      cleanup:
   3011	txEnd(tid);
   3012	diFreeSpecial(ip);
   3013}
   3014
   3015/*
   3016 * NAME:	copy_from_dinode()
   3017 *
   3018 * FUNCTION:	Copies inode info from disk inode to in-memory inode
   3019 *
   3020 * RETURN VALUES:
   3021 *	0	- success
   3022 *	-ENOMEM	- insufficient memory
   3023 */
   3024static int copy_from_dinode(struct dinode * dip, struct inode *ip)
   3025{
   3026	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
   3027	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
   3028
   3029	jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
   3030	jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
   3031	jfs_set_inode_flags(ip);
   3032
   3033	ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
   3034	if (sbi->umask != -1) {
   3035		ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
   3036		/* For directories, add x permission if r is allowed by umask */
   3037		if (S_ISDIR(ip->i_mode)) {
   3038			if (ip->i_mode & 0400)
   3039				ip->i_mode |= 0100;
   3040			if (ip->i_mode & 0040)
   3041				ip->i_mode |= 0010;
   3042			if (ip->i_mode & 0004)
   3043				ip->i_mode |= 0001;
   3044		}
   3045	}
   3046	set_nlink(ip, le32_to_cpu(dip->di_nlink));
   3047
   3048	jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
   3049	if (!uid_valid(sbi->uid))
   3050		ip->i_uid = jfs_ip->saved_uid;
   3051	else {
   3052		ip->i_uid = sbi->uid;
   3053	}
   3054
   3055	jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
   3056	if (!gid_valid(sbi->gid))
   3057		ip->i_gid = jfs_ip->saved_gid;
   3058	else {
   3059		ip->i_gid = sbi->gid;
   3060	}
   3061
   3062	ip->i_size = le64_to_cpu(dip->di_size);
   3063	ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
   3064	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
   3065	ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
   3066	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
   3067	ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
   3068	ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
   3069	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
   3070	ip->i_generation = le32_to_cpu(dip->di_gen);
   3071
   3072	jfs_ip->ixpxd = dip->di_ixpxd;	/* in-memory pxd's are little-endian */
   3073	jfs_ip->acl = dip->di_acl;	/* as are dxd's */
   3074	jfs_ip->ea = dip->di_ea;
   3075	jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
   3076	jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
   3077	jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
   3078
   3079	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
   3080		jfs_ip->dev = le32_to_cpu(dip->di_rdev);
   3081		ip->i_rdev = new_decode_dev(jfs_ip->dev);
   3082	}
   3083
   3084	if (S_ISDIR(ip->i_mode)) {
   3085		memcpy(&jfs_ip->u.dir, &dip->u._dir, 384);
   3086	} else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
   3087		memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
   3088	} else
   3089		memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
   3090
   3091	/* Zero the in-memory-only stuff */
   3092	jfs_ip->cflag = 0;
   3093	jfs_ip->btindex = 0;
   3094	jfs_ip->btorder = 0;
   3095	jfs_ip->bxflag = 0;
   3096	jfs_ip->blid = 0;
   3097	jfs_ip->atlhead = 0;
   3098	jfs_ip->atltail = 0;
   3099	jfs_ip->xtlid = 0;
   3100	return (0);
   3101}
   3102
   3103/*
   3104 * NAME:	copy_to_dinode()
   3105 *
   3106 * FUNCTION:	Copies inode info from in-memory inode to disk inode
   3107 */
   3108static void copy_to_dinode(struct dinode * dip, struct inode *ip)
   3109{
   3110	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
   3111	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
   3112
   3113	dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
   3114	dip->di_inostamp = cpu_to_le32(sbi->inostamp);
   3115	dip->di_number = cpu_to_le32(ip->i_ino);
   3116	dip->di_gen = cpu_to_le32(ip->i_generation);
   3117	dip->di_size = cpu_to_le64(ip->i_size);
   3118	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
   3119	dip->di_nlink = cpu_to_le32(ip->i_nlink);
   3120	if (!uid_valid(sbi->uid))
   3121		dip->di_uid = cpu_to_le32(i_uid_read(ip));
   3122	else
   3123		dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
   3124						   jfs_ip->saved_uid));
   3125	if (!gid_valid(sbi->gid))
   3126		dip->di_gid = cpu_to_le32(i_gid_read(ip));
   3127	else
   3128		dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
   3129						    jfs_ip->saved_gid));
   3130	/*
   3131	 * mode2 is only needed for storing the higher order bits.
   3132	 * Trust i_mode for the lower order ones
   3133	 */
   3134	if (sbi->umask == -1)
   3135		dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
   3136					   ip->i_mode);
   3137	else /* Leave the original permissions alone */
   3138		dip->di_mode = cpu_to_le32(jfs_ip->mode2);
   3139
   3140	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
   3141	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
   3142	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
   3143	dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
   3144	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
   3145	dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
   3146	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
   3147	dip->di_acl = jfs_ip->acl;	/* as are dxd's */
   3148	dip->di_ea = jfs_ip->ea;
   3149	dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
   3150	dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
   3151	dip->di_otime.tv_nsec = 0;
   3152	dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
   3153	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
   3154		dip->di_rdev = cpu_to_le32(jfs_ip->dev);
   3155}