cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

jfs_txnmgr.c (74623B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) International Business Machines Corp., 2000-2005
      4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
      5 */
      6
      7/*
      8 *	jfs_txnmgr.c: transaction manager
      9 *
     10 * notes:
     11 * transaction starts with txBegin() and ends with txCommit()
     12 * or txAbort().
     13 *
     14 * tlock is acquired at the time of update;
     15 * (obviate scan at commit time for xtree and dtree)
     16 * tlock and mp point to each other;
     17 * (no hashlist for mp -> tlock).
     18 *
     19 * special cases:
     20 * tlock on in-memory inode:
     21 * in-place tlock in the in-memory inode itself;
     22 * converted to page lock by iWrite() at commit time.
     23 *
     24 * tlock during write()/mmap() under anonymous transaction (tid = 0):
     25 * transferred (?) to transaction at commit time.
     26 *
     27 * use the page itself to update allocation maps
     28 * (obviate intermediate replication of allocation/deallocation data)
     29 * hold on to mp+lock thru update of maps
     30 */
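
/*
 * Rough lifecycle sketch of how the routines below fit together
 * (illustrative only; real callers, e.g. the jfs_* inode operations,
 * add error handling and may pass COMMIT_* flags):
 *
 *	tid = txBegin(sb, 0);              // reserve a tid/tblock
 *	tlck = txLock(tid, ip, mp, type);  // tlock each updated page
 *	... update the page(s) ...
 *	rc = txCommit(tid, 1, &ip, 0);     // write log records + commit
 *	txEnd(tid);                        // release the tid/tblock
 */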
     31
     32#include <linux/fs.h>
     33#include <linux/vmalloc.h>
     34#include <linux/completion.h>
     35#include <linux/freezer.h>
     36#include <linux/module.h>
     37#include <linux/moduleparam.h>
     38#include <linux/kthread.h>
     39#include <linux/seq_file.h>
     40#include "jfs_incore.h"
     41#include "jfs_inode.h"
     42#include "jfs_filsys.h"
     43#include "jfs_metapage.h"
     44#include "jfs_dinode.h"
     45#include "jfs_imap.h"
     46#include "jfs_dmap.h"
     47#include "jfs_superblock.h"
     48#include "jfs_debug.h"
     49
     50/*
     51 *	transaction management structures
     52 */
     53static struct {
     54	int freetid;		/* index of a free tid structure */
     55	int freelock;		/* index of first free lock word */
     56	wait_queue_head_t freewait;	/* eventlist of free tblock */
     57	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
     58	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
     59	int tlocksInUse;	/* Number of tlocks in use */
     60	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
     61/*	struct tblock *sync_queue; * Transactions waiting for data sync */
     62	struct list_head unlock_queue;	/* Txns waiting to be released */
     63	struct list_head anon_list;	/* inodes having anonymous txns */
     64	struct list_head anon_list2;	/* inodes having anonymous txns
     65					   that couldn't be sync'ed */
     66} TxAnchor;
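
/*
 * Note: freetid and freelock are heads of intrusive free lists threaded
 * through TxBlock[].next and TxLock[].next (built in txInit() below);
 * index 0 is reserved and doubles as the end-of-list marker.
 */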
     67
     68int jfs_tlocks_low;		/* Indicates low number of available tlocks */
     69
     70#ifdef CONFIG_JFS_STATISTICS
     71static struct {
     72	uint txBegin;
     73	uint txBegin_barrier;
     74	uint txBegin_lockslow;
     75	uint txBegin_freetid;
     76	uint txBeginAnon;
     77	uint txBeginAnon_barrier;
     78	uint txBeginAnon_lockslow;
     79	uint txLockAlloc;
     80	uint txLockAlloc_freelock;
     81} TxStat;
     82#endif
     83
     84static int nTxBlock = -1;	/* number of transaction blocks */
     85module_param(nTxBlock, int, 0);
     86MODULE_PARM_DESC(nTxBlock,
     87		 "Number of transaction blocks (max:65536)");
     88
     89static int nTxLock = -1;	/* number of transaction locks */
     90module_param(nTxLock, int, 0);
     91MODULE_PARM_DESC(nTxLock,
     92		 "Number of transaction locks (max:65536)");
     93
     94struct tblock *TxBlock;	/* transaction block table */
     95static int TxLockLWM;	/* Low water mark for number of txLocks used */
     96static int TxLockHWM;	/* High water mark for number of txLocks used */
     97static int TxLockVHWM;	/* Very High water mark */
     98struct tlock *TxLock;	/* transaction lock table */
     99
    100/*
    101 *	transaction management lock
    102 */
    103static DEFINE_SPINLOCK(jfsTxnLock);
    104
    105#define TXN_LOCK()		spin_lock(&jfsTxnLock)
    106#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
    107
    108#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock)
    109#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
    110#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
    111
    112static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
    113static int jfs_commit_thread_waking;
    114
    115/*
    116 * Retry logic exists outside these macros to protect against spurious wakeups.
    117 */
    118static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
    119{
    120	DECLARE_WAITQUEUE(wait, current);
    121
    122	add_wait_queue(event, &wait);
    123	set_current_state(TASK_UNINTERRUPTIBLE);
    124	TXN_UNLOCK();
    125	io_schedule();
    126	remove_wait_queue(event, &wait);
    127}
    128
    129#define TXN_SLEEP(event)\
    130{\
    131	TXN_SLEEP_DROP_LOCK(event);\
    132	TXN_LOCK();\
    133}
    134
    135#define TXN_WAKEUP(event) wake_up_all(event)
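
/*
 * Illustrative use of TXN_SLEEP (cf. txBegin()/txLockAlloc() below):
 * a sleeper may wake spuriously or lose the race for the freed
 * resource, so the condition is always rechecked before proceeding:
 *
 *	retry:
 *		if (!TxAnchor.freetid) {
 *			TXN_SLEEP(&TxAnchor.freewait);
 *			goto retry;
 *		}
 */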
    136
    137/*
    138 *	statistics
    139 */
    140static struct {
    141	tid_t maxtid;		/* 4: biggest tid ever used */
    142	lid_t maxlid;		/* 4: biggest lid ever used */
    143	int ntid;		/* 4: # of transactions performed */
    144	int nlid;		/* 4: # of tlocks acquired */
    145	int waitlock;		/* 4: # of tlock wait */
    146} stattx;
    147
    148/*
    149 * forward references
    150 */
    151static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
    152		struct tlock *tlck, struct commit *cd);
    153static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
    154		struct tlock *tlck);
    155static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
    156		struct tlock * tlck);
    157static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
    158		struct tlock * tlck);
    159static void txAllocPMap(struct inode *ip, struct maplock * maplock,
    160		struct tblock * tblk);
    161static void txForce(struct tblock * tblk);
    162static void txLog(struct jfs_log *log, struct tblock *tblk,
    163		struct commit *cd);
    164static void txUpdateMap(struct tblock * tblk);
    165static void txRelease(struct tblock * tblk);
    166static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
    167	   struct tlock * tlck);
    168static void LogSyncRelease(struct metapage * mp);
    169
    170/*
    171 *		transaction block/lock management
    172 *		---------------------------------
    173 */
    174
    175/*
    176 * Get a transaction lock from the free list.  If the number in use is
    177 * greater than the high water mark, wake up the sync daemon.  This should
    178 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
    179 */
    180static lid_t txLockAlloc(void)
    181{
    182	lid_t lid;
    183
    184	INCREMENT(TxStat.txLockAlloc);
    185	if (!TxAnchor.freelock) {
    186		INCREMENT(TxStat.txLockAlloc_freelock);
    187	}
    188
    189	while (!(lid = TxAnchor.freelock))
    190		TXN_SLEEP(&TxAnchor.freelockwait);
    191	TxAnchor.freelock = TxLock[lid].next;
    192	HIGHWATERMARK(stattx.maxlid, lid);
    193	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
    194		jfs_info("txLockAlloc tlocks low");
    195		jfs_tlocks_low = 1;
    196		wake_up_process(jfsSyncThread);
    197	}
    198
    199	return lid;
    200}
    201
    202static void txLockFree(lid_t lid)
    203{
    204	TxLock[lid].tid = 0;
    205	TxLock[lid].next = TxAnchor.freelock;
    206	TxAnchor.freelock = lid;
    207	TxAnchor.tlocksInUse--;
    208	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
    209		jfs_info("txLockFree jfs_tlocks_low no more");
    210		jfs_tlocks_low = 0;
    211		TXN_WAKEUP(&TxAnchor.lowlockwait);
    212	}
    213	TXN_WAKEUP(&TxAnchor.freelockwait);
    214}
    215
    216/*
    217 * NAME:	txInit()
    218 *
    219 * FUNCTION:	initialize transaction management structures
    220 *
    221 * RETURN:	0 on success, -ENOMEM on allocation failure
    222 *
    223 * serialization: single thread at jfs_init()
    224 */
    225int txInit(void)
    226{
    227	int k, size;
    228	struct sysinfo si;
    229
    230	/* Set defaults for nTxLock and nTxBlock if unset */
    231
    232	if (nTxLock == -1) {
    233		if (nTxBlock == -1) {
    234			/* Base default on memory size */
    235			si_meminfo(&si);
    236			if (si.totalram > (256 * 1024)) /* 1 GB */
    237				nTxLock = 64 * 1024;
    238			else
    239				nTxLock = si.totalram >> 2;
    240		} else if (nTxBlock > (8 * 1024))
    241			nTxLock = 64 * 1024;
    242		else
    243			nTxLock = nTxBlock << 3;
    244	}
    245	if (nTxBlock == -1)
    246		nTxBlock = nTxLock >> 3;
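	/*
	 * Worked example of the defaults (si.totalram counts pages; the
	 * figures assume 4K pages): a 512MB machine has totalram = 131072,
	 * so nTxLock = 131072 >> 2 = 32768 and nTxBlock = 32768 >> 3 = 4096;
	 * at 1GB and above nTxLock defaults to the 64K maximum.
	 */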
    247
    248	/* Verify tunable parameters */
    249	if (nTxBlock < 16)
    250		nTxBlock = 16;	/* No one should set it this low */
    251	if (nTxBlock > 65536)
    252		nTxBlock = 65536;
    253	if (nTxLock < 256)
    254		nTxLock = 256;	/* No one should set it this low */
    255	if (nTxLock > 65536)
    256		nTxLock = 65536;
    257
    258	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
    259	       nTxBlock, nTxLock);
    260	/*
    261	 * initialize transaction block (tblock) table
    262	 *
    263	 * transaction id (tid) = tblock index
    264	 * tid = 0 is reserved.
    265	 */
    266	TxLockLWM = (nTxLock * 4) / 10;
    267	TxLockHWM = (nTxLock * 7) / 10;
    268	TxLockVHWM = (nTxLock * 8) / 10;
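	/*
	 * E.g. at the nTxLock = 65536 maximum the watermarks come out to
	 * LWM = 26214, HWM = 45875 and VHWM = 52428 (40%/70%/80%).
	 */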
    269
    270	size = sizeof(struct tblock) * nTxBlock;
    271	TxBlock = vmalloc(size);
    272	if (TxBlock == NULL)
    273		return -ENOMEM;
    274
    275	for (k = 1; k < nTxBlock - 1; k++) {
    276		TxBlock[k].next = k + 1;
    277		init_waitqueue_head(&TxBlock[k].gcwait);
    278		init_waitqueue_head(&TxBlock[k].waitor);
    279	}
    280	TxBlock[k].next = 0;
    281	init_waitqueue_head(&TxBlock[k].gcwait);
    282	init_waitqueue_head(&TxBlock[k].waitor);
    283
    284	TxAnchor.freetid = 1;
    285	init_waitqueue_head(&TxAnchor.freewait);
    286
    287	stattx.maxtid = 1;	/* statistics */
    288
    289	/*
    290	 * initialize transaction lock (tlock) table
    291	 *
    292	 * transaction lock id = tlock index
    293	 * tlock id = 0 is reserved.
    294	 */
    295	size = sizeof(struct tlock) * nTxLock;
    296	TxLock = vmalloc(size);
    297	if (TxLock == NULL) {
    298		vfree(TxBlock);
    299		return -ENOMEM;
    300	}
    301
    302	/* initialize tlock table */
    303	for (k = 1; k < nTxLock - 1; k++)
    304		TxLock[k].next = k + 1;
    305	TxLock[k].next = 0;
    306	init_waitqueue_head(&TxAnchor.freelockwait);
    307	init_waitqueue_head(&TxAnchor.lowlockwait);
    308
    309	TxAnchor.freelock = 1;
    310	TxAnchor.tlocksInUse = 0;
    311	INIT_LIST_HEAD(&TxAnchor.anon_list);
    312	INIT_LIST_HEAD(&TxAnchor.anon_list2);
    313
    314	LAZY_LOCK_INIT();
    315	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
    316
    317	stattx.maxlid = 1;	/* statistics */
    318
    319	return 0;
    320}
    321
    322/*
    323 * NAME:	txExit()
    324 *
    325 * FUNCTION:	clean up when module is unloaded
    326 */
    327void txExit(void)
    328{
    329	vfree(TxLock);
    330	TxLock = NULL;
    331	vfree(TxBlock);
    332	TxBlock = NULL;
    333}
    334
    335/*
    336 * NAME:	txBegin()
    337 *
    338 * FUNCTION:	start a transaction.
    339 *
    340 * PARAMETER:	sb	- superblock
    341 *		flag	- force for nested tx;
    342 *
    343 * RETURN:	tid	- transaction id
    344 *
    345 * note: the force flag allows starting a tx for a nested tx
    346 * to prevent deadlock on the logsync barrier;
    347 */
    348tid_t txBegin(struct super_block *sb, int flag)
    349{
    350	tid_t t;
    351	struct tblock *tblk;
    352	struct jfs_log *log;
    353
    354	jfs_info("txBegin: flag = 0x%x", flag);
    355	log = JFS_SBI(sb)->log;
    356
    357	TXN_LOCK();
    358
    359	INCREMENT(TxStat.txBegin);
    360
    361      retry:
    362	if (!(flag & COMMIT_FORCE)) {
    363		/*
    364		 * synchronize with logsync barrier
    365		 */
    366		if (test_bit(log_SYNCBARRIER, &log->flag) ||
    367		    test_bit(log_QUIESCE, &log->flag)) {
    368			INCREMENT(TxStat.txBegin_barrier);
    369			TXN_SLEEP(&log->syncwait);
    370			goto retry;
    371		}
    372	}
    373	if (flag == 0) {
    374		/*
    375		 * Don't begin transaction if we're getting starved for tlocks
    376		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
    377		 * free tlocks)
    378		 */
    379		if (TxAnchor.tlocksInUse > TxLockVHWM) {
    380			INCREMENT(TxStat.txBegin_lockslow);
    381			TXN_SLEEP(&TxAnchor.lowlockwait);
    382			goto retry;
    383		}
    384	}
    385
    386	/*
    387	 * allocate transaction id/block
    388	 */
    389	if ((t = TxAnchor.freetid) == 0) {
    390		jfs_info("txBegin: waiting for free tid");
    391		INCREMENT(TxStat.txBegin_freetid);
    392		TXN_SLEEP(&TxAnchor.freewait);
    393		goto retry;
    394	}
    395
    396	tblk = tid_to_tblock(t);
    397
    398	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
    399		/* Don't let a non-forced transaction take the last tblk */
    400		jfs_info("txBegin: waiting for free tid");
    401		INCREMENT(TxStat.txBegin_freetid);
    402		TXN_SLEEP(&TxAnchor.freewait);
    403		goto retry;
    404	}
    405
    406	TxAnchor.freetid = tblk->next;
    407
    408	/*
    409	 * initialize transaction
    410	 */
    411
    412	/*
    413	 * We can't zero the whole thing or we screw up another thread being
    414	 * awakened after sleeping on tblk->waitor
    415	 *
    416	 * memset(tblk, 0, sizeof(struct tblock));
    417	 */
    418	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
    419
    420	tblk->sb = sb;
    421	++log->logtid;
    422	tblk->logtid = log->logtid;
    423
    424	++log->active;
    425
    426	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
    427	INCREMENT(stattx.ntid);	/* statistics */
    428
    429	TXN_UNLOCK();
    430
    431	jfs_info("txBegin: returning tid = %d", t);
    432
    433	return t;
    434}
    435
    436/*
    437 * NAME:	txBeginAnon()
    438 *
    439 * FUNCTION:	start an anonymous transaction.
    440 *		Blocks during a logsync barrier or when available tlocks
    441 *		are low, to prevent anonymous tlocks from depleting the supply.
    442 *
    443 * PARAMETER:	sb	- superblock
    444 *
    445 * RETURN:	none
    446 */
    447void txBeginAnon(struct super_block *sb)
    448{
    449	struct jfs_log *log;
    450
    451	log = JFS_SBI(sb)->log;
    452
    453	TXN_LOCK();
    454	INCREMENT(TxStat.txBeginAnon);
    455
    456      retry:
    457	/*
    458	 * synchronize with logsync barrier
    459	 */
    460	if (test_bit(log_SYNCBARRIER, &log->flag) ||
    461	    test_bit(log_QUIESCE, &log->flag)) {
    462		INCREMENT(TxStat.txBeginAnon_barrier);
    463		TXN_SLEEP(&log->syncwait);
    464		goto retry;
    465	}
    466
    467	/*
    468	 * Don't begin transaction if we're getting starved for tlocks
    469	 */
    470	if (TxAnchor.tlocksInUse > TxLockVHWM) {
    471		INCREMENT(TxStat.txBeginAnon_lockslow);
    472		TXN_SLEEP(&TxAnchor.lowlockwait);
    473		goto retry;
    474	}
    475	TXN_UNLOCK();
    476}
    477
    478/*
    479 *	txEnd()
    480 *
    481 * function: free specified transaction block.
    482 *
    483 *	logsync barrier processing:
    484 *
    485 * serialization:
    486 */
    487void txEnd(tid_t tid)
    488{
    489	struct tblock *tblk = tid_to_tblock(tid);
    490	struct jfs_log *log;
    491
    492	jfs_info("txEnd: tid = %d", tid);
    493	TXN_LOCK();
    494
    495	/*
    496	 * wakeup transactions waiting on the page locked
    497	 * by the current transaction
    498	 */
    499	TXN_WAKEUP(&tblk->waitor);
    500
    501	log = JFS_SBI(tblk->sb)->log;
    502
    503	/*
    504	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
    505	 * otherwise, we would be left with a transaction that may have been
    506	 * reused.
    507	 *
    508	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
    509	 * routine.
    510	 */
    511	if (tblk->flag & tblkGC_LAZY) {
    512		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
    513		TXN_UNLOCK();
    514
    515		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
    516		tblk->flag |= tblkGC_UNLOCKED;
    517		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
    518		return;
    519	}
    520
    521	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
    522
    523	assert(tblk->next == 0);
    524
    525	/*
    526	 * insert tblock back on freelist
    527	 */
    528	tblk->next = TxAnchor.freetid;
    529	TxAnchor.freetid = tid;
    530
    531	/*
    532	 * mark the tblock not active
    533	 */
    534	if (--log->active == 0) {
    535		clear_bit(log_FLUSH, &log->flag);
    536
    537		/*
    538		 * synchronize with logsync barrier
    539		 */
    540		if (test_bit(log_SYNCBARRIER, &log->flag)) {
    541			TXN_UNLOCK();
    542
    543			/* write dirty metadata & forward log syncpt */
    544			jfs_syncpt(log, 1);
    545
    546			jfs_info("log barrier off: 0x%x", log->lsn);
    547
    548			/* enable new transactions to start */
    549			clear_bit(log_SYNCBARRIER, &log->flag);
    550
    551			/* wakeup all waiters for logsync barrier */
    552			TXN_WAKEUP(&log->syncwait);
    553
    554			goto wakeup;
    555		}
    556	}
    557
    558	TXN_UNLOCK();
    559wakeup:
    560	/*
    561	 * wakeup all waiters for a free tblock
    562	 */
    563	TXN_WAKEUP(&TxAnchor.freewait);
    564}
    565
    566/*
    567 *	txLock()
    568 *
    569 * function: acquire a transaction lock on the specified <mp>
    570 *
    571 * parameter:
    572 *
    573 * return:	transaction lock id
    574 *
    575 * serialization:
    576 */
    577struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
    578		     int type)
    579{
    580	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    581	int dir_xtree = 0;
    582	lid_t lid;
    583	tid_t xtid;
    584	struct tlock *tlck;
    585	struct xtlock *xtlck;
    586	struct linelock *linelock;
    587	xtpage_t *p;
    588	struct tblock *tblk;
    589
    590	TXN_LOCK();
    591
    592	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
    593	    !(mp->xflag & COMMIT_PAGE)) {
    594		/*
    595		 * Directory inode is special.  It can have both an xtree tlock
    596		 * and a dtree tlock associated with it.
    597		 */
    598		dir_xtree = 1;
    599		lid = jfs_ip->xtlid;
    600	} else
    601		lid = mp->lid;
    602
    603	/* is page not locked by a transaction ? */
    604	if (lid == 0)
    605		goto allocateLock;
    606
    607	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
    608
    609	/* is page locked by the requester transaction ? */
    610	tlck = lid_to_tlock(lid);
    611	if ((xtid = tlck->tid) == tid) {
    612		TXN_UNLOCK();
    613		goto grantLock;
    614	}
    615
    616	/*
    617	 * is page locked by anonymous transaction/lock ?
    618	 *
    619	 * (page update without transaction (i.e., file write) is
    620	 * locked under anonymous transaction tid = 0:
    621	 * anonymous tlocks maintained on anonymous tlock list of
    622	 * the inode of the page and available to all anonymous
    623	 * transactions until txCommit() time at which point
    624	 * they are transferred to the transaction tlock list of
    625	 * the committing transaction of the inode)
    626	 */
    627	if (xtid == 0) {
    628		tlck->tid = tid;
    629		TXN_UNLOCK();
    630		tblk = tid_to_tblock(tid);
    631		/*
    632		 * The order of the tlocks in the transaction is important
    633		 * (during truncate, child xtree pages must be freed before
    634		 * parent's tlocks change the working map).
    635		 * Take tlock off anonymous list and add to tail of
    636		 * transaction list
    637		 *
    638		 * Note:  We really need to get rid of the tid & lid and
    639		 * use list_head's.  This code is getting UGLY!
    640		 */
    641		if (jfs_ip->atlhead == lid) {
    642			if (jfs_ip->atltail == lid) {
    643				/* only anonymous txn.
    644				 * Remove from anon_list
    645				 */
    646				TXN_LOCK();
    647				list_del_init(&jfs_ip->anon_inode_list);
    648				TXN_UNLOCK();
    649			}
    650			jfs_ip->atlhead = tlck->next;
    651		} else {
    652			lid_t last;
    653			for (last = jfs_ip->atlhead;
    654			     lid_to_tlock(last)->next != lid;
    655			     last = lid_to_tlock(last)->next) {
    656				assert(last);
    657			}
    658			lid_to_tlock(last)->next = tlck->next;
    659			if (jfs_ip->atltail == lid)
    660				jfs_ip->atltail = last;
    661		}
    662
    663		/* insert the tlock at tail of transaction tlock list */
    664
    665		if (tblk->next)
    666			lid_to_tlock(tblk->last)->next = lid;
    667		else
    668			tblk->next = lid;
    669		tlck->next = 0;
    670		tblk->last = lid;
    671
    672		goto grantLock;
    673	}
    674
    675	goto waitLock;
    676
    677	/*
    678	 * allocate a tlock
    679	 */
    680      allocateLock:
    681	lid = txLockAlloc();
    682	tlck = lid_to_tlock(lid);
    683
    684	/*
    685	 * initialize tlock
    686	 */
    687	tlck->tid = tid;
    688
    689	TXN_UNLOCK();
    690
    691	/* mark tlock for meta-data page */
    692	if (mp->xflag & COMMIT_PAGE) {
    693
    694		tlck->flag = tlckPAGELOCK;
    695
    696		/* mark the page dirty and nohomeok */
    697		metapage_nohomeok(mp);
    698
    699		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
    700			 mp, mp->nohomeok, tid, tlck);
    701
    702		/* if anonymous transaction, and buffer is on the group
    703		 * commit synclist, mark inode to show this.  This will
    704		 * prevent the buffer from being marked nohomeok for too
    705		 * long a time.
    706		 */
    707		if ((tid == 0) && mp->lsn)
    708			set_cflag(COMMIT_Synclist, ip);
    709	}
    710	/* mark tlock for in-memory inode */
    711	else
    712		tlck->flag = tlckINODELOCK;
    713
    714	if (S_ISDIR(ip->i_mode))
    715		tlck->flag |= tlckDIRECTORY;
    716
    717	tlck->type = 0;
    718
    719	/* bind the tlock and the page */
    720	tlck->ip = ip;
    721	tlck->mp = mp;
    722	if (dir_xtree)
    723		jfs_ip->xtlid = lid;
    724	else
    725		mp->lid = lid;
    726
    727	/*
    728	 * enqueue transaction lock to transaction/inode
    729	 */
    730	/* insert the tlock at tail of transaction tlock list */
    731	if (tid) {
    732		tblk = tid_to_tblock(tid);
    733		if (tblk->next)
    734			lid_to_tlock(tblk->last)->next = lid;
    735		else
    736			tblk->next = lid;
    737		tlck->next = 0;
    738		tblk->last = lid;
    739	}
    740	/* anonymous transaction:
    741	 * insert the tlock at head of inode anonymous tlock list
    742	 */
    743	else {
    744		tlck->next = jfs_ip->atlhead;
    745		jfs_ip->atlhead = lid;
    746		if (tlck->next == 0) {
    747			/* This inode's first anonymous transaction */
    748			jfs_ip->atltail = lid;
    749			TXN_LOCK();
    750			list_add_tail(&jfs_ip->anon_inode_list,
    751				      &TxAnchor.anon_list);
    752			TXN_UNLOCK();
    753		}
    754	}
    755
    756	/* initialize type dependent area for linelock */
    757	linelock = (struct linelock *) & tlck->lock;
    758	linelock->next = 0;
    759	linelock->flag = tlckLINELOCK;
    760	linelock->maxcnt = TLOCKSHORT;
    761	linelock->index = 0;
    762
    763	switch (type & tlckTYPE) {
    764	case tlckDTREE:
    765		linelock->l2linesize = L2DTSLOTSIZE;
    766		break;
    767
    768	case tlckXTREE:
    769		linelock->l2linesize = L2XTSLOTSIZE;
    770
    771		xtlck = (struct xtlock *) linelock;
    772		xtlck->header.offset = 0;
    773		xtlck->header.length = 2;
    774
    775		if (type & tlckNEW) {
    776			xtlck->lwm.offset = XTENTRYSTART;
    777		} else {
    778			if (mp->xflag & COMMIT_PAGE)
    779				p = (xtpage_t *) mp->data;
    780			else
    781				p = &jfs_ip->i_xtroot;
    782			xtlck->lwm.offset =
    783			    le16_to_cpu(p->header.nextindex);
    784		}
    785		xtlck->lwm.length = 0;	/* ! */
    786		xtlck->twm.offset = 0;
    787		xtlck->hwm.offset = 0;
    788
    789		xtlck->index = 2;
    790		break;
    791
    792	case tlckINODE:
    793		linelock->l2linesize = L2INODESLOTSIZE;
    794		break;
    795
    796	case tlckDATA:
    797		linelock->l2linesize = L2DATASLOTSIZE;
    798		break;
    799
    800	default:
    801		jfs_err("UFO tlock:0x%p", tlck);
    802	}
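
	/*
	 * (In the tlckXTREE case above the linelock doubles as an xtlock:
	 * its lwm/hwm offsets bound the range of modified xad slots that
	 * xtLog() later turns into log records and maplocks.)
	 */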
    803
    804	/*
    805	 * update tlock vector
    806	 */
    807      grantLock:
    808	tlck->type |= type;
    809
    810	return tlck;
    811
    812	/*
    813	 * page is being locked by another transaction:
    814	 */
    815      waitLock:
    816	/* Only locks on ipimap or ipaimap should reach here */
    817	/* assert(jfs_ip->fileset == AGGREGATE_I); */
    818	if (jfs_ip->fileset != AGGREGATE_I) {
    819		printk(KERN_ERR "txLock: trying to lock locked page!");
    820		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
    821			       ip, sizeof(*ip), 0);
    822		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
    823			       mp, sizeof(*mp), 0);
    824		print_hex_dump(KERN_ERR, "Locker's tblock: ",
    825			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
    826			       sizeof(struct tblock), 0);
    827		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
    828			       tlck, sizeof(*tlck), 0);
    829		BUG();
    830	}
    831	INCREMENT(stattx.waitlock);	/* statistics */
    832	TXN_UNLOCK();
    833	release_metapage(mp);
    834	TXN_LOCK();
    835	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
    836
    837	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
    838		 tid, xtid, lid);
    839
    840	/* Recheck everything since dropping TXN_LOCK */
    841	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
    842		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
    843	else
    844		TXN_UNLOCK();
    845	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
    846
    847	return NULL;
    848}
    849
    850/*
    851 * NAME:	txRelease()
    852 *
    853 * FUNCTION:	Release buffers associated with transaction locks, but don't
    854 *		mark homeok yet.  This allows other transactions to modify
    855 *		buffers, but won't let them go to disk until commit record
    856 *		actually gets written.
    857 *
    858 * PARAMETER:
    859 *		tblk	-
    860 *
    861 * RETURN:	none
    862 */
    863static void txRelease(struct tblock * tblk)
    864{
    865	struct metapage *mp;
    866	lid_t lid;
    867	struct tlock *tlck;
    868
    869	TXN_LOCK();
    870
    871	for (lid = tblk->next; lid; lid = tlck->next) {
    872		tlck = lid_to_tlock(lid);
    873		if ((mp = tlck->mp) != NULL &&
    874		    (tlck->type & tlckBTROOT) == 0) {
    875			assert(mp->xflag & COMMIT_PAGE);
    876			mp->lid = 0;
    877		}
    878	}
    879
    880	/*
    881	 * wakeup transactions waiting on a page locked
    882	 * by the current transaction
    883	 */
    884	TXN_WAKEUP(&tblk->waitor);
    885
    886	TXN_UNLOCK();
    887}
    888
    889/*
    890 * NAME:	txUnlock()
    891 *
    892 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
    893 *		objects and frees their lockwords.
    894 */
    895static void txUnlock(struct tblock * tblk)
    896{
    897	struct tlock *tlck;
    898	struct linelock *linelock;
    899	lid_t lid, next, llid, k;
    900	struct metapage *mp;
    901	struct jfs_log *log;
    902	int difft, diffp;
    903	unsigned long flags;
    904
    905	jfs_info("txUnlock: tblk = 0x%p", tblk);
    906	log = JFS_SBI(tblk->sb)->log;
    907
    908	/*
    909	 * mark page under tlock homeok (its log has been written):
    910	 */
    911	for (lid = tblk->next; lid; lid = next) {
    912		tlck = lid_to_tlock(lid);
    913		next = tlck->next;
    914
    915		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
    916
    917		/* unbind page from tlock */
    918		if ((mp = tlck->mp) != NULL &&
    919		    (tlck->type & tlckBTROOT) == 0) {
    920			assert(mp->xflag & COMMIT_PAGE);
    921
    922			/* hold buffer
    923			 */
    924			hold_metapage(mp);
    925
    926			assert(mp->nohomeok > 0);
    927			_metapage_homeok(mp);
    928
    929			/* inherit younger/larger clsn */
    930			LOGSYNC_LOCK(log, flags);
    931			if (mp->clsn) {
    932				logdiff(difft, tblk->clsn, log);
    933				logdiff(diffp, mp->clsn, log);
    934				if (difft > diffp)
    935					mp->clsn = tblk->clsn;
    936			} else
    937				mp->clsn = tblk->clsn;
    938			LOGSYNC_UNLOCK(log, flags);
    939
    940			assert(!(tlck->flag & tlckFREEPAGE));
    941
    942			put_metapage(mp);
    943		}
    944
    945		/* insert tlock, and linelock(s) of the tlock if any,
    946		 * at head of freelist
    947		 */
    948		TXN_LOCK();
    949
    950		llid = ((struct linelock *) & tlck->lock)->next;
    951		while (llid) {
    952			linelock = (struct linelock *) lid_to_tlock(llid);
    953			k = linelock->next;
    954			txLockFree(llid);
    955			llid = k;
    956		}
    957		txLockFree(lid);
    958
    959		TXN_UNLOCK();
    960	}
    961	tblk->next = tblk->last = 0;
    962
    963	/*
    964	 * remove tblock from logsynclist
    965	 * (allocation map pages inherited the lsn of tblk and
    966	 * have been inserted in the logsync list at txUpdateMap())
    967	 */
    968	if (tblk->lsn) {
    969		LOGSYNC_LOCK(log, flags);
    970		log->count--;
    971		list_del(&tblk->synclist);
    972		LOGSYNC_UNLOCK(log, flags);
    973	}
    974}
    975
    976/*
    977 *	txMaplock()
    978 *
    979 * function: allocate a transaction lock for freed page/entry;
    980 *	for freed page, maplock is used as xtlock/dtlock type;
    981 */
    982struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
    983{
    984	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
    985	lid_t lid;
    986	struct tblock *tblk;
    987	struct tlock *tlck;
    988	struct maplock *maplock;
    989
    990	TXN_LOCK();
    991
    992	/*
    993	 * allocate a tlock
    994	 */
    995	lid = txLockAlloc();
    996	tlck = lid_to_tlock(lid);
    997
    998	/*
    999	 * initialize tlock
   1000	 */
   1001	tlck->tid = tid;
   1002
   1003	/* bind the tlock and the object */
   1004	tlck->flag = tlckINODELOCK;
   1005	if (S_ISDIR(ip->i_mode))
   1006		tlck->flag |= tlckDIRECTORY;
   1007	tlck->ip = ip;
   1008	tlck->mp = NULL;
   1009
   1010	tlck->type = type;
   1011
   1012	/*
   1013	 * enqueue transaction lock to transaction/inode
   1014	 */
   1015	/* insert the tlock at tail of transaction tlock list */
   1016	if (tid) {
   1017		tblk = tid_to_tblock(tid);
   1018		if (tblk->next)
   1019			lid_to_tlock(tblk->last)->next = lid;
   1020		else
   1021			tblk->next = lid;
   1022		tlck->next = 0;
   1023		tblk->last = lid;
   1024	}
   1025	/* anonymous transaction:
   1026	 * insert the tlock at head of inode anonymous tlock list
   1027	 */
   1028	else {
   1029		tlck->next = jfs_ip->atlhead;
   1030		jfs_ip->atlhead = lid;
   1031		if (tlck->next == 0) {
   1032			/* This inode's first anonymous transaction */
   1033			jfs_ip->atltail = lid;
   1034			list_add_tail(&jfs_ip->anon_inode_list,
   1035				      &TxAnchor.anon_list);
   1036		}
   1037	}
   1038
   1039	TXN_UNLOCK();
   1040
   1041	/* initialize type dependent area for maplock */
   1042	maplock = (struct maplock *) & tlck->lock;
   1043	maplock->next = 0;
   1044	maplock->maxcnt = 0;
   1045	maplock->index = 0;
   1046
   1047	return tlck;
   1048}
   1049
   1050/*
   1051 *	txLinelock()
   1052 *
   1053 * function: allocate a transaction lock for log vector list
   1054 */
   1055struct linelock *txLinelock(struct linelock * tlock)
   1056{
   1057	lid_t lid;
   1058	struct tlock *tlck;
   1059	struct linelock *linelock;
   1060
   1061	TXN_LOCK();
   1062
   1063	/* allocate a TxLock structure */
   1064	lid = txLockAlloc();
   1065	tlck = lid_to_tlock(lid);
   1066
   1067	TXN_UNLOCK();
   1068
   1069	/* initialize linelock */
   1070	linelock = (struct linelock *) tlck;
   1071	linelock->next = 0;
   1072	linelock->flag = tlckLINELOCK;
   1073	linelock->maxcnt = TLOCKLONG;
   1074	linelock->index = 0;
   1075	if (tlck->flag & tlckDIRECTORY)
   1076		linelock->flag |= tlckDIRECTORY;
   1077
   1078	/* append linelock after tlock */
   1079	linelock->next = tlock->next;
   1080	tlock->next = lid;
   1081
   1082	return linelock;
   1083}
   1084
   1085/*
   1086 *		transaction commit management
   1087 *		-----------------------------
   1088 */
   1089
   1090/*
   1091 * NAME:	txCommit()
   1092 *
   1093 * FUNCTION:	commit the changes to the objects specified in
   1094 *		clist.  For journalled segments only the
   1095 *		changes of the caller are committed, i.e. by tid.
   1096 *		for non-journalled segments the data are flushed to
   1097 *		disk and then the change to the disk inode and indirect
   1098 *		blocks committed (so blocks newly allocated to the
   1099 *		segment will be made a part of the segment atomically).
   1100 *
   1101 *		all of the segments specified in clist must be in
   1102 *		one file system. no more than 6 segments are needed
   1103 *		to handle all unix svcs.
   1104 *
   1105 *		if the i_nlink field (i.e. disk inode link count)
   1106 *		is zero, and the type of inode is a regular file or
   1107 *		directory, or symbolic link, the inode is truncated
   1108 *		to zero length. the truncation is committed but the
   1109 *		VM resources are unaffected until it is closed (see
   1110 *		iput and iclose).
   1111 *
   1112 * PARAMETER:
   1113 *
   1114 * RETURN:
   1115 *
   1116 * serialization:
   1117 *		on entry the inode lock on each segment is assumed
   1118 *		to be held.
   1119 *
   1120 * i/o error:
   1121 */
   1122int txCommit(tid_t tid,		/* transaction identifier */
   1123	     int nip,		/* number of inodes to commit */
   1124	     struct inode **iplist,	/* list of inode to commit */
   1125	     int flag)
   1126{
   1127	int rc = 0;
   1128	struct commit cd;
   1129	struct jfs_log *log;
   1130	struct tblock *tblk;
   1131	struct lrd *lrd;
   1132	struct inode *ip;
   1133	struct jfs_inode_info *jfs_ip;
   1134	int k, n;
   1135	ino_t top;
   1136	struct super_block *sb;
   1137
   1138	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
   1139	/* is read-only file system ? */
   1140	if (isReadOnly(iplist[0])) {
   1141		rc = -EROFS;
   1142		goto TheEnd;
   1143	}
   1144
   1145	sb = cd.sb = iplist[0]->i_sb;
   1146	cd.tid = tid;
   1147
   1148	if (tid == 0)
   1149		tid = txBegin(sb, 0);
   1150	tblk = tid_to_tblock(tid);
   1151
   1152	/*
   1153	 * initialize commit structure
   1154	 */
   1155	log = JFS_SBI(sb)->log;
   1156	cd.log = log;
   1157
   1158	/* initialize log record descriptor in commit */
   1159	lrd = &cd.lrd;
   1160	lrd->logtid = cpu_to_le32(tblk->logtid);
   1161	lrd->backchain = 0;
   1162
   1163	tblk->xflag |= flag;
   1164
   1165	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
   1166		tblk->xflag |= COMMIT_LAZY;
   1167	/*
   1168	 *	prepare non-journaled objects for commit
   1169	 *
   1170	 * flush data pages of non-journaled file
   1171	 * to prevent the file getting non-initialized disk blocks
   1172	 * in case of crash.
   1173	 * (new blocks - )
   1174	 */
   1175	cd.iplist = iplist;
   1176	cd.nip = nip;
   1177
   1178	/*
   1179	 *	acquire transaction lock on (on-disk) inodes
   1180	 *
   1181	 * update on-disk inode from in-memory inode
   1182	 * acquiring transaction locks for AFTER records
   1183	 * on the on-disk inode of file object
   1184	 *
   1185	 * sort the inodes array by inode number in descending order
   1186	 * to prevent deadlock when acquiring transaction lock
   1187	 * of on-disk inodes on multiple on-disk inode pages by
   1188	 * multiple concurrent transactions
   1189	 */
   1190	for (k = 0; k < cd.nip; k++) {
   1191		top = (cd.iplist[k])->i_ino;
   1192		for (n = k + 1; n < cd.nip; n++) {
   1193			ip = cd.iplist[n];
   1194			if (ip->i_ino > top) {
   1195				top = ip->i_ino;
   1196				cd.iplist[n] = cd.iplist[k];
   1197				cd.iplist[k] = ip;
   1198			}
   1199		}
   1200
   1201		ip = cd.iplist[k];
   1202		jfs_ip = JFS_IP(ip);
   1203
   1204		/*
   1205		 * BUGBUG - This code has temporarily been removed.  The
   1206		 * intent is to ensure that any file data is written before
   1207		 * the metadata is committed to the journal.  This prevents
   1208		 * uninitialized data from appearing in a file after the
   1209		 * journal has been replayed.  (The uninitialized data
   1210		 * could be sensitive data removed by another user.)
   1211		 *
   1212		 * The problem now is that we are holding the IWRITELOCK
   1213		 * on the inode, and calling filemap_fdatawrite on an
   1214		 * unmapped page will cause a deadlock in jfs_get_block.
   1215		 *
   1216		 * The long term solution is to pare down the use of
   1217		 * IWRITELOCK.  We are currently holding it too long.
   1218		 * We could also be smarter about which data pages need
   1219		 * to be written before the transaction is committed and
   1220		 * when we don't need to worry about it at all.
   1221		 *
   1222		 * if ((!S_ISDIR(ip->i_mode))
   1223		 *    && (tblk->flag & COMMIT_DELETE) == 0)
   1224		 *	filemap_write_and_wait(ip->i_mapping);
   1225		 */
   1226
   1227		/*
   1228		 * Mark inode as not dirty.  It will still be on the dirty
   1229		 * inode list, but we'll know not to commit it again unless
   1230		 * it gets marked dirty again
   1231		 */
   1232		clear_cflag(COMMIT_Dirty, ip);
   1233
   1234		/* inherit anonymous tlock(s) of inode */
   1235		if (jfs_ip->atlhead) {
   1236			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
   1237			tblk->next = jfs_ip->atlhead;
   1238			if (!tblk->last)
   1239				tblk->last = jfs_ip->atltail;
   1240			jfs_ip->atlhead = jfs_ip->atltail = 0;
   1241			TXN_LOCK();
   1242			list_del_init(&jfs_ip->anon_inode_list);
   1243			TXN_UNLOCK();
   1244		}
   1245
   1246		/*
   1247		 * acquire transaction lock on on-disk inode page
   1248		 * (become first tlock of the tblk's tlock list)
   1249		 */
   1250		if (((rc = diWrite(tid, ip))))
   1251			goto out;
   1252	}
   1253
   1254	/*
   1255	 *	write log records from transaction locks
   1256	 *
   1257	 * txUpdateMap() resets XAD_NEW in XAD.
   1258	 */
   1259	txLog(log, tblk, &cd);
   1260
   1261	/*
   1262	 * Ensure that inode isn't reused before
   1263	 * lazy commit thread finishes processing
   1264	 */
   1265	if (tblk->xflag & COMMIT_DELETE) {
   1266		ihold(tblk->u.ip);
   1267		/*
   1268		 * Avoid a rare deadlock
   1269		 *
   1270		 * If the inode is locked, we may be blocked in
   1271		 * jfs_commit_inode.  If so, we don't want the
   1272		 * lazy_commit thread doing the last iput() on the inode
   1273		 * since that may block on the locked inode.  Instead,
   1274		 * commit the transaction synchronously, so the last iput
   1275		 * will be done by the calling thread (or later)
   1276		 */
   1277		/*
   1278		 * I believe this code is no longer needed.  Splitting I_LOCK
   1279		 * into two bits, I_NEW and I_SYNC should prevent this
   1280		 * deadlock as well.  But since I don't have a JFS testload
   1281		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
   1282		 * Joern
   1283		 */
   1284		if (tblk->u.ip->i_state & I_SYNC)
   1285			tblk->xflag &= ~COMMIT_LAZY;
   1286	}
   1287
   1288	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
   1289	       ((tblk->u.ip->i_nlink == 0) &&
   1290		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
   1291
   1292	/*
   1293	 *	write COMMIT log record
   1294	 */
   1295	lrd->type = cpu_to_le16(LOG_COMMIT);
   1296	lrd->length = 0;
   1297	lmLog(log, tblk, lrd, NULL);
   1298
   1299	lmGroupCommit(log, tblk);
   1300
   1301	/*
   1302	 *	- transaction is now committed -
   1303	 */
   1304
   1305	/*
   1306	 * force pages in careful update
   1307	 * (imap addressing structure update)
   1308	 */
   1309	if (flag & COMMIT_FORCE)
   1310		txForce(tblk);
   1311
   1312	/*
   1313	 *	update allocation map.
   1314	 *
   1315	 * update inode allocation map and inode:
   1316	 * free pager lock on memory object of inode if any.
   1317	 * update block allocation map.
   1318	 *
   1319	 * txUpdateMap() resets XAD_NEW in XAD.
   1320	 */
   1321	if (tblk->xflag & COMMIT_FORCE)
   1322		txUpdateMap(tblk);
   1323
   1324	/*
   1325	 *	free transaction locks and pageout/free pages
   1326	 */
   1327	txRelease(tblk);
   1328
   1329	if ((tblk->flag & tblkGC_LAZY) == 0)
   1330		txUnlock(tblk);
   1331
   1332
   1333	/*
   1334	 *	reset in-memory object state
   1335	 */
   1336	for (k = 0; k < cd.nip; k++) {
   1337		ip = cd.iplist[k];
   1338		jfs_ip = JFS_IP(ip);
   1339
   1340		/*
   1341		 * reset in-memory inode state
   1342		 */
   1343		jfs_ip->bxflag = 0;
   1344		jfs_ip->blid = 0;
   1345	}
   1346
   1347      out:
   1348	if (rc != 0)
   1349		txAbort(tid, 1);
   1350
   1351      TheEnd:
   1352	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
   1353	return rc;
   1354}
   1355
   1356/*
   1357 * NAME:	txLog()
   1358 *
   1359 * FUNCTION:	Writes AFTER log records for all lines modified
   1360 *		by tid for segments specified by inodes in comdata.
   1361 *		Code assumes only WRITELOCKS are recorded in lockwords.
   1362 *
   1363 * PARAMETERS:
   1364 *
   1365 * RETURN :
   1366 */
   1367static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd)
   1368{
   1369	struct inode *ip;
   1370	lid_t lid;
   1371	struct tlock *tlck;
   1372	struct lrd *lrd = &cd->lrd;
   1373
   1374	/*
   1375	 * write log record(s) for each tlock of transaction,
   1376	 */
   1377	for (lid = tblk->next; lid; lid = tlck->next) {
   1378		tlck = lid_to_tlock(lid);
   1379
   1380		tlck->flag |= tlckLOG;
   1381
   1382		/* initialize lrd common */
   1383		ip = tlck->ip;
   1384		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
   1385		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
   1386		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
   1387
   1388		/* write log record of page from the tlock */
   1389		switch (tlck->type & tlckTYPE) {
   1390		case tlckXTREE:
   1391			xtLog(log, tblk, lrd, tlck);
   1392			break;
   1393
   1394		case tlckDTREE:
   1395			dtLog(log, tblk, lrd, tlck);
   1396			break;
   1397
   1398		case tlckINODE:
   1399			diLog(log, tblk, lrd, tlck, cd);
   1400			break;
   1401
   1402		case tlckMAP:
   1403			mapLog(log, tblk, lrd, tlck);
   1404			break;
   1405
   1406		case tlckDATA:
   1407			dataLog(log, tblk, lrd, tlck);
   1408			break;
   1409
   1410		default:
   1411			jfs_err("UFO tlock:0x%p", tlck);
   1412		}
   1413	}
   1414
   1415	return;
   1416}
   1417
   1418/*
   1419 *	diLog()
   1420 *
   1421 * function:	log inode tlock and format maplock to update bmap;
   1422 */
   1423static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
   1424		 struct tlock *tlck, struct commit *cd)
   1425{
   1426	struct metapage *mp;
   1427	pxd_t *pxd;
   1428	struct pxd_lock *pxdlock;
   1429
   1430	mp = tlck->mp;
   1431
   1432	/* initialize as REDOPAGE record format */
   1433	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
   1434	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
   1435
   1436	pxd = &lrd->log.redopage.pxd;
   1437
   1438	/*
   1439	 *	inode after image
   1440	 */
   1441	if (tlck->type & tlckENTRY) {
   1442		/* log after-image for logredo(): */
   1443		lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1444		PXDaddress(pxd, mp->index);
   1445		PXDlength(pxd,
   1446			  mp->logical_size >> tblk->sb->s_blocksize_bits);
   1447		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1448
   1449		/* mark page as homeward bound */
   1450		tlck->flag |= tlckWRITEPAGE;
   1451	} else if (tlck->type & tlckFREE) {
   1452		/*
   1453		 *	free inode extent
   1454		 *
   1455		 * (pages of the freed inode extent have been invalidated and
   1456		 * a maplock for free of the extent has been formatted at
   1457		 * txLock() time);
   1458		 *
   1459		 * the tlock had been acquired on the inode allocation map page
   1460		 * (iag) that specifies the freed extent, even though the map
   1461		 * page is not itself logged, to prevent pageout of the map
   1462		 * page before the log;
   1463		 */
   1464
   1465		/* log LOG_NOREDOINOEXT of the freed inode extent for
   1466		 * logredo() to start NoRedoPage filters, and to update
   1467		 * imap and bmap for free of the extent;
   1468		 */
   1469		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
   1470		/*
   1471		 * For the LOG_NOREDOINOEXT record, we need
   1472		 * to pass the IAG number and inode extent
   1473		 * index (within that IAG) from which the
   1474		 * extent is being released.  These have been
   1475		 * passed to us in the iplist[1] and iplist[2].
   1476		 */
   1477		lrd->log.noredoinoext.iagnum =
   1478		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
   1479		lrd->log.noredoinoext.inoext_idx =
   1480		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
   1481
   1482		pxdlock = (struct pxd_lock *) & tlck->lock;
   1483		*pxd = pxdlock->pxd;
   1484		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   1485
   1486		/* update bmap */
   1487		tlck->flag |= tlckUPDATEMAP;
   1488
   1489		/* mark page as homeward bound */
   1490		tlck->flag |= tlckWRITEPAGE;
   1491	} else
   1492		jfs_err("diLog: UFO type tlck:0x%p", tlck);
   1493	return;
   1494}
   1495
   1496/*
   1497 *	dataLog()
   1498 *
   1499 * function:	log data tlock
   1500 */
   1501static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
   1502	    struct tlock *tlck)
   1503{
   1504	struct metapage *mp;
   1505	pxd_t *pxd;
   1506
   1507	mp = tlck->mp;
   1508
   1509	/* initialize as REDOPAGE record format */
   1510	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
   1511	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
   1512
   1513	pxd = &lrd->log.redopage.pxd;
   1514
   1515	/* log after-image for logredo(): */
   1516	lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1517
   1518	if (jfs_dirtable_inline(tlck->ip)) {
   1519		/*
   1520		 * The table has been truncated; we must have deleted
   1521		 * the last entry, so don't bother logging this
   1522		 */
   1523		mp->lid = 0;
   1524		grab_metapage(mp);
   1525		metapage_homeok(mp);
   1526		discard_metapage(mp);
   1527		tlck->mp = NULL;
   1528		return;
   1529	}
   1530
   1531	PXDaddress(pxd, mp->index);
   1532	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
   1533
   1534	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1535
   1536	/* mark page as homeward bound */
   1537	tlck->flag |= tlckWRITEPAGE;
   1538
   1539	return;
   1540}
   1541
   1542/*
   1543 *	dtLog()
   1544 *
   1545 * function:	log dtree tlock and format maplock to update bmap;
   1546 */
   1547static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
   1548	   struct tlock * tlck)
   1549{
   1550	struct metapage *mp;
   1551	struct pxd_lock *pxdlock;
   1552	pxd_t *pxd;
   1553
   1554	mp = tlck->mp;
   1555
   1556	/* initialize as REDOPAGE/NOREDOPAGE record format */
   1557	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
   1558	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
   1559
   1560	pxd = &lrd->log.redopage.pxd;
   1561
   1562	if (tlck->type & tlckBTROOT)
   1563		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
   1564
   1565	/*
   1566	 *	page extension via relocation: entry insertion;
   1567	 *	page extension in-place: entry insertion;
   1568	 *	new right page from page split, reinitialized in-line
   1569	 *	root from root page split: entry insertion;
   1570	 */
   1571	if (tlck->type & (tlckNEW | tlckEXTEND)) {
   1572		/* log after-image of the new page for logredo():
   1573		 * mark log (LOG_NEW) for logredo() to initialize
   1574		 * freelist and update bmap for alloc of the new page;
   1575		 */
   1576		lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1577		if (tlck->type & tlckEXTEND)
   1578			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
   1579		else
   1580			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
   1581		PXDaddress(pxd, mp->index);
   1582		PXDlength(pxd,
   1583			  mp->logical_size >> tblk->sb->s_blocksize_bits);
   1584		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1585
   1586		/* format a maplock for txUpdateMap() to update bPMAP for
   1587		 * alloc of the new page;
   1588		 */
   1589		if (tlck->type & tlckBTROOT)
   1590			return;
   1591		tlck->flag |= tlckUPDATEMAP;
   1592		pxdlock = (struct pxd_lock *) & tlck->lock;
   1593		pxdlock->flag = mlckALLOCPXD;
   1594		pxdlock->pxd = *pxd;
   1595
   1596		pxdlock->index = 1;
   1597
   1598		/* mark page as homeward bound */
   1599		tlck->flag |= tlckWRITEPAGE;
   1600		return;
   1601	}
   1602
   1603	/*
   1604	 *	entry insertion/deletion,
   1605	 *	sibling page link update (old right page before split);
   1606	 */
   1607	if (tlck->type & (tlckENTRY | tlckRELINK)) {
   1608		/* log after-image for logredo(): */
   1609		lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1610		PXDaddress(pxd, mp->index);
   1611		PXDlength(pxd,
   1612			  mp->logical_size >> tblk->sb->s_blocksize_bits);
   1613		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1614
   1615		/* mark page as homeward bound */
   1616		tlck->flag |= tlckWRITEPAGE;
   1617		return;
   1618	}
   1619
   1620	/*
   1621	 *	page deletion: page has been invalidated
   1622	 *	page relocation: source extent
   1623	 *
   1624	 *	a maplock for free of the page has been formatted
   1625	 *	at txLock() time);
   1626	 */
   1627	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
   1628		/* log LOG_NOREDOPAGE of the deleted page for logredo()
   1629		 * to start NoRedoPage filter and to update bmap for free
   1630		 * of the deleted page
   1631		 */
   1632		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
   1633		pxdlock = (struct pxd_lock *) & tlck->lock;
   1634		*pxd = pxdlock->pxd;
   1635		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   1636
   1637		/* a maplock for txUpdateMap() for free of the page
   1638		 * has been formatted at txLock() time;
   1639		 */
   1640		tlck->flag |= tlckUPDATEMAP;
   1641	}
   1642	return;
   1643}
   1644
   1645/*
   1646 *	xtLog()
   1647 *
   1648 * function:	log xtree tlock and format maplock to update bmap;
   1649 */
   1650static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
   1651	   struct tlock * tlck)
   1652{
   1653	struct inode *ip;
   1654	struct metapage *mp;
   1655	xtpage_t *p;
   1656	struct xtlock *xtlck;
   1657	struct maplock *maplock;
   1658	struct xdlistlock *xadlock;
   1659	struct pxd_lock *pxdlock;
   1660	pxd_t *page_pxd;
   1661	int next, lwm, hwm;
   1662
   1663	ip = tlck->ip;
   1664	mp = tlck->mp;
   1665
   1666	/* initialize as REDOPAGE/NOREDOPAGE record format */
   1667	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
   1668	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
   1669
   1670	page_pxd = &lrd->log.redopage.pxd;
   1671
   1672	if (tlck->type & tlckBTROOT) {
   1673		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
   1674		p = &JFS_IP(ip)->i_xtroot;
   1675		if (S_ISDIR(ip->i_mode))
   1676			lrd->log.redopage.type |=
   1677			    cpu_to_le16(LOG_DIR_XTREE);
   1678	} else
   1679		p = (xtpage_t *) mp->data;
   1680	next = le16_to_cpu(p->header.nextindex);
   1681
   1682	xtlck = (struct xtlock *) & tlck->lock;
   1683
   1684	maplock = (struct maplock *) & tlck->lock;
   1685	xadlock = (struct xdlistlock *) maplock;
   1686
   1687	/*
   1688	 *	entry insertion/extension;
   1689	 *	sibling page link update (old right page before split);
   1690	 */
   1691	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
   1692		/* log after-image for logredo():
   1693		 * logredo() will update bmap for alloc of new/extended
   1694		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
   1695		 * after-image of XADlist;
   1696		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
   1697		 * applying the after-image to the meta-data page.
   1698		 */
   1699		lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1700		PXDaddress(page_pxd, mp->index);
   1701		PXDlength(page_pxd,
   1702			  mp->logical_size >> tblk->sb->s_blocksize_bits);
   1703		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1704
   1705		/* format a maplock for txUpdateMap() to update bPMAP
   1706		 * for alloc of new/extended extents of XAD[lwm:next)
   1707		 * from the page itself;
   1708		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
   1709		 */
   1710		lwm = xtlck->lwm.offset;
   1711		if (lwm == 0)
   1712			lwm = XTPAGEMAXSLOT;
   1713
   1714		if (lwm == next)
   1715			goto out;
   1716		if (lwm > next) {
   1717			jfs_err("xtLog: lwm > next");
   1718			goto out;
   1719		}
   1720		tlck->flag |= tlckUPDATEMAP;
   1721		xadlock->flag = mlckALLOCXADLIST;
   1722		xadlock->count = next - lwm;
   1723		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
   1724			int i;
   1725			pxd_t *pxd;
   1726			/*
   1727			 * Lazy commit may allow xtree to be modified before
   1728			 * txUpdateMap runs.  Copy xad into linelock to
   1729			 * preserve correct data.
   1730			 *
   1731			 * We can fit twice as many pxd's as xads in the lock
   1732			 */
   1733			xadlock->flag = mlckALLOCPXDLIST;
   1734			pxd = xadlock->xdlist = &xtlck->pxdlock;
   1735			for (i = 0; i < xadlock->count; i++) {
   1736				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
   1737				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
   1738				p->xad[lwm + i].flag &=
   1739				    ~(XAD_NEW | XAD_EXTENDED);
   1740				pxd++;
   1741			}
   1742		} else {
   1743			/*
   1744			 * xdlist will point into the inode's xtree; ensure
   1745			 * that the transaction is not committed lazily.
   1746			 */
   1747			xadlock->flag = mlckALLOCXADLIST;
   1748			xadlock->xdlist = &p->xad[lwm];
   1749			tblk->xflag &= ~COMMIT_LAZY;
   1750		}
   1751		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
   1752			 tlck->ip, mp, tlck, lwm, xadlock->count);
   1753
   1754		maplock->index = 1;
   1755
   1756	      out:
   1757		/* mark page as homeward bound */
   1758		tlck->flag |= tlckWRITEPAGE;
   1759
   1760		return;
   1761	}
   1762
   1763	/*
   1764	 *	page deletion: file deletion/truncation (ref. xtTruncate())
   1765	 *
   1766	 * (page will be invalidated after log is written and bmap
   1767	 * is updated from the page);
   1768	 */
   1769	if (tlck->type & tlckFREE) {
   1770		/* LOG_NOREDOPAGE log for NoRedoPage filter:
   1771		 * if page free from file delete, NoRedoFile filter from
   1772		 * inode image of zero link count will subsume NoRedoPage
   1773		 * filters for each page;
   1774		 * if page free from file truncation, write NoRedoPage
   1775		 * filter;
   1776		 *
   1777		 * update of block allocation map for the page itself:
   1778		 * if page free from deletion and truncation, LOG_UPDATEMAP
   1779		 * log for the page itself is generated from processing
   1780		 * its parent page xad entries;
   1781		 */
   1782		/* if page free from file truncation, log LOG_NOREDOPAGE
   1783		 * of the deleted page for logredo() to start NoRedoPage
   1784		 * filter for the page;
   1785		 */
   1786		if (tblk->xflag & COMMIT_TRUNCATE) {
   1787			/* write NOREDOPAGE for the page */
   1788			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
   1789			PXDaddress(page_pxd, mp->index);
   1790			PXDlength(page_pxd,
   1791				  mp->logical_size >> tblk->sb->
   1792				  s_blocksize_bits);
   1793			lrd->backchain =
   1794			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   1795
   1796			if (tlck->type & tlckBTROOT) {
   1797				/* Empty xtree must be logged */
   1798				lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1799				lrd->backchain =
   1800				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1801			}
   1802		}
   1803
   1804		/* init LOG_UPDATEMAP of the freed extents
   1805		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
   1806		 * for logredo() to update bmap;
   1807		 */
   1808		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
   1809		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
   1810		xtlck = (struct xtlock *) & tlck->lock;
   1811		hwm = xtlck->hwm.offset;
   1812		lrd->log.updatemap.nxd =
   1813		    cpu_to_le16(hwm - XTENTRYSTART + 1);
   1814		/* reformat linelock for lmLog() */
   1815		xtlck->header.offset = XTENTRYSTART;
   1816		xtlck->header.length = hwm - XTENTRYSTART + 1;
   1817		xtlck->index = 1;
   1818		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1819
   1820		/* format a maplock for txUpdateMap() to update bmap
   1821		 * to free extents of XAD[XTENTRYSTART:hwm) from the
   1822		 * deleted page itself;
   1823		 */
   1824		tlck->flag |= tlckUPDATEMAP;
   1825		xadlock->count = hwm - XTENTRYSTART + 1;
   1826		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
   1827			int i;
   1828			pxd_t *pxd;
   1829			/*
   1830			 * Lazy commit may allow xtree to be modified before
   1831			 * txUpdateMap runs.  Copy xad into linelock to
   1832			 * preserve correct data.
   1833			 *
    1834			 * We can fit twice as many pxd's as xads in the lock
   1835			 */
   1836			xadlock->flag = mlckFREEPXDLIST;
   1837			pxd = xadlock->xdlist = &xtlck->pxdlock;
   1838			for (i = 0; i < xadlock->count; i++) {
   1839				PXDaddress(pxd,
   1840					addressXAD(&p->xad[XTENTRYSTART + i]));
   1841				PXDlength(pxd,
   1842					lengthXAD(&p->xad[XTENTRYSTART + i]));
   1843				pxd++;
   1844			}
   1845		} else {
   1846			/*
    1847			 * xdlist will point into the inode's xtree; ensure
    1848			 * that the transaction is not committed lazily.
   1849			 */
   1850			xadlock->flag = mlckFREEXADLIST;
   1851			xadlock->xdlist = &p->xad[XTENTRYSTART];
   1852			tblk->xflag &= ~COMMIT_LAZY;
   1853		}
   1854		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
   1855			 tlck->ip, mp, xadlock->count);
   1856
   1857		maplock->index = 1;
   1858
   1859		/* mark page as invalid */
   1860		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
   1861		    && !(tlck->type & tlckBTROOT))
   1862			tlck->flag |= tlckFREEPAGE;
   1863		/*
   1864		   else (tblk->xflag & COMMIT_PMAP)
   1865		   ? release the page;
   1866		 */
   1867		return;
   1868	}
   1869
   1870	/*
   1871	 *	page/entry truncation: file truncation (ref. xtTruncate())
   1872	 *
   1873	 *	|----------+------+------+---------------|
   1874	 *		   |      |      |
   1875	 *		   |      |     hwm - hwm before truncation
   1876	 *		   |     next - truncation point
   1877	 *		  lwm - lwm before truncation
   1878	 * header ?
   1879	 */
   1880	if (tlck->type & tlckTRUNCATE) {
   1881		pxd_t pxd;	/* truncated extent of xad */
   1882		int twm;
   1883
   1884		/*
   1885		 * For truncation the entire linelock may be used, so it would
   1886		 * be difficult to store xad list in linelock itself.
   1887		 * Therefore, we'll just force transaction to be committed
   1888		 * synchronously, so that xtree pages won't be changed before
   1889		 * txUpdateMap runs.
   1890		 */
   1891		tblk->xflag &= ~COMMIT_LAZY;
   1892		lwm = xtlck->lwm.offset;
   1893		if (lwm == 0)
   1894			lwm = XTPAGEMAXSLOT;
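        		/*
        		 * (lwm == 0 seems to mean no entry in this page was
        		 * newly allocated; clamping to XTPAGEMAXSLOT makes the
        		 * XAD[lwm:next) alloc step below a no-op.)
        		 */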
   1895		hwm = xtlck->hwm.offset;
   1896		twm = xtlck->twm.offset;
   1897
   1898		/*
   1899		 *	write log records
   1900		 */
   1901		/* log after-image for logredo():
   1902		 *
   1903		 * logredo() will update bmap for alloc of new/extended
   1904		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
   1905		 * after-image of XADlist;
   1906		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
   1907		 * applying the after-image to the meta-data page.
   1908		 */
   1909		lrd->type = cpu_to_le16(LOG_REDOPAGE);
   1910		PXDaddress(page_pxd, mp->index);
   1911		PXDlength(page_pxd,
   1912			  mp->logical_size >> tblk->sb->s_blocksize_bits);
   1913		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1914
   1915		/*
   1916		 * truncate entry XAD[twm == next - 1]:
   1917		 */
   1918		if (twm == next - 1) {
   1919			/* init LOG_UPDATEMAP for logredo() to update bmap for
   1920			 * free of truncated delta extent of the truncated
   1921			 * entry XAD[next - 1]:
   1922			 * (xtlck->pxdlock = truncated delta extent);
   1923			 */
   1924			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
   1925			/* assert(pxdlock->type & tlckTRUNCATE); */
   1926			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
   1927			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
   1928			lrd->log.updatemap.nxd = cpu_to_le16(1);
   1929			lrd->log.updatemap.pxd = pxdlock->pxd;
   1930			pxd = pxdlock->pxd;	/* save to format maplock */
   1931			lrd->backchain =
   1932			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   1933		}
   1934
   1935		/*
   1936		 * free entries XAD[next:hwm]:
   1937		 */
   1938		if (hwm >= next) {
   1939			/* init LOG_UPDATEMAP of the freed extents
   1940			 * XAD[next:hwm] from the deleted page itself
   1941			 * for logredo() to update bmap;
   1942			 */
   1943			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
   1944			lrd->log.updatemap.type =
   1945			    cpu_to_le16(LOG_FREEXADLIST);
   1946			xtlck = (struct xtlock *) & tlck->lock;
   1947			hwm = xtlck->hwm.offset;
   1948			lrd->log.updatemap.nxd =
   1949			    cpu_to_le16(hwm - next + 1);
   1950			/* reformat linelock for lmLog() */
   1951			xtlck->header.offset = next;
   1952			xtlck->header.length = hwm - next + 1;
   1953			xtlck->index = 1;
   1954			lrd->backchain =
   1955			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
   1956		}
   1957
   1958		/*
   1959		 *	format maplock(s) for txUpdateMap() to update bmap
   1960		 */
   1961		maplock->index = 0;
   1962
   1963		/*
   1964		 * allocate entries XAD[lwm:next):
   1965		 */
   1966		if (lwm < next) {
   1967			/* format a maplock for txUpdateMap() to update bPMAP
   1968			 * for alloc of new/extended extents of XAD[lwm:next)
   1969			 * from the page itself;
   1970			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
   1971			 */
   1972			tlck->flag |= tlckUPDATEMAP;
   1973			xadlock->flag = mlckALLOCXADLIST;
   1974			xadlock->count = next - lwm;
   1975			xadlock->xdlist = &p->xad[lwm];
   1976
   1977			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
   1978				 tlck->ip, mp, xadlock->count, lwm, next);
   1979			maplock->index++;
   1980			xadlock++;
   1981		}
   1982
   1983		/*
   1984		 * truncate entry XAD[twm == next - 1]:
   1985		 */
   1986		if (twm == next - 1) {
   1987			/* format a maplock for txUpdateMap() to update bmap
   1988			 * to free truncated delta extent of the truncated
   1989			 * entry XAD[next - 1];
   1990			 * (xtlck->pxdlock = truncated delta extent);
   1991			 */
   1992			tlck->flag |= tlckUPDATEMAP;
   1993			pxdlock = (struct pxd_lock *) xadlock;
   1994			pxdlock->flag = mlckFREEPXD;
   1995			pxdlock->count = 1;
   1996			pxdlock->pxd = pxd;
   1997
   1998			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
   1999				 ip, mp, pxdlock->count, hwm);
   2000			maplock->index++;
   2001			xadlock++;
   2002		}
   2003
   2004		/*
   2005		 * free entries XAD[next:hwm]:
   2006		 */
   2007		if (hwm >= next) {
   2008			/* format a maplock for txUpdateMap() to update bmap
    2009			 * to free extents of XAD[next:hwm] from the deleted
   2010			 * page itself;
   2011			 */
   2012			tlck->flag |= tlckUPDATEMAP;
   2013			xadlock->flag = mlckFREEXADLIST;
   2014			xadlock->count = hwm - next + 1;
   2015			xadlock->xdlist = &p->xad[next];
   2016
   2017			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
   2018				 tlck->ip, mp, xadlock->count, next, hwm);
   2019			maplock->index++;
   2020		}
   2021
   2022		/* mark page as homeward bound */
   2023		tlck->flag |= tlckWRITEPAGE;
   2024	}
   2025	return;
   2026}
   2027
   2028/*
   2029 *	mapLog()
   2030 *
   2031 * function:	log from maplock of freed data extents;
   2032 */
   2033static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
   2034		   struct tlock * tlck)
   2035{
   2036	struct pxd_lock *pxdlock;
   2037	int i, nlock;
   2038	pxd_t *pxd;
   2039
   2040	/*
   2041	 *	page relocation: free the source page extent
   2042	 *
   2043	 * a maplock for txUpdateMap() for free of the page
   2044	 * has been formatted at txLock() time saving the src
   2045	 * relocated page address;
   2046	 */
   2047	if (tlck->type & tlckRELOCATE) {
   2048		/* log LOG_NOREDOPAGE of the old relocated page
   2049		 * for logredo() to start NoRedoPage filter;
   2050		 */
   2051		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
   2052		pxdlock = (struct pxd_lock *) & tlck->lock;
   2053		pxd = &lrd->log.redopage.pxd;
   2054		*pxd = pxdlock->pxd;
   2055		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   2056
   2057		/* (N.B. currently, logredo() does NOT update bmap
   2058		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
   2059		 * if page free from relocation, LOG_UPDATEMAP log is
   2060		 * specifically generated now for logredo()
   2061		 * to update bmap for free of src relocated page;
   2062		 * (new flag LOG_RELOCATE may be introduced which will
   2063		 * inform logredo() to start NORedoPage filter and also
   2064		 * update block allocation map at the same time, thus
   2065		 * avoiding an extra log write);
   2066		 */
   2067		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
   2068		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
   2069		lrd->log.updatemap.nxd = cpu_to_le16(1);
   2070		lrd->log.updatemap.pxd = pxdlock->pxd;
   2071		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   2072
   2073		/* a maplock for txUpdateMap() for free of the page
   2074		 * has been formatted at txLock() time;
   2075		 */
   2076		tlck->flag |= tlckUPDATEMAP;
   2077		return;
   2078	}
    2079	/*
    2080	 * Otherwise it's not a relocate request
    2081	 */
   2084	else {
   2085		/* log LOG_UPDATEMAP for logredo() to update bmap for
   2086		 * free of truncated/relocated delta extent of the data;
   2087		 * e.g.: external EA extent, relocated/truncated extent
   2088		 * from xtTailgate();
   2089		 */
   2090		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
   2091		pxdlock = (struct pxd_lock *) & tlck->lock;
   2092		nlock = pxdlock->index;
   2093		for (i = 0; i < nlock; i++, pxdlock++) {
   2094			if (pxdlock->flag & mlckALLOCPXD)
   2095				lrd->log.updatemap.type =
   2096				    cpu_to_le16(LOG_ALLOCPXD);
   2097			else
   2098				lrd->log.updatemap.type =
   2099				    cpu_to_le16(LOG_FREEPXD);
   2100			lrd->log.updatemap.nxd = cpu_to_le16(1);
   2101			lrd->log.updatemap.pxd = pxdlock->pxd;
   2102			lrd->backchain =
   2103			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
   2104			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
   2105				 (ulong) addressPXD(&pxdlock->pxd),
   2106				 lengthPXD(&pxdlock->pxd));
   2107		}
   2108
   2109		/* update bmap */
   2110		tlck->flag |= tlckUPDATEMAP;
   2111	}
   2112}
   2113
   2114/*
   2115 *	txEA()
   2116 *
   2117 * function:	acquire maplock for EA/ACL extents or
   2118 *		set COMMIT_INLINE flag;
   2119 */
   2120void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
   2121{
   2122	struct tlock *tlck = NULL;
   2123	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
   2124
   2125	/*
   2126	 * format maplock for alloc of new EA extent
   2127	 */
   2128	if (newea) {
    2129		/* Since the newea could be a completely zeroed entry, we need to
    2130		 * check for the two flags which indicate we should actually
    2131		 * commit new EA data.
    2132		 */
   2133		if (newea->flag & DXD_EXTENT) {
   2134			tlck = txMaplock(tid, ip, tlckMAP);
   2135			maplock = (struct pxd_lock *) & tlck->lock;
   2136			pxdlock = (struct pxd_lock *) maplock;
   2137			pxdlock->flag = mlckALLOCPXD;
   2138			PXDaddress(&pxdlock->pxd, addressDXD(newea));
   2139			PXDlength(&pxdlock->pxd, lengthDXD(newea));
   2140			pxdlock++;
   2141			maplock->index = 1;
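        			/* pxdlock now addresses the second pxd_lock
        			 * slot; a free of the old EA extent below, if
        			 * any, is formatted there and bumps the index.
        			 */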
   2142		} else if (newea->flag & DXD_INLINE) {
   2143			tlck = NULL;
   2144
   2145			set_cflag(COMMIT_Inlineea, ip);
   2146		}
   2147	}
   2148
   2149	/*
   2150	 * format maplock for free of old EA extent
   2151	 */
   2152	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
   2153		if (tlck == NULL) {
   2154			tlck = txMaplock(tid, ip, tlckMAP);
   2155			maplock = (struct pxd_lock *) & tlck->lock;
   2156			pxdlock = (struct pxd_lock *) maplock;
   2157			maplock->index = 0;
   2158		}
   2159		pxdlock->flag = mlckFREEPXD;
   2160		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
   2161		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
   2162		maplock->index++;
   2163	}
   2164}
   2165
   2166/*
   2167 *	txForce()
   2168 *
   2169 * function: synchronously write pages locked by transaction
   2170 *	     after txLog() but before txUpdateMap();
   2171 */
   2172static void txForce(struct tblock * tblk)
   2173{
   2174	struct tlock *tlck;
   2175	lid_t lid, next;
   2176	struct metapage *mp;
   2177
   2178	/*
   2179	 * reverse the order of transaction tlocks in
   2180	 * careful update order of address index pages
   2181	 * (right to left, bottom up)
   2182	 */
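        	/*
        	 * (A sketch of the reversal below, which operates on lid
        	 * indices rather than pointers: tblk->next: A -> B -> C
        	 * becomes C -> B -> A, so pages are forced in reverse
        	 * lock order.)
        	 */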
   2183	tlck = lid_to_tlock(tblk->next);
   2184	lid = tlck->next;
   2185	tlck->next = 0;
   2186	while (lid) {
   2187		tlck = lid_to_tlock(lid);
   2188		next = tlck->next;
   2189		tlck->next = tblk->next;
   2190		tblk->next = lid;
   2191		lid = next;
   2192	}
   2193
   2194	/*
   2195	 * synchronously write the page, and
   2196	 * hold the page for txUpdateMap();
   2197	 */
   2198	for (lid = tblk->next; lid; lid = next) {
   2199		tlck = lid_to_tlock(lid);
   2200		next = tlck->next;
   2201
   2202		if ((mp = tlck->mp) != NULL &&
   2203		    (tlck->type & tlckBTROOT) == 0) {
   2204			assert(mp->xflag & COMMIT_PAGE);
   2205
   2206			if (tlck->flag & tlckWRITEPAGE) {
   2207				tlck->flag &= ~tlckWRITEPAGE;
   2208
   2209				/* do not release page to freelist */
   2210				force_metapage(mp);
   2211#if 0
   2212				/*
   2213				 * The "right" thing to do here is to
   2214				 * synchronously write the metadata.
   2215				 * With the current implementation this
   2216				 * is hard since write_metapage requires
   2217				 * us to kunmap & remap the page.  If we
   2218				 * have tlocks pointing into the metadata
   2219				 * pages, we don't want to do this.  I think
   2220				 * we can get by with synchronously writing
   2221				 * the pages when they are released.
   2222				 */
   2223				assert(mp->nohomeok);
   2224				set_bit(META_dirty, &mp->flag);
   2225				set_bit(META_sync, &mp->flag);
   2226#endif
   2227			}
   2228		}
   2229	}
   2230}
   2231
   2232/*
   2233 *	txUpdateMap()
   2234 *
   2235 * function:	update persistent allocation map (and working map
   2236 *		if appropriate);
   2237 *
   2238 * parameter:
   2239 */
   2240static void txUpdateMap(struct tblock * tblk)
   2241{
   2242	struct inode *ip;
   2243	struct inode *ipimap;
   2244	lid_t lid;
   2245	struct tlock *tlck;
   2246	struct maplock *maplock;
   2247	struct pxd_lock pxdlock;
   2248	int maptype;
   2249	int k, nlock;
   2250	struct metapage *mp = NULL;
   2251
   2252	ipimap = JFS_SBI(tblk->sb)->ipimap;
   2253
   2254	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
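        	/*
        	 * COMMIT_PMAP: update the persistent map only (the working
        	 * map is updated later, e.g. at last release of a deleted
        	 * file, per the notes in the free path below);
        	 * COMMIT_PWMAP: update both persistent & working maps.
        	 */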
   2255
   2256
   2257	/*
   2258	 *	update block allocation map
   2259	 *
   2260	 * update allocation state in pmap (and wmap) and
   2261	 * update lsn of the pmap page;
   2262	 */
   2263	/*
   2264	 * scan each tlock/page of transaction for block allocation/free:
   2265	 *
   2266	 * for each tlock/page of transaction, update map.
    2267	 *  ? are there tlocks for pmap and pwmap at the same time ?
   2268	 */
   2269	for (lid = tblk->next; lid; lid = tlck->next) {
   2270		tlck = lid_to_tlock(lid);
   2271
   2272		if ((tlck->flag & tlckUPDATEMAP) == 0)
   2273			continue;
   2274
   2275		if (tlck->flag & tlckFREEPAGE) {
   2276			/*
   2277			 * Another thread may attempt to reuse freed space
   2278			 * immediately, so we want to get rid of the metapage
   2279			 * before anyone else has a chance to get it.
   2280			 * Lock metapage, update maps, then invalidate
   2281			 * the metapage.
   2282			 */
   2283			mp = tlck->mp;
   2284			ASSERT(mp->xflag & COMMIT_PAGE);
   2285			grab_metapage(mp);
   2286		}
   2287
   2288		/*
   2289		 * extent list:
   2290		 * . in-line PXD list:
   2291		 * . out-of-line XAD list:
   2292		 */
   2293		maplock = (struct maplock *) & tlck->lock;
   2294		nlock = maplock->index;
   2295
   2296		for (k = 0; k < nlock; k++, maplock++) {
   2297			/*
   2298			 * allocate blocks in persistent map:
   2299			 *
   2300			 * blocks have been allocated from wmap at alloc time;
   2301			 */
   2302			if (maplock->flag & mlckALLOC) {
   2303				txAllocPMap(ipimap, maplock, tblk);
   2304			}
   2305			/*
   2306			 * free blocks in persistent and working map:
   2307			 * blocks will be freed in pmap and then in wmap;
   2308			 *
   2309			 * ? tblock specifies the PMAP/PWMAP based upon
   2310			 * transaction
   2311			 *
   2312			 * free blocks in persistent map:
   2313			 * blocks will be freed from wmap at last reference
   2314			 * release of the object for regular files;
   2315			 *
    2316			 * Always free blocks from both persistent & working
   2317			 * maps for directories
   2318			 */
   2319			else {	/* (maplock->flag & mlckFREE) */
   2320
   2321				if (tlck->flag & tlckDIRECTORY)
   2322					txFreeMap(ipimap, maplock,
   2323						  tblk, COMMIT_PWMAP);
   2324				else
   2325					txFreeMap(ipimap, maplock,
   2326						  tblk, maptype);
   2327			}
   2328		}
   2329		if (tlck->flag & tlckFREEPAGE) {
   2330			if (!(tblk->flag & tblkGC_LAZY)) {
   2331				/* This is equivalent to txRelease */
   2332				ASSERT(mp->lid == lid);
   2333				tlck->mp->lid = 0;
   2334			}
   2335			assert(mp->nohomeok == 1);
   2336			metapage_homeok(mp);
   2337			discard_metapage(mp);
   2338			tlck->mp = NULL;
   2339		}
   2340	}
   2341	/*
   2342	 *	update inode allocation map
   2343	 *
   2344	 * update allocation state in pmap and
   2345	 * update lsn of the pmap page;
   2346	 * update in-memory inode flag/state
   2347	 *
   2348	 * unlock mapper/write lock
   2349	 */
   2350	if (tblk->xflag & COMMIT_CREATE) {
   2351		diUpdatePMap(ipimap, tblk->ino, false, tblk);
   2352		/* update persistent block allocation map
   2353		 * for the allocation of inode extent;
   2354		 */
   2355		pxdlock.flag = mlckALLOCPXD;
   2356		pxdlock.pxd = tblk->u.ixpxd;
   2357		pxdlock.index = 1;
   2358		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
   2359	} else if (tblk->xflag & COMMIT_DELETE) {
   2360		ip = tblk->u.ip;
   2361		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
   2362		iput(ip);
   2363	}
   2364}
   2365
   2366/*
   2367 *	txAllocPMap()
   2368 *
   2369 * function: allocate from persistent map;
   2370 *
   2371 * parameter:
   2372 *	ipbmap	-
   2373 *	malock	-
   2374 *		xad list:
   2375 *		pxd:
   2376 *
   2377 *	maptype -
   2378 *		allocate from persistent map;
   2379 *		free from persistent map;
    2380 *		(e.g., tmp file - free from working map at release
   2381 *		 of last reference);
   2382 *		free from persistent and working map;
   2383 *
   2384 *	lsn	- log sequence number;
   2385 */
   2386static void txAllocPMap(struct inode *ip, struct maplock * maplock,
   2387			struct tblock * tblk)
   2388{
   2389	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
   2390	struct xdlistlock *xadlistlock;
   2391	xad_t *xad;
   2392	s64 xaddr;
   2393	int xlen;
   2394	struct pxd_lock *pxdlock;
   2395	struct xdlistlock *pxdlistlock;
   2396	pxd_t *pxd;
   2397	int n;
   2398
   2399	/*
   2400	 * allocate from persistent map;
   2401	 */
   2402	if (maplock->flag & mlckALLOCXADLIST) {
   2403		xadlistlock = (struct xdlistlock *) maplock;
   2404		xad = xadlistlock->xdlist;
   2405		for (n = 0; n < xadlistlock->count; n++, xad++) {
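        			/* only XAD_NEW/XAD_EXTENDED entries need a
        			 * pmap update; other entries in the list are
        			 * already persistently allocated.
        			 */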
   2406			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
   2407				xaddr = addressXAD(xad);
   2408				xlen = lengthXAD(xad);
   2409				dbUpdatePMap(ipbmap, false, xaddr,
   2410					     (s64) xlen, tblk);
   2411				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
   2412				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
   2413					 (ulong) xaddr, xlen);
   2414			}
   2415		}
   2416	} else if (maplock->flag & mlckALLOCPXD) {
   2417		pxdlock = (struct pxd_lock *) maplock;
   2418		xaddr = addressPXD(&pxdlock->pxd);
   2419		xlen = lengthPXD(&pxdlock->pxd);
   2420		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
   2421		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
   2422	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
   2423
   2424		pxdlistlock = (struct xdlistlock *) maplock;
   2425		pxd = pxdlistlock->xdlist;
   2426		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
   2427			xaddr = addressPXD(pxd);
   2428			xlen = lengthPXD(pxd);
   2429			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
   2430				     tblk);
   2431			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
   2432				 (ulong) xaddr, xlen);
   2433		}
   2434	}
   2435}
   2436
   2437/*
   2438 *	txFreeMap()
   2439 *
   2440 * function:	free from persistent and/or working map;
   2441 *
   2442 * todo: optimization
   2443 */
   2444void txFreeMap(struct inode *ip,
   2445	       struct maplock * maplock, struct tblock * tblk, int maptype)
   2446{
   2447	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
   2448	struct xdlistlock *xadlistlock;
   2449	xad_t *xad;
   2450	s64 xaddr;
   2451	int xlen;
   2452	struct pxd_lock *pxdlock;
   2453	struct xdlistlock *pxdlistlock;
   2454	pxd_t *pxd;
   2455	int n;
   2456
   2457	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
   2458		 tblk, maplock, maptype);
   2459
   2460	/*
   2461	 * free from persistent map;
   2462	 */
   2463	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
   2464		if (maplock->flag & mlckFREEXADLIST) {
   2465			xadlistlock = (struct xdlistlock *) maplock;
   2466			xad = xadlistlock->xdlist;
   2467			for (n = 0; n < xadlistlock->count; n++, xad++) {
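        				/* an XAD_NEW extent was never recorded
        				 * in the pmap, so only non-new entries
        				 * need a persistent-map free here.
        				 */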
   2468				if (!(xad->flag & XAD_NEW)) {
   2469					xaddr = addressXAD(xad);
   2470					xlen = lengthXAD(xad);
   2471					dbUpdatePMap(ipbmap, true, xaddr,
   2472						     (s64) xlen, tblk);
   2473					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
   2474						 (ulong) xaddr, xlen);
   2475				}
   2476			}
   2477		} else if (maplock->flag & mlckFREEPXD) {
   2478			pxdlock = (struct pxd_lock *) maplock;
   2479			xaddr = addressPXD(&pxdlock->pxd);
   2480			xlen = lengthPXD(&pxdlock->pxd);
   2481			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
   2482				     tblk);
   2483			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
   2484				 (ulong) xaddr, xlen);
    2485		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
   2486
   2487			pxdlistlock = (struct xdlistlock *) maplock;
   2488			pxd = pxdlistlock->xdlist;
   2489			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
   2490				xaddr = addressPXD(pxd);
   2491				xlen = lengthPXD(pxd);
   2492				dbUpdatePMap(ipbmap, true, xaddr,
   2493					     (s64) xlen, tblk);
   2494				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
   2495					 (ulong) xaddr, xlen);
   2496			}
   2497		}
   2498	}
   2499
   2500	/*
   2501	 * free from working map;
   2502	 */
   2503	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
   2504		if (maplock->flag & mlckFREEXADLIST) {
   2505			xadlistlock = (struct xdlistlock *) maplock;
   2506			xad = xadlistlock->xdlist;
   2507			for (n = 0; n < xadlistlock->count; n++, xad++) {
   2508				xaddr = addressXAD(xad);
   2509				xlen = lengthXAD(xad);
   2510				dbFree(ip, xaddr, (s64) xlen);
   2511				xad->flag = 0;
   2512				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
   2513					 (ulong) xaddr, xlen);
   2514			}
   2515		} else if (maplock->flag & mlckFREEPXD) {
   2516			pxdlock = (struct pxd_lock *) maplock;
   2517			xaddr = addressPXD(&pxdlock->pxd);
   2518			xlen = lengthPXD(&pxdlock->pxd);
   2519			dbFree(ip, xaddr, (s64) xlen);
   2520			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
   2521				 (ulong) xaddr, xlen);
   2522		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
   2523
   2524			pxdlistlock = (struct xdlistlock *) maplock;
   2525			pxd = pxdlistlock->xdlist;
   2526			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
   2527				xaddr = addressPXD(pxd);
   2528				xlen = lengthPXD(pxd);
   2529				dbFree(ip, xaddr, (s64) xlen);
   2530				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
   2531					 (ulong) xaddr, xlen);
   2532			}
   2533		}
   2534	}
   2535}
   2536
   2537/*
   2538 *	txFreelock()
   2539 *
   2540 * function:	remove tlock from inode anonymous locklist
   2541 */
   2542void txFreelock(struct inode *ip)
   2543{
   2544	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
   2545	struct tlock *xtlck, *tlck;
   2546	lid_t xlid = 0, lid;
   2547
   2548	if (!jfs_ip->atlhead)
   2549		return;
   2550
   2551	TXN_LOCK();
   2552	xtlck = (struct tlock *) &jfs_ip->atlhead;
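        	/*
        	 * (Treats the list head as a dummy tlock so the unlink loop
        	 * below needs no first-element special case; this assumes
        	 * atlhead overlays the offset of tlock->next.)
        	 */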
   2553
   2554	while ((lid = xtlck->next) != 0) {
   2555		tlck = lid_to_tlock(lid);
   2556		if (tlck->flag & tlckFREELOCK) {
   2557			xtlck->next = tlck->next;
   2558			txLockFree(lid);
   2559		} else {
   2560			xtlck = tlck;
   2561			xlid = lid;
   2562		}
   2563	}
   2564
   2565	if (jfs_ip->atlhead)
   2566		jfs_ip->atltail = xlid;
   2567	else {
   2568		jfs_ip->atltail = 0;
   2569		/*
   2570		 * If inode was on anon_list, remove it
   2571		 */
   2572		list_del_init(&jfs_ip->anon_inode_list);
   2573	}
   2574	TXN_UNLOCK();
   2575}
   2576
   2577/*
   2578 *	txAbort()
   2579 *
   2580 * function: abort tx before commit;
   2581 *
   2582 * frees line-locks and segment locks for all
   2583 * segments in comdata structure.
   2584 * Optionally sets state of file-system to FM_DIRTY in super-block.
    2585 * log ages of in-memory page frames for which the caller
    2586 * holds tlocks are reset to 0 (to avoid logwrap).
   2587 */
   2588void txAbort(tid_t tid, int dirty)
   2589{
   2590	lid_t lid, next;
   2591	struct metapage *mp;
   2592	struct tblock *tblk = tid_to_tblock(tid);
   2593	struct tlock *tlck;
   2594
   2595	/*
   2596	 * free tlocks of the transaction
   2597	 */
   2598	for (lid = tblk->next; lid; lid = next) {
   2599		tlck = lid_to_tlock(lid);
   2600		next = tlck->next;
   2601		mp = tlck->mp;
   2602		JFS_IP(tlck->ip)->xtlid = 0;
   2603
   2604		if (mp) {
   2605			mp->lid = 0;
   2606
   2607			/*
    2608			 * reset lsn of page to avoid logwrap:
   2609			 *
   2610			 * (page may have been previously committed by another
   2611			 * transaction(s) but has not been paged, i.e.,
   2612			 * it may be on logsync list even though it has not
   2613			 * been logged for the current tx.)
   2614			 */
   2615			if (mp->xflag & COMMIT_PAGE && mp->lsn)
   2616				LogSyncRelease(mp);
   2617		}
   2618		/* insert tlock at head of freelist */
   2619		TXN_LOCK();
   2620		txLockFree(lid);
   2621		TXN_UNLOCK();
   2622	}
   2623
   2624	/* caller will free the transaction block */
   2625
   2626	tblk->next = tblk->last = 0;
   2627
   2628	/*
   2629	 * mark filesystem dirty
   2630	 */
   2631	if (dirty)
   2632		jfs_error(tblk->sb, "\n");
   2633
   2634	return;
   2635}
   2636
   2637/*
   2638 *	txLazyCommit(void)
   2639 *
   2640 *	All transactions except those changing ipimap (COMMIT_FORCE) are
   2641 *	processed by this routine.  This insures that the inode and block
   2642 *	allocation maps are updated in order.  For synchronous transactions,
   2643 *	let the user thread finish processing after txUpdateMap() is called.
   2644 */
   2645static void txLazyCommit(struct tblock * tblk)
   2646{
   2647	struct jfs_log *log;
   2648
   2649	while (((tblk->flag & tblkGC_READY) == 0) &&
   2650	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
   2651		/* We must have gotten ahead of the user thread
   2652		 */
   2653		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
   2654		yield();
   2655	}
   2656
   2657	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
   2658
   2659	txUpdateMap(tblk);
   2660
   2661	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
   2662
   2663	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
   2664
   2665	tblk->flag |= tblkGC_COMMITTED;
   2666
   2667	if (tblk->flag & tblkGC_READY)
   2668		log->gcrtc--;
   2669
   2670	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
   2671
   2672	/*
   2673	 * Can't release log->gclock until we've tested tblk->flag
   2674	 */
   2675	if (tblk->flag & tblkGC_LAZY) {
   2676		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
   2677		txUnlock(tblk);
   2678		tblk->flag &= ~tblkGC_LAZY;
   2679		txEnd(tblk - TxBlock);	/* Convert back to tid */
   2680	} else
   2681		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
   2682
   2683	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
   2684}
   2685
   2686/*
   2687 *	jfs_lazycommit(void)
   2688 *
   2689 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
   2690 *	context, or where blocking is not wanted, this routine will process
   2691 *	committed transactions from the unlock queue.
   2692 */
   2693int jfs_lazycommit(void *arg)
   2694{
   2695	int WorkDone;
   2696	struct tblock *tblk;
   2697	unsigned long flags;
   2698	struct jfs_sb_info *sbi;
   2699
   2700	do {
   2701		LAZY_LOCK(flags);
   2702		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
   2703		while (!list_empty(&TxAnchor.unlock_queue)) {
   2704			WorkDone = 0;
   2705			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
   2706					    cqueue) {
   2707
   2708				sbi = JFS_SBI(tblk->sb);
   2709				/*
   2710				 * For each volume, the transactions must be
   2711				 * handled in order.  If another commit thread
   2712				 * is handling a tblk for this superblock,
   2713				 * skip it
   2714				 */
   2715				if (sbi->commit_state & IN_LAZYCOMMIT)
   2716					continue;
   2717
   2718				sbi->commit_state |= IN_LAZYCOMMIT;
   2719				WorkDone = 1;
   2720
   2721				/*
   2722				 * Remove transaction from queue
   2723				 */
   2724				list_del(&tblk->cqueue);
   2725
   2726				LAZY_UNLOCK(flags);
   2727				txLazyCommit(tblk);
   2728				LAZY_LOCK(flags);
   2729
   2730				sbi->commit_state &= ~IN_LAZYCOMMIT;
   2731				/*
   2732				 * Don't continue in the for loop.  (We can't
   2733				 * anyway, it's unsafe!)  We want to go back to
   2734				 * the beginning of the list.
   2735				 */
   2736				break;
   2737			}
   2738
   2739			/* If there was nothing to do, don't continue */
   2740			if (!WorkDone)
   2741				break;
   2742		}
   2743		/* In case a wakeup came while all threads were active */
   2744		jfs_commit_thread_waking = 0;
   2745
   2746		if (freezing(current)) {
   2747			LAZY_UNLOCK(flags);
   2748			try_to_freeze();
   2749		} else {
   2750			DECLARE_WAITQUEUE(wq, current);
   2751
   2752			add_wait_queue(&jfs_commit_thread_wait, &wq);
   2753			set_current_state(TASK_INTERRUPTIBLE);
   2754			LAZY_UNLOCK(flags);
   2755			schedule();
   2756			remove_wait_queue(&jfs_commit_thread_wait, &wq);
   2757		}
   2758	} while (!kthread_should_stop());
   2759
   2760	if (!list_empty(&TxAnchor.unlock_queue))
   2761		jfs_err("jfs_lazycommit being killed w/pending transactions!");
   2762	else
   2763		jfs_info("jfs_lazycommit being killed");
   2764	return 0;
   2765}
   2766
   2767void txLazyUnlock(struct tblock * tblk)
   2768{
   2769	unsigned long flags;
   2770
   2771	LAZY_LOCK(flags);
   2772
   2773	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
   2774	/*
   2775	 * Don't wake up a commit thread if there is already one servicing
   2776	 * this superblock, or if the last one we woke up hasn't started yet.
   2777	 */
   2778	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
   2779	    !jfs_commit_thread_waking) {
   2780		jfs_commit_thread_waking = 1;
   2781		wake_up(&jfs_commit_thread_wait);
   2782	}
   2783	LAZY_UNLOCK(flags);
   2784}
   2785
   2786static void LogSyncRelease(struct metapage * mp)
   2787{
   2788	struct jfs_log *log = mp->log;
   2789
   2790	assert(mp->nohomeok);
   2791	assert(log);
   2792	metapage_homeok(mp);
   2793}
   2794
   2795/*
   2796 *	txQuiesce
   2797 *
   2798 *	Block all new transactions and push anonymous transactions to
   2799 *	completion
   2800 *
   2801 *	This does almost the same thing as jfs_sync below.  We don't
   2802 *	worry about deadlocking when jfs_tlocks_low is set, since we would
   2803 *	expect jfs_sync to get us out of that jam.
   2804 */
   2805void txQuiesce(struct super_block *sb)
   2806{
   2807	struct inode *ip;
   2808	struct jfs_inode_info *jfs_ip;
   2809	struct jfs_log *log = JFS_SBI(sb)->log;
   2810	tid_t tid;
   2811
   2812	set_bit(log_QUIESCE, &log->flag);
   2813
   2814	TXN_LOCK();
   2815restart:
   2816	while (!list_empty(&TxAnchor.anon_list)) {
   2817		jfs_ip = list_entry(TxAnchor.anon_list.next,
   2818				    struct jfs_inode_info,
   2819				    anon_inode_list);
   2820		ip = &jfs_ip->vfs_inode;
   2821
   2822		/*
   2823		 * inode will be removed from anonymous list
   2824		 * when it is committed
   2825		 */
   2826		TXN_UNLOCK();
   2827		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
   2828		mutex_lock(&jfs_ip->commit_mutex);
   2829		txCommit(tid, 1, &ip, 0);
   2830		txEnd(tid);
   2831		mutex_unlock(&jfs_ip->commit_mutex);
   2832		/*
   2833		 * Just to be safe.  I don't know how
   2834		 * long we can run without blocking
   2835		 */
   2836		cond_resched();
   2837		TXN_LOCK();
   2838	}
   2839
   2840	/*
   2841	 * If jfs_sync is running in parallel, there could be some inodes
   2842	 * on anon_list2.  Let's check.
   2843	 */
   2844	if (!list_empty(&TxAnchor.anon_list2)) {
   2845		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
   2846		goto restart;
   2847	}
   2848	TXN_UNLOCK();
   2849
   2850	/*
   2851	 * We may need to kick off the group commit
   2852	 */
   2853	jfs_flush_journal(log, 0);
   2854}
   2855
   2856/*
   2857 * txResume()
   2858 *
   2859 * Allows transactions to start again following txQuiesce
   2860 */
   2861void txResume(struct super_block *sb)
   2862{
   2863	struct jfs_log *log = JFS_SBI(sb)->log;
   2864
   2865	clear_bit(log_QUIESCE, &log->flag);
   2866	TXN_WAKEUP(&log->syncwait);
   2867}
   2868
   2869/*
   2870 *	jfs_sync(void)
   2871 *
   2872 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
   2873 *	We write any inodes that have anonymous tlocks so they will become
   2874 *	available.
   2875 */
   2876int jfs_sync(void *arg)
   2877{
   2878	struct inode *ip;
   2879	struct jfs_inode_info *jfs_ip;
   2880	tid_t tid;
   2881
   2882	do {
   2883		/*
   2884		 * write each inode on the anonymous inode list
   2885		 */
   2886		TXN_LOCK();
   2887		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
   2888			jfs_ip = list_entry(TxAnchor.anon_list.next,
   2889					    struct jfs_inode_info,
   2890					    anon_inode_list);
   2891			ip = &jfs_ip->vfs_inode;
   2892
   2893			if (! igrab(ip)) {
   2894				/*
   2895				 * Inode is being freed
   2896				 */
   2897				list_del_init(&jfs_ip->anon_inode_list);
   2898			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
   2899				/*
   2900				 * inode will be removed from anonymous list
   2901				 * when it is committed
   2902				 */
   2903				TXN_UNLOCK();
   2904				tid = txBegin(ip->i_sb, COMMIT_INODE);
   2905				txCommit(tid, 1, &ip, 0);
   2906				txEnd(tid);
   2907				mutex_unlock(&jfs_ip->commit_mutex);
   2908
   2909				iput(ip);
   2910				/*
   2911				 * Just to be safe.  I don't know how
   2912				 * long we can run without blocking
   2913				 */
   2914				cond_resched();
   2915				TXN_LOCK();
   2916			} else {
   2917				/* We can't get the commit mutex.  It may
   2918				 * be held by a thread waiting for tlock's
   2919				 * so let's not block here.  Save it to
   2920				 * put back on the anon_list.
   2921				 */
   2922
   2923				/* Move from anon_list to anon_list2 */
   2924				list_move(&jfs_ip->anon_inode_list,
   2925					  &TxAnchor.anon_list2);
   2926
   2927				TXN_UNLOCK();
   2928				iput(ip);
   2929				TXN_LOCK();
   2930			}
   2931		}
   2932		/* Add anon_list2 back to anon_list */
   2933		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
   2934
   2935		if (freezing(current)) {
   2936			TXN_UNLOCK();
   2937			try_to_freeze();
   2938		} else {
   2939			set_current_state(TASK_INTERRUPTIBLE);
   2940			TXN_UNLOCK();
   2941			schedule();
   2942		}
   2943	} while (!kthread_should_stop());
   2944
   2945	jfs_info("jfs_sync being killed");
   2946	return 0;
   2947}
   2948
   2949#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
   2950int jfs_txanchor_proc_show(struct seq_file *m, void *v)
   2951{
   2952	char *freewait;
   2953	char *freelockwait;
   2954	char *lowlockwait;
   2955
   2956	freewait =
   2957	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
   2958	freelockwait =
   2959	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
   2960	lowlockwait =
   2961	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
   2962
   2963	seq_printf(m,
   2964		       "JFS TxAnchor\n"
   2965		       "============\n"
   2966		       "freetid = %d\n"
   2967		       "freewait = %s\n"
   2968		       "freelock = %d\n"
   2969		       "freelockwait = %s\n"
   2970		       "lowlockwait = %s\n"
   2971		       "tlocksInUse = %d\n"
   2972		       "jfs_tlocks_low = %d\n"
   2973		       "unlock_queue is %sempty\n",
   2974		       TxAnchor.freetid,
   2975		       freewait,
   2976		       TxAnchor.freelock,
   2977		       freelockwait,
   2978		       lowlockwait,
   2979		       TxAnchor.tlocksInUse,
   2980		       jfs_tlocks_low,
   2981		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
   2982	return 0;
   2983}
   2984#endif
   2985
   2986#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
   2987int jfs_txstats_proc_show(struct seq_file *m, void *v)
   2988{
   2989	seq_printf(m,
   2990		       "JFS TxStats\n"
   2991		       "===========\n"
   2992		       "calls to txBegin = %d\n"
   2993		       "txBegin blocked by sync barrier = %d\n"
   2994		       "txBegin blocked by tlocks low = %d\n"
   2995		       "txBegin blocked by no free tid = %d\n"
   2996		       "calls to txBeginAnon = %d\n"
   2997		       "txBeginAnon blocked by sync barrier = %d\n"
   2998		       "txBeginAnon blocked by tlocks low = %d\n"
   2999		       "calls to txLockAlloc = %d\n"
    3000		       "txLockAlloc blocked by no free lock = %d\n",
   3001		       TxStat.txBegin,
   3002		       TxStat.txBegin_barrier,
   3003		       TxStat.txBegin_lockslow,
   3004		       TxStat.txBegin_freetid,
   3005		       TxStat.txBeginAnon,
   3006		       TxStat.txBeginAnon_barrier,
   3007		       TxStat.txBeginAnon_lockslow,
   3008		       TxStat.txLockAlloc,
   3009		       TxStat.txLockAlloc_freelock);
   3010	return 0;
   3011}
   3012#endif