cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

jfs_logmgr.c (59340B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Copyright (C) International Business Machines Corp., 2000-2004
      4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
      5 */
      6
      7/*
      8 *	jfs_logmgr.c: log manager
      9 *
     10 * for related information, see transaction manager (jfs_txnmgr.c), and
     11 * recovery manager (jfs_logredo.c).
     12 *
     13 * note: for detail, RTFS.
     14 *
     15 *	log buffer manager:
     16 * special purpose buffer manager supporting log i/o requirements.
     17 * per log serial pageout of logpage
     18 * queuing i/o requests and redrive i/o at iodone
     19 * maintain current logpage buffer
     20 * no caching since append only
     21 * appropriate jfs buffer cache buffers as needed
     22 *
     23 *	group commit:
     24 * transactions which wrote COMMIT records in the same in-memory
     25 * log page during the pageout of previous/current log page(s) are
     26 * committed together by the pageout of the page.
     27 *
     28 *	TBD lazy commit:
     29 * transactions are committed asynchronously when the log page
      30 * containing its COMMIT record is paged out when it becomes full;
     31 *
     32 *	serialization:
      33 * . a per log lock serializes log write.
      34 * . a per log lock serializes group commit.
      35 * . a per log lock serializes log open/close;
     36 *
     37 *	TBD log integrity:
     38 * careful-write (ping-pong) of last logpage to recover from crash
     39 * in overwrite.
     40 * detection of split (out-of-order) write of physical sectors
     41 * of last logpage via timestamp at end of each sector
      42 * (with its mirror data array at the trailer).
     43 *
     44 *	alternatives:
     45 * lsn - 64-bit monotonically increasing integer vs
     46 * 32-bit lspn and page eor.
     47 */
     48
     49#include <linux/fs.h>
     50#include <linux/blkdev.h>
     51#include <linux/interrupt.h>
     52#include <linux/completion.h>
     53#include <linux/kthread.h>
     54#include <linux/buffer_head.h>		/* for sync_blockdev() */
     55#include <linux/bio.h>
     56#include <linux/freezer.h>
     57#include <linux/export.h>
     58#include <linux/delay.h>
     59#include <linux/mutex.h>
     60#include <linux/seq_file.h>
     61#include <linux/slab.h>
     62#include "jfs_incore.h"
     63#include "jfs_filsys.h"
     64#include "jfs_metapage.h"
     65#include "jfs_superblock.h"
     66#include "jfs_txnmgr.h"
     67#include "jfs_debug.h"
     68
     69
     70/*
      71 * lbufs ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
     72 */
     73static struct lbuf *log_redrive_list;
     74static DEFINE_SPINLOCK(log_redrive_lock);
     75
     76
     77/*
     78 *	log read/write serialization (per log)
     79 */
     80#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
     81#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
     82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
     83
     84
     85/*
     86 *	log group commit serialization (per log)
     87 */
     88
     89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
     90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
     91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
     92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
     93
     94/*
     95 *	log sync serialization (per log)
     96 */
     97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
     98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
     99/*
    100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
    101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
    102*/
    103
    104
    105/*
    106 *	log buffer cache synchronization
    107 */
    108static DEFINE_SPINLOCK(jfsLCacheLock);
    109
    110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
    111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
    112
    113/*
    114 * See __SLEEP_COND in jfs_locks.h
    115 */
    116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
    117do {						\
    118	if (cond)				\
    119		break;				\
    120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
    121} while (0)
    122
    123#define	LCACHE_WAKEUP(event)	wake_up(event)
    124
    125
    126/*
    127 *	lbuf buffer cache (lCache) control
    128 */
    129/* log buffer manager pageout control (cumulative, inclusive) */
    130#define	lbmREAD		0x0001
    131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
    132				 * init pageout if at head of queue;
    133				 */
    134#define	lbmRELEASE	0x0004	/* remove from write queue
    135				 * at completion of pageout;
    136				 * do not free/recycle it yet:
    137				 * caller will free it;
    138				 */
    139#define	lbmSYNC		0x0008	/* do not return to freelist
    140				 * when removed from write queue;
    141				 */
    142#define lbmFREE		0x0010	/* return to freelist
    143				 * at completion of pageout;
    144				 * the buffer may be recycled;
    145				 */
    146#define	lbmDONE		0x0020
    147#define	lbmERROR	0x0040
    148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
    149				 * of log page
    150				 */
    151#define lbmDIRECT	0x0100
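/*
 * Typical combinations used below (summarized from this file): a full
 * page leaving the write queue is written with
 * lbmWRITE | lbmRELEASE | lbmFREE (write, dequeue, recycle), group
 * commit pageout uses lbmWRITE | lbmGC (plus lbmRELEASE when the page
 * is full), and log superblock updates bypass the write queue via
 * lbmDirectWrite() with lbmWRITE | lbmRELEASE | lbmSYNC.
 */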
    152
    153/*
    154 * Global list of active external journals
    155 */
    156static LIST_HEAD(jfs_external_logs);
    157static struct jfs_log *dummy_log;
    158static DEFINE_MUTEX(jfs_log_mutex);
    159
    160/*
    161 * forward references
    162 */
    163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
    164			 struct lrd * lrd, struct tlock * tlck);
    165
    166static int lmNextPage(struct jfs_log * log);
    167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
    168			   int activate);
    169
    170static int open_inline_log(struct super_block *sb);
    171static int open_dummy_log(struct super_block *sb);
    172static int lbmLogInit(struct jfs_log * log);
    173static void lbmLogShutdown(struct jfs_log * log);
    174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
    175static void lbmFree(struct lbuf * bp);
    176static void lbmfree(struct lbuf * bp);
    177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
    178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
    179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
    180static int lbmIOWait(struct lbuf * bp, int flag);
    181static bio_end_io_t lbmIODone;
    182static void lbmStartIO(struct lbuf * bp);
    183static void lmGCwrite(struct jfs_log * log, int cant_block);
    184static int lmLogSync(struct jfs_log * log, int hard_sync);
    185
    186
    187
    188/*
    189 *	statistics
    190 */
    191#ifdef CONFIG_JFS_STATISTICS
    192static struct lmStat {
    193	uint commit;		/* # of commit */
    194	uint pagedone;		/* # of page written */
    195	uint submitted;		/* # of pages submitted */
    196	uint full_page;		/* # of full pages submitted */
    197	uint partial_page;	/* # of partial pages submitted */
    198} lmStat;
    199#endif
    200
    201static void write_special_inodes(struct jfs_log *log,
    202				 int (*writer)(struct address_space *))
    203{
    204	struct jfs_sb_info *sbi;
    205
    206	list_for_each_entry(sbi, &log->sb_list, log_list) {
    207		writer(sbi->ipbmap->i_mapping);
    208		writer(sbi->ipimap->i_mapping);
    209		writer(sbi->direct_inode->i_mapping);
    210	}
    211}
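/*
 * Usage sketch: the writer argument selects the writeback mode, e.g.
 * lmLogSync() below passes filemap_fdatawrite for hard_sync and
 * filemap_flush otherwise:
 *
 *	write_special_inodes(log, filemap_fdatawrite);
 */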
    212
    213/*
    214 * NAME:	lmLog()
    215 *
    216 * FUNCTION:	write a log record;
    217 *
    218 * PARAMETER:
    219 *
    220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
    221 *		-1  - error;
    222 *
    223 * note: todo: log error handler
    224 */
    225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
    226	  struct tlock * tlck)
    227{
    228	int lsn;
    229	int diffp, difft;
    230	struct metapage *mp = NULL;
    231	unsigned long flags;
    232
    233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
    234		 log, tblk, lrd, tlck);
    235
    236	LOG_LOCK(log);
    237
    238	/* log by (out-of-transaction) JFS ? */
    239	if (tblk == NULL)
    240		goto writeRecord;
    241
    242	/* log from page ? */
    243	if (tlck == NULL ||
    244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
    245		goto writeRecord;
    246
    247	/*
    248	 *	initialize/update page/transaction recovery lsn
    249	 */
    250	lsn = log->lsn;
    251
    252	LOGSYNC_LOCK(log, flags);
    253
    254	/*
    255	 * initialize page lsn if first log write of the page
    256	 */
    257	if (mp->lsn == 0) {
    258		mp->log = log;
    259		mp->lsn = lsn;
    260		log->count++;
    261
    262		/* insert page at tail of logsynclist */
    263		list_add_tail(&mp->synclist, &log->synclist);
    264	}
    265
    266	/*
    267	 *	initialize/update lsn of tblock of the page
    268	 *
    269	 * transaction inherits oldest lsn of pages associated
    270	 * with allocation/deallocation of resources (their
    271	 * log records are used to reconstruct allocation map
    272	 * at recovery time: inode for inode allocation map,
    273	 * B+-tree index of extent descriptors for block
    274	 * allocation map);
    275	 * allocation map pages inherit transaction lsn at
    276	 * commit time to allow forwarding log syncpt past log
    277	 * records associated with allocation/deallocation of
    278	 * resources only after persistent map of these map pages
    279	 * have been updated and propagated to home.
    280	 */
    281	/*
    282	 * initialize transaction lsn:
    283	 */
    284	if (tblk->lsn == 0) {
    285		/* inherit lsn of its first page logged */
    286		tblk->lsn = mp->lsn;
    287		log->count++;
    288
    289		/* insert tblock after the page on logsynclist */
    290		list_add(&tblk->synclist, &mp->synclist);
    291	}
    292	/*
    293	 * update transaction lsn:
    294	 */
    295	else {
    296		/* inherit oldest/smallest lsn of page */
    297		logdiff(diffp, mp->lsn, log);
    298		logdiff(difft, tblk->lsn, log);
    299		if (diffp < difft) {
    300			/* update tblock lsn with page lsn */
    301			tblk->lsn = mp->lsn;
    302
    303			/* move tblock after page on logsynclist */
    304			list_move(&tblk->synclist, &mp->synclist);
    305		}
    306	}
    307
    308	LOGSYNC_UNLOCK(log, flags);
    309
    310	/*
    311	 *	write the log record
    312	 */
    313      writeRecord:
    314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
    315
    316	/*
    317	 * forward log syncpt if log reached next syncpt trigger
    318	 */
    319	logdiff(diffp, lsn, log);
    320	if (diffp >= log->nextsync)
    321		lsn = lmLogSync(log, 0);
    322
    323	/* update end-of-log lsn */
    324	log->lsn = lsn;
    325
    326	LOG_UNLOCK(log);
    327
    328	/* return end-of-log address */
    329	return lsn;
    330}
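/*
 * For reference, the logdiff() used above (a macro from jfs_logmgr.h)
 * measures how far an lsn lies past the last syncpt, modulo the log
 * size, so the "diffp >= log->nextsync" test keeps working across log
 * wrap.  Illustrative numbers (made up for the example):
 *
 *	with log->syncpt = 0x7000 and log->logsize = 0x8000:
 *	lsn = 0x7800           -> diff = 0x0800
 *	lsn = 0x0800 (wrapped) -> diff = 0x1800, never negative
 */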
    331
    332/*
    333 * NAME:	lmWriteRecord()
    334 *
    335 * FUNCTION:	move the log record to current log page
    336 *
     337 * PARAMETER:	log, tblk, lrd, tlck - see lmLog()
    338 *
    339 * RETURN:	end-of-log address
    340 *
    341 * serialization: LOG_LOCK() held on entry/exit
    342 */
    343static int
    344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
    345	      struct tlock * tlck)
    346{
    347	int lsn = 0;		/* end-of-log address */
    348	struct lbuf *bp;	/* dst log page buffer */
    349	struct logpage *lp;	/* dst log page */
    350	caddr_t dst;		/* destination address in log page */
    351	int dstoffset;		/* end-of-log offset in log page */
    352	int freespace;		/* free space in log page */
    353	caddr_t p;		/* src meta-data page */
    354	caddr_t src;
    355	int srclen;
    356	int nbytes;		/* number of bytes to move */
    357	int i;
    358	int len;
    359	struct linelock *linelock;
    360	struct lv *lv;
    361	struct lvd *lvd;
    362	int l2linesize;
    363
    364	len = 0;
    365
    366	/* retrieve destination log page to write */
    367	bp = (struct lbuf *) log->bp;
    368	lp = (struct logpage *) bp->l_ldata;
    369	dstoffset = log->eor;
    370
    371	/* any log data to write ? */
    372	if (tlck == NULL)
    373		goto moveLrd;
    374
    375	/*
    376	 *	move log record data
    377	 */
    378	/* retrieve source meta-data page to log */
    379	if (tlck->flag & tlckPAGELOCK) {
    380		p = (caddr_t) (tlck->mp->data);
    381		linelock = (struct linelock *) & tlck->lock;
    382	}
    383	/* retrieve source in-memory inode to log */
    384	else if (tlck->flag & tlckINODELOCK) {
    385		if (tlck->type & tlckDTREE)
    386			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
    387		else
    388			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
    389		linelock = (struct linelock *) & tlck->lock;
    390	}
    391	else {
    392		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
    393		return 0;	/* Probably should trap */
    394	}
    395	l2linesize = linelock->l2linesize;
    396
    397      moveData:
    398	ASSERT(linelock->index <= linelock->maxcnt);
    399
    400	lv = linelock->lv;
    401	for (i = 0; i < linelock->index; i++, lv++) {
    402		if (lv->length == 0)
    403			continue;
    404
    405		/* is page full ? */
    406		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
    407			/* page become full: move on to next page */
    408			lmNextPage(log);
    409
    410			bp = log->bp;
    411			lp = (struct logpage *) bp->l_ldata;
    412			dstoffset = LOGPHDRSIZE;
    413		}
    414
    415		/*
    416		 * move log vector data
    417		 */
    418		src = (u8 *) p + (lv->offset << l2linesize);
    419		srclen = lv->length << l2linesize;
    420		len += srclen;
    421		while (srclen > 0) {
    422			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
    423			nbytes = min(freespace, srclen);
    424			dst = (caddr_t) lp + dstoffset;
    425			memcpy(dst, src, nbytes);
    426			dstoffset += nbytes;
    427
    428			/* is page not full ? */
    429			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
    430				break;
    431
    432			/* page become full: move on to next page */
    433			lmNextPage(log);
    434
    435			bp = (struct lbuf *) log->bp;
    436			lp = (struct logpage *) bp->l_ldata;
    437			dstoffset = LOGPHDRSIZE;
    438
    439			srclen -= nbytes;
    440			src += nbytes;
    441		}
    442
    443		/*
    444		 * move log vector descriptor
    445		 */
    446		len += 4;
    447		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
    448		lvd->offset = cpu_to_le16(lv->offset);
    449		lvd->length = cpu_to_le16(lv->length);
    450		dstoffset += 4;
    451		jfs_info("lmWriteRecord: lv offset:%d length:%d",
    452			 lv->offset, lv->length);
    453	}
    454
    455	if ((i = linelock->next)) {
    456		linelock = (struct linelock *) lid_to_tlock(i);
    457		goto moveData;
    458	}
    459
    460	/*
    461	 *	move log record descriptor
    462	 */
    463      moveLrd:
    464	lrd->length = cpu_to_le16(len);
    465
    466	src = (caddr_t) lrd;
    467	srclen = LOGRDSIZE;
    468
    469	while (srclen > 0) {
    470		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
    471		nbytes = min(freespace, srclen);
    472		dst = (caddr_t) lp + dstoffset;
    473		memcpy(dst, src, nbytes);
    474
    475		dstoffset += nbytes;
    476		srclen -= nbytes;
    477
    478		/* are there more to move than freespace of page ? */
    479		if (srclen)
    480			goto pageFull;
    481
    482		/*
    483		 * end of log record descriptor
    484		 */
    485
    486		/* update last log record eor */
    487		log->eor = dstoffset;
    488		bp->l_eor = dstoffset;
    489		lsn = (log->page << L2LOGPSIZE) + dstoffset;
    490
    491		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
    492			tblk->clsn = lsn;
    493			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
    494				 bp->l_eor);
    495
    496			INCREMENT(lmStat.commit);	/* # of commit */
    497
    498			/*
    499			 * enqueue tblock for group commit:
    500			 *
    501			 * enqueue tblock of non-trivial/synchronous COMMIT
    502			 * at tail of group commit queue
    503			 * (trivial/asynchronous COMMITs are ignored by
    504			 * group commit.)
    505			 */
    506			LOGGC_LOCK(log);
    507
    508			/* init tblock gc state */
    509			tblk->flag = tblkGC_QUEUE;
    510			tblk->bp = log->bp;
    511			tblk->pn = log->page;
    512			tblk->eor = log->eor;
    513
    514			/* enqueue transaction to commit queue */
    515			list_add_tail(&tblk->cqueue, &log->cqueue);
    516
    517			LOGGC_UNLOCK(log);
    518		}
    519
    520		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
    521			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
    522
    523		/* page not full ? */
    524		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
    525			return lsn;
    526
    527	      pageFull:
    528		/* page become full: move on to next page */
    529		lmNextPage(log);
    530
    531		bp = (struct lbuf *) log->bp;
    532		lp = (struct logpage *) bp->l_ldata;
    533		dstoffset = LOGPHDRSIZE;
    534		src += nbytes;
    535	}
    536
    537	return lsn;
    538}
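/*
 * Illustration of the lsn arithmetic above: record bytes land between
 * LOGPHDRSIZE and LOGPSIZE - LOGPTLRSIZE of each log page, and
 *
 *	lsn = (log->page << L2LOGPSIZE) + dstoffset;
 *
 * e.g. (example numbers) with L2LOGPSIZE = 12, page 5 and dstoffset
 * 0x30 give lsn = 0x5030, so an lsn encodes both the log page number
 * and the end-of-record byte offset within that page.
 */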
    539
    540
    541/*
    542 * NAME:	lmNextPage()
    543 *
    544 * FUNCTION:	write current page and allocate next page.
    545 *
    546 * PARAMETER:	log
    547 *
    548 * RETURN:	0
    549 *
    550 * serialization: LOG_LOCK() held on entry/exit
    551 */
    552static int lmNextPage(struct jfs_log * log)
    553{
    554	struct logpage *lp;
    555	int lspn;		/* log sequence page number */
    556	int pn;			/* current page number */
    557	struct lbuf *bp;
    558	struct lbuf *nextbp;
    559	struct tblock *tblk;
    560
    561	/* get current log page number and log sequence page number */
    562	pn = log->page;
    563	bp = log->bp;
    564	lp = (struct logpage *) bp->l_ldata;
    565	lspn = le32_to_cpu(lp->h.page);
    566
    567	LOGGC_LOCK(log);
    568
    569	/*
    570	 *	write or queue the full page at the tail of write queue
    571	 */
    572	/* get the tail tblk on commit queue */
    573	if (list_empty(&log->cqueue))
    574		tblk = NULL;
    575	else
    576		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
    577
     578	/* every tblk that has a COMMIT record on the current page,
     579	 * and has not been committed, must be on the commit queue,
     580	 * since a tblk is queued at the commit queue at the time
     581	 * its COMMIT record is written to the page, before the
     582	 * page becomes full (even though the tblk thread that
     583	 * wrote the COMMIT record may currently be
     584	 * suspended);
     585	 */
    586
    587	/* is page bound with outstanding tail tblk ? */
    588	if (tblk && tblk->pn == pn) {
    589		/* mark tblk for end-of-page */
    590		tblk->flag |= tblkGC_EOP;
    591
    592		if (log->cflag & logGC_PAGEOUT) {
    593			/* if page is not already on write queue,
    594			 * just enqueue (no lbmWRITE to prevent redrive)
    595			 * buffer to wqueue to ensure correct serial order
    596			 * of the pages since log pages will be added
    597			 * continuously
    598			 */
    599			if (bp->l_wqnext == NULL)
    600				lbmWrite(log, bp, 0, 0);
    601		} else {
    602			/*
    603			 * No current GC leader, initiate group commit
    604			 */
    605			log->cflag |= logGC_PAGEOUT;
    606			lmGCwrite(log, 0);
    607		}
    608	}
    609	/* page is not bound with outstanding tblk:
    610	 * init write or mark it to be redriven (lbmWRITE)
    611	 */
    612	else {
    613		/* finalize the page */
    614		bp->l_ceor = bp->l_eor;
    615		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
    616		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
    617	}
    618	LOGGC_UNLOCK(log);
    619
    620	/*
    621	 *	allocate/initialize next page
    622	 */
    623	/* if log wraps, the first data page of log is 2
    624	 * (0 never used, 1 is superblock).
    625	 */
    626	log->page = (pn == log->size - 1) ? 2 : pn + 1;
    627	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
    628
    629	/* allocate/initialize next log page buffer */
    630	nextbp = lbmAllocate(log, log->page);
    631	nextbp->l_eor = log->eor;
    632	log->bp = nextbp;
    633
    634	/* initialize next log page */
    635	lp = (struct logpage *) nextbp->l_ldata;
    636	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
    637	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
    638
    639	return 0;
    640}
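/*
 * Example of the wrap rule above, assuming log->size = 1024 pages
 * (page 0 is never used, page 1 is the log superblock):
 *
 *	pn = 1022 -> log->page = 1023
 *	pn = 1023 -> log->page = 2	(pn == log->size - 1)
 */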
    641
    642
    643/*
    644 * NAME:	lmGroupCommit()
    645 *
    646 * FUNCTION:	group commit
    647 *	initiate pageout of the pages with COMMIT in the order of
    648 *	page number - redrive pageout of the page at the head of
    649 *	pageout queue until full page has been written.
    650 *
    651 * RETURN:
    652 *
    653 * NOTE:
    654 *	LOGGC_LOCK serializes log group commit queue, and
    655 *	transaction blocks on the commit queue.
    656 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
    657 */
    658int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
    659{
    660	int rc = 0;
    661
    662	LOGGC_LOCK(log);
    663
    664	/* group committed already ? */
    665	if (tblk->flag & tblkGC_COMMITTED) {
    666		if (tblk->flag & tblkGC_ERROR)
    667			rc = -EIO;
    668
    669		LOGGC_UNLOCK(log);
    670		return rc;
    671	}
    672	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
    673
    674	if (tblk->xflag & COMMIT_LAZY)
    675		tblk->flag |= tblkGC_LAZY;
    676
    677	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
    678	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
    679	     || jfs_tlocks_low)) {
    680		/*
    681		 * No pageout in progress
    682		 *
    683		 * start group commit as its group leader.
    684		 */
    685		log->cflag |= logGC_PAGEOUT;
    686
    687		lmGCwrite(log, 0);
    688	}
    689
    690	if (tblk->xflag & COMMIT_LAZY) {
    691		/*
    692		 * Lazy transactions can leave now
    693		 */
    694		LOGGC_UNLOCK(log);
    695		return 0;
    696	}
    697
    698	/* lmGCwrite gives up LOGGC_LOCK, check again */
    699
    700	if (tblk->flag & tblkGC_COMMITTED) {
    701		if (tblk->flag & tblkGC_ERROR)
    702			rc = -EIO;
    703
    704		LOGGC_UNLOCK(log);
    705		return rc;
    706	}
    707
    708	/* upcount transaction waiting for completion
    709	 */
    710	log->gcrtc++;
    711	tblk->flag |= tblkGC_READY;
    712
    713	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
    714		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
    715
    716	/* removed from commit queue */
    717	if (tblk->flag & tblkGC_ERROR)
    718		rc = -EIO;
    719
    720	LOGGC_UNLOCK(log);
    721	return rc;
    722}
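/*
 * Informal sketch of the tblk group-commit states as driven by this
 * file: tblkGC_QUEUE is set by lmWriteRecord() when the COMMIT record
 * is enqueued on log->cqueue; tblkGC_READY is set above while the
 * transaction sleeps on gcwait; tblkGC_COMMIT is set by lmGCwrite()
 * when the page holding the COMMIT is shipped; tblkGC_COMMITTED
 * (plus tblkGC_ERROR on I/O failure) is set by lmPostGC() once the
 * pageout completes.
 */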
    723
    724/*
    725 * NAME:	lmGCwrite()
    726 *
    727 * FUNCTION:	group commit write
    728 *	initiate write of log page, building a group of all transactions
    729 *	with commit records on that page.
    730 *
    731 * RETURN:	None
    732 *
    733 * NOTE:
    734 *	LOGGC_LOCK must be held by caller.
    735 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
    736 */
    737static void lmGCwrite(struct jfs_log * log, int cant_write)
    738{
    739	struct lbuf *bp;
    740	struct logpage *lp;
    741	int gcpn;		/* group commit page number */
    742	struct tblock *tblk;
    743	struct tblock *xtblk = NULL;
    744
    745	/*
    746	 * build the commit group of a log page
    747	 *
    748	 * scan commit queue and make a commit group of all
    749	 * transactions with COMMIT records on the same log page.
    750	 */
    751	/* get the head tblk on the commit queue */
    752	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
    753
    754	list_for_each_entry(tblk, &log->cqueue, cqueue) {
    755		if (tblk->pn != gcpn)
    756			break;
    757
    758		xtblk = tblk;
    759
    760		/* state transition: (QUEUE, READY) -> COMMIT */
    761		tblk->flag |= tblkGC_COMMIT;
    762	}
    763	tblk = xtblk;		/* last tblk of the page */
    764
    765	/*
    766	 * pageout to commit transactions on the log page.
    767	 */
    768	bp = (struct lbuf *) tblk->bp;
    769	lp = (struct logpage *) bp->l_ldata;
    770	/* is page already full ? */
    771	if (tblk->flag & tblkGC_EOP) {
    772		/* mark page to free at end of group commit of the page */
    773		tblk->flag &= ~tblkGC_EOP;
    774		tblk->flag |= tblkGC_FREE;
    775		bp->l_ceor = bp->l_eor;
    776		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
    777		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
    778			 cant_write);
    779		INCREMENT(lmStat.full_page);
    780	}
    781	/* page is not yet full */
    782	else {
    783		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
    784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
    785		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
    786		INCREMENT(lmStat.partial_page);
    787	}
    788}
    789
    790/*
    791 * NAME:	lmPostGC()
    792 *
    793 * FUNCTION:	group commit post-processing
    794 *	Processes transactions after their commit records have been written
    795 *	to disk, redriving log I/O if necessary.
    796 *
    797 * RETURN:	None
    798 *
    799 * NOTE:
     800 *	This routine is called at interrupt time by lbmIODone
    801 */
    802static void lmPostGC(struct lbuf * bp)
    803{
    804	unsigned long flags;
    805	struct jfs_log *log = bp->l_log;
    806	struct logpage *lp;
    807	struct tblock *tblk, *temp;
    808
    809	//LOGGC_LOCK(log);
    810	spin_lock_irqsave(&log->gclock, flags);
    811	/*
    812	 * current pageout of group commit completed.
    813	 *
    814	 * remove/wakeup transactions from commit queue who were
    815	 * group committed with the current log page
    816	 */
    817	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
    818		if (!(tblk->flag & tblkGC_COMMIT))
    819			break;
    820		/* if transaction was marked GC_COMMIT then
    821		 * it has been shipped in the current pageout
    822		 * and made it to disk - it is committed.
    823		 */
    824
    825		if (bp->l_flag & lbmERROR)
    826			tblk->flag |= tblkGC_ERROR;
    827
    828		/* remove it from the commit queue */
    829		list_del(&tblk->cqueue);
    830		tblk->flag &= ~tblkGC_QUEUE;
    831
    832		if (tblk == log->flush_tblk) {
    833			/* we can stop flushing the log now */
    834			clear_bit(log_FLUSH, &log->flag);
    835			log->flush_tblk = NULL;
    836		}
    837
    838		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
    839			 tblk->flag);
    840
    841		if (!(tblk->xflag & COMMIT_FORCE))
    842			/*
    843			 * Hand tblk over to lazy commit thread
    844			 */
    845			txLazyUnlock(tblk);
    846		else {
    847			/* state transition: COMMIT -> COMMITTED */
    848			tblk->flag |= tblkGC_COMMITTED;
    849
    850			if (tblk->flag & tblkGC_READY)
    851				log->gcrtc--;
    852
    853			LOGGC_WAKEUP(tblk);
    854		}
    855
    856		/* was page full before pageout ?
    857		 * (and this is the last tblk bound with the page)
    858		 */
    859		if (tblk->flag & tblkGC_FREE)
    860			lbmFree(bp);
    861		/* did page become full after pageout ?
    862		 * (and this is the last tblk bound with the page)
    863		 */
    864		else if (tblk->flag & tblkGC_EOP) {
    865			/* finalize the page */
    866			lp = (struct logpage *) bp->l_ldata;
    867			bp->l_ceor = bp->l_eor;
    868			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
    869			jfs_info("lmPostGC: calling lbmWrite");
    870			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
    871				 1);
    872		}
    873
    874	}
    875
     876	/* are there any transactions that have entered lmGroupCommit()
     877	 * (whose COMMITs are after that of the last log page written)?
     878	 * They are waiting for a new group commit (above at (SLEEP 1)),
     879	 * or lazy transactions are on a full (queued) log page;
     880	 * select the latest ready transaction as new group leader and
     881	 * wake her up to lead her group.
     882	 */
    883	if ((!list_empty(&log->cqueue)) &&
    884	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
    885	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
    886		/*
    887		 * Call lmGCwrite with new group leader
    888		 */
    889		lmGCwrite(log, 1);
    890
     891	/* no transactions are ready yet (transactions are only just
    892	 * queued (GC_QUEUE) and not entered for group commit yet).
    893	 * the first transaction entering group commit
    894	 * will elect herself as new group leader.
    895	 */
    896	else
    897		log->cflag &= ~logGC_PAGEOUT;
    898
    899	//LOGGC_UNLOCK(log);
    900	spin_unlock_irqrestore(&log->gclock, flags);
    901	return;
    902}
    903
    904/*
    905 * NAME:	lmLogSync()
    906 *
    907 * FUNCTION:	write log SYNCPT record for specified log
    908 *	if new sync address is available
     909 *	(normally the case if sync() is executed by a background
     910 *	process).
     911 *	calculate new value of log->nextsync, which determines when
    912 *	this code is called again.
    913 *
    914 * PARAMETERS:	log	- log structure
    915 *		hard_sync - 1 to force all metadata to be written
    916 *
     917 * RETURN:	lsn	- end-of-log address
    918 *
    919 * serialization: LOG_LOCK() held on entry/exit
    920 */
    921static int lmLogSync(struct jfs_log * log, int hard_sync)
    922{
    923	int logsize;
    924	int written;		/* written since last syncpt */
    925	int free;		/* free space left available */
    926	int delta;		/* additional delta to write normally */
    927	int more;		/* additional write granted */
    928	struct lrd lrd;
    929	int lsn;
    930	struct logsyncblk *lp;
    931	unsigned long flags;
    932
    933	/* push dirty metapages out to disk */
    934	if (hard_sync)
    935		write_special_inodes(log, filemap_fdatawrite);
    936	else
    937		write_special_inodes(log, filemap_flush);
    938
    939	/*
    940	 *	forward syncpt
    941	 */
    942	/* if last sync is same as last syncpt,
    943	 * invoke sync point forward processing to update sync.
    944	 */
    945
    946	if (log->sync == log->syncpt) {
    947		LOGSYNC_LOCK(log, flags);
    948		if (list_empty(&log->synclist))
    949			log->sync = log->lsn;
    950		else {
    951			lp = list_entry(log->synclist.next,
    952					struct logsyncblk, synclist);
    953			log->sync = lp->lsn;
    954		}
    955		LOGSYNC_UNLOCK(log, flags);
    956
    957	}
    958
    959	/* if sync is different from last syncpt,
    960	 * write a SYNCPT record with syncpt = sync.
    961	 * reset syncpt = sync
    962	 */
    963	if (log->sync != log->syncpt) {
    964		lrd.logtid = 0;
    965		lrd.backchain = 0;
    966		lrd.type = cpu_to_le16(LOG_SYNCPT);
    967		lrd.length = 0;
    968		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
    969		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
    970
    971		log->syncpt = log->sync;
    972	} else
    973		lsn = log->lsn;
    974
    975	/*
    976	 *	setup next syncpt trigger (SWAG)
    977	 */
    978	logsize = log->logsize;
    979
    980	logdiff(written, lsn, log);
    981	free = logsize - written;
    982	delta = LOGSYNC_DELTA(logsize);
    983	more = min(free / 2, delta);
    984	if (more < 2 * LOGPSIZE) {
    985		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
    986		/*
    987		 *	log wrapping
    988		 *
     989		 * option 1 - panic ? No!
    990		 * option 2 - shutdown file systems
    991		 *	      associated with log ?
    992		 * option 3 - extend log ?
    993		 * option 4 - second chance
    994		 *
    995		 * mark log wrapped, and continue.
    996		 * when all active transactions are completed,
    997		 * mark log valid for recovery.
    998		 * if crashed during invalid state, log state
    999		 * implies invalid log, forcing fsck().
   1000		 */
   1001		/* mark log state log wrap in log superblock */
   1002		/* log->state = LOGWRAP; */
   1003
   1004		/* reset sync point computation */
   1005		log->syncpt = log->sync = lsn;
   1006		log->nextsync = delta;
   1007	} else
   1008		/* next syncpt trigger = written + more */
   1009		log->nextsync = written + more;
   1010
   1011	/* if number of bytes written from last sync point is more
   1012	 * than 1/4 of the log size, stop new transactions from
   1013	 * starting until all current transactions are completed
   1014	 * by setting syncbarrier flag.
   1015	 */
   1016	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
   1017	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
   1018		set_bit(log_SYNCBARRIER, &log->flag);
   1019		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
   1020			 log->syncpt);
   1021		/*
   1022		 * We may have to initiate group commit
   1023		 */
   1024		jfs_flush_journal(log, 0);
   1025	}
   1026
   1027	return lsn;
   1028}
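/*
 * Worked example of the trigger computed above, assuming a 16MB log
 * (logsize = 0x1000000) and LOGPSIZE = 4096:
 *
 *	delta = min(logsize/8, 128*LOGPSIZE)
 *	      = min(0x200000, 0x80000) = 0x80000 (512KB)
 *
 * so with written = 0x100000, more = min(free/2, delta) = 0x80000 and
 * log->nextsync = written + more = 0x180000: the next syncpt fires
 * after another 512KB of log has been written.
 */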
   1029
   1030/*
   1031 * NAME:	jfs_syncpt
   1032 *
   1033 * FUNCTION:	write log SYNCPT record for specified log
   1034 *
   1035 * PARAMETERS:	log	  - log structure
   1036 *		hard_sync - set to 1 to force metadata to be written
   1037 */
   1038void jfs_syncpt(struct jfs_log *log, int hard_sync)
   1039{	LOG_LOCK(log);
   1040	if (!test_bit(log_QUIESCE, &log->flag))
   1041		lmLogSync(log, hard_sync);
   1042	LOG_UNLOCK(log);
   1043}
   1044
   1045/*
   1046 * NAME:	lmLogOpen()
   1047 *
   1048 * FUNCTION:	open the log on first open;
   1049 *	insert filesystem in the active list of the log.
   1050 *
   1051 * PARAMETER:	ipmnt	- file system mount inode
   1052 *		iplog	- log inode (out)
   1053 *
   1054 * RETURN:
   1055 *
   1056 * serialization:
   1057 */
   1058int lmLogOpen(struct super_block *sb)
   1059{
   1060	int rc;
   1061	struct block_device *bdev;
   1062	struct jfs_log *log;
   1063	struct jfs_sb_info *sbi = JFS_SBI(sb);
   1064
   1065	if (sbi->flag & JFS_NOINTEGRITY)
   1066		return open_dummy_log(sb);
   1067
   1068	if (sbi->mntflag & JFS_INLINELOG)
   1069		return open_inline_log(sb);
   1070
   1071	mutex_lock(&jfs_log_mutex);
   1072	list_for_each_entry(log, &jfs_external_logs, journal_list) {
   1073		if (log->bdev->bd_dev == sbi->logdev) {
   1074			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
   1075				jfs_warn("wrong uuid on JFS journal");
   1076				mutex_unlock(&jfs_log_mutex);
   1077				return -EINVAL;
   1078			}
   1079			/*
   1080			 * add file system to log active file system list
   1081			 */
   1082			if ((rc = lmLogFileSystem(log, sbi, 1))) {
   1083				mutex_unlock(&jfs_log_mutex);
   1084				return rc;
   1085			}
   1086			goto journal_found;
   1087		}
   1088	}
   1089
   1090	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
   1091		mutex_unlock(&jfs_log_mutex);
   1092		return -ENOMEM;
   1093	}
   1094	INIT_LIST_HEAD(&log->sb_list);
   1095	init_waitqueue_head(&log->syncwait);
   1096
   1097	/*
   1098	 *	external log as separate logical volume
   1099	 *
   1100	 * file systems to log may have n-to-1 relationship;
   1101	 */
   1102
   1103	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
   1104				 log);
   1105	if (IS_ERR(bdev)) {
   1106		rc = PTR_ERR(bdev);
   1107		goto free;
   1108	}
   1109
   1110	log->bdev = bdev;
   1111	uuid_copy(&log->uuid, &sbi->loguuid);
   1112
   1113	/*
   1114	 * initialize log:
   1115	 */
   1116	if ((rc = lmLogInit(log)))
   1117		goto close;
   1118
   1119	list_add(&log->journal_list, &jfs_external_logs);
   1120
   1121	/*
   1122	 * add file system to log active file system list
   1123	 */
   1124	if ((rc = lmLogFileSystem(log, sbi, 1)))
   1125		goto shutdown;
   1126
   1127journal_found:
   1128	LOG_LOCK(log);
   1129	list_add(&sbi->log_list, &log->sb_list);
   1130	sbi->log = log;
   1131	LOG_UNLOCK(log);
   1132
   1133	mutex_unlock(&jfs_log_mutex);
   1134	return 0;
   1135
   1136	/*
   1137	 *	unwind on error
   1138	 */
   1139      shutdown:		/* unwind lbmLogInit() */
   1140	list_del(&log->journal_list);
   1141	lbmLogShutdown(log);
   1142
   1143      close:		/* close external log device */
   1144	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
   1145
   1146      free:		/* free log descriptor */
   1147	mutex_unlock(&jfs_log_mutex);
   1148	kfree(log);
   1149
   1150	jfs_warn("lmLogOpen: exit(%d)", rc);
   1151	return rc;
   1152}
   1153
   1154static int open_inline_log(struct super_block *sb)
   1155{
   1156	struct jfs_log *log;
   1157	int rc;
   1158
   1159	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
   1160		return -ENOMEM;
   1161	INIT_LIST_HEAD(&log->sb_list);
   1162	init_waitqueue_head(&log->syncwait);
   1163
   1164	set_bit(log_INLINELOG, &log->flag);
   1165	log->bdev = sb->s_bdev;
   1166	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
   1167	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
   1168	    (L2LOGPSIZE - sb->s_blocksize_bits);
   1169	log->l2bsize = sb->s_blocksize_bits;
   1170	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
   1171
   1172	/*
   1173	 * initialize log.
   1174	 */
   1175	if ((rc = lmLogInit(log))) {
   1176		kfree(log);
   1177		jfs_warn("lmLogOpen: exit(%d)", rc);
   1178		return rc;
   1179	}
   1180
   1181	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
   1182	JFS_SBI(sb)->log = log;
   1183
   1184	return rc;
   1185}
   1186
   1187static int open_dummy_log(struct super_block *sb)
   1188{
   1189	int rc;
   1190
   1191	mutex_lock(&jfs_log_mutex);
   1192	if (!dummy_log) {
   1193		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
   1194		if (!dummy_log) {
   1195			mutex_unlock(&jfs_log_mutex);
   1196			return -ENOMEM;
   1197		}
   1198		INIT_LIST_HEAD(&dummy_log->sb_list);
   1199		init_waitqueue_head(&dummy_log->syncwait);
   1200		dummy_log->no_integrity = 1;
   1201		/* Make up some stuff */
   1202		dummy_log->base = 0;
   1203		dummy_log->size = 1024;
   1204		rc = lmLogInit(dummy_log);
   1205		if (rc) {
   1206			kfree(dummy_log);
   1207			dummy_log = NULL;
   1208			mutex_unlock(&jfs_log_mutex);
   1209			return rc;
   1210		}
   1211	}
   1212
   1213	LOG_LOCK(dummy_log);
   1214	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
   1215	JFS_SBI(sb)->log = dummy_log;
   1216	LOG_UNLOCK(dummy_log);
   1217	mutex_unlock(&jfs_log_mutex);
   1218
   1219	return 0;
   1220}
   1221
   1222/*
   1223 * NAME:	lmLogInit()
   1224 *
   1225 * FUNCTION:	log initialization at first log open.
   1226 *
   1227 *	logredo() (or logformat()) should have been run previously.
   1228 *	initialize the log from log superblock.
   1229 *	set the log state in the superblock to LOGMOUNT and
   1230 *	write SYNCPT log record.
   1231 *
   1232 * PARAMETER:	log	- log structure
   1233 *
   1234 * RETURN:	0	- if ok
   1235 *		-EINVAL	- bad log magic number or superblock dirty
   1236 *		error returned from logwait()
   1237 *
   1238 * serialization: single first open thread
   1239 */
   1240int lmLogInit(struct jfs_log * log)
   1241{
   1242	int rc = 0;
   1243	struct lrd lrd;
   1244	struct logsuper *logsuper;
   1245	struct lbuf *bpsuper;
   1246	struct lbuf *bp;
   1247	struct logpage *lp;
   1248	int lsn = 0;
   1249
   1250	jfs_info("lmLogInit: log:0x%p", log);
   1251
   1252	/* initialize the group commit serialization lock */
   1253	LOGGC_LOCK_INIT(log);
   1254
   1255	/* allocate/initialize the log write serialization lock */
   1256	LOG_LOCK_INIT(log);
   1257
   1258	LOGSYNC_LOCK_INIT(log);
   1259
   1260	INIT_LIST_HEAD(&log->synclist);
   1261
   1262	INIT_LIST_HEAD(&log->cqueue);
   1263	log->flush_tblk = NULL;
   1264
   1265	log->count = 0;
   1266
   1267	/*
   1268	 * initialize log i/o
   1269	 */
   1270	if ((rc = lbmLogInit(log)))
   1271		return rc;
   1272
   1273	if (!test_bit(log_INLINELOG, &log->flag))
   1274		log->l2bsize = L2LOGPSIZE;
   1275
   1276	/* check for disabled journaling to disk */
   1277	if (log->no_integrity) {
   1278		/*
   1279		 * Journal pages will still be filled.  When the time comes
   1280		 * to actually do the I/O, the write is not done, and the
   1281		 * endio routine is called directly.
   1282		 */
   1283		bp = lbmAllocate(log , 0);
   1284		log->bp = bp;
   1285		bp->l_pn = bp->l_eor = 0;
   1286	} else {
   1287		/*
   1288		 * validate log superblock
   1289		 */
   1290		if ((rc = lbmRead(log, 1, &bpsuper)))
   1291			goto errout10;
   1292
   1293		logsuper = (struct logsuper *) bpsuper->l_ldata;
   1294
   1295		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
   1296			jfs_warn("*** Log Format Error ! ***");
   1297			rc = -EINVAL;
   1298			goto errout20;
   1299		}
   1300
   1301		/* logredo() should have been run successfully. */
   1302		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
   1303			jfs_warn("*** Log Is Dirty ! ***");
   1304			rc = -EINVAL;
   1305			goto errout20;
   1306		}
   1307
   1308		/* initialize log from log superblock */
   1309		if (test_bit(log_INLINELOG,&log->flag)) {
   1310			if (log->size != le32_to_cpu(logsuper->size)) {
   1311				rc = -EINVAL;
   1312				goto errout20;
   1313			}
   1314			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
   1315				 log, (unsigned long long)log->base, log->size);
   1316		} else {
   1317			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
   1318				jfs_warn("wrong uuid on JFS log device");
   1319				rc = -EINVAL;
   1320				goto errout20;
   1321			}
   1322			log->size = le32_to_cpu(logsuper->size);
   1323			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
   1324			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
   1325				 log, (unsigned long long)log->base, log->size);
   1326		}
   1327
   1328		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
   1329		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
   1330
   1331		/*
   1332		 * initialize for log append write mode
   1333		 */
   1334		/* establish current/end-of-log page/buffer */
   1335		if ((rc = lbmRead(log, log->page, &bp)))
   1336			goto errout20;
   1337
   1338		lp = (struct logpage *) bp->l_ldata;
   1339
   1340		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
   1341			 le32_to_cpu(logsuper->end), log->page, log->eor,
   1342			 le16_to_cpu(lp->h.eor));
   1343
   1344		log->bp = bp;
   1345		bp->l_pn = log->page;
   1346		bp->l_eor = log->eor;
   1347
   1348		/* if current page is full, move on to next page */
   1349		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
   1350			lmNextPage(log);
   1351
   1352		/*
   1353		 * initialize log syncpoint
   1354		 */
   1355		/*
   1356		 * write the first SYNCPT record with syncpoint = 0
   1357		 * (i.e., log redo up to HERE !);
   1358		 * remove current page from lbm write queue at end of pageout
   1359		 * (to write log superblock update), but do not release to
   1360		 * freelist;
   1361		 */
   1362		lrd.logtid = 0;
   1363		lrd.backchain = 0;
   1364		lrd.type = cpu_to_le16(LOG_SYNCPT);
   1365		lrd.length = 0;
   1366		lrd.log.syncpt.sync = 0;
   1367		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
   1368		bp = log->bp;
   1369		bp->l_ceor = bp->l_eor;
   1370		lp = (struct logpage *) bp->l_ldata;
   1371		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
   1372		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
   1373		if ((rc = lbmIOWait(bp, 0)))
   1374			goto errout30;
   1375
   1376		/*
   1377		 * update/write superblock
   1378		 */
   1379		logsuper->state = cpu_to_le32(LOGMOUNT);
   1380		log->serial = le32_to_cpu(logsuper->serial) + 1;
   1381		logsuper->serial = cpu_to_le32(log->serial);
   1382		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
   1383		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
   1384			goto errout30;
   1385	}
   1386
   1387	/* initialize logsync parameters */
   1388	log->logsize = (log->size - 2) << L2LOGPSIZE;
   1389	log->lsn = lsn;
   1390	log->syncpt = lsn;
   1391	log->sync = log->syncpt;
   1392	log->nextsync = LOGSYNC_DELTA(log->logsize);
   1393
   1394	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
   1395		 log->lsn, log->syncpt, log->sync);
   1396
   1397	/*
   1398	 * initialize for lazy/group commit
   1399	 */
   1400	log->clsn = lsn;
   1401
   1402	return 0;
   1403
   1404	/*
   1405	 *	unwind on error
   1406	 */
   1407      errout30:		/* release log page */
   1408	log->wqueue = NULL;
   1409	bp->l_wqnext = NULL;
   1410	lbmFree(bp);
   1411
   1412      errout20:		/* release log superblock */
   1413	lbmFree(bpsuper);
   1414
   1415      errout10:		/* unwind lbmLogInit() */
   1416	lbmLogShutdown(log);
   1417
   1418	jfs_warn("lmLogInit: exit(%d)", rc);
   1419	return rc;
   1420}
   1421
   1422
   1423/*
   1424 * NAME:	lmLogClose()
   1425 *
   1426 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
   1427 *		and close it on last close.
   1428 *
   1429 * PARAMETER:	sb	- superblock
   1430 *
   1431 * RETURN:	errors from subroutines
   1432 *
   1433 * serialization:
   1434 */
   1435int lmLogClose(struct super_block *sb)
   1436{
   1437	struct jfs_sb_info *sbi = JFS_SBI(sb);
   1438	struct jfs_log *log = sbi->log;
   1439	struct block_device *bdev;
   1440	int rc = 0;
   1441
   1442	jfs_info("lmLogClose: log:0x%p", log);
   1443
   1444	mutex_lock(&jfs_log_mutex);
   1445	LOG_LOCK(log);
   1446	list_del(&sbi->log_list);
   1447	LOG_UNLOCK(log);
   1448	sbi->log = NULL;
   1449
   1450	/*
   1451	 * We need to make sure all of the "written" metapages
   1452	 * actually make it to disk
   1453	 */
   1454	sync_blockdev(sb->s_bdev);
   1455
   1456	if (test_bit(log_INLINELOG, &log->flag)) {
   1457		/*
   1458		 *	in-line log in host file system
   1459		 */
   1460		rc = lmLogShutdown(log);
   1461		kfree(log);
   1462		goto out;
   1463	}
   1464
   1465	if (!log->no_integrity)
   1466		lmLogFileSystem(log, sbi, 0);
   1467
   1468	if (!list_empty(&log->sb_list))
   1469		goto out;
   1470
   1471	/*
   1472	 * TODO: ensure that the dummy_log is in a state to allow
   1473	 * lbmLogShutdown to deallocate all the buffers and call
   1474	 * kfree against dummy_log.  For now, leave dummy_log & its
    1475	 * buffers in memory, and reuse them if another no-integrity mount
   1476	 * is requested.
   1477	 */
   1478	if (log->no_integrity)
   1479		goto out;
   1480
   1481	/*
   1482	 *	external log as separate logical volume
   1483	 */
   1484	list_del(&log->journal_list);
   1485	bdev = log->bdev;
   1486	rc = lmLogShutdown(log);
   1487
   1488	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
   1489
   1490	kfree(log);
   1491
   1492      out:
   1493	mutex_unlock(&jfs_log_mutex);
   1494	jfs_info("lmLogClose: exit(%d)", rc);
   1495	return rc;
   1496}
   1497
   1498
   1499/*
   1500 * NAME:	jfs_flush_journal()
   1501 *
   1502 * FUNCTION:	initiate write of any outstanding transactions to the journal
   1503 *		and optionally wait until they are all written to disk
   1504 *
   1505 *		wait == 0  flush until latest txn is committed, don't wait
   1506 *		wait == 1  flush until latest txn is committed, wait
   1507 *		wait > 1   flush until all txn's are complete, wait
   1508 */
   1509void jfs_flush_journal(struct jfs_log *log, int wait)
   1510{
   1511	int i;
   1512	struct tblock *target = NULL;
   1513
   1514	/* jfs_write_inode may call us during read-only mount */
   1515	if (!log)
   1516		return;
   1517
   1518	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
   1519
   1520	LOGGC_LOCK(log);
   1521
   1522	if (!list_empty(&log->cqueue)) {
   1523		/*
   1524		 * This ensures that we will keep writing to the journal as long
   1525		 * as there are unwritten commit records
   1526		 */
   1527		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
   1528
   1529		if (test_bit(log_FLUSH, &log->flag)) {
   1530			/*
   1531			 * We're already flushing.
   1532			 * if flush_tblk is NULL, we are flushing everything,
   1533			 * so leave it that way.  Otherwise, update it to the
   1534			 * latest transaction
   1535			 */
   1536			if (log->flush_tblk)
   1537				log->flush_tblk = target;
   1538		} else {
   1539			/* Only flush until latest transaction is committed */
   1540			log->flush_tblk = target;
   1541			set_bit(log_FLUSH, &log->flag);
   1542
   1543			/*
   1544			 * Initiate I/O on outstanding transactions
   1545			 */
   1546			if (!(log->cflag & logGC_PAGEOUT)) {
   1547				log->cflag |= logGC_PAGEOUT;
   1548				lmGCwrite(log, 0);
   1549			}
   1550		}
   1551	}
   1552	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
   1553		/* Flush until all activity complete */
   1554		set_bit(log_FLUSH, &log->flag);
   1555		log->flush_tblk = NULL;
   1556	}
   1557
   1558	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
   1559		DECLARE_WAITQUEUE(__wait, current);
   1560
   1561		add_wait_queue(&target->gcwait, &__wait);
   1562		set_current_state(TASK_UNINTERRUPTIBLE);
   1563		LOGGC_UNLOCK(log);
   1564		schedule();
   1565		LOGGC_LOCK(log);
   1566		remove_wait_queue(&target->gcwait, &__wait);
   1567	}
   1568	LOGGC_UNLOCK(log);
   1569
   1570	if (wait < 2)
   1571		return;
   1572
   1573	write_special_inodes(log, filemap_fdatawrite);
   1574
   1575	/*
   1576	 * If there was recent activity, we may need to wait
   1577	 * for the lazycommit thread to catch up
   1578	 */
   1579	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
   1580		for (i = 0; i < 200; i++) {	/* Too much? */
   1581			msleep(250);
   1582			write_special_inodes(log, filemap_fdatawrite);
   1583			if (list_empty(&log->cqueue) &&
   1584			    list_empty(&log->synclist))
   1585				break;
   1586		}
   1587	}
   1588	assert(list_empty(&log->cqueue));
   1589
   1590#ifdef CONFIG_JFS_DEBUG
   1591	if (!list_empty(&log->synclist)) {
   1592		struct logsyncblk *lp;
   1593
   1594		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
   1595		list_for_each_entry(lp, &log->synclist, synclist) {
   1596			if (lp->xflag & COMMIT_PAGE) {
   1597				struct metapage *mp = (struct metapage *)lp;
   1598				print_hex_dump(KERN_ERR, "metapage: ",
   1599					       DUMP_PREFIX_ADDRESS, 16, 4,
   1600					       mp, sizeof(struct metapage), 0);
   1601				print_hex_dump(KERN_ERR, "page: ",
   1602					       DUMP_PREFIX_ADDRESS, 16,
   1603					       sizeof(long), mp->page,
   1604					       sizeof(struct page), 0);
   1605			} else
   1606				print_hex_dump(KERN_ERR, "tblock:",
   1607					       DUMP_PREFIX_ADDRESS, 16, 4,
   1608					       lp, sizeof(struct tblock), 0);
   1609		}
   1610	}
   1611#else
   1612	WARN_ON(!list_empty(&log->synclist));
   1613#endif
   1614	clear_bit(log_FLUSH, &log->flag);
   1615}
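/*
 * Usage note: lmLogShutdown() below calls jfs_flush_journal(log, 2) to
 * drain all transactions before writing the final SYNCPT, while
 * lmLogSync() above uses jfs_flush_journal(log, 0) when it raises the
 * sync barrier.
 */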
   1616
   1617/*
   1618 * NAME:	lmLogShutdown()
   1619 *
   1620 * FUNCTION:	log shutdown at last LogClose().
   1621 *
   1622 *		write log syncpt record.
   1623 *		update super block to set redone flag to 0.
   1624 *
   1625 * PARAMETER:	log	- log inode
   1626 *
   1627 * RETURN:	0	- success
   1628 *
   1629 * serialization: single last close thread
   1630 */
   1631int lmLogShutdown(struct jfs_log * log)
   1632{
   1633	int rc;
   1634	struct lrd lrd;
   1635	int lsn;
   1636	struct logsuper *logsuper;
   1637	struct lbuf *bpsuper;
   1638	struct lbuf *bp;
   1639	struct logpage *lp;
   1640
   1641	jfs_info("lmLogShutdown: log:0x%p", log);
   1642
   1643	jfs_flush_journal(log, 2);
   1644
   1645	/*
   1646	 * write the last SYNCPT record with syncpoint = 0
   1647	 * (i.e., log redo up to HERE !)
   1648	 */
   1649	lrd.logtid = 0;
   1650	lrd.backchain = 0;
   1651	lrd.type = cpu_to_le16(LOG_SYNCPT);
   1652	lrd.length = 0;
   1653	lrd.log.syncpt.sync = 0;
   1654
   1655	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
   1656	bp = log->bp;
   1657	lp = (struct logpage *) bp->l_ldata;
   1658	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
   1659	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
   1660	lbmIOWait(log->bp, lbmFREE);
   1661	log->bp = NULL;
   1662
   1663	/*
   1664	 * synchronous update log superblock
   1665	 * mark log state as shutdown cleanly
   1666	 * (i.e., Log does not need to be replayed).
   1667	 */
   1668	if ((rc = lbmRead(log, 1, &bpsuper)))
   1669		goto out;
   1670
   1671	logsuper = (struct logsuper *) bpsuper->l_ldata;
   1672	logsuper->state = cpu_to_le32(LOGREDONE);
   1673	logsuper->end = cpu_to_le32(lsn);
   1674	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
   1675	rc = lbmIOWait(bpsuper, lbmFREE);
   1676
   1677	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
   1678		 lsn, log->page, log->eor);
   1679
   1680      out:
   1681	/*
   1682	 * shutdown per log i/o
   1683	 */
   1684	lbmLogShutdown(log);
   1685
   1686	if (rc) {
   1687		jfs_warn("lmLogShutdown: exit(%d)", rc);
   1688	}
   1689	return rc;
   1690}
   1691
   1692
   1693/*
   1694 * NAME:	lmLogFileSystem()
   1695 *
   1696 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
   1697 *	file system into/from log active file system list.
   1698 *
    1699 * PARAMETER:	log	- pointer to log structure.
    1700 *		sbi	- superblock info of the file system
    1701 *			  being inserted/removed.
    1702 *		activate - insert/remove device from active list.
   1703 *
   1704 * RETURN:	0	- success
    1705 *		errors returned by lbmIOWait().
   1706 */
   1707static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
   1708			   int activate)
   1709{
   1710	int rc = 0;
   1711	int i;
   1712	struct logsuper *logsuper;
   1713	struct lbuf *bpsuper;
   1714	uuid_t *uuid = &sbi->uuid;
   1715
   1716	/*
   1717	 * insert/remove file system device to log active file system list.
   1718	 */
   1719	if ((rc = lbmRead(log, 1, &bpsuper)))
   1720		return rc;
   1721
   1722	logsuper = (struct logsuper *) bpsuper->l_ldata;
   1723	if (activate) {
   1724		for (i = 0; i < MAX_ACTIVE; i++)
   1725			if (uuid_is_null(&logsuper->active[i].uuid)) {
   1726				uuid_copy(&logsuper->active[i].uuid, uuid);
   1727				sbi->aggregate = i;
   1728				break;
   1729			}
   1730		if (i == MAX_ACTIVE) {
   1731			jfs_warn("Too many file systems sharing journal!");
   1732			lbmFree(bpsuper);
   1733			return -EMFILE;	/* Is there a better rc? */
   1734		}
   1735	} else {
   1736		for (i = 0; i < MAX_ACTIVE; i++)
   1737			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
   1738				uuid_copy(&logsuper->active[i].uuid,
   1739					  &uuid_null);
   1740				break;
   1741			}
   1742		if (i == MAX_ACTIVE) {
   1743			jfs_warn("Somebody stomped on the journal!");
   1744			lbmFree(bpsuper);
   1745			return -EIO;
   1746		}
   1747
   1748	}
   1749
   1750	/*
   1751	 * synchronous write log superblock:
   1752	 *
   1753	 * write sidestream bypassing write queue:
   1754	 * at file system mount, log super block is updated for
   1755	 * activation of the file system before any log record
   1756	 * (MOUNT record) of the file system, and at file system
   1757	 * unmount, all meta data for the file system has been
   1758	 * flushed before log super block is updated for deactivation
   1759	 * of the file system.
   1760	 */
   1761	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
   1762	rc = lbmIOWait(bpsuper, lbmFREE);
   1763
   1764	return rc;
   1765}
   1766
   1767/*
   1768 *		log buffer manager (lbm)
   1769 *		------------------------
   1770 *
   1771 * special purpose buffer manager supporting log i/o requirements.
   1772 *
   1773 * per log write queue:
    1774 * log pageout occurs in serial order via a fifo write queue,
    1775 * restricted to a single i/o in progress at any one time.
    1776 * a circular singly-linked list
    1777 * (log->wqueue points to the tail, and buffers are linked via
    1778 * the bp->l_wqnext field)
    1779 * maintains the log pages in pageout or waiting for pageout, in serial order.
   1780 */
   1781
   1782/*
   1783 *	lbmLogInit()
   1784 *
   1785 * initialize per log I/O setup at lmLogInit()
   1786 */
   1787static int lbmLogInit(struct jfs_log * log)
   1788{				/* log inode */
   1789	int i;
   1790	struct lbuf *lbuf;
   1791
   1792	jfs_info("lbmLogInit: log:0x%p", log);
   1793
   1794	/* initialize current buffer cursor */
   1795	log->bp = NULL;
   1796
   1797	/* initialize log device write queue */
   1798	log->wqueue = NULL;
   1799
   1800	/*
   1801	 * Each log has its own buffer pages allocated to it.  These are
   1802	 * not managed by the page cache.  This ensures that a transaction
   1803	 * writing to the log does not block trying to allocate a page from
   1804	 * the page cache (for the log).  This would be bad, since page
   1805	 * allocation waits on the kswapd thread that may be committing inodes
   1806	 * which would cause log activity.  Was that clear?  I'm trying to
   1807	 * avoid deadlock here.
   1808	 */
   1809	init_waitqueue_head(&log->free_wait);
   1810
   1811	log->lbuf_free = NULL;
   1812
   1813	for (i = 0; i < LOGPAGES;) {
   1814		char *buffer;
   1815		uint offset;
   1816		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
   1817
   1818		if (!page)
   1819			goto error;
   1820		buffer = page_address(page);
   1821		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
   1822			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
   1823			if (lbuf == NULL) {
   1824				if (offset == 0)
   1825					__free_page(page);
   1826				goto error;
   1827			}
   1828			if (offset) /* we already have one reference */
   1829				get_page(page);
   1830			lbuf->l_offset = offset;
   1831			lbuf->l_ldata = buffer + offset;
   1832			lbuf->l_page = page;
   1833			lbuf->l_log = log;
   1834			init_waitqueue_head(&lbuf->l_ioevent);
   1835
   1836			lbuf->l_freelist = log->lbuf_free;
   1837			log->lbuf_free = lbuf;
   1838			i++;
   1839		}
   1840	}
   1841
   1842	return (0);
   1843
   1844      error:
   1845	lbmLogShutdown(log);
   1846	return -ENOMEM;
   1847}
   1848
   1849
   1850/*
   1851 *	lbmLogShutdown()
   1852 *
   1853 * finalize per log I/O setup at lmLogShutdown()
   1854 */
   1855static void lbmLogShutdown(struct jfs_log * log)
   1856{
   1857	struct lbuf *lbuf;
   1858
   1859	jfs_info("lbmLogShutdown: log:0x%p", log);
   1860
   1861	lbuf = log->lbuf_free;
   1862	while (lbuf) {
   1863		struct lbuf *next = lbuf->l_freelist;
   1864		__free_page(lbuf->l_page);
   1865		kfree(lbuf);
   1866		lbuf = next;
   1867	}
   1868}
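
/*
 * Note on the lbuf page refcounting in lbmLogInit()/lbmLogShutdown():
 * each page is carved into PAGE_SIZE/LOGPSIZE lbufs, and every lbuf
 * after the first takes an extra reference via get_page().
 * lbmLogShutdown() calls __free_page() once per lbuf, dropping one
 * reference each, so a page is actually freed when its last lbuf is.
 * E.g., with 4K pages and LOGPSIZE = 4096 there is one lbuf per page;
 * on a 16K-page configuration there would be four.
 */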
   1869
   1870
   1871/*
   1872 *	lbmAllocate()
   1873 *
   1874 * allocate an empty log buffer
   1875 */
   1876static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
   1877{
   1878	struct lbuf *bp;
   1879	unsigned long flags;
   1880
   1881	/*
   1882	 * recycle from log buffer freelist if any
   1883	 */
   1884	LCACHE_LOCK(flags);
   1885	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
   1886	log->lbuf_free = bp->l_freelist;
   1887	LCACHE_UNLOCK(flags);
   1888
   1889	bp->l_flag = 0;
   1890
   1891	bp->l_wqnext = NULL;
   1892	bp->l_freelist = NULL;
   1893
   1894	bp->l_pn = pn;
   1895	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
   1896	bp->l_ceor = 0;
   1897
   1898	return bp;
   1899}
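
/*
 * Worked example for the l_blkno computation above, assuming
 * LOGPSIZE = 4096 (L2LOGPSIZE = 12): with a 512-byte aggregate block
 * size (l2bsize = 9) each log page spans 8 blocks, so page pn starts
 * at log->base + (pn << 3); with a 4K block size (l2bsize = 12) the
 * shift is 0 and page pn sits at log->base + pn.
 */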
   1900
   1901
   1902/*
   1903 *	lbmFree()
   1904 *
   1905 * release a log buffer to freelist
   1906 */
   1907static void lbmFree(struct lbuf * bp)
   1908{
   1909	unsigned long flags;
   1910
   1911	LCACHE_LOCK(flags);
   1912
   1913	lbmfree(bp);
   1914
   1915	LCACHE_UNLOCK(flags);
   1916}
   1917
   1918static void lbmfree(struct lbuf * bp)
   1919{
   1920	struct jfs_log *log = bp->l_log;
   1921
   1922	assert(bp->l_wqnext == NULL);
   1923
   1924	/*
   1925	 * return the buffer to head of freelist
   1926	 */
   1927	bp->l_freelist = log->lbuf_free;
   1928	log->lbuf_free = bp;
   1929
   1930	wake_up(&log->free_wait);
   1931	return;
   1932}
   1933
   1934
   1935/*
   1936 * NAME:	lbmRedrive
   1937 *
   1938 * FUNCTION:	add a log buffer to the log redrive list
   1939 *
   1940 * PARAMETER:
   1941 *	bp	- log buffer
   1942 *
   1943 * NOTES:
   1944 *	Takes log_redrive_lock.
   1945 */
   1946static inline void lbmRedrive(struct lbuf *bp)
   1947{
   1948	unsigned long flags;
   1949
   1950	spin_lock_irqsave(&log_redrive_lock, flags);
   1951	bp->l_redrive_next = log_redrive_list;
   1952	log_redrive_list = bp;
   1953	spin_unlock_irqrestore(&log_redrive_lock, flags);
   1954
   1955	wake_up_process(jfsIOthread);
   1956}
   1957
   1958
   1959/*
   1960 *	lbmRead()
   1961 */
   1962static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
   1963{
   1964	struct bio *bio;
   1965	struct lbuf *bp;
   1966
   1967	/*
   1968	 * allocate a log buffer
   1969	 */
   1970	*bpp = bp = lbmAllocate(log, pn);
   1971	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
   1972
   1973	bp->l_flag |= lbmREAD;
   1974
   1975	bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
   1976	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
   1977	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
   1978	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
   1979
   1980	bio->bi_end_io = lbmIODone;
   1981	bio->bi_private = bp;
   1982	/*check if journaling to disk has been disabled*/
   1983	if (log->no_integrity) {
   1984		bio->bi_iter.bi_size = 0;
   1985		lbmIODone(bio);
   1986	} else {
   1987		submit_bio(bio);
   1988	}
   1989
   1990	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
   1991
   1992	return 0;
   1993}
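
/*
 * Note on the sector arithmetic above: bi_sector is in 512-byte units,
 * so the conversion is sector = l_blkno << (l2bsize - 9).  E.g., with
 * a 4K aggregate block size (l2bsize = 12), FS block 100 maps to
 * sector 800.  The same conversion is used in lbmStartIO() below.
 */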
   1994
   1995
   1996/*
   1997 *	lbmWrite()
   1998 *
    1999 * the buffer at the head of the pageout queue stays there after
    2000 * completion of a partial-page pageout and is redriven by explicit
    2001 * initiation of pageout by the caller, until its full-page pageout
    2002 * is completed and the buffer is released.
   2003 *
   2004 * device driver i/o done redrives pageout of new buffer at
   2005 * head of pageout queue when current buffer at head of pageout
   2006 * queue is released at the completion of its full-page pageout.
   2007 *
   2008 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
   2009 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
   2010 */
   2011static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
   2012		     int cant_block)
   2013{
   2014	struct lbuf *tail;
   2015	unsigned long flags;
   2016
   2017	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
   2018
   2019	/* map the logical block address to physical block address */
   2020	bp->l_blkno =
   2021	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
   2022
   2023	LCACHE_LOCK(flags);		/* disable+lock */
   2024
   2025	/*
   2026	 * initialize buffer for device driver
   2027	 */
   2028	bp->l_flag = flag;
   2029
   2030	/*
   2031	 *	insert bp at tail of write queue associated with log
   2032	 *
   2033	 * (request is either for bp already/currently at head of queue
   2034	 * or new bp to be inserted at tail)
   2035	 */
   2036	tail = log->wqueue;
   2037
   2038	/* is buffer not already on write queue ? */
   2039	if (bp->l_wqnext == NULL) {
   2040		/* insert at tail of wqueue */
   2041		if (tail == NULL) {
   2042			log->wqueue = bp;
   2043			bp->l_wqnext = bp;
   2044		} else {
   2045			log->wqueue = bp;
   2046			bp->l_wqnext = tail->l_wqnext;
   2047			tail->l_wqnext = bp;
   2048		}
   2049
   2050		tail = bp;
   2051	}
   2052
   2053	/* is buffer at head of wqueue and for write ? */
   2054	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
   2055		LCACHE_UNLOCK(flags);	/* unlock+enable */
   2056		return;
   2057	}
   2058
   2059	LCACHE_UNLOCK(flags);	/* unlock+enable */
   2060
   2061	if (cant_block)
   2062		lbmRedrive(bp);
   2063	else if (flag & lbmSYNC)
   2064		lbmStartIO(bp);
   2065	else {
   2066		LOGGC_UNLOCK(log);
   2067		lbmStartIO(bp);
   2068		LOGGC_LOCK(log);
   2069	}
   2070}
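
/*
 * Head test in lbmWrite() above: since log->wqueue is the tail of a
 * circular list, the head is tail->l_wqnext, and (bp == tail->l_wqnext)
 * holds only when bp is at the head of the queue.  Only the head
 * buffer may have its i/o started, which is what keeps log pageout
 * strictly serial.
 */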
   2071
   2072
   2073/*
   2074 *	lbmDirectWrite()
   2075 *
   2076 * initiate pageout bypassing write queue for sidestream
   2077 * (e.g., log superblock) write;
   2078 */
   2079static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
   2080{
   2081	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
   2082		 bp, flag, bp->l_pn);
   2083
   2084	/*
   2085	 * initialize buffer for device driver
   2086	 */
   2087	bp->l_flag = flag | lbmDIRECT;
   2088
   2089	/* map the logical block address to physical block address */
   2090	bp->l_blkno =
   2091	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
   2092
   2093	/*
   2094	 *	initiate pageout of the page
   2095	 */
   2096	lbmStartIO(bp);
   2097}
   2098
   2099
   2100/*
   2101 * NAME:	lbmStartIO()
   2102 *
   2103 * FUNCTION:	Interface to DD strategy routine
   2104 *
   2105 * RETURN:	none
   2106 *
   2107 * serialization: LCACHE_LOCK() is NOT held during log i/o;
   2108 */
   2109static void lbmStartIO(struct lbuf * bp)
   2110{
   2111	struct bio *bio;
   2112	struct jfs_log *log = bp->l_log;
   2113
   2114	jfs_info("lbmStartIO");
   2115
   2116	bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
   2117	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
   2118	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
   2119	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
   2120
   2121	bio->bi_end_io = lbmIODone;
   2122	bio->bi_private = bp;
   2123
   2124	/* check if journaling to disk has been disabled */
   2125	if (log->no_integrity) {
   2126		bio->bi_iter.bi_size = 0;
   2127		lbmIODone(bio);
   2128	} else {
   2129		submit_bio(bio);
   2130		INCREMENT(lmStat.submitted);
   2131	}
   2132}
   2133
   2134
   2135/*
   2136 *	lbmIOWait()
   2137 */
   2138static int lbmIOWait(struct lbuf * bp, int flag)
   2139{
   2140	unsigned long flags;
   2141	int rc = 0;
   2142
   2143	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
   2144
   2145	LCACHE_LOCK(flags);		/* disable+lock */
   2146
   2147	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
   2148
   2149	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
   2150
   2151	if (flag & lbmFREE)
   2152		lbmfree(bp);
   2153
   2154	LCACHE_UNLOCK(flags);	/* unlock+enable */
   2155
   2156	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
   2157	return rc;
   2158}
   2159
   2160/*
   2161 *	lbmIODone()
   2162 *
   2163 * executed at INTIODONE level
   2164 */
   2165static void lbmIODone(struct bio *bio)
   2166{
   2167	struct lbuf *bp = bio->bi_private;
   2168	struct lbuf *nextbp, *tail;
   2169	struct jfs_log *log;
   2170	unsigned long flags;
   2171
   2172	/*
   2173	 * get back jfs buffer bound to the i/o buffer
   2174	 */
   2175	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
   2176
   2177	LCACHE_LOCK(flags);		/* disable+lock */
   2178
   2179	bp->l_flag |= lbmDONE;
   2180
   2181	if (bio->bi_status) {
   2182		bp->l_flag |= lbmERROR;
   2183
   2184		jfs_err("lbmIODone: I/O error in JFS log");
   2185	}
   2186
   2187	bio_put(bio);
   2188
   2189	/*
   2190	 *	pagein completion
   2191	 */
   2192	if (bp->l_flag & lbmREAD) {
   2193		bp->l_flag &= ~lbmREAD;
   2194
   2195		LCACHE_UNLOCK(flags);	/* unlock+enable */
   2196
   2197		/* wakeup I/O initiator */
   2198		LCACHE_WAKEUP(&bp->l_ioevent);
   2199
   2200		return;
   2201	}
   2202
   2203	/*
   2204	 *	pageout completion
   2205	 *
   2206	 * the bp at the head of write queue has completed pageout.
   2207	 *
   2208	 * if single-commit/full-page pageout, remove the current buffer
   2209	 * from head of pageout queue, and redrive pageout with
   2210	 * the new buffer at head of pageout queue;
   2211	 * otherwise, the partial-page pageout buffer stays at
   2212	 * the head of pageout queue to be redriven for pageout
   2213	 * by lmGroupCommit() until full-page pageout is completed.
   2214	 */
   2215	bp->l_flag &= ~lbmWRITE;
   2216	INCREMENT(lmStat.pagedone);
   2217
   2218	/* update committed lsn */
   2219	log = bp->l_log;
   2220	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
   2221
   2222	if (bp->l_flag & lbmDIRECT) {
   2223		LCACHE_WAKEUP(&bp->l_ioevent);
   2224		LCACHE_UNLOCK(flags);
   2225		return;
   2226	}
   2227
   2228	tail = log->wqueue;
   2229
   2230	/* single element queue */
   2231	if (bp == tail) {
   2232		/* remove head buffer of full-page pageout
   2233		 * from log device write queue
   2234		 */
   2235		if (bp->l_flag & lbmRELEASE) {
   2236			log->wqueue = NULL;
   2237			bp->l_wqnext = NULL;
   2238		}
   2239	}
   2240	/* multi element queue */
   2241	else {
   2242		/* remove head buffer of full-page pageout
   2243		 * from log device write queue
   2244		 */
   2245		if (bp->l_flag & lbmRELEASE) {
   2246			nextbp = tail->l_wqnext = bp->l_wqnext;
   2247			bp->l_wqnext = NULL;
   2248
   2249			/*
    2250			 * redrive pageout of the next page at the head of
    2251			 * the write queue: either the next page has no bound
    2252			 * tblk (i.e., a page without any COMMIT records), or
    2253			 * it is the first page of a new group commit queued
    2254			 * after the current page by lmGroupCommit(), as
    2255			 * indicated by the lbmWRITE flag (subsequent pageout
    2256			 * is performed synchronously, except for pages
    2257			 * without any COMMITs);
   2258			 */
   2259			if (nextbp->l_flag & lbmWRITE) {
   2260				/*
   2261				 * We can't do the I/O at interrupt time.
   2262				 * The jfsIO thread can do it
   2263				 */
   2264				lbmRedrive(nextbp);
   2265			}
   2266		}
   2267	}
   2268
   2269	/*
   2270	 *	synchronous pageout:
   2271	 *
   2272	 * buffer has not necessarily been removed from write queue
   2273	 * (e.g., synchronous write of partial-page with COMMIT):
   2274	 * leave buffer for i/o initiator to dispose
   2275	 */
   2276	if (bp->l_flag & lbmSYNC) {
   2277		LCACHE_UNLOCK(flags);	/* unlock+enable */
   2278
   2279		/* wakeup I/O initiator */
   2280		LCACHE_WAKEUP(&bp->l_ioevent);
   2281	}
   2282
   2283	/*
   2284	 *	Group Commit pageout:
   2285	 */
   2286	else if (bp->l_flag & lbmGC) {
   2287		LCACHE_UNLOCK(flags);
   2288		lmPostGC(bp);
   2289	}
   2290
   2291	/*
   2292	 *	asynchronous pageout:
   2293	 *
   2294	 * buffer must have been removed from write queue:
   2295	 * insert buffer at head of freelist where it can be recycled
   2296	 */
   2297	else {
   2298		assert(bp->l_flag & lbmRELEASE);
   2299		assert(bp->l_flag & lbmFREE);
   2300		lbmfree(bp);
   2301
   2302		LCACHE_UNLOCK(flags);	/* unlock+enable */
   2303	}
   2304}
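
/*
 * Summary of the completion paths in lbmIODone() above, keyed by
 * bp->l_flag:
 *
 *	lbmREAD		pagein: wake the reader and return
 *	lbmDIRECT	sidestream write: wake the initiator and return
 *	lbmSYNC		leave the buffer for the i/o initiator to dispose of
 *	lbmGC		hand off to lmPostGC() for group commit
 *	(otherwise)	async pageout: lbmRELEASE|lbmFREE are asserted,
 *			recycle the buffer via lbmfree()
 */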
   2305
   2306int jfsIOWait(void *arg)
   2307{
   2308	struct lbuf *bp;
   2309
   2310	do {
   2311		spin_lock_irq(&log_redrive_lock);
   2312		while ((bp = log_redrive_list)) {
   2313			log_redrive_list = bp->l_redrive_next;
   2314			bp->l_redrive_next = NULL;
   2315			spin_unlock_irq(&log_redrive_lock);
   2316			lbmStartIO(bp);
   2317			spin_lock_irq(&log_redrive_lock);
   2318		}
   2319
   2320		if (freezing(current)) {
   2321			spin_unlock_irq(&log_redrive_lock);
   2322			try_to_freeze();
   2323		} else {
   2324			set_current_state(TASK_INTERRUPTIBLE);
   2325			spin_unlock_irq(&log_redrive_lock);
   2326			schedule();
   2327		}
   2328	} while (!kthread_should_stop());
   2329
   2330	jfs_info("jfsIOWait being killed!");
   2331	return 0;
   2332}
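
/*
 * Ordering note for jfsIOWait() above: set_current_state() is called
 * while log_redrive_lock is still held, before the unlock and
 * schedule().  A wakeup from lbmRedrive() can only happen after the
 * unlock, and it sets the task back to TASK_RUNNING, so a wakeup
 * arriving between the emptiness check and schedule() is not lost.
 */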
   2333
   2334/*
   2335 * NAME:	lmLogFormat()/jfs_logform()
   2336 *
   2337 * FUNCTION:	format file system log
   2338 *
   2339 * PARAMETERS:
   2340 *	log	- volume log
   2341 *	logAddress - start address of log space in FS block
   2342 *	logSize	- length of log space in FS block;
   2343 *
   2344 * RETURN:	0	- success
   2345 *		-EIO	- i/o error
   2346 *
   2347 * XXX: We're synchronously writing one page at a time.  This needs to
   2348 *	be improved by writing multiple pages at once.
   2349 */
   2350int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
   2351{
   2352	int rc = -EIO;
   2353	struct jfs_sb_info *sbi;
   2354	struct logsuper *logsuper;
   2355	struct logpage *lp;
   2356	int lspn;		/* log sequence page number */
   2357	struct lrd *lrd_ptr;
   2358	int npages = 0;
   2359	struct lbuf *bp;
   2360
   2361	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
   2362		 (long long)logAddress, logSize);
   2363
   2364	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
   2365
   2366	/* allocate a log buffer */
   2367	bp = lbmAllocate(log, 1);
   2368
   2369	npages = logSize >> sbi->l2nbperpage;
   2370
   2371	/*
   2372	 *	log space:
   2373	 *
   2374	 * page 0 - reserved;
   2375	 * page 1 - log superblock;
   2376	 * page 2 - log data page: A SYNC log record is written
   2377	 *	    into this page at logform time;
   2378	 * pages 3-N - log data page: set to empty log data pages;
   2379	 */
   2380	/*
   2381	 *	init log superblock: log page 1
   2382	 */
   2383	logsuper = (struct logsuper *) bp->l_ldata;
   2384
   2385	logsuper->magic = cpu_to_le32(LOGMAGIC);
   2386	logsuper->version = cpu_to_le32(LOGVERSION);
   2387	logsuper->state = cpu_to_le32(LOGREDONE);
   2388	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
   2389	logsuper->size = cpu_to_le32(npages);
   2390	logsuper->bsize = cpu_to_le32(sbi->bsize);
   2391	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
   2392	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
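
	/*
	 * The end value above is, assuming LOGPSIZE = 4096, the byte
	 * offset just past the single SYNCPT record that page 2 will
	 * hold: 2 * LOGPSIZE skips the reserved page and the log
	 * superblock, LOGPHDRSIZE skips page 2's header, and LOGRDSIZE
	 * accounts for the one log record, i.e. where the log ends
	 * right after formatting.
	 */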
   2393
   2394	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
   2395	bp->l_blkno = logAddress + sbi->nbperpage;
   2396	lbmStartIO(bp);
   2397	if ((rc = lbmIOWait(bp, 0)))
   2398		goto exit;
   2399
   2400	/*
   2401	 *	init pages 2 to npages-1 as log data pages:
   2402	 *
    2403	 * log sequence page number (lspn) initialization:
   2404	 *
   2405	 * pn:   0     1     2     3                 n-1
   2406	 *       +-----+-----+=====+=====+===.....===+=====+
   2407	 * lspn:             N-1   0     1           N-2
   2408	 *                   <--- N page circular file ---->
   2409	 *
    2410	 * the N (= npages-2) data pages of the log are maintained as
    2411	 * a circular file for the log records;
    2412	 * lspn grows by 1 monotonically as each log page is written
    2413	 * to the circular file of the log;
    2414	 * setLogpage() will not reset the page number even if
    2415	 * the eor is equal to LOGPHDRSIZE. In order for the binary search
    2416	 * to still work in the find-log-end process, we have to simulate
    2417	 * the log wrap situation at log format time.
    2418	 * The 1st log page written will have the highest lspn. The
    2419	 * succeeding log pages will have ascending lspn values,
    2420	 * starting from 0, ..., (N-2).
   2421	 */
   2422	lp = (struct logpage *) bp->l_ldata;
   2423	/*
    2424	 * initialize the 1st log page to be written: lspn = N - 1;
    2425	 * a SYNCPT log record is written to this page
   2426	 */
   2427	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
   2428	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
   2429
   2430	lrd_ptr = (struct lrd *) &lp->data;
   2431	lrd_ptr->logtid = 0;
   2432	lrd_ptr->backchain = 0;
   2433	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
   2434	lrd_ptr->length = 0;
   2435	lrd_ptr->log.syncpt.sync = 0;
   2436
   2437	bp->l_blkno += sbi->nbperpage;
   2438	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
   2439	lbmStartIO(bp);
   2440	if ((rc = lbmIOWait(bp, 0)))
   2441		goto exit;
   2442
   2443	/*
    2444	 *	initialize succeeding log pages: lspn = 0, 1, ..., (N-2)
   2445	 */
   2446	for (lspn = 0; lspn < npages - 3; lspn++) {
   2447		lp->h.page = lp->t.page = cpu_to_le32(lspn);
   2448		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
   2449
   2450		bp->l_blkno += sbi->nbperpage;
   2451		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
   2452		lbmStartIO(bp);
   2453		if ((rc = lbmIOWait(bp, 0)))
   2454			goto exit;
   2455	}
   2456
   2457	rc = 0;
   2458exit:
   2459	/*
   2460	 *	finalize log
   2461	 */
   2462	/* release the buffer */
   2463	lbmFree(bp);
   2464
   2465	return rc;
   2466}
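
/*
 * Worked example, assuming a 4K aggregate block size (nbperpage = 1)
 * and logSize = 1024 blocks: npages = 1024; page 0 is reserved and
 * page 1 holds the log superblock, leaving N = npages - 2 = 1022 data
 * pages.  The first data page written (pn = 2) gets lspn = npages - 3
 * = N - 1 = 1021, and pages 3 through npages - 1 get lspn 0 through
 * N - 2 = 1020, simulating a log that has already wrapped once.
 */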
   2467
   2468#ifdef CONFIG_JFS_STATISTICS
   2469int jfs_lmstats_proc_show(struct seq_file *m, void *v)
   2470{
   2471	seq_printf(m,
   2472		       "JFS Logmgr stats\n"
   2473		       "================\n"
   2474		       "commits = %d\n"
   2475		       "writes submitted = %d\n"
   2476		       "writes completed = %d\n"
   2477		       "full pages submitted = %d\n"
   2478		       "partial pages submitted = %d\n",
   2479		       lmStat.commit,
   2480		       lmStat.submitted,
   2481		       lmStat.pagedone,
   2482		       lmStat.full_page,
   2483		       lmStat.partial_page);
   2484	return 0;
   2485}
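
/*
 * These counters are exposed read-only through procfs (typically as
 * /proc/fs/jfs/lmstats when CONFIG_JFS_STATISTICS is enabled), so
 * `cat /proc/fs/jfs/lmstats` after some log activity shows the commit
 * and full/partial page pageout counts.
 */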
   2486#endif /* CONFIG_JFS_STATISTICS */