cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

recovery.c (24105B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * linux/fs/jbd2/recovery.c
      4 *
      5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
      6 *
      7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
      8 *
      9 * Journal recovery routines for the generic filesystem journaling code;
     10 * part of the ext2fs journaling system.
     11 */
     12
     13#ifndef __KERNEL__
     14#include "jfs_user.h"
     15#else
     16#include <linux/time.h>
     17#include <linux/fs.h>
     18#include <linux/jbd2.h>
     19#include <linux/errno.h>
     20#include <linux/crc32.h>
     21#include <linux/blkdev.h>
     22#endif
     23
     24/*
     25 * Maintain information about the progress of the recovery job, so that
     26 * the different passes can carry information between them.
     27 */
     28struct recovery_info
     29{
     30	tid_t		start_transaction;
     31	tid_t		end_transaction;
     32
     33	int		nr_replays;
     34	int		nr_revokes;
     35	int		nr_revoke_hits;
     36};
     37
     38static int do_one_pass(journal_t *journal,
     39				struct recovery_info *info, enum passtype pass);
     40static int scan_revoke_records(journal_t *, struct buffer_head *,
     41				tid_t, struct recovery_info *);
     42
     43#ifdef __KERNEL__
     44
     45/* Release readahead buffers after use */
     46static void journal_brelse_array(struct buffer_head *b[], int n)
     47{
     48	while (--n >= 0)
     49		brelse (b[n]);
     50}
     51
     52
     53/*
     54 * When reading from the journal, we are going through the block device
     55 * layer directly and so there is no readahead being done for us.  We
     56 * need to implement any readahead ourselves if we want it to happen at
     57 * all.  Recovery is basically one long sequential read, so make sure we
     58 * do the IO in reasonably large chunks.
     59 *
     60 * This is not so critical that we need to be enormously clever about
     61 * the readahead size, though.  128K is a purely arbitrary, good-enough
     62 * fixed value.
     63 */
     64
     65#define MAXBUF 8
     66static int do_readahead(journal_t *journal, unsigned int start)
     67{
     68	int err;
     69	unsigned int max, nbufs, next;
     70	unsigned long long blocknr;
     71	struct buffer_head *bh;
     72
     73	struct buffer_head * bufs[MAXBUF];
     74
     75	/* Do up to 128K of readahead */
     76	max = start + (128 * 1024 / journal->j_blocksize);
     77	if (max > journal->j_total_len)
     78		max = journal->j_total_len;
     79
     80	/* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
     81	 * a time to the block device IO layer. */
     82
     83	nbufs = 0;
     84
     85	for (next = start; next < max; next++) {
     86		err = jbd2_journal_bmap(journal, next, &blocknr);
     87
     88		if (err) {
     89			printk(KERN_ERR "JBD2: bad block at offset %u\n",
     90				next);
     91			goto failed;
     92		}
     93
     94		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     95		if (!bh) {
     96			err = -ENOMEM;
     97			goto failed;
     98		}
     99
    100		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
    101			bufs[nbufs++] = bh;
    102			if (nbufs == MAXBUF) {
    103				ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
    104				journal_brelse_array(bufs, nbufs);
    105				nbufs = 0;
    106			}
    107		} else
    108			brelse(bh);
    109	}
    110
    111	if (nbufs)
    112		ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
    113	err = 0;
    114
    115failed:
    116	if (nbufs)
    117		journal_brelse_array(bufs, nbufs);
    118	return err;
    119}
    120
    121#endif /* __KERNEL__ */
    122
    123
    124/*
    125 * Read a block from the journal
    126 */
    127
    128static int jread(struct buffer_head **bhp, journal_t *journal,
    129		 unsigned int offset)
    130{
    131	int err;
    132	unsigned long long blocknr;
    133	struct buffer_head *bh;
    134
    135	*bhp = NULL;
    136
    137	if (offset >= journal->j_total_len) {
    138		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
    139		return -EFSCORRUPTED;
    140	}
    141
    142	err = jbd2_journal_bmap(journal, offset, &blocknr);
    143
    144	if (err) {
    145		printk(KERN_ERR "JBD2: bad block at offset %u\n",
    146			offset);
    147		return err;
    148	}
    149
    150	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
    151	if (!bh)
    152		return -ENOMEM;
    153
    154	if (!buffer_uptodate(bh)) {
    155		/* If this is a brand new buffer, start readahead.
    156                   Otherwise, we assume we are already reading it.  */
    157		if (!buffer_req(bh))
    158			do_readahead(journal, offset);
    159		wait_on_buffer(bh);
    160	}
    161
    162	if (!buffer_uptodate(bh)) {
    163		printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
    164			offset);
    165		brelse(bh);
    166		return -EIO;
    167	}
    168
    169	*bhp = bh;
    170	return 0;
    171}
    172
    173static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
    174{
    175	struct jbd2_journal_block_tail *tail;
    176	__be32 provided;
    177	__u32 calculated;
    178
    179	if (!jbd2_journal_has_csum_v2or3(j))
    180		return 1;
    181
    182	tail = (struct jbd2_journal_block_tail *)((char *)buf +
    183		j->j_blocksize - sizeof(struct jbd2_journal_block_tail));
    184	provided = tail->t_checksum;
    185	tail->t_checksum = 0;
    186	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
    187	tail->t_checksum = provided;
    188
    189	return provided == cpu_to_be32(calculated);
    190}
    191
    192/*
    193 * Count the number of in-use tags in a journal descriptor block.
    194 */
    195
    196static int count_tags(journal_t *journal, struct buffer_head *bh)
    197{
    198	char *			tagp;
    199	journal_block_tag_t	tag;
    200	int			nr = 0, size = journal->j_blocksize;
    201	int			tag_bytes = journal_tag_bytes(journal);
    202
    203	if (jbd2_journal_has_csum_v2or3(journal))
    204		size -= sizeof(struct jbd2_journal_block_tail);
    205
    206	tagp = &bh->b_data[sizeof(journal_header_t)];
    207
    208	while ((tagp - bh->b_data + tag_bytes) <= size) {
    209		memcpy(&tag, tagp, sizeof(tag));
    210
    211		nr++;
    212		tagp += tag_bytes;
    213		if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
    214			tagp += 16;
    215
    216		if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
    217			break;
    218	}
    219
    220	return nr;
    221}
    222
    223
    224/* Make sure we wrap around the log correctly! */
    225#define wrap(journal, var)						\
    226do {									\
    227	unsigned long _wrap_last =					\
    228		jbd2_has_feature_fast_commit(journal) ?			\
    229			(journal)->j_fc_last : (journal)->j_last;	\
    230									\
    231	if (var >= _wrap_last)						\
    232		var -= (_wrap_last - (journal)->j_first);		\
    233} while (0)
    234
    235static int fc_do_one_pass(journal_t *journal,
    236			  struct recovery_info *info, enum passtype pass)
    237{
    238	unsigned int expected_commit_id = info->end_transaction;
    239	unsigned long next_fc_block;
    240	struct buffer_head *bh;
    241	int err = 0;
    242
    243	next_fc_block = journal->j_fc_first;
    244	if (!journal->j_fc_replay_callback)
    245		return 0;
    246
    247	while (next_fc_block <= journal->j_fc_last) {
    248		jbd_debug(3, "Fast commit replay: next block %ld\n",
    249			  next_fc_block);
    250		err = jread(&bh, journal, next_fc_block);
    251		if (err) {
    252			jbd_debug(3, "Fast commit replay: read error\n");
    253			break;
    254		}
    255
    256		err = journal->j_fc_replay_callback(journal, bh, pass,
    257					next_fc_block - journal->j_fc_first,
    258					expected_commit_id);
    259		next_fc_block++;
    260		if (err < 0 || err == JBD2_FC_REPLAY_STOP)
    261			break;
    262		err = 0;
    263	}
    264
    265	if (err)
    266		jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
    267
    268	return err;
    269}
    270
    271/**
    272 * jbd2_journal_recover - recovers a on-disk journal
    273 * @journal: the journal to recover
    274 *
    275 * The primary function for recovering the log contents when mounting a
    276 * journaled device.
    277 *
    278 * Recovery is done in three passes.  In the first pass, we look for the
    279 * end of the log.  In the second, we assemble the list of revoke
    280 * blocks.  In the third and final pass, we replay any un-revoked blocks
    281 * in the log.
    282 */
    283int jbd2_journal_recover(journal_t *journal)
    284{
    285	int			err, err2;
    286	journal_superblock_t *	sb;
    287
    288	struct recovery_info	info;
    289
    290	memset(&info, 0, sizeof(info));
    291	sb = journal->j_superblock;
    292
    293	/*
    294	 * The journal superblock's s_start field (the current log head)
    295	 * is always zero if, and only if, the journal was cleanly
    296	 * unmounted.
    297	 */
    298
    299	if (!sb->s_start) {
    300		jbd_debug(1, "No recovery required, last transaction %d\n",
    301			  be32_to_cpu(sb->s_sequence));
    302		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
    303		return 0;
    304	}
    305
    306	err = do_one_pass(journal, &info, PASS_SCAN);
    307	if (!err)
    308		err = do_one_pass(journal, &info, PASS_REVOKE);
    309	if (!err)
    310		err = do_one_pass(journal, &info, PASS_REPLAY);
    311
    312	jbd_debug(1, "JBD2: recovery, exit status %d, "
    313		  "recovered transactions %u to %u\n",
    314		  err, info.start_transaction, info.end_transaction);
    315	jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
    316		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
    317
    318	/* Restart the log at the next transaction ID, thus invalidating
    319	 * any existing commit records in the log. */
    320	journal->j_transaction_sequence = ++info.end_transaction;
    321
    322	jbd2_journal_clear_revoke(journal);
    323	err2 = sync_blockdev(journal->j_fs_dev);
    324	if (!err)
    325		err = err2;
    326	/* Make sure all replayed data is on permanent storage */
    327	if (journal->j_flags & JBD2_BARRIER) {
    328		err2 = blkdev_issue_flush(journal->j_fs_dev);
    329		if (!err)
    330			err = err2;
    331	}
    332	return err;
    333}
    334
    335/**
    336 * jbd2_journal_skip_recovery - Start journal and wipe exiting records
    337 * @journal: journal to startup
    338 *
    339 * Locate any valid recovery information from the journal and set up the
    340 * journal structures in memory to ignore it (presumably because the
    341 * caller has evidence that it is out of date).
    342 * This function doesn't appear to be exported..
    343 *
    344 * We perform one pass over the journal to allow us to tell the user how
    345 * much recovery information is being erased, and to let us initialise
    346 * the journal transaction sequence numbers to the next unused ID.
    347 */
    348int jbd2_journal_skip_recovery(journal_t *journal)
    349{
    350	int			err;
    351
    352	struct recovery_info	info;
    353
    354	memset (&info, 0, sizeof(info));
    355
    356	err = do_one_pass(journal, &info, PASS_SCAN);
    357
    358	if (err) {
    359		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
    360		++journal->j_transaction_sequence;
    361	} else {
    362#ifdef CONFIG_JBD2_DEBUG
    363		int dropped = info.end_transaction - 
    364			be32_to_cpu(journal->j_superblock->s_sequence);
    365		jbd_debug(1,
    366			  "JBD2: ignoring %d transaction%s from the journal.\n",
    367			  dropped, (dropped == 1) ? "" : "s");
    368#endif
    369		journal->j_transaction_sequence = ++info.end_transaction;
    370	}
    371
    372	journal->j_tail = 0;
    373	return err;
    374}
    375
    376static inline unsigned long long read_tag_block(journal_t *journal,
    377						journal_block_tag_t *tag)
    378{
    379	unsigned long long block = be32_to_cpu(tag->t_blocknr);
    380	if (jbd2_has_feature_64bit(journal))
    381		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
    382	return block;
    383}
    384
    385/*
    386 * calc_chksums calculates the checksums for the blocks described in the
    387 * descriptor block.
    388 */
    389static int calc_chksums(journal_t *journal, struct buffer_head *bh,
    390			unsigned long *next_log_block, __u32 *crc32_sum)
    391{
    392	int i, num_blks, err;
    393	unsigned long io_block;
    394	struct buffer_head *obh;
    395
    396	num_blks = count_tags(journal, bh);
    397	/* Calculate checksum of the descriptor block. */
    398	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
    399
    400	for (i = 0; i < num_blks; i++) {
    401		io_block = (*next_log_block)++;
    402		wrap(journal, *next_log_block);
    403		err = jread(&obh, journal, io_block);
    404		if (err) {
    405			printk(KERN_ERR "JBD2: IO error %d recovering block "
    406				"%lu in log\n", err, io_block);
    407			return 1;
    408		} else {
    409			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
    410				     obh->b_size);
    411		}
    412		put_bh(obh);
    413	}
    414	return 0;
    415}
    416
    417static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
    418{
    419	struct commit_header *h;
    420	__be32 provided;
    421	__u32 calculated;
    422
    423	if (!jbd2_journal_has_csum_v2or3(j))
    424		return 1;
    425
    426	h = buf;
    427	provided = h->h_chksum[0];
    428	h->h_chksum[0] = 0;
    429	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
    430	h->h_chksum[0] = provided;
    431
    432	return provided == cpu_to_be32(calculated);
    433}
    434
    435static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
    436				      journal_block_tag3_t *tag3,
    437				      void *buf, __u32 sequence)
    438{
    439	__u32 csum32;
    440	__be32 seq;
    441
    442	if (!jbd2_journal_has_csum_v2or3(j))
    443		return 1;
    444
    445	seq = cpu_to_be32(sequence);
    446	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
    447	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
    448
    449	if (jbd2_has_feature_csum3(j))
    450		return tag3->t_checksum == cpu_to_be32(csum32);
    451	else
    452		return tag->t_checksum == cpu_to_be16(csum32);
    453}
    454
/*
 * do_one_pass - walk the log once on behalf of a single recovery pass.
 * @journal: journal being recovered
 * @info:    state shared between the passes (transaction range, counters)
 * @pass:    which pass is being run
 *
 * The walk starts at the block and sequence number recorded in the
 * journal superblock (s_start / s_sequence) and reads one log block at a
 * time.  It ends when a block lacks the journal magic, carries an
 * unexpected sequence number or an unknown block type, or - for passes
 * other than PASS_SCAN - when the transaction ID reaches
 * info->end_transaction.
 *
 *  - PASS_SCAN records info->start_transaction, verifies checksums and, if
 *    nothing set it earlier, stores the ID where the walk ended in
 *    info->end_transaction.
 *  - PASS_REVOKE passes each revoke block to scan_revoke_records().
 *  - PASS_REPLAY copies every data block that is neither revoked nor
 *    failing its tag checksum into a buffer of j_fs_dev and marks that
 *    buffer dirty.
 *
 * When the fast commit feature is enabled, fc_do_one_pass() is run
 * afterwards for every pass except PASS_REVOKE.
 *
 * Returns 0 on success or an error code.  Errors that abort the walk are
 * returned directly; errors that were only noted during the walk are
 * returned once it has finished.
 */
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long		next_log_block;
	/*
	 * err holds the result of the most recent call and is what the
	 * "failed" exit returns; success accumulates errors that do not
	 * stop the walk and is what the normal exit returns.
	 */
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head *	bh;
	unsigned int		sequence;
	int			blocktype;
	int			tag_bytes = journal_tag_bytes(journal);
	__u32			crc32_sum = ~0; /* Transactional Checksums */
	int			descr_csum_size = 0;
	int			block_error = 0;
	bool			need_check_commit_time = false;
	__u64			last_trans_commit_time = 0, commit_time;

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */

	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;

	jbd_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 */

	while (1) {
		int			flags;
		char *			tagp;
		journal_block_tag_t	tag;
		struct buffer_head *	obh;
		struct buffer_head *	nbh;

		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */

		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
			  next_commit_ID, next_log_block,
			  jbd2_has_feature_fast_commit(journal) ?
			  journal->j_fc_last : journal->j_last);

		/* Skip over each chunk of the transaction looking for
		 * either the next descriptor block or the final commit
		 * record. */

		jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		/* From here on next_log_block names the block AFTER bh. */
		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */

		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
			brelse(bh);
			break;
		}

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		jbd_debug(3, "Found magic %d, sequence %d\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID) {
			brelse(bh);
			break;
		}

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass... */

		switch(blocktype) {
		case JBD2_DESCRIPTOR_BLOCK:
			/* Verify checksum first */
			if (jbd2_journal_has_csum_v2or3(journal))
				descr_csum_size =
					sizeof(struct jbd2_journal_block_tail);
			if (descr_csum_size > 0 &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				/*
				 * PASS_SCAN can see stale blocks due to lazy
				 * journal init. Don't error out on those yet.
				 */
				if (pass != PASS_SCAN) {
					/*
					 * NOTE(review): next_log_block was
					 * already advanced above, so the
					 * number logged here is that of the
					 * following block - confirm intent.
					 */
					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
					       next_log_block);
					err = -EFSBADCRC;
					brelse(bh);
					goto failed;
				}
				need_check_commit_time = true;
				jbd_debug(1,
					"invalid descriptor block found in %lu\n",
					next_log_block);
			}

			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    jbd2_has_feature_checksum(journal) &&
				    !need_check_commit_time &&
				    !info->end_transaction) {
					if (calc_chksums(journal, bh,
							&next_log_block,
							&crc32_sum)) {
						put_bh(bh);
						break;
					}
					put_bh(bh);
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				put_bh(bh);
				continue;
			}

			/* A descriptor block: we can now write all of
			 * the data blocks.  Yay, useful work is finally
			 * getting done here! */

			tagp = &bh->b_data[sizeof(journal_header_t)];
			while ((tagp - bh->b_data + tag_bytes)
			       <= journal->j_blocksize - descr_csum_size) {
				unsigned long io_block;

				memcpy(&tag, tagp, sizeof(tag));
				flags = be16_to_cpu(tag.t_flags);

				io_block = next_log_block++;
				wrap(journal, next_log_block);
				err = jread(&obh, journal, io_block);
				if (err) {
					/* Recover what we can, but
					 * report failure at the end. */
					success = err;
					printk(KERN_ERR
						"JBD2: IO error %d recovering "
						"block %ld in log\n",
						err, io_block);
				} else {
					unsigned long long blocknr;

					J_ASSERT(obh != NULL);
					blocknr = read_tag_block(journal,
								 &tag);

					/* If the block has been
					 * revoked, then we're all done
					 * here. */
					if (jbd2_journal_test_revoke
					    (journal, blocknr,
					     next_commit_ID)) {
						brelse(obh);
						++info->nr_revoke_hits;
						goto skip_write;
					}

					/* Look for block corruption */
					if (!jbd2_block_tag_csum_verify(
			journal, &tag, (journal_block_tag3_t *)tagp,
			obh->b_data, be32_to_cpu(tmp->h_sequence))) {
						brelse(obh);
						success = -EFSBADCRC;
						printk(KERN_ERR "JBD2: Invalid "
						       "checksum recovering "
						       "data block %llu in "
						       "log\n", blocknr);
						block_error = 1;
						goto skip_write;
					}

					/* Find a buffer for the new
					 * data being restored */
					nbh = __getblk(journal->j_fs_dev,
							blocknr,
							journal->j_blocksize);
					if (nbh == NULL) {
						printk(KERN_ERR
						       "JBD2: Out of memory "
						       "during recovery.\n");
						err = -ENOMEM;
						brelse(bh);
						brelse(obh);
						goto failed;
					}

					lock_buffer(nbh);
					memcpy(nbh->b_data, obh->b_data,
							journal->j_blocksize);
					/*
					 * An escaped block has the journal
					 * magic number written back into its
					 * first four bytes.
					 */
					if (flags & JBD2_FLAG_ESCAPE) {
						*((__be32 *)nbh->b_data) =
						cpu_to_be32(JBD2_MAGIC_NUMBER);
					}

					BUFFER_TRACE(nbh, "marking dirty");
					set_buffer_uptodate(nbh);
					mark_buffer_dirty(nbh);
					BUFFER_TRACE(nbh, "marking uptodate");
					++info->nr_replays;
					/* ll_rw_block(WRITE, 1, &nbh); */
					unlock_buffer(nbh);
					brelse(obh);
					brelse(nbh);
				}

			skip_write:
				/* Step to the next tag; a tag without
				 * SAME_UUID is followed by 16 extra bytes. */
				tagp += tag_bytes;
				if (!(flags & JBD2_FLAG_SAME_UUID))
					tagp += 16;

				if (flags & JBD2_FLAG_LAST_TAG)
					break;
			}

			brelse(bh);
			continue;

		case JBD2_COMMIT_BLOCK:
			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *			 |
			 *		 ____________________
			 *		|		     |
			 * 	async_commit             sync_commit
			 *     		|                    |
			 *		| GO TO NEXT    "Journal Corruption"
			 *		| TRANSACTION
			 *		|
			 * {(n+1)th transaction}
			 *		|
			 * 	 _______|______________
			 * 	|	 	      |
			 * Commit block found	Commit block not found
			 *      |		      |
			 * "Journal Corruption"       |
			 *		 _____________|_________
			 *     		|	           	|
			 *	nth trans corrupt	OR   nth trans
			 *	and (n+1)th interrupted     interrupted
			 *	before commit block
			 *      could reach the disk.
			 *	(Cannot find the difference in above
			 *	 mentioned conditions. Hence assume
			 *	 "Interrupted Commit".)
			 */
			commit_time = be64_to_cpu(
				((struct commit_header *)bh->b_data)->h_commit_sec);
			/*
			 * If need_check_commit_time is set, it means we are in
			 * PASS_SCAN and csum verify failed before. If
			 * commit_time is increasing, it's the same journal,
			 * otherwise it is stale journal block, just end this
			 * recovery.
			 */
			if (need_check_commit_time) {
				if (commit_time >= last_trans_commit_time) {
					pr_err("JBD2: Invalid checksum found in transaction %u\n",
					       next_commit_ID);
					err = -EFSBADCRC;
					brelse(bh);
					goto failed;
				}
			/* Also entered by goto from chksum_error below. */
			ignore_crc_mismatch:
				/*
				 * It likely does not belong to same journal,
				 * just end this recovery with success.
				 */
				jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
					  next_commit_ID);
				brelse(bh);
				goto done;
			}

			/*
			 * Found an expected commit block: if checksums
			 * are present, verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number.
			 */
			if (pass == PASS_SCAN &&
			    jbd2_has_feature_checksum(journal)) {
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				/*
				 * end_transaction is only non-zero this early
				 * in the scan when an earlier commit block
				 * failed its checksum (see chksum_error).
				 * Finding another commit block after it is
				 * recorded as a failed commit and ends the
				 * walk.
				 */
				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					brelse(bh);
					break;
				}

				/* Neither checksum match nor unused? */
				if (!((crc32_sum == found_chksum &&
				       cbh->h_chksum_type ==
						JBD2_CRC32_CHKSUM &&
				       cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE) ||
				      (cbh->h_chksum_type == 0 &&
				       cbh->h_chksum_size == 0 &&
				       found_chksum == 0)))
					goto chksum_error;

				/* Start a fresh running CRC for the next
				 * transaction. */
				crc32_sum = ~0;
			}
			if (pass == PASS_SCAN &&
			    !jbd2_commit_block_csum_verify(journal,
							   bh->b_data)) {
			chksum_error:
				/* An older commit time than the previous
				 * transaction is treated as stale data. */
				if (commit_time < last_trans_commit_time)
					goto ignore_crc_mismatch;
				info->end_transaction = next_commit_ID;

				if (!jbd2_has_feature_async_commit(journal)) {
					journal->j_failed_commit =
						next_commit_ID;
					brelse(bh);
					break;
				}
			}
			if (pass == PASS_SCAN)
				last_trans_commit_time = commit_time;
			brelse(bh);
			next_commit_ID++;
			continue;

		case JBD2_REVOKE_BLOCK:
			/*
			 * Check revoke block crc in pass_scan, if csum verify
			 * failed, check commit block time later.
			 */
			if (pass == PASS_SCAN &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
					  next_log_block);
				need_check_commit_time = true;
			}
			/* If we aren't in the REVOKE pass, then we can
			 * just skip over this block. */
			if (pass != PASS_REVOKE) {
				brelse(bh);
				continue;
			}

			err = scan_revoke_records(journal, bh,
						  next_commit_ID, info);
			brelse(bh);
			if (err)
				goto failed;
			continue;

		default:
			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			brelse(bh);
			goto done;
		}
	}

 done:
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk(KERN_ERR "JBD2: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}

	/* The fast-commit area is processed in every pass but PASS_REVOKE;
	 * an error from it replaces any status recorded so far. */
	if (jbd2_has_feature_fast_commit(journal) &&  pass != PASS_REVOKE) {
		err = fc_do_one_pass(journal, info, pass);
		if (err)
			success = err;
	}

	if (block_error && success == 0)
		success = -EIO;
	return success;

 failed:
	/* Hard failure: return the error directly, bypassing the
	 * end-of-pass checks and the fast-commit processing above. */
	return err;
}
    891
    892/* Scan a revoke record, marking all blocks mentioned as revoked. */
    893
    894static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
    895			       tid_t sequence, struct recovery_info *info)
    896{
    897	jbd2_journal_revoke_header_t *header;
    898	int offset, max;
    899	unsigned csum_size = 0;
    900	__u32 rcount;
    901	int record_len = 4;
    902
    903	header = (jbd2_journal_revoke_header_t *) bh->b_data;
    904	offset = sizeof(jbd2_journal_revoke_header_t);
    905	rcount = be32_to_cpu(header->r_count);
    906
    907	if (jbd2_journal_has_csum_v2or3(journal))
    908		csum_size = sizeof(struct jbd2_journal_block_tail);
    909	if (rcount > journal->j_blocksize - csum_size)
    910		return -EINVAL;
    911	max = rcount;
    912
    913	if (jbd2_has_feature_64bit(journal))
    914		record_len = 8;
    915
    916	while (offset + record_len <= max) {
    917		unsigned long long blocknr;
    918		int err;
    919
    920		if (record_len == 4)
    921			blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
    922		else
    923			blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
    924		offset += record_len;
    925		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
    926		if (err)
    927			return err;
    928		++info->nr_revokes;
    929	}
    930	return 0;
    931}