cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

check-integrity.c (88015B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (C) STRATO AG 2011.  All rights reserved.
      4 */
      5
      6/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
     12 *
     13 * Code is added that examines all block write requests during
     14 * runtime (including writes of the super block). Three rules
     15 * are verified and an error is printed on violation of the
     16 * rules:
     17 * 1. It is not allowed to write a disk block which is
     18 *    currently referenced by the super block (either directly
     19 *    or indirectly).
     20 * 2. When a super block is written, it is verified that all
     21 *    referenced (directly or indirectly) blocks fulfill the
     22 *    following requirements:
     23 *    2a. All referenced blocks have either been present when
     24 *        the file system was mounted, (i.e., they have been
     25 *        referenced by the super block) or they have been
     26 *        written since then and the write completion callback
     27 *        was called and no write error was indicated and a
     28 *        FLUSH request to the device where these blocks are
     29 *        located was received and completed.
     30 *    2b. All referenced blocks need to have a generation
     31 *        number which is equal to the parent's number.
     32 *
     33 * One issue that was found using this module was that the log
     34 * tree on disk became temporarily corrupted because disk blocks
     35 * that had been in use for the log tree had been freed and
     36 * reused too early, while being referenced by the written super
     37 * block.
     38 *
     39 * The search term in the kernel log that can be used to filter
     40 * on the existence of detected integrity issues is
     41 * "btrfs: attempt".
     42 *
     43 * The integrity check is enabled via mount options. These
     44 * mount options are only supported if the integrity check
     45 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
     46 *
     47 * Example #1, apply integrity checks to all metadata:
     48 * mount /dev/sdb1 /mnt -o check_int
     49 *
     50 * Example #2, apply integrity checks to all metadata and
     51 * to data extents:
     52 * mount /dev/sdb1 /mnt -o check_int_data
     53 *
     54 * Example #3, apply integrity checks to all metadata and dump
     55 * the tree that the super block references to kernel messages
     56 * each time after a super block was written:
     57 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
     58 *
     59 * If the integrity check tool is included and activated in
     60 * the mount options, plenty of kernel memory is used, and
     61 * plenty of additional CPU cycles are spent. Enabling this
     62 * functionality is not intended for normal use. In most
     63 * cases, unless you are a btrfs developer who needs to verify
     64 * the integrity of (super)-block write requests, do not
     65 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
     66 * include and compile the integrity check tool.
     67 *
     68 * Expect millions of lines of information in the kernel log with an
     69 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
     70 * kernel config to at least 26 (which is 64MB). Usually the value is
     71 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
     72 * changed like this before LOG_BUF_SHIFT can be set to a high value:
     73 * config LOG_BUF_SHIFT
     74 *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
     75 *       range 12 30
     76 */
     77
     78#include <linux/sched.h>
     79#include <linux/slab.h>
     80#include <linux/mutex.h>
     81#include <linux/blkdev.h>
     82#include <linux/mm.h>
     83#include <linux/string.h>
     84#include <crypto/hash.h>
     85#include "ctree.h"
     86#include "disk-io.h"
     87#include "transaction.h"
     88#include "extent_io.h"
     89#include "volumes.h"
     90#include "print-tree.h"
     91#include "locking.h"
     92#include "check-integrity.h"
     93#include "rcu-string.h"
     94#include "compression.h"
     95
     96#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
     97#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
     98#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
     99#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
    100#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
    101#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
    102#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
    103#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
    104							 * excluding " [...]" */
    105#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
    106
/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask.
 */
    111#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
    112#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
    113#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
    114#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
    115#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
    116#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
    117#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
    118#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
    119#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
    120#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
    121#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
    122#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
    123#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
    124#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE		0x00002000
    125
    126struct btrfsic_dev_state;
    127struct btrfsic_state;
    128
    129struct btrfsic_block {
    130	u32 magic_num;		/* only used for debug purposes */
    131	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
    132	unsigned int is_superblock:1;	/* if it is one of the superblocks */
    133	unsigned int is_iodone:1;	/* if is done by lower subsystem */
    134	unsigned int iodone_w_error:1;	/* error was indicated to endio */
    135	unsigned int never_written:1;	/* block was added because it was
    136					 * referenced, not because it was
    137					 * written */
    138	unsigned int mirror_num;	/* large enough to hold
    139					 * BTRFS_SUPER_MIRROR_MAX */
    140	struct btrfsic_dev_state *dev_state;
    141	u64 dev_bytenr;		/* key, physical byte num on disk */
    142	u64 logical_bytenr;	/* logical byte num on disk */
    143	u64 generation;
    144	struct btrfs_disk_key disk_key;	/* extra info to print in case of
    145					 * issues, will not always be correct */
    146	struct list_head collision_resolving_node;	/* list node */
    147	struct list_head all_blocks_node;	/* list node */
    148
    149	/* the following two lists contain block_link items */
    150	struct list_head ref_to_list;	/* list */
    151	struct list_head ref_from_list;	/* list */
    152	struct btrfsic_block *next_in_same_bio;
    153	void *orig_bio_private;
    154	bio_end_io_t *orig_bio_end_io;
    155	int submit_bio_bh_rw;
    156	u64 flush_gen; /* only valid if !never_written */
    157};
    158
    159/*
    160 * Elements of this type are allocated dynamically and required because
    161 * each block object can refer to and can be ref from multiple blocks.
    162 * The key to lookup them in the hashtable is the dev_bytenr of
    163 * the block ref to plus the one from the block referred from.
    164 * The fact that they are searchable via a hashtable and that a
    165 * ref_cnt is maintained is not required for the btrfs integrity
    166 * check algorithm itself, it is only used to make the output more
    167 * beautiful in case that an error is detected (an error is defined
    168 * as a write operation to a block while that block is still referenced).
    169 */
    170struct btrfsic_block_link {
    171	u32 magic_num;		/* only used for debug purposes */
    172	u32 ref_cnt;
    173	struct list_head node_ref_to;	/* list node */
    174	struct list_head node_ref_from;	/* list node */
    175	struct list_head collision_resolving_node;	/* list node */
    176	struct btrfsic_block *block_ref_to;
    177	struct btrfsic_block *block_ref_from;
    178	u64 parent_generation;
    179};
    180
    181struct btrfsic_dev_state {
    182	u32 magic_num;		/* only used for debug purposes */
    183	struct block_device *bdev;
    184	struct btrfsic_state *state;
    185	struct list_head collision_resolving_node;	/* list node */
    186	struct btrfsic_block dummy_block_for_bio_bh_flush;
    187	u64 last_flush_gen;
    188};
    189
    190struct btrfsic_block_hashtable {
    191	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
    192};
    193
    194struct btrfsic_block_link_hashtable {
    195	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
    196};
    197
    198struct btrfsic_dev_state_hashtable {
    199	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
    200};
    201
    202struct btrfsic_block_data_ctx {
    203	u64 start;		/* virtual bytenr */
    204	u64 dev_bytenr;		/* physical bytenr on device */
    205	u32 len;
    206	struct btrfsic_dev_state *dev;
    207	char **datav;
    208	struct page **pagev;
    209	void *mem_to_free;
    210};
    211
    212/* This structure is used to implement recursion without occupying
    213 * any stack space, refer to btrfsic_process_metablock() */
    214struct btrfsic_stack_frame {
    215	u32 magic;
    216	u32 nr;
    217	int error;
    218	int i;
    219	int limit_nesting;
    220	int num_copies;
    221	int mirror_num;
    222	struct btrfsic_block *block;
    223	struct btrfsic_block_data_ctx *block_ctx;
    224	struct btrfsic_block *next_block;
    225	struct btrfsic_block_data_ctx next_block_ctx;
    226	struct btrfs_header *hdr;
    227	struct btrfsic_stack_frame *prev;
    228};
    229
    230/* Some state per mounted filesystem */
    231struct btrfsic_state {
    232	u32 print_mask;
    233	int include_extent_data;
    234	struct list_head all_blocks_list;
    235	struct btrfsic_block_hashtable block_hashtable;
    236	struct btrfsic_block_link_hashtable block_link_hashtable;
    237	struct btrfs_fs_info *fs_info;
    238	u64 max_superblock_generation;
    239	struct btrfsic_block *latest_superblock;
    240	u32 metablock_size;
    241	u32 datablock_size;
    242};
    243
    244static int btrfsic_process_metablock(struct btrfsic_state *state,
    245				     struct btrfsic_block *block,
    246				     struct btrfsic_block_data_ctx *block_ctx,
    247				     int limit_nesting, int force_iodone_flag);
    248static void btrfsic_read_from_block_data(
    249	struct btrfsic_block_data_ctx *block_ctx,
    250	void *dst, u32 offset, size_t len);
    251static int btrfsic_create_link_to_next_block(
    252		struct btrfsic_state *state,
    253		struct btrfsic_block *block,
    254		struct btrfsic_block_data_ctx
    255		*block_ctx, u64 next_bytenr,
    256		int limit_nesting,
    257		struct btrfsic_block_data_ctx *next_block_ctx,
    258		struct btrfsic_block **next_blockp,
    259		int force_iodone_flag,
    260		int *num_copiesp, int *mirror_nump,
    261		struct btrfs_disk_key *disk_key,
    262		u64 parent_generation);
    263static int btrfsic_handle_extent_data(struct btrfsic_state *state,
    264				      struct btrfsic_block *block,
    265				      struct btrfsic_block_data_ctx *block_ctx,
    266				      u32 item_offset, int force_iodone_flag);
    267static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
    268			     struct btrfsic_block_data_ctx *block_ctx_out,
    269			     int mirror_num);
    270static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
    271static int btrfsic_read_block(struct btrfsic_state *state,
    272			      struct btrfsic_block_data_ctx *block_ctx);
    273static int btrfsic_process_written_superblock(
    274		struct btrfsic_state *state,
    275		struct btrfsic_block *const block,
    276		struct btrfs_super_block *const super_hdr);
    277static void btrfsic_bio_end_io(struct bio *bp);
    278static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
    279					      const struct btrfsic_block *block,
    280					      int recursion_level);
    281static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
    282					struct btrfsic_block *const block,
    283					int recursion_level);
    284static void btrfsic_print_add_link(const struct btrfsic_state *state,
    285				   const struct btrfsic_block_link *l);
    286static void btrfsic_print_rem_link(const struct btrfsic_state *state,
    287				   const struct btrfsic_block_link *l);
    288static char btrfsic_get_block_type(const struct btrfsic_state *state,
    289				   const struct btrfsic_block *block);
    290static void btrfsic_dump_tree(const struct btrfsic_state *state);
    291static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
    292				  const struct btrfsic_block *block,
    293				  int indent_level);
    294static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
    295		struct btrfsic_state *state,
    296		struct btrfsic_block_data_ctx *next_block_ctx,
    297		struct btrfsic_block *next_block,
    298		struct btrfsic_block *from_block,
    299		u64 parent_generation);
    300static struct btrfsic_block *btrfsic_block_lookup_or_add(
    301		struct btrfsic_state *state,
    302		struct btrfsic_block_data_ctx *block_ctx,
    303		const char *additional_string,
    304		int is_metadata,
    305		int is_iodone,
    306		int never_written,
    307		int mirror_num,
    308		int *was_created);
    309static int btrfsic_process_superblock_dev_mirror(
    310		struct btrfsic_state *state,
    311		struct btrfsic_dev_state *dev_state,
    312		struct btrfs_device *device,
    313		int superblock_mirror_num,
    314		struct btrfsic_dev_state **selected_dev_state,
    315		struct btrfs_super_block *selected_super);
    316static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev);
    317static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
    318					   u64 bytenr,
    319					   struct btrfsic_dev_state *dev_state,
    320					   u64 dev_bytenr);
    321
    322static struct mutex btrfsic_mutex;
    323static int btrfsic_is_initialized;
    324static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
    325
    326
    327static void btrfsic_block_init(struct btrfsic_block *b)
    328{
    329	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
    330	b->dev_state = NULL;
    331	b->dev_bytenr = 0;
    332	b->logical_bytenr = 0;
    333	b->generation = BTRFSIC_GENERATION_UNKNOWN;
    334	b->disk_key.objectid = 0;
    335	b->disk_key.type = 0;
    336	b->disk_key.offset = 0;
    337	b->is_metadata = 0;
    338	b->is_superblock = 0;
    339	b->is_iodone = 0;
    340	b->iodone_w_error = 0;
    341	b->never_written = 0;
    342	b->mirror_num = 0;
    343	b->next_in_same_bio = NULL;
    344	b->orig_bio_private = NULL;
    345	b->orig_bio_end_io = NULL;
    346	INIT_LIST_HEAD(&b->collision_resolving_node);
    347	INIT_LIST_HEAD(&b->all_blocks_node);
    348	INIT_LIST_HEAD(&b->ref_to_list);
    349	INIT_LIST_HEAD(&b->ref_from_list);
    350	b->submit_bio_bh_rw = 0;
    351	b->flush_gen = 0;
    352}
    353
    354static struct btrfsic_block *btrfsic_block_alloc(void)
    355{
    356	struct btrfsic_block *b;
    357
    358	b = kzalloc(sizeof(*b), GFP_NOFS);
    359	if (NULL != b)
    360		btrfsic_block_init(b);
    361
    362	return b;
    363}
    364
    365static void btrfsic_block_free(struct btrfsic_block *b)
    366{
    367	BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
    368	kfree(b);
    369}
    370
    371static void btrfsic_block_link_init(struct btrfsic_block_link *l)
    372{
    373	l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
    374	l->ref_cnt = 1;
    375	INIT_LIST_HEAD(&l->node_ref_to);
    376	INIT_LIST_HEAD(&l->node_ref_from);
    377	INIT_LIST_HEAD(&l->collision_resolving_node);
    378	l->block_ref_to = NULL;
    379	l->block_ref_from = NULL;
    380}
    381
    382static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
    383{
    384	struct btrfsic_block_link *l;
    385
    386	l = kzalloc(sizeof(*l), GFP_NOFS);
    387	if (NULL != l)
    388		btrfsic_block_link_init(l);
    389
    390	return l;
    391}
    392
    393static void btrfsic_block_link_free(struct btrfsic_block_link *l)
    394{
    395	BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
    396	kfree(l);
    397}
    398
    399static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
    400{
    401	ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
    402	ds->bdev = NULL;
    403	ds->state = NULL;
    404	INIT_LIST_HEAD(&ds->collision_resolving_node);
    405	ds->last_flush_gen = 0;
    406	btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
    407	ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
    408	ds->dummy_block_for_bio_bh_flush.dev_state = ds;
    409}
    410
    411static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
    412{
    413	struct btrfsic_dev_state *ds;
    414
    415	ds = kzalloc(sizeof(*ds), GFP_NOFS);
    416	if (NULL != ds)
    417		btrfsic_dev_state_init(ds);
    418
    419	return ds;
    420}
    421
    422static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
    423{
    424	BUG_ON(!(NULL == ds ||
    425		 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
    426	kfree(ds);
    427}
    428
    429static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
    430{
    431	int i;
    432
    433	for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
    434		INIT_LIST_HEAD(h->table + i);
    435}
    436
    437static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
    438					struct btrfsic_block_hashtable *h)
    439{
    440	const unsigned int hashval =
    441	    (((unsigned int)(b->dev_bytenr >> 16)) ^
    442	     ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
    443	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
    444
    445	list_add(&b->collision_resolving_node, h->table + hashval);
    446}
    447
    448static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
    449{
    450	list_del(&b->collision_resolving_node);
    451}
    452
    453static struct btrfsic_block *btrfsic_block_hashtable_lookup(
    454		struct block_device *bdev,
    455		u64 dev_bytenr,
    456		struct btrfsic_block_hashtable *h)
    457{
    458	const unsigned int hashval =
    459	    (((unsigned int)(dev_bytenr >> 16)) ^
    460	     ((unsigned int)((uintptr_t)bdev))) &
    461	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
    462	struct btrfsic_block *b;
    463
    464	list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
    465		if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
    466			return b;
    467	}
    468
    469	return NULL;
    470}
    471
    472static void btrfsic_block_link_hashtable_init(
    473		struct btrfsic_block_link_hashtable *h)
    474{
    475	int i;
    476
    477	for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
    478		INIT_LIST_HEAD(h->table + i);
    479}
    480
    481static void btrfsic_block_link_hashtable_add(
    482		struct btrfsic_block_link *l,
    483		struct btrfsic_block_link_hashtable *h)
    484{
    485	const unsigned int hashval =
    486	    (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
    487	     ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
    488	     ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
    489	     ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
    490	     & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
    491
    492	BUG_ON(NULL == l->block_ref_to);
    493	BUG_ON(NULL == l->block_ref_from);
    494	list_add(&l->collision_resolving_node, h->table + hashval);
    495}
    496
    497static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
    498{
    499	list_del(&l->collision_resolving_node);
    500}
    501
    502static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
    503		struct block_device *bdev_ref_to,
    504		u64 dev_bytenr_ref_to,
    505		struct block_device *bdev_ref_from,
    506		u64 dev_bytenr_ref_from,
    507		struct btrfsic_block_link_hashtable *h)
    508{
    509	const unsigned int hashval =
    510	    (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
    511	     ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
    512	     ((unsigned int)((uintptr_t)bdev_ref_to)) ^
    513	     ((unsigned int)((uintptr_t)bdev_ref_from))) &
    514	     (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
    515	struct btrfsic_block_link *l;
    516
    517	list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
    518		BUG_ON(NULL == l->block_ref_to);
    519		BUG_ON(NULL == l->block_ref_from);
    520		if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
    521		    l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
    522		    l->block_ref_from->dev_state->bdev == bdev_ref_from &&
    523		    l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
    524			return l;
    525	}
    526
    527	return NULL;
    528}
    529
    530static void btrfsic_dev_state_hashtable_init(
    531		struct btrfsic_dev_state_hashtable *h)
    532{
    533	int i;
    534
    535	for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
    536		INIT_LIST_HEAD(h->table + i);
    537}
    538
    539static void btrfsic_dev_state_hashtable_add(
    540		struct btrfsic_dev_state *ds,
    541		struct btrfsic_dev_state_hashtable *h)
    542{
    543	const unsigned int hashval =
    544	    (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
    545	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
    546
    547	list_add(&ds->collision_resolving_node, h->table + hashval);
    548}
    549
    550static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
    551{
    552	list_del(&ds->collision_resolving_node);
    553}
    554
    555static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
    556		struct btrfsic_dev_state_hashtable *h)
    557{
    558	const unsigned int hashval =
    559		dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1);
    560	struct btrfsic_dev_state *ds;
    561
    562	list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
    563		if (ds->bdev->bd_dev == dev)
    564			return ds;
    565	}
    566
    567	return NULL;
    568}
    569
    570static int btrfsic_process_superblock(struct btrfsic_state *state,
    571				      struct btrfs_fs_devices *fs_devices)
    572{
    573	struct btrfs_super_block *selected_super;
    574	struct list_head *dev_head = &fs_devices->devices;
    575	struct btrfs_device *device;
    576	struct btrfsic_dev_state *selected_dev_state = NULL;
    577	int ret = 0;
    578	int pass;
    579
    580	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
    581	if (!selected_super)
    582		return -ENOMEM;
    583
    584	list_for_each_entry(device, dev_head, dev_list) {
    585		int i;
    586		struct btrfsic_dev_state *dev_state;
    587
    588		if (!device->bdev || !device->name)
    589			continue;
    590
    591		dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev);
    592		BUG_ON(NULL == dev_state);
    593		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
    594			ret = btrfsic_process_superblock_dev_mirror(
    595					state, dev_state, device, i,
    596					&selected_dev_state, selected_super);
    597			if (0 != ret && 0 == i) {
    598				kfree(selected_super);
    599				return ret;
    600			}
    601		}
    602	}
    603
    604	if (NULL == state->latest_superblock) {
    605		pr_info("btrfsic: no superblock found!\n");
    606		kfree(selected_super);
    607		return -1;
    608	}
    609
    610	for (pass = 0; pass < 3; pass++) {
    611		int num_copies;
    612		int mirror_num;
    613		u64 next_bytenr;
    614
    615		switch (pass) {
    616		case 0:
    617			next_bytenr = btrfs_super_root(selected_super);
    618			if (state->print_mask &
    619			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
    620				pr_info("root@%llu\n", next_bytenr);
    621			break;
    622		case 1:
    623			next_bytenr = btrfs_super_chunk_root(selected_super);
    624			if (state->print_mask &
    625			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
    626				pr_info("chunk@%llu\n", next_bytenr);
    627			break;
    628		case 2:
    629			next_bytenr = btrfs_super_log_root(selected_super);
    630			if (0 == next_bytenr)
    631				continue;
    632			if (state->print_mask &
    633			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
    634				pr_info("log@%llu\n", next_bytenr);
    635			break;
    636		}
    637
    638		num_copies = btrfs_num_copies(state->fs_info, next_bytenr,
    639					      state->metablock_size);
    640		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
    641			pr_info("num_copies(log_bytenr=%llu) = %d\n",
    642			       next_bytenr, num_copies);
    643
    644		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
    645			struct btrfsic_block *next_block;
    646			struct btrfsic_block_data_ctx tmp_next_block_ctx;
    647			struct btrfsic_block_link *l;
    648
    649			ret = btrfsic_map_block(state, next_bytenr,
    650						state->metablock_size,
    651						&tmp_next_block_ctx,
    652						mirror_num);
    653			if (ret) {
    654				pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n",
    655				       next_bytenr, mirror_num);
    656				kfree(selected_super);
    657				return -1;
    658			}
    659
    660			next_block = btrfsic_block_hashtable_lookup(
    661					tmp_next_block_ctx.dev->bdev,
    662					tmp_next_block_ctx.dev_bytenr,
    663					&state->block_hashtable);
    664			BUG_ON(NULL == next_block);
    665
    666			l = btrfsic_block_link_hashtable_lookup(
    667					tmp_next_block_ctx.dev->bdev,
    668					tmp_next_block_ctx.dev_bytenr,
    669					state->latest_superblock->dev_state->
    670					bdev,
    671					state->latest_superblock->dev_bytenr,
    672					&state->block_link_hashtable);
    673			BUG_ON(NULL == l);
    674
    675			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
    676			if (ret < (int)PAGE_SIZE) {
    677				pr_info("btrfsic: read @logical %llu failed!\n",
    678				       tmp_next_block_ctx.start);
    679				btrfsic_release_block_ctx(&tmp_next_block_ctx);
    680				kfree(selected_super);
    681				return -1;
    682			}
    683
    684			ret = btrfsic_process_metablock(state,
    685							next_block,
    686							&tmp_next_block_ctx,
    687							BTRFS_MAX_LEVEL + 3, 1);
    688			btrfsic_release_block_ctx(&tmp_next_block_ctx);
    689		}
    690	}
    691
    692	kfree(selected_super);
    693	return ret;
    694}
    695
    696static int btrfsic_process_superblock_dev_mirror(
    697		struct btrfsic_state *state,
    698		struct btrfsic_dev_state *dev_state,
    699		struct btrfs_device *device,
    700		int superblock_mirror_num,
    701		struct btrfsic_dev_state **selected_dev_state,
    702		struct btrfs_super_block *selected_super)
    703{
    704	struct btrfs_fs_info *fs_info = state->fs_info;
    705	struct btrfs_super_block *super_tmp;
    706	u64 dev_bytenr;
    707	struct btrfsic_block *superblock_tmp;
    708	int pass;
    709	struct block_device *const superblock_bdev = device->bdev;
    710	struct page *page;
    711	struct address_space *mapping = superblock_bdev->bd_inode->i_mapping;
    712	int ret = 0;
    713
    714	/* super block bytenr is always the unmapped device bytenr */
    715	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
    716	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
    717		return -1;
    718
    719	page = read_cache_page_gfp(mapping, dev_bytenr >> PAGE_SHIFT, GFP_NOFS);
    720	if (IS_ERR(page))
    721		return -1;
    722
    723	super_tmp = page_address(page);
    724
    725	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
    726	    btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
    727	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
    728	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
    729	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
    730		ret = 0;
    731		goto out;
    732	}
    733
    734	superblock_tmp =
    735	    btrfsic_block_hashtable_lookup(superblock_bdev,
    736					   dev_bytenr,
    737					   &state->block_hashtable);
    738	if (NULL == superblock_tmp) {
    739		superblock_tmp = btrfsic_block_alloc();
    740		if (NULL == superblock_tmp) {
    741			ret = -1;
    742			goto out;
    743		}
    744		/* for superblock, only the dev_bytenr makes sense */
    745		superblock_tmp->dev_bytenr = dev_bytenr;
    746		superblock_tmp->dev_state = dev_state;
    747		superblock_tmp->logical_bytenr = dev_bytenr;
    748		superblock_tmp->generation = btrfs_super_generation(super_tmp);
    749		superblock_tmp->is_metadata = 1;
    750		superblock_tmp->is_superblock = 1;
    751		superblock_tmp->is_iodone = 1;
    752		superblock_tmp->never_written = 0;
    753		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
    754		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
    755			btrfs_info_in_rcu(fs_info,
    756			"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
    757				     superblock_bdev,
    758				     rcu_str_deref(device->name), dev_bytenr,
    759				     dev_state->bdev, dev_bytenr,
    760				     superblock_mirror_num);
    761		list_add(&superblock_tmp->all_blocks_node,
    762			 &state->all_blocks_list);
    763		btrfsic_block_hashtable_add(superblock_tmp,
    764					    &state->block_hashtable);
    765	}
    766
    767	/* select the one with the highest generation field */
    768	if (btrfs_super_generation(super_tmp) >
    769	    state->max_superblock_generation ||
    770	    0 == state->max_superblock_generation) {
    771		memcpy(selected_super, super_tmp, sizeof(*selected_super));
    772		*selected_dev_state = dev_state;
    773		state->max_superblock_generation =
    774		    btrfs_super_generation(super_tmp);
    775		state->latest_superblock = superblock_tmp;
    776	}
    777
    778	for (pass = 0; pass < 3; pass++) {
    779		u64 next_bytenr;
    780		int num_copies;
    781		int mirror_num;
    782		const char *additional_string = NULL;
    783		struct btrfs_disk_key tmp_disk_key;
    784
    785		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
    786		tmp_disk_key.offset = 0;
    787		switch (pass) {
    788		case 0:
    789			btrfs_set_disk_key_objectid(&tmp_disk_key,
    790						    BTRFS_ROOT_TREE_OBJECTID);
    791			additional_string = "initial root ";
    792			next_bytenr = btrfs_super_root(super_tmp);
    793			break;
    794		case 1:
    795			btrfs_set_disk_key_objectid(&tmp_disk_key,
    796						    BTRFS_CHUNK_TREE_OBJECTID);
    797			additional_string = "initial chunk ";
    798			next_bytenr = btrfs_super_chunk_root(super_tmp);
    799			break;
    800		case 2:
    801			btrfs_set_disk_key_objectid(&tmp_disk_key,
    802						    BTRFS_TREE_LOG_OBJECTID);
    803			additional_string = "initial log ";
    804			next_bytenr = btrfs_super_log_root(super_tmp);
    805			if (0 == next_bytenr)
    806				continue;
    807			break;
    808		}
    809
    810		num_copies = btrfs_num_copies(fs_info, next_bytenr,
    811					      state->metablock_size);
    812		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
    813			pr_info("num_copies(log_bytenr=%llu) = %d\n",
    814			       next_bytenr, num_copies);
    815		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
    816			struct btrfsic_block *next_block;
    817			struct btrfsic_block_data_ctx tmp_next_block_ctx;
    818			struct btrfsic_block_link *l;
    819
    820			if (btrfsic_map_block(state, next_bytenr,
    821					      state->metablock_size,
    822					      &tmp_next_block_ctx,
    823					      mirror_num)) {
    824				pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
    825				       next_bytenr, mirror_num);
    826				ret = -1;
    827				goto out;
    828			}
    829
    830			next_block = btrfsic_block_lookup_or_add(
    831					state, &tmp_next_block_ctx,
    832					additional_string, 1, 1, 0,
    833					mirror_num, NULL);
    834			if (NULL == next_block) {
    835				btrfsic_release_block_ctx(&tmp_next_block_ctx);
    836				ret = -1;
    837				goto out;
    838			}
    839
    840			next_block->disk_key = tmp_disk_key;
    841			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
    842			l = btrfsic_block_link_lookup_or_add(
    843					state, &tmp_next_block_ctx,
    844					next_block, superblock_tmp,
    845					BTRFSIC_GENERATION_UNKNOWN);
    846			btrfsic_release_block_ctx(&tmp_next_block_ctx);
    847			if (NULL == l) {
    848				ret = -1;
    849				goto out;
    850			}
    851		}
    852	}
    853	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
    854		btrfsic_dump_tree_sub(state, superblock_tmp, 0);
    855
    856out:
    857	put_page(page);
    858	return ret;
    859}
    860
    861static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
    862{
    863	struct btrfsic_stack_frame *sf;
    864
    865	sf = kzalloc(sizeof(*sf), GFP_NOFS);
    866	if (sf)
    867		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
    868	return sf;
    869}
    870
    871static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
    872{
    873	BUG_ON(!(NULL == sf ||
    874		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
    875	kfree(sf);
    876}
    877
    878static noinline_for_stack int btrfsic_process_metablock(
    879		struct btrfsic_state *state,
    880		struct btrfsic_block *const first_block,
    881		struct btrfsic_block_data_ctx *const first_block_ctx,
    882		int first_limit_nesting, int force_iodone_flag)
    883{
    884	struct btrfsic_stack_frame initial_stack_frame = { 0 };
    885	struct btrfsic_stack_frame *sf;
    886	struct btrfsic_stack_frame *next_stack;
    887	struct btrfs_header *const first_hdr =
    888		(struct btrfs_header *)first_block_ctx->datav[0];
    889
    890	BUG_ON(!first_hdr);
    891	sf = &initial_stack_frame;
    892	sf->error = 0;
    893	sf->i = -1;
    894	sf->limit_nesting = first_limit_nesting;
    895	sf->block = first_block;
    896	sf->block_ctx = first_block_ctx;
    897	sf->next_block = NULL;
    898	sf->hdr = first_hdr;
    899	sf->prev = NULL;
    900
    901continue_with_new_stack_frame:
    902	sf->block->generation = btrfs_stack_header_generation(sf->hdr);
    903	if (0 == sf->hdr->level) {
    904		struct btrfs_leaf *const leafhdr =
    905		    (struct btrfs_leaf *)sf->hdr;
    906
    907		if (-1 == sf->i) {
    908			sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
    909
    910			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
    911				pr_info("leaf %llu items %d generation %llu owner %llu\n",
    912				       sf->block_ctx->start, sf->nr,
    913				       btrfs_stack_header_generation(
    914					       &leafhdr->header),
    915				       btrfs_stack_header_owner(
    916					       &leafhdr->header));
    917		}
    918
    919continue_with_current_leaf_stack_frame:
    920		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
    921			sf->i++;
    922			sf->num_copies = 0;
    923		}
    924
    925		if (sf->i < sf->nr) {
    926			struct btrfs_item disk_item;
    927			u32 disk_item_offset =
    928				(uintptr_t)(leafhdr->items + sf->i) -
    929				(uintptr_t)leafhdr;
    930			struct btrfs_disk_key *disk_key;
    931			u8 type;
    932			u32 item_offset;
    933			u32 item_size;
    934
    935			if (disk_item_offset + sizeof(struct btrfs_item) >
    936			    sf->block_ctx->len) {
    937leaf_item_out_of_bounce_error:
    938				pr_info(
    939		"btrfsic: leaf item out of bounce at logical %llu, dev %pg\n",
    940				       sf->block_ctx->start,
    941				       sf->block_ctx->dev->bdev);
    942				goto one_stack_frame_backwards;
    943			}
    944			btrfsic_read_from_block_data(sf->block_ctx,
    945						     &disk_item,
    946						     disk_item_offset,
    947						     sizeof(struct btrfs_item));
    948			item_offset = btrfs_stack_item_offset(&disk_item);
    949			item_size = btrfs_stack_item_size(&disk_item);
    950			disk_key = &disk_item.key;
    951			type = btrfs_disk_key_type(disk_key);
    952
    953			if (BTRFS_ROOT_ITEM_KEY == type) {
    954				struct btrfs_root_item root_item;
    955				u32 root_item_offset;
    956				u64 next_bytenr;
    957
    958				root_item_offset = item_offset +
    959					offsetof(struct btrfs_leaf, items);
    960				if (root_item_offset + item_size >
    961				    sf->block_ctx->len)
    962					goto leaf_item_out_of_bounce_error;
    963				btrfsic_read_from_block_data(
    964					sf->block_ctx, &root_item,
    965					root_item_offset,
    966					item_size);
    967				next_bytenr = btrfs_root_bytenr(&root_item);
    968
    969				sf->error =
    970				    btrfsic_create_link_to_next_block(
    971						state,
    972						sf->block,
    973						sf->block_ctx,
    974						next_bytenr,
    975						sf->limit_nesting,
    976						&sf->next_block_ctx,
    977						&sf->next_block,
    978						force_iodone_flag,
    979						&sf->num_copies,
    980						&sf->mirror_num,
    981						disk_key,
    982						btrfs_root_generation(
    983						&root_item));
    984				if (sf->error)
    985					goto one_stack_frame_backwards;
    986
    987				if (NULL != sf->next_block) {
    988					struct btrfs_header *const next_hdr =
    989					    (struct btrfs_header *)
    990					    sf->next_block_ctx.datav[0];
    991
    992					next_stack =
    993					    btrfsic_stack_frame_alloc();
    994					if (NULL == next_stack) {
    995						sf->error = -1;
    996						btrfsic_release_block_ctx(
    997								&sf->
    998								next_block_ctx);
    999						goto one_stack_frame_backwards;
   1000					}
   1001
   1002					next_stack->i = -1;
   1003					next_stack->block = sf->next_block;
   1004					next_stack->block_ctx =
   1005					    &sf->next_block_ctx;
   1006					next_stack->next_block = NULL;
   1007					next_stack->hdr = next_hdr;
   1008					next_stack->limit_nesting =
   1009					    sf->limit_nesting - 1;
   1010					next_stack->prev = sf;
   1011					sf = next_stack;
   1012					goto continue_with_new_stack_frame;
   1013				}
   1014			} else if (BTRFS_EXTENT_DATA_KEY == type &&
   1015				   state->include_extent_data) {
   1016				sf->error = btrfsic_handle_extent_data(
   1017						state,
   1018						sf->block,
   1019						sf->block_ctx,
   1020						item_offset,
   1021						force_iodone_flag);
   1022				if (sf->error)
   1023					goto one_stack_frame_backwards;
   1024			}
   1025
   1026			goto continue_with_current_leaf_stack_frame;
   1027		}
   1028	} else {
   1029		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
   1030
   1031		if (-1 == sf->i) {
   1032			sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
   1033
   1034			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1035				pr_info("node %llu level %d items %d generation %llu owner %llu\n",
   1036				       sf->block_ctx->start,
   1037				       nodehdr->header.level, sf->nr,
   1038				       btrfs_stack_header_generation(
   1039				       &nodehdr->header),
   1040				       btrfs_stack_header_owner(
   1041				       &nodehdr->header));
   1042		}
   1043
   1044continue_with_current_node_stack_frame:
   1045		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
   1046			sf->i++;
   1047			sf->num_copies = 0;
   1048		}
   1049
   1050		if (sf->i < sf->nr) {
   1051			struct btrfs_key_ptr key_ptr;
   1052			u32 key_ptr_offset;
   1053			u64 next_bytenr;
   1054
   1055			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
   1056					  (uintptr_t)nodehdr;
   1057			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
   1058			    sf->block_ctx->len) {
   1059				pr_info(
   1060		"btrfsic: node item out of bounce at logical %llu, dev %pg\n",
   1061				       sf->block_ctx->start,
   1062				       sf->block_ctx->dev->bdev);
   1063				goto one_stack_frame_backwards;
   1064			}
   1065			btrfsic_read_from_block_data(
   1066				sf->block_ctx, &key_ptr, key_ptr_offset,
   1067				sizeof(struct btrfs_key_ptr));
   1068			next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
   1069
   1070			sf->error = btrfsic_create_link_to_next_block(
   1071					state,
   1072					sf->block,
   1073					sf->block_ctx,
   1074					next_bytenr,
   1075					sf->limit_nesting,
   1076					&sf->next_block_ctx,
   1077					&sf->next_block,
   1078					force_iodone_flag,
   1079					&sf->num_copies,
   1080					&sf->mirror_num,
   1081					&key_ptr.key,
   1082					btrfs_stack_key_generation(&key_ptr));
   1083			if (sf->error)
   1084				goto one_stack_frame_backwards;
   1085
   1086			if (NULL != sf->next_block) {
   1087				struct btrfs_header *const next_hdr =
   1088				    (struct btrfs_header *)
   1089				    sf->next_block_ctx.datav[0];
   1090
   1091				next_stack = btrfsic_stack_frame_alloc();
   1092				if (NULL == next_stack) {
   1093					sf->error = -1;
   1094					goto one_stack_frame_backwards;
   1095				}
   1096
   1097				next_stack->i = -1;
   1098				next_stack->block = sf->next_block;
   1099				next_stack->block_ctx = &sf->next_block_ctx;
   1100				next_stack->next_block = NULL;
   1101				next_stack->hdr = next_hdr;
   1102				next_stack->limit_nesting =
   1103				    sf->limit_nesting - 1;
   1104				next_stack->prev = sf;
   1105				sf = next_stack;
   1106				goto continue_with_new_stack_frame;
   1107			}
   1108
   1109			goto continue_with_current_node_stack_frame;
   1110		}
   1111	}
   1112
   1113one_stack_frame_backwards:
   1114	if (NULL != sf->prev) {
   1115		struct btrfsic_stack_frame *const prev = sf->prev;
   1116
   1117		/* the one for the initial block is freed in the caller */
   1118		btrfsic_release_block_ctx(sf->block_ctx);
   1119
   1120		if (sf->error) {
   1121			prev->error = sf->error;
   1122			btrfsic_stack_frame_free(sf);
   1123			sf = prev;
   1124			goto one_stack_frame_backwards;
   1125		}
   1126
   1127		btrfsic_stack_frame_free(sf);
   1128		sf = prev;
   1129		goto continue_with_new_stack_frame;
   1130	} else {
   1131		BUG_ON(&initial_stack_frame != sf);
   1132	}
   1133
   1134	return sf->error;
   1135}
   1136
   1137static void btrfsic_read_from_block_data(
   1138	struct btrfsic_block_data_ctx *block_ctx,
   1139	void *dstv, u32 offset, size_t len)
   1140{
   1141	size_t cur;
   1142	size_t pgoff;
   1143	char *kaddr;
   1144	char *dst = (char *)dstv;
   1145	size_t start_offset = offset_in_page(block_ctx->start);
   1146	unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
   1147
   1148	WARN_ON(offset + len > block_ctx->len);
   1149	pgoff = offset_in_page(start_offset + offset);
   1150
   1151	while (len > 0) {
   1152		cur = min(len, ((size_t)PAGE_SIZE - pgoff));
   1153		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
   1154		kaddr = block_ctx->datav[i];
   1155		memcpy(dst, kaddr + pgoff, cur);
   1156
   1157		dst += cur;
   1158		len -= cur;
   1159		pgoff = 0;
   1160		i++;
   1161	}
   1162}
   1163
   1164static int btrfsic_create_link_to_next_block(
   1165		struct btrfsic_state *state,
   1166		struct btrfsic_block *block,
   1167		struct btrfsic_block_data_ctx *block_ctx,
   1168		u64 next_bytenr,
   1169		int limit_nesting,
   1170		struct btrfsic_block_data_ctx *next_block_ctx,
   1171		struct btrfsic_block **next_blockp,
   1172		int force_iodone_flag,
   1173		int *num_copiesp, int *mirror_nump,
   1174		struct btrfs_disk_key *disk_key,
   1175		u64 parent_generation)
   1176{
   1177	struct btrfs_fs_info *fs_info = state->fs_info;
   1178	struct btrfsic_block *next_block = NULL;
   1179	int ret;
   1180	struct btrfsic_block_link *l;
   1181	int did_alloc_block_link;
   1182	int block_was_created;
   1183
   1184	*next_blockp = NULL;
   1185	if (0 == *num_copiesp) {
   1186		*num_copiesp = btrfs_num_copies(fs_info, next_bytenr,
   1187						state->metablock_size);
   1188		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
   1189			pr_info("num_copies(log_bytenr=%llu) = %d\n",
   1190			       next_bytenr, *num_copiesp);
   1191		*mirror_nump = 1;
   1192	}
   1193
   1194	if (*mirror_nump > *num_copiesp)
   1195		return 0;
   1196
   1197	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1198		pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n",
   1199		       *mirror_nump);
   1200	ret = btrfsic_map_block(state, next_bytenr,
   1201				state->metablock_size,
   1202				next_block_ctx, *mirror_nump);
   1203	if (ret) {
   1204		pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
   1205		       next_bytenr, *mirror_nump);
   1206		btrfsic_release_block_ctx(next_block_ctx);
   1207		*next_blockp = NULL;
   1208		return -1;
   1209	}
   1210
   1211	next_block = btrfsic_block_lookup_or_add(state,
   1212						 next_block_ctx, "referenced ",
   1213						 1, force_iodone_flag,
   1214						 !force_iodone_flag,
   1215						 *mirror_nump,
   1216						 &block_was_created);
   1217	if (NULL == next_block) {
   1218		btrfsic_release_block_ctx(next_block_ctx);
   1219		*next_blockp = NULL;
   1220		return -1;
   1221	}
   1222	if (block_was_created) {
   1223		l = NULL;
   1224		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
   1225	} else {
   1226		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
   1227			if (next_block->logical_bytenr != next_bytenr &&
   1228			    !(!next_block->is_metadata &&
   1229			      0 == next_block->logical_bytenr))
   1230				pr_info(
   1231"referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
   1232				       next_bytenr, next_block_ctx->dev->bdev,
   1233				       next_block_ctx->dev_bytenr, *mirror_nump,
   1234				       btrfsic_get_block_type(state,
   1235							      next_block),
   1236				       next_block->logical_bytenr);
   1237			else
   1238				pr_info(
   1239		"referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n",
   1240				       next_bytenr, next_block_ctx->dev->bdev,
   1241				       next_block_ctx->dev_bytenr, *mirror_nump,
   1242				       btrfsic_get_block_type(state,
   1243							      next_block));
   1244		}
   1245		next_block->logical_bytenr = next_bytenr;
   1246
   1247		next_block->mirror_num = *mirror_nump;
   1248		l = btrfsic_block_link_hashtable_lookup(
   1249				next_block_ctx->dev->bdev,
   1250				next_block_ctx->dev_bytenr,
   1251				block_ctx->dev->bdev,
   1252				block_ctx->dev_bytenr,
   1253				&state->block_link_hashtable);
   1254	}
   1255
   1256	next_block->disk_key = *disk_key;
   1257	if (NULL == l) {
   1258		l = btrfsic_block_link_alloc();
   1259		if (NULL == l) {
   1260			btrfsic_release_block_ctx(next_block_ctx);
   1261			*next_blockp = NULL;
   1262			return -1;
   1263		}
   1264
   1265		did_alloc_block_link = 1;
   1266		l->block_ref_to = next_block;
   1267		l->block_ref_from = block;
   1268		l->ref_cnt = 1;
   1269		l->parent_generation = parent_generation;
   1270
   1271		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1272			btrfsic_print_add_link(state, l);
   1273
   1274		list_add(&l->node_ref_to, &block->ref_to_list);
   1275		list_add(&l->node_ref_from, &next_block->ref_from_list);
   1276
   1277		btrfsic_block_link_hashtable_add(l,
   1278						 &state->block_link_hashtable);
   1279	} else {
   1280		did_alloc_block_link = 0;
   1281		if (0 == limit_nesting) {
   1282			l->ref_cnt++;
   1283			l->parent_generation = parent_generation;
   1284			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1285				btrfsic_print_add_link(state, l);
   1286		}
   1287	}
   1288
   1289	if (limit_nesting > 0 && did_alloc_block_link) {
   1290		ret = btrfsic_read_block(state, next_block_ctx);
   1291		if (ret < (int)next_block_ctx->len) {
   1292			pr_info("btrfsic: read block @logical %llu failed!\n",
   1293			       next_bytenr);
   1294			btrfsic_release_block_ctx(next_block_ctx);
   1295			*next_blockp = NULL;
   1296			return -1;
   1297		}
   1298
   1299		*next_blockp = next_block;
   1300	} else {
   1301		*next_blockp = NULL;
   1302	}
   1303	(*mirror_nump)++;
   1304
   1305	return 0;
   1306}
   1307
   1308static int btrfsic_handle_extent_data(
   1309		struct btrfsic_state *state,
   1310		struct btrfsic_block *block,
   1311		struct btrfsic_block_data_ctx *block_ctx,
   1312		u32 item_offset, int force_iodone_flag)
   1313{
   1314	struct btrfs_fs_info *fs_info = state->fs_info;
   1315	struct btrfs_file_extent_item file_extent_item;
   1316	u64 file_extent_item_offset;
   1317	u64 next_bytenr;
   1318	u64 num_bytes;
   1319	u64 generation;
   1320	struct btrfsic_block_link *l;
   1321	int ret;
   1322
   1323	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
   1324				  item_offset;
   1325	if (file_extent_item_offset +
   1326	    offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
   1327	    block_ctx->len) {
   1328		pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
   1329		       block_ctx->start, block_ctx->dev->bdev);
   1330		return -1;
   1331	}
   1332
   1333	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
   1334		file_extent_item_offset,
   1335		offsetof(struct btrfs_file_extent_item, disk_num_bytes));
   1336	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
   1337	    btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
   1338		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
   1339			pr_info("extent_data: type %u, disk_bytenr = %llu\n",
   1340			       file_extent_item.type,
   1341			       btrfs_stack_file_extent_disk_bytenr(
   1342			       &file_extent_item));
   1343		return 0;
   1344	}
   1345
   1346	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
   1347	    block_ctx->len) {
   1348		pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
   1349		       block_ctx->start, block_ctx->dev->bdev);
   1350		return -1;
   1351	}
   1352	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
   1353				     file_extent_item_offset,
   1354				     sizeof(struct btrfs_file_extent_item));
   1355	next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
   1356	if (btrfs_stack_file_extent_compression(&file_extent_item) ==
   1357	    BTRFS_COMPRESS_NONE) {
   1358		next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
   1359		num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
   1360	} else {
   1361		num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
   1362	}
   1363	generation = btrfs_stack_file_extent_generation(&file_extent_item);
   1364
   1365	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
   1366		pr_info("extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n",
   1367		       file_extent_item.type,
   1368		       btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
   1369		       btrfs_stack_file_extent_offset(&file_extent_item),
   1370		       num_bytes);
   1371	while (num_bytes > 0) {
   1372		u32 chunk_len;
   1373		int num_copies;
   1374		int mirror_num;
   1375
   1376		if (num_bytes > state->datablock_size)
   1377			chunk_len = state->datablock_size;
   1378		else
   1379			chunk_len = num_bytes;
   1380
   1381		num_copies = btrfs_num_copies(fs_info, next_bytenr,
   1382					      state->datablock_size);
   1383		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
   1384			pr_info("num_copies(log_bytenr=%llu) = %d\n",
   1385			       next_bytenr, num_copies);
   1386		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
   1387			struct btrfsic_block_data_ctx next_block_ctx;
   1388			struct btrfsic_block *next_block;
   1389			int block_was_created;
   1390
   1391			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1392				pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n",
   1393					mirror_num);
   1394			if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
   1395				pr_info("\tdisk_bytenr = %llu, num_bytes %u\n",
   1396				       next_bytenr, chunk_len);
   1397			ret = btrfsic_map_block(state, next_bytenr,
   1398						chunk_len, &next_block_ctx,
   1399						mirror_num);
   1400			if (ret) {
   1401				pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
   1402				       next_bytenr, mirror_num);
   1403				return -1;
   1404			}
   1405
   1406			next_block = btrfsic_block_lookup_or_add(
   1407					state,
   1408					&next_block_ctx,
   1409					"referenced ",
   1410					0,
   1411					force_iodone_flag,
   1412					!force_iodone_flag,
   1413					mirror_num,
   1414					&block_was_created);
   1415			if (NULL == next_block) {
   1416				btrfsic_release_block_ctx(&next_block_ctx);
   1417				return -1;
   1418			}
   1419			if (!block_was_created) {
   1420				if ((state->print_mask &
   1421				     BTRFSIC_PRINT_MASK_VERBOSE) &&
   1422				    next_block->logical_bytenr != next_bytenr &&
   1423				    !(!next_block->is_metadata &&
   1424				      0 == next_block->logical_bytenr)) {
   1425					pr_info(
   1426"referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n",
   1427					       next_bytenr,
   1428					       next_block_ctx.dev->bdev,
   1429					       next_block_ctx.dev_bytenr,
   1430					       mirror_num,
   1431					       next_block->logical_bytenr);
   1432				}
   1433				next_block->logical_bytenr = next_bytenr;
   1434				next_block->mirror_num = mirror_num;
   1435			}
   1436
   1437			l = btrfsic_block_link_lookup_or_add(state,
   1438							     &next_block_ctx,
   1439							     next_block, block,
   1440							     generation);
   1441			btrfsic_release_block_ctx(&next_block_ctx);
   1442			if (NULL == l)
   1443				return -1;
   1444		}
   1445
   1446		next_bytenr += chunk_len;
   1447		num_bytes -= chunk_len;
   1448	}
   1449
   1450	return 0;
   1451}
   1452
   1453static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
   1454			     struct btrfsic_block_data_ctx *block_ctx_out,
   1455			     int mirror_num)
   1456{
   1457	struct btrfs_fs_info *fs_info = state->fs_info;
   1458	int ret;
   1459	u64 length;
   1460	struct btrfs_io_context *multi = NULL;
   1461	struct btrfs_device *device;
   1462
   1463	length = len;
   1464	ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
   1465			      bytenr, &length, &multi, mirror_num);
   1466
   1467	if (ret) {
   1468		block_ctx_out->start = 0;
   1469		block_ctx_out->dev_bytenr = 0;
   1470		block_ctx_out->len = 0;
   1471		block_ctx_out->dev = NULL;
   1472		block_ctx_out->datav = NULL;
   1473		block_ctx_out->pagev = NULL;
   1474		block_ctx_out->mem_to_free = NULL;
   1475
   1476		return ret;
   1477	}
   1478
   1479	device = multi->stripes[0].dev;
   1480	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
   1481	    !device->bdev || !device->name)
   1482		block_ctx_out->dev = NULL;
   1483	else
   1484		block_ctx_out->dev = btrfsic_dev_state_lookup(
   1485							device->bdev->bd_dev);
   1486	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
   1487	block_ctx_out->start = bytenr;
   1488	block_ctx_out->len = len;
   1489	block_ctx_out->datav = NULL;
   1490	block_ctx_out->pagev = NULL;
   1491	block_ctx_out->mem_to_free = NULL;
   1492
   1493	kfree(multi);
   1494	if (NULL == block_ctx_out->dev) {
   1495		ret = -ENXIO;
   1496		pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
   1497	}
   1498
   1499	return ret;
   1500}
   1501
   1502static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
   1503{
   1504	if (block_ctx->mem_to_free) {
   1505		unsigned int num_pages;
   1506
   1507		BUG_ON(!block_ctx->datav);
   1508		BUG_ON(!block_ctx->pagev);
   1509		num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
   1510			    PAGE_SHIFT;
   1511		/* Pages must be unmapped in reverse order */
   1512		while (num_pages > 0) {
   1513			num_pages--;
   1514			if (block_ctx->datav[num_pages])
   1515				block_ctx->datav[num_pages] = NULL;
   1516			if (block_ctx->pagev[num_pages]) {
   1517				__free_page(block_ctx->pagev[num_pages]);
   1518				block_ctx->pagev[num_pages] = NULL;
   1519			}
   1520		}
   1521
   1522		kfree(block_ctx->mem_to_free);
   1523		block_ctx->mem_to_free = NULL;
   1524		block_ctx->pagev = NULL;
   1525		block_ctx->datav = NULL;
   1526	}
   1527}
   1528
   1529static int btrfsic_read_block(struct btrfsic_state *state,
   1530			      struct btrfsic_block_data_ctx *block_ctx)
   1531{
   1532	unsigned int num_pages;
   1533	unsigned int i;
   1534	size_t size;
   1535	u64 dev_bytenr;
   1536	int ret;
   1537
   1538	BUG_ON(block_ctx->datav);
   1539	BUG_ON(block_ctx->pagev);
   1540	BUG_ON(block_ctx->mem_to_free);
   1541	if (!PAGE_ALIGNED(block_ctx->dev_bytenr)) {
   1542		pr_info("btrfsic: read_block() with unaligned bytenr %llu\n",
   1543		       block_ctx->dev_bytenr);
   1544		return -1;
   1545	}
   1546
   1547	num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
   1548		    PAGE_SHIFT;
   1549	size = sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev);
   1550	block_ctx->mem_to_free = kcalloc(num_pages, size, GFP_NOFS);
   1551	if (!block_ctx->mem_to_free)
   1552		return -ENOMEM;
   1553	block_ctx->datav = block_ctx->mem_to_free;
   1554	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
   1555	ret = btrfs_alloc_page_array(num_pages, block_ctx->pagev);
   1556	if (ret)
   1557		return ret;
   1558
   1559	dev_bytenr = block_ctx->dev_bytenr;
   1560	for (i = 0; i < num_pages;) {
   1561		struct bio *bio;
   1562		unsigned int j;
   1563
   1564		bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
   1565				REQ_OP_READ, GFP_NOFS);
   1566		bio->bi_iter.bi_sector = dev_bytenr >> 9;
   1567
   1568		for (j = i; j < num_pages; j++) {
   1569			ret = bio_add_page(bio, block_ctx->pagev[j],
   1570					   PAGE_SIZE, 0);
   1571			if (PAGE_SIZE != ret)
   1572				break;
   1573		}
   1574		if (j == i) {
   1575			pr_info("btrfsic: error, failed to add a single page!\n");
   1576			return -1;
   1577		}
   1578		if (submit_bio_wait(bio)) {
   1579			pr_info("btrfsic: read error at logical %llu dev %pg!\n",
   1580			       block_ctx->start, block_ctx->dev->bdev);
   1581			bio_put(bio);
   1582			return -1;
   1583		}
   1584		bio_put(bio);
   1585		dev_bytenr += (j - i) * PAGE_SIZE;
   1586		i = j;
   1587	}
   1588	for (i = 0; i < num_pages; i++)
   1589		block_ctx->datav[i] = page_address(block_ctx->pagev[i]);
   1590
   1591	return block_ctx->len;
   1592}
   1593
   1594static void btrfsic_dump_database(struct btrfsic_state *state)
   1595{
   1596	const struct btrfsic_block *b_all;
   1597
   1598	BUG_ON(NULL == state);
   1599
   1600	pr_info("all_blocks_list:\n");
   1601	list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
   1602		const struct btrfsic_block_link *l;
   1603
   1604		pr_info("%c-block @%llu (%pg/%llu/%d)\n",
   1605		       btrfsic_get_block_type(state, b_all),
   1606		       b_all->logical_bytenr, b_all->dev_state->bdev,
   1607		       b_all->dev_bytenr, b_all->mirror_num);
   1608
   1609		list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
   1610			pr_info(
   1611		" %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n",
   1612			       btrfsic_get_block_type(state, b_all),
   1613			       b_all->logical_bytenr, b_all->dev_state->bdev,
   1614			       b_all->dev_bytenr, b_all->mirror_num,
   1615			       l->ref_cnt,
   1616			       btrfsic_get_block_type(state, l->block_ref_to),
   1617			       l->block_ref_to->logical_bytenr,
   1618			       l->block_ref_to->dev_state->bdev,
   1619			       l->block_ref_to->dev_bytenr,
   1620			       l->block_ref_to->mirror_num);
   1621		}
   1622
   1623		list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
   1624			pr_info(
   1625		" %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
   1626			       btrfsic_get_block_type(state, b_all),
   1627			       b_all->logical_bytenr, b_all->dev_state->bdev,
   1628			       b_all->dev_bytenr, b_all->mirror_num,
   1629			       l->ref_cnt,
   1630			       btrfsic_get_block_type(state, l->block_ref_from),
   1631			       l->block_ref_from->logical_bytenr,
   1632			       l->block_ref_from->dev_state->bdev,
   1633			       l->block_ref_from->dev_bytenr,
   1634			       l->block_ref_from->mirror_num);
   1635		}
   1636
   1637		pr_info("\n");
   1638	}
   1639}
   1640
   1641/*
   1642 * Test whether the disk block contains a tree block (leaf or node)
   1643 * (note that this test fails for the super block)
   1644 */
   1645static noinline_for_stack int btrfsic_test_for_metadata(
   1646		struct btrfsic_state *state,
   1647		char **datav, unsigned int num_pages)
   1648{
   1649	struct btrfs_fs_info *fs_info = state->fs_info;
   1650	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
   1651	struct btrfs_header *h;
   1652	u8 csum[BTRFS_CSUM_SIZE];
   1653	unsigned int i;
   1654
   1655	if (num_pages * PAGE_SIZE < state->metablock_size)
   1656		return 1; /* not metadata */
   1657	num_pages = state->metablock_size >> PAGE_SHIFT;
   1658	h = (struct btrfs_header *)datav[0];
   1659
   1660	if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
   1661		return 1;
   1662
   1663	shash->tfm = fs_info->csum_shash;
   1664	crypto_shash_init(shash);
   1665
   1666	for (i = 0; i < num_pages; i++) {
   1667		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
   1668		size_t sublen = i ? PAGE_SIZE :
   1669				    (PAGE_SIZE - BTRFS_CSUM_SIZE);
   1670
   1671		crypto_shash_update(shash, data, sublen);
   1672	}
   1673	crypto_shash_final(shash, csum);
   1674	if (memcmp(csum, h->csum, fs_info->csum_size))
   1675		return 1;
   1676
   1677	return 0; /* is metadata */
   1678}
   1679
   1680static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
   1681					  u64 dev_bytenr, char **mapped_datav,
   1682					  unsigned int num_pages,
   1683					  struct bio *bio, int *bio_is_patched,
   1684					  int submit_bio_bh_rw)
   1685{
   1686	int is_metadata;
   1687	struct btrfsic_block *block;
   1688	struct btrfsic_block_data_ctx block_ctx;
   1689	int ret;
   1690	struct btrfsic_state *state = dev_state->state;
   1691	struct block_device *bdev = dev_state->bdev;
   1692	unsigned int processed_len;
   1693
   1694	if (NULL != bio_is_patched)
   1695		*bio_is_patched = 0;
   1696
   1697again:
   1698	if (num_pages == 0)
   1699		return;
   1700
   1701	processed_len = 0;
   1702	is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
   1703						      num_pages));
   1704
   1705	block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
   1706					       &state->block_hashtable);
   1707	if (NULL != block) {
   1708		u64 bytenr = 0;
   1709		struct btrfsic_block_link *l, *tmp;
   1710
   1711		if (block->is_superblock) {
   1712			bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
   1713						    mapped_datav[0]);
   1714			if (num_pages * PAGE_SIZE <
   1715			    BTRFS_SUPER_INFO_SIZE) {
   1716				pr_info("btrfsic: cannot work with too short bios!\n");
   1717				return;
   1718			}
   1719			is_metadata = 1;
   1720			BUG_ON(!PAGE_ALIGNED(BTRFS_SUPER_INFO_SIZE));
   1721			processed_len = BTRFS_SUPER_INFO_SIZE;
   1722			if (state->print_mask &
   1723			    BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
   1724				pr_info("[before new superblock is written]:\n");
   1725				btrfsic_dump_tree_sub(state, block, 0);
   1726			}
   1727		}
   1728		if (is_metadata) {
   1729			if (!block->is_superblock) {
   1730				if (num_pages * PAGE_SIZE <
   1731				    state->metablock_size) {
   1732					pr_info("btrfsic: cannot work with too short bios!\n");
   1733					return;
   1734				}
   1735				processed_len = state->metablock_size;
   1736				bytenr = btrfs_stack_header_bytenr(
   1737						(struct btrfs_header *)
   1738						mapped_datav[0]);
   1739				btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
   1740							       dev_state,
   1741							       dev_bytenr);
   1742			}
   1743			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
   1744				if (block->logical_bytenr != bytenr &&
   1745				    !(!block->is_metadata &&
   1746				      block->logical_bytenr == 0))
   1747					pr_info(
   1748"written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
   1749					       bytenr, dev_state->bdev,
   1750					       dev_bytenr,
   1751					       block->mirror_num,
   1752					       btrfsic_get_block_type(state,
   1753								      block),
   1754					       block->logical_bytenr);
   1755				else
   1756					pr_info(
   1757		"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
   1758					       bytenr, dev_state->bdev,
   1759					       dev_bytenr, block->mirror_num,
   1760					       btrfsic_get_block_type(state,
   1761								      block));
   1762			}
   1763			block->logical_bytenr = bytenr;
   1764		} else {
   1765			if (num_pages * PAGE_SIZE <
   1766			    state->datablock_size) {
   1767				pr_info("btrfsic: cannot work with too short bios!\n");
   1768				return;
   1769			}
   1770			processed_len = state->datablock_size;
   1771			bytenr = block->logical_bytenr;
   1772			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1773				pr_info(
   1774		"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
   1775				       bytenr, dev_state->bdev, dev_bytenr,
   1776				       block->mirror_num,
   1777				       btrfsic_get_block_type(state, block));
   1778		}
   1779
   1780		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1781			pr_info("ref_to_list: %cE, ref_from_list: %cE\n",
   1782			       list_empty(&block->ref_to_list) ? ' ' : '!',
   1783			       list_empty(&block->ref_from_list) ? ' ' : '!');
   1784		if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
   1785			pr_info(
   1786"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
   1787			       btrfsic_get_block_type(state, block), bytenr,
   1788			       dev_state->bdev, dev_bytenr, block->mirror_num,
   1789			       block->generation,
   1790			       btrfs_disk_key_objectid(&block->disk_key),
   1791			       block->disk_key.type,
   1792			       btrfs_disk_key_offset(&block->disk_key),
   1793			       btrfs_stack_header_generation(
   1794				       (struct btrfs_header *) mapped_datav[0]),
   1795			       state->max_superblock_generation);
   1796			btrfsic_dump_tree(state);
   1797		}
   1798
   1799		if (!block->is_iodone && !block->never_written) {
   1800			pr_info(
   1801"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
   1802			       btrfsic_get_block_type(state, block), bytenr,
   1803			       dev_state->bdev, dev_bytenr, block->mirror_num,
   1804			       block->generation,
   1805			       btrfs_stack_header_generation(
   1806				       (struct btrfs_header *)
   1807				       mapped_datav[0]));
   1808			/* it would not be safe to go on */
   1809			btrfsic_dump_tree(state);
   1810			goto continue_loop;
   1811		}
   1812
   1813		/*
   1814		 * Clear all references of this block. Do not free
   1815		 * the block itself even if is not referenced anymore
   1816		 * because it still carries valuable information
   1817		 * like whether it was ever written and IO completed.
   1818		 */
   1819		list_for_each_entry_safe(l, tmp, &block->ref_to_list,
   1820					 node_ref_to) {
   1821			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1822				btrfsic_print_rem_link(state, l);
   1823			l->ref_cnt--;
   1824			if (0 == l->ref_cnt) {
   1825				list_del(&l->node_ref_to);
   1826				list_del(&l->node_ref_from);
   1827				btrfsic_block_link_hashtable_remove(l);
   1828				btrfsic_block_link_free(l);
   1829			}
   1830		}
   1831
   1832		block_ctx.dev = dev_state;
   1833		block_ctx.dev_bytenr = dev_bytenr;
   1834		block_ctx.start = bytenr;
   1835		block_ctx.len = processed_len;
   1836		block_ctx.pagev = NULL;
   1837		block_ctx.mem_to_free = NULL;
   1838		block_ctx.datav = mapped_datav;
   1839
   1840		if (is_metadata || state->include_extent_data) {
   1841			block->never_written = 0;
   1842			block->iodone_w_error = 0;
   1843			if (NULL != bio) {
   1844				block->is_iodone = 0;
   1845				BUG_ON(NULL == bio_is_patched);
   1846				if (!*bio_is_patched) {
   1847					block->orig_bio_private =
   1848					    bio->bi_private;
   1849					block->orig_bio_end_io =
   1850					    bio->bi_end_io;
   1851					block->next_in_same_bio = NULL;
   1852					bio->bi_private = block;
   1853					bio->bi_end_io = btrfsic_bio_end_io;
   1854					*bio_is_patched = 1;
   1855				} else {
   1856					struct btrfsic_block *chained_block =
   1857					    (struct btrfsic_block *)
   1858					    bio->bi_private;
   1859
   1860					BUG_ON(NULL == chained_block);
   1861					block->orig_bio_private =
   1862					    chained_block->orig_bio_private;
   1863					block->orig_bio_end_io =
   1864					    chained_block->orig_bio_end_io;
   1865					block->next_in_same_bio = chained_block;
   1866					bio->bi_private = block;
   1867				}
   1868			} else {
   1869				block->is_iodone = 1;
   1870				block->orig_bio_private = NULL;
   1871				block->orig_bio_end_io = NULL;
   1872				block->next_in_same_bio = NULL;
   1873			}
   1874		}
   1875
   1876		block->flush_gen = dev_state->last_flush_gen + 1;
   1877		block->submit_bio_bh_rw = submit_bio_bh_rw;
   1878		if (is_metadata) {
   1879			block->logical_bytenr = bytenr;
   1880			block->is_metadata = 1;
   1881			if (block->is_superblock) {
   1882				BUG_ON(PAGE_SIZE !=
   1883				       BTRFS_SUPER_INFO_SIZE);
   1884				ret = btrfsic_process_written_superblock(
   1885						state,
   1886						block,
   1887						(struct btrfs_super_block *)
   1888						mapped_datav[0]);
   1889				if (state->print_mask &
   1890				    BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
   1891					pr_info("[after new superblock is written]:\n");
   1892					btrfsic_dump_tree_sub(state, block, 0);
   1893				}
   1894			} else {
   1895				block->mirror_num = 0;	/* unknown */
   1896				ret = btrfsic_process_metablock(
   1897						state,
   1898						block,
   1899						&block_ctx,
   1900						0, 0);
   1901			}
   1902			if (ret)
   1903				pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n",
   1904				       dev_bytenr);
   1905		} else {
   1906			block->is_metadata = 0;
   1907			block->mirror_num = 0;	/* unknown */
   1908			block->generation = BTRFSIC_GENERATION_UNKNOWN;
   1909			if (!state->include_extent_data
   1910			    && list_empty(&block->ref_from_list)) {
   1911				/*
   1912				 * disk block is overwritten with extent
   1913				 * data (not meta data) and we are configured
   1914				 * to not include extent data: take the
   1915				 * chance and free the block's memory
   1916				 */
   1917				btrfsic_block_hashtable_remove(block);
   1918				list_del(&block->all_blocks_node);
   1919				btrfsic_block_free(block);
   1920			}
   1921		}
   1922		btrfsic_release_block_ctx(&block_ctx);
   1923	} else {
   1924		/* block has not been found in hash table */
   1925		u64 bytenr;
   1926
   1927		if (!is_metadata) {
   1928			processed_len = state->datablock_size;
   1929			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1930				pr_info(
   1931			"written block (%pg/%llu/?) !found in hash table, D\n",
   1932				       dev_state->bdev, dev_bytenr);
   1933			if (!state->include_extent_data) {
   1934				/* ignore that written D block */
   1935				goto continue_loop;
   1936			}
   1937
   1938			/* this is getting ugly for the
   1939			 * include_extent_data case... */
   1940			bytenr = 0;	/* unknown */
   1941		} else {
   1942			processed_len = state->metablock_size;
   1943			bytenr = btrfs_stack_header_bytenr(
   1944					(struct btrfs_header *)
   1945					mapped_datav[0]);
   1946			btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
   1947						       dev_bytenr);
   1948			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   1949				pr_info(
   1950			"written block @%llu (%pg/%llu/?) !found in hash table, M\n",
   1951				       bytenr, dev_state->bdev, dev_bytenr);
   1952		}
   1953
   1954		block_ctx.dev = dev_state;
   1955		block_ctx.dev_bytenr = dev_bytenr;
   1956		block_ctx.start = bytenr;
   1957		block_ctx.len = processed_len;
   1958		block_ctx.pagev = NULL;
   1959		block_ctx.mem_to_free = NULL;
   1960		block_ctx.datav = mapped_datav;
   1961
   1962		block = btrfsic_block_alloc();
   1963		if (NULL == block) {
   1964			btrfsic_release_block_ctx(&block_ctx);
   1965			goto continue_loop;
   1966		}
   1967		block->dev_state = dev_state;
   1968		block->dev_bytenr = dev_bytenr;
   1969		block->logical_bytenr = bytenr;
   1970		block->is_metadata = is_metadata;
   1971		block->never_written = 0;
   1972		block->iodone_w_error = 0;
   1973		block->mirror_num = 0;	/* unknown */
   1974		block->flush_gen = dev_state->last_flush_gen + 1;
   1975		block->submit_bio_bh_rw = submit_bio_bh_rw;
   1976		if (NULL != bio) {
   1977			block->is_iodone = 0;
   1978			BUG_ON(NULL == bio_is_patched);
   1979			if (!*bio_is_patched) {
   1980				block->orig_bio_private = bio->bi_private;
   1981				block->orig_bio_end_io = bio->bi_end_io;
   1982				block->next_in_same_bio = NULL;
   1983				bio->bi_private = block;
   1984				bio->bi_end_io = btrfsic_bio_end_io;
   1985				*bio_is_patched = 1;
   1986			} else {
   1987				struct btrfsic_block *chained_block =
   1988				    (struct btrfsic_block *)
   1989				    bio->bi_private;
   1990
   1991				BUG_ON(NULL == chained_block);
   1992				block->orig_bio_private =
   1993				    chained_block->orig_bio_private;
   1994				block->orig_bio_end_io =
   1995				    chained_block->orig_bio_end_io;
   1996				block->next_in_same_bio = chained_block;
   1997				bio->bi_private = block;
   1998			}
   1999		} else {
   2000			block->is_iodone = 1;
   2001			block->orig_bio_private = NULL;
   2002			block->orig_bio_end_io = NULL;
   2003			block->next_in_same_bio = NULL;
   2004		}
   2005		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2006			pr_info("new written %c-block @%llu (%pg/%llu/%d)\n",
   2007			       is_metadata ? 'M' : 'D',
   2008			       block->logical_bytenr, block->dev_state->bdev,
   2009			       block->dev_bytenr, block->mirror_num);
   2010		list_add(&block->all_blocks_node, &state->all_blocks_list);
   2011		btrfsic_block_hashtable_add(block, &state->block_hashtable);
   2012
   2013		if (is_metadata) {
   2014			ret = btrfsic_process_metablock(state, block,
   2015							&block_ctx, 0, 0);
   2016			if (ret)
   2017				pr_info("btrfsic: process_metablock(root @%llu) failed!\n",
   2018				       dev_bytenr);
   2019		}
   2020		btrfsic_release_block_ctx(&block_ctx);
   2021	}
   2022
   2023continue_loop:
   2024	BUG_ON(!processed_len);
   2025	dev_bytenr += processed_len;
   2026	mapped_datav += processed_len >> PAGE_SHIFT;
   2027	num_pages -= processed_len >> PAGE_SHIFT;
   2028	goto again;
   2029}
   2030
   2031static void btrfsic_bio_end_io(struct bio *bp)
   2032{
   2033	struct btrfsic_block *block = bp->bi_private;
   2034	int iodone_w_error;
   2035
   2036	/* mutex is not held! This is not save if IO is not yet completed
   2037	 * on umount */
   2038	iodone_w_error = 0;
   2039	if (bp->bi_status)
   2040		iodone_w_error = 1;
   2041
   2042	BUG_ON(NULL == block);
   2043	bp->bi_private = block->orig_bio_private;
   2044	bp->bi_end_io = block->orig_bio_end_io;
   2045
   2046	do {
   2047		struct btrfsic_block *next_block;
   2048		struct btrfsic_dev_state *const dev_state = block->dev_state;
   2049
   2050		if ((dev_state->state->print_mask &
   2051		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
   2052			pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n",
   2053			       bp->bi_status,
   2054			       btrfsic_get_block_type(dev_state->state, block),
   2055			       block->logical_bytenr, dev_state->bdev,
   2056			       block->dev_bytenr, block->mirror_num);
   2057		next_block = block->next_in_same_bio;
   2058		block->iodone_w_error = iodone_w_error;
   2059		if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
   2060			dev_state->last_flush_gen++;
   2061			if ((dev_state->state->print_mask &
   2062			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
   2063				pr_info("bio_end_io() new %pg flush_gen=%llu\n",
   2064				       dev_state->bdev,
   2065				       dev_state->last_flush_gen);
   2066		}
   2067		if (block->submit_bio_bh_rw & REQ_FUA)
   2068			block->flush_gen = 0; /* FUA completed means block is
   2069					       * on disk */
   2070		block->is_iodone = 1; /* for FLUSH, this releases the block */
   2071		block = next_block;
   2072	} while (NULL != block);
   2073
   2074	bp->bi_end_io(bp);
   2075}
   2076
   2077static int btrfsic_process_written_superblock(
   2078		struct btrfsic_state *state,
   2079		struct btrfsic_block *const superblock,
   2080		struct btrfs_super_block *const super_hdr)
   2081{
   2082	struct btrfs_fs_info *fs_info = state->fs_info;
   2083	int pass;
   2084
   2085	superblock->generation = btrfs_super_generation(super_hdr);
   2086	if (!(superblock->generation > state->max_superblock_generation ||
   2087	      0 == state->max_superblock_generation)) {
   2088		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
   2089			pr_info(
   2090	"btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n",
   2091			       superblock->logical_bytenr,
   2092			       superblock->dev_state->bdev,
   2093			       superblock->dev_bytenr, superblock->mirror_num,
   2094			       btrfs_super_generation(super_hdr),
   2095			       state->max_superblock_generation);
   2096	} else {
   2097		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
   2098			pr_info(
   2099	"btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n",
   2100			       superblock->logical_bytenr,
   2101			       superblock->dev_state->bdev,
   2102			       superblock->dev_bytenr, superblock->mirror_num,
   2103			       btrfs_super_generation(super_hdr),
   2104			       state->max_superblock_generation);
   2105
   2106		state->max_superblock_generation =
   2107		    btrfs_super_generation(super_hdr);
   2108		state->latest_superblock = superblock;
   2109	}
   2110
   2111	for (pass = 0; pass < 3; pass++) {
   2112		int ret;
   2113		u64 next_bytenr;
   2114		struct btrfsic_block *next_block;
   2115		struct btrfsic_block_data_ctx tmp_next_block_ctx;
   2116		struct btrfsic_block_link *l;
   2117		int num_copies;
   2118		int mirror_num;
   2119		const char *additional_string = NULL;
   2120		struct btrfs_disk_key tmp_disk_key = {0};
   2121
   2122		btrfs_set_disk_key_objectid(&tmp_disk_key,
   2123					    BTRFS_ROOT_ITEM_KEY);
   2124		btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
   2125
   2126		switch (pass) {
   2127		case 0:
   2128			btrfs_set_disk_key_objectid(&tmp_disk_key,
   2129						    BTRFS_ROOT_TREE_OBJECTID);
   2130			additional_string = "root ";
   2131			next_bytenr = btrfs_super_root(super_hdr);
   2132			if (state->print_mask &
   2133			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
   2134				pr_info("root@%llu\n", next_bytenr);
   2135			break;
   2136		case 1:
   2137			btrfs_set_disk_key_objectid(&tmp_disk_key,
   2138						    BTRFS_CHUNK_TREE_OBJECTID);
   2139			additional_string = "chunk ";
   2140			next_bytenr = btrfs_super_chunk_root(super_hdr);
   2141			if (state->print_mask &
   2142			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
   2143				pr_info("chunk@%llu\n", next_bytenr);
   2144			break;
   2145		case 2:
   2146			btrfs_set_disk_key_objectid(&tmp_disk_key,
   2147						    BTRFS_TREE_LOG_OBJECTID);
   2148			additional_string = "log ";
   2149			next_bytenr = btrfs_super_log_root(super_hdr);
   2150			if (0 == next_bytenr)
   2151				continue;
   2152			if (state->print_mask &
   2153			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
   2154				pr_info("log@%llu\n", next_bytenr);
   2155			break;
   2156		}
   2157
   2158		num_copies = btrfs_num_copies(fs_info, next_bytenr,
   2159					      BTRFS_SUPER_INFO_SIZE);
   2160		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
   2161			pr_info("num_copies(log_bytenr=%llu) = %d\n",
   2162			       next_bytenr, num_copies);
   2163		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
   2164			int was_created;
   2165
   2166			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2167				pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num);
   2168			ret = btrfsic_map_block(state, next_bytenr,
   2169						BTRFS_SUPER_INFO_SIZE,
   2170						&tmp_next_block_ctx,
   2171						mirror_num);
   2172			if (ret) {
   2173				pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
   2174				       next_bytenr, mirror_num);
   2175				return -1;
   2176			}
   2177
   2178			next_block = btrfsic_block_lookup_or_add(
   2179					state,
   2180					&tmp_next_block_ctx,
   2181					additional_string,
   2182					1, 0, 1,
   2183					mirror_num,
   2184					&was_created);
   2185			if (NULL == next_block) {
   2186				btrfsic_release_block_ctx(&tmp_next_block_ctx);
   2187				return -1;
   2188			}
   2189
   2190			next_block->disk_key = tmp_disk_key;
   2191			if (was_created)
   2192				next_block->generation =
   2193				    BTRFSIC_GENERATION_UNKNOWN;
   2194			l = btrfsic_block_link_lookup_or_add(
   2195					state,
   2196					&tmp_next_block_ctx,
   2197					next_block,
   2198					superblock,
   2199					BTRFSIC_GENERATION_UNKNOWN);
   2200			btrfsic_release_block_ctx(&tmp_next_block_ctx);
   2201			if (NULL == l)
   2202				return -1;
   2203		}
   2204	}
   2205
   2206	if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
   2207		btrfsic_dump_tree(state);
   2208
   2209	return 0;
   2210}
   2211
   2212static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
   2213					struct btrfsic_block *const block,
   2214					int recursion_level)
   2215{
   2216	const struct btrfsic_block_link *l;
   2217	int ret = 0;
   2218
   2219	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
   2220		/*
   2221		 * Note that this situation can happen and does not
   2222		 * indicate an error in regular cases. It happens
   2223		 * when disk blocks are freed and later reused.
   2224		 * The check-integrity module is not aware of any
   2225		 * block free operations, it just recognizes block
   2226		 * write operations. Therefore it keeps the linkage
   2227		 * information for a block until a block is
   2228		 * rewritten. This can temporarily cause incorrect
   2229		 * and even circular linkage information. This
   2230		 * causes no harm unless such blocks are referenced
   2231		 * by the most recent super block.
   2232		 */
   2233		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2234			pr_info("btrfsic: abort cyclic linkage (case 1).\n");
   2235
   2236		return ret;
   2237	}
   2238
   2239	/*
   2240	 * This algorithm is recursive because the amount of used stack
   2241	 * space is very small and the max recursion depth is limited.
   2242	 */
   2243	list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
   2244		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2245			pr_info(
   2246		"rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n",
   2247			       recursion_level,
   2248			       btrfsic_get_block_type(state, block),
   2249			       block->logical_bytenr, block->dev_state->bdev,
   2250			       block->dev_bytenr, block->mirror_num,
   2251			       l->ref_cnt,
   2252			       btrfsic_get_block_type(state, l->block_ref_to),
   2253			       l->block_ref_to->logical_bytenr,
   2254			       l->block_ref_to->dev_state->bdev,
   2255			       l->block_ref_to->dev_bytenr,
   2256			       l->block_ref_to->mirror_num);
   2257		if (l->block_ref_to->never_written) {
   2258			pr_info(
   2259"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n",
   2260			       btrfsic_get_block_type(state, l->block_ref_to),
   2261			       l->block_ref_to->logical_bytenr,
   2262			       l->block_ref_to->dev_state->bdev,
   2263			       l->block_ref_to->dev_bytenr,
   2264			       l->block_ref_to->mirror_num);
   2265			ret = -1;
   2266		} else if (!l->block_ref_to->is_iodone) {
   2267			pr_info(
   2268"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n",
   2269			       btrfsic_get_block_type(state, l->block_ref_to),
   2270			       l->block_ref_to->logical_bytenr,
   2271			       l->block_ref_to->dev_state->bdev,
   2272			       l->block_ref_to->dev_bytenr,
   2273			       l->block_ref_to->mirror_num);
   2274			ret = -1;
   2275		} else if (l->block_ref_to->iodone_w_error) {
   2276			pr_info(
   2277"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n",
   2278			       btrfsic_get_block_type(state, l->block_ref_to),
   2279			       l->block_ref_to->logical_bytenr,
   2280			       l->block_ref_to->dev_state->bdev,
   2281			       l->block_ref_to->dev_bytenr,
   2282			       l->block_ref_to->mirror_num);
   2283			ret = -1;
   2284		} else if (l->parent_generation !=
   2285			   l->block_ref_to->generation &&
   2286			   BTRFSIC_GENERATION_UNKNOWN !=
   2287			   l->parent_generation &&
   2288			   BTRFSIC_GENERATION_UNKNOWN !=
   2289			   l->block_ref_to->generation) {
   2290			pr_info(
   2291"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n",
   2292			       btrfsic_get_block_type(state, l->block_ref_to),
   2293			       l->block_ref_to->logical_bytenr,
   2294			       l->block_ref_to->dev_state->bdev,
   2295			       l->block_ref_to->dev_bytenr,
   2296			       l->block_ref_to->mirror_num,
   2297			       l->block_ref_to->generation,
   2298			       l->parent_generation);
   2299			ret = -1;
   2300		} else if (l->block_ref_to->flush_gen >
   2301			   l->block_ref_to->dev_state->last_flush_gen) {
   2302			pr_info(
   2303"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
   2304			       btrfsic_get_block_type(state, l->block_ref_to),
   2305			       l->block_ref_to->logical_bytenr,
   2306			       l->block_ref_to->dev_state->bdev,
   2307			       l->block_ref_to->dev_bytenr,
   2308			       l->block_ref_to->mirror_num, block->flush_gen,
   2309			       l->block_ref_to->dev_state->last_flush_gen);
   2310			ret = -1;
   2311		} else if (-1 == btrfsic_check_all_ref_blocks(state,
   2312							      l->block_ref_to,
   2313							      recursion_level +
   2314							      1)) {
   2315			ret = -1;
   2316		}
   2317	}
   2318
   2319	return ret;
   2320}
   2321
   2322static int btrfsic_is_block_ref_by_superblock(
   2323		const struct btrfsic_state *state,
   2324		const struct btrfsic_block *block,
   2325		int recursion_level)
   2326{
   2327	const struct btrfsic_block_link *l;
   2328
   2329	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
   2330		/* refer to comment at "abort cyclic linkage (case 1)" */
   2331		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2332			pr_info("btrfsic: abort cyclic linkage (case 2).\n");
   2333
   2334		return 0;
   2335	}
   2336
   2337	/*
   2338	 * This algorithm is recursive because the amount of used stack space
   2339	 * is very small and the max recursion depth is limited.
   2340	 */
   2341	list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
   2342		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2343			pr_info(
   2344	"rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
   2345			       recursion_level,
   2346			       btrfsic_get_block_type(state, block),
   2347			       block->logical_bytenr, block->dev_state->bdev,
   2348			       block->dev_bytenr, block->mirror_num,
   2349			       l->ref_cnt,
   2350			       btrfsic_get_block_type(state, l->block_ref_from),
   2351			       l->block_ref_from->logical_bytenr,
   2352			       l->block_ref_from->dev_state->bdev,
   2353			       l->block_ref_from->dev_bytenr,
   2354			       l->block_ref_from->mirror_num);
   2355		if (l->block_ref_from->is_superblock &&
   2356		    state->latest_superblock->dev_bytenr ==
   2357		    l->block_ref_from->dev_bytenr &&
   2358		    state->latest_superblock->dev_state->bdev ==
   2359		    l->block_ref_from->dev_state->bdev)
   2360			return 1;
   2361		else if (btrfsic_is_block_ref_by_superblock(state,
   2362							    l->block_ref_from,
   2363							    recursion_level +
   2364							    1))
   2365			return 1;
   2366	}
   2367
   2368	return 0;
   2369}
   2370
   2371static void btrfsic_print_add_link(const struct btrfsic_state *state,
   2372				   const struct btrfsic_block_link *l)
   2373{
   2374	pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
   2375	       l->ref_cnt,
   2376	       btrfsic_get_block_type(state, l->block_ref_from),
   2377	       l->block_ref_from->logical_bytenr,
   2378	       l->block_ref_from->dev_state->bdev,
   2379	       l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
   2380	       btrfsic_get_block_type(state, l->block_ref_to),
   2381	       l->block_ref_to->logical_bytenr,
   2382	       l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
   2383	       l->block_ref_to->mirror_num);
   2384}
   2385
   2386static void btrfsic_print_rem_link(const struct btrfsic_state *state,
   2387				   const struct btrfsic_block_link *l)
   2388{
   2389	pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
   2390	       l->ref_cnt,
   2391	       btrfsic_get_block_type(state, l->block_ref_from),
   2392	       l->block_ref_from->logical_bytenr,
   2393	       l->block_ref_from->dev_state->bdev,
   2394	       l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
   2395	       btrfsic_get_block_type(state, l->block_ref_to),
   2396	       l->block_ref_to->logical_bytenr,
   2397	       l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
   2398	       l->block_ref_to->mirror_num);
   2399}
   2400
   2401static char btrfsic_get_block_type(const struct btrfsic_state *state,
   2402				   const struct btrfsic_block *block)
   2403{
   2404	if (block->is_superblock &&
   2405	    state->latest_superblock->dev_bytenr == block->dev_bytenr &&
   2406	    state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
   2407		return 'S';
   2408	else if (block->is_superblock)
   2409		return 's';
   2410	else if (block->is_metadata)
   2411		return 'M';
   2412	else
   2413		return 'D';
   2414}
   2415
   2416static void btrfsic_dump_tree(const struct btrfsic_state *state)
   2417{
   2418	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
   2419}
   2420
   2421static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
   2422				  const struct btrfsic_block *block,
   2423				  int indent_level)
   2424{
   2425	const struct btrfsic_block_link *l;
   2426	int indent_add;
   2427	static char buf[80];
   2428	int cursor_position;
   2429
   2430	/*
   2431	 * Should better fill an on-stack buffer with a complete line and
   2432	 * dump it at once when it is time to print a newline character.
   2433	 */
   2434
   2435	/*
   2436	 * This algorithm is recursive because the amount of used stack space
   2437	 * is very small and the max recursion depth is limited.
   2438	 */
   2439	indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)",
   2440			     btrfsic_get_block_type(state, block),
   2441			     block->logical_bytenr, block->dev_state->bdev,
   2442			     block->dev_bytenr, block->mirror_num);
   2443	if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
   2444		printk("[...]\n");
   2445		return;
   2446	}
   2447	printk(buf);
   2448	indent_level += indent_add;
   2449	if (list_empty(&block->ref_to_list)) {
   2450		printk("\n");
   2451		return;
   2452	}
   2453	if (block->mirror_num > 1 &&
   2454	    !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
   2455		printk(" [...]\n");
   2456		return;
   2457	}
   2458
   2459	cursor_position = indent_level;
   2460	list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
   2461		while (cursor_position < indent_level) {
   2462			printk(" ");
   2463			cursor_position++;
   2464		}
   2465		if (l->ref_cnt > 1)
   2466			indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
   2467		else
   2468			indent_add = sprintf(buf, " --> ");
   2469		if (indent_level + indent_add >
   2470		    BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
   2471			printk("[...]\n");
   2472			cursor_position = 0;
   2473			continue;
   2474		}
   2475
   2476		printk(buf);
   2477
   2478		btrfsic_dump_tree_sub(state, l->block_ref_to,
   2479				      indent_level + indent_add);
   2480		cursor_position = 0;
   2481	}
   2482}
   2483
   2484static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
   2485		struct btrfsic_state *state,
   2486		struct btrfsic_block_data_ctx *next_block_ctx,
   2487		struct btrfsic_block *next_block,
   2488		struct btrfsic_block *from_block,
   2489		u64 parent_generation)
   2490{
   2491	struct btrfsic_block_link *l;
   2492
   2493	l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
   2494						next_block_ctx->dev_bytenr,
   2495						from_block->dev_state->bdev,
   2496						from_block->dev_bytenr,
   2497						&state->block_link_hashtable);
   2498	if (NULL == l) {
   2499		l = btrfsic_block_link_alloc();
   2500		if (!l)
   2501			return NULL;
   2502
   2503		l->block_ref_to = next_block;
   2504		l->block_ref_from = from_block;
   2505		l->ref_cnt = 1;
   2506		l->parent_generation = parent_generation;
   2507
   2508		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2509			btrfsic_print_add_link(state, l);
   2510
   2511		list_add(&l->node_ref_to, &from_block->ref_to_list);
   2512		list_add(&l->node_ref_from, &next_block->ref_from_list);
   2513
   2514		btrfsic_block_link_hashtable_add(l,
   2515						 &state->block_link_hashtable);
   2516	} else {
   2517		l->ref_cnt++;
   2518		l->parent_generation = parent_generation;
   2519		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2520			btrfsic_print_add_link(state, l);
   2521	}
   2522
   2523	return l;
   2524}
   2525
   2526static struct btrfsic_block *btrfsic_block_lookup_or_add(
   2527		struct btrfsic_state *state,
   2528		struct btrfsic_block_data_ctx *block_ctx,
   2529		const char *additional_string,
   2530		int is_metadata,
   2531		int is_iodone,
   2532		int never_written,
   2533		int mirror_num,
   2534		int *was_created)
   2535{
   2536	struct btrfsic_block *block;
   2537
   2538	block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
   2539					       block_ctx->dev_bytenr,
   2540					       &state->block_hashtable);
   2541	if (NULL == block) {
   2542		struct btrfsic_dev_state *dev_state;
   2543
   2544		block = btrfsic_block_alloc();
   2545		if (!block)
   2546			return NULL;
   2547
   2548		dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
   2549		if (NULL == dev_state) {
   2550			pr_info("btrfsic: error, lookup dev_state failed!\n");
   2551			btrfsic_block_free(block);
   2552			return NULL;
   2553		}
   2554		block->dev_state = dev_state;
   2555		block->dev_bytenr = block_ctx->dev_bytenr;
   2556		block->logical_bytenr = block_ctx->start;
   2557		block->is_metadata = is_metadata;
   2558		block->is_iodone = is_iodone;
   2559		block->never_written = never_written;
   2560		block->mirror_num = mirror_num;
   2561		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2562			pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n",
   2563			       additional_string,
   2564			       btrfsic_get_block_type(state, block),
   2565			       block->logical_bytenr, dev_state->bdev,
   2566			       block->dev_bytenr, mirror_num);
   2567		list_add(&block->all_blocks_node, &state->all_blocks_list);
   2568		btrfsic_block_hashtable_add(block, &state->block_hashtable);
   2569		if (NULL != was_created)
   2570			*was_created = 1;
   2571	} else {
   2572		if (NULL != was_created)
   2573			*was_created = 0;
   2574	}
   2575
   2576	return block;
   2577}
   2578
   2579static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
   2580					   u64 bytenr,
   2581					   struct btrfsic_dev_state *dev_state,
   2582					   u64 dev_bytenr)
   2583{
   2584	struct btrfs_fs_info *fs_info = state->fs_info;
   2585	struct btrfsic_block_data_ctx block_ctx;
   2586	int num_copies;
   2587	int mirror_num;
   2588	int match = 0;
   2589	int ret;
   2590
   2591	num_copies = btrfs_num_copies(fs_info, bytenr, state->metablock_size);
   2592
   2593	for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
   2594		ret = btrfsic_map_block(state, bytenr, state->metablock_size,
   2595					&block_ctx, mirror_num);
   2596		if (ret) {
   2597			pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n",
   2598			       bytenr, mirror_num);
   2599			continue;
   2600		}
   2601
   2602		if (dev_state->bdev == block_ctx.dev->bdev &&
   2603		    dev_bytenr == block_ctx.dev_bytenr) {
   2604			match++;
   2605			btrfsic_release_block_ctx(&block_ctx);
   2606			break;
   2607		}
   2608		btrfsic_release_block_ctx(&block_ctx);
   2609	}
   2610
   2611	if (WARN_ON(!match)) {
   2612		pr_info(
   2613"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n",
   2614		       bytenr, dev_state->bdev, dev_bytenr);
   2615		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
   2616			ret = btrfsic_map_block(state, bytenr,
   2617						state->metablock_size,
   2618						&block_ctx, mirror_num);
   2619			if (ret)
   2620				continue;
   2621
   2622			pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n",
   2623			       bytenr, block_ctx.dev->bdev,
   2624			       block_ctx.dev_bytenr, mirror_num);
   2625		}
   2626	}
   2627}
   2628
   2629static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
   2630{
   2631	return btrfsic_dev_state_hashtable_lookup(dev,
   2632						  &btrfsic_dev_state_hashtable);
   2633}
   2634
   2635static void btrfsic_check_write_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
   2636{
   2637	unsigned int segs = bio_segments(bio);
   2638	u64 dev_bytenr = 512 * bio->bi_iter.bi_sector;
   2639	u64 cur_bytenr = dev_bytenr;
   2640	struct bvec_iter iter;
   2641	struct bio_vec bvec;
   2642	char **mapped_datav;
   2643	int bio_is_patched = 0;
   2644	int i = 0;
   2645
   2646	if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
   2647		pr_info(
   2648"submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
   2649		       bio_op(bio), bio->bi_opf, segs,
   2650		       bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev);
   2651
   2652	mapped_datav = kmalloc_array(segs, sizeof(*mapped_datav), GFP_NOFS);
   2653	if (!mapped_datav)
   2654		return;
   2655
   2656	bio_for_each_segment(bvec, bio, iter) {
   2657		BUG_ON(bvec.bv_len != PAGE_SIZE);
   2658		mapped_datav[i] = page_address(bvec.bv_page);
   2659		i++;
   2660
   2661		if (dev_state->state->print_mask &
   2662		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
   2663			pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
   2664			       i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
   2665		cur_bytenr += bvec.bv_len;
   2666	}
   2667
   2668	btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, segs,
   2669				      bio, &bio_is_patched, bio->bi_opf);
   2670	kfree(mapped_datav);
   2671}
   2672
   2673static void btrfsic_check_flush_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
   2674{
   2675	if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
   2676		pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
   2677		       bio_op(bio), bio->bi_opf, bio->bi_bdev);
   2678
   2679	if (dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
   2680		struct btrfsic_block *const block =
   2681			&dev_state->dummy_block_for_bio_bh_flush;
   2682
   2683		block->is_iodone = 0;
   2684		block->never_written = 0;
   2685		block->iodone_w_error = 0;
   2686		block->flush_gen = dev_state->last_flush_gen + 1;
   2687		block->submit_bio_bh_rw = bio->bi_opf;
   2688		block->orig_bio_private = bio->bi_private;
   2689		block->orig_bio_end_io = bio->bi_end_io;
   2690		block->next_in_same_bio = NULL;
   2691		bio->bi_private = block;
   2692		bio->bi_end_io = btrfsic_bio_end_io;
   2693	} else if ((dev_state->state->print_mask &
   2694		   (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
   2695		    BTRFSIC_PRINT_MASK_VERBOSE))) {
   2696		pr_info(
   2697"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
   2698		       dev_state->bdev);
   2699	}
   2700}
   2701
   2702void btrfsic_check_bio(struct bio *bio)
   2703{
   2704	struct btrfsic_dev_state *dev_state;
   2705
   2706	if (!btrfsic_is_initialized)
   2707		return;
   2708
   2709	/*
   2710	 * We can be called before btrfsic_mount, so there might not be a
   2711	 * dev_state.
   2712	 */
   2713	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev);
   2714	mutex_lock(&btrfsic_mutex);
   2715	if (dev_state) {
   2716		if (bio_op(bio) == REQ_OP_WRITE && bio_has_data(bio))
   2717			btrfsic_check_write_bio(bio, dev_state);
   2718		else if (bio->bi_opf & REQ_PREFLUSH)
   2719			btrfsic_check_flush_bio(bio, dev_state);
   2720	}
   2721	mutex_unlock(&btrfsic_mutex);
   2722}
   2723
   2724int btrfsic_mount(struct btrfs_fs_info *fs_info,
   2725		  struct btrfs_fs_devices *fs_devices,
   2726		  int including_extent_data, u32 print_mask)
   2727{
   2728	int ret;
   2729	struct btrfsic_state *state;
   2730	struct list_head *dev_head = &fs_devices->devices;
   2731	struct btrfs_device *device;
   2732
   2733	if (!PAGE_ALIGNED(fs_info->nodesize)) {
   2734		pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
   2735		       fs_info->nodesize, PAGE_SIZE);
   2736		return -1;
   2737	}
   2738	if (!PAGE_ALIGNED(fs_info->sectorsize)) {
   2739		pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
   2740		       fs_info->sectorsize, PAGE_SIZE);
   2741		return -1;
   2742	}
   2743	state = kvzalloc(sizeof(*state), GFP_KERNEL);
   2744	if (!state)
   2745		return -ENOMEM;
   2746
   2747	if (!btrfsic_is_initialized) {
   2748		mutex_init(&btrfsic_mutex);
   2749		btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
   2750		btrfsic_is_initialized = 1;
   2751	}
   2752	mutex_lock(&btrfsic_mutex);
   2753	state->fs_info = fs_info;
   2754	state->print_mask = print_mask;
   2755	state->include_extent_data = including_extent_data;
   2756	state->metablock_size = fs_info->nodesize;
   2757	state->datablock_size = fs_info->sectorsize;
   2758	INIT_LIST_HEAD(&state->all_blocks_list);
   2759	btrfsic_block_hashtable_init(&state->block_hashtable);
   2760	btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
   2761	state->max_superblock_generation = 0;
   2762	state->latest_superblock = NULL;
   2763
   2764	list_for_each_entry(device, dev_head, dev_list) {
   2765		struct btrfsic_dev_state *ds;
   2766
   2767		if (!device->bdev || !device->name)
   2768			continue;
   2769
   2770		ds = btrfsic_dev_state_alloc();
   2771		if (NULL == ds) {
   2772			mutex_unlock(&btrfsic_mutex);
   2773			return -ENOMEM;
   2774		}
   2775		ds->bdev = device->bdev;
   2776		ds->state = state;
   2777		btrfsic_dev_state_hashtable_add(ds,
   2778						&btrfsic_dev_state_hashtable);
   2779	}
   2780
   2781	ret = btrfsic_process_superblock(state, fs_devices);
   2782	if (0 != ret) {
   2783		mutex_unlock(&btrfsic_mutex);
   2784		btrfsic_unmount(fs_devices);
   2785		return ret;
   2786	}
   2787
   2788	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
   2789		btrfsic_dump_database(state);
   2790	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
   2791		btrfsic_dump_tree(state);
   2792
   2793	mutex_unlock(&btrfsic_mutex);
   2794	return 0;
   2795}
   2796
   2797void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
   2798{
   2799	struct btrfsic_block *b_all, *tmp_all;
   2800	struct btrfsic_state *state;
   2801	struct list_head *dev_head = &fs_devices->devices;
   2802	struct btrfs_device *device;
   2803
   2804	if (!btrfsic_is_initialized)
   2805		return;
   2806
   2807	mutex_lock(&btrfsic_mutex);
   2808
   2809	state = NULL;
   2810	list_for_each_entry(device, dev_head, dev_list) {
   2811		struct btrfsic_dev_state *ds;
   2812
   2813		if (!device->bdev || !device->name)
   2814			continue;
   2815
   2816		ds = btrfsic_dev_state_hashtable_lookup(
   2817				device->bdev->bd_dev,
   2818				&btrfsic_dev_state_hashtable);
   2819		if (NULL != ds) {
   2820			state = ds->state;
   2821			btrfsic_dev_state_hashtable_remove(ds);
   2822			btrfsic_dev_state_free(ds);
   2823		}
   2824	}
   2825
   2826	if (NULL == state) {
   2827		pr_info("btrfsic: error, cannot find state information on umount!\n");
   2828		mutex_unlock(&btrfsic_mutex);
   2829		return;
   2830	}
   2831
   2832	/*
   2833	 * Don't care about keeping the lists' state up to date,
   2834	 * just free all memory that was allocated dynamically.
   2835	 * Free the blocks and the block_links.
   2836	 */
   2837	list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
   2838				 all_blocks_node) {
   2839		struct btrfsic_block_link *l, *tmp;
   2840
   2841		list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
   2842					 node_ref_to) {
   2843			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
   2844				btrfsic_print_rem_link(state, l);
   2845
   2846			l->ref_cnt--;
   2847			if (0 == l->ref_cnt)
   2848				btrfsic_block_link_free(l);
   2849		}
   2850
   2851		if (b_all->is_iodone || b_all->never_written)
   2852			btrfsic_block_free(b_all);
   2853		else
   2854			pr_info(
   2855"btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n",
   2856			       btrfsic_get_block_type(state, b_all),
   2857			       b_all->logical_bytenr, b_all->dev_state->bdev,
   2858			       b_all->dev_bytenr, b_all->mirror_num);
   2859	}
   2860
   2861	mutex_unlock(&btrfsic_mutex);
   2862
   2863	kvfree(state);
   2864}