cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dm-clone-metadata.c (24530B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
      4 */
      5
      6#include <linux/mm.h>
      7#include <linux/err.h>
      8#include <linux/slab.h>
      9#include <linux/rwsem.h>
     10#include <linux/bitops.h>
     11#include <linux/bitmap.h>
     12#include <linux/device-mapper.h>
     13
     14#include "persistent-data/dm-bitset.h"
     15#include "persistent-data/dm-space-map.h"
     16#include "persistent-data/dm-block-manager.h"
     17#include "persistent-data/dm-transaction-manager.h"
     18
     19#include "dm-clone-metadata.h"
     20
/* Message prefix; presumably picked up by the DMERR() calls in this file. */
#define DM_MSG_PREFIX "clone metadata"

/* Block number used for every superblock lock request in this file. */
#define SUPERBLOCK_LOCATION 0
/* Value written to, and verified against, the superblock's magic field. */
#define SUPERBLOCK_MAGIC 0x8af27f64
/* Constant handed to dm_bm_checksum() when checksumming the superblock. */
#define SUPERBLOCK_CSUM_XOR 257649492

/* Last argument given to dm_block_manager_create(). */
#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

/* Size in bytes of the uuid field of the on-disk superblock. */
#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1
     34
/*
 * On-disk metadata layout
 *
 * Multi-byte fields are stored little-endian. The checksum in csum is
 * computed starting at the flags field, so csum must remain the first member.
 */
struct superblock_disk {
	/* Checksum of the block contents that follow this field */
	__le32 csum;
	/* Always written as 0 by __prepare_superblock() */
	__le32 flags;
	/* Location of this block; filled in right before it is written */
	__le64 blocknr;

	/* Currently unused, written as all zeroes */
	__u8 uuid[UUID_LEN];
	/* SUPERBLOCK_MAGIC */
	__le64 magic;
	/* Metadata format version, checked against the supported range */
	__le32 version;

	/* Copy of the metadata space map root */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Must match the values the target is opened with */
	__le64 region_size;
	__le64 target_size;

	/* Root of the on-disk bitset tracking hydrated regions */
	__le64 bitset_root;
} __packed;
     54
     55/*
     56 * Region and Dirty bitmaps.
     57 *
     58 * dm-clone logically splits the source and destination devices in regions of
     59 * fixed size. The destination device's regions are gradually hydrated, i.e.,
     60 * we copy (clone) the source's regions to the destination device. Eventually,
     61 * all regions will get hydrated and all I/O will be served from the
     62 * destination device.
     63 *
     64 * We maintain an on-disk bitmap which tracks the state of each of the
     65 * destination device's regions, i.e., whether they are hydrated or not.
     66 *
     67 * To save constantly doing look ups on disk we keep an in core copy of the
     68 * on-disk bitmap, the region_map.
     69 *
     70 * In order to track which regions are hydrated during a metadata transaction,
     71 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
     72 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
     73 * tracks the regions that got hydrated during the current metadata
     74 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
     75 * the dirty_regions bitmap.
     76 *
     77 * This allows us to precisely track the regions that were hydrated during the
     78 * current metadata transaction and update the metadata accordingly, when we
     79 * commit the current transaction. This is important because dm-clone should
     80 * only commit the metadata of regions that were properly flushed to the
     81 * destination device beforehand. Otherwise, in case of a crash, we could end
     82 * up with a corrupted dm-clone device.
     83 *
     84 * When a region finishes hydrating dm-clone calls
     85 * dm_clone_set_region_hydrated(), or for discard requests
     86 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
     87 * and dmap.
     88 *
     89 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
     90 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
     91 * disk the whole region_map. We can just flush the dirty region_map bits.
     92 *
     93 * We use the helper dmap->dirty_words bitmap, which is smaller than the
     94 * original region_map, to reduce the amount of memory accesses during a
     95 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
     96 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
     97 * accesses.
     98 *
     99 * We could update directly the on-disk bitmap, when dm-clone calls either
    100 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
    101 * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
    102 * these two functions don't block, we can call them in interrupt context,
    103 * e.g., in a hooked overwrite bio's completion routine, and further reduce the
    104 * I/O completion latency.
    105 *
    106 * We maintain two dirty bitmap sets. During a metadata commit we atomically
    107 * swap the currently used dmap with the unused one. This allows the metadata
    108 * update functions to run concurrently with an ongoing commit.
    109 */
/* One set of dirty bitmaps; struct dm_clone_metadata holds two of these. */
struct dirty_map {
	/* One bit per long of dirty_regions that contains a dirty bit */
	unsigned long *dirty_words;
	/* One bit per region marked hydrated while this map was current */
	unsigned long *dirty_regions;
	/* Set to 1 whenever a bit is marked; reset to 0 after a successful flush */
	unsigned int changed;
};
    115
/* In-core state of an open dm-clone metadata device. */
struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Sizes supplied by the caller of dm_clone_metadata_open() */
	sector_t target_size;
	sector_t region_size;
	/* target_size divided by region_size, rounded up */
	unsigned long nr_regions;
	/* Number of longs needed to hold nr_regions bits */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/* Dirty map that new updates are recorded in; swapped under bitmap_lock */
	struct dirty_map *current_dmap;

	/* Protected by lock (the rw_semaphore below); NULL when no commit is pending */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	/* Serializes the metadata operations that touch bm/sm/tm */
	struct rw_semaphore lock;

	struct dm_disk_bitset bitset_info;
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set once every bit of region_map is found to be set */
	bool hydration_done:1;
	/* Set when re-creating the persistent data structures fails on abort */
	bool fail_io:1;
};
    160
    161/*---------------------------------------------------------------------------*/
    162
    163/*
    164 * Superblock validation.
    165 */
    166static void sb_prepare_for_write(struct dm_block_validator *v,
    167				 struct dm_block *b, size_t sb_block_size)
    168{
    169	struct superblock_disk *sb;
    170	u32 csum;
    171
    172	sb = dm_block_data(b);
    173	sb->blocknr = cpu_to_le64(dm_block_location(b));
    174
    175	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
    176			      SUPERBLOCK_CSUM_XOR);
    177	sb->csum = cpu_to_le32(csum);
    178}
    179
    180static int sb_check(struct dm_block_validator *v, struct dm_block *b,
    181		    size_t sb_block_size)
    182{
    183	struct superblock_disk *sb;
    184	u32 csum, metadata_version;
    185
    186	sb = dm_block_data(b);
    187
    188	if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
    189		DMERR("Superblock check failed: blocknr %llu, expected %llu",
    190		      le64_to_cpu(sb->blocknr),
    191		      (unsigned long long)dm_block_location(b));
    192		return -ENOTBLK;
    193	}
    194
    195	if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
    196		DMERR("Superblock check failed: magic %llu, expected %llu",
    197		      le64_to_cpu(sb->magic),
    198		      (unsigned long long)SUPERBLOCK_MAGIC);
    199		return -EILSEQ;
    200	}
    201
    202	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
    203			      SUPERBLOCK_CSUM_XOR);
    204	if (sb->csum != cpu_to_le32(csum)) {
    205		DMERR("Superblock check failed: checksum %u, expected %u",
    206		      csum, le32_to_cpu(sb->csum));
    207		return -EILSEQ;
    208	}
    209
    210	/* Check metadata version */
    211	metadata_version = le32_to_cpu(sb->version);
    212	if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
    213	    metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
    214		DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
    215		      metadata_version, DM_CLONE_MIN_METADATA_VERSION,
    216		      DM_CLONE_MAX_METADATA_VERSION);
    217		return -EINVAL;
    218	}
    219
    220	return 0;
    221}
    222
    223static struct dm_block_validator sb_validator = {
    224	.name = "superblock",
    225	.prepare_for_write = sb_prepare_for_write,
    226	.check = sb_check
    227};
    228
    229/*
    230 * Check if the superblock is formatted or not. We consider the superblock to
    231 * be formatted in case we find non-zero bytes in it.
    232 */
    233static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
    234{
    235	int r;
    236	unsigned int i, nr_words;
    237	struct dm_block *sblock;
    238	__le64 *data_le, zero = cpu_to_le64(0);
    239
    240	/*
    241	 * We don't use a validator here because the superblock could be all
    242	 * zeroes.
    243	 */
    244	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
    245	if (r) {
    246		DMERR("Failed to read_lock superblock");
    247		return r;
    248	}
    249
    250	data_le = dm_block_data(sblock);
    251	*formatted = false;
    252
    253	/* This assumes that the block size is a multiple of 8 bytes */
    254	BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
    255	nr_words = dm_bm_block_size(bm) / sizeof(__le64);
    256	for (i = 0; i < nr_words; i++) {
    257		if (data_le[i] != zero) {
    258			*formatted = true;
    259			break;
    260		}
    261	}
    262
    263	dm_bm_unlock(sblock);
    264
    265	return 0;
    266}
    267
    268/*---------------------------------------------------------------------------*/
    269
    270/*
    271 * Low-level metadata handling.
    272 */
    273static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
    274				       struct dm_block **sblock)
    275{
    276	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
    277}
    278
    279static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
    280					     struct dm_block **sblock)
    281{
    282	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
    283}
    284
    285static int __copy_sm_root(struct dm_clone_metadata *cmd)
    286{
    287	int r;
    288	size_t root_size;
    289
    290	r = dm_sm_root_size(cmd->sm, &root_size);
    291	if (r)
    292		return r;
    293
    294	return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
    295}
    296
    297/* Save dm-clone metadata in superblock */
    298static void __prepare_superblock(struct dm_clone_metadata *cmd,
    299				 struct superblock_disk *sb)
    300{
    301	sb->flags = cpu_to_le32(0UL);
    302
    303	/* FIXME: UUID is currently unused */
    304	memset(sb->uuid, 0, sizeof(sb->uuid));
    305
    306	sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
    307	sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
    308
    309	/* Save the metadata space_map root */
    310	memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
    311	       sizeof(cmd->metadata_space_map_root));
    312
    313	sb->region_size = cpu_to_le64(cmd->region_size);
    314	sb->target_size = cpu_to_le64(cmd->target_size);
    315	sb->bitset_root = cpu_to_le64(cmd->bitset_root);
    316}
    317
    318static int __open_metadata(struct dm_clone_metadata *cmd)
    319{
    320	int r;
    321	struct dm_block *sblock;
    322	struct superblock_disk *sb;
    323
    324	r = superblock_read_lock(cmd, &sblock);
    325
    326	if (r) {
    327		DMERR("Failed to read_lock superblock");
    328		return r;
    329	}
    330
    331	sb = dm_block_data(sblock);
    332
    333	/* Verify that target_size and region_size haven't changed. */
    334	if (cmd->region_size != le64_to_cpu(sb->region_size) ||
    335	    cmd->target_size != le64_to_cpu(sb->target_size)) {
    336		DMERR("Region and/or target size don't match the ones in metadata");
    337		r = -EINVAL;
    338		goto out_with_lock;
    339	}
    340
    341	r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
    342			       sb->metadata_space_map_root,
    343			       sizeof(sb->metadata_space_map_root),
    344			       &cmd->tm, &cmd->sm);
    345
    346	if (r) {
    347		DMERR("dm_tm_open_with_sm failed");
    348		goto out_with_lock;
    349	}
    350
    351	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
    352	cmd->bitset_root = le64_to_cpu(sb->bitset_root);
    353
    354out_with_lock:
    355	dm_bm_unlock(sblock);
    356
    357	return r;
    358}
    359
    360static int __format_metadata(struct dm_clone_metadata *cmd)
    361{
    362	int r;
    363	struct dm_block *sblock;
    364	struct superblock_disk *sb;
    365
    366	r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
    367	if (r) {
    368		DMERR("Failed to create transaction manager");
    369		return r;
    370	}
    371
    372	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
    373
    374	r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
    375	if (r) {
    376		DMERR("Failed to create empty on-disk bitset");
    377		goto err_with_tm;
    378	}
    379
    380	r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
    381			     cmd->nr_regions, false, &cmd->bitset_root);
    382	if (r) {
    383		DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
    384		goto err_with_tm;
    385	}
    386
    387	/* Flush to disk all blocks, except the superblock */
    388	r = dm_tm_pre_commit(cmd->tm);
    389	if (r) {
    390		DMERR("dm_tm_pre_commit failed");
    391		goto err_with_tm;
    392	}
    393
    394	r = __copy_sm_root(cmd);
    395	if (r) {
    396		DMERR("__copy_sm_root failed");
    397		goto err_with_tm;
    398	}
    399
    400	r = superblock_write_lock_zero(cmd, &sblock);
    401	if (r) {
    402		DMERR("Failed to write_lock superblock");
    403		goto err_with_tm;
    404	}
    405
    406	sb = dm_block_data(sblock);
    407	__prepare_superblock(cmd, sb);
    408	r = dm_tm_commit(cmd->tm, sblock);
    409	if (r) {
    410		DMERR("Failed to commit superblock");
    411		goto err_with_tm;
    412	}
    413
    414	return 0;
    415
    416err_with_tm:
    417	dm_sm_destroy(cmd->sm);
    418	dm_tm_destroy(cmd->tm);
    419
    420	return r;
    421}
    422
    423static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
    424{
    425	int r;
    426	bool formatted = false;
    427
    428	r = __superblock_all_zeroes(cmd->bm, &formatted);
    429	if (r)
    430		return r;
    431
    432	if (!formatted)
    433		return may_format_device ? __format_metadata(cmd) : -EPERM;
    434
    435	return __open_metadata(cmd);
    436}
    437
    438static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
    439					       bool may_format_device)
    440{
    441	int r;
    442
    443	/* Create block manager */
    444	cmd->bm = dm_block_manager_create(cmd->bdev,
    445					 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
    446					 DM_CLONE_MAX_CONCURRENT_LOCKS);
    447	if (IS_ERR(cmd->bm)) {
    448		DMERR("Failed to create block manager");
    449		return PTR_ERR(cmd->bm);
    450	}
    451
    452	r = __open_or_format_metadata(cmd, may_format_device);
    453	if (r)
    454		dm_block_manager_destroy(cmd->bm);
    455
    456	return r;
    457}
    458
/*
 * Tear down the space map, the transaction manager and the block manager of
 * @cmd, in that order.
 */
static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
{
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}
    465
    466/*---------------------------------------------------------------------------*/
    467
    468static size_t bitmap_size(unsigned long nr_bits)
    469{
    470	return BITS_TO_LONGS(nr_bits) * sizeof(long);
    471}
    472
    473static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
    474			    unsigned long nr_regions)
    475{
    476	dmap->changed = 0;
    477
    478	dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
    479	if (!dmap->dirty_words)
    480		return -ENOMEM;
    481
    482	dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
    483	if (!dmap->dirty_regions) {
    484		kvfree(dmap->dirty_words);
    485		return -ENOMEM;
    486	}
    487
    488	return 0;
    489}
    490
/* Free both bitmaps of @dmap; the pointers themselves are not reset. */
static void __dirty_map_exit(struct dirty_map *dmap)
{
	kvfree(dmap->dirty_words);
	kvfree(dmap->dirty_regions);
}
    496
    497static int dirty_map_init(struct dm_clone_metadata *cmd)
    498{
    499	if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
    500		DMERR("Failed to allocate dirty bitmap");
    501		return -ENOMEM;
    502	}
    503
    504	if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
    505		DMERR("Failed to allocate dirty bitmap");
    506		__dirty_map_exit(&cmd->dmap[0]);
    507		return -ENOMEM;
    508	}
    509
    510	cmd->current_dmap = &cmd->dmap[0];
    511	cmd->committing_dmap = NULL;
    512
    513	return 0;
    514}
    515
    516static void dirty_map_exit(struct dm_clone_metadata *cmd)
    517{
    518	__dirty_map_exit(&cmd->dmap[0]);
    519	__dirty_map_exit(&cmd->dmap[1]);
    520}
    521
    522static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
    523{
    524	int r;
    525	unsigned long i;
    526	struct dm_bitset_cursor c;
    527
    528	/* Flush bitset cache */
    529	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
    530	if (r)
    531		return r;
    532
    533	r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
    534	if (r)
    535		return r;
    536
    537	for (i = 0; ; i++) {
    538		if (dm_bitset_cursor_get_value(&c))
    539			__set_bit(i, cmd->region_map);
    540		else
    541			__clear_bit(i, cmd->region_map);
    542
    543		if (i >= (cmd->nr_regions - 1))
    544			break;
    545
    546		r = dm_bitset_cursor_next(&c);
    547
    548		if (r)
    549			break;
    550	}
    551
    552	dm_bitset_cursor_end(&c);
    553
    554	return r;
    555}
    556
    557struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
    558						 sector_t target_size,
    559						 sector_t region_size)
    560{
    561	int r;
    562	struct dm_clone_metadata *cmd;
    563
    564	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
    565	if (!cmd) {
    566		DMERR("Failed to allocate memory for dm-clone metadata");
    567		return ERR_PTR(-ENOMEM);
    568	}
    569
    570	cmd->bdev = bdev;
    571	cmd->target_size = target_size;
    572	cmd->region_size = region_size;
    573	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
    574	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
    575
    576	init_rwsem(&cmd->lock);
    577	spin_lock_init(&cmd->bitmap_lock);
    578	cmd->read_only = 0;
    579	cmd->fail_io = false;
    580	cmd->hydration_done = false;
    581
    582	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
    583	if (!cmd->region_map) {
    584		DMERR("Failed to allocate memory for region bitmap");
    585		r = -ENOMEM;
    586		goto out_with_md;
    587	}
    588
    589	r = __create_persistent_data_structures(cmd, true);
    590	if (r)
    591		goto out_with_region_map;
    592
    593	r = __load_bitset_in_core(cmd);
    594	if (r) {
    595		DMERR("Failed to load on-disk region map");
    596		goto out_with_pds;
    597	}
    598
    599	r = dirty_map_init(cmd);
    600	if (r)
    601		goto out_with_pds;
    602
    603	if (bitmap_full(cmd->region_map, cmd->nr_regions))
    604		cmd->hydration_done = true;
    605
    606	return cmd;
    607
    608out_with_pds:
    609	__destroy_persistent_data_structures(cmd);
    610
    611out_with_region_map:
    612	kvfree(cmd->region_map);
    613
    614out_with_md:
    615	kfree(cmd);
    616
    617	return ERR_PTR(r);
    618}
    619
    620void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
    621{
    622	if (!cmd->fail_io)
    623		__destroy_persistent_data_structures(cmd);
    624
    625	dirty_map_exit(cmd);
    626	kvfree(cmd->region_map);
    627	kfree(cmd);
    628}
    629
/*
 * Return the cached hydration_done flag of @cmd. The flag is read without
 * taking any lock.
 */
bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
{
	return cmd->hydration_done;
}
    634
    635bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
    636{
    637	return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
    638}
    639
    640bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
    641				unsigned long start, unsigned long nr_regions)
    642{
    643	unsigned long bit;
    644
    645	if (dm_clone_is_hydration_done(cmd))
    646		return true;
    647
    648	bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
    649
    650	return (bit >= (start + nr_regions));
    651}
    652
/*
 * Return the number of bits set among the first cmd->nr_regions bits of the
 * in-core region map, i.e. the number of regions marked as hydrated.
 */
unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
{
	return bitmap_weight(cmd->region_map, cmd->nr_regions);
}
    657
/*
 * Return the index of the first clear bit in the in-core region map at or
 * after @start, searching up to cmd->nr_regions.
 *
 * NOTE(review): presumably yields cmd->nr_regions when no clear bit is left;
 * confirm against find_next_zero_bit().
 */
unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
						   unsigned long start)
{
	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
}
    663
    664static int __update_metadata_word(struct dm_clone_metadata *cmd,
    665				  unsigned long *dirty_regions,
    666				  unsigned long word)
    667{
    668	int r;
    669	unsigned long index = word * BITS_PER_LONG;
    670	unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
    671
    672	while (index < max_index) {
    673		if (test_bit(index, dirty_regions)) {
    674			r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
    675					      index, &cmd->bitset_root);
    676			if (r) {
    677				DMERR("dm_bitset_set_bit failed");
    678				return r;
    679			}
    680			__clear_bit(index, dirty_regions);
    681		}
    682		index++;
    683	}
    684
    685	return 0;
    686}
    687
    688static int __metadata_commit(struct dm_clone_metadata *cmd)
    689{
    690	int r;
    691	struct dm_block *sblock;
    692	struct superblock_disk *sb;
    693
    694	/* Flush bitset cache */
    695	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
    696	if (r) {
    697		DMERR("dm_bitset_flush failed");
    698		return r;
    699	}
    700
    701	/* Flush to disk all blocks, except the superblock */
    702	r = dm_tm_pre_commit(cmd->tm);
    703	if (r) {
    704		DMERR("dm_tm_pre_commit failed");
    705		return r;
    706	}
    707
    708	/* Save the space map root in cmd->metadata_space_map_root */
    709	r = __copy_sm_root(cmd);
    710	if (r) {
    711		DMERR("__copy_sm_root failed");
    712		return r;
    713	}
    714
    715	/* Lock the superblock */
    716	r = superblock_write_lock_zero(cmd, &sblock);
    717	if (r) {
    718		DMERR("Failed to write_lock superblock");
    719		return r;
    720	}
    721
    722	/* Save the metadata in superblock */
    723	sb = dm_block_data(sblock);
    724	__prepare_superblock(cmd, sb);
    725
    726	/* Unlock superblock and commit it to disk */
    727	r = dm_tm_commit(cmd->tm, sblock);
    728	if (r) {
    729		DMERR("Failed to commit superblock");
    730		return r;
    731	}
    732
    733	/*
    734	 * FIXME: Find a more efficient way to check if the hydration is done.
    735	 */
    736	if (bitmap_full(cmd->region_map, cmd->nr_regions))
    737		cmd->hydration_done = true;
    738
    739	return 0;
    740}
    741
    742static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
    743{
    744	int r;
    745	unsigned long word;
    746
    747	word = 0;
    748	do {
    749		word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
    750
    751		if (word == cmd->nr_words)
    752			break;
    753
    754		r = __update_metadata_word(cmd, dmap->dirty_regions, word);
    755
    756		if (r)
    757			return r;
    758
    759		__clear_bit(word, dmap->dirty_words);
    760		word++;
    761	} while (word < cmd->nr_words);
    762
    763	r = __metadata_commit(cmd);
    764
    765	if (r)
    766		return r;
    767
    768	/* Update the changed flag */
    769	spin_lock_irq(&cmd->bitmap_lock);
    770	dmap->changed = 0;
    771	spin_unlock_irq(&cmd->bitmap_lock);
    772
    773	return 0;
    774}
    775
    776int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
    777{
    778	int r = 0;
    779	struct dirty_map *dmap, *next_dmap;
    780
    781	down_write(&cmd->lock);
    782
    783	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
    784		r = -EPERM;
    785		goto out;
    786	}
    787
    788	/* Get current dirty bitmap */
    789	dmap = cmd->current_dmap;
    790
    791	/* Get next dirty bitmap */
    792	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
    793
    794	/*
    795	 * The last commit failed, so we don't have a clean dirty-bitmap to
    796	 * use.
    797	 */
    798	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
    799		r = -EINVAL;
    800		goto out;
    801	}
    802
    803	/* Swap dirty bitmaps */
    804	spin_lock_irq(&cmd->bitmap_lock);
    805	cmd->current_dmap = next_dmap;
    806	spin_unlock_irq(&cmd->bitmap_lock);
    807
    808	/* Set old dirty bitmap as currently committing */
    809	cmd->committing_dmap = dmap;
    810out:
    811	up_write(&cmd->lock);
    812
    813	return r;
    814}
    815
    816int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
    817{
    818	int r = -EPERM;
    819
    820	down_write(&cmd->lock);
    821
    822	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
    823		goto out;
    824
    825	if (WARN_ON(!cmd->committing_dmap)) {
    826		r = -EINVAL;
    827		goto out;
    828	}
    829
    830	r = __flush_dmap(cmd, cmd->committing_dmap);
    831	if (!r) {
    832		/* Clear committing dmap */
    833		cmd->committing_dmap = NULL;
    834	}
    835out:
    836	up_write(&cmd->lock);
    837
    838	return r;
    839}
    840
    841int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
    842{
    843	int r = 0;
    844	struct dirty_map *dmap;
    845	unsigned long word, flags;
    846
    847	if (unlikely(region_nr >= cmd->nr_regions)) {
    848		DMERR("Region %lu out of range (total number of regions %lu)",
    849		      region_nr, cmd->nr_regions);
    850		return -ERANGE;
    851	}
    852
    853	word = region_nr / BITS_PER_LONG;
    854
    855	spin_lock_irqsave(&cmd->bitmap_lock, flags);
    856
    857	if (cmd->read_only) {
    858		r = -EPERM;
    859		goto out;
    860	}
    861
    862	dmap = cmd->current_dmap;
    863
    864	__set_bit(word, dmap->dirty_words);
    865	__set_bit(region_nr, dmap->dirty_regions);
    866	__set_bit(region_nr, cmd->region_map);
    867	dmap->changed = 1;
    868
    869out:
    870	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
    871
    872	return r;
    873}
    874
    875int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
    876			    unsigned long nr_regions)
    877{
    878	int r = 0;
    879	struct dirty_map *dmap;
    880	unsigned long word, region_nr;
    881
    882	if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
    883		     (start + nr_regions) > cmd->nr_regions)) {
    884		DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
    885		      start, nr_regions, cmd->nr_regions);
    886		return -ERANGE;
    887	}
    888
    889	spin_lock_irq(&cmd->bitmap_lock);
    890
    891	if (cmd->read_only) {
    892		r = -EPERM;
    893		goto out;
    894	}
    895
    896	dmap = cmd->current_dmap;
    897	for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
    898		if (!test_bit(region_nr, cmd->region_map)) {
    899			word = region_nr / BITS_PER_LONG;
    900			__set_bit(word, dmap->dirty_words);
    901			__set_bit(region_nr, dmap->dirty_regions);
    902			__set_bit(region_nr, cmd->region_map);
    903			dmap->changed = 1;
    904		}
    905	}
    906out:
    907	spin_unlock_irq(&cmd->bitmap_lock);
    908
    909	return r;
    910}
    911
    912/*
    913 * WARNING: This must not be called concurrently with either
    914 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
    915 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
    916 * exception is after setting the metadata to read-only mode, using
    917 * dm_clone_metadata_set_read_only().
    918 *
    919 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
    920 * may block.
    921 */
    922int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
    923{
    924	int r = -EINVAL;
    925
    926	down_write(&cmd->lock);
    927
    928	if (cmd->fail_io)
    929		goto out;
    930
    931	r = __load_bitset_in_core(cmd);
    932out:
    933	up_write(&cmd->lock);
    934
    935	return r;
    936}
    937
    938bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
    939{
    940	bool r;
    941	unsigned long flags;
    942
    943	spin_lock_irqsave(&cmd->bitmap_lock, flags);
    944	r = cmd->dmap[0].changed || cmd->dmap[1].changed;
    945	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
    946
    947	return r;
    948}
    949
    950int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
    951{
    952	int r = -EPERM;
    953
    954	down_write(&cmd->lock);
    955
    956	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
    957		goto out;
    958
    959	__destroy_persistent_data_structures(cmd);
    960
    961	r = __create_persistent_data_structures(cmd, false);
    962	if (r) {
    963		/* If something went wrong we can neither write nor read the metadata */
    964		cmd->fail_io = true;
    965	}
    966out:
    967	up_write(&cmd->lock);
    968
    969	return r;
    970}
    971
    972void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
    973{
    974	down_write(&cmd->lock);
    975
    976	spin_lock_irq(&cmd->bitmap_lock);
    977	cmd->read_only = 1;
    978	spin_unlock_irq(&cmd->bitmap_lock);
    979
    980	if (!cmd->fail_io)
    981		dm_bm_set_read_only(cmd->bm);
    982
    983	up_write(&cmd->lock);
    984}
    985
    986void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
    987{
    988	down_write(&cmd->lock);
    989
    990	spin_lock_irq(&cmd->bitmap_lock);
    991	cmd->read_only = 0;
    992	spin_unlock_irq(&cmd->bitmap_lock);
    993
    994	if (!cmd->fail_io)
    995		dm_bm_set_read_write(cmd->bm);
    996
    997	up_write(&cmd->lock);
    998}
    999
   1000int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
   1001					   dm_block_t *result)
   1002{
   1003	int r = -EINVAL;
   1004
   1005	down_read(&cmd->lock);
   1006
   1007	if (!cmd->fail_io)
   1008		r = dm_sm_get_nr_free(cmd->sm, result);
   1009
   1010	up_read(&cmd->lock);
   1011
   1012	return r;
   1013}
   1014
   1015int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
   1016				   dm_block_t *result)
   1017{
   1018	int r = -EINVAL;
   1019
   1020	down_read(&cmd->lock);
   1021
   1022	if (!cmd->fail_io)
   1023		r = dm_sm_get_nr_blocks(cmd->sm, result);
   1024
   1025	up_read(&cmd->lock);
   1026
   1027	return r;
   1028}