cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dm-cache-metadata.c (43086B)


      1/*
      2 * Copyright (C) 2012 Red Hat, Inc.
      3 *
      4 * This file is released under the GPL.
      5 */
      6
      7#include "dm-cache-metadata.h"
      8
      9#include "persistent-data/dm-array.h"
     10#include "persistent-data/dm-bitset.h"
     11#include "persistent-data/dm-space-map.h"
     12#include "persistent-data/dm-space-map-disk.h"
     13#include "persistent-data/dm-transaction-manager.h"
     14
     15#include <linux/device-mapper.h>
     16#include <linux/refcount.h>
     17
     18/*----------------------------------------------------------------*/
     19
     20#define DM_MSG_PREFIX   "cache metadata"
     21
     22#define CACHE_SUPERBLOCK_MAGIC 06142003
     23#define CACHE_SUPERBLOCK_LOCATION 0
     24
     25/*
     26 * defines a range of metadata versions that this module can handle.
     27 */
     28#define MIN_CACHE_VERSION 1
     29#define MAX_CACHE_VERSION 2
     30
     31/*
     32 *  3 for btree insert +
     33 *  2 for btree lookup used within space map
     34 */
     35#define CACHE_MAX_CONCURRENT_LOCKS 5
     36#define SPACE_MAP_ROOT_SIZE 128
     37
     38enum superblock_flag_bits {
     39	/* for spotting crashes that would invalidate the dirty bitset */
     40	CLEAN_SHUTDOWN,
     41	/* metadata must be checked using the tools */
     42	NEEDS_CHECK,
     43};
     44
     45/*
     46 * Each mapping from cache block -> origin block carries a set of flags.
     47 */
     48enum mapping_bits {
     49	/*
     50	 * A valid mapping.  Because we're using an array we clear this
      51	 * flag for a non-existent mapping.
     52	 */
     53	M_VALID = 1,
     54
     55	/*
     56	 * The data on the cache is different from that on the origin.
     57	 * This flag is only used by metadata format 1.
     58	 */
     59	M_DIRTY = 2
     60};
     61
     62struct cache_disk_superblock {
     63	__le32 csum;
     64	__le32 flags;
     65	__le64 blocknr;
     66
     67	__u8 uuid[16];
     68	__le64 magic;
     69	__le32 version;
     70
     71	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
     72	__le32 policy_hint_size;
     73
     74	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
     75	__le64 mapping_root;
     76	__le64 hint_root;
     77
     78	__le64 discard_root;
     79	__le64 discard_block_size;
     80	__le64 discard_nr_blocks;
     81
     82	__le32 data_block_size;
     83	__le32 metadata_block_size;
     84	__le32 cache_blocks;
     85
     86	__le32 compat_flags;
     87	__le32 compat_ro_flags;
     88	__le32 incompat_flags;
     89
     90	__le32 read_hits;
     91	__le32 read_misses;
     92	__le32 write_hits;
     93	__le32 write_misses;
     94
     95	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
     96
     97	/*
     98	 * Metadata format 2 fields.
     99	 */
    100	__le64 dirty_root;
    101} __packed;
    102
    103struct dm_cache_metadata {
    104	refcount_t ref_count;
    105	struct list_head list;
    106
    107	unsigned version;
    108	struct block_device *bdev;
    109	struct dm_block_manager *bm;
    110	struct dm_space_map *metadata_sm;
    111	struct dm_transaction_manager *tm;
    112
    113	struct dm_array_info info;
    114	struct dm_array_info hint_info;
    115	struct dm_disk_bitset discard_info;
    116
    117	struct rw_semaphore root_lock;
    118	unsigned long flags;
    119	dm_block_t root;
    120	dm_block_t hint_root;
    121	dm_block_t discard_root;
    122
    123	sector_t discard_block_size;
    124	dm_dblock_t discard_nr_blocks;
    125
    126	sector_t data_block_size;
    127	dm_cblock_t cache_blocks;
    128	bool changed:1;
    129	bool clean_when_opened:1;
    130
    131	char policy_name[CACHE_POLICY_NAME_SIZE];
    132	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
    133	size_t policy_hint_size;
    134	struct dm_cache_statistics stats;
    135
    136	/*
    137	 * Reading the space map root can fail, so we read it into this
    138	 * buffer before the superblock is locked and updated.
    139	 */
    140	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
    141
    142	/*
    143	 * Set if a transaction has to be aborted but the attempt to roll
    144	 * back to the previous (good) transaction failed.  The only
    145	 * metadata operation permissible in this state is the closing of
    146	 * the device.
    147	 */
    148	bool fail_io:1;
    149
    150	/*
    151	 * Metadata format 2 fields.
    152	 */
    153	dm_block_t dirty_root;
    154	struct dm_disk_bitset dirty_info;
    155
    156	/*
    157	 * These structures are used when loading metadata.  They're too
    158	 * big to put on the stack.
    159	 */
    160	struct dm_array_cursor mapping_cursor;
    161	struct dm_array_cursor hint_cursor;
    162	struct dm_bitset_cursor dirty_cursor;
    163};
    164
    165/*-------------------------------------------------------------------
    166 * superblock validator
    167 *-----------------------------------------------------------------*/
    168
    169#define SUPERBLOCK_CSUM_XOR 9031977
    170
    171static void sb_prepare_for_write(struct dm_block_validator *v,
    172				 struct dm_block *b,
    173				 size_t sb_block_size)
    174{
    175	struct cache_disk_superblock *disk_super = dm_block_data(b);
    176
    177	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
    178	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
    179						      sb_block_size - sizeof(__le32),
    180						      SUPERBLOCK_CSUM_XOR));
    181}
    182
    183static int check_metadata_version(struct cache_disk_superblock *disk_super)
    184{
    185	uint32_t metadata_version = le32_to_cpu(disk_super->version);
    186
    187	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
    188		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
    189		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
    190		return -EINVAL;
    191	}
    192
    193	return 0;
    194}
    195
    196static int sb_check(struct dm_block_validator *v,
    197		    struct dm_block *b,
    198		    size_t sb_block_size)
    199{
    200	struct cache_disk_superblock *disk_super = dm_block_data(b);
    201	__le32 csum_le;
    202
    203	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
    204		DMERR("sb_check failed: blocknr %llu: wanted %llu",
    205		      le64_to_cpu(disk_super->blocknr),
    206		      (unsigned long long)dm_block_location(b));
    207		return -ENOTBLK;
    208	}
    209
    210	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
    211		DMERR("sb_check failed: magic %llu: wanted %llu",
    212		      le64_to_cpu(disk_super->magic),
    213		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
    214		return -EILSEQ;
    215	}
    216
    217	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
    218					     sb_block_size - sizeof(__le32),
    219					     SUPERBLOCK_CSUM_XOR));
    220	if (csum_le != disk_super->csum) {
    221		DMERR("sb_check failed: csum %u: wanted %u",
    222		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
    223		return -EILSEQ;
    224	}
    225
    226	return check_metadata_version(disk_super);
    227}
    228
    229static struct dm_block_validator sb_validator = {
    230	.name = "superblock",
    231	.prepare_for_write = sb_prepare_for_write,
    232	.check = sb_check
    233};
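
/*
 * Note (illustrative, not from the original source): both callbacks above
 * checksum the block starting at disk_super->flags for
 * sb_block_size - sizeof(__le32) bytes, so the leading csum field is
 * excluded from its own checksum; the same SUPERBLOCK_CSUM_XOR value is
 * passed to dm_bm_checksum() on the write and verify paths.
 */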
    234
    235/*----------------------------------------------------------------*/
    236
    237static int superblock_read_lock(struct dm_cache_metadata *cmd,
    238				struct dm_block **sblock)
    239{
    240	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
    241			       &sb_validator, sblock);
    242}
    243
    244static int superblock_lock_zero(struct dm_cache_metadata *cmd,
    245				struct dm_block **sblock)
    246{
    247	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
    248				     &sb_validator, sblock);
    249}
    250
    251static int superblock_lock(struct dm_cache_metadata *cmd,
    252			   struct dm_block **sblock)
    253{
    254	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
    255				&sb_validator, sblock);
    256}
    257
    258/*----------------------------------------------------------------*/
    259
    260static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
    261{
    262	int r;
    263	unsigned i;
    264	struct dm_block *b;
    265	__le64 *data_le, zero = cpu_to_le64(0);
    266	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
    267
    268	/*
    269	 * We can't use a validator here - it may be all zeroes.
    270	 */
    271	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
    272	if (r)
    273		return r;
    274
    275	data_le = dm_block_data(b);
    276	*result = true;
    277	for (i = 0; i < sb_block_size; i++) {
    278		if (data_le[i] != zero) {
    279			*result = false;
    280			break;
    281		}
    282	}
    283
    284	dm_bm_unlock(b);
    285
    286	return 0;
    287}
    288
    289static void __setup_mapping_info(struct dm_cache_metadata *cmd)
    290{
    291	struct dm_btree_value_type vt;
    292
    293	vt.context = NULL;
    294	vt.size = sizeof(__le64);
    295	vt.inc = NULL;
    296	vt.dec = NULL;
    297	vt.equal = NULL;
    298	dm_array_info_init(&cmd->info, cmd->tm, &vt);
    299
    300	if (cmd->policy_hint_size) {
    301		vt.size = sizeof(__le32);
    302		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
    303	}
    304}
    305
    306static int __save_sm_root(struct dm_cache_metadata *cmd)
    307{
    308	int r;
    309	size_t metadata_len;
    310
    311	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
    312	if (r < 0)
    313		return r;
    314
    315	return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
    316			       metadata_len);
    317}
    318
    319static void __copy_sm_root(struct dm_cache_metadata *cmd,
    320			   struct cache_disk_superblock *disk_super)
    321{
    322	memcpy(&disk_super->metadata_space_map_root,
    323	       &cmd->metadata_space_map_root,
    324	       sizeof(cmd->metadata_space_map_root));
    325}
    326
    327static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
    328{
    329	return cmd->version >= 2;
    330}
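
/*
 * Illustrative contrast (summary of the code in this file, not upstream
 * commentary): with metadata format 1 the dirty state lives in the
 * M_DIRTY flag packed into each mapping entry, so changing it means
 * rewriting that mapping.  With format 2 (separate_dirty_bits() returning
 * true) the dirty state lives in its own on-disk bitset rooted at
 * cmd->dirty_root, which can be flushed or rebuilt independently of the
 * mapping array.
 */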
    331
    332static int __write_initial_superblock(struct dm_cache_metadata *cmd)
    333{
    334	int r;
    335	struct dm_block *sblock;
    336	struct cache_disk_superblock *disk_super;
    337	sector_t bdev_size = bdev_nr_sectors(cmd->bdev);
    338
    339	/* FIXME: see if we can lose the max sectors limit */
    340	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
    341		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
    342
    343	r = dm_tm_pre_commit(cmd->tm);
    344	if (r < 0)
    345		return r;
    346
    347	/*
    348	 * dm_sm_copy_root() can fail.  So we need to do it before we start
    349	 * updating the superblock.
    350	 */
    351	r = __save_sm_root(cmd);
    352	if (r)
    353		return r;
    354
    355	r = superblock_lock_zero(cmd, &sblock);
    356	if (r)
    357		return r;
    358
    359	disk_super = dm_block_data(sblock);
    360	disk_super->flags = 0;
    361	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
    362	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
    363	disk_super->version = cpu_to_le32(cmd->version);
    364	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
    365	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
    366	disk_super->policy_hint_size = cpu_to_le32(0);
    367
    368	__copy_sm_root(cmd, disk_super);
    369
    370	disk_super->mapping_root = cpu_to_le64(cmd->root);
    371	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
    372	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
    373	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
    374	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
    375	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
    376	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
    377	disk_super->cache_blocks = cpu_to_le32(0);
    378
    379	disk_super->read_hits = cpu_to_le32(0);
    380	disk_super->read_misses = cpu_to_le32(0);
    381	disk_super->write_hits = cpu_to_le32(0);
    382	disk_super->write_misses = cpu_to_le32(0);
    383
    384	if (separate_dirty_bits(cmd))
    385		disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
    386
    387	return dm_tm_commit(cmd->tm, sblock);
    388}
    389
    390static int __format_metadata(struct dm_cache_metadata *cmd)
    391{
    392	int r;
    393
    394	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
    395				 &cmd->tm, &cmd->metadata_sm);
    396	if (r < 0) {
    397		DMERR("tm_create_with_sm failed");
    398		return r;
    399	}
    400
    401	__setup_mapping_info(cmd);
    402
    403	r = dm_array_empty(&cmd->info, &cmd->root);
    404	if (r < 0)
    405		goto bad;
    406
    407	if (separate_dirty_bits(cmd)) {
    408		dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
    409		r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
    410		if (r < 0)
    411			goto bad;
    412	}
    413
    414	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
    415	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
    416	if (r < 0)
    417		goto bad;
    418
    419	cmd->discard_block_size = 0;
    420	cmd->discard_nr_blocks = 0;
    421
    422	r = __write_initial_superblock(cmd);
    423	if (r)
    424		goto bad;
    425
    426	cmd->clean_when_opened = true;
    427	return 0;
    428
    429bad:
    430	dm_tm_destroy(cmd->tm);
    431	dm_sm_destroy(cmd->metadata_sm);
    432
    433	return r;
    434}
    435
    436static int __check_incompat_features(struct cache_disk_superblock *disk_super,
    437				     struct dm_cache_metadata *cmd)
    438{
    439	uint32_t incompat_flags, features;
    440
    441	incompat_flags = le32_to_cpu(disk_super->incompat_flags);
    442	features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
    443	if (features) {
    444		DMERR("could not access metadata due to unsupported optional features (%lx).",
    445		      (unsigned long)features);
    446		return -EINVAL;
    447	}
    448
    449	/*
    450	 * Check for read-only metadata to skip the following RDWR checks.
    451	 */
    452	if (bdev_read_only(cmd->bdev))
    453		return 0;
    454
    455	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
    456	if (features) {
    457		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
    458		      (unsigned long)features);
    459		return -EINVAL;
    460	}
    461
    462	return 0;
    463}
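
/*
 * Summary of the checks above (illustrative):
 *
 *   unknown incompat flag   -> -EINVAL, metadata unusable in any mode
 *   unknown compat_ro flag  -> -EINVAL, unless the metadata device is
 *                              read-only (the check is then skipped)
 *   unknown compat flag     -> ignored; not inspected here
 */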
    464
    465static int __open_metadata(struct dm_cache_metadata *cmd)
    466{
    467	int r;
    468	struct dm_block *sblock;
    469	struct cache_disk_superblock *disk_super;
    470	unsigned long sb_flags;
    471
    472	r = superblock_read_lock(cmd, &sblock);
    473	if (r < 0) {
    474		DMERR("couldn't read lock superblock");
    475		return r;
    476	}
    477
    478	disk_super = dm_block_data(sblock);
    479
    480	/* Verify the data block size hasn't changed */
    481	if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
    482		DMERR("changing the data block size (from %u to %llu) is not supported",
    483		      le32_to_cpu(disk_super->data_block_size),
    484		      (unsigned long long)cmd->data_block_size);
    485		r = -EINVAL;
    486		goto bad;
    487	}
    488
    489	r = __check_incompat_features(disk_super, cmd);
    490	if (r < 0)
    491		goto bad;
    492
    493	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
    494			       disk_super->metadata_space_map_root,
    495			       sizeof(disk_super->metadata_space_map_root),
    496			       &cmd->tm, &cmd->metadata_sm);
    497	if (r < 0) {
    498		DMERR("tm_open_with_sm failed");
    499		goto bad;
    500	}
    501
    502	__setup_mapping_info(cmd);
    503	dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
    504	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
    505	sb_flags = le32_to_cpu(disk_super->flags);
    506	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
    507	dm_bm_unlock(sblock);
    508
    509	return 0;
    510
    511bad:
    512	dm_bm_unlock(sblock);
    513	return r;
    514}
    515
    516static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
    517				     bool format_device)
    518{
    519	int r;
    520	bool unformatted = false;
    521
    522	r = __superblock_all_zeroes(cmd->bm, &unformatted);
    523	if (r)
    524		return r;
    525
    526	if (unformatted)
    527		return format_device ? __format_metadata(cmd) : -EPERM;
    528
    529	return __open_metadata(cmd);
    530}
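
/*
 * Decision summary for __open_or_format_metadata() (illustrative):
 *
 *   superblock all zeroes, format_device == true   -> __format_metadata()
 *   superblock all zeroes, format_device == false  -> -EPERM
 *   superblock present                             -> __open_metadata()
 */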
    531
    532static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
    533					    bool may_format_device)
    534{
    535	int r;
    536	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
    537					  CACHE_MAX_CONCURRENT_LOCKS);
    538	if (IS_ERR(cmd->bm)) {
    539		DMERR("could not create block manager");
    540		r = PTR_ERR(cmd->bm);
    541		cmd->bm = NULL;
    542		return r;
    543	}
    544
    545	r = __open_or_format_metadata(cmd, may_format_device);
    546	if (r) {
    547		dm_block_manager_destroy(cmd->bm);
    548		cmd->bm = NULL;
    549	}
    550
    551	return r;
    552}
    553
    554static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
    555{
    556	dm_sm_destroy(cmd->metadata_sm);
    557	dm_tm_destroy(cmd->tm);
    558	dm_block_manager_destroy(cmd->bm);
    559}
    560
    561typedef unsigned long (*flags_mutator)(unsigned long);
    562
    563static void update_flags(struct cache_disk_superblock *disk_super,
    564			 flags_mutator mutator)
    565{
    566	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
    567	disk_super->flags = cpu_to_le32(sb_flags);
    568}
    569
    570static unsigned long set_clean_shutdown(unsigned long flags)
    571{
    572	set_bit(CLEAN_SHUTDOWN, &flags);
    573	return flags;
    574}
    575
    576static unsigned long clear_clean_shutdown(unsigned long flags)
    577{
    578	clear_bit(CLEAN_SHUTDOWN, &flags);
    579	return flags;
    580}
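
/*
 * Example (how the mutators are used further down): the open path calls
 * __begin_transaction_flags(cmd, clear_clean_shutdown) so a crash while
 * the device is live can be detected, and dm_cache_commit() passes
 * set_clean_shutdown or clear_clean_shutdown into __commit_transaction()
 * depending on whether a clean shutdown was requested.
 */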
    581
    582static void read_superblock_fields(struct dm_cache_metadata *cmd,
    583				   struct cache_disk_superblock *disk_super)
    584{
    585	cmd->version = le32_to_cpu(disk_super->version);
    586	cmd->flags = le32_to_cpu(disk_super->flags);
    587	cmd->root = le64_to_cpu(disk_super->mapping_root);
    588	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
    589	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
    590	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
    591	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
    592	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
    593	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
    594	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
    595	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
    596	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
    597	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
    598	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
    599
    600	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
    601	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
    602	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
    603	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
    604
    605	if (separate_dirty_bits(cmd))
    606		cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
    607
    608	cmd->changed = false;
    609}
    610
    611/*
    612 * The mutator updates the superblock flags.
    613 */
    614static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
    615				     flags_mutator mutator)
    616{
    617	int r;
    618	struct cache_disk_superblock *disk_super;
    619	struct dm_block *sblock;
    620
    621	r = superblock_lock(cmd, &sblock);
    622	if (r)
    623		return r;
    624
    625	disk_super = dm_block_data(sblock);
    626	update_flags(disk_super, mutator);
    627	read_superblock_fields(cmd, disk_super);
    628	dm_bm_unlock(sblock);
    629
    630	return dm_bm_flush(cmd->bm);
    631}
    632
    633static int __begin_transaction(struct dm_cache_metadata *cmd)
    634{
    635	int r;
    636	struct cache_disk_superblock *disk_super;
    637	struct dm_block *sblock;
    638
    639	/*
    640	 * We re-read the superblock every time.  Shouldn't need to do this
    641	 * really.
    642	 */
    643	r = superblock_read_lock(cmd, &sblock);
    644	if (r)
    645		return r;
    646
    647	disk_super = dm_block_data(sblock);
    648	read_superblock_fields(cmd, disk_super);
    649	dm_bm_unlock(sblock);
    650
    651	return 0;
    652}
    653
    654static int __commit_transaction(struct dm_cache_metadata *cmd,
    655				flags_mutator mutator)
    656{
    657	int r;
    658	struct cache_disk_superblock *disk_super;
    659	struct dm_block *sblock;
    660
    661	/*
    662	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
    663	 */
    664	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
    665
    666	if (separate_dirty_bits(cmd)) {
    667		r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
    668				    &cmd->dirty_root);
    669		if (r)
    670			return r;
    671	}
    672
    673	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
    674			    &cmd->discard_root);
    675	if (r)
    676		return r;
    677
    678	r = dm_tm_pre_commit(cmd->tm);
    679	if (r < 0)
    680		return r;
    681
    682	r = __save_sm_root(cmd);
    683	if (r)
    684		return r;
    685
    686	r = superblock_lock(cmd, &sblock);
    687	if (r)
    688		return r;
    689
    690	disk_super = dm_block_data(sblock);
    691
    692	disk_super->flags = cpu_to_le32(cmd->flags);
    693	if (mutator)
    694		update_flags(disk_super, mutator);
    695
    696	disk_super->mapping_root = cpu_to_le64(cmd->root);
    697	if (separate_dirty_bits(cmd))
    698		disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
    699	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
    700	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
    701	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
    702	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
    703	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
    704	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
    705	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
    706	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
    707	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
    708	disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size);
    709
    710	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
    711	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
    712	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
    713	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
    714	__copy_sm_root(cmd, disk_super);
    715
    716	return dm_tm_commit(cmd->tm, sblock);
    717}
    718
    719/*----------------------------------------------------------------*/
    720
    721/*
    722 * The mappings are held in a dm-array that has 64-bit values stored in
     723 * little-endian format.  The index is the cblock, the high 48 bits of the
     724 * value are the oblock and the low 16 bits are the flags.
    725 */
    726#define FLAGS_MASK ((1 << 16) - 1)
    727
    728static __le64 pack_value(dm_oblock_t block, unsigned flags)
    729{
    730	uint64_t value = from_oblock(block);
    731	value <<= 16;
    732	value = value | (flags & FLAGS_MASK);
    733	return cpu_to_le64(value);
    734}
    735
    736static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
    737{
    738	uint64_t value = le64_to_cpu(value_le);
    739	uint64_t b = value >> 16;
    740	*block = to_oblock(b);
    741	*flags = value & FLAGS_MASK;
    742}
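
/*
 * Illustrative round-trip (a sketch added for clarity; this helper is not
 * part of the driver and is never called): mapping origin block 3 with
 * the entry valid and dirty packs to
 * cpu_to_le64((3 << 16) | (M_VALID | M_DIRTY)) == cpu_to_le64(0x30003),
 * and unpacking recovers the same oblock and flags.
 */
static void __maybe_unused example_pack_unpack(void)
{
	dm_oblock_t ob;
	unsigned flags;
	__le64 packed = pack_value(to_oblock(3), M_VALID | M_DIRTY);

	unpack_value(packed, &ob, &flags);
	BUG_ON(from_oblock(ob) != 3 || flags != (M_VALID | M_DIRTY));
}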
    743
    744/*----------------------------------------------------------------*/
    745
    746static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
    747					       sector_t data_block_size,
    748					       bool may_format_device,
    749					       size_t policy_hint_size,
    750					       unsigned metadata_version)
    751{
    752	int r;
    753	struct dm_cache_metadata *cmd;
    754
    755	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
    756	if (!cmd) {
    757		DMERR("could not allocate metadata struct");
    758		return ERR_PTR(-ENOMEM);
    759	}
    760
    761	cmd->version = metadata_version;
    762	refcount_set(&cmd->ref_count, 1);
    763	init_rwsem(&cmd->root_lock);
    764	cmd->bdev = bdev;
    765	cmd->data_block_size = data_block_size;
    766	cmd->cache_blocks = 0;
    767	cmd->policy_hint_size = policy_hint_size;
    768	cmd->changed = true;
    769	cmd->fail_io = false;
    770
    771	r = __create_persistent_data_objects(cmd, may_format_device);
    772	if (r) {
    773		kfree(cmd);
    774		return ERR_PTR(r);
    775	}
    776
    777	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
    778	if (r < 0) {
    779		dm_cache_metadata_close(cmd);
    780		return ERR_PTR(r);
    781	}
    782
    783	return cmd;
    784}
    785
    786/*
    787 * We keep a little list of ref counted metadata objects to prevent two
    788 * different target instances creating separate bufio instances.  This is
    789 * an issue if a table is reloaded before the suspend.
    790 */
    791static DEFINE_MUTEX(table_lock);
    792static LIST_HEAD(table);
    793
    794static struct dm_cache_metadata *lookup(struct block_device *bdev)
    795{
    796	struct dm_cache_metadata *cmd;
    797
    798	list_for_each_entry(cmd, &table, list)
    799		if (cmd->bdev == bdev) {
    800			refcount_inc(&cmd->ref_count);
    801			return cmd;
    802		}
    803
    804	return NULL;
    805}
    806
    807static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
    808						sector_t data_block_size,
    809						bool may_format_device,
    810						size_t policy_hint_size,
    811						unsigned metadata_version)
    812{
    813	struct dm_cache_metadata *cmd, *cmd2;
    814
    815	mutex_lock(&table_lock);
    816	cmd = lookup(bdev);
    817	mutex_unlock(&table_lock);
    818
    819	if (cmd)
    820		return cmd;
    821
    822	cmd = metadata_open(bdev, data_block_size, may_format_device,
    823			    policy_hint_size, metadata_version);
    824	if (!IS_ERR(cmd)) {
    825		mutex_lock(&table_lock);
    826		cmd2 = lookup(bdev);
    827		if (cmd2) {
    828			mutex_unlock(&table_lock);
    829			__destroy_persistent_data_objects(cmd);
    830			kfree(cmd);
    831			return cmd2;
    832		}
    833		list_add(&cmd->list, &table);
    834		mutex_unlock(&table_lock);
    835	}
    836
    837	return cmd;
    838}
    839
    840static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
    841{
    842	if (cmd->data_block_size != data_block_size) {
    843		DMERR("data_block_size (%llu) different from that in metadata (%llu)",
    844		      (unsigned long long) data_block_size,
    845		      (unsigned long long) cmd->data_block_size);
    846		return false;
    847	}
    848
    849	return true;
    850}
    851
    852struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
    853						 sector_t data_block_size,
    854						 bool may_format_device,
    855						 size_t policy_hint_size,
    856						 unsigned metadata_version)
    857{
    858	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
    859						       policy_hint_size, metadata_version);
    860
    861	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
    862		dm_cache_metadata_close(cmd);
    863		return ERR_PTR(-EINVAL);
    864	}
    865
    866	return cmd;
    867}
    868
    869void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
    870{
    871	if (refcount_dec_and_test(&cmd->ref_count)) {
    872		mutex_lock(&table_lock);
    873		list_del(&cmd->list);
    874		mutex_unlock(&table_lock);
    875
    876		if (!cmd->fail_io)
    877			__destroy_persistent_data_objects(cmd);
    878		kfree(cmd);
    879	}
    880}
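
/*
 * Lifetime example (illustrative): two targets opening metadata on the
 * same block device share one dm_cache_metadata object.
 *
 *   cmd_a = dm_cache_metadata_open(bdev, ...);  // created, ref_count == 1
 *   cmd_b = dm_cache_metadata_open(bdev, ...);  // same pointer, ref_count == 2
 *   dm_cache_metadata_close(cmd_b);             // ref_count == 1, nothing freed
 *   dm_cache_metadata_close(cmd_a);             // ref_count == 0, destroyed
 */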
    881
    882/*
    883 * Checks that the given cache block is either unmapped or clean.
    884 */
    885static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
    886				      bool *result)
    887{
    888	int r;
    889	__le64 value;
    890	dm_oblock_t ob;
    891	unsigned flags;
    892
    893	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
    894	if (r)
    895		return r;
    896
    897	unpack_value(value, &ob, &flags);
    898	*result = !((flags & M_VALID) && (flags & M_DIRTY));
    899
    900	return 0;
    901}
    902
    903static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
    904					   dm_cblock_t begin, dm_cblock_t end,
    905					   bool *result)
    906{
    907	int r;
    908	*result = true;
    909
    910	while (begin != end) {
    911		r = block_clean_combined_dirty(cmd, begin, result);
    912		if (r) {
    913			DMERR("block_clean_combined_dirty failed");
    914			return r;
    915		}
    916
    917		if (!*result) {
    918			DMERR("cache block %llu is dirty",
    919			      (unsigned long long) from_cblock(begin));
    920			return 0;
    921		}
    922
    923		begin = to_cblock(from_cblock(begin) + 1);
    924	}
    925
    926	return 0;
    927}
    928
    929static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
    930					   dm_cblock_t begin, dm_cblock_t end,
    931					   bool *result)
    932{
    933	int r;
    934	bool dirty_flag;
    935	*result = true;
    936
    937	if (from_cblock(cmd->cache_blocks) == 0)
    938		/* Nothing to do */
    939		return 0;
    940
    941	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
    942				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
    943	if (r) {
    944		DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
    945		return r;
    946	}
    947
    948	r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin));
    949	if (r) {
    950		DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__);
    951		dm_bitset_cursor_end(&cmd->dirty_cursor);
    952		return r;
    953	}
    954
    955	while (begin != end) {
    956		/*
    957		 * We assume that unmapped blocks have their dirty bit
    958		 * cleared.
    959		 */
    960		dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor);
    961		if (dirty_flag) {
    962			DMERR("%s: cache block %llu is dirty", __func__,
    963			      (unsigned long long) from_cblock(begin));
    964			dm_bitset_cursor_end(&cmd->dirty_cursor);
    965			*result = false;
    966			return 0;
    967		}
    968
    969		begin = to_cblock(from_cblock(begin) + 1);
    970		if (begin == end)
    971			break;
    972
    973		r = dm_bitset_cursor_next(&cmd->dirty_cursor);
    974		if (r) {
    975			DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
    976			dm_bitset_cursor_end(&cmd->dirty_cursor);
    977			return r;
    978		}
    979	}
    980
    981	dm_bitset_cursor_end(&cmd->dirty_cursor);
    982
    983	return 0;
    984}
    985
    986static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
    987					dm_cblock_t begin, dm_cblock_t end,
    988					bool *result)
    989{
    990	if (separate_dirty_bits(cmd))
    991		return blocks_are_clean_separate_dirty(cmd, begin, end, result);
    992	else
    993		return blocks_are_clean_combined_dirty(cmd, begin, end, result);
    994}
    995
    996static bool cmd_write_lock(struct dm_cache_metadata *cmd)
    997{
    998	down_write(&cmd->root_lock);
    999	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
   1000		up_write(&cmd->root_lock);
   1001		return false;
   1002	}
   1003	return true;
   1004}
   1005
   1006#define WRITE_LOCK(cmd)				\
   1007	do {					\
   1008		if (!cmd_write_lock((cmd)))	\
   1009			return -EINVAL;		\
   1010	} while(0)
   1011
   1012#define WRITE_LOCK_VOID(cmd)			\
   1013	do {					\
   1014		if (!cmd_write_lock((cmd)))	\
   1015			return;			\
   1016	} while(0)
   1017
   1018#define WRITE_UNLOCK(cmd) \
   1019	up_write(&(cmd)->root_lock)
   1020
   1021static bool cmd_read_lock(struct dm_cache_metadata *cmd)
   1022{
   1023	down_read(&cmd->root_lock);
   1024	if (cmd->fail_io) {
   1025		up_read(&cmd->root_lock);
   1026		return false;
   1027	}
   1028	return true;
   1029}
   1030
   1031#define READ_LOCK(cmd)				\
   1032	do {					\
   1033		if (!cmd_read_lock((cmd)))	\
   1034			return -EINVAL;		\
   1035	} while(0)
   1036
   1037#define READ_LOCK_VOID(cmd)			\
   1038	do {					\
   1039		if (!cmd_read_lock((cmd)))	\
   1040			return;			\
   1041	} while(0)
   1042
   1043#define READ_UNLOCK(cmd) \
   1044	up_read(&(cmd)->root_lock)
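
/*
 * Usage pattern (illustrative; dm_cache_foo/__foo are placeholder names
 * mirroring the public entry points below):
 *
 *   int dm_cache_foo(struct dm_cache_metadata *cmd, ...)
 *   {
 *           int r;
 *
 *           WRITE_LOCK(cmd);   // returns -EINVAL on fail_io or a
 *                              // read-only block manager
 *           r = __foo(cmd, ...);
 *           WRITE_UNLOCK(cmd);
 *
 *           return r;
 *   }
 */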
   1045
   1046int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
   1047{
   1048	int r;
   1049	bool clean;
   1050	__le64 null_mapping = pack_value(0, 0);
   1051
   1052	WRITE_LOCK(cmd);
   1053	__dm_bless_for_disk(&null_mapping);
   1054
   1055	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
   1056		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
   1057		if (r) {
   1058			__dm_unbless_for_disk(&null_mapping);
   1059			goto out;
   1060		}
   1061
   1062		if (!clean) {
   1063			DMERR("unable to shrink cache due to dirty blocks");
   1064			r = -EINVAL;
   1065			__dm_unbless_for_disk(&null_mapping);
   1066			goto out;
   1067		}
   1068	}
   1069
   1070	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
   1071			    from_cblock(new_cache_size),
   1072			    &null_mapping, &cmd->root);
   1073	if (r)
   1074		goto out;
   1075
   1076	if (separate_dirty_bits(cmd)) {
   1077		r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
   1078				     from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
   1079				     false, &cmd->dirty_root);
   1080		if (r)
   1081			goto out;
   1082	}
   1083
   1084	cmd->cache_blocks = new_cache_size;
   1085	cmd->changed = true;
   1086
   1087out:
   1088	WRITE_UNLOCK(cmd);
   1089
   1090	return r;
   1091}
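
/*
 * Example (illustrative numbers): shrinking a 1024-block cache to 512
 * blocks only succeeds if blocks 512..1023 are unmapped or clean;
 * if any of them are dirty, dm_cache_resize() returns -EINVAL and
 * cache_blocks is left unchanged.  Growing never needs the dirty check:
 * the new tail entries are filled with the null (unmapped) mapping.
 */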
   1092
   1093int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
   1094				   sector_t discard_block_size,
   1095				   dm_dblock_t new_nr_entries)
   1096{
   1097	int r;
   1098
   1099	WRITE_LOCK(cmd);
   1100	r = dm_bitset_resize(&cmd->discard_info,
   1101			     cmd->discard_root,
   1102			     from_dblock(cmd->discard_nr_blocks),
   1103			     from_dblock(new_nr_entries),
   1104			     false, &cmd->discard_root);
   1105	if (!r) {
   1106		cmd->discard_block_size = discard_block_size;
   1107		cmd->discard_nr_blocks = new_nr_entries;
   1108	}
   1109
   1110	cmd->changed = true;
   1111	WRITE_UNLOCK(cmd);
   1112
   1113	return r;
   1114}
   1115
   1116static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
   1117{
   1118	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
   1119				 from_dblock(b), &cmd->discard_root);
   1120}
   1121
   1122static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
   1123{
   1124	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
   1125				   from_dblock(b), &cmd->discard_root);
   1126}
   1127
   1128static int __discard(struct dm_cache_metadata *cmd,
   1129		     dm_dblock_t dblock, bool discard)
   1130{
   1131	int r;
   1132
   1133	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
   1134	if (r)
   1135		return r;
   1136
   1137	cmd->changed = true;
   1138	return 0;
   1139}
   1140
   1141int dm_cache_set_discard(struct dm_cache_metadata *cmd,
   1142			 dm_dblock_t dblock, bool discard)
   1143{
   1144	int r;
   1145
   1146	WRITE_LOCK(cmd);
   1147	r = __discard(cmd, dblock, discard);
   1148	WRITE_UNLOCK(cmd);
   1149
   1150	return r;
   1151}
   1152
   1153static int __load_discards(struct dm_cache_metadata *cmd,
   1154			   load_discard_fn fn, void *context)
   1155{
   1156	int r = 0;
   1157	uint32_t b;
   1158	struct dm_bitset_cursor c;
   1159
   1160	if (from_dblock(cmd->discard_nr_blocks) == 0)
   1161		/* nothing to do */
   1162		return 0;
   1163
   1164	if (cmd->clean_when_opened) {
   1165		r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root);
   1166		if (r)
   1167			return r;
   1168
   1169		r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root,
   1170					   from_dblock(cmd->discard_nr_blocks), &c);
   1171		if (r)
   1172			return r;
   1173
   1174		for (b = 0; ; b++) {
   1175			r = fn(context, cmd->discard_block_size, to_dblock(b),
   1176			       dm_bitset_cursor_get_value(&c));
   1177			if (r)
   1178				break;
   1179
   1180			if (b >= (from_dblock(cmd->discard_nr_blocks) - 1))
   1181				break;
   1182
   1183			r = dm_bitset_cursor_next(&c);
   1184			if (r)
   1185				break;
   1186		}
   1187
   1188		dm_bitset_cursor_end(&c);
   1189
   1190	} else {
   1191		for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
   1192			r = fn(context, cmd->discard_block_size, to_dblock(b), false);
   1193			if (r)
   1194				return r;
   1195		}
   1196	}
   1197
   1198	return r;
   1199}
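
/*
 * Note (illustrative summary): when the cache was not shut down cleanly
 * (cmd->clean_when_opened == false) the on-disk discard bitset is not
 * trusted, so every discard block is reported to the callback as not
 * discarded.
 */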
   1200
   1201int dm_cache_load_discards(struct dm_cache_metadata *cmd,
   1202			   load_discard_fn fn, void *context)
   1203{
   1204	int r;
   1205
   1206	READ_LOCK(cmd);
   1207	r = __load_discards(cmd, fn, context);
   1208	READ_UNLOCK(cmd);
   1209
   1210	return r;
   1211}
   1212
   1213int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
   1214{
   1215	READ_LOCK(cmd);
   1216	*result = cmd->cache_blocks;
   1217	READ_UNLOCK(cmd);
   1218
   1219	return 0;
   1220}
   1221
   1222static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
   1223{
   1224	int r;
   1225	__le64 value = pack_value(0, 0);
   1226
   1227	__dm_bless_for_disk(&value);
   1228	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
   1229			       &value, &cmd->root);
   1230	if (r)
   1231		return r;
   1232
   1233	cmd->changed = true;
   1234	return 0;
   1235}
   1236
   1237int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
   1238{
   1239	int r;
   1240
   1241	WRITE_LOCK(cmd);
   1242	r = __remove(cmd, cblock);
   1243	WRITE_UNLOCK(cmd);
   1244
   1245	return r;
   1246}
   1247
   1248static int __insert(struct dm_cache_metadata *cmd,
   1249		    dm_cblock_t cblock, dm_oblock_t oblock)
   1250{
   1251	int r;
   1252	__le64 value = pack_value(oblock, M_VALID);
   1253	__dm_bless_for_disk(&value);
   1254
   1255	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
   1256			       &value, &cmd->root);
   1257	if (r)
   1258		return r;
   1259
   1260	cmd->changed = true;
   1261	return 0;
   1262}
   1263
   1264int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
   1265			    dm_cblock_t cblock, dm_oblock_t oblock)
   1266{
   1267	int r;
   1268
   1269	WRITE_LOCK(cmd);
   1270	r = __insert(cmd, cblock, oblock);
   1271	WRITE_UNLOCK(cmd);
   1272
   1273	return r;
   1274}
   1275
   1276struct thunk {
   1277	load_mapping_fn fn;
   1278	void *context;
   1279
   1280	struct dm_cache_metadata *cmd;
   1281	bool respect_dirty_flags;
   1282	bool hints_valid;
   1283};
   1284
   1285static bool policy_unchanged(struct dm_cache_metadata *cmd,
   1286			     struct dm_cache_policy *policy)
   1287{
   1288	const char *policy_name = dm_cache_policy_get_name(policy);
   1289	const unsigned *policy_version = dm_cache_policy_get_version(policy);
   1290	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
   1291
   1292	/*
   1293	 * Ensure policy names match.
   1294	 */
   1295	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
   1296		return false;
   1297
   1298	/*
   1299	 * Ensure policy major versions match.
   1300	 */
   1301	if (cmd->policy_version[0] != policy_version[0])
   1302		return false;
   1303
   1304	/*
   1305	 * Ensure policy hint sizes match.
   1306	 */
   1307	if (cmd->policy_hint_size != policy_hint_size)
   1308		return false;
   1309
   1310	return true;
   1311}
   1312
   1313static bool hints_array_initialized(struct dm_cache_metadata *cmd)
   1314{
   1315	return cmd->hint_root && cmd->policy_hint_size;
   1316}
   1317
   1318static bool hints_array_available(struct dm_cache_metadata *cmd,
   1319				  struct dm_cache_policy *policy)
   1320{
   1321	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
   1322		hints_array_initialized(cmd);
   1323}
   1324
   1325static int __load_mapping_v1(struct dm_cache_metadata *cmd,
   1326			     uint64_t cb, bool hints_valid,
   1327			     struct dm_array_cursor *mapping_cursor,
   1328			     struct dm_array_cursor *hint_cursor,
   1329			     load_mapping_fn fn, void *context)
   1330{
   1331	int r = 0;
   1332
   1333	__le64 mapping;
   1334	__le32 hint = 0;
   1335
   1336	__le64 *mapping_value_le;
   1337	__le32 *hint_value_le;
   1338
   1339	dm_oblock_t oblock;
   1340	unsigned flags;
   1341	bool dirty = true;
   1342
   1343	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
   1344	memcpy(&mapping, mapping_value_le, sizeof(mapping));
   1345	unpack_value(mapping, &oblock, &flags);
   1346
   1347	if (flags & M_VALID) {
   1348		if (hints_valid) {
   1349			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
   1350			memcpy(&hint, hint_value_le, sizeof(hint));
   1351		}
   1352		if (cmd->clean_when_opened)
   1353			dirty = flags & M_DIRTY;
   1354
   1355		r = fn(context, oblock, to_cblock(cb), dirty,
   1356		       le32_to_cpu(hint), hints_valid);
   1357		if (r) {
   1358			DMERR("policy couldn't load cache block %llu",
   1359			      (unsigned long long) from_cblock(to_cblock(cb)));
   1360		}
   1361	}
   1362
   1363	return r;
   1364}
   1365
   1366static int __load_mapping_v2(struct dm_cache_metadata *cmd,
   1367			     uint64_t cb, bool hints_valid,
   1368			     struct dm_array_cursor *mapping_cursor,
   1369			     struct dm_array_cursor *hint_cursor,
   1370			     struct dm_bitset_cursor *dirty_cursor,
   1371			     load_mapping_fn fn, void *context)
   1372{
   1373	int r = 0;
   1374
   1375	__le64 mapping;
   1376	__le32 hint = 0;
   1377
   1378	__le64 *mapping_value_le;
   1379	__le32 *hint_value_le;
   1380
   1381	dm_oblock_t oblock;
   1382	unsigned flags;
   1383	bool dirty = true;
   1384
   1385	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
   1386	memcpy(&mapping, mapping_value_le, sizeof(mapping));
   1387	unpack_value(mapping, &oblock, &flags);
   1388
   1389	if (flags & M_VALID) {
   1390		if (hints_valid) {
   1391			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
   1392			memcpy(&hint, hint_value_le, sizeof(hint));
   1393		}
   1394		if (cmd->clean_when_opened)
   1395			dirty = dm_bitset_cursor_get_value(dirty_cursor);
   1396
   1397		r = fn(context, oblock, to_cblock(cb), dirty,
   1398		       le32_to_cpu(hint), hints_valid);
   1399		if (r) {
   1400			DMERR("policy couldn't load cache block %llu",
   1401			      (unsigned long long) from_cblock(to_cblock(cb)));
   1402		}
   1403	}
   1404
   1405	return r;
   1406}
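
/*
 * Dirty-bit sourcing during load (illustrative summary): format 1 takes
 * the dirty state from the M_DIRTY flag packed into the mapping value,
 * format 2 reads it from the separate dirty bitset cursor; in both
 * cases, if the previous shutdown was not clean, the block is assumed
 * dirty (the 'dirty = true' default above).
 */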
   1407
   1408static int __load_mappings(struct dm_cache_metadata *cmd,
   1409			   struct dm_cache_policy *policy,
   1410			   load_mapping_fn fn, void *context)
   1411{
   1412	int r;
   1413	uint64_t cb;
   1414
   1415	bool hints_valid = hints_array_available(cmd, policy);
   1416
   1417	if (from_cblock(cmd->cache_blocks) == 0)
   1418		/* Nothing to do */
   1419		return 0;
   1420
   1421	r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
   1422	if (r)
   1423		return r;
   1424
   1425	if (hints_valid) {
   1426		r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
   1427		if (r) {
   1428			dm_array_cursor_end(&cmd->mapping_cursor);
   1429			return r;
   1430		}
   1431	}
   1432
   1433	if (separate_dirty_bits(cmd)) {
   1434		r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
   1435					   from_cblock(cmd->cache_blocks),
   1436					   &cmd->dirty_cursor);
   1437		if (r) {
   1438			dm_array_cursor_end(&cmd->hint_cursor);
   1439			dm_array_cursor_end(&cmd->mapping_cursor);
   1440			return r;
   1441		}
   1442	}
   1443
   1444	for (cb = 0; ; cb++) {
   1445		if (separate_dirty_bits(cmd))
   1446			r = __load_mapping_v2(cmd, cb, hints_valid,
   1447					      &cmd->mapping_cursor,
   1448					      &cmd->hint_cursor,
   1449					      &cmd->dirty_cursor,
   1450					      fn, context);
   1451		else
   1452			r = __load_mapping_v1(cmd, cb, hints_valid,
   1453					      &cmd->mapping_cursor, &cmd->hint_cursor,
   1454					      fn, context);
   1455		if (r)
   1456			goto out;
   1457
   1458		/*
   1459		 * We need to break out before we move the cursors.
   1460		 */
   1461		if (cb >= (from_cblock(cmd->cache_blocks) - 1))
   1462			break;
   1463
   1464		r = dm_array_cursor_next(&cmd->mapping_cursor);
   1465		if (r) {
   1466			DMERR("dm_array_cursor_next for mapping failed");
   1467			goto out;
   1468		}
   1469
   1470		if (hints_valid) {
   1471			r = dm_array_cursor_next(&cmd->hint_cursor);
   1472			if (r) {
   1473				dm_array_cursor_end(&cmd->hint_cursor);
   1474				hints_valid = false;
   1475			}
   1476		}
   1477
   1478		if (separate_dirty_bits(cmd)) {
   1479			r = dm_bitset_cursor_next(&cmd->dirty_cursor);
   1480			if (r) {
   1481				DMERR("dm_bitset_cursor_next for dirty failed");
   1482				goto out;
   1483			}
   1484		}
   1485	}
   1486out:
   1487	dm_array_cursor_end(&cmd->mapping_cursor);
   1488	if (hints_valid)
   1489		dm_array_cursor_end(&cmd->hint_cursor);
   1490
   1491	if (separate_dirty_bits(cmd))
   1492		dm_bitset_cursor_end(&cmd->dirty_cursor);
   1493
   1494	return r;
   1495}
   1496
   1497int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
   1498			   struct dm_cache_policy *policy,
   1499			   load_mapping_fn fn, void *context)
   1500{
   1501	int r;
   1502
   1503	READ_LOCK(cmd);
   1504	r = __load_mappings(cmd, policy, fn, context);
   1505	READ_UNLOCK(cmd);
   1506
   1507	return r;
   1508}
   1509
   1510static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
   1511{
   1512	__le64 value;
   1513	dm_oblock_t oblock;
   1514	unsigned flags;
   1515
   1516	memcpy(&value, leaf, sizeof(value));
   1517	unpack_value(value, &oblock, &flags);
   1518
   1519	return 0;
   1520}
   1521
   1522static int __dump_mappings(struct dm_cache_metadata *cmd)
   1523{
   1524	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
   1525}
   1526
   1527void dm_cache_dump(struct dm_cache_metadata *cmd)
   1528{
   1529	READ_LOCK_VOID(cmd);
   1530	__dump_mappings(cmd);
   1531	READ_UNLOCK(cmd);
   1532}
   1533
   1534int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
   1535{
   1536	int r;
   1537
   1538	READ_LOCK(cmd);
   1539	r = cmd->changed;
   1540	READ_UNLOCK(cmd);
   1541
   1542	return r;
   1543}
   1544
   1545static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
   1546{
   1547	int r;
   1548	unsigned flags;
   1549	dm_oblock_t oblock;
   1550	__le64 value;
   1551
   1552	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
   1553	if (r)
   1554		return r;
   1555
   1556	unpack_value(value, &oblock, &flags);
   1557
   1558	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
   1559		/* nothing to be done */
   1560		return 0;
   1561
   1562	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
   1563	__dm_bless_for_disk(&value);
   1564
   1565	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
   1566			       &value, &cmd->root);
   1567	if (r)
   1568		return r;
   1569
   1570	cmd->changed = true;
   1571	return 0;
   1572
   1573}
   1574
   1575static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
   1576{
   1577	int r;
   1578	unsigned i;
   1579	for (i = 0; i < nr_bits; i++) {
   1580		r = __dirty(cmd, to_cblock(i), test_bit(i, bits));
   1581		if (r)
   1582			return r;
   1583	}
   1584
   1585	return 0;
   1586}
   1587
   1588static int is_dirty_callback(uint32_t index, bool *value, void *context)
   1589{
   1590	unsigned long *bits = context;
   1591	*value = test_bit(index, bits);
   1592	return 0;
   1593}
   1594
   1595static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
   1596{
   1597	int r = 0;
   1598
   1599	/* nr_bits is really just a sanity check */
   1600	if (nr_bits != from_cblock(cmd->cache_blocks)) {
   1601		DMERR("dirty bitset is wrong size");
   1602		return -EINVAL;
   1603	}
   1604
   1605	r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root);
   1606	if (r)
   1607		return r;
   1608
   1609	cmd->changed = true;
   1610	return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits);
   1611}
   1612
   1613int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
   1614			    unsigned nr_bits,
   1615			    unsigned long *bits)
   1616{
   1617	int r;
   1618
   1619	WRITE_LOCK(cmd);
   1620	if (separate_dirty_bits(cmd))
   1621		r = __set_dirty_bits_v2(cmd, nr_bits, bits);
   1622	else
   1623		r = __set_dirty_bits_v1(cmd, nr_bits, bits);
   1624	WRITE_UNLOCK(cmd);
   1625
   1626	return r;
   1627}
   1628
   1629void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
   1630				 struct dm_cache_statistics *stats)
   1631{
   1632	READ_LOCK_VOID(cmd);
   1633	*stats = cmd->stats;
   1634	READ_UNLOCK(cmd);
   1635}
   1636
   1637void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
   1638				 struct dm_cache_statistics *stats)
   1639{
   1640	WRITE_LOCK_VOID(cmd);
   1641	cmd->stats = *stats;
   1642	WRITE_UNLOCK(cmd);
   1643}
   1644
   1645int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
   1646{
   1647	int r = -EINVAL;
   1648	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
   1649				 clear_clean_shutdown);
   1650
   1651	WRITE_LOCK(cmd);
   1652	if (cmd->fail_io)
   1653		goto out;
   1654
   1655	r = __commit_transaction(cmd, mutator);
   1656	if (r)
   1657		goto out;
   1658
   1659	r = __begin_transaction(cmd);
   1660out:
   1661	WRITE_UNLOCK(cmd);
   1662	return r;
   1663}
   1664
   1665int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
   1666					   dm_block_t *result)
   1667{
   1668	int r = -EINVAL;
   1669
   1670	READ_LOCK(cmd);
   1671	if (!cmd->fail_io)
   1672		r = dm_sm_get_nr_free(cmd->metadata_sm, result);
   1673	READ_UNLOCK(cmd);
   1674
   1675	return r;
   1676}
   1677
   1678int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
   1679				   dm_block_t *result)
   1680{
   1681	int r = -EINVAL;
   1682
   1683	READ_LOCK(cmd);
   1684	if (!cmd->fail_io)
   1685		r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
   1686	READ_UNLOCK(cmd);
   1687
   1688	return r;
   1689}
   1690
   1691/*----------------------------------------------------------------*/
   1692
   1693static int get_hint(uint32_t index, void *value_le, void *context)
   1694{
   1695	uint32_t value;
   1696	struct dm_cache_policy *policy = context;
   1697
   1698	value = policy_get_hint(policy, to_cblock(index));
   1699	*((__le32 *) value_le) = cpu_to_le32(value);
   1700
   1701	return 0;
   1702}
   1703
   1704/*
   1705 * It's quicker to always delete the hint array, and recreate with
   1706 * dm_array_new().
   1707 */
   1708static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
   1709{
   1710	int r;
   1711	size_t hint_size;
   1712	const char *policy_name = dm_cache_policy_get_name(policy);
   1713	const unsigned *policy_version = dm_cache_policy_get_version(policy);
   1714
   1715	if (!policy_name[0] ||
   1716	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
   1717		return -EINVAL;
   1718
   1719	strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
   1720	memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
   1721
   1722	hint_size = dm_cache_policy_get_hint_size(policy);
   1723	if (!hint_size)
   1724		return 0; /* short-circuit hints initialization */
   1725	cmd->policy_hint_size = hint_size;
   1726
   1727	if (cmd->hint_root) {
   1728		r = dm_array_del(&cmd->hint_info, cmd->hint_root);
   1729		if (r)
   1730			return r;
   1731	}
   1732
   1733	return dm_array_new(&cmd->hint_info, &cmd->hint_root,
   1734			    from_cblock(cmd->cache_blocks),
   1735			    get_hint, policy);
   1736}
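
/*
 * Why the delete-and-recreate above is the fast path (illustrative):
 * dm_array_new() builds the hint array by calling get_hint() for each
 * cache block, which is cheaper than issuing an individual
 * dm_array_set_value() against the old array for every block.
 */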
   1737
   1738int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
   1739{
   1740	int r;
   1741
   1742	WRITE_LOCK(cmd);
   1743	r = write_hints(cmd, policy);
   1744	WRITE_UNLOCK(cmd);
   1745
   1746	return r;
   1747}
   1748
   1749int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
   1750{
   1751	int r;
   1752
   1753	READ_LOCK(cmd);
   1754	r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
   1755	READ_UNLOCK(cmd);
   1756
   1757	return r;
   1758}
   1759
   1760void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
   1761{
   1762	WRITE_LOCK_VOID(cmd);
   1763	dm_bm_set_read_only(cmd->bm);
   1764	WRITE_UNLOCK(cmd);
   1765}
   1766
   1767void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
   1768{
   1769	WRITE_LOCK_VOID(cmd);
   1770	dm_bm_set_read_write(cmd->bm);
   1771	WRITE_UNLOCK(cmd);
   1772}
   1773
   1774int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
   1775{
   1776	int r;
   1777	struct dm_block *sblock;
   1778	struct cache_disk_superblock *disk_super;
   1779
   1780	WRITE_LOCK(cmd);
   1781	set_bit(NEEDS_CHECK, &cmd->flags);
   1782
   1783	r = superblock_lock(cmd, &sblock);
   1784	if (r) {
   1785		DMERR("couldn't read superblock");
   1786		goto out;
   1787	}
   1788
   1789	disk_super = dm_block_data(sblock);
   1790	disk_super->flags = cpu_to_le32(cmd->flags);
   1791
   1792	dm_bm_unlock(sblock);
   1793
   1794out:
   1795	WRITE_UNLOCK(cmd);
   1796	return r;
   1797}
   1798
   1799int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
   1800{
   1801	READ_LOCK(cmd);
   1802	*result = !!test_bit(NEEDS_CHECK, &cmd->flags);
   1803	READ_UNLOCK(cmd);
   1804
   1805	return 0;
   1806}
   1807
   1808int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
   1809{
   1810	int r;
   1811
   1812	WRITE_LOCK(cmd);
   1813	__destroy_persistent_data_objects(cmd);
   1814	r = __create_persistent_data_objects(cmd, false);
   1815	if (r)
   1816		cmd->fail_io = true;
   1817	WRITE_UNLOCK(cmd);
   1818
   1819	return r;
   1820}