cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dm-space-map-common.c (28076B)


      1/*
      2 * Copyright (C) 2011 Red Hat, Inc.
      3 *
      4 * This file is released under the GPL.
      5 */
      6
      7#include "dm-space-map-common.h"
      8#include "dm-transaction-manager.h"
      9#include "dm-btree-internal.h"
     10#include "dm-persistent-data-internal.h"
     11
     12#include <linux/bitops.h>
     13#include <linux/device-mapper.h>
     14
     15#define DM_MSG_PREFIX "space map common"
     16
     17/*----------------------------------------------------------------*/
     18
     19/*
     20 * Index validator.
     21 */
     22#define INDEX_CSUM_XOR 160478
     23
     24static void index_prepare_for_write(struct dm_block_validator *v,
     25				    struct dm_block *b,
     26				    size_t block_size)
     27{
     28	struct disk_metadata_index *mi_le = dm_block_data(b);
     29
     30	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
     31	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
     32						 block_size - sizeof(__le32),
     33						 INDEX_CSUM_XOR));
     34}
     35
     36static int index_check(struct dm_block_validator *v,
     37		       struct dm_block *b,
     38		       size_t block_size)
     39{
     40	struct disk_metadata_index *mi_le = dm_block_data(b);
     41	__le32 csum_disk;
     42
     43	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
     44		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
     45			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
     46		return -ENOTBLK;
     47	}
     48
     49	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
     50					       block_size - sizeof(__le32),
     51					       INDEX_CSUM_XOR));
     52	if (csum_disk != mi_le->csum) {
     53		DMERR_LIMIT("index_check failed: csum %u != wanted %u",
     54			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
     55		return -EILSEQ;
     56	}
     57
     58	return 0;
     59}
     60
/* Validator wired into every metadata index block read/write. */
static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};
     66
     67/*----------------------------------------------------------------*/
     68
     69/*
     70 * Bitmap validator
     71 */
     72#define BITMAP_CSUM_XOR 240779
     73
     74static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
     75					struct dm_block *b,
     76					size_t block_size)
     77{
     78	struct disk_bitmap_header *disk_header = dm_block_data(b);
     79
     80	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
     81	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
     82						       block_size - sizeof(__le32),
     83						       BITMAP_CSUM_XOR));
     84}
     85
     86static int dm_bitmap_check(struct dm_block_validator *v,
     87			   struct dm_block *b,
     88			   size_t block_size)
     89{
     90	struct disk_bitmap_header *disk_header = dm_block_data(b);
     91	__le32 csum_disk;
     92
     93	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
     94		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
     95			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
     96		return -ENOTBLK;
     97	}
     98
     99	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
    100					       block_size - sizeof(__le32),
    101					       BITMAP_CSUM_XOR));
    102	if (csum_disk != disk_header->csum) {
    103		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
    104			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
    105		return -EILSEQ;
    106	}
    107
    108	return 0;
    109}
    110
/* Validator wired into every space-map bitmap block read/write. */
static struct dm_block_validator dm_sm_bitmap_validator = {
	.name = "sm_bitmap",
	.prepare_for_write = dm_bitmap_prepare_for_write,
	.check = dm_bitmap_check,
};
    116
    117/*----------------------------------------------------------------*/
    118
    119#define ENTRIES_PER_WORD 32
    120#define ENTRIES_SHIFT	5
    121
/* Skip past the on-disk header to the packed 2-bit entry area. */
static void *dm_bitmap_data(struct dm_block *b)
{
	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
}
    126
    127#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
    128
/*
 * Returns non-zero when the 64-bit word holding entry @b has no free
 * entries, i.e. every one of its 32 2-bit entries is non-zero.
 * Bit trick: WORD_MASK_HIGH has the high bit of each 2-bit pair set;
 * the add propagates carries so that ~bits & mask is non-zero iff some
 * pair is 00 (free) — NOTE(review): derived from the constants, confirm
 * against the identical helper in upstream dm-space-map-common.c.
 */
static unsigned dm_bitmap_word_used(void *addr, unsigned b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	uint64_t bits = le64_to_cpu(*w_le);
	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;

	return !(~bits & mask);
}
    139
    140static unsigned sm_lookup_bitmap(void *addr, unsigned b)
    141{
    142	__le64 *words_le = addr;
    143	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
    144	unsigned hi, lo;
    145
    146	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
    147	hi = !!test_bit_le(b, (void *) w_le);
    148	lo = !!test_bit_le(b + 1, (void *) w_le);
    149	return (hi << 1) | lo;
    150}
    151
    152static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
    153{
    154	__le64 *words_le = addr;
    155	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
    156
    157	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
    158
    159	if (val & 2)
    160		__set_bit_le(b, (void *) w_le);
    161	else
    162		__clear_bit_le(b, (void *) w_le);
    163
    164	if (val & 1)
    165		__set_bit_le(b + 1, (void *) w_le);
    166	else
    167		__clear_bit_le(b + 1, (void *) w_le);
    168}
    169
    170static int sm_find_free(void *addr, unsigned begin, unsigned end,
    171			unsigned *result)
    172{
    173	while (begin < end) {
    174		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
    175		    dm_bitmap_word_used(addr, begin)) {
    176			begin += ENTRIES_PER_WORD;
    177			continue;
    178		}
    179
    180		if (!sm_lookup_bitmap(addr, begin)) {
    181			*result = begin;
    182			return 0;
    183		}
    184
    185		begin++;
    186	}
    187
    188	return -ENOSPC;
    189}
    190
    191/*----------------------------------------------------------------*/
    192
/*
 * Common initialisation shared by the metadata and disk space maps:
 * records the transaction manager, describes the two btrees (index
 * entries and overflow reference counts) and derives the bitmap
 * geometry from the block size.  Returns 0, or -EINVAL if the block
 * size is too large.
 */
static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	memset(ll, 0, sizeof(struct ll_disk));

	ll->tm = tm;

	ll->bitmap_info.tm = tm;
	ll->bitmap_info.levels = 1;

	/*
	 * Because the new bitmap blocks are created via a shadow
	 * operation, the old entry has already had its reference count
	 * decremented and we don't need the btree to do any bookkeeping.
	 */
	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
	ll->bitmap_info.value_type.inc = NULL;
	ll->bitmap_info.value_type.dec = NULL;
	ll->bitmap_info.value_type.equal = NULL;

	/* overflow tree: plain uint32_t refcounts, no value callbacks */
	ll->ref_count_info.tm = tm;
	ll->ref_count_info.levels = 1;
	ll->ref_count_info.value_type.size = sizeof(uint32_t);
	ll->ref_count_info.value_type.inc = NULL;
	ll->ref_count_info.value_type.dec = NULL;
	ll->ref_count_info.value_type.equal = NULL;

	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));

	if (ll->block_size > (1 << 30)) {
		DMERR("block size too big to hold bitmaps");
		return -EINVAL;
	}

	/* bytes left after the header, ENTRIES_PER_BYTE 2-bit entries each */
	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
		ENTRIES_PER_BYTE;
	ll->nr_blocks = 0;
	ll->bitmap_root = 0;
	ll->ref_count_root = 0;
	ll->bitmap_index_changed = false;

	return 0;
}
    235
/*
 * Grows the space map by @extra_blocks data blocks, creating fresh,
 * entirely-free bitmap blocks for any new index entries required.
 */
int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
	int r;
	dm_block_t i, nr_blocks, nr_indexes;
	unsigned old_blocks, blocks;

	nr_blocks = ll->nr_blocks + extra_blocks;
	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);

	/*
	 * NOTE(review): this repeats the 'blocks' computation, but in the
	 * wider dm_block_t type so the bound check below isn't affected by
	 * truncation to unsigned — confirm before deduplicating.
	 */
	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
	if (nr_indexes > ll->max_entries(ll)) {
		DMERR("space map too large");
		return -EINVAL;
	}

	/*
	 * We need to set this before the dm_tm_new_block() call below.
	 */
	ll->nr_blocks = nr_blocks;
	for (i = old_blocks; i < blocks; i++) {
		struct dm_block *b;
		struct disk_index_entry idx;

		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
		if (r < 0)
			return r;

		idx.blocknr = cpu_to_le64(dm_block_location(b));

		dm_tm_unlock(ll->tm, b);

		/* the new bitmap block starts completely free */
		idx.nr_free = cpu_to_le32(ll->entries_per_block);
		idx.none_free_before = 0;

		r = ll->save_ie(ll, i, &idx);
		if (r < 0)
			return r;
	}

	return 0;
}
    278
/*
 * Reads the 2-bit reference count of block @b straight from its bitmap.
 * A result of 3 means "3 or more"; the exact count then lives in the
 * overflow btree (see sm_ll_lookup()).
 */
int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	struct dm_block *blk;

	if (b >= ll->nr_blocks) {
		DMERR_LIMIT("metadata block out of bounds");
		return -EINVAL;
	}

	/* do_div() divides 'index' in place; b becomes the in-bitmap offset */
	b = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
			    &dm_sm_bitmap_validator, &blk);
	if (r < 0)
		return r;

	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);

	dm_tm_unlock(ll->tm, blk);

	return 0;
}
    307
    308static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
    309				      uint32_t *result)
    310{
    311	__le32 le_rc;
    312	int r;
    313
    314	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
    315	if (r < 0)
    316		return r;
    317
    318	*result = le32_to_cpu(le_rc);
    319
    320	return r;
    321}
    322
    323int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
    324{
    325	int r = sm_ll_lookup_bitmap(ll, b, result);
    326
    327	if (r)
    328		return r;
    329
    330	if (*result != 3)
    331		return r;
    332
    333	return sm_ll_lookup_big_ref_count(ll, b, result);
    334}
    335
/*
 * Searches [begin, end) for a block with a zero reference count,
 * walking one bitmap (index entry) at a time and skipping bitmaps whose
 * cached nr_free is zero.  On success stores the block in *result and
 * returns 0; returns -ENOSPC if nothing is free in the range.
 */
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	/* split begin/end into (index, offset) pairs; do_div divides in place */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	if (end == 0)
		end = ll->entries_per_block;

	/* after the first bitmap, restart each scan from offset 0 */
	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned position;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		if (le32_to_cpu(ie_disk.nr_free) == 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		/* only the final bitmap is clipped to the caller's end */
		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;

		/* none_free_before lets us skip the known-used prefix */
		r = sm_find_free(dm_bitmap_data(blk),
				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &position);
		if (r == -ENOSPC) {
			/*
			 * This might happen because we started searching
			 * part way through the bitmap.
			 */
			dm_tm_unlock(ll->tm, blk);
			continue;
		}

		dm_tm_unlock(ll->tm, blk);

		*result = i * ll->entries_per_block + (dm_block_t) position;
		return 0;
	}

	return -ENOSPC;
}
    391
/*
 * Finds a block that is free in the new transaction AND was unused in
 * the old one, so allocating it cannot clobber data a rollback might
 * still need.  Loops until such a block is found or an error occurs.
 */
int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
	                         dm_block_t begin, dm_block_t end, dm_block_t *b)
{
	int r;
	uint32_t count;

	do {
		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
		if (r)
			break;

		/* double check this block wasn't used in the old transaction */
		if (*b >= old_ll->nr_blocks)
			count = 0;	/* beyond the old map: certainly unused */
		else {
			r = sm_ll_lookup(old_ll, *b, &count);
			if (r)
				break;

			if (count)
				begin = *b + 1;	/* still referenced; skip past it */
		}
	} while (count);

	return r;
}
    418
    419/*----------------------------------------------------------------*/
    420
/*
 * Sets the reference count of block @b to exactly @ref_count.  Counts
 * 0-2 live directly in the bitmap; 3+ saturates the bitmap at 3 with
 * the true count kept in the overflow btree.  *nr_allocations is set to
 * +1 / -1 / 0 depending on whether the block went from free to used,
 * used to free, or neither.
 */
int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
		 uint32_t ref_count, int32_t *nr_allocations)
{
	int r;
	uint32_t bit, old;
	struct dm_block *nb;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	void *bm_le;
	int inc;

	/* split b into (index entry, bit offset); do_div divides in place */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
	bm_le = dm_bitmap_data(nb);

	/* a bitmap value of 3 means "look in the overflow tree" */
	old = sm_lookup_bitmap(bm_le, bit);
	if (old > 2) {
		r = sm_ll_lookup_big_ref_count(ll, b, &old);
		if (r < 0) {
			dm_tm_unlock(ll->tm, nb);
			return r;
		}
	}

	/*
	 * NOTE(review): r is always 0 when we reach here (every non-zero r
	 * above already returned), so this block looks dead — confirm
	 * against upstream before removing.
	 */
	if (r) {
		dm_tm_unlock(ll->tm, nb);
		return r;
	}

	if (ref_count <= 2) {
		sm_set_bitmap(bm_le, bit, ref_count);
		dm_tm_unlock(ll->tm, nb);

		/* count dropped out of overflow range; remove stale entry */
		if (old > 2) {
			r = dm_btree_remove(&ll->ref_count_info,
					    ll->ref_count_root,
					    &b, &ll->ref_count_root);
			if (r)
				return r;
		}

	} else {
		__le32 le_rc = cpu_to_le32(ref_count);

		/* saturate the bitmap; exact count goes to the overflow tree */
		sm_set_bitmap(bm_le, bit, 3);
		dm_tm_unlock(ll->tm, nb);

		__dm_bless_for_disk(&le_rc);
		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			DMERR("ref count insert failed");
			return r;
		}
	}

	/* maintain the per-bitmap free-count and none_free_before hints */
	if (ref_count && !old) {
		*nr_allocations = 1;
		ll->nr_allocated++;
		le32_add_cpu(&ie_disk.nr_free, -1);
		if (le32_to_cpu(ie_disk.none_free_before) == bit)
			ie_disk.none_free_before = cpu_to_le32(bit + 1);

	} else if (old && !ref_count) {
		*nr_allocations = -1;
		ll->nr_allocated--;
		le32_add_cpu(&ie_disk.nr_free, 1);
		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
	} else
		*nr_allocations = 0;

	return ll->save_ie(ll, index, &ie_disk);
}
    504
    505/*----------------------------------------------------------------*/
    506
    507/*
    508 * Holds useful intermediate results for the range based inc and dec
    509 * operations.
    510 */
struct inc_context {
	struct disk_index_entry ie_disk;	/* index entry of the bitmap being modified */
	struct dm_block *bitmap_block;		/* locked bitmap block, NULL when dropped */
	void *bitmap;				/* data area of bitmap_block */

	struct dm_block *overflow_leaf;		/* cached overflow-btree leaf, may be NULL */
};
    518
    519static inline void init_inc_context(struct inc_context *ic)
    520{
    521	ic->bitmap_block = NULL;
    522	ic->bitmap = NULL;
    523	ic->overflow_leaf = NULL;
    524}
    525
    526static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
    527{
    528	if (ic->bitmap_block)
    529		dm_tm_unlock(ll->tm, ic->bitmap_block);
    530	if (ic->overflow_leaf)
    531		dm_tm_unlock(ll->tm, ic->overflow_leaf);
    532}
    533
/* Drop all held blocks and return the context to its pristine state. */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
    539
    540/*
    541 * Confirms a btree node contains a particular key at an index.
    542 */
    543static bool contains_key(struct btree_node *n, uint64_t key, int index)
    544{
    545	return index >= 0 &&
    546		index < le32_to_cpu(n->header.nr_entries) &&
    547		le64_to_cpu(n->keys[index]) == key;
    548}
    549
/*
 * Slow path: walk the overflow btree to the leaf for @b and bump its
 * refcount in place.  The leaf stays locked in ic->overflow_leaf so a
 * subsequent call for a nearby block can reuse it.
 */
static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int r;
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * bitmap_block needs to be unlocked because getting the
	 * overflow_leaf may need to allocate, and thus use the space map.
	 */
	reset_inc_context(ll, ic);

	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	/* overwrite the 32-bit count directly in the shadowed leaf */
	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr) + 1;
	*v_ptr = cpu_to_le32(rc);

	return 0;
}
    582
/*
 * Increments the overflow refcount for @b, using the cached leaf in
 * @ic when it already covers this key, otherwise falling back to the
 * full btree walk in __sm_ll_inc_overflow().
 */
static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr) + 1;
			*v_ptr = cpu_to_le32(rc);

			return 0;
		}
	}

	return __sm_ll_inc_overflow(ll, b, ic);
}
    607
    608static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
    609{
    610	int r, inc;
    611	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
    612			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
    613	if (r < 0) {
    614		DMERR("dm_tm_shadow_block() failed");
    615		return r;
    616	}
    617	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
    618	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
    619	return 0;
    620}
    621
    622/*
    623 * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
    624 * we can reopen the bitmap with a simple write lock, rather than re calling
    625 * dm_tm_shadow_block().
    626 */
    627static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
    628{
    629	if (!ic->bitmap_block) {
    630		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
    631					 &dm_sm_bitmap_validator, &ic->bitmap_block);
    632		if (r) {
    633			DMERR("unable to re-get write lock for bitmap");
    634			return r;
    635		}
    636		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
    637	}
    638
    639	return 0;
    640}
    641
    642/*
    643 * Loops round incrementing entries in a single bitmap.
    644 */
/*
 * Increments refcounts for entries [bit, bit_end) of the current
 * bitmap, with @b the absolute block of the first entry.  On success
 * *new_b holds the first block NOT processed, so the caller can
 * continue with the next bitmap.
 */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   int32_t *nr_allocations, dm_block_t *new_b,
				   struct inc_context *ic)
{
	int r;
	__le32 le_rc;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* inc bitmap, adjust nr_allocated */
			sm_set_bitmap(ic->bitmap, bit, 1);
			(*nr_allocations)++;
			ll->nr_allocated++;
			le32_add_cpu(&ic->ie_disk.nr_free, -1);
			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
			break;

		case 1:
			/* inc bitmap */
			sm_set_bitmap(ic->bitmap, bit, 2);
			break;

		case 2:
			/* inc bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 3);
			/* insert may allocate: drop our locks first */
			reset_inc_context(ll, ic);

			le_rc = cpu_to_le32(3);
			__dm_bless_for_disk(&le_rc);
			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
					    &b, &le_rc, &ll->ref_count_root);
			if (r < 0) {
				DMERR("ref count insert failed");
				return r;
			}
			break;

		default:
			/*
			 * inc within the overflow tree only.
			 */
			r = sm_ll_inc_overflow(ll, b, ic);
			if (r < 0)
				return r;
		}
	}

	*new_b = b;
	return 0;
}
    709
    710/*
    711 * Finds a bitmap that contains entries in the block range, and increments
    712 * them.
    713 */
/*
 * Increments as much of [b, e) as fits in one bitmap: loads and shadows
 * the relevant bitmap, bumps its entries, then persists the updated
 * index entry.  *new_b reports how far we got.
 */
static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	struct inc_context ic;
	uint32_t bit, bit_end;
	dm_block_t index = b;

	init_inc_context(&ic);

	/* do_div() divides index in place; bit is the offset within the bitmap */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* clamp the range to the end of this bitmap */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);

	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}
    743
    744int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
    745	      int32_t *nr_allocations)
    746{
    747	*nr_allocations = 0;
    748	while (b != e) {
    749		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
    750		if (r)
    751			return r;
    752	}
    753
    754	return 0;
    755}
    756
    757/*----------------------------------------------------------------*/
    758
    759static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
    760				struct inc_context *ic)
    761{
    762	reset_inc_context(ll, ic);
    763	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
    764			       &b, &ll->ref_count_root);
    765}
    766
/*
 * Slow path: walk the overflow btree to @b's leaf and decrement its
 * count.  A count of 3 drops out of the overflow range entirely, so the
 * entry is deleted instead (the caller then writes 2 into the bitmap).
 * *old_rc reports the count found before the decrement.
 */
static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic, uint32_t *old_rc)
{
	int r;
	int index = -1;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/* fetching the leaf may allocate; no blocks may be held */
	reset_inc_context(ll, ic);
	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr);
	*old_rc = rc;

	if (rc == 3) {
		return __sm_ll_del_overflow(ll, b, ic);
	} else {
		rc--;
		*v_ptr = cpu_to_le32(rc);
		return 0;
	}
}
    801
/*
 * Decrements the overflow refcount for @b, reusing the cached leaf in
 * @ic when it covers this key, otherwise delegating to the full walk in
 * __sm_ll_dec_overflow().  *old_rc reports the pre-decrement count.
 */
static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
			      struct inc_context *ic, uint32_t *old_rc)
{
	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		int index;
		struct btree_node *n;
		__le32 *v_ptr;
		uint32_t rc;

		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr);
			*old_rc = rc;

			/* counts above 3 stay in the overflow tree; 3 drops out */
			if (rc > 3) {
				rc--;
				*v_ptr = cpu_to_le32(rc);
				return 0;
			} else {
				return __sm_ll_del_overflow(ll, b, ic);
			}

		}
	}

	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
}
    834
    835/*
    836 * Loops round incrementing entries in a single bitmap.
    837 */
    838static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
    839				   uint32_t bit, uint32_t bit_end,
    840				   struct inc_context *ic,
    841				   int32_t *nr_allocations, dm_block_t *new_b)
    842{
    843	int r;
    844	uint32_t old;
    845
    846	for (; bit != bit_end; bit++, b++) {
    847		/*
    848		 * We only need to drop the bitmap if we need to find a new btree
    849		 * leaf for the overflow.  So if it was dropped last iteration,
    850		 * we now re-get it.
    851		 */
    852		r = ensure_bitmap(ll, ic);
    853		if (r)
    854			return r;
    855
    856		old = sm_lookup_bitmap(ic->bitmap, bit);
    857		switch (old) {
    858		case 0:
    859			DMERR("unable to decrement block");
    860			return -EINVAL;
    861
    862		case 1:
    863			/* dec bitmap */
    864			sm_set_bitmap(ic->bitmap, bit, 0);
    865			(*nr_allocations)--;
    866			ll->nr_allocated--;
    867			le32_add_cpu(&ic->ie_disk.nr_free, 1);
    868			ic->ie_disk.none_free_before =
    869				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
    870			break;
    871
    872		case 2:
    873			/* dec bitmap and insert into overflow */
    874			sm_set_bitmap(ic->bitmap, bit, 1);
    875			break;
    876
    877		case 3:
    878			r = sm_ll_dec_overflow(ll, b, ic, &old);
    879			if (r < 0)
    880				return r;
    881
    882			if (old == 3) {
    883				r = ensure_bitmap(ll, ic);
    884				if (r)
    885					return r;
    886
    887				sm_set_bitmap(ic->bitmap, bit, 2);
    888			}
    889			break;
    890		}
    891	}
    892
    893	*new_b = b;
    894	return 0;
    895}
    896
/*
 * Decrements as much of [b, e) as fits in one bitmap: loads and shadows
 * the relevant bitmap, decrements its entries, then persists the
 * updated index entry.  *new_b reports how far we got.
 */
static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t bit, bit_end;
	struct inc_context ic;
	dm_block_t index = b;

	init_inc_context(&ic);

	/* do_div() divides index in place; bit is the offset within the bitmap */
	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* clamp the range to the end of this bitmap */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}
    925
    926int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
    927	      int32_t *nr_allocations)
    928{
    929	*nr_allocations = 0;
    930	while (b != e) {
    931		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
    932		if (r)
    933			return r;
    934	}
    935
    936	return 0;
    937}
    938
    939/*----------------------------------------------------------------*/
    940
    941int sm_ll_commit(struct ll_disk *ll)
    942{
    943	int r = 0;
    944
    945	if (ll->bitmap_index_changed) {
    946		r = ll->commit(ll);
    947		if (!r)
    948			ll->bitmap_index_changed = false;
    949	}
    950
    951	return r;
    952}
    953
    954/*----------------------------------------------------------------*/
    955
    956static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
    957			       struct disk_index_entry *ie)
    958{
    959	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
    960	return 0;
    961}
    962
    963static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
    964			       struct disk_index_entry *ie)
    965{
    966	ll->bitmap_index_changed = true;
    967	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
    968	return 0;
    969}
    970
    971static int metadata_ll_init_index(struct ll_disk *ll)
    972{
    973	int r;
    974	struct dm_block *b;
    975
    976	r = dm_tm_new_block(ll->tm, &index_validator, &b);
    977	if (r < 0)
    978		return r;
    979
    980	ll->bitmap_root = dm_block_location(b);
    981
    982	dm_tm_unlock(ll->tm, b);
    983
    984	return 0;
    985}
    986
    987static int metadata_ll_open(struct ll_disk *ll)
    988{
    989	int r;
    990	struct dm_block *block;
    991
    992	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
    993			    &index_validator, &block);
    994	if (r)
    995		return r;
    996
    997	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
    998	dm_tm_unlock(ll->tm, block);
    999
   1000	return 0;
   1001}
   1002
   1003static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
   1004{
   1005	return MAX_METADATA_BITMAPS;
   1006}
   1007
   1008static int metadata_ll_commit(struct ll_disk *ll)
   1009{
   1010	int r, inc;
   1011	struct dm_block *b;
   1012
   1013	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
   1014	if (r)
   1015		return r;
   1016
   1017	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
   1018	ll->bitmap_root = dm_block_location(b);
   1019
   1020	dm_tm_unlock(ll->tm, b);
   1021
   1022	return 0;
   1023}
   1024
   1025int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
   1026{
   1027	int r;
   1028
   1029	r = sm_ll_init(ll, tm);
   1030	if (r < 0)
   1031		return r;
   1032
   1033	ll->load_ie = metadata_ll_load_ie;
   1034	ll->save_ie = metadata_ll_save_ie;
   1035	ll->init_index = metadata_ll_init_index;
   1036	ll->open_index = metadata_ll_open;
   1037	ll->max_entries = metadata_ll_max_entries;
   1038	ll->commit = metadata_ll_commit;
   1039
   1040	ll->nr_blocks = 0;
   1041	ll->nr_allocated = 0;
   1042
   1043	r = ll->init_index(ll);
   1044	if (r < 0)
   1045		return r;
   1046
   1047	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
   1048	if (r < 0)
   1049		return r;
   1050
   1051	return 0;
   1052}
   1053
/*
 * Re-opens an existing metadata space map from a serialised root
 * (struct disk_sm_root) of @len bytes.
 */
int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct disk_sm_root smr;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_metadata root too small");
		/* NOTE(review): -EINVAL would seem more apt than -ENOMEM here;
		 * callers may depend on this value — confirm before changing. */
		return -ENOMEM;
	}

	/*
	 * We don't know the alignment of the root_le buffer, so need to
	 * copy into a new structure.
	 */
	memcpy(&smr, root_le, sizeof(smr));

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	/* metadata-specific operations */
	ll->load_ie = metadata_ll_load_ie;
	ll->save_ie = metadata_ll_save_ie;
	ll->init_index = metadata_ll_init_index;
	ll->open_index = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit = metadata_ll_commit;

	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);

	return ll->open_index(ll);
}
   1089
   1090/*----------------------------------------------------------------*/
   1091
/*
 * Flush a cached index entry back into the bitmap btree.
 *
 * NOTE(review): the dirty flag is cleared before the insert, so on
 * failure the slot is no longer marked dirty even though the btree was
 * not updated — callers must treat a non-zero return as fatal.
 */
static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
{
	iec->dirty = false;
	__dm_bless_for_disk(iec->ie);
	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
			       &iec->index, &iec->ie, &ll->bitmap_root);
}
   1099
   1100static inline unsigned hash_index(dm_block_t index)
   1101{
   1102	return dm_hash_block(index, IE_CACHE_MASK);
   1103}
   1104
/*
 * Load the index entry for @index via a small direct-mapped, write-back
 * cache (ll->ie_cache).  Fills *ie and returns 0 on success, or an
 * error from the btree writeback/lookup.
 */
static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	int r;
	unsigned h = hash_index(index);
	struct ie_cache *iec = ll->ie_cache + h;

	if (iec->valid) {
		/* Cache hit: serve straight from the cached copy. */
		if (iec->index == index) {
			memcpy(ie, &iec->ie, sizeof(*ie));
			return 0;
		}

		/* Conflict miss: flush the dirty occupant before evicting it. */
		if (iec->dirty) {
			r = ie_cache_writeback(ll, iec);
			if (r)
				return r;
		}
	}

	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
	if (!r) {
		/* Populate the slot with the freshly loaded, clean entry. */
		iec->valid = true;
		iec->dirty = false;
		iec->index = index;
		memcpy(&iec->ie, ie, sizeof(*ie));
	}

	return r;
}
   1135
/*
 * Store the index entry for @index through the write-back cache.  The
 * entry only reaches the bitmap btree when its slot is evicted or the
 * cache is flushed at commit time.
 */
static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	int r;
	unsigned h = hash_index(index);
	struct ie_cache *iec = ll->ie_cache + h;

	/* Record that the index changed even if only the cache is updated. */
	ll->bitmap_index_changed = true;
	if (iec->valid) {
		/* Hit: overwrite the cached copy and mark it dirty. */
		if (iec->index == index) {
			memcpy(&iec->ie, ie, sizeof(*ie));
			iec->dirty = true;
			return 0;
		}

		/* Conflict: flush the dirty occupant before taking the slot. */
		if (iec->dirty) {
			r = ie_cache_writeback(ll, iec);
			if (r)
				return r;
		}
	}

	iec->valid = true;
	iec->dirty = true;
	iec->index = index;
	memcpy(&iec->ie, ie, sizeof(*ie));
	return 0;
}
   1164
   1165static int disk_ll_init_index(struct ll_disk *ll)
   1166{
   1167	unsigned i;
   1168	for (i = 0; i < IE_CACHE_SIZE; i++) {
   1169		struct ie_cache *iec = ll->ie_cache + i;
   1170		iec->valid = false;
   1171		iec->dirty = false;
   1172	}
   1173	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
   1174}
   1175
/*
 * Nothing to do when opening the disk space map's index: the bitmap
 * btree root is loaded from the on-disk sm root by the caller.
 */
static int disk_ll_open(struct ll_disk *ll)
{
	return 0;
}
   1180
/*
 * The disk variant keeps index entries in a btree rather than a
 * fixed-size array, so there is effectively no limit (-1ULL is the
 * maximum dm_block_t value).
 */
static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
	return -1ULL;
}
   1185
   1186static int disk_ll_commit(struct ll_disk *ll)
   1187{
   1188	int r = 0;
   1189	unsigned i;
   1190
   1191	for (i = 0; i < IE_CACHE_SIZE; i++) {
   1192		struct ie_cache *iec = ll->ie_cache + i;
   1193		if (iec->valid && iec->dirty)
   1194			r = ie_cache_writeback(ll, iec);
   1195	}
   1196
   1197	return r;
   1198}
   1199
   1200int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
   1201{
   1202	int r;
   1203
   1204	r = sm_ll_init(ll, tm);
   1205	if (r < 0)
   1206		return r;
   1207
   1208	ll->load_ie = disk_ll_load_ie;
   1209	ll->save_ie = disk_ll_save_ie;
   1210	ll->init_index = disk_ll_init_index;
   1211	ll->open_index = disk_ll_open;
   1212	ll->max_entries = disk_ll_max_entries;
   1213	ll->commit = disk_ll_commit;
   1214
   1215	ll->nr_blocks = 0;
   1216	ll->nr_allocated = 0;
   1217
   1218	r = ll->init_index(ll);
   1219	if (r < 0)
   1220		return r;
   1221
   1222	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
   1223	if (r < 0)
   1224		return r;
   1225
   1226	return 0;
   1227}
   1228
   1229int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
   1230		    void *root_le, size_t len)
   1231{
   1232	int r;
   1233	struct disk_sm_root *smr = root_le;
   1234
   1235	if (len < sizeof(struct disk_sm_root)) {
   1236		DMERR("sm_metadata root too small");
   1237		return -ENOMEM;
   1238	}
   1239
   1240	r = sm_ll_init(ll, tm);
   1241	if (r < 0)
   1242		return r;
   1243
   1244	ll->load_ie = disk_ll_load_ie;
   1245	ll->save_ie = disk_ll_save_ie;
   1246	ll->init_index = disk_ll_init_index;
   1247	ll->open_index = disk_ll_open;
   1248	ll->max_entries = disk_ll_max_entries;
   1249	ll->commit = disk_ll_commit;
   1250
   1251	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
   1252	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
   1253	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
   1254	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
   1255
   1256	return ll->open_index(ll);
   1257}
   1258
   1259/*----------------------------------------------------------------*/