cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

extents.c (15425B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Uses a block device as cache for other block devices; optimized for SSDs.
 * All allocation is done in buckets, which should match the erase block size
 * of the device.
 *
 * Buckets containing cached data are kept on a heap sorted by priority;
 * bucket priority is increased on cache hit, and periodically all the buckets
 * on the heap have their priority scaled down. This currently is just used as
 * an LRU but in the future should allow for more intelligent heuristics.
 *
 * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
 * counter. Garbage collection is used to remove stale pointers.
 *
 * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
 * as keys are inserted we only sort the pages that have not yet been written.
 * When garbage collection is run, we resort the entire node.
 *
 * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "writeback.h"

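/*
 * Illustrative sketch (added for exposition, not part of bcache): the header
 * comment above describes freeing buckets by incrementing an 8 bit
 * generation counter.  Each pointer in a bkey records the generation of its
 * bucket at the time it was created; once the bucket's generation has been
 * bumped, the pointer is "stale" and garbage collection drops it.  The toy
 * types and helper below are hypothetical simplifications of the real
 * ptr_stale()/gen_after() machinery in bcache.h, kept under #if 0 so they
 * are never built.
 */
#if 0
struct toy_bucket {
	uint8_t gen;		/* bumped each time the bucket is reused */
};

struct toy_ptr {
	uint8_t gen;		/* generation the pointer was created with */
};

static inline bool toy_ptr_stale(const struct toy_bucket *b,
				 const struct toy_ptr *p)
{
	/*
	 * Wrapping 8 bit comparison: a positive difference means the bucket
	 * has been freed (and possibly reused) since the pointer was
	 * written, so the cached data can no longer be trusted.
	 */
	return (int8_t)(b->gen - p->gen) > 0;
}
#endif
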
static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
	i->k = bkey_next(i->k);

	if (i->k == i->end)
		*i = iter->data[--iter->used];
}

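/*
 * Heap comparator for whole keys (used for btree node pointers): returns
 * true if l sorts after r by bkey_cmp(), with ties broken by key address so
 * that equal keys from different sets come out in a fixed (newest to oldest)
 * order - see the comment on bch_extent_sort_cmp() below, which applies the
 * same tie-break to start keys.
 */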
static bool bch_key_sort_cmp(struct btree_iter_set l,
			     struct btree_iter_set r)
{
	int64_t c = bkey_cmp(l.k, r.k);

	return c ? c > 0 : l.k < r.k;
}

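/*
 * A pointer is invalid if the extent does not fit inside its bucket
 * (KEY_SIZE plus the offset into the bucket exceeds the bucket size) or if
 * the bucket number falls outside the device's usable range.
 */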
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
			    bucket <  ca->sb.first_bucket ||
			    bucket >= ca->sb.nbuckets)
				return true;
		}

	return false;
}

/* Common among btree and extent ptrs */

static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
				return "bad, length too big";
			if (bucket <  ca->sb.first_bucket)
				return "bad, short offset";
			if (bucket >= ca->sb.nbuckets)
				return "bad, offset past end of device";
			if (ptr_stale(c, k, i))
				return "stale";
		}

	if (!bkey_cmp(k, &ZERO_KEY))
		return "bad, null key";
	if (!KEY_PTRS(k))
		return "bad, no pointers";
	if (!KEY_SIZE(k))
		return "zeroed key";
	return "";
}

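/*
 * Formats a key for debugging output, e.g. (illustrative values):
 *
 *	1:3216 len 16 -> [0:131072 gen 2] dirty
 *
 * i.e. inode:start_offset, size in sectors, then dev:offset gen for each
 * pointer, followed by the dirty/csum fields if set.
 */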
void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
	unsigned int i = 0;
	char *out = buf, *end = buf + size;

#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))

	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));

	for (i = 0; i < KEY_PTRS(k); i++) {
		if (i)
			p(", ");

		if (PTR_DEV(k, i) == PTR_CHECK_DEV)
			p("check dev");
		else
			p("%llu:%llu gen %llu", PTR_DEV(k, i),
			  PTR_OFFSET(k, i), PTR_GEN(k, i));
	}

	p("]");

	if (KEY_DIRTY(k))
		p(" dirty");
	if (KEY_CSUM(k))
		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
}

static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
	struct btree *b = container_of(keys, struct btree, keys);
	unsigned int j;
	char buf[80];

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_cont(" %s", buf);

	for (j = 0; j < KEY_PTRS(k); j++) {
		size_t n = PTR_BUCKET_NR(b->c, k, j);

		pr_cont(" bucket %zu", n);
		if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
			pr_cont(" prio %i",
				PTR_BUCKET(b->c, k, j)->prio);
	}

	pr_cont(" %s\n", bch_ptr_status(b->c, k));
}

/* Btree ptrs */

bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_btree_ptr_invalid(b->c, k);
}

static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
	unsigned int i;
	char buf[80];
	struct bucket *g;

	if (mutex_trylock(&b->c->bucket_lock)) {
		for (i = 0; i < KEY_PTRS(k); i++)
			if (ptr_available(b->c, k, i)) {
				g = PTR_BUCKET(b->c, k, i);

				if (KEY_DIRTY(k) ||
				    g->prio != BTREE_PRIO ||
				    (b->c->gc_mark_valid &&
				     GC_MARK(g) != GC_MARK_METADATA))
					goto err;
			}

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (!bkey_cmp(k, &ZERO_KEY) ||
	    !KEY_PTRS(k) ||
	    bch_ptr_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i) ||
		    ptr_stale(b->c, k, i))
			return true;

	if (expensive_debug_checks(b->c) &&
	    btree_ptr_bad_expensive(b, k))
		return true;

	return false;
}

static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
				       struct bkey *insert,
				       struct btree_iter *iter,
				       struct bkey *replace_key)
{
	struct btree *b = container_of(bk, struct btree, keys);

	if (!KEY_OFFSET(insert))
		btree_current_write(b)->prio_blocked++;

	return false;
}

const struct btree_keys_ops bch_btree_keys_ops = {
	.sort_cmp	= bch_key_sort_cmp,
	.insert_fixup	= bch_btree_ptr_insert_fixup,
	.key_invalid	= bch_btree_ptr_invalid,
	.key_bad	= bch_btree_ptr_bad,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
};

/* Extents */

/*
 * Returns true if l > r - unless l == r, in which case returns true if l is
 * older than r.
 *
 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
 * equal in different sets, we have to process them newest to oldest.
 */
static bool bch_extent_sort_cmp(struct btree_iter_set l,
				struct btree_iter_set r)
{
	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));

	return c ? c > 0 : l.k < r.k;
}

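/*
 * Called while merging sorted sets: when the next key to be emitted (the
 * top of the heap) overlaps the key after it, the older of the two (the
 * lower address, per the tie-break documented above) is skipped, trimmed or
 * split so that the newer data wins and the sorted output contains no
 * overlapping extents.
 */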
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
					  struct bkey *tmp)
{
	while (iter->used > 1) {
		struct btree_iter_set *top = iter->data, *i = top + 1;

		if (iter->used > 2 &&
		    bch_extent_sort_cmp(i[0], i[1]))
			i++;

		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
			break;

		if (!KEY_SIZE(i->k)) {
			sort_key_next(iter, i);
			heap_sift(iter, i - top, bch_extent_sort_cmp);
			continue;
		}

		if (top->k > i->k) {
			if (bkey_cmp(top->k, i->k) >= 0)
				sort_key_next(iter, i);
			else
				bch_cut_front(top->k, i->k);

			heap_sift(iter, i - top, bch_extent_sort_cmp);
		} else {
			/* can't happen because of comparison func */
			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));

			if (bkey_cmp(i->k, top->k) < 0) {
				bkey_copy(tmp, top->k);

				bch_cut_back(&START_KEY(i->k), tmp);
				bch_cut_front(i->k, top->k);
				heap_sift(iter, 0, bch_extent_sort_cmp);

				return tmp;
			} else {
				bch_cut_back(&START_KEY(i->k), top->k);
			}
		}
	}

	return NULL;
}

static void bch_subtract_dirty(struct bkey *k,
			   struct cache_set *c,
			   uint64_t offset,
			   int sectors)
{
	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
					     offset, -sectors);
}

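/*
 * Handles overlaps between the key being inserted and the existing extents
 * it lands on: existing keys are trimmed or split so that the new data
 * wins.  When replace_key is set (cache replace operations), the insert is
 * only allowed over sectors still covered by replace_key; if fewer sectors
 * are found, insert is shrunk accordingly, and true is returned when none
 * are found at all.
 */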
static bool bch_extent_insert_fixup(struct btree_keys *b,
				    struct bkey *insert,
				    struct btree_iter *iter,
				    struct bkey *replace_key)
{
	struct cache_set *c = container_of(b, struct btree, keys)->c;

	uint64_t old_offset;
	unsigned int old_size, sectors_found = 0;

	BUG_ON(!KEY_OFFSET(insert));
	BUG_ON(!KEY_SIZE(insert));

	while (1) {
		struct bkey *k = bch_btree_iter_next(iter);

		if (!k)
			break;

		if (bkey_cmp(&START_KEY(k), insert) >= 0) {
			if (KEY_SIZE(k))
				break;
			else
				continue;
		}

		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
			continue;

		old_offset = KEY_START(k);
		old_size = KEY_SIZE(k);

		/*
		 * We might overlap with 0 size extents; we can't skip these
		 * because if they're in the set we're inserting to we have to
		 * adjust them so they don't overlap with the key we're
		 * inserting. But we don't want to check them for replace
		 * operations.
		 */

		if (replace_key && KEY_SIZE(k)) {
			/*
			 * k might have been split since we inserted/found the
			 * key we're replacing
			 */
			unsigned int i;
			uint64_t offset = KEY_START(k) -
				KEY_START(replace_key);

			/* But it must be a subset of the replace key */
			if (KEY_START(k) < KEY_START(replace_key) ||
			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
				goto check_failed;

			/* We didn't find a key that we were supposed to */
			if (KEY_START(k) > KEY_START(insert) + sectors_found)
				goto check_failed;

			if (!bch_bkey_equal_header(k, replace_key))
				goto check_failed;

			/* skip past gen */
			offset <<= 8;

			BUG_ON(!KEY_PTRS(replace_key));

			for (i = 0; i < KEY_PTRS(replace_key); i++)
				if (k->ptr[i] != replace_key->ptr[i] + offset)
					goto check_failed;

			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
		}

		if (bkey_cmp(insert, k) < 0 &&
		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
			/*
			 * We overlapped in the middle of an existing key: that
			 * means we have to split the old key. But we have to do
			 * slightly different things depending on whether the
			 * old key has been written out yet.
			 */

			struct bkey *top;

			bch_subtract_dirty(k, c, KEY_START(insert),
				       KEY_SIZE(insert));

			if (bkey_written(b, k)) {
				/*
				 * We insert a new key to cover the top of the
				 * old key, and the old key is modified in place
				 * to represent the bottom split.
				 *
				 * It's completely arbitrary whether the new key
				 * is the top or the bottom, but it has to match
				 * up with what btree_sort_fixup() does - it
				 * doesn't check for this kind of overlap, it
				 * depends on us inserting a new key for the top
				 * here.
				 */
				top = bch_bset_search(b, bset_tree_last(b),
						      insert);
				bch_bset_insert(b, top, k);
			} else {
				BKEY_PADDED(key) temp;
				bkey_copy(&temp.key, k);
				bch_bset_insert(b, k, &temp.key);
				top = bkey_next(k);
			}

			bch_cut_front(insert, top);
			bch_cut_back(&START_KEY(insert), k);
			bch_bset_fix_invalidated_key(b, k);
			goto out;
		}

		if (bkey_cmp(insert, k) < 0) {
			bch_cut_front(insert, k);
		} else {
			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
				old_offset = KEY_START(insert);

			if (bkey_written(b, k) &&
			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
				/*
				 * Completely overwrote, so we don't have to
				 * invalidate the binary search tree
				 */
				bch_cut_front(k, k);
			} else {
				__bch_cut_back(&START_KEY(insert), k);
				bch_bset_fix_invalidated_key(b, k);
			}
		}

		bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
	}

check_failed:
	if (replace_key) {
		if (!sectors_found) {
			return true;
		} else if (sectors_found < KEY_SIZE(insert)) {
			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
				       (KEY_SIZE(insert) - sectors_found));
			SET_KEY_SIZE(insert, sectors_found);
		}
	}
out:
	if (KEY_DIRTY(insert))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
					     KEY_START(insert),
					     KEY_SIZE(insert));

	return false;
}

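/*
 * An extent is invalid if it has zero size (rejected without logging a
 * cache bug), if its size is larger than its end offset, or if any pointer
 * fails the __ptr_invalid() checks above.
 */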
bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_SIZE(k))
		return true;

	if (KEY_SIZE(k) > KEY_OFFSET(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_extent_invalid(b->c, k);
}

static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
				     unsigned int ptr)
{
	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
	char buf[80];

	if (mutex_trylock(&b->c->bucket_lock)) {
		if (b->c->gc_mark_valid &&
		    (!GC_MARK(g) ||
		     GC_MARK(g) == GC_MARK_METADATA ||
		     (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
			goto err;

		if (g->prio == BTREE_PRIO)
			goto err;

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

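/*
 * Decides whether callers iterating the btree should skip this extent: it
 * is bad if it has no pointers, is invalid, or any of its pointers is
 * unavailable or stale.  A stale pointer on a dirty key is additionally
 * logged, as that combination is unexpected.
 */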
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i, stale;
	char buf[80];

	if (!KEY_PTRS(k) ||
	    bch_extent_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i))
			return true;

	for (i = 0; i < KEY_PTRS(k); i++) {
		stale = ptr_stale(b->c, k, i);

		if (stale && KEY_DIRTY(k)) {
			bch_extent_to_text(buf, sizeof(buf), k);
			pr_info("stale dirty pointer, stale %u, key: %s\n",
				stale, buf);
		}

		btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
			     "key too stale: %i, need_gc %u",
			     stale, b->c->need_gc);

		if (stale)
			return true;

		if (expensive_debug_checks(b->c) &&
		    bch_extent_bad_expensive(b, k, i))
			return true;
	}

	return false;
}

static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
{
	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
		~((uint64_t)1 << 63);
}

static bool bch_extent_merge(struct btree_keys *bk,
			     struct bkey *l,
			     struct bkey *r)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (key_merging_disabled(b->c))
		return false;

	for (i = 0; i < KEY_PTRS(l); i++)
		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
			return false;

	/* Keys with no pointers aren't restricted to one bucket and could
	 * overflow KEY_SIZE
	 */
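	/*
	 * Worked example (illustrative numbers): merging a 60000 sector key
	 * with a 10000 sector key would give 70000 > USHRT_MAX (65535)
	 * sectors.  Instead, l is grown to exactly USHRT_MAX sectors, r is
	 * cut so that it now starts where the grown l ends, and the merge
	 * itself is refused.
	 */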
	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
		SET_KEY_SIZE(l, USHRT_MAX);

		bch_cut_front(l, r);
		return false;
	}

	if (KEY_CSUM(l)) {
		if (KEY_CSUM(r))
			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
		else
			SET_KEY_CSUM(l, 0);
	}

	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));

	return true;
}

const struct btree_keys_ops bch_extent_keys_ops = {
	.sort_cmp	= bch_extent_sort_cmp,
	.sort_fixup	= bch_extent_sort_fixup,
	.insert_fixup	= bch_extent_insert_fixup,
	.key_invalid	= bch_extent_invalid,
	.key_bad	= bch_extent_bad,
	.key_merge	= bch_extent_merge,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
	.is_extents	= true,
};