cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

badblocks.c (14429B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Bad block management
      4 *
      5 * - Heavily based on MD badblocks code from Neil Brown
      6 *
      7 * Copyright (c) 2015, Intel Corporation.
      8 */
      9
     10#include <linux/badblocks.h>
     11#include <linux/seqlock.h>
     12#include <linux/device.h>
     13#include <linux/kernel.h>
     14#include <linux/module.h>
     15#include <linux/stddef.h>
     16#include <linux/types.h>
     17#include <linux/slab.h>
     18
     19/**
     20 * badblocks_check() - check a given range for bad sectors
     21 * @bb:		the badblocks structure that holds all badblock information
     22 * @s:		sector (start) at which to check for badblocks
     23 * @sectors:	number of sectors to check for badblocks
     24 * @first_bad:	pointer to store location of the first badblock
     25 * @bad_sectors: pointer to store number of badblocks after @first_bad
     26 *
     27 * We can record which blocks on each device are 'bad' and so just
     28 * fail those blocks, or that stripe, rather than the whole device.
     29 * Entries in the bad-block table are 64bits wide.  This comprises:
     30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
     31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
     32 *  A 'shift' can be set so that larger blocks are tracked and
     33 *  consequently larger devices can be covered.
     34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
     35 *
     36 * Locking of the bad-block table uses a seqlock so badblocks_check
     37 * might need to retry if it is very unlucky.
     38 * We will sometimes want to check for bad blocks in a bi_end_io function,
     39 * so we use the write_seqlock_irq variant.
     40 *
     41 * When looking for a bad block we specify a range and want to
     42 * know if any block in the range is bad.  So we binary-search
     43 * to the last range that starts at-or-before the given endpoint,
     44 * (or "before the sector after the target range")
     45 * then see if it ends after the given start.
     46 *
     47 * Return:
     48 *  0: there are no known bad blocks in the range
     49 *  1: there are known bad block which are all acknowledged
     50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
     51 * plus the start/length of the first bad section we overlap.
     52 */
     53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
     54			sector_t *first_bad, int *bad_sectors)
     55{
     56	int hi;
     57	int lo;
     58	u64 *p = bb->page;
     59	int rv;
     60	sector_t target = s + sectors;
     61	unsigned seq;
     62
     63	if (bb->shift > 0) {
     64		/* round the start down, and the end up */
     65		s >>= bb->shift;
     66		target += (1<<bb->shift) - 1;
     67		target >>= bb->shift;
     68	}
     69	/* 'target' is now the first block after the bad range */
     70
     71retry:
     72	seq = read_seqbegin(&bb->lock);
     73	lo = 0;
     74	rv = 0;
     75	hi = bb->count;
     76
     77	/* Binary search between lo and hi for 'target'
     78	 * i.e. for the last range that starts before 'target'
     79	 */
     80	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
     81	 * are known not to be the last range before target.
     82	 * VARIANT: hi-lo is the number of possible
     83	 * ranges, and decreases until it reaches 1
     84	 */
     85	while (hi - lo > 1) {
     86		int mid = (lo + hi) / 2;
     87		sector_t a = BB_OFFSET(p[mid]);
     88
     89		if (a < target)
     90			/* This could still be the one, earlier ranges
     91			 * could not.
     92			 */
     93			lo = mid;
     94		else
     95			/* This and later ranges are definitely out. */
     96			hi = mid;
     97	}
     98	/* 'lo' might be the last that started before target, but 'hi' isn't */
     99	if (hi > lo) {
    100		/* need to check all range that end after 's' to see if
    101		 * any are unacknowledged.
    102		 */
    103		while (lo >= 0 &&
    104		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
    105			if (BB_OFFSET(p[lo]) < target) {
    106				/* starts before the end, and finishes after
    107				 * the start, so they must overlap
    108				 */
    109				if (rv != -1 && BB_ACK(p[lo]))
    110					rv = 1;
    111				else
    112					rv = -1;
    113				*first_bad = BB_OFFSET(p[lo]);
    114				*bad_sectors = BB_LEN(p[lo]);
    115			}
    116			lo--;
    117		}
    118	}
    119
    120	if (read_seqretry(&bb->lock, seq))
    121		goto retry;
    122
    123	return rv;
    124}
    125EXPORT_SYMBOL_GPL(badblocks_check);
    126
    127static void badblocks_update_acked(struct badblocks *bb)
    128{
    129	u64 *p = bb->page;
    130	int i;
    131	bool unacked = false;
    132
    133	if (!bb->unacked_exist)
    134		return;
    135
    136	for (i = 0; i < bb->count ; i++) {
    137		if (!BB_ACK(p[i])) {
    138			unacked = true;
    139			break;
    140		}
    141	}
    142
    143	if (!unacked)
    144		bb->unacked_exist = 0;
    145}
    146
    147/**
    148 * badblocks_set() - Add a range of bad blocks to the table.
    149 * @bb:		the badblocks structure that holds all badblock information
    150 * @s:		first sector to mark as bad
    151 * @sectors:	number of sectors to mark as bad
    152 * @acknowledged: weather to mark the bad sectors as acknowledged
    153 *
    154 * This might extend the table, or might contract it if two adjacent ranges
    155 * can be merged. We binary-search to find the 'insertion' point, then
    156 * decide how best to handle it.
    157 *
    158 * Return:
    159 *  0: success
    160 *  1: failed to set badblocks (out of space)
    161 */
    162int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
    163			int acknowledged)
    164{
    165	u64 *p;
    166	int lo, hi;
    167	int rv = 0;
    168	unsigned long flags;
    169
    170	if (bb->shift < 0)
    171		/* badblocks are disabled */
    172		return 1;
    173
    174	if (bb->shift) {
    175		/* round the start down, and the end up */
    176		sector_t next = s + sectors;
    177
    178		s >>= bb->shift;
    179		next += (1<<bb->shift) - 1;
    180		next >>= bb->shift;
    181		sectors = next - s;
    182	}
    183
    184	write_seqlock_irqsave(&bb->lock, flags);
    185
    186	p = bb->page;
    187	lo = 0;
    188	hi = bb->count;
    189	/* Find the last range that starts at-or-before 's' */
    190	while (hi - lo > 1) {
    191		int mid = (lo + hi) / 2;
    192		sector_t a = BB_OFFSET(p[mid]);
    193
    194		if (a <= s)
    195			lo = mid;
    196		else
    197			hi = mid;
    198	}
    199	if (hi > lo && BB_OFFSET(p[lo]) > s)
    200		hi = lo;
    201
    202	if (hi > lo) {
    203		/* we found a range that might merge with the start
    204		 * of our new range
    205		 */
    206		sector_t a = BB_OFFSET(p[lo]);
    207		sector_t e = a + BB_LEN(p[lo]);
    208		int ack = BB_ACK(p[lo]);
    209
    210		if (e >= s) {
    211			/* Yes, we can merge with a previous range */
    212			if (s == a && s + sectors >= e)
    213				/* new range covers old */
    214				ack = acknowledged;
    215			else
    216				ack = ack && acknowledged;
    217
    218			if (e < s + sectors)
    219				e = s + sectors;
    220			if (e - a <= BB_MAX_LEN) {
    221				p[lo] = BB_MAKE(a, e-a, ack);
    222				s = e;
    223			} else {
    224				/* does not all fit in one range,
    225				 * make p[lo] maximal
    226				 */
    227				if (BB_LEN(p[lo]) != BB_MAX_LEN)
    228					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
    229				s = a + BB_MAX_LEN;
    230			}
    231			sectors = e - s;
    232		}
    233	}
    234	if (sectors && hi < bb->count) {
    235		/* 'hi' points to the first range that starts after 's'.
    236		 * Maybe we can merge with the start of that range
    237		 */
    238		sector_t a = BB_OFFSET(p[hi]);
    239		sector_t e = a + BB_LEN(p[hi]);
    240		int ack = BB_ACK(p[hi]);
    241
    242		if (a <= s + sectors) {
    243			/* merging is possible */
    244			if (e <= s + sectors) {
    245				/* full overlap */
    246				e = s + sectors;
    247				ack = acknowledged;
    248			} else
    249				ack = ack && acknowledged;
    250
    251			a = s;
    252			if (e - a <= BB_MAX_LEN) {
    253				p[hi] = BB_MAKE(a, e-a, ack);
    254				s = e;
    255			} else {
    256				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
    257				s = a + BB_MAX_LEN;
    258			}
    259			sectors = e - s;
    260			lo = hi;
    261			hi++;
    262		}
    263	}
    264	if (sectors == 0 && hi < bb->count) {
    265		/* we might be able to combine lo and hi */
    266		/* Note: 's' is at the end of 'lo' */
    267		sector_t a = BB_OFFSET(p[hi]);
    268		int lolen = BB_LEN(p[lo]);
    269		int hilen = BB_LEN(p[hi]);
    270		int newlen = lolen + hilen - (s - a);
    271
    272		if (s >= a && newlen < BB_MAX_LEN) {
    273			/* yes, we can combine them */
    274			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
    275
    276			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
    277			memmove(p + hi, p + hi + 1,
    278				(bb->count - hi - 1) * 8);
    279			bb->count--;
    280		}
    281	}
    282	while (sectors) {
    283		/* didn't merge (it all).
    284		 * Need to add a range just before 'hi'
    285		 */
    286		if (bb->count >= MAX_BADBLOCKS) {
    287			/* No room for more */
    288			rv = 1;
    289			break;
    290		} else {
    291			int this_sectors = sectors;
    292
    293			memmove(p + hi + 1, p + hi,
    294				(bb->count - hi) * 8);
    295			bb->count++;
    296
    297			if (this_sectors > BB_MAX_LEN)
    298				this_sectors = BB_MAX_LEN;
    299			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
    300			sectors -= this_sectors;
    301			s += this_sectors;
    302		}
    303	}
    304
    305	bb->changed = 1;
    306	if (!acknowledged)
    307		bb->unacked_exist = 1;
    308	else
    309		badblocks_update_acked(bb);
    310	write_sequnlock_irqrestore(&bb->lock, flags);
    311
    312	return rv;
    313}
    314EXPORT_SYMBOL_GPL(badblocks_set);
    315
    316/**
    317 * badblocks_clear() - Remove a range of bad blocks to the table.
    318 * @bb:		the badblocks structure that holds all badblock information
    319 * @s:		first sector to mark as bad
    320 * @sectors:	number of sectors to mark as bad
    321 *
    322 * This may involve extending the table if we spilt a region,
    323 * but it must not fail.  So if the table becomes full, we just
    324 * drop the remove request.
    325 *
    326 * Return:
    327 *  0: success
    328 *  1: failed to clear badblocks
    329 */
    330int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
    331{
    332	u64 *p;
    333	int lo, hi;
    334	sector_t target = s + sectors;
    335	int rv = 0;
    336
    337	if (bb->shift > 0) {
    338		/* When clearing we round the start up and the end down.
    339		 * This should not matter as the shift should align with
    340		 * the block size and no rounding should ever be needed.
    341		 * However it is better the think a block is bad when it
    342		 * isn't than to think a block is not bad when it is.
    343		 */
    344		s += (1<<bb->shift) - 1;
    345		s >>= bb->shift;
    346		target >>= bb->shift;
    347	}
    348
    349	write_seqlock_irq(&bb->lock);
    350
    351	p = bb->page;
    352	lo = 0;
    353	hi = bb->count;
    354	/* Find the last range that starts before 'target' */
    355	while (hi - lo > 1) {
    356		int mid = (lo + hi) / 2;
    357		sector_t a = BB_OFFSET(p[mid]);
    358
    359		if (a < target)
    360			lo = mid;
    361		else
    362			hi = mid;
    363	}
    364	if (hi > lo) {
    365		/* p[lo] is the last range that could overlap the
    366		 * current range.  Earlier ranges could also overlap,
    367		 * but only this one can overlap the end of the range.
    368		 */
    369		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
    370		    (BB_OFFSET(p[lo]) < target)) {
    371			/* Partial overlap, leave the tail of this range */
    372			int ack = BB_ACK(p[lo]);
    373			sector_t a = BB_OFFSET(p[lo]);
    374			sector_t end = a + BB_LEN(p[lo]);
    375
    376			if (a < s) {
    377				/* we need to split this range */
    378				if (bb->count >= MAX_BADBLOCKS) {
    379					rv = -ENOSPC;
    380					goto out;
    381				}
    382				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
    383				bb->count++;
    384				p[lo] = BB_MAKE(a, s-a, ack);
    385				lo++;
    386			}
    387			p[lo] = BB_MAKE(target, end - target, ack);
    388			/* there is no longer an overlap */
    389			hi = lo;
    390			lo--;
    391		}
    392		while (lo >= 0 &&
    393		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
    394		       (BB_OFFSET(p[lo]) < target)) {
    395			/* This range does overlap */
    396			if (BB_OFFSET(p[lo]) < s) {
    397				/* Keep the early parts of this range. */
    398				int ack = BB_ACK(p[lo]);
    399				sector_t start = BB_OFFSET(p[lo]);
    400
    401				p[lo] = BB_MAKE(start, s - start, ack);
    402				/* now low doesn't overlap, so.. */
    403				break;
    404			}
    405			lo--;
    406		}
    407		/* 'lo' is strictly before, 'hi' is strictly after,
    408		 * anything between needs to be discarded
    409		 */
    410		if (hi - lo > 1) {
    411			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
    412			bb->count -= (hi - lo - 1);
    413		}
    414	}
    415
    416	badblocks_update_acked(bb);
    417	bb->changed = 1;
    418out:
    419	write_sequnlock_irq(&bb->lock);
    420	return rv;
    421}
    422EXPORT_SYMBOL_GPL(badblocks_clear);
    423
    424/**
    425 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
    426 * @bb:		the badblocks structure that holds all badblock information
    427 *
    428 * This only succeeds if ->changed is clear.  It is used by
    429 * in-kernel metadata updates
    430 */
    431void ack_all_badblocks(struct badblocks *bb)
    432{
    433	if (bb->page == NULL || bb->changed)
    434		/* no point even trying */
    435		return;
    436	write_seqlock_irq(&bb->lock);
    437
    438	if (bb->changed == 0 && bb->unacked_exist) {
    439		u64 *p = bb->page;
    440		int i;
    441
    442		for (i = 0; i < bb->count ; i++) {
    443			if (!BB_ACK(p[i])) {
    444				sector_t start = BB_OFFSET(p[i]);
    445				int len = BB_LEN(p[i]);
    446
    447				p[i] = BB_MAKE(start, len, 1);
    448			}
    449		}
    450		bb->unacked_exist = 0;
    451	}
    452	write_sequnlock_irq(&bb->lock);
    453}
    454EXPORT_SYMBOL_GPL(ack_all_badblocks);
    455
    456/**
    457 * badblocks_show() - sysfs access to bad-blocks list
    458 * @bb:		the badblocks structure that holds all badblock information
    459 * @page:	buffer received from sysfs
    460 * @unack:	weather to show unacknowledged badblocks
    461 *
    462 * Return:
    463 *  Length of returned data
    464 */
    465ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
    466{
    467	size_t len;
    468	int i;
    469	u64 *p = bb->page;
    470	unsigned seq;
    471
    472	if (bb->shift < 0)
    473		return 0;
    474
    475retry:
    476	seq = read_seqbegin(&bb->lock);
    477
    478	len = 0;
    479	i = 0;
    480
    481	while (len < PAGE_SIZE && i < bb->count) {
    482		sector_t s = BB_OFFSET(p[i]);
    483		unsigned int length = BB_LEN(p[i]);
    484		int ack = BB_ACK(p[i]);
    485
    486		i++;
    487
    488		if (unack && ack)
    489			continue;
    490
    491		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
    492				(unsigned long long)s << bb->shift,
    493				length << bb->shift);
    494	}
    495	if (unack && len == 0)
    496		bb->unacked_exist = 0;
    497
    498	if (read_seqretry(&bb->lock, seq))
    499		goto retry;
    500
    501	return len;
    502}
    503EXPORT_SYMBOL_GPL(badblocks_show);
    504
    505/**
    506 * badblocks_store() - sysfs access to bad-blocks list
    507 * @bb:		the badblocks structure that holds all badblock information
    508 * @page:	buffer received from sysfs
    509 * @len:	length of data received from sysfs
    510 * @unack:	weather to show unacknowledged badblocks
    511 *
    512 * Return:
    513 *  Length of the buffer processed or -ve error.
    514 */
    515ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
    516			int unack)
    517{
    518	unsigned long long sector;
    519	int length;
    520	char newline;
    521
    522	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
    523	case 3:
    524		if (newline != '\n')
    525			return -EINVAL;
    526		fallthrough;
    527	case 2:
    528		if (length <= 0)
    529			return -EINVAL;
    530		break;
    531	default:
    532		return -EINVAL;
    533	}
    534
    535	if (badblocks_set(bb, sector, length, !unack))
    536		return -ENOSPC;
    537	else
    538		return len;
    539}
    540EXPORT_SYMBOL_GPL(badblocks_store);
    541
    542static int __badblocks_init(struct device *dev, struct badblocks *bb,
    543		int enable)
    544{
    545	bb->dev = dev;
    546	bb->count = 0;
    547	if (enable)
    548		bb->shift = 0;
    549	else
    550		bb->shift = -1;
    551	if (dev)
    552		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
    553	else
    554		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
    555	if (!bb->page) {
    556		bb->shift = -1;
    557		return -ENOMEM;
    558	}
    559	seqlock_init(&bb->lock);
    560
    561	return 0;
    562}
    563
    564/**
    565 * badblocks_init() - initialize the badblocks structure
    566 * @bb:		the badblocks structure that holds all badblock information
    567 * @enable:	weather to enable badblocks accounting
    568 *
    569 * Return:
    570 *  0: success
    571 *  -ve errno: on error
    572 */
    573int badblocks_init(struct badblocks *bb, int enable)
    574{
    575	return __badblocks_init(NULL, bb, enable);
    576}
    577EXPORT_SYMBOL_GPL(badblocks_init);
    578
    579int devm_init_badblocks(struct device *dev, struct badblocks *bb)
    580{
    581	if (!bb)
    582		return -EINVAL;
    583	return __badblocks_init(dev, bb, 1);
    584}
    585EXPORT_SYMBOL_GPL(devm_init_badblocks);
    586
    587/**
    588 * badblocks_exit() - free the badblocks structure
    589 * @bb:		the badblocks structure that holds all badblock information
    590 */
    591void badblocks_exit(struct badblocks *bb)
    592{
    593	if (!bb)
    594		return;
    595	if (bb->dev)
    596		devm_kfree(bb->dev, bb->page);
    597	else
    598		kfree(bb->page);
    599	bb->page = NULL;
    600}
    601EXPORT_SYMBOL_GPL(badblocks_exit);