badblocks.c (14429B)
// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb: the badblocks structure that holds all badblock information
 * @s: sector (start) at which to check for badblocks
 * @sectors: number of sectors to check for badblocks
 * @first_bad: pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                    sector_t *first_bad, int *bad_sectors)
{
        int hi;
        int lo;
        u64 *p = bb->page;
        int rv;
        sector_t target = s + sectors;
        unsigned seq;

        if (bb->shift > 0) {
                /* round the start down, and the end up */
                s >>= bb->shift;
                target += (1<<bb->shift) - 1;
                target >>= bb->shift;
        }
        /* 'target' is now the first block after the bad range */

retry:
        seq = read_seqbegin(&bb->lock);
        lo = 0;
        rv = 0;
        hi = bb->count;

        /* Binary search between lo and hi for 'target'
         * i.e. for the last range that starts before 'target'
         */
        /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
         * are known not to be the last range before target.
         * VARIANT: hi-lo is the number of possible
         * ranges, and decreases until it reaches 1
         */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        /* This could still be the one, earlier ranges
                         * could not.
                         */
                        lo = mid;
                else
                        /* This and later ranges are definitely out. */
                        hi = mid;
        }
        /* 'lo' might be the last that started before target, but 'hi' isn't */
        if (hi > lo) {
                /* need to check all ranges that end after 's' to see
                 * if any are unacknowledged.
                 */
                while (lo >= 0 &&
                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
                        if (BB_OFFSET(p[lo]) < target) {
                                /* starts before the end, and finishes after
                                 * the start, so they must overlap
                                 */
                                if (rv != -1 && BB_ACK(p[lo]))
                                        rv = 1;
                                else
                                        rv = -1;
                                *first_bad = BB_OFFSET(p[lo]);
                                *bad_sectors = BB_LEN(p[lo]);
                        }
                        lo--;
                }
        }

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
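
/*
 * Illustrative example: how a hypothetical caller might act on
 * badblocks_check()'s return values.  'bb', 'sector' and
 * 'nr_sectors' stand in for the caller's own state; 'first_bad'
 * and 'bad_sectors' are only meaningful when the result is
 * non-zero.
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	switch (badblocks_check(bb, sector, nr_sectors,
 *				&first_bad, &bad_sectors)) {
 *	case 0:		// whole range is clean: issue the I/O
 *		break;
 *	case 1:		// known-bad and acknowledged: fail only the
 *		break;	// overlapping range, not the whole device
 *	default:	// -1: unacknowledged; callers typically wait
 *		break;	// for the metadata write before failing
 *	}
 */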

static void badblocks_update_acked(struct badblocks *bb)
{
        u64 *p = bb->page;
        int i;
        bool unacked = false;

        if (!bb->unacked_exist)
                return;

        for (i = 0; i < bb->count ; i++) {
                if (!BB_ACK(p[i])) {
                        unacked = true;
                        break;
                }
        }

        if (!unacked)
                bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as bad
 * @sectors: number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space, or badblocks are disabled)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
                  int acknowledged)
{
        u64 *p;
        int lo, hi;
        int rv = 0;
        unsigned long flags;

        if (bb->shift < 0)
                /* badblocks are disabled */
                return 1;

        if (bb->shift) {
                /* round the start down, and the end up */
                sector_t next = s + sectors;

                s >>= bb->shift;
                next += (1<<bb->shift) - 1;
                next >>= bb->shift;
                sectors = next - s;
        }

        write_seqlock_irqsave(&bb->lock, flags);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts at-or-before 's' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a <= s)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo && BB_OFFSET(p[lo]) > s)
                hi = lo;

        if (hi > lo) {
                /* we found a range that might merge with the start
                 * of our new range
                 */
                sector_t a = BB_OFFSET(p[lo]);
                sector_t e = a + BB_LEN(p[lo]);
                int ack = BB_ACK(p[lo]);

                if (e >= s) {
                        /* Yes, we can merge with a previous range */
                        if (s == a && s + sectors >= e)
                                /* new range covers old */
                                ack = acknowledged;
                        else
                                ack = ack && acknowledged;

                        if (e < s + sectors)
                                e = s + sectors;
                        if (e - a <= BB_MAX_LEN) {
                                p[lo] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                /* does not all fit in one range,
                                 * make p[lo] maximal
                                 */
                                if (BB_LEN(p[lo]) != BB_MAX_LEN)
                                        p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                }
        }
        if (sectors && hi < bb->count) {
                /* 'hi' points to the first range that starts after 's'.
                 * Maybe we can merge with the start of that range
                 */
                sector_t a = BB_OFFSET(p[hi]);
                sector_t e = a + BB_LEN(p[hi]);
                int ack = BB_ACK(p[hi]);

                if (a <= s + sectors) {
                        /* merging is possible */
                        if (e <= s + sectors) {
                                /* full overlap */
                                e = s + sectors;
                                ack = acknowledged;
                        } else
                                ack = ack && acknowledged;

                        a = s;
                        if (e - a <= BB_MAX_LEN) {
                                p[hi] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                        lo = hi;
                        hi++;
                }
        }
        if (sectors == 0 && hi < bb->count) {
                /* we might be able to combine lo and hi */
                /* Note: 's' is at the end of 'lo' */
                sector_t a = BB_OFFSET(p[hi]);
                int lolen = BB_LEN(p[lo]);
                int hilen = BB_LEN(p[hi]);
                int newlen = lolen + hilen - (s - a);

                if (s >= a && newlen < BB_MAX_LEN) {
                        /* yes, we can combine them */
                        int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

                        p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
                        memmove(p + hi, p + hi + 1,
                                (bb->count - hi - 1) * 8);
                        bb->count--;
                }
        }
        while (sectors) {
                /* didn't merge (it all).
                 * Need to add a range just before 'hi'
                 */
                if (bb->count >= MAX_BADBLOCKS) {
                        /* No room for more */
                        rv = 1;
                        break;
                } else {
                        int this_sectors = sectors;

                        memmove(p + hi + 1, p + hi,
                                (bb->count - hi) * 8);
                        bb->count++;

                        if (this_sectors > BB_MAX_LEN)
                                this_sectors = BB_MAX_LEN;
                        p[hi] = BB_MAKE(s, this_sectors, acknowledged);
                        sectors -= this_sectors;
                        s += this_sectors;
                }
        }

        bb->changed = 1;
        if (!acknowledged)
                bb->unacked_exist = 1;
        else
                badblocks_update_acked(bb);
        write_sequnlock_irqrestore(&bb->lock, flags);

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
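
/*
 * Illustrative example: adjacent and overlapping ranges are merged
 * into existing slots rather than stored twice ('bb' is assumed to
 * start out empty here):
 *
 *	badblocks_set(bb, 32, 8, 1);	// table: {32,+8}, acknowledged
 *	badblocks_set(bb, 40, 8, 1);	// merges into {32,+16}, one slot
 *	badblocks_set(bb, 36, 4, 0);	// already covered, but the entry
 *					// is downgraded to unacknowledged
 *
 * Note that BB_LEN() can encode at most BB_MAX_LEN (512) sectors,
 * so a larger request is recorded as several entries.
 */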

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb: the badblocks structure that holds all badblock information
 * @s: first sector to mark as clean
 * @sectors: number of sectors to mark as clean
 *
 * This may involve extending the table if we split a region,
 * but it must not fail. So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (table full while splitting a range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
        u64 *p;
        int lo, hi;
        sector_t target = s + sectors;
        int rv = 0;

        if (bb->shift > 0) {
                /* When clearing we round the start up and the end down.
                 * This should not matter as the shift should align with
                 * the block size and no rounding should ever be needed.
                 * However it is better to think a block is bad when it
                 * isn't than to think a block is not bad when it is.
                 */
                s += (1<<bb->shift) - 1;
                s >>= bb->shift;
                target >>= bb->shift;
        }

        write_seqlock_irq(&bb->lock);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts before 'target' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo) {
                /* p[lo] is the last range that could overlap the
                 * current range.  Earlier ranges could also overlap,
                 * but only this one can overlap the end of the range.
                 */
                if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
                    (BB_OFFSET(p[lo]) < target)) {
                        /* Partial overlap, leave the tail of this range */
                        int ack = BB_ACK(p[lo]);
                        sector_t a = BB_OFFSET(p[lo]);
                        sector_t end = a + BB_LEN(p[lo]);

                        if (a < s) {
                                /* we need to split this range */
                                if (bb->count >= MAX_BADBLOCKS) {
                                        rv = -ENOSPC;
                                        goto out;
                                }
                                memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
                                bb->count++;
                                p[lo] = BB_MAKE(a, s-a, ack);
                                lo++;
                        }
                        p[lo] = BB_MAKE(target, end - target, ack);
                        /* there is no longer an overlap */
                        hi = lo;
                        lo--;
                }
                while (lo >= 0 &&
                       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
                       (BB_OFFSET(p[lo]) < target)) {
                        /* This range does overlap */
                        if (BB_OFFSET(p[lo]) < s) {
                                /* Keep the early parts of this range. */
                                int ack = BB_ACK(p[lo]);
                                sector_t start = BB_OFFSET(p[lo]);

                                p[lo] = BB_MAKE(start, s - start, ack);
                                /* now 'lo' doesn't overlap, so.. */
                                break;
                        }
                        lo--;
                }
                /* 'lo' is strictly before, 'hi' is strictly after,
                 * anything between needs to be discarded
                 */
                if (hi - lo > 1) {
                        memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
                        bb->count -= (hi - lo - 1);
                }
        }

        badblocks_update_acked(bb);
        bb->changed = 1;
out:
        write_sequnlock_irq(&bb->lock);
        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
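
/*
 * Illustrative example: clearing the middle of a recorded range has
 * to grow the table, which is why badblocks_clear() can fail with
 * -ENOSPC even though it removes bad blocks.  With a single entry
 * {0,+512} in the table:
 *
 *	badblocks_clear(bb, 100, 8);	// leaves {0,+100} and {108,+404}
 *
 * one extra slot is consumed by the tail that survives the split.
 */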

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb: the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates
 */
void ack_all_badblocks(struct badblocks *bb)
{
        if (bb->page == NULL || bb->changed)
                /* no point even trying */
                return;
        write_seqlock_irq(&bb->lock);

        if (bb->changed == 0 && bb->unacked_exist) {
                u64 *p = bb->page;
                int i;

                for (i = 0; i < bb->count ; i++) {
                        if (!BB_ACK(p[i])) {
                                sector_t start = BB_OFFSET(p[i]);
                                int len = BB_LEN(p[i]);

                                p[i] = BB_MAKE(start, len, 1);
                        }
                }
                bb->unacked_exist = 0;
        }
        write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @unack: whether to show only unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
        size_t len;
        int i;
        u64 *p = bb->page;
        unsigned seq;

        if (bb->shift < 0)
                return 0;

retry:
        seq = read_seqbegin(&bb->lock);

        len = 0;
        i = 0;

        while (len < PAGE_SIZE && i < bb->count) {
                sector_t s = BB_OFFSET(p[i]);
                unsigned int length = BB_LEN(p[i]);
                int ack = BB_ACK(p[i]);

                i++;

                if (unack && ack)
                        continue;

                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
                                (unsigned long long)s << bb->shift,
                                length << bb->shift);
        }
        if (unack && len == 0)
                bb->unacked_exist = 0;

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
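
/*
 * Illustrative example: the sysfs text format is one
 * "<start-sector> <length>" pair per line, as produced by
 * badblocks_show() above and parsed by badblocks_store() below
 * (the device path here is hypothetical):
 *
 *	# cat /sys/block/pmem0/badblocks
 *	32 8
 *	4096 512
 *	# echo "1024 8" > /sys/block/pmem0/badblocks
 */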

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb: the badblocks structure that holds all badblock information
 * @page: buffer received from sysfs
 * @len: length of data received from sysfs
 * @unack: whether to store the new badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
                        int unack)
{
        unsigned long long sector;
        int length;
        char newline;

        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
        case 3:
                if (newline != '\n')
                        return -EINVAL;
                fallthrough;
        case 2:
                if (length <= 0)
                        return -EINVAL;
                break;
        default:
                return -EINVAL;
        }

        if (badblocks_set(bb, sector, length, !unack))
                return -ENOSPC;
        else
                return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);

static int __badblocks_init(struct device *dev, struct badblocks *bb,
                int enable)
{
        bb->dev = dev;
        bb->count = 0;
        if (enable)
                bb->shift = 0;
        else
                bb->shift = -1;
        if (dev)
                bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
        else
                bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!bb->page) {
                bb->shift = -1;
                return -ENOMEM;
        }
        seqlock_init(&bb->lock);

        return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 * @enable: whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
        return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
        if (!bb)
                return -EINVAL;
        return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb: the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
        if (!bb)
                return;
        if (bb->dev)
                devm_kfree(bb->dev, bb->page);
        else
                kfree(bb->page);
        bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
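
/*
 * Illustrative example: a minimal lifecycle sketch for a driver-owned
 * badblocks instance using the non-devm variants (the sector values
 * are hypothetical).  ack_all_badblocks() is a no-op while ->changed
 * is still set, so the table must reach stable metadata first:
 *
 *	struct badblocks bb;
 *
 *	if (badblocks_init(&bb, 1))		// 1 == enable accounting
 *		return -ENOMEM;
 *	badblocks_set(&bb, 2048, 16, 0);	// record, unacknowledged
 *	// ... write the table to stable metadata, then:
 *	bb.changed = 0;
 *	ack_all_badblocks(&bb);
 *	badblocks_exit(&bb);
 */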