dm-clone-metadata.c (24530B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved. 4 */ 5 6#include <linux/mm.h> 7#include <linux/err.h> 8#include <linux/slab.h> 9#include <linux/rwsem.h> 10#include <linux/bitops.h> 11#include <linux/bitmap.h> 12#include <linux/device-mapper.h> 13 14#include "persistent-data/dm-bitset.h" 15#include "persistent-data/dm-space-map.h" 16#include "persistent-data/dm-block-manager.h" 17#include "persistent-data/dm-transaction-manager.h" 18 19#include "dm-clone-metadata.h" 20 21#define DM_MSG_PREFIX "clone metadata" 22 23#define SUPERBLOCK_LOCATION 0 24#define SUPERBLOCK_MAGIC 0x8af27f64 25#define SUPERBLOCK_CSUM_XOR 257649492 26 27#define DM_CLONE_MAX_CONCURRENT_LOCKS 5 28 29#define UUID_LEN 16 30 31/* Min and max dm-clone metadata versions supported */ 32#define DM_CLONE_MIN_METADATA_VERSION 1 33#define DM_CLONE_MAX_METADATA_VERSION 1 34 35/* 36 * On-disk metadata layout 37 */ 38struct superblock_disk { 39 __le32 csum; 40 __le32 flags; 41 __le64 blocknr; 42 43 __u8 uuid[UUID_LEN]; 44 __le64 magic; 45 __le32 version; 46 47 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 48 49 __le64 region_size; 50 __le64 target_size; 51 52 __le64 bitset_root; 53} __packed; 54 55/* 56 * Region and Dirty bitmaps. 57 * 58 * dm-clone logically splits the source and destination devices in regions of 59 * fixed size. The destination device's regions are gradually hydrated, i.e., 60 * we copy (clone) the source's regions to the destination device. Eventually, 61 * all regions will get hydrated and all I/O will be served from the 62 * destination device. 63 * 64 * We maintain an on-disk bitmap which tracks the state of each of the 65 * destination device's regions, i.e., whether they are hydrated or not. 66 * 67 * To save constantly doing look ups on disk we keep an in core copy of the 68 * on-disk bitmap, the region_map. 
 *
 * In order to track which regions are hydrated during a metadata transaction,
 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
 * tracks the regions that got hydrated during the current metadata
 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
 * the dirty_regions bitmap.
 *
 * This allows us to precisely track the regions that were hydrated during the
 * current metadata transaction and update the metadata accordingly, when we
 * commit the current transaction. This is important because dm-clone should
 * only commit the metadata of regions that were properly flushed to the
 * destination device beforehand. Otherwise, in case of a crash, we could end
 * up with a corrupted dm-clone device.
 *
 * When a region finishes hydrating dm-clone calls
 * dm_clone_set_region_hydrated(), or for discard requests
 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 * and dmap.
 *
 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
 * disk the whole region_map. We can just flush the dirty region_map bits.
 *
 * We use the helper dmap->dirty_words bitmap, which is smaller than the
 * original region_map, to reduce the amount of memory accesses during a
 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
 * accesses.
 *
 * We could update directly the on-disk bitmap, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path.
Also, as 102 * these two functions don't block, we can call them in interrupt context, 103 * e.g., in a hooked overwrite bio's completion routine, and further reduce the 104 * I/O completion latency. 105 * 106 * We maintain two dirty bitmap sets. During a metadata commit we atomically 107 * swap the currently used dmap with the unused one. This allows the metadata 108 * update functions to run concurrently with an ongoing commit. 109 */ 110struct dirty_map { 111 unsigned long *dirty_words; 112 unsigned long *dirty_regions; 113 unsigned int changed; 114}; 115 116struct dm_clone_metadata { 117 /* The metadata block device */ 118 struct block_device *bdev; 119 120 sector_t target_size; 121 sector_t region_size; 122 unsigned long nr_regions; 123 unsigned long nr_words; 124 125 /* Spinlock protecting the region and dirty bitmaps. */ 126 spinlock_t bitmap_lock; 127 struct dirty_map dmap[2]; 128 struct dirty_map *current_dmap; 129 130 /* Protected by lock */ 131 struct dirty_map *committing_dmap; 132 133 /* 134 * In core copy of the on-disk bitmap to save constantly doing look ups 135 * on disk. 136 */ 137 unsigned long *region_map; 138 139 /* Protected by bitmap_lock */ 140 unsigned int read_only; 141 142 struct dm_block_manager *bm; 143 struct dm_space_map *sm; 144 struct dm_transaction_manager *tm; 145 146 struct rw_semaphore lock; 147 148 struct dm_disk_bitset bitset_info; 149 dm_block_t bitset_root; 150 151 /* 152 * Reading the space map root can fail, so we read it into this 153 * buffer before the superblock is locked and updated. 154 */ 155 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 156 157 bool hydration_done:1; 158 bool fail_io:1; 159}; 160 161/*---------------------------------------------------------------------------*/ 162 163/* 164 * Superblock validation. 
165 */ 166static void sb_prepare_for_write(struct dm_block_validator *v, 167 struct dm_block *b, size_t sb_block_size) 168{ 169 struct superblock_disk *sb; 170 u32 csum; 171 172 sb = dm_block_data(b); 173 sb->blocknr = cpu_to_le64(dm_block_location(b)); 174 175 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 176 SUPERBLOCK_CSUM_XOR); 177 sb->csum = cpu_to_le32(csum); 178} 179 180static int sb_check(struct dm_block_validator *v, struct dm_block *b, 181 size_t sb_block_size) 182{ 183 struct superblock_disk *sb; 184 u32 csum, metadata_version; 185 186 sb = dm_block_data(b); 187 188 if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { 189 DMERR("Superblock check failed: blocknr %llu, expected %llu", 190 le64_to_cpu(sb->blocknr), 191 (unsigned long long)dm_block_location(b)); 192 return -ENOTBLK; 193 } 194 195 if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) { 196 DMERR("Superblock check failed: magic %llu, expected %llu", 197 le64_to_cpu(sb->magic), 198 (unsigned long long)SUPERBLOCK_MAGIC); 199 return -EILSEQ; 200 } 201 202 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 203 SUPERBLOCK_CSUM_XOR); 204 if (sb->csum != cpu_to_le32(csum)) { 205 DMERR("Superblock check failed: checksum %u, expected %u", 206 csum, le32_to_cpu(sb->csum)); 207 return -EILSEQ; 208 } 209 210 /* Check metadata version */ 211 metadata_version = le32_to_cpu(sb->version); 212 if (metadata_version < DM_CLONE_MIN_METADATA_VERSION || 213 metadata_version > DM_CLONE_MAX_METADATA_VERSION) { 214 DMERR("Clone metadata version %u found, but only versions between %u and %u supported.", 215 metadata_version, DM_CLONE_MIN_METADATA_VERSION, 216 DM_CLONE_MAX_METADATA_VERSION); 217 return -EINVAL; 218 } 219 220 return 0; 221} 222 223static struct dm_block_validator sb_validator = { 224 .name = "superblock", 225 .prepare_for_write = sb_prepare_for_write, 226 .check = sb_check 227}; 228 229/* 230 * Check if the superblock is formatted or not. 
We consider the superblock to 231 * be formatted in case we find non-zero bytes in it. 232 */ 233static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted) 234{ 235 int r; 236 unsigned int i, nr_words; 237 struct dm_block *sblock; 238 __le64 *data_le, zero = cpu_to_le64(0); 239 240 /* 241 * We don't use a validator here because the superblock could be all 242 * zeroes. 243 */ 244 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock); 245 if (r) { 246 DMERR("Failed to read_lock superblock"); 247 return r; 248 } 249 250 data_le = dm_block_data(sblock); 251 *formatted = false; 252 253 /* This assumes that the block size is a multiple of 8 bytes */ 254 BUG_ON(dm_bm_block_size(bm) % sizeof(__le64)); 255 nr_words = dm_bm_block_size(bm) / sizeof(__le64); 256 for (i = 0; i < nr_words; i++) { 257 if (data_le[i] != zero) { 258 *formatted = true; 259 break; 260 } 261 } 262 263 dm_bm_unlock(sblock); 264 265 return 0; 266} 267 268/*---------------------------------------------------------------------------*/ 269 270/* 271 * Low-level metadata handling. 
272 */ 273static inline int superblock_read_lock(struct dm_clone_metadata *cmd, 274 struct dm_block **sblock) 275{ 276 return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); 277} 278 279static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd, 280 struct dm_block **sblock) 281{ 282 return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); 283} 284 285static int __copy_sm_root(struct dm_clone_metadata *cmd) 286{ 287 int r; 288 size_t root_size; 289 290 r = dm_sm_root_size(cmd->sm, &root_size); 291 if (r) 292 return r; 293 294 return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size); 295} 296 297/* Save dm-clone metadata in superblock */ 298static void __prepare_superblock(struct dm_clone_metadata *cmd, 299 struct superblock_disk *sb) 300{ 301 sb->flags = cpu_to_le32(0UL); 302 303 /* FIXME: UUID is currently unused */ 304 memset(sb->uuid, 0, sizeof(sb->uuid)); 305 306 sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 307 sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION); 308 309 /* Save the metadata space_map root */ 310 memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root, 311 sizeof(cmd->metadata_space_map_root)); 312 313 sb->region_size = cpu_to_le64(cmd->region_size); 314 sb->target_size = cpu_to_le64(cmd->target_size); 315 sb->bitset_root = cpu_to_le64(cmd->bitset_root); 316} 317 318static int __open_metadata(struct dm_clone_metadata *cmd) 319{ 320 int r; 321 struct dm_block *sblock; 322 struct superblock_disk *sb; 323 324 r = superblock_read_lock(cmd, &sblock); 325 326 if (r) { 327 DMERR("Failed to read_lock superblock"); 328 return r; 329 } 330 331 sb = dm_block_data(sblock); 332 333 /* Verify that target_size and region_size haven't changed. 
*/ 334 if (cmd->region_size != le64_to_cpu(sb->region_size) || 335 cmd->target_size != le64_to_cpu(sb->target_size)) { 336 DMERR("Region and/or target size don't match the ones in metadata"); 337 r = -EINVAL; 338 goto out_with_lock; 339 } 340 341 r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION, 342 sb->metadata_space_map_root, 343 sizeof(sb->metadata_space_map_root), 344 &cmd->tm, &cmd->sm); 345 346 if (r) { 347 DMERR("dm_tm_open_with_sm failed"); 348 goto out_with_lock; 349 } 350 351 dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); 352 cmd->bitset_root = le64_to_cpu(sb->bitset_root); 353 354out_with_lock: 355 dm_bm_unlock(sblock); 356 357 return r; 358} 359 360static int __format_metadata(struct dm_clone_metadata *cmd) 361{ 362 int r; 363 struct dm_block *sblock; 364 struct superblock_disk *sb; 365 366 r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm); 367 if (r) { 368 DMERR("Failed to create transaction manager"); 369 return r; 370 } 371 372 dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); 373 374 r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root); 375 if (r) { 376 DMERR("Failed to create empty on-disk bitset"); 377 goto err_with_tm; 378 } 379 380 r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0, 381 cmd->nr_regions, false, &cmd->bitset_root); 382 if (r) { 383 DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions); 384 goto err_with_tm; 385 } 386 387 /* Flush to disk all blocks, except the superblock */ 388 r = dm_tm_pre_commit(cmd->tm); 389 if (r) { 390 DMERR("dm_tm_pre_commit failed"); 391 goto err_with_tm; 392 } 393 394 r = __copy_sm_root(cmd); 395 if (r) { 396 DMERR("__copy_sm_root failed"); 397 goto err_with_tm; 398 } 399 400 r = superblock_write_lock_zero(cmd, &sblock); 401 if (r) { 402 DMERR("Failed to write_lock superblock"); 403 goto err_with_tm; 404 } 405 406 sb = dm_block_data(sblock); 407 __prepare_superblock(cmd, sb); 408 r = dm_tm_commit(cmd->tm, sblock); 409 if (r) { 410 DMERR("Failed 
to commit superblock"); 411 goto err_with_tm; 412 } 413 414 return 0; 415 416err_with_tm: 417 dm_sm_destroy(cmd->sm); 418 dm_tm_destroy(cmd->tm); 419 420 return r; 421} 422 423static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device) 424{ 425 int r; 426 bool formatted = false; 427 428 r = __superblock_all_zeroes(cmd->bm, &formatted); 429 if (r) 430 return r; 431 432 if (!formatted) 433 return may_format_device ? __format_metadata(cmd) : -EPERM; 434 435 return __open_metadata(cmd); 436} 437 438static int __create_persistent_data_structures(struct dm_clone_metadata *cmd, 439 bool may_format_device) 440{ 441 int r; 442 443 /* Create block manager */ 444 cmd->bm = dm_block_manager_create(cmd->bdev, 445 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, 446 DM_CLONE_MAX_CONCURRENT_LOCKS); 447 if (IS_ERR(cmd->bm)) { 448 DMERR("Failed to create block manager"); 449 return PTR_ERR(cmd->bm); 450 } 451 452 r = __open_or_format_metadata(cmd, may_format_device); 453 if (r) 454 dm_block_manager_destroy(cmd->bm); 455 456 return r; 457} 458 459static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd) 460{ 461 dm_sm_destroy(cmd->sm); 462 dm_tm_destroy(cmd->tm); 463 dm_block_manager_destroy(cmd->bm); 464} 465 466/*---------------------------------------------------------------------------*/ 467 468static size_t bitmap_size(unsigned long nr_bits) 469{ 470 return BITS_TO_LONGS(nr_bits) * sizeof(long); 471} 472 473static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words, 474 unsigned long nr_regions) 475{ 476 dmap->changed = 0; 477 478 dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL); 479 if (!dmap->dirty_words) 480 return -ENOMEM; 481 482 dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL); 483 if (!dmap->dirty_regions) { 484 kvfree(dmap->dirty_words); 485 return -ENOMEM; 486 } 487 488 return 0; 489} 490 491static void __dirty_map_exit(struct dirty_map *dmap) 492{ 493 
kvfree(dmap->dirty_words); 494 kvfree(dmap->dirty_regions); 495} 496 497static int dirty_map_init(struct dm_clone_metadata *cmd) 498{ 499 if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) { 500 DMERR("Failed to allocate dirty bitmap"); 501 return -ENOMEM; 502 } 503 504 if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) { 505 DMERR("Failed to allocate dirty bitmap"); 506 __dirty_map_exit(&cmd->dmap[0]); 507 return -ENOMEM; 508 } 509 510 cmd->current_dmap = &cmd->dmap[0]; 511 cmd->committing_dmap = NULL; 512 513 return 0; 514} 515 516static void dirty_map_exit(struct dm_clone_metadata *cmd) 517{ 518 __dirty_map_exit(&cmd->dmap[0]); 519 __dirty_map_exit(&cmd->dmap[1]); 520} 521 522static int __load_bitset_in_core(struct dm_clone_metadata *cmd) 523{ 524 int r; 525 unsigned long i; 526 struct dm_bitset_cursor c; 527 528 /* Flush bitset cache */ 529 r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); 530 if (r) 531 return r; 532 533 r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c); 534 if (r) 535 return r; 536 537 for (i = 0; ; i++) { 538 if (dm_bitset_cursor_get_value(&c)) 539 __set_bit(i, cmd->region_map); 540 else 541 __clear_bit(i, cmd->region_map); 542 543 if (i >= (cmd->nr_regions - 1)) 544 break; 545 546 r = dm_bitset_cursor_next(&c); 547 548 if (r) 549 break; 550 } 551 552 dm_bitset_cursor_end(&c); 553 554 return r; 555} 556 557struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev, 558 sector_t target_size, 559 sector_t region_size) 560{ 561 int r; 562 struct dm_clone_metadata *cmd; 563 564 cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); 565 if (!cmd) { 566 DMERR("Failed to allocate memory for dm-clone metadata"); 567 return ERR_PTR(-ENOMEM); 568 } 569 570 cmd->bdev = bdev; 571 cmd->target_size = target_size; 572 cmd->region_size = region_size; 573 cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size); 574 cmd->nr_words = 
BITS_TO_LONGS(cmd->nr_regions); 575 576 init_rwsem(&cmd->lock); 577 spin_lock_init(&cmd->bitmap_lock); 578 cmd->read_only = 0; 579 cmd->fail_io = false; 580 cmd->hydration_done = false; 581 582 cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL); 583 if (!cmd->region_map) { 584 DMERR("Failed to allocate memory for region bitmap"); 585 r = -ENOMEM; 586 goto out_with_md; 587 } 588 589 r = __create_persistent_data_structures(cmd, true); 590 if (r) 591 goto out_with_region_map; 592 593 r = __load_bitset_in_core(cmd); 594 if (r) { 595 DMERR("Failed to load on-disk region map"); 596 goto out_with_pds; 597 } 598 599 r = dirty_map_init(cmd); 600 if (r) 601 goto out_with_pds; 602 603 if (bitmap_full(cmd->region_map, cmd->nr_regions)) 604 cmd->hydration_done = true; 605 606 return cmd; 607 608out_with_pds: 609 __destroy_persistent_data_structures(cmd); 610 611out_with_region_map: 612 kvfree(cmd->region_map); 613 614out_with_md: 615 kfree(cmd); 616 617 return ERR_PTR(r); 618} 619 620void dm_clone_metadata_close(struct dm_clone_metadata *cmd) 621{ 622 if (!cmd->fail_io) 623 __destroy_persistent_data_structures(cmd); 624 625 dirty_map_exit(cmd); 626 kvfree(cmd->region_map); 627 kfree(cmd); 628} 629 630bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd) 631{ 632 return cmd->hydration_done; 633} 634 635bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) 636{ 637 return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map); 638} 639 640bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd, 641 unsigned long start, unsigned long nr_regions) 642{ 643 unsigned long bit; 644 645 if (dm_clone_is_hydration_done(cmd)) 646 return true; 647 648 bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); 649 650 return (bit >= (start + nr_regions)); 651} 652 653unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd) 654{ 655 return bitmap_weight(cmd->region_map, 
cmd->nr_regions); 656} 657 658unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd, 659 unsigned long start) 660{ 661 return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); 662} 663 664static int __update_metadata_word(struct dm_clone_metadata *cmd, 665 unsigned long *dirty_regions, 666 unsigned long word) 667{ 668 int r; 669 unsigned long index = word * BITS_PER_LONG; 670 unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG); 671 672 while (index < max_index) { 673 if (test_bit(index, dirty_regions)) { 674 r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root, 675 index, &cmd->bitset_root); 676 if (r) { 677 DMERR("dm_bitset_set_bit failed"); 678 return r; 679 } 680 __clear_bit(index, dirty_regions); 681 } 682 index++; 683 } 684 685 return 0; 686} 687 688static int __metadata_commit(struct dm_clone_metadata *cmd) 689{ 690 int r; 691 struct dm_block *sblock; 692 struct superblock_disk *sb; 693 694 /* Flush bitset cache */ 695 r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); 696 if (r) { 697 DMERR("dm_bitset_flush failed"); 698 return r; 699 } 700 701 /* Flush to disk all blocks, except the superblock */ 702 r = dm_tm_pre_commit(cmd->tm); 703 if (r) { 704 DMERR("dm_tm_pre_commit failed"); 705 return r; 706 } 707 708 /* Save the space map root in cmd->metadata_space_map_root */ 709 r = __copy_sm_root(cmd); 710 if (r) { 711 DMERR("__copy_sm_root failed"); 712 return r; 713 } 714 715 /* Lock the superblock */ 716 r = superblock_write_lock_zero(cmd, &sblock); 717 if (r) { 718 DMERR("Failed to write_lock superblock"); 719 return r; 720 } 721 722 /* Save the metadata in superblock */ 723 sb = dm_block_data(sblock); 724 __prepare_superblock(cmd, sb); 725 726 /* Unlock superblock and commit it to disk */ 727 r = dm_tm_commit(cmd->tm, sblock); 728 if (r) { 729 DMERR("Failed to commit superblock"); 730 return r; 731 } 732 733 /* 734 * FIXME: Find a more efficient way to check if the 
hydration is done. 735 */ 736 if (bitmap_full(cmd->region_map, cmd->nr_regions)) 737 cmd->hydration_done = true; 738 739 return 0; 740} 741 742static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap) 743{ 744 int r; 745 unsigned long word; 746 747 word = 0; 748 do { 749 word = find_next_bit(dmap->dirty_words, cmd->nr_words, word); 750 751 if (word == cmd->nr_words) 752 break; 753 754 r = __update_metadata_word(cmd, dmap->dirty_regions, word); 755 756 if (r) 757 return r; 758 759 __clear_bit(word, dmap->dirty_words); 760 word++; 761 } while (word < cmd->nr_words); 762 763 r = __metadata_commit(cmd); 764 765 if (r) 766 return r; 767 768 /* Update the changed flag */ 769 spin_lock_irq(&cmd->bitmap_lock); 770 dmap->changed = 0; 771 spin_unlock_irq(&cmd->bitmap_lock); 772 773 return 0; 774} 775 776int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd) 777{ 778 int r = 0; 779 struct dirty_map *dmap, *next_dmap; 780 781 down_write(&cmd->lock); 782 783 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { 784 r = -EPERM; 785 goto out; 786 } 787 788 /* Get current dirty bitmap */ 789 dmap = cmd->current_dmap; 790 791 /* Get next dirty bitmap */ 792 next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0]; 793 794 /* 795 * The last commit failed, so we don't have a clean dirty-bitmap to 796 * use. 
797 */ 798 if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) { 799 r = -EINVAL; 800 goto out; 801 } 802 803 /* Swap dirty bitmaps */ 804 spin_lock_irq(&cmd->bitmap_lock); 805 cmd->current_dmap = next_dmap; 806 spin_unlock_irq(&cmd->bitmap_lock); 807 808 /* Set old dirty bitmap as currently committing */ 809 cmd->committing_dmap = dmap; 810out: 811 up_write(&cmd->lock); 812 813 return r; 814} 815 816int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) 817{ 818 int r = -EPERM; 819 820 down_write(&cmd->lock); 821 822 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 823 goto out; 824 825 if (WARN_ON(!cmd->committing_dmap)) { 826 r = -EINVAL; 827 goto out; 828 } 829 830 r = __flush_dmap(cmd, cmd->committing_dmap); 831 if (!r) { 832 /* Clear committing dmap */ 833 cmd->committing_dmap = NULL; 834 } 835out: 836 up_write(&cmd->lock); 837 838 return r; 839} 840 841int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) 842{ 843 int r = 0; 844 struct dirty_map *dmap; 845 unsigned long word, flags; 846 847 if (unlikely(region_nr >= cmd->nr_regions)) { 848 DMERR("Region %lu out of range (total number of regions %lu)", 849 region_nr, cmd->nr_regions); 850 return -ERANGE; 851 } 852 853 word = region_nr / BITS_PER_LONG; 854 855 spin_lock_irqsave(&cmd->bitmap_lock, flags); 856 857 if (cmd->read_only) { 858 r = -EPERM; 859 goto out; 860 } 861 862 dmap = cmd->current_dmap; 863 864 __set_bit(word, dmap->dirty_words); 865 __set_bit(region_nr, dmap->dirty_regions); 866 __set_bit(region_nr, cmd->region_map); 867 dmap->changed = 1; 868 869out: 870 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 871 872 return r; 873} 874 875int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start, 876 unsigned long nr_regions) 877{ 878 int r = 0; 879 struct dirty_map *dmap; 880 unsigned long word, region_nr; 881 882 if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start || 883 (start + nr_regions) > cmd->nr_regions)) { 
884 DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)", 885 start, nr_regions, cmd->nr_regions); 886 return -ERANGE; 887 } 888 889 spin_lock_irq(&cmd->bitmap_lock); 890 891 if (cmd->read_only) { 892 r = -EPERM; 893 goto out; 894 } 895 896 dmap = cmd->current_dmap; 897 for (region_nr = start; region_nr < (start + nr_regions); region_nr++) { 898 if (!test_bit(region_nr, cmd->region_map)) { 899 word = region_nr / BITS_PER_LONG; 900 __set_bit(word, dmap->dirty_words); 901 __set_bit(region_nr, dmap->dirty_regions); 902 __set_bit(region_nr, cmd->region_map); 903 dmap->changed = 1; 904 } 905 } 906out: 907 spin_unlock_irq(&cmd->bitmap_lock); 908 909 return r; 910} 911 912/* 913 * WARNING: This must not be called concurrently with either 914 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes 915 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only 916 * exception is after setting the metadata to read-only mode, using 917 * dm_clone_metadata_set_read_only(). 918 * 919 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it 920 * may block. 
921 */ 922int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd) 923{ 924 int r = -EINVAL; 925 926 down_write(&cmd->lock); 927 928 if (cmd->fail_io) 929 goto out; 930 931 r = __load_bitset_in_core(cmd); 932out: 933 up_write(&cmd->lock); 934 935 return r; 936} 937 938bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd) 939{ 940 bool r; 941 unsigned long flags; 942 943 spin_lock_irqsave(&cmd->bitmap_lock, flags); 944 r = cmd->dmap[0].changed || cmd->dmap[1].changed; 945 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 946 947 return r; 948} 949 950int dm_clone_metadata_abort(struct dm_clone_metadata *cmd) 951{ 952 int r = -EPERM; 953 954 down_write(&cmd->lock); 955 956 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 957 goto out; 958 959 __destroy_persistent_data_structures(cmd); 960 961 r = __create_persistent_data_structures(cmd, false); 962 if (r) { 963 /* If something went wrong we can neither write nor read the metadata */ 964 cmd->fail_io = true; 965 } 966out: 967 up_write(&cmd->lock); 968 969 return r; 970} 971 972void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd) 973{ 974 down_write(&cmd->lock); 975 976 spin_lock_irq(&cmd->bitmap_lock); 977 cmd->read_only = 1; 978 spin_unlock_irq(&cmd->bitmap_lock); 979 980 if (!cmd->fail_io) 981 dm_bm_set_read_only(cmd->bm); 982 983 up_write(&cmd->lock); 984} 985 986void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd) 987{ 988 down_write(&cmd->lock); 989 990 spin_lock_irq(&cmd->bitmap_lock); 991 cmd->read_only = 0; 992 spin_unlock_irq(&cmd->bitmap_lock); 993 994 if (!cmd->fail_io) 995 dm_bm_set_read_write(cmd->bm); 996 997 up_write(&cmd->lock); 998} 999 1000int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, 1001 dm_block_t *result) 1002{ 1003 int r = -EINVAL; 1004 1005 down_read(&cmd->lock); 1006 1007 if (!cmd->fail_io) 1008 r = dm_sm_get_nr_free(cmd->sm, result); 1009 1010 up_read(&cmd->lock); 1011 1012 return r; 1013} 1014 1015int 
dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, 1016 dm_block_t *result) 1017{ 1018 int r = -EINVAL; 1019 1020 down_read(&cmd->lock); 1021 1022 if (!cmd->fail_io) 1023 r = dm_sm_get_nr_blocks(cmd->sm, result); 1024 1025 up_read(&cmd->lock); 1026 1027 return r; 1028}