dm-bufio.c (52120B)
1/* 2 * Copyright (C) 2009-2011 Red Hat, Inc. 3 * 4 * Author: Mikulas Patocka <mpatocka@redhat.com> 5 * 6 * This file is released under the GPL. 7 */ 8 9#include <linux/dm-bufio.h> 10 11#include <linux/device-mapper.h> 12#include <linux/dm-io.h> 13#include <linux/slab.h> 14#include <linux/sched/mm.h> 15#include <linux/jiffies.h> 16#include <linux/vmalloc.h> 17#include <linux/shrinker.h> 18#include <linux/module.h> 19#include <linux/rbtree.h> 20#include <linux/stacktrace.h> 21 22#define DM_MSG_PREFIX "bufio" 23 24/* 25 * Memory management policy: 26 * Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory 27 * or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower). 28 * Always allocate at least DM_BUFIO_MIN_BUFFERS buffers. 29 * Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT 30 * dirty buffers. 31 */ 32#define DM_BUFIO_MIN_BUFFERS 8 33 34#define DM_BUFIO_MEMORY_PERCENT 2 35#define DM_BUFIO_VMALLOC_PERCENT 25 36#define DM_BUFIO_WRITEBACK_RATIO 3 37#define DM_BUFIO_LOW_WATERMARK_RATIO 16 38 39/* 40 * Check buffer ages in this interval (seconds) 41 */ 42#define DM_BUFIO_WORK_TIMER_SECS 30 43 44/* 45 * Free buffers when they are older than this (seconds) 46 */ 47#define DM_BUFIO_DEFAULT_AGE_SECS 300 48 49/* 50 * The nr of bytes of cached data to keep around. 51 */ 52#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024) 53 54/* 55 * Align buffer writes to this boundary. 56 * Tests show that SSDs have the highest IOPS when using 4k writes. 57 */ 58#define DM_BUFIO_WRITE_ALIGN 4096 59 60/* 61 * dm_buffer->list_mode 62 */ 63#define LIST_CLEAN 0 64#define LIST_DIRTY 1 65#define LIST_SIZE 2 66 67/* 68 * Linking of buffers: 69 * All buffers are linked to buffer_tree with their node field. 70 * 71 * Clean buffers that are not being written (B_WRITING not set) 72 * are linked to lru[LIST_CLEAN] with their lru_list field. 
 *
 * Dirty and clean buffers that are being written are linked to
 * lru[LIST_DIRTY] with their lru_list field. When the write
 * finishes, the buffer cannot be relinked immediately (because we
 * are in an interrupt context and relinking requires process
 * context), so some clean-not-writing buffers can be held on
 * lru[LIST_DIRTY] too. They are later moved to lru[LIST_CLEAN] in
 * process context.
 */
struct dm_bufio_client {
	struct mutex lock;			/* taken via dm_bufio_lock()/dm_bufio_unlock() */

	struct list_head lru[LIST_SIZE];	/* one LRU list per LIST_* mode */
	unsigned long n_buffers[LIST_SIZE];	/* number of buffers on each lru[] list */

	struct block_device *bdev;		/* device all buffer I/O is issued to */
	unsigned block_size;			/* size of one buffer, in bytes */
	s8 sectors_per_block_bits;		/* shift used by block_to_sector(); negative means multiply instead */
	void (*alloc_callback)(struct dm_buffer *);	/* optional; run on every buffer returned by __alloc_buffer_wait() */
	void (*write_callback)(struct dm_buffer *);	/* optional; run by submit_io() just before a write */

	struct kmem_cache *slab_buffer;		/* cache that struct dm_buffer is allocated from */
	struct kmem_cache *slab_cache;		/* optional cache for buffer data; NULL selects pages/vmalloc */
	struct dm_io_client *dm_io;		/* dm-io client used by use_dmio() and the flush request */

	struct list_head reserved_buffers;	/* fallback buffers handed out when allocation fails */
	unsigned need_reserved_buffers;		/* reserved buffers currently taken; refilled by __free_buffer_wake() */

	unsigned minimum_buffers;		/* NOTE(review): not referenced in this part of the file */

	struct rb_root buffer_tree;		/* index of linked buffers, keyed by dm_buffer.block */
	wait_queue_head_t free_buffer_wait;	/* woken when a buffer is freed or its last hold is dropped */

	sector_t start;				/* sector offset added to every block's sector */

	int async_write_error;			/* first write errno seen by write_endio(); consumed by dm_bufio_write_dirty_buffers() */

	struct list_head client_list;		/* presumably the link on dm_bufio_all_clients - confirm */

	struct shrinker shrinker;		/* NOTE(review): shrinker state; users are outside this part of the file */
	struct work_struct shrink_work;
	atomic_long_t need_shrink;
};

/*
 * Buffer state bits (bit numbers within dm_buffer.state).
 */
#define B_READING	0
#define B_WRITING	1
#define B_DIRTY		2

/*
 * Describes how the block was allocated:
 * kmem_cache_alloc(), __get_free_pages() or vmalloc().
 * See the comment at alloc_buffer_data.
 */
enum data_mode {
	DATA_MODE_SLAB = 0,
	DATA_MODE_GET_FREE_PAGES = 1,
	DATA_MODE_VMALLOC = 2,
	DATA_MODE_LIMIT = 3	/* number of modes; sizes the per-mode counter table */
};

/*
 * One cached block.
 */
struct dm_buffer {
	struct rb_node node;			/* linkage in c->buffer_tree */
	struct list_head lru_list;		/* linkage on c->lru[list_mode] or c->reserved_buffers */
	struct list_head global_list;		/* linkage on global_queue */
	sector_t block;				/* block number; the rb-tree key */
	void *data;				/* the cached data, c->block_size bytes */
	unsigned char data_mode;		/* DATA_MODE_* */
	unsigned char list_mode;		/* LIST_* */
	blk_status_t read_error;		/* status stored by read_endio() */
	blk_status_t write_error;		/* status stored by write_endio() */
	unsigned accessed;			/* set to 1 when the buffer is linked or relinked */
	unsigned hold_count;			/* number of holders; 0 means the buffer may be reclaimed */
	unsigned long state;			/* B_READING / B_WRITING / B_DIRTY bits */
	unsigned long last_accessed;		/* jiffies at the last link/relink */
	unsigned dirty_start;			/* dirty byte range, accumulated while B_DIRTY is set */
	unsigned dirty_end;
	unsigned write_start;			/* snapshot of the dirty range for the write being issued */
	unsigned write_end;
	struct dm_bufio_client *c;		/* owning client */
	struct list_head write_list;		/* linkage on a caller's deferred write list */
	void (*end_io)(struct dm_buffer *, blk_status_t);	/* completion routine set by submit_io() */
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
	unsigned int stack_len;			/* valid entries in stack_entries[] */
	unsigned long stack_entries[MAX_STACK];	/* filled by buffer_record_stack() */
#endif
};

/*----------------------------------------------------------------*/

/* Evaluates to 1 when current->bio_list is non-NULL, otherwise 0. */
#define dm_bufio_in_request()	(!!current->bio_list)

/*
 * Take the client mutex. The second argument of mutex_lock_nested() is
 * the result of dm_bufio_in_request().
 */
static void dm_bufio_lock(struct dm_bufio_client *c)
{
	mutex_lock_nested(&c->lock, dm_bufio_in_request());
}

/* Try to take the client mutex; returns the result of mutex_trylock(). */
static int dm_bufio_trylock(struct dm_bufio_client *c)
{
	return mutex_trylock(&c->lock);
}

/* Release the client mutex. */
static void dm_bufio_unlock(struct dm_bufio_client *c)
{
	mutex_unlock(&c->lock);
}

/*----------------------------------------------------------------*/

/*
 * Default cache size: available memory divided by the ratio.
 */
static unsigned long dm_bufio_default_cache_size;

/*
 * Total cache size set by the user.
 */
static unsigned long dm_bufio_cache_size;

/*
 * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
 * at any time. If it disagrees, the user has changed cache size.
198 */ 199static unsigned long dm_bufio_cache_size_latch; 200 201static DEFINE_SPINLOCK(global_spinlock); 202 203static LIST_HEAD(global_queue); 204 205static unsigned long global_num = 0; 206 207/* 208 * Buffers are freed after this timeout 209 */ 210static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; 211static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; 212 213static unsigned long dm_bufio_peak_allocated; 214static unsigned long dm_bufio_allocated_kmem_cache; 215static unsigned long dm_bufio_allocated_get_free_pages; 216static unsigned long dm_bufio_allocated_vmalloc; 217static unsigned long dm_bufio_current_allocated; 218 219/*----------------------------------------------------------------*/ 220 221/* 222 * The current number of clients. 223 */ 224static int dm_bufio_client_count; 225 226/* 227 * The list of all clients. 228 */ 229static LIST_HEAD(dm_bufio_all_clients); 230 231/* 232 * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count 233 */ 234static DEFINE_MUTEX(dm_bufio_clients_lock); 235 236static struct workqueue_struct *dm_bufio_wq; 237static struct delayed_work dm_bufio_cleanup_old_work; 238static struct work_struct dm_bufio_replacement_work; 239 240 241#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 242static void buffer_record_stack(struct dm_buffer *b) 243{ 244 b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2); 245} 246#endif 247 248/*---------------------------------------------------------------- 249 * A red/black tree acts as an index for all the buffers. 250 *--------------------------------------------------------------*/ 251static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) 252{ 253 struct rb_node *n = c->buffer_tree.rb_node; 254 struct dm_buffer *b; 255 256 while (n) { 257 b = container_of(n, struct dm_buffer, node); 258 259 if (b->block == block) 260 return b; 261 262 n = block < b->block ? 
n->rb_left : n->rb_right; 263 } 264 265 return NULL; 266} 267 268static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block) 269{ 270 struct rb_node *n = c->buffer_tree.rb_node; 271 struct dm_buffer *b; 272 struct dm_buffer *best = NULL; 273 274 while (n) { 275 b = container_of(n, struct dm_buffer, node); 276 277 if (b->block == block) 278 return b; 279 280 if (block <= b->block) { 281 n = n->rb_left; 282 best = b; 283 } else { 284 n = n->rb_right; 285 } 286 } 287 288 return best; 289} 290 291static void __insert(struct dm_bufio_client *c, struct dm_buffer *b) 292{ 293 struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL; 294 struct dm_buffer *found; 295 296 while (*new) { 297 found = container_of(*new, struct dm_buffer, node); 298 299 if (found->block == b->block) { 300 BUG_ON(found != b); 301 return; 302 } 303 304 parent = *new; 305 new = b->block < found->block ? 306 &found->node.rb_left : &found->node.rb_right; 307 } 308 309 rb_link_node(&b->node, parent, new); 310 rb_insert_color(&b->node, &c->buffer_tree); 311} 312 313static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) 314{ 315 rb_erase(&b->node, &c->buffer_tree); 316} 317 318/*----------------------------------------------------------------*/ 319 320static void adjust_total_allocated(struct dm_buffer *b, bool unlink) 321{ 322 unsigned char data_mode; 323 long diff; 324 325 static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { 326 &dm_bufio_allocated_kmem_cache, 327 &dm_bufio_allocated_get_free_pages, 328 &dm_bufio_allocated_vmalloc, 329 }; 330 331 data_mode = b->data_mode; 332 diff = (long)b->c->block_size; 333 if (unlink) 334 diff = -diff; 335 336 spin_lock(&global_spinlock); 337 338 *class_ptr[data_mode] += diff; 339 340 dm_bufio_current_allocated += diff; 341 342 if (dm_bufio_current_allocated > dm_bufio_peak_allocated) 343 dm_bufio_peak_allocated = dm_bufio_current_allocated; 344 345 b->accessed = 1; 346 347 if (!unlink) { 348 list_add(&b->global_list, 
&global_queue); 349 global_num++; 350 if (dm_bufio_current_allocated > dm_bufio_cache_size) 351 queue_work(dm_bufio_wq, &dm_bufio_replacement_work); 352 } else { 353 list_del(&b->global_list); 354 global_num--; 355 } 356 357 spin_unlock(&global_spinlock); 358} 359 360/* 361 * Change the number of clients and recalculate per-client limit. 362 */ 363static void __cache_size_refresh(void) 364{ 365 BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); 366 BUG_ON(dm_bufio_client_count < 0); 367 368 dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size); 369 370 /* 371 * Use default if set to 0 and report the actual cache size used. 372 */ 373 if (!dm_bufio_cache_size_latch) { 374 (void)cmpxchg(&dm_bufio_cache_size, 0, 375 dm_bufio_default_cache_size); 376 dm_bufio_cache_size_latch = dm_bufio_default_cache_size; 377 } 378} 379 380/* 381 * Allocating buffer data. 382 * 383 * Small buffers are allocated with kmem_cache, to use space optimally. 384 * 385 * For large buffers, we choose between get_free_pages and vmalloc. 386 * Each has advantages and disadvantages. 387 * 388 * __get_free_pages can randomly fail if the memory is fragmented. 389 * __vmalloc won't randomly fail, but vmalloc space is limited (it may be 390 * as low as 128M) so using it for caching is not appropriate. 391 * 392 * If the allocation may fail we use __get_free_pages. Memory fragmentation 393 * won't have a fatal effect here, but it just causes flushes of some other 394 * buffers and more I/O will be performed. Don't use __get_free_pages if it 395 * always fails (i.e. order >= MAX_ORDER). 396 * 397 * If the allocation shouldn't fail we use __vmalloc. This is only for the 398 * initial reserve allocation, so there's no risk of wasting all vmalloc 399 * space. 
400 */ 401static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, 402 unsigned char *data_mode) 403{ 404 if (unlikely(c->slab_cache != NULL)) { 405 *data_mode = DATA_MODE_SLAB; 406 return kmem_cache_alloc(c->slab_cache, gfp_mask); 407 } 408 409 if (c->block_size <= KMALLOC_MAX_SIZE && 410 gfp_mask & __GFP_NORETRY) { 411 *data_mode = DATA_MODE_GET_FREE_PAGES; 412 return (void *)__get_free_pages(gfp_mask, 413 c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); 414 } 415 416 *data_mode = DATA_MODE_VMALLOC; 417 418 /* 419 * __vmalloc allocates the data pages and auxiliary structures with 420 * gfp_flags that were specified, but pagetables are always allocated 421 * with GFP_KERNEL, no matter what was specified as gfp_mask. 422 * 423 * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that 424 * all allocations done by this process (including pagetables) are done 425 * as if GFP_NOIO was specified. 426 */ 427 if (gfp_mask & __GFP_NORETRY) { 428 unsigned noio_flag = memalloc_noio_save(); 429 void *ptr = __vmalloc(c->block_size, gfp_mask); 430 431 memalloc_noio_restore(noio_flag); 432 return ptr; 433 } 434 435 return __vmalloc(c->block_size, gfp_mask); 436} 437 438/* 439 * Free buffer's data. 440 */ 441static void free_buffer_data(struct dm_bufio_client *c, 442 void *data, unsigned char data_mode) 443{ 444 switch (data_mode) { 445 case DATA_MODE_SLAB: 446 kmem_cache_free(c->slab_cache, data); 447 break; 448 449 case DATA_MODE_GET_FREE_PAGES: 450 free_pages((unsigned long)data, 451 c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); 452 break; 453 454 case DATA_MODE_VMALLOC: 455 vfree(data); 456 break; 457 458 default: 459 DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d", 460 data_mode); 461 BUG(); 462 } 463} 464 465/* 466 * Allocate buffer and its data. 
467 */ 468static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) 469{ 470 struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask); 471 472 if (!b) 473 return NULL; 474 475 b->c = c; 476 477 b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode); 478 if (!b->data) { 479 kmem_cache_free(c->slab_buffer, b); 480 return NULL; 481 } 482 483#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 484 b->stack_len = 0; 485#endif 486 return b; 487} 488 489/* 490 * Free buffer and its data. 491 */ 492static void free_buffer(struct dm_buffer *b) 493{ 494 struct dm_bufio_client *c = b->c; 495 496 free_buffer_data(c, b->data, b->data_mode); 497 kmem_cache_free(c->slab_buffer, b); 498} 499 500/* 501 * Link buffer to the buffer tree and clean or dirty queue. 502 */ 503static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) 504{ 505 struct dm_bufio_client *c = b->c; 506 507 c->n_buffers[dirty]++; 508 b->block = block; 509 b->list_mode = dirty; 510 list_add(&b->lru_list, &c->lru[dirty]); 511 __insert(b->c, b); 512 b->last_accessed = jiffies; 513 514 adjust_total_allocated(b, false); 515} 516 517/* 518 * Unlink buffer from the buffer tree and dirty or clean queue. 519 */ 520static void __unlink_buffer(struct dm_buffer *b) 521{ 522 struct dm_bufio_client *c = b->c; 523 524 BUG_ON(!c->n_buffers[b->list_mode]); 525 526 c->n_buffers[b->list_mode]--; 527 __remove(b->c, b); 528 list_del(&b->lru_list); 529 530 adjust_total_allocated(b, true); 531} 532 533/* 534 * Place the buffer to the head of dirty or clean LRU queue. 
535 */ 536static void __relink_lru(struct dm_buffer *b, int dirty) 537{ 538 struct dm_bufio_client *c = b->c; 539 540 b->accessed = 1; 541 542 BUG_ON(!c->n_buffers[b->list_mode]); 543 544 c->n_buffers[b->list_mode]--; 545 c->n_buffers[dirty]++; 546 b->list_mode = dirty; 547 list_move(&b->lru_list, &c->lru[dirty]); 548 b->last_accessed = jiffies; 549} 550 551/*---------------------------------------------------------------- 552 * Submit I/O on the buffer. 553 * 554 * Bio interface is faster but it has some problems: 555 * the vector list is limited (increasing this limit increases 556 * memory-consumption per buffer, so it is not viable); 557 * 558 * the memory must be direct-mapped, not vmalloced; 559 * 560 * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and 561 * it is not vmalloced, try using the bio interface. 562 * 563 * If the buffer is big, if it is vmalloced or if the underlying device 564 * rejects the bio because it is too large, use dm-io layer to do the I/O. 565 * The dm-io layer splits the I/O into multiple requests, avoiding the above 566 * shortcomings. 567 *--------------------------------------------------------------*/ 568 569/* 570 * dm-io completion routine. It just calls b->bio.bi_end_io, pretending 571 * that the request was handled directly with bio interface. 572 */ 573static void dmio_complete(unsigned long error, void *context) 574{ 575 struct dm_buffer *b = context; 576 577 b->end_io(b, unlikely(error != 0) ? 
BLK_STS_IOERR : 0); 578} 579 580static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, 581 unsigned n_sectors, unsigned offset) 582{ 583 int r; 584 struct dm_io_request io_req = { 585 .bi_op = rw, 586 .bi_op_flags = 0, 587 .notify.fn = dmio_complete, 588 .notify.context = b, 589 .client = b->c->dm_io, 590 }; 591 struct dm_io_region region = { 592 .bdev = b->c->bdev, 593 .sector = sector, 594 .count = n_sectors, 595 }; 596 597 if (b->data_mode != DATA_MODE_VMALLOC) { 598 io_req.mem.type = DM_IO_KMEM; 599 io_req.mem.ptr.addr = (char *)b->data + offset; 600 } else { 601 io_req.mem.type = DM_IO_VMA; 602 io_req.mem.ptr.vma = (char *)b->data + offset; 603 } 604 605 r = dm_io(&io_req, 1, ®ion, NULL); 606 if (unlikely(r)) 607 b->end_io(b, errno_to_blk_status(r)); 608} 609 610static void bio_complete(struct bio *bio) 611{ 612 struct dm_buffer *b = bio->bi_private; 613 blk_status_t status = bio->bi_status; 614 bio_uninit(bio); 615 kfree(bio); 616 b->end_io(b, status); 617} 618 619static void use_bio(struct dm_buffer *b, int rw, sector_t sector, 620 unsigned n_sectors, unsigned offset) 621{ 622 struct bio *bio; 623 char *ptr; 624 unsigned vec_size, len; 625 626 vec_size = b->c->block_size >> PAGE_SHIFT; 627 if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT)) 628 vec_size += 2; 629 630 bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN); 631 if (!bio) { 632dmio: 633 use_dmio(b, rw, sector, n_sectors, offset); 634 return; 635 } 636 bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw); 637 bio->bi_iter.bi_sector = sector; 638 bio->bi_end_io = bio_complete; 639 bio->bi_private = b; 640 641 ptr = (char *)b->data + offset; 642 len = n_sectors << SECTOR_SHIFT; 643 644 do { 645 unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len); 646 if (!bio_add_page(bio, virt_to_page(ptr), this_step, 647 offset_in_page(ptr))) { 648 bio_put(bio); 649 goto dmio; 650 } 651 652 len -= this_step; 653 ptr += this_step; 654 
} while (len > 0); 655 656 submit_bio(bio); 657} 658 659static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block) 660{ 661 sector_t sector; 662 663 if (likely(c->sectors_per_block_bits >= 0)) 664 sector = block << c->sectors_per_block_bits; 665 else 666 sector = block * (c->block_size >> SECTOR_SHIFT); 667 sector += c->start; 668 669 return sector; 670} 671 672static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) 673{ 674 unsigned n_sectors; 675 sector_t sector; 676 unsigned offset, end; 677 678 b->end_io = end_io; 679 680 sector = block_to_sector(b->c, b->block); 681 682 if (rw != REQ_OP_WRITE) { 683 n_sectors = b->c->block_size >> SECTOR_SHIFT; 684 offset = 0; 685 } else { 686 if (b->c->write_callback) 687 b->c->write_callback(b); 688 offset = b->write_start; 689 end = b->write_end; 690 offset &= -DM_BUFIO_WRITE_ALIGN; 691 end += DM_BUFIO_WRITE_ALIGN - 1; 692 end &= -DM_BUFIO_WRITE_ALIGN; 693 if (unlikely(end > b->c->block_size)) 694 end = b->c->block_size; 695 696 sector += offset >> SECTOR_SHIFT; 697 n_sectors = (end - offset) >> SECTOR_SHIFT; 698 } 699 700 if (b->data_mode != DATA_MODE_VMALLOC) 701 use_bio(b, rw, sector, n_sectors, offset); 702 else 703 use_dmio(b, rw, sector, n_sectors, offset); 704} 705 706/*---------------------------------------------------------------- 707 * Writing dirty buffers 708 *--------------------------------------------------------------*/ 709 710/* 711 * The endio routine for write. 712 * 713 * Set the error, clear B_WRITING bit and wake anyone who was waiting on 714 * it. 
715 */ 716static void write_endio(struct dm_buffer *b, blk_status_t status) 717{ 718 b->write_error = status; 719 if (unlikely(status)) { 720 struct dm_bufio_client *c = b->c; 721 722 (void)cmpxchg(&c->async_write_error, 0, 723 blk_status_to_errno(status)); 724 } 725 726 BUG_ON(!test_bit(B_WRITING, &b->state)); 727 728 smp_mb__before_atomic(); 729 clear_bit(B_WRITING, &b->state); 730 smp_mb__after_atomic(); 731 732 wake_up_bit(&b->state, B_WRITING); 733} 734 735/* 736 * Initiate a write on a dirty buffer, but don't wait for it. 737 * 738 * - If the buffer is not dirty, exit. 739 * - If there some previous write going on, wait for it to finish (we can't 740 * have two writes on the same buffer simultaneously). 741 * - Submit our write and don't wait on it. We set B_WRITING indicating 742 * that there is a write in progress. 743 */ 744static void __write_dirty_buffer(struct dm_buffer *b, 745 struct list_head *write_list) 746{ 747 if (!test_bit(B_DIRTY, &b->state)) 748 return; 749 750 clear_bit(B_DIRTY, &b->state); 751 wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); 752 753 b->write_start = b->dirty_start; 754 b->write_end = b->dirty_end; 755 756 if (!write_list) 757 submit_io(b, REQ_OP_WRITE, write_endio); 758 else 759 list_add_tail(&b->write_list, write_list); 760} 761 762static void __flush_write_list(struct list_head *write_list) 763{ 764 struct blk_plug plug; 765 blk_start_plug(&plug); 766 while (!list_empty(write_list)) { 767 struct dm_buffer *b = 768 list_entry(write_list->next, struct dm_buffer, write_list); 769 list_del(&b->write_list); 770 submit_io(b, REQ_OP_WRITE, write_endio); 771 cond_resched(); 772 } 773 blk_finish_plug(&plug); 774} 775 776/* 777 * Wait until any activity on the buffer finishes. Possibly write the 778 * buffer if it is dirty. When this function finishes, there is no I/O 779 * running on the buffer and the buffer is not dirty. 
780 */ 781static void __make_buffer_clean(struct dm_buffer *b) 782{ 783 BUG_ON(b->hold_count); 784 785 if (!b->state) /* fast case */ 786 return; 787 788 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); 789 __write_dirty_buffer(b, NULL); 790 wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); 791} 792 793/* 794 * Find some buffer that is not held by anybody, clean it, unlink it and 795 * return it. 796 */ 797static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) 798{ 799 struct dm_buffer *b; 800 801 list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) { 802 BUG_ON(test_bit(B_WRITING, &b->state)); 803 BUG_ON(test_bit(B_DIRTY, &b->state)); 804 805 if (!b->hold_count) { 806 __make_buffer_clean(b); 807 __unlink_buffer(b); 808 return b; 809 } 810 cond_resched(); 811 } 812 813 list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) { 814 BUG_ON(test_bit(B_READING, &b->state)); 815 816 if (!b->hold_count) { 817 __make_buffer_clean(b); 818 __unlink_buffer(b); 819 return b; 820 } 821 cond_resched(); 822 } 823 824 return NULL; 825} 826 827/* 828 * Wait until some other threads free some buffer or release hold count on 829 * some buffer. 830 * 831 * This function is entered with c->lock held, drops it and regains it 832 * before exiting. 833 */ 834static void __wait_for_free_buffer(struct dm_bufio_client *c) 835{ 836 DECLARE_WAITQUEUE(wait, current); 837 838 add_wait_queue(&c->free_buffer_wait, &wait); 839 set_current_state(TASK_UNINTERRUPTIBLE); 840 dm_bufio_unlock(c); 841 842 io_schedule(); 843 844 remove_wait_queue(&c->free_buffer_wait, &wait); 845 846 dm_bufio_lock(c); 847} 848 849enum new_flag { 850 NF_FRESH = 0, 851 NF_READ = 1, 852 NF_GET = 2, 853 NF_PREFETCH = 3 854}; 855 856/* 857 * Allocate a new buffer. If the allocation is not possible, wait until 858 * some other thread frees a buffer. 859 * 860 * May drop the lock and regain it. 
 *
 * Returns NULL only for NF_PREFETCH, when the first non-blocking
 * allocation attempt did not succeed; every other mode loops until a
 * buffer is obtained.
 */
static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b;
	bool tried_noio_alloc = false;	/* the blocking GFP_NOIO attempt is made at most once */

	/*
	 * dm-bufio is resistant to allocation failures (it just keeps
	 * one buffer reserved in cases all the allocations fail).
	 * So set flags to not try too hard:
	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
	 *		    mutex and wait ourselves.
	 *	__GFP_NORETRY: don't retry and rather return failure
	 *	__GFP_NOMEMALLOC: don't use emergency reserves
	 *	__GFP_NOWARN: don't print a warning in case of failure
	 *
	 * For debugging, if we set the cache size to 1, no new buffers will
	 * be allocated.
	 */
	while (1) {
		/* 1. non-blocking allocation, with the client lock held */
		if (dm_bufio_cache_size_latch != 1) {
			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			if (b)
				return b;
		}

		/* prefetch never goes further than the non-blocking attempt */
		if (nf == NF_PREFETCH)
			return NULL;

		/* 2. one GFP_NOIO allocation, made with the client lock dropped */
		if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
			dm_bufio_unlock(c);
			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			dm_bufio_lock(c);
			if (b)
				return b;
			tried_noio_alloc = true;
		}

		/* 3. take a reserved buffer; __free_buffer_wake() refills the reserve */
		if (!list_empty(&c->reserved_buffers)) {
			b = list_entry(c->reserved_buffers.next,
				       struct dm_buffer, lru_list);
			list_del(&b->lru_list);
			c->need_reserved_buffers++;

			return b;
		}

		/* 4. reclaim a buffer that nobody holds */
		b = __get_unclaimed_buffer(c);
		if (b)
			return b;

		/* 5. nothing available: sleep (dropping the lock) and retry */
		__wait_for_free_buffer(c);
	}
}

/*
 * Same as __alloc_buffer_wait_no_callback(), but additionally runs the
 * client's alloc_callback, if one is set, on the buffer obtained.
 */
static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);

	if (!b)
		return NULL;

	if (c->alloc_callback)
		c->alloc_callback(b);

	return b;
}

/*
 * Free a buffer and wake other threads waiting for free buffers.
931 */ 932static void __free_buffer_wake(struct dm_buffer *b) 933{ 934 struct dm_bufio_client *c = b->c; 935 936 if (!c->need_reserved_buffers) 937 free_buffer(b); 938 else { 939 list_add(&b->lru_list, &c->reserved_buffers); 940 c->need_reserved_buffers--; 941 } 942 943 wake_up(&c->free_buffer_wait); 944} 945 946static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait, 947 struct list_head *write_list) 948{ 949 struct dm_buffer *b, *tmp; 950 951 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { 952 BUG_ON(test_bit(B_READING, &b->state)); 953 954 if (!test_bit(B_DIRTY, &b->state) && 955 !test_bit(B_WRITING, &b->state)) { 956 __relink_lru(b, LIST_CLEAN); 957 continue; 958 } 959 960 if (no_wait && test_bit(B_WRITING, &b->state)) 961 return; 962 963 __write_dirty_buffer(b, write_list); 964 cond_resched(); 965 } 966} 967 968/* 969 * Check if we're over watermark. 970 * If we are over threshold_buffers, start freeing buffers. 971 * If we're over "limit_buffers", block until we get under the limit. 972 */ 973static void __check_watermark(struct dm_bufio_client *c, 974 struct list_head *write_list) 975{ 976 if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO) 977 __write_dirty_buffers_async(c, 1, write_list); 978} 979 980/*---------------------------------------------------------------- 981 * Getting a buffer 982 *--------------------------------------------------------------*/ 983 984static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, 985 enum new_flag nf, int *need_submit, 986 struct list_head *write_list) 987{ 988 struct dm_buffer *b, *new_b = NULL; 989 990 *need_submit = 0; 991 992 b = __find(c, block); 993 if (b) 994 goto found_buffer; 995 996 if (nf == NF_GET) 997 return NULL; 998 999 new_b = __alloc_buffer_wait(c, nf); 1000 if (!new_b) 1001 return NULL; 1002 1003 /* 1004 * We've had a period where the mutex was unlocked, so need to 1005 * recheck the buffer tree. 
1006 */ 1007 b = __find(c, block); 1008 if (b) { 1009 __free_buffer_wake(new_b); 1010 goto found_buffer; 1011 } 1012 1013 __check_watermark(c, write_list); 1014 1015 b = new_b; 1016 b->hold_count = 1; 1017 b->read_error = 0; 1018 b->write_error = 0; 1019 __link_buffer(b, block, LIST_CLEAN); 1020 1021 if (nf == NF_FRESH) { 1022 b->state = 0; 1023 return b; 1024 } 1025 1026 b->state = 1 << B_READING; 1027 *need_submit = 1; 1028 1029 return b; 1030 1031found_buffer: 1032 if (nf == NF_PREFETCH) 1033 return NULL; 1034 /* 1035 * Note: it is essential that we don't wait for the buffer to be 1036 * read if dm_bufio_get function is used. Both dm_bufio_get and 1037 * dm_bufio_prefetch can be used in the driver request routine. 1038 * If the user called both dm_bufio_prefetch and dm_bufio_get on 1039 * the same buffer, it would deadlock if we waited. 1040 */ 1041 if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state))) 1042 return NULL; 1043 1044 b->hold_count++; 1045 __relink_lru(b, test_bit(B_DIRTY, &b->state) || 1046 test_bit(B_WRITING, &b->state)); 1047 return b; 1048} 1049 1050/* 1051 * The endio routine for reading: set the error, clear the bit and wake up 1052 * anyone waiting on the buffer. 1053 */ 1054static void read_endio(struct dm_buffer *b, blk_status_t status) 1055{ 1056 b->read_error = status; 1057 1058 BUG_ON(!test_bit(B_READING, &b->state)); 1059 1060 smp_mb__before_atomic(); 1061 clear_bit(B_READING, &b->state); 1062 smp_mb__after_atomic(); 1063 1064 wake_up_bit(&b->state, B_READING); 1065} 1066 1067/* 1068 * A common routine for dm_bufio_new and dm_bufio_read. Operation of these 1069 * functions is similar except that dm_bufio_new doesn't read the 1070 * buffer from the disk (assuming that the caller overwrites all the data 1071 * and uses dm_bufio_mark_buffer_dirty to write new data back). 
 *
 * Returns the buffer's data and stores the buffer in *bp; returns NULL
 * (leaving *bp untouched) when __bufio_new() yields no buffer, or an
 * ERR_PTR() when the buffer carries a read error, in which case the hold
 * is released before returning.
 */
static void *new_read(struct dm_bufio_client *c, sector_t block,
		      enum new_flag nf, struct dm_buffer **bp)
{
	int need_submit;
	struct dm_buffer *b;

	LIST_HEAD(write_list);

	dm_bufio_lock(c);
	b = __bufio_new(c, block, nf, &need_submit, &write_list);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	/* record the stack of the first holder only */
	if (b && b->hold_count == 1)
		buffer_record_stack(b);
#endif
	dm_bufio_unlock(c);

	/* submit any writeback that __bufio_new() queued, now that the lock is dropped */
	__flush_write_list(&write_list);

	if (!b)
		return NULL;

	if (need_submit)
		submit_io(b, REQ_OP_READ, read_endio);

	/* also waits for a read that another caller started on this buffer */
	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	if (b->read_error) {
		int error = blk_status_to_errno(b->read_error);

		dm_bufio_release(b);

		return ERR_PTR(error);
	}

	*bp = b;

	return b->data;
}

/*
 * Return the data of @block only if it is already cached and not
 * currently being read; otherwise return NULL. Never allocates a buffer.
 */
void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	return new_read(c, block, NF_GET, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_get);

/*
 * Return the data of @block, reading it from the device if it is not
 * cached. Must not be called when dm_bufio_in_request() is true.
 */
void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
		    struct dm_buffer **bp)
{
	BUG_ON(dm_bufio_in_request());

	return new_read(c, block, NF_READ, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_read);

/*
 * Like dm_bufio_read(), but a newly created buffer is not read from the
 * device. Must not be called when dm_bufio_in_request() is true.
 */
void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	BUG_ON(dm_bufio_in_request());

	return new_read(c, block, NF_FRESH, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_new);

/*
 * Start reads for @n_blocks consecutive blocks beginning at @block, without
 * waiting for them. Blocks that are already cached, and blocks for which
 * no buffer can be allocated without blocking, are skipped.
 */
void dm_bufio_prefetch(struct dm_bufio_client *c,
		       sector_t block, unsigned n_blocks)
{
	struct blk_plug plug;

	LIST_HEAD(write_list);

	BUG_ON(dm_bufio_in_request());

	blk_start_plug(&plug);
	dm_bufio_lock(c);

	for (; n_blocks--; block++) {
		int need_submit;
		struct dm_buffer *b;
		b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
				&write_list);
		/*
		 * __bufio_new() queued writeback: submit it with the lock
		 * dropped, then restart the plug and retake the lock.
		 */
		if (unlikely(!list_empty(&write_list))) {
			dm_bufio_unlock(c);
			blk_finish_plug(&plug);
			__flush_write_list(&write_list);
			blk_start_plug(&plug);
			dm_bufio_lock(c);
		}
		/* non-NULL only for a buffer newly created for this block */
		if (unlikely(b != NULL)) {
			dm_bufio_unlock(c);

			if (need_submit)
				submit_io(b, REQ_OP_READ, read_endio);
			dm_bufio_release(b);

			cond_resched();

			/* last block: the lock is already dropped, skip the final unlock */
			if (!n_blocks)
				goto flush_plug;
			dm_bufio_lock(c);
		}
	}

	dm_bufio_unlock(c);

flush_plug:
	blk_finish_plug(&plug);
}
EXPORT_SYMBOL_GPL(dm_bufio_prefetch);

/*
 * Drop one hold on a buffer. When the last hold goes away, waiters on
 * free_buffer_wait are woken, and a buffer that carries an error and has
 * no I/O or dirty state left is freed rather than kept cached.
 */
void dm_bufio_release(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	dm_bufio_lock(c);

	BUG_ON(!b->hold_count);

	b->hold_count--;
	if (!b->hold_count) {
		wake_up(&c->free_buffer_wait);

		/*
		 * If there were errors on the buffer, and the buffer is not
		 * to be written, free the buffer. There is no point in caching
		 * invalid buffer.
		 */
		if ((b->read_error || b->write_error) &&
		    !test_bit(B_READING, &b->state) &&
		    !test_bit(B_WRITING, &b->state) &&
		    !test_bit(B_DIRTY, &b->state)) {
			__unlink_buffer(b);
			__free_buffer_wake(b);
		}
	}

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_release);

/*
 * Mark the byte range [@start, @end) of a buffer dirty. @start must be
 * below @end and @end must not exceed the block size. On a buffer that is
 * already dirty the recorded range is widened to cover the new one.
 */
void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
					unsigned start, unsigned end)
{
	struct dm_bufio_client *c = b->c;

	BUG_ON(start >= end);
	BUG_ON(end > b->c->block_size);

	dm_bufio_lock(c);

	BUG_ON(test_bit(B_READING, &b->state));

	if (!test_and_set_bit(B_DIRTY, &b->state)) {
		/* first dirtying: record the range and move to the dirty list */
		b->dirty_start = start;
		b->dirty_end = end;
		__relink_lru(b, LIST_DIRTY);
	} else {
		if (start < b->dirty_start)
			b->dirty_start = start;
		if (end > b->dirty_end)
			b->dirty_end = end;
	}

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);

1240void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) 1241{ 1242 dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size); 1243} 1244EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); 1245 1246void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) 1247{ 1248 LIST_HEAD(write_list); 1249 1250 BUG_ON(dm_bufio_in_request()); 1251 1252 dm_bufio_lock(c); 1253 __write_dirty_buffers_async(c, 0, &write_list); 1254 dm_bufio_unlock(c); 1255 __flush_write_list(&write_list); 1256} 1257EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async); 1258 1259/* 1260 * For performance, it is essential that the buffers are written asynchronously 1261 * and simultaneously (so that the block layer can merge the writes) and then 1262 * waited upon. 1263 * 1264 * Finally, we flush hardware disk cache. 1265 */ 1266int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) 1267{ 1268 int a, f; 1269 unsigned long buffers_processed = 0; 1270 struct dm_buffer *b, *tmp; 1271 1272 LIST_HEAD(write_list); 1273 1274 dm_bufio_lock(c); 1275 __write_dirty_buffers_async(c, 0, &write_list); 1276 dm_bufio_unlock(c); 1277 __flush_write_list(&write_list); 1278 dm_bufio_lock(c); 1279 1280again: 1281 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { 1282 int dropped_lock = 0; 1283 1284 if (buffers_processed < c->n_buffers[LIST_DIRTY]) 1285 buffers_processed++; 1286 1287 BUG_ON(test_bit(B_READING, &b->state)); 1288 1289 if (test_bit(B_WRITING, &b->state)) { 1290 if (buffers_processed < c->n_buffers[LIST_DIRTY]) { 1291 dropped_lock = 1; 1292 b->hold_count++; 1293 dm_bufio_unlock(c); 1294 wait_on_bit_io(&b->state, B_WRITING, 1295 TASK_UNINTERRUPTIBLE); 1296 dm_bufio_lock(c); 1297 b->hold_count--; 1298 } else 1299 wait_on_bit_io(&b->state, B_WRITING, 1300 TASK_UNINTERRUPTIBLE); 1301 } 1302 1303 if (!test_bit(B_DIRTY, &b->state) && 1304 !test_bit(B_WRITING, &b->state)) 1305 __relink_lru(b, LIST_CLEAN); 1306 1307 cond_resched(); 1308 1309 /* 1310 * If we dropped the lock, the list 
is no longer consistent, 1311 * so we must restart the search. 1312 * 1313 * In the most common case, the buffer just processed is 1314 * relinked to the clean list, so we won't loop scanning the 1315 * same buffer again and again. 1316 * 1317 * This may livelock if there is another thread simultaneously 1318 * dirtying buffers, so we count the number of buffers walked 1319 * and if it exceeds the total number of buffers, it means that 1320 * someone is doing some writes simultaneously with us. In 1321 * this case, stop, dropping the lock. 1322 */ 1323 if (dropped_lock) 1324 goto again; 1325 } 1326 wake_up(&c->free_buffer_wait); 1327 dm_bufio_unlock(c); 1328 1329 a = xchg(&c->async_write_error, 0); 1330 f = dm_bufio_issue_flush(c); 1331 if (a) 1332 return a; 1333 1334 return f; 1335} 1336EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); 1337 1338/* 1339 * Use dm-io to send an empty barrier to flush the device. 1340 */ 1341int dm_bufio_issue_flush(struct dm_bufio_client *c) 1342{ 1343 struct dm_io_request io_req = { 1344 .bi_op = REQ_OP_WRITE, 1345 .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, 1346 .mem.type = DM_IO_KMEM, 1347 .mem.ptr.addr = NULL, 1348 .client = c->dm_io, 1349 }; 1350 struct dm_io_region io_reg = { 1351 .bdev = c->bdev, 1352 .sector = 0, 1353 .count = 0, 1354 }; 1355 1356 BUG_ON(dm_bufio_in_request()); 1357 1358 return dm_io(&io_req, 1, &io_reg, NULL); 1359} 1360EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); 1361 1362/* 1363 * Use dm-io to send a discard request to flush the device. 
1364 */ 1365int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count) 1366{ 1367 struct dm_io_request io_req = { 1368 .bi_op = REQ_OP_DISCARD, 1369 .bi_op_flags = REQ_SYNC, 1370 .mem.type = DM_IO_KMEM, 1371 .mem.ptr.addr = NULL, 1372 .client = c->dm_io, 1373 }; 1374 struct dm_io_region io_reg = { 1375 .bdev = c->bdev, 1376 .sector = block_to_sector(c, block), 1377 .count = block_to_sector(c, count), 1378 }; 1379 1380 BUG_ON(dm_bufio_in_request()); 1381 1382 return dm_io(&io_req, 1, &io_reg, NULL); 1383} 1384EXPORT_SYMBOL_GPL(dm_bufio_issue_discard); 1385 1386/* 1387 * We first delete any other buffer that may be at that new location. 1388 * 1389 * Then, we write the buffer to the original location if it was dirty. 1390 * 1391 * Then, if we are the only one who is holding the buffer, relink the buffer 1392 * in the buffer tree for the new location. 1393 * 1394 * If there was someone else holding the buffer, we write it to the new 1395 * location but not relink it, because that other user needs to have the buffer 1396 * at the same place. 1397 */ 1398void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block) 1399{ 1400 struct dm_bufio_client *c = b->c; 1401 struct dm_buffer *new; 1402 1403 BUG_ON(dm_bufio_in_request()); 1404 1405 dm_bufio_lock(c); 1406 1407retry: 1408 new = __find(c, new_block); 1409 if (new) { 1410 if (new->hold_count) { 1411 __wait_for_free_buffer(c); 1412 goto retry; 1413 } 1414 1415 /* 1416 * FIXME: Is there any point waiting for a write that's going 1417 * to be overwritten in a bit? 
1418 */ 1419 __make_buffer_clean(new); 1420 __unlink_buffer(new); 1421 __free_buffer_wake(new); 1422 } 1423 1424 BUG_ON(!b->hold_count); 1425 BUG_ON(test_bit(B_READING, &b->state)); 1426 1427 __write_dirty_buffer(b, NULL); 1428 if (b->hold_count == 1) { 1429 wait_on_bit_io(&b->state, B_WRITING, 1430 TASK_UNINTERRUPTIBLE); 1431 set_bit(B_DIRTY, &b->state); 1432 b->dirty_start = 0; 1433 b->dirty_end = c->block_size; 1434 __unlink_buffer(b); 1435 __link_buffer(b, new_block, LIST_DIRTY); 1436 } else { 1437 sector_t old_block; 1438 wait_on_bit_lock_io(&b->state, B_WRITING, 1439 TASK_UNINTERRUPTIBLE); 1440 /* 1441 * Relink buffer to "new_block" so that write_callback 1442 * sees "new_block" as a block number. 1443 * After the write, link the buffer back to old_block. 1444 * All this must be done in bufio lock, so that block number 1445 * change isn't visible to other threads. 1446 */ 1447 old_block = b->block; 1448 __unlink_buffer(b); 1449 __link_buffer(b, new_block, b->list_mode); 1450 submit_io(b, REQ_OP_WRITE, write_endio); 1451 wait_on_bit_io(&b->state, B_WRITING, 1452 TASK_UNINTERRUPTIBLE); 1453 __unlink_buffer(b); 1454 __link_buffer(b, old_block, b->list_mode); 1455 } 1456 1457 dm_bufio_unlock(c); 1458 dm_bufio_release(b); 1459} 1460EXPORT_SYMBOL_GPL(dm_bufio_release_move); 1461 1462static void forget_buffer_locked(struct dm_buffer *b) 1463{ 1464 if (likely(!b->hold_count) && likely(!b->state)) { 1465 __unlink_buffer(b); 1466 __free_buffer_wake(b); 1467 } 1468} 1469 1470/* 1471 * Free the given buffer. 1472 * 1473 * This is just a hint, if the buffer is in use or dirty, this function 1474 * does nothing. 
1475 */ 1476void dm_bufio_forget(struct dm_bufio_client *c, sector_t block) 1477{ 1478 struct dm_buffer *b; 1479 1480 dm_bufio_lock(c); 1481 1482 b = __find(c, block); 1483 if (b) 1484 forget_buffer_locked(b); 1485 1486 dm_bufio_unlock(c); 1487} 1488EXPORT_SYMBOL_GPL(dm_bufio_forget); 1489 1490void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks) 1491{ 1492 struct dm_buffer *b; 1493 sector_t end_block = block + n_blocks; 1494 1495 while (block < end_block) { 1496 dm_bufio_lock(c); 1497 1498 b = __find_next(c, block); 1499 if (b) { 1500 block = b->block + 1; 1501 forget_buffer_locked(b); 1502 } 1503 1504 dm_bufio_unlock(c); 1505 1506 if (!b) 1507 break; 1508 } 1509 1510} 1511EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers); 1512 1513void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n) 1514{ 1515 c->minimum_buffers = n; 1516} 1517EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers); 1518 1519unsigned dm_bufio_get_block_size(struct dm_bufio_client *c) 1520{ 1521 return c->block_size; 1522} 1523EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); 1524 1525sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) 1526{ 1527 sector_t s = bdev_nr_sectors(c->bdev); 1528 if (s >= c->start) 1529 s -= c->start; 1530 else 1531 s = 0; 1532 if (likely(c->sectors_per_block_bits >= 0)) 1533 s >>= c->sectors_per_block_bits; 1534 else 1535 sector_div(s, c->block_size >> SECTOR_SHIFT); 1536 return s; 1537} 1538EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); 1539 1540struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) 1541{ 1542 return c->dm_io; 1543} 1544EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); 1545 1546sector_t dm_bufio_get_block_number(struct dm_buffer *b) 1547{ 1548 return b->block; 1549} 1550EXPORT_SYMBOL_GPL(dm_bufio_get_block_number); 1551 1552void *dm_bufio_get_block_data(struct dm_buffer *b) 1553{ 1554 return b->data; 1555} 1556EXPORT_SYMBOL_GPL(dm_bufio_get_block_data); 1557 1558void 
*dm_bufio_get_aux_data(struct dm_buffer *b) 1559{ 1560 return b + 1; 1561} 1562EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data); 1563 1564struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b) 1565{ 1566 return b->c; 1567} 1568EXPORT_SYMBOL_GPL(dm_bufio_get_client); 1569 1570static void drop_buffers(struct dm_bufio_client *c) 1571{ 1572 struct dm_buffer *b; 1573 int i; 1574 bool warned = false; 1575 1576 BUG_ON(dm_bufio_in_request()); 1577 1578 /* 1579 * An optimization so that the buffers are not written one-by-one. 1580 */ 1581 dm_bufio_write_dirty_buffers_async(c); 1582 1583 dm_bufio_lock(c); 1584 1585 while ((b = __get_unclaimed_buffer(c))) 1586 __free_buffer_wake(b); 1587 1588 for (i = 0; i < LIST_SIZE; i++) 1589 list_for_each_entry(b, &c->lru[i], lru_list) { 1590 WARN_ON(!warned); 1591 warned = true; 1592 DMERR("leaked buffer %llx, hold count %u, list %d", 1593 (unsigned long long)b->block, b->hold_count, i); 1594#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 1595 stack_trace_print(b->stack_entries, b->stack_len, 1); 1596 /* mark unclaimed to avoid BUG_ON below */ 1597 b->hold_count = 0; 1598#endif 1599 } 1600 1601#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 1602 while ((b = __get_unclaimed_buffer(c))) 1603 __free_buffer_wake(b); 1604#endif 1605 1606 for (i = 0; i < LIST_SIZE; i++) 1607 BUG_ON(!list_empty(&c->lru[i])); 1608 1609 dm_bufio_unlock(c); 1610} 1611 1612/* 1613 * We may not be able to evict this buffer if IO pending or the client 1614 * is still using it. Caller is expected to know buffer is too old. 1615 * 1616 * And if GFP_NOFS is used, we must not do any I/O because we hold 1617 * dm_bufio_clients_lock and we would risk deadlock if the I/O gets 1618 * rerouted to different bufio client. 
1619 */ 1620static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) 1621{ 1622 if (!(gfp & __GFP_FS)) { 1623 if (test_bit(B_READING, &b->state) || 1624 test_bit(B_WRITING, &b->state) || 1625 test_bit(B_DIRTY, &b->state)) 1626 return false; 1627 } 1628 1629 if (b->hold_count) 1630 return false; 1631 1632 __make_buffer_clean(b); 1633 __unlink_buffer(b); 1634 __free_buffer_wake(b); 1635 1636 return true; 1637} 1638 1639static unsigned long get_retain_buffers(struct dm_bufio_client *c) 1640{ 1641 unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes); 1642 if (likely(c->sectors_per_block_bits >= 0)) 1643 retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT; 1644 else 1645 retain_bytes /= c->block_size; 1646 return retain_bytes; 1647} 1648 1649static void __scan(struct dm_bufio_client *c) 1650{ 1651 int l; 1652 struct dm_buffer *b, *tmp; 1653 unsigned long freed = 0; 1654 unsigned long count = c->n_buffers[LIST_CLEAN] + 1655 c->n_buffers[LIST_DIRTY]; 1656 unsigned long retain_target = get_retain_buffers(c); 1657 1658 for (l = 0; l < LIST_SIZE; l++) { 1659 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { 1660 if (count - freed <= retain_target) 1661 atomic_long_set(&c->need_shrink, 0); 1662 if (!atomic_long_read(&c->need_shrink)) 1663 return; 1664 if (__try_evict_buffer(b, GFP_KERNEL)) { 1665 atomic_long_dec(&c->need_shrink); 1666 freed++; 1667 } 1668 cond_resched(); 1669 } 1670 } 1671} 1672 1673static void shrink_work(struct work_struct *w) 1674{ 1675 struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work); 1676 1677 dm_bufio_lock(c); 1678 __scan(c); 1679 dm_bufio_unlock(c); 1680} 1681 1682static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) 1683{ 1684 struct dm_bufio_client *c; 1685 1686 c = container_of(shrink, struct dm_bufio_client, shrinker); 1687 atomic_long_add(sc->nr_to_scan, &c->need_shrink); 1688 queue_work(dm_bufio_wq, &c->shrink_work); 1689 1690 return 
sc->nr_to_scan; 1691} 1692 1693static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) 1694{ 1695 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); 1696 unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + 1697 READ_ONCE(c->n_buffers[LIST_DIRTY]); 1698 unsigned long retain_target = get_retain_buffers(c); 1699 unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); 1700 1701 if (unlikely(count < retain_target)) 1702 count = 0; 1703 else 1704 count -= retain_target; 1705 1706 if (unlikely(count < queued_for_cleanup)) 1707 count = 0; 1708 else 1709 count -= queued_for_cleanup; 1710 1711 return count; 1712} 1713 1714/* 1715 * Create the buffering interface 1716 */ 1717struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size, 1718 unsigned reserved_buffers, unsigned aux_size, 1719 void (*alloc_callback)(struct dm_buffer *), 1720 void (*write_callback)(struct dm_buffer *)) 1721{ 1722 int r; 1723 struct dm_bufio_client *c; 1724 unsigned i; 1725 char slab_name[27]; 1726 1727 if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) { 1728 DMERR("%s: block size not specified or is not multiple of 512b", __func__); 1729 r = -EINVAL; 1730 goto bad_client; 1731 } 1732 1733 c = kzalloc(sizeof(*c), GFP_KERNEL); 1734 if (!c) { 1735 r = -ENOMEM; 1736 goto bad_client; 1737 } 1738 c->buffer_tree = RB_ROOT; 1739 1740 c->bdev = bdev; 1741 c->block_size = block_size; 1742 if (is_power_of_2(block_size)) 1743 c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT; 1744 else 1745 c->sectors_per_block_bits = -1; 1746 1747 c->alloc_callback = alloc_callback; 1748 c->write_callback = write_callback; 1749 1750 for (i = 0; i < LIST_SIZE; i++) { 1751 INIT_LIST_HEAD(&c->lru[i]); 1752 c->n_buffers[i] = 0; 1753 } 1754 1755 mutex_init(&c->lock); 1756 INIT_LIST_HEAD(&c->reserved_buffers); 1757 c->need_reserved_buffers = reserved_buffers; 1758 1759 
dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS); 1760 1761 init_waitqueue_head(&c->free_buffer_wait); 1762 c->async_write_error = 0; 1763 1764 c->dm_io = dm_io_client_create(); 1765 if (IS_ERR(c->dm_io)) { 1766 r = PTR_ERR(c->dm_io); 1767 goto bad_dm_io; 1768 } 1769 1770 if (block_size <= KMALLOC_MAX_SIZE && 1771 (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { 1772 unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE); 1773 snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size); 1774 c->slab_cache = kmem_cache_create(slab_name, block_size, align, 1775 SLAB_RECLAIM_ACCOUNT, NULL); 1776 if (!c->slab_cache) { 1777 r = -ENOMEM; 1778 goto bad; 1779 } 1780 } 1781 if (aux_size) 1782 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size); 1783 else 1784 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer"); 1785 c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size, 1786 0, SLAB_RECLAIM_ACCOUNT, NULL); 1787 if (!c->slab_buffer) { 1788 r = -ENOMEM; 1789 goto bad; 1790 } 1791 1792 while (c->need_reserved_buffers) { 1793 struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL); 1794 1795 if (!b) { 1796 r = -ENOMEM; 1797 goto bad; 1798 } 1799 __free_buffer_wake(b); 1800 } 1801 1802 INIT_WORK(&c->shrink_work, shrink_work); 1803 atomic_long_set(&c->need_shrink, 0); 1804 1805 c->shrinker.count_objects = dm_bufio_shrink_count; 1806 c->shrinker.scan_objects = dm_bufio_shrink_scan; 1807 c->shrinker.seeks = 1; 1808 c->shrinker.batch = 0; 1809 r = register_shrinker(&c->shrinker); 1810 if (r) 1811 goto bad; 1812 1813 mutex_lock(&dm_bufio_clients_lock); 1814 dm_bufio_client_count++; 1815 list_add(&c->client_list, &dm_bufio_all_clients); 1816 __cache_size_refresh(); 1817 mutex_unlock(&dm_bufio_clients_lock); 1818 1819 return c; 1820 1821bad: 1822 while (!list_empty(&c->reserved_buffers)) { 1823 struct dm_buffer *b = list_entry(c->reserved_buffers.next, 1824 struct dm_buffer, lru_list); 1825 
list_del(&b->lru_list); 1826 free_buffer(b); 1827 } 1828 kmem_cache_destroy(c->slab_cache); 1829 kmem_cache_destroy(c->slab_buffer); 1830 dm_io_client_destroy(c->dm_io); 1831bad_dm_io: 1832 mutex_destroy(&c->lock); 1833 kfree(c); 1834bad_client: 1835 return ERR_PTR(r); 1836} 1837EXPORT_SYMBOL_GPL(dm_bufio_client_create); 1838 1839/* 1840 * Free the buffering interface. 1841 * It is required that there are no references on any buffers. 1842 */ 1843void dm_bufio_client_destroy(struct dm_bufio_client *c) 1844{ 1845 unsigned i; 1846 1847 drop_buffers(c); 1848 1849 unregister_shrinker(&c->shrinker); 1850 flush_work(&c->shrink_work); 1851 1852 mutex_lock(&dm_bufio_clients_lock); 1853 1854 list_del(&c->client_list); 1855 dm_bufio_client_count--; 1856 __cache_size_refresh(); 1857 1858 mutex_unlock(&dm_bufio_clients_lock); 1859 1860 BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree)); 1861 BUG_ON(c->need_reserved_buffers); 1862 1863 while (!list_empty(&c->reserved_buffers)) { 1864 struct dm_buffer *b = list_entry(c->reserved_buffers.next, 1865 struct dm_buffer, lru_list); 1866 list_del(&b->lru_list); 1867 free_buffer(b); 1868 } 1869 1870 for (i = 0; i < LIST_SIZE; i++) 1871 if (c->n_buffers[i]) 1872 DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]); 1873 1874 for (i = 0; i < LIST_SIZE; i++) 1875 BUG_ON(c->n_buffers[i]); 1876 1877 kmem_cache_destroy(c->slab_cache); 1878 kmem_cache_destroy(c->slab_buffer); 1879 dm_io_client_destroy(c->dm_io); 1880 mutex_destroy(&c->lock); 1881 kfree(c); 1882} 1883EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); 1884 1885void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start) 1886{ 1887 c->start = start; 1888} 1889EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset); 1890 1891static unsigned get_max_age_hz(void) 1892{ 1893 unsigned max_age = READ_ONCE(dm_bufio_max_age); 1894 1895 if (max_age > UINT_MAX / HZ) 1896 max_age = UINT_MAX / HZ; 1897 1898 return max_age * HZ; 1899} 1900 1901static bool older_than(struct dm_buffer *b, unsigned long 
age_hz) 1902{ 1903 return time_after_eq(jiffies, b->last_accessed + age_hz); 1904} 1905 1906static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) 1907{ 1908 struct dm_buffer *b, *tmp; 1909 unsigned long retain_target = get_retain_buffers(c); 1910 unsigned long count; 1911 LIST_HEAD(write_list); 1912 1913 dm_bufio_lock(c); 1914 1915 __check_watermark(c, &write_list); 1916 if (unlikely(!list_empty(&write_list))) { 1917 dm_bufio_unlock(c); 1918 __flush_write_list(&write_list); 1919 dm_bufio_lock(c); 1920 } 1921 1922 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; 1923 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) { 1924 if (count <= retain_target) 1925 break; 1926 1927 if (!older_than(b, age_hz)) 1928 break; 1929 1930 if (__try_evict_buffer(b, 0)) 1931 count--; 1932 1933 cond_resched(); 1934 } 1935 1936 dm_bufio_unlock(c); 1937} 1938 1939static void do_global_cleanup(struct work_struct *w) 1940{ 1941 struct dm_bufio_client *locked_client = NULL; 1942 struct dm_bufio_client *current_client; 1943 struct dm_buffer *b; 1944 unsigned spinlock_hold_count; 1945 unsigned long threshold = dm_bufio_cache_size - 1946 dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO; 1947 unsigned long loops = global_num * 2; 1948 1949 mutex_lock(&dm_bufio_clients_lock); 1950 1951 while (1) { 1952 cond_resched(); 1953 1954 spin_lock(&global_spinlock); 1955 if (unlikely(dm_bufio_current_allocated <= threshold)) 1956 break; 1957 1958 spinlock_hold_count = 0; 1959get_next: 1960 if (!loops--) 1961 break; 1962 if (unlikely(list_empty(&global_queue))) 1963 break; 1964 b = list_entry(global_queue.prev, struct dm_buffer, global_list); 1965 1966 if (b->accessed) { 1967 b->accessed = 0; 1968 list_move(&b->global_list, &global_queue); 1969 if (likely(++spinlock_hold_count < 16)) 1970 goto get_next; 1971 spin_unlock(&global_spinlock); 1972 continue; 1973 } 1974 1975 current_client = b->c; 1976 if (unlikely(current_client != locked_client)) { 
1977 if (locked_client) 1978 dm_bufio_unlock(locked_client); 1979 1980 if (!dm_bufio_trylock(current_client)) { 1981 spin_unlock(&global_spinlock); 1982 dm_bufio_lock(current_client); 1983 locked_client = current_client; 1984 continue; 1985 } 1986 1987 locked_client = current_client; 1988 } 1989 1990 spin_unlock(&global_spinlock); 1991 1992 if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) { 1993 spin_lock(&global_spinlock); 1994 list_move(&b->global_list, &global_queue); 1995 spin_unlock(&global_spinlock); 1996 } 1997 } 1998 1999 spin_unlock(&global_spinlock); 2000 2001 if (locked_client) 2002 dm_bufio_unlock(locked_client); 2003 2004 mutex_unlock(&dm_bufio_clients_lock); 2005} 2006 2007static void cleanup_old_buffers(void) 2008{ 2009 unsigned long max_age_hz = get_max_age_hz(); 2010 struct dm_bufio_client *c; 2011 2012 mutex_lock(&dm_bufio_clients_lock); 2013 2014 __cache_size_refresh(); 2015 2016 list_for_each_entry(c, &dm_bufio_all_clients, client_list) 2017 __evict_old_buffers(c, max_age_hz); 2018 2019 mutex_unlock(&dm_bufio_clients_lock); 2020} 2021 2022static void work_fn(struct work_struct *w) 2023{ 2024 cleanup_old_buffers(); 2025 2026 queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, 2027 DM_BUFIO_WORK_TIMER_SECS * HZ); 2028} 2029 2030/*---------------------------------------------------------------- 2031 * Module setup 2032 *--------------------------------------------------------------*/ 2033 2034/* 2035 * This is called only once for the whole dm_bufio module. 2036 * It initializes memory limit. 
2037 */ 2038static int __init dm_bufio_init(void) 2039{ 2040 __u64 mem; 2041 2042 dm_bufio_allocated_kmem_cache = 0; 2043 dm_bufio_allocated_get_free_pages = 0; 2044 dm_bufio_allocated_vmalloc = 0; 2045 dm_bufio_current_allocated = 0; 2046 2047 mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(), 2048 DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; 2049 2050 if (mem > ULONG_MAX) 2051 mem = ULONG_MAX; 2052 2053#ifdef CONFIG_MMU 2054 if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100)) 2055 mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100); 2056#endif 2057 2058 dm_bufio_default_cache_size = mem; 2059 2060 mutex_lock(&dm_bufio_clients_lock); 2061 __cache_size_refresh(); 2062 mutex_unlock(&dm_bufio_clients_lock); 2063 2064 dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0); 2065 if (!dm_bufio_wq) 2066 return -ENOMEM; 2067 2068 INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn); 2069 INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup); 2070 queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, 2071 DM_BUFIO_WORK_TIMER_SECS * HZ); 2072 2073 return 0; 2074} 2075 2076/* 2077 * This is called once when unloading the dm_bufio module. 
2078 */ 2079static void __exit dm_bufio_exit(void) 2080{ 2081 int bug = 0; 2082 2083 cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); 2084 destroy_workqueue(dm_bufio_wq); 2085 2086 if (dm_bufio_client_count) { 2087 DMCRIT("%s: dm_bufio_client_count leaked: %d", 2088 __func__, dm_bufio_client_count); 2089 bug = 1; 2090 } 2091 2092 if (dm_bufio_current_allocated) { 2093 DMCRIT("%s: dm_bufio_current_allocated leaked: %lu", 2094 __func__, dm_bufio_current_allocated); 2095 bug = 1; 2096 } 2097 2098 if (dm_bufio_allocated_get_free_pages) { 2099 DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu", 2100 __func__, dm_bufio_allocated_get_free_pages); 2101 bug = 1; 2102 } 2103 2104 if (dm_bufio_allocated_vmalloc) { 2105 DMCRIT("%s: dm_bufio_vmalloc leaked: %lu", 2106 __func__, dm_bufio_allocated_vmalloc); 2107 bug = 1; 2108 } 2109 2110 BUG_ON(bug); 2111} 2112 2113module_init(dm_bufio_init) 2114module_exit(dm_bufio_exit) 2115 2116module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR); 2117MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); 2118 2119module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); 2120MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); 2121 2122module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR); 2123MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); 2124 2125module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); 2126MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); 2127 2128module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO); 2129MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc"); 2130 2131module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO); 2132MODULE_PARM_DESC(allocated_get_free_pages_bytes, 
"Memory allocated with get_free_pages"); 2133 2134module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO); 2135MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc"); 2136 2137module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO); 2138MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache"); 2139 2140MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 2141MODULE_DESCRIPTION(DM_NAME " buffered I/O library"); 2142MODULE_LICENSE("GPL");