cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

md-bitmap.c (72105B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
      4 *
      5 * bitmap_create  - sets up the bitmap structure
      6 * bitmap_destroy - destroys the bitmap structure
      7 *
      8 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
      9 * - added disk storage for bitmap
     10 * - changes to allow various bitmap chunk sizes
     11 */
     12
     13/*
     14 * Still to do:
     15 *
     16 * flush after percent set rather than just time based. (maybe both).
     17 */
     18
     19#include <linux/blkdev.h>
     20#include <linux/module.h>
     21#include <linux/errno.h>
     22#include <linux/slab.h>
     23#include <linux/init.h>
     24#include <linux/timer.h>
     25#include <linux/sched.h>
     26#include <linux/list.h>
     27#include <linux/file.h>
     28#include <linux/mount.h>
     29#include <linux/buffer_head.h>
     30#include <linux/seq_file.h>
     31#include <trace/events/block.h>
     32#include "md.h"
     33#include "md-bitmap.h"
     34
     35static inline char *bmname(struct bitmap *bitmap)
     36{
     37	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
     38}
     39
     40/*
     41 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
     42 *
     43 * 1) check to see if this page is allocated, if it's not then try to alloc
     44 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
     45 *    page pointer directly as a counter
     46 *
     47 * if we find our page, we increment the page's refcount so that it stays
     48 * allocated while we're using it
     49 */
     50static int md_bitmap_checkpage(struct bitmap_counts *bitmap,
     51			       unsigned long page, int create, int no_hijack)
     52__releases(bitmap->lock)
     53__acquires(bitmap->lock)
     54{
     55	unsigned char *mappage;
     56
     57	if (page >= bitmap->pages) {
     58		/* This can happen if bitmap_start_sync goes beyond
     59		 * End-of-device while looking for a whole page.
     60		 * It is harmless.
     61		 */
     62		return -EINVAL;
     63	}
     64
     65	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
     66		return 0;
     67
     68	if (bitmap->bp[page].map) /* page is already allocated, just return */
     69		return 0;
     70
     71	if (!create)
     72		return -ENOENT;
     73
     74	/* this page has not been allocated yet */
     75
     76	spin_unlock_irq(&bitmap->lock);
     77	/* It is possible that this is being called inside a
     78	 * prepare_to_wait/finish_wait loop from raid5c:make_request().
     79	 * In general it is not permitted to sleep in that context as it
     80	 * can cause the loop to spin freely.
     81	 * That doesn't apply here as we can only reach this point
     82	 * once with any loop.
     83	 * When this function completes, either bp[page].map or
     84	 * bp[page].hijacked.  In either case, this function will
     85	 * abort before getting to this point again.  So there is
     86	 * no risk of a free-spin, and so it is safe to assert
     87	 * that sleeping here is allowed.
     88	 */
     89	sched_annotate_sleep();
     90	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
     91	spin_lock_irq(&bitmap->lock);
     92
     93	if (mappage == NULL) {
     94		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
     95		/* We don't support hijack for cluster raid */
     96		if (no_hijack)
     97			return -ENOMEM;
     98		/* failed - set the hijacked flag so that we can use the
     99		 * pointer as a counter */
    100		if (!bitmap->bp[page].map)
    101			bitmap->bp[page].hijacked = 1;
    102	} else if (bitmap->bp[page].map ||
    103		   bitmap->bp[page].hijacked) {
    104		/* somebody beat us to getting the page */
    105		kfree(mappage);
    106	} else {
    107
    108		/* no page was in place and we have one, so install it */
    109
    110		bitmap->bp[page].map = mappage;
    111		bitmap->missing_pages--;
    112	}
    113	return 0;
    114}
    115
    116/* if page is completely empty, put it back on the free list, or dealloc it */
    117/* if page was hijacked, unmark the flag so it might get alloced next time */
    118/* Note: lock should be held when calling this */
    119static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
    120{
    121	char *ptr;
    122
    123	if (bitmap->bp[page].count) /* page is still busy */
    124		return;
    125
    126	/* page is no longer in use, it can be released */
    127
    128	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
    129		bitmap->bp[page].hijacked = 0;
    130		bitmap->bp[page].map = NULL;
    131	} else {
    132		/* normal case, free the page */
    133		ptr = bitmap->bp[page].map;
    134		bitmap->bp[page].map = NULL;
    135		bitmap->missing_pages++;
    136		kfree(ptr);
    137	}
    138}
    139
    140/*
    141 * bitmap file handling - read and write the bitmap file and its superblock
    142 */
    143
    144/*
    145 * basic page I/O operations
    146 */
    147
    148/* IO operations when bitmap is stored near all superblocks */
    149static int read_sb_page(struct mddev *mddev, loff_t offset,
    150			struct page *page,
    151			unsigned long index, int size)
    152{
    153	/* choose a good rdev and read the page from there */
    154
    155	struct md_rdev *rdev;
    156	sector_t target;
    157
    158	rdev_for_each(rdev, mddev) {
    159		if (! test_bit(In_sync, &rdev->flags)
    160		    || test_bit(Faulty, &rdev->flags)
    161		    || test_bit(Bitmap_sync, &rdev->flags))
    162			continue;
    163
    164		target = offset + index * (PAGE_SIZE/512);
    165
    166		if (sync_page_io(rdev, target,
    167				 roundup(size, bdev_logical_block_size(rdev->bdev)),
    168				 page, REQ_OP_READ, 0, true)) {
    169			page->index = index;
    170			return 0;
    171		}
    172	}
    173	return -EIO;
    174}
    175
    176static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
    177{
    178	/* Iterate the disks of an mddev, using rcu to protect access to the
    179	 * linked list, and raising the refcount of devices we return to ensure
    180	 * they don't disappear while in use.
    181	 * As devices are only added or removed when raid_disk is < 0 and
    182	 * nr_pending is 0 and In_sync is clear, the entries we return will
    183	 * still be in the same position on the list when we re-enter
    184	 * list_for_each_entry_continue_rcu.
    185	 *
    186	 * Note that if entered with 'rdev == NULL' to start at the
    187	 * beginning, we temporarily assign 'rdev' to an address which
    188	 * isn't really an rdev, but which can be used by
    189	 * list_for_each_entry_continue_rcu() to find the first entry.
    190	 */
    191	rcu_read_lock();
    192	if (rdev == NULL)
    193		/* start at the beginning */
    194		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
    195	else {
    196		/* release the previous rdev and start from there. */
    197		rdev_dec_pending(rdev, mddev);
    198	}
    199	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
    200		if (rdev->raid_disk >= 0 &&
    201		    !test_bit(Faulty, &rdev->flags)) {
    202			/* this is a usable devices */
    203			atomic_inc(&rdev->nr_pending);
    204			rcu_read_unlock();
    205			return rdev;
    206		}
    207	}
    208	rcu_read_unlock();
    209	return NULL;
    210}
    211
    212static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
    213{
    214	struct md_rdev *rdev;
    215	struct block_device *bdev;
    216	struct mddev *mddev = bitmap->mddev;
    217	struct bitmap_storage *store = &bitmap->storage;
    218
    219restart:
    220	rdev = NULL;
    221	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
    222		int size = PAGE_SIZE;
    223		loff_t offset = mddev->bitmap_info.offset;
    224
    225		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
    226
    227		if (page->index == store->file_pages-1) {
    228			int last_page_size = store->bytes & (PAGE_SIZE-1);
    229			if (last_page_size == 0)
    230				last_page_size = PAGE_SIZE;
    231			size = roundup(last_page_size,
    232				       bdev_logical_block_size(bdev));
    233		}
    234		/* Just make sure we aren't corrupting data or
    235		 * metadata
    236		 */
    237		if (mddev->external) {
    238			/* Bitmap could be anywhere. */
    239			if (rdev->sb_start + offset + (page->index
    240						       * (PAGE_SIZE/512))
    241			    > rdev->data_offset
    242			    &&
    243			    rdev->sb_start + offset
    244			    < (rdev->data_offset + mddev->dev_sectors
    245			     + (PAGE_SIZE/512)))
    246				goto bad_alignment;
    247		} else if (offset < 0) {
    248			/* DATA  BITMAP METADATA  */
    249			if (offset
    250			    + (long)(page->index * (PAGE_SIZE/512))
    251			    + size/512 > 0)
    252				/* bitmap runs in to metadata */
    253				goto bad_alignment;
    254			if (rdev->data_offset + mddev->dev_sectors
    255			    > rdev->sb_start + offset)
    256				/* data runs in to bitmap */
    257				goto bad_alignment;
    258		} else if (rdev->sb_start < rdev->data_offset) {
    259			/* METADATA BITMAP DATA */
    260			if (rdev->sb_start
    261			    + offset
    262			    + page->index*(PAGE_SIZE/512) + size/512
    263			    > rdev->data_offset)
    264				/* bitmap runs in to data */
    265				goto bad_alignment;
    266		} else {
    267			/* DATA METADATA BITMAP - no problems */
    268		}
    269		md_super_write(mddev, rdev,
    270			       rdev->sb_start + offset
    271			       + page->index * (PAGE_SIZE/512),
    272			       size,
    273			       page);
    274	}
    275
    276	if (wait && md_super_wait(mddev) < 0)
    277		goto restart;
    278	return 0;
    279
    280 bad_alignment:
    281	return -EINVAL;
    282}
    283
    284static void md_bitmap_file_kick(struct bitmap *bitmap);
    285/*
    286 * write out a page to a file
    287 */
    288static void write_page(struct bitmap *bitmap, struct page *page, int wait)
    289{
    290	struct buffer_head *bh;
    291
    292	if (bitmap->storage.file == NULL) {
    293		switch (write_sb_page(bitmap, page, wait)) {
    294		case -EINVAL:
    295			set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
    296		}
    297	} else {
    298
    299		bh = page_buffers(page);
    300
    301		while (bh && bh->b_blocknr) {
    302			atomic_inc(&bitmap->pending_writes);
    303			set_buffer_locked(bh);
    304			set_buffer_mapped(bh);
    305			submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
    306			bh = bh->b_this_page;
    307		}
    308
    309		if (wait)
    310			wait_event(bitmap->write_wait,
    311				   atomic_read(&bitmap->pending_writes)==0);
    312	}
    313	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
    314		md_bitmap_file_kick(bitmap);
    315}
    316
    317static void end_bitmap_write(struct buffer_head *bh, int uptodate)
    318{
    319	struct bitmap *bitmap = bh->b_private;
    320
    321	if (!uptodate)
    322		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
    323	if (atomic_dec_and_test(&bitmap->pending_writes))
    324		wake_up(&bitmap->write_wait);
    325}
    326
    327static void free_buffers(struct page *page)
    328{
    329	struct buffer_head *bh;
    330
    331	if (!PagePrivate(page))
    332		return;
    333
    334	bh = page_buffers(page);
    335	while (bh) {
    336		struct buffer_head *next = bh->b_this_page;
    337		free_buffer_head(bh);
    338		bh = next;
    339	}
    340	detach_page_private(page);
    341	put_page(page);
    342}
    343
    344/* read a page from a file.
    345 * We both read the page, and attach buffers to the page to record the
    346 * address of each block (using bmap).  These addresses will be used
    347 * to write the block later, completely bypassing the filesystem.
    348 * This usage is similar to how swap files are handled, and allows us
    349 * to write to a file with no concerns of memory allocation failing.
    350 */
    351static int read_page(struct file *file, unsigned long index,
    352		     struct bitmap *bitmap,
    353		     unsigned long count,
    354		     struct page *page)
    355{
    356	int ret = 0;
    357	struct inode *inode = file_inode(file);
    358	struct buffer_head *bh;
    359	sector_t block, blk_cur;
    360	unsigned long blocksize = i_blocksize(inode);
    361
    362	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
    363		 (unsigned long long)index << PAGE_SHIFT);
    364
    365	bh = alloc_page_buffers(page, blocksize, false);
    366	if (!bh) {
    367		ret = -ENOMEM;
    368		goto out;
    369	}
    370	attach_page_private(page, bh);
    371	blk_cur = index << (PAGE_SHIFT - inode->i_blkbits);
    372	while (bh) {
    373		block = blk_cur;
    374
    375		if (count == 0)
    376			bh->b_blocknr = 0;
    377		else {
    378			ret = bmap(inode, &block);
    379			if (ret || !block) {
    380				ret = -EINVAL;
    381				bh->b_blocknr = 0;
    382				goto out;
    383			}
    384
    385			bh->b_blocknr = block;
    386			bh->b_bdev = inode->i_sb->s_bdev;
    387			if (count < blocksize)
    388				count = 0;
    389			else
    390				count -= blocksize;
    391
    392			bh->b_end_io = end_bitmap_write;
    393			bh->b_private = bitmap;
    394			atomic_inc(&bitmap->pending_writes);
    395			set_buffer_locked(bh);
    396			set_buffer_mapped(bh);
    397			submit_bh(REQ_OP_READ, 0, bh);
    398		}
    399		blk_cur++;
    400		bh = bh->b_this_page;
    401	}
    402	page->index = index;
    403
    404	wait_event(bitmap->write_wait,
    405		   atomic_read(&bitmap->pending_writes)==0);
    406	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
    407		ret = -EIO;
    408out:
    409	if (ret)
    410		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
    411		       (int)PAGE_SIZE,
    412		       (unsigned long long)index << PAGE_SHIFT,
    413		       ret);
    414	return ret;
    415}
    416
    417/*
    418 * bitmap file superblock operations
    419 */
    420
    421/*
    422 * md_bitmap_wait_writes() should be called before writing any bitmap
    423 * blocks, to ensure previous writes, particularly from
    424 * md_bitmap_daemon_work(), have completed.
    425 */
    426static void md_bitmap_wait_writes(struct bitmap *bitmap)
    427{
    428	if (bitmap->storage.file)
    429		wait_event(bitmap->write_wait,
    430			   atomic_read(&bitmap->pending_writes)==0);
    431	else
    432		/* Note that we ignore the return value.  The writes
    433		 * might have failed, but that would just mean that
    434		 * some bits which should be cleared haven't been,
    435		 * which is safe.  The relevant bitmap blocks will
    436		 * probably get written again, but there is no great
    437		 * loss if they aren't.
    438		 */
    439		md_super_wait(bitmap->mddev);
    440}
    441
    442
    443/* update the event counter and sync the superblock to disk */
    444void md_bitmap_update_sb(struct bitmap *bitmap)
    445{
    446	bitmap_super_t *sb;
    447
    448	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
    449		return;
    450	if (bitmap->mddev->bitmap_info.external)
    451		return;
    452	if (!bitmap->storage.sb_page) /* no superblock */
    453		return;
    454	sb = kmap_atomic(bitmap->storage.sb_page);
    455	sb->events = cpu_to_le64(bitmap->mddev->events);
    456	if (bitmap->mddev->events < bitmap->events_cleared)
    457		/* rocking back to read-only */
    458		bitmap->events_cleared = bitmap->mddev->events;
    459	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
    460	/*
    461	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
    462	 * a bitmap write error occurred but the later writes succeeded.
    463	 */
    464	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
    465	/* Just in case these have been changed via sysfs: */
    466	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
    467	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
    468	/* This might have been changed by a reshape */
    469	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
    470	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
    471	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
    472	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
    473					   bitmap_info.space);
    474	kunmap_atomic(sb);
    475	write_page(bitmap, bitmap->storage.sb_page, 1);
    476}
    477EXPORT_SYMBOL(md_bitmap_update_sb);
    478
    479/* print out the bitmap file superblock */
    480void md_bitmap_print_sb(struct bitmap *bitmap)
    481{
    482	bitmap_super_t *sb;
    483
    484	if (!bitmap || !bitmap->storage.sb_page)
    485		return;
    486	sb = kmap_atomic(bitmap->storage.sb_page);
    487	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
    488	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
    489	pr_debug("       version: %d\n", le32_to_cpu(sb->version));
    490	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
    491		 le32_to_cpu(*(__le32 *)(sb->uuid+0)),
    492		 le32_to_cpu(*(__le32 *)(sb->uuid+4)),
    493		 le32_to_cpu(*(__le32 *)(sb->uuid+8)),
    494		 le32_to_cpu(*(__le32 *)(sb->uuid+12)));
    495	pr_debug("        events: %llu\n",
    496		 (unsigned long long) le64_to_cpu(sb->events));
    497	pr_debug("events cleared: %llu\n",
    498		 (unsigned long long) le64_to_cpu(sb->events_cleared));
    499	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
    500	pr_debug("     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
    501	pr_debug("  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
    502	pr_debug("     sync size: %llu KB\n",
    503		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
    504	pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
    505	kunmap_atomic(sb);
    506}
    507
    508/*
    509 * bitmap_new_disk_sb
    510 * @bitmap
    511 *
    512 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
    513 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
    514 * This function verifies 'bitmap_info' and populates the on-disk bitmap
    515 * structure, which is to be written to disk.
    516 *
    517 * Returns: 0 on success, -Exxx on error
    518 */
    519static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
    520{
    521	bitmap_super_t *sb;
    522	unsigned long chunksize, daemon_sleep, write_behind;
    523
    524	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
    525	if (bitmap->storage.sb_page == NULL)
    526		return -ENOMEM;
    527	bitmap->storage.sb_page->index = 0;
    528
    529	sb = kmap_atomic(bitmap->storage.sb_page);
    530
    531	sb->magic = cpu_to_le32(BITMAP_MAGIC);
    532	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
    533
    534	chunksize = bitmap->mddev->bitmap_info.chunksize;
    535	BUG_ON(!chunksize);
    536	if (!is_power_of_2(chunksize)) {
    537		kunmap_atomic(sb);
    538		pr_warn("bitmap chunksize not a power of 2\n");
    539		return -EINVAL;
    540	}
    541	sb->chunksize = cpu_to_le32(chunksize);
    542
    543	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
    544	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
    545		pr_debug("Choosing daemon_sleep default (5 sec)\n");
    546		daemon_sleep = 5 * HZ;
    547	}
    548	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
    549	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
    550
    551	/*
    552	 * FIXME: write_behind for RAID1.  If not specified, what
    553	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
    554	 */
    555	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
    556	if (write_behind > COUNTER_MAX)
    557		write_behind = COUNTER_MAX / 2;
    558	sb->write_behind = cpu_to_le32(write_behind);
    559	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
    560
    561	/* keep the array size field of the bitmap superblock up to date */
    562	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
    563
    564	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
    565
    566	set_bit(BITMAP_STALE, &bitmap->flags);
    567	sb->state = cpu_to_le32(bitmap->flags);
    568	bitmap->events_cleared = bitmap->mddev->events;
    569	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
    570	bitmap->mddev->bitmap_info.nodes = 0;
    571
    572	kunmap_atomic(sb);
    573
    574	return 0;
    575}
    576
    577/* read the superblock from the bitmap file and initialize some bitmap fields */
    578static int md_bitmap_read_sb(struct bitmap *bitmap)
    579{
    580	char *reason = NULL;
    581	bitmap_super_t *sb;
    582	unsigned long chunksize, daemon_sleep, write_behind;
    583	unsigned long long events;
    584	int nodes = 0;
    585	unsigned long sectors_reserved = 0;
    586	int err = -EINVAL;
    587	struct page *sb_page;
    588	loff_t offset = bitmap->mddev->bitmap_info.offset;
    589
    590	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
    591		chunksize = 128 * 1024 * 1024;
    592		daemon_sleep = 5 * HZ;
    593		write_behind = 0;
    594		set_bit(BITMAP_STALE, &bitmap->flags);
    595		err = 0;
    596		goto out_no_sb;
    597	}
    598	/* page 0 is the superblock, read it... */
    599	sb_page = alloc_page(GFP_KERNEL);
    600	if (!sb_page)
    601		return -ENOMEM;
    602	bitmap->storage.sb_page = sb_page;
    603
    604re_read:
    605	/* If cluster_slot is set, the cluster is setup */
    606	if (bitmap->cluster_slot >= 0) {
    607		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
    608
    609		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
    610			   (bitmap->mddev->bitmap_info.chunksize >> 9));
    611		/* bits to bytes */
    612		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
    613		/* to 4k blocks */
    614		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
    615		offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
    616		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
    617			bitmap->cluster_slot, offset);
    618	}
    619
    620	if (bitmap->storage.file) {
    621		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
    622		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
    623
    624		err = read_page(bitmap->storage.file, 0,
    625				bitmap, bytes, sb_page);
    626	} else {
    627		err = read_sb_page(bitmap->mddev,
    628				   offset,
    629				   sb_page,
    630				   0, sizeof(bitmap_super_t));
    631	}
    632	if (err)
    633		return err;
    634
    635	err = -EINVAL;
    636	sb = kmap_atomic(sb_page);
    637
    638	chunksize = le32_to_cpu(sb->chunksize);
    639	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
    640	write_behind = le32_to_cpu(sb->write_behind);
    641	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
    642
    643	/* verify that the bitmap-specific fields are valid */
    644	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
    645		reason = "bad magic";
    646	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
    647		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
    648		reason = "unrecognized superblock version";
    649	else if (chunksize < 512)
    650		reason = "bitmap chunksize too small";
    651	else if (!is_power_of_2(chunksize))
    652		reason = "bitmap chunksize not a power of 2";
    653	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
    654		reason = "daemon sleep period out of range";
    655	else if (write_behind > COUNTER_MAX)
    656		reason = "write-behind limit out of range (0 - 16383)";
    657	if (reason) {
    658		pr_warn("%s: invalid bitmap file superblock: %s\n",
    659			bmname(bitmap), reason);
    660		goto out;
    661	}
    662
    663	/*
    664	 * Setup nodes/clustername only if bitmap version is
    665	 * cluster-compatible
    666	 */
    667	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
    668		nodes = le32_to_cpu(sb->nodes);
    669		strscpy(bitmap->mddev->bitmap_info.cluster_name,
    670				sb->cluster_name, 64);
    671	}
    672
    673	/* keep the array size field of the bitmap superblock up to date */
    674	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
    675
    676	if (bitmap->mddev->persistent) {
    677		/*
    678		 * We have a persistent array superblock, so compare the
    679		 * bitmap's UUID and event counter to the mddev's
    680		 */
    681		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
    682			pr_warn("%s: bitmap superblock UUID mismatch\n",
    683				bmname(bitmap));
    684			goto out;
    685		}
    686		events = le64_to_cpu(sb->events);
    687		if (!nodes && (events < bitmap->mddev->events)) {
    688			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
    689				bmname(bitmap), events,
    690				(unsigned long long) bitmap->mddev->events);
    691			set_bit(BITMAP_STALE, &bitmap->flags);
    692		}
    693	}
    694
    695	/* assign fields using values from superblock */
    696	bitmap->flags |= le32_to_cpu(sb->state);
    697	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
    698		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
    699	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
    700	err = 0;
    701
    702out:
    703	kunmap_atomic(sb);
    704	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
    705		/* Assigning chunksize is required for "re_read" */
    706		bitmap->mddev->bitmap_info.chunksize = chunksize;
    707		err = md_setup_cluster(bitmap->mddev, nodes);
    708		if (err) {
    709			pr_warn("%s: Could not setup cluster service (%d)\n",
    710				bmname(bitmap), err);
    711			goto out_no_sb;
    712		}
    713		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
    714		goto re_read;
    715	}
    716
    717out_no_sb:
    718	if (err == 0) {
    719		if (test_bit(BITMAP_STALE, &bitmap->flags))
    720			bitmap->events_cleared = bitmap->mddev->events;
    721		bitmap->mddev->bitmap_info.chunksize = chunksize;
    722		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
    723		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
    724		bitmap->mddev->bitmap_info.nodes = nodes;
    725		if (bitmap->mddev->bitmap_info.space == 0 ||
    726			bitmap->mddev->bitmap_info.space > sectors_reserved)
    727			bitmap->mddev->bitmap_info.space = sectors_reserved;
    728	} else {
    729		md_bitmap_print_sb(bitmap);
    730		if (bitmap->cluster_slot < 0)
    731			md_cluster_stop(bitmap->mddev);
    732	}
    733	return err;
    734}
    735
    736/*
    737 * general bitmap file operations
    738 */
    739
    740/*
    741 * on-disk bitmap:
    742 *
    743 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
    744 * file a page at a time. There's a superblock at the start of the file.
    745 */
    746/* calculate the index of the page that contains this bit */
    747static inline unsigned long file_page_index(struct bitmap_storage *store,
    748					    unsigned long chunk)
    749{
    750	if (store->sb_page)
    751		chunk += sizeof(bitmap_super_t) << 3;
    752	return chunk >> PAGE_BIT_SHIFT;
    753}
    754
    755/* calculate the (bit) offset of this bit within a page */
    756static inline unsigned long file_page_offset(struct bitmap_storage *store,
    757					     unsigned long chunk)
    758{
    759	if (store->sb_page)
    760		chunk += sizeof(bitmap_super_t) << 3;
    761	return chunk & (PAGE_BITS - 1);
    762}
    763
    764/*
    765 * return a pointer to the page in the filemap that contains the given bit
    766 *
    767 */
    768static inline struct page *filemap_get_page(struct bitmap_storage *store,
    769					    unsigned long chunk)
    770{
    771	if (file_page_index(store, chunk) >= store->file_pages)
    772		return NULL;
    773	return store->filemap[file_page_index(store, chunk)];
    774}
    775
    776static int md_bitmap_storage_alloc(struct bitmap_storage *store,
    777				   unsigned long chunks, int with_super,
    778				   int slot_number)
    779{
    780	int pnum, offset = 0;
    781	unsigned long num_pages;
    782	unsigned long bytes;
    783
    784	bytes = DIV_ROUND_UP(chunks, 8);
    785	if (with_super)
    786		bytes += sizeof(bitmap_super_t);
    787
    788	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
    789	offset = slot_number * num_pages;
    790
    791	store->filemap = kmalloc_array(num_pages, sizeof(struct page *),
    792				       GFP_KERNEL);
    793	if (!store->filemap)
    794		return -ENOMEM;
    795
    796	if (with_super && !store->sb_page) {
    797		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
    798		if (store->sb_page == NULL)
    799			return -ENOMEM;
    800	}
    801
    802	pnum = 0;
    803	if (store->sb_page) {
    804		store->filemap[0] = store->sb_page;
    805		pnum = 1;
    806		store->sb_page->index = offset;
    807	}
    808
    809	for ( ; pnum < num_pages; pnum++) {
    810		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
    811		if (!store->filemap[pnum]) {
    812			store->file_pages = pnum;
    813			return -ENOMEM;
    814		}
    815		store->filemap[pnum]->index = pnum + offset;
    816	}
    817	store->file_pages = pnum;
    818
    819	/* We need 4 bits per page, rounded up to a multiple
    820	 * of sizeof(unsigned long) */
    821	store->filemap_attr = kzalloc(
    822		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
    823		GFP_KERNEL);
    824	if (!store->filemap_attr)
    825		return -ENOMEM;
    826
    827	store->bytes = bytes;
    828
    829	return 0;
    830}
    831
    832static void md_bitmap_file_unmap(struct bitmap_storage *store)
    833{
    834	struct page **map, *sb_page;
    835	int pages;
    836	struct file *file;
    837
    838	file = store->file;
    839	map = store->filemap;
    840	pages = store->file_pages;
    841	sb_page = store->sb_page;
    842
    843	while (pages--)
    844		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
    845			free_buffers(map[pages]);
    846	kfree(map);
    847	kfree(store->filemap_attr);
    848
    849	if (sb_page)
    850		free_buffers(sb_page);
    851
    852	if (file) {
    853		struct inode *inode = file_inode(file);
    854		invalidate_mapping_pages(inode->i_mapping, 0, -1);
    855		fput(file);
    856	}
    857}
    858
    859/*
    860 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
    861 * then it is no longer reliable, so we stop using it and we mark the file
    862 * as failed in the superblock
    863 */
    864static void md_bitmap_file_kick(struct bitmap *bitmap)
    865{
    866	char *path, *ptr = NULL;
    867
    868	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
    869		md_bitmap_update_sb(bitmap);
    870
    871		if (bitmap->storage.file) {
    872			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
    873			if (path)
    874				ptr = file_path(bitmap->storage.file,
    875					     path, PAGE_SIZE);
    876
    877			pr_warn("%s: kicking failed bitmap file %s from array!\n",
    878				bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
    879
    880			kfree(path);
    881		} else
    882			pr_warn("%s: disabling internal bitmap due to errors\n",
    883				bmname(bitmap));
    884	}
    885}
    886
    /* Per-page attribute bits kept in bitmap->storage.filemap_attr. */
    enum bitmap_page_attr {
    	/* there are set bits that need to be synced */
    	BITMAP_PAGE_DIRTY = 0,
    	/* there are bits that are being cleaned, i.e. counter is 1 or 2 */
    	BITMAP_PAGE_PENDING = 1,
    	/* there are cleared bits that need to be synced */
    	BITMAP_PAGE_NEEDWRITE = 2,
    };
    893
    894static inline void set_page_attr(struct bitmap *bitmap, int pnum,
    895				 enum bitmap_page_attr attr)
    896{
    897	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
    898}
    899
    900static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
    901				   enum bitmap_page_attr attr)
    902{
    903	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
    904}
    905
    906static inline int test_page_attr(struct bitmap *bitmap, int pnum,
    907				 enum bitmap_page_attr attr)
    908{
    909	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
    910}
    911
    912static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
    913					   enum bitmap_page_attr attr)
    914{
    915	return test_and_clear_bit((pnum<<2) + attr,
    916				  bitmap->storage.filemap_attr);
    917}
    918/*
    919 * bitmap_file_set_bit -- called before performing a write to the md device
    920 * to set (and eventually sync) a particular bit in the bitmap file
    921 *
    922 * we set the bit immediately, then we record the page number so that
    923 * when an unplug occurs, we can flush the dirty pages out to disk
    924 */
    925static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
    926{
    927	unsigned long bit;
    928	struct page *page;
    929	void *kaddr;
    930	unsigned long chunk = block >> bitmap->counts.chunkshift;
    931	struct bitmap_storage *store = &bitmap->storage;
    932	unsigned long node_offset = 0;
    933
    934	if (mddev_is_clustered(bitmap->mddev))
    935		node_offset = bitmap->cluster_slot * store->file_pages;
    936
    937	page = filemap_get_page(&bitmap->storage, chunk);
    938	if (!page)
    939		return;
    940	bit = file_page_offset(&bitmap->storage, chunk);
    941
    942	/* set the bit */
    943	kaddr = kmap_atomic(page);
    944	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
    945		set_bit(bit, kaddr);
    946	else
    947		set_bit_le(bit, kaddr);
    948	kunmap_atomic(kaddr);
    949	pr_debug("set file bit %lu page %lu\n", bit, page->index);
    950	/* record page number so it gets flushed to disk when unplug occurs */
    951	set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
    952}
    953
    954static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
    955{
    956	unsigned long bit;
    957	struct page *page;
    958	void *paddr;
    959	unsigned long chunk = block >> bitmap->counts.chunkshift;
    960	struct bitmap_storage *store = &bitmap->storage;
    961	unsigned long node_offset = 0;
    962
    963	if (mddev_is_clustered(bitmap->mddev))
    964		node_offset = bitmap->cluster_slot * store->file_pages;
    965
    966	page = filemap_get_page(&bitmap->storage, chunk);
    967	if (!page)
    968		return;
    969	bit = file_page_offset(&bitmap->storage, chunk);
    970	paddr = kmap_atomic(page);
    971	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
    972		clear_bit(bit, paddr);
    973	else
    974		clear_bit_le(bit, paddr);
    975	kunmap_atomic(paddr);
    976	if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
    977		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
    978		bitmap->allclean = 0;
    979	}
    980}
    981
    982static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
    983{
    984	unsigned long bit;
    985	struct page *page;
    986	void *paddr;
    987	unsigned long chunk = block >> bitmap->counts.chunkshift;
    988	int set = 0;
    989
    990	page = filemap_get_page(&bitmap->storage, chunk);
    991	if (!page)
    992		return -EINVAL;
    993	bit = file_page_offset(&bitmap->storage, chunk);
    994	paddr = kmap_atomic(page);
    995	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
    996		set = test_bit(bit, paddr);
    997	else
    998		set = test_bit_le(bit, paddr);
    999	kunmap_atomic(paddr);
   1000	return set;
   1001}
   1002
   1003
   1004/* this gets called when the md device is ready to unplug its underlying
   1005 * (slave) device queues -- before we let any writes go down, we need to
   1006 * sync the dirty pages of the bitmap file to disk */
   1007void md_bitmap_unplug(struct bitmap *bitmap)
   1008{
   1009	unsigned long i;
   1010	int dirty, need_write;
   1011	int writing = 0;
   1012
   1013	if (!bitmap || !bitmap->storage.filemap ||
   1014	    test_bit(BITMAP_STALE, &bitmap->flags))
   1015		return;
   1016
   1017	/* look at each page to see if there are any set bits that need to be
   1018	 * flushed out to disk */
   1019	for (i = 0; i < bitmap->storage.file_pages; i++) {
   1020		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
   1021		need_write = test_and_clear_page_attr(bitmap, i,
   1022						      BITMAP_PAGE_NEEDWRITE);
   1023		if (dirty || need_write) {
   1024			if (!writing) {
   1025				md_bitmap_wait_writes(bitmap);
   1026				if (bitmap->mddev->queue)
   1027					blk_add_trace_msg(bitmap->mddev->queue,
   1028							  "md bitmap_unplug");
   1029			}
   1030			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
   1031			write_page(bitmap, bitmap->storage.filemap[i], 0);
   1032			writing = 1;
   1033		}
   1034	}
   1035	if (writing)
   1036		md_bitmap_wait_writes(bitmap);
   1037
   1038	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
   1039		md_bitmap_file_kick(bitmap);
   1040}
   1041EXPORT_SYMBOL(md_bitmap_unplug);
   1042
   1043static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 *
 * Returns 0 on success.  On failure returns -ENOSPC if the bitmap file
 * is shorter than store->bytes, the error from reading a page, or -EIO
 * if BITMAP_WRITE_ERROR is flagged after rewriting an out-of-date page.
 */
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
	struct page *page = NULL;
	unsigned long bit_cnt = 0;
	struct file *file;
	unsigned long offset;
	int outofdate;
	int ret = -ENOSPC;
	void *paddr;
	struct bitmap_storage *store = &bitmap->storage;

	chunks = bitmap->counts.chunks;
	file = store->file;

	if (!file && !bitmap->mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
		store->filemap = NULL;
		store->file_pages = 0;
		for (i = 0; i < chunks ; i++) {
			/*
			 * There is no disk bit to consult: every chunk gets its
			 * memory bit set; 'needed' is true only for chunks that
			 * end at or after 'start'.
			 */
			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
		}
		return 0;
	}

	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	if (outofdate)
		pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));

	/* ret is still -ENOSPC here, which is what a too-short file reports */
	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		pr_warn("%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			store->bytes);
		goto err;
	}

	/* ~0L can never match a real page index, so the first chunk reads a page */
	oldindex = ~0L;
	/*
	 * With an internal (non-external) bitmap the superblock sits at the
	 * start of the first page; 'offset' keeps the out-of-date fill below
	 * from overwriting it.
	 */
	offset = 0;
	if (!bitmap->mddev->bitmap_info.external)
		offset = sizeof(bitmap_super_t);

	/* on a clustered array, skip to the pages belonging to this node's slot */
	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

	for (i = 0; i < chunks; i++) {
		int b;
		index = file_page_index(&bitmap->storage, i);
		bit = file_page_offset(&bitmap->storage, i);
		if (index != oldindex) { /* this is a new page, read it in */
			int count;
			/* the last page may be only partly used by the bitmap */
			if (index == store->file_pages-1)
				count = store->bytes - index * PAGE_SIZE;
			else
				count = PAGE_SIZE;
			page = store->filemap[index];
			if (file)
				ret = read_page(file, index, bitmap,
						count, page);
			else
				ret = read_sb_page(
					bitmap->mddev,
					bitmap->mddev->bitmap_info.offset,
					page,
					index + node_offset, count);

			if (ret)
				goto err;

			oldindex = index;

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
				 * whole page and write it out
				 */
				paddr = kmap_atomic(page);
				memset(paddr + offset, 0xff,
				       PAGE_SIZE - offset);
				kunmap_atomic(paddr);
				write_page(bitmap, page, 1);

				/* only reported if the write flagged an error */
				ret = -EIO;
				if (test_bit(BITMAP_WRITE_ERROR,
					     &bitmap->flags))
					goto err;
			}
		}
		paddr = kmap_atomic(page);
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
			b = test_bit(bit, paddr);
		else
			b = test_bit_le(bit, paddr);
		kunmap_atomic(paddr);
		if (b) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
				      >= start);
			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
			bit_cnt++;
		}
		/* the superblock offset applies to the first page only */
		offset = 0;
	}

	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
		 bmname(bitmap), store->file_pages,
		 bit_cnt, chunks);

	return 0;

 err:
	pr_warn("%s: bitmap initialisation failed: %d\n",
		bmname(bitmap), ret);
	return ret;
}
   1178
   1179void md_bitmap_write_all(struct bitmap *bitmap)
   1180{
   1181	/* We don't actually write all bitmap blocks here,
   1182	 * just flag them as needing to be written
   1183	 */
   1184	int i;
   1185
   1186	if (!bitmap || !bitmap->storage.filemap)
   1187		return;
   1188	if (bitmap->storage.file)
   1189		/* Only one copy, so nothing needed */
   1190		return;
   1191
   1192	for (i = 0; i < bitmap->storage.file_pages; i++)
   1193		set_page_attr(bitmap, i,
   1194			      BITMAP_PAGE_NEEDWRITE);
   1195	bitmap->allclean = 0;
   1196}
   1197
   1198static void md_bitmap_count_page(struct bitmap_counts *bitmap,
   1199				 sector_t offset, int inc)
   1200{
   1201	sector_t chunk = offset >> bitmap->chunkshift;
   1202	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
   1203	bitmap->bp[page].count += inc;
   1204	md_bitmap_checkfree(bitmap, page);
   1205}
   1206
   1207static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
   1208{
   1209	sector_t chunk = offset >> bitmap->chunkshift;
   1210	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
   1211	struct bitmap_page *bp = &bitmap->bp[page];
   1212
   1213	if (!bp->pending)
   1214		bp->pending = 1;
   1215}
   1216
   1217static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
   1218					       sector_t offset, sector_t *blocks,
   1219					       int create);
   1220
/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 *
 * One pass does, in order: promote PENDING file pages to NEEDWRITE,
 * optionally refresh events_cleared in the superblock page, age the
 * in-memory counters (2 -> 1, 1 -> 0 with the file bit cleared), and
 * finally start writeout of NEEDWRITE pages up to the first DIRTY one.
 */

void md_bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
	unsigned long j;
	unsigned long nextpage;
	sector_t blocks;
	struct bitmap_counts *counts;

	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
	mutex_lock(&mddev->bitmap_info.mutex);
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return;
	}
	/* not yet daemon_sleep jiffies since the last run: nothing to do */
	if (time_before(jiffies, bitmap->daemon_lastrun
			+ mddev->bitmap_info.daemon_sleep))
		goto done;

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		/* nothing pending: stop the periodic wakeups */
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
		goto done;
	}
	/* assume clean; anything found below that still needs work resets it */
	bitmap->allclean = 1;

	if (bitmap->mddev->queue)
		blk_add_trace_msg(bitmap->mddev->queue,
				  "md bitmap_daemon_work");

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
	 */
	for (j = 0; j < bitmap->storage.file_pages; j++)
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_PENDING))
			set_page_attr(bitmap, j,
				      BITMAP_PAGE_NEEDWRITE);

	if (bitmap->need_sync &&
	    mddev->bitmap_info.external == 0) {
		/* Arrange for superblock update as well as
		 * other changes */
		bitmap_super_t *sb;
		bitmap->need_sync = 0;
		if (bitmap->storage.filemap) {
			sb = kmap_atomic(bitmap->storage.sb_page);
			sb->events_cleared =
				cpu_to_le64(bitmap->events_cleared);
			kunmap_atomic(sb);
			/* file page 0 is flagged so the updated superblock gets written */
			set_page_attr(bitmap, 0,
				      BITMAP_PAGE_NEEDWRITE);
		}
	}
	/* Now look at the bitmap counters and if any are '2' or '1',
	 * decrement and handle accordingly.
	 */
	counts = &bitmap->counts;
	spin_lock_irq(&counts->lock);
	nextpage = 0;
	for (j = 0; j < counts->chunks; j++) {
		bitmap_counter_t *bmc;
		sector_t  block = (sector_t)j << counts->chunkshift;

		/* j is the first chunk of a counter page */
		if (j == nextpage) {
			nextpage += PAGE_COUNTER_RATIO;
			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
				/*
				 * Nothing pending on this counter page: move j to
				 * its last chunk so the loop's j++ lands on the
				 * next page.
				 */
				j |= PAGE_COUNTER_MASK;
				continue;
			}
			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
		}

		bmc = md_bitmap_get_counter(counts, block, &blocks, 0);
		if (!bmc) {
			/* no counter available here: skip the rest of this page */
			j |= PAGE_COUNTER_MASK;
			continue;
		}
		if (*bmc == 1 && !bitmap->need_sync) {
			/* We can clear the bit */
			*bmc = 0;
			md_bitmap_count_page(counts, block, -1);
			md_bitmap_file_clear_bit(bitmap, block);
		} else if (*bmc && *bmc <= 2) {
			/* age the counter to 1 and revisit it on a later pass */
			*bmc = 1;
			md_bitmap_set_pending(counts, block);
			bitmap->allclean = 0;
		}
	}
	spin_unlock_irq(&counts->lock);

	md_bitmap_wait_writes(bitmap);
	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
	 * If we find any DIRTY page we stop there and let bitmap_unplug
	 * handle all the rest.  This is important in the case where
	 * the first block holds the superblock and it has been updated.
	 * We mustn't write any other blocks before the superblock.
	 */
	for (j = 0;
	     j < bitmap->storage.file_pages
		     && !test_bit(BITMAP_STALE, &bitmap->flags);
	     j++) {
		if (test_page_attr(bitmap, j,
				   BITMAP_PAGE_DIRTY))
			/* bitmap_unplug will handle the rest */
			break;
		if (bitmap->storage.filemap &&
		    test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_NEEDWRITE)) {
			write_page(bitmap, bitmap->storage.filemap[j], 0);
		}
	}

 done:
	/* work remains: make sure the thread wakes again after daemon_sleep */
	if (bitmap->allclean == 0)
		mddev->thread->timeout =
			mddev->bitmap_info.daemon_sleep;
	mutex_unlock(&mddev->bitmap_info.mutex);
}
   1350
   1351static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
   1352					       sector_t offset, sector_t *blocks,
   1353					       int create)
   1354__releases(bitmap->lock)
   1355__acquires(bitmap->lock)
   1356{
   1357	/* If 'create', we might release the lock and reclaim it.
   1358	 * The lock must have been taken with interrupts enabled.
   1359	 * If !create, we don't release the lock.
   1360	 */
   1361	sector_t chunk = offset >> bitmap->chunkshift;
   1362	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
   1363	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
   1364	sector_t csize;
   1365	int err;
   1366
   1367	err = md_bitmap_checkpage(bitmap, page, create, 0);
   1368
   1369	if (bitmap->bp[page].hijacked ||
   1370	    bitmap->bp[page].map == NULL)
   1371		csize = ((sector_t)1) << (bitmap->chunkshift +
   1372					  PAGE_COUNTER_SHIFT);
   1373	else
   1374		csize = ((sector_t)1) << bitmap->chunkshift;
   1375	*blocks = csize - (offset & (csize - 1));
   1376
   1377	if (err < 0)
   1378		return NULL;
   1379
   1380	/* now locked ... */
   1381
   1382	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
   1383		/* should we use the first or second counter field
   1384		 * of the hijacked pointer? */
   1385		int hi = (pageoff > PAGE_COUNTER_MASK);
   1386		return  &((bitmap_counter_t *)
   1387			  &bitmap->bp[page].map)[hi];
   1388	} else /* page is allocated */
   1389		return (bitmap_counter_t *)
   1390			&(bitmap->bp[page].map[pageoff]);
   1391}
   1392
   1393int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
   1394{
   1395	if (!bitmap)
   1396		return 0;
   1397
   1398	if (behind) {
   1399		int bw;
   1400		atomic_inc(&bitmap->behind_writes);
   1401		bw = atomic_read(&bitmap->behind_writes);
   1402		if (bw > bitmap->behind_writes_used)
   1403			bitmap->behind_writes_used = bw;
   1404
   1405		pr_debug("inc write-behind count %d/%lu\n",
   1406			 bw, bitmap->mddev->bitmap_info.max_write_behind);
   1407	}
   1408
   1409	while (sectors) {
   1410		sector_t blocks;
   1411		bitmap_counter_t *bmc;
   1412
   1413		spin_lock_irq(&bitmap->counts.lock);
   1414		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
   1415		if (!bmc) {
   1416			spin_unlock_irq(&bitmap->counts.lock);
   1417			return 0;
   1418		}
   1419
   1420		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
   1421			DEFINE_WAIT(__wait);
   1422			/* note that it is safe to do the prepare_to_wait
   1423			 * after the test as long as we do it before dropping
   1424			 * the spinlock.
   1425			 */
   1426			prepare_to_wait(&bitmap->overflow_wait, &__wait,
   1427					TASK_UNINTERRUPTIBLE);
   1428			spin_unlock_irq(&bitmap->counts.lock);
   1429			schedule();
   1430			finish_wait(&bitmap->overflow_wait, &__wait);
   1431			continue;
   1432		}
   1433
   1434		switch (*bmc) {
   1435		case 0:
   1436			md_bitmap_file_set_bit(bitmap, offset);
   1437			md_bitmap_count_page(&bitmap->counts, offset, 1);
   1438			fallthrough;
   1439		case 1:
   1440			*bmc = 2;
   1441		}
   1442
   1443		(*bmc)++;
   1444
   1445		spin_unlock_irq(&bitmap->counts.lock);
   1446
   1447		offset += blocks;
   1448		if (sectors > blocks)
   1449			sectors -= blocks;
   1450		else
   1451			sectors = 0;
   1452	}
   1453	return 0;
   1454}
   1455EXPORT_SYMBOL(md_bitmap_startwrite);
   1456
   1457void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
   1458			unsigned long sectors, int success, int behind)
   1459{
   1460	if (!bitmap)
   1461		return;
   1462	if (behind) {
   1463		if (atomic_dec_and_test(&bitmap->behind_writes))
   1464			wake_up(&bitmap->behind_wait);
   1465		pr_debug("dec write-behind count %d/%lu\n",
   1466			 atomic_read(&bitmap->behind_writes),
   1467			 bitmap->mddev->bitmap_info.max_write_behind);
   1468	}
   1469
   1470	while (sectors) {
   1471		sector_t blocks;
   1472		unsigned long flags;
   1473		bitmap_counter_t *bmc;
   1474
   1475		spin_lock_irqsave(&bitmap->counts.lock, flags);
   1476		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
   1477		if (!bmc) {
   1478			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
   1479			return;
   1480		}
   1481
   1482		if (success && !bitmap->mddev->degraded &&
   1483		    bitmap->events_cleared < bitmap->mddev->events) {
   1484			bitmap->events_cleared = bitmap->mddev->events;
   1485			bitmap->need_sync = 1;
   1486			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
   1487		}
   1488
   1489		if (!success && !NEEDED(*bmc))
   1490			*bmc |= NEEDED_MASK;
   1491
   1492		if (COUNTER(*bmc) == COUNTER_MAX)
   1493			wake_up(&bitmap->overflow_wait);
   1494
   1495		(*bmc)--;
   1496		if (*bmc <= 2) {
   1497			md_bitmap_set_pending(&bitmap->counts, offset);
   1498			bitmap->allclean = 0;
   1499		}
   1500		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
   1501		offset += blocks;
   1502		if (sectors > blocks)
   1503			sectors -= blocks;
   1504		else
   1505			sectors = 0;
   1506	}
   1507}
   1508EXPORT_SYMBOL(md_bitmap_endwrite);
   1509
   1510static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
   1511			       int degraded)
   1512{
   1513	bitmap_counter_t *bmc;
   1514	int rv;
   1515	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
   1516		*blocks = 1024;
   1517		return 1; /* always resync if no bitmap */
   1518	}
   1519	spin_lock_irq(&bitmap->counts.lock);
   1520	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
   1521	rv = 0;
   1522	if (bmc) {
   1523		/* locked */
   1524		if (RESYNC(*bmc))
   1525			rv = 1;
   1526		else if (NEEDED(*bmc)) {
   1527			rv = 1;
   1528			if (!degraded) { /* don't set/clear bits if degraded */
   1529				*bmc |= RESYNC_MASK;
   1530				*bmc &= ~NEEDED_MASK;
   1531			}
   1532		}
   1533	}
   1534	spin_unlock_irq(&bitmap->counts.lock);
   1535	return rv;
   1536}
   1537
   1538int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
   1539			 int degraded)
   1540{
   1541	/* bitmap_start_sync must always report on multiples of whole
   1542	 * pages, otherwise resync (which is very PAGE_SIZE based) will
   1543	 * get confused.
   1544	 * So call __bitmap_start_sync repeatedly (if needed) until
   1545	 * At least PAGE_SIZE>>9 blocks are covered.
   1546	 * Return the 'or' of the result.
   1547	 */
   1548	int rv = 0;
   1549	sector_t blocks1;
   1550
   1551	*blocks = 0;
   1552	while (*blocks < (PAGE_SIZE>>9)) {
   1553		rv |= __bitmap_start_sync(bitmap, offset,
   1554					  &blocks1, degraded);
   1555		offset += blocks1;
   1556		*blocks += blocks1;
   1557	}
   1558	return rv;
   1559}
   1560EXPORT_SYMBOL(md_bitmap_start_sync);
   1561
   1562void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
   1563{
   1564	bitmap_counter_t *bmc;
   1565	unsigned long flags;
   1566
   1567	if (bitmap == NULL) {
   1568		*blocks = 1024;
   1569		return;
   1570	}
   1571	spin_lock_irqsave(&bitmap->counts.lock, flags);
   1572	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
   1573	if (bmc == NULL)
   1574		goto unlock;
   1575	/* locked */
   1576	if (RESYNC(*bmc)) {
   1577		*bmc &= ~RESYNC_MASK;
   1578
   1579		if (!NEEDED(*bmc) && aborted)
   1580			*bmc |= NEEDED_MASK;
   1581		else {
   1582			if (*bmc <= 2) {
   1583				md_bitmap_set_pending(&bitmap->counts, offset);
   1584				bitmap->allclean = 0;
   1585			}
   1586		}
   1587	}
   1588 unlock:
   1589	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
   1590}
   1591EXPORT_SYMBOL(md_bitmap_end_sync);
   1592
   1593void md_bitmap_close_sync(struct bitmap *bitmap)
   1594{
   1595	/* Sync has finished, and any bitmap chunks that weren't synced
   1596	 * properly have been aborted.  It remains to us to clear the
   1597	 * RESYNC bit wherever it is still on
   1598	 */
   1599	sector_t sector = 0;
   1600	sector_t blocks;
   1601	if (!bitmap)
   1602		return;
   1603	while (sector < bitmap->mddev->resync_max_sectors) {
   1604		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
   1605		sector += blocks;
   1606	}
   1607}
   1608EXPORT_SYMBOL(md_bitmap_close_sync);
   1609
   1610void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
   1611{
   1612	sector_t s = 0;
   1613	sector_t blocks;
   1614
   1615	if (!bitmap)
   1616		return;
   1617	if (sector == 0) {
   1618		bitmap->last_end_sync = jiffies;
   1619		return;
   1620	}
   1621	if (!force && time_before(jiffies, (bitmap->last_end_sync
   1622				  + bitmap->mddev->bitmap_info.daemon_sleep)))
   1623		return;
   1624	wait_event(bitmap->mddev->recovery_wait,
   1625		   atomic_read(&bitmap->mddev->recovery_active) == 0);
   1626
   1627	bitmap->mddev->curr_resync_completed = sector;
   1628	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
   1629	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
   1630	s = 0;
   1631	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
   1632		md_bitmap_end_sync(bitmap, s, &blocks, 0);
   1633		s += blocks;
   1634	}
   1635	bitmap->last_end_sync = jiffies;
   1636	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
   1637}
   1638EXPORT_SYMBOL(md_bitmap_cond_end_sync);
   1639
   1640void md_bitmap_sync_with_cluster(struct mddev *mddev,
   1641			      sector_t old_lo, sector_t old_hi,
   1642			      sector_t new_lo, sector_t new_hi)
   1643{
   1644	struct bitmap *bitmap = mddev->bitmap;
   1645	sector_t sector, blocks = 0;
   1646
   1647	for (sector = old_lo; sector < new_lo; ) {
   1648		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
   1649		sector += blocks;
   1650	}
   1651	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
   1652
   1653	for (sector = old_hi; sector < new_hi; ) {
   1654		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
   1655		sector += blocks;
   1656	}
   1657	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
   1658}
   1659EXPORT_SYMBOL(md_bitmap_sync_with_cluster);
   1660
   1661static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
   1662{
   1663	/* For each chunk covered by any of these sectors, set the
   1664	 * counter to 2 and possibly set resync_needed.  They should all
   1665	 * be 0 at this point
   1666	 */
   1667
   1668	sector_t secs;
   1669	bitmap_counter_t *bmc;
   1670	spin_lock_irq(&bitmap->counts.lock);
   1671	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
   1672	if (!bmc) {
   1673		spin_unlock_irq(&bitmap->counts.lock);
   1674		return;
   1675	}
   1676	if (!*bmc) {
   1677		*bmc = 2;
   1678		md_bitmap_count_page(&bitmap->counts, offset, 1);
   1679		md_bitmap_set_pending(&bitmap->counts, offset);
   1680		bitmap->allclean = 0;
   1681	}
   1682	if (needed)
   1683		*bmc |= NEEDED_MASK;
   1684	spin_unlock_irq(&bitmap->counts.lock);
   1685}
   1686
   1687/* dirty the memory and file bits for bitmap chunks "s" to "e" */
   1688void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
   1689{
   1690	unsigned long chunk;
   1691
   1692	for (chunk = s; chunk <= e; chunk++) {
   1693		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
   1694		md_bitmap_set_memory_bits(bitmap, sec, 1);
   1695		md_bitmap_file_set_bit(bitmap, sec);
   1696		if (sec < bitmap->mddev->recovery_cp)
   1697			/* We are asserting that the array is dirty,
   1698			 * so move the recovery_cp address back so
   1699			 * that it is obvious that it is dirty
   1700			 */
   1701			bitmap->mddev->recovery_cp = sec;
   1702	}
   1703}
   1704
   1705/*
   1706 * flush out any pending updates
   1707 */
   1708void md_bitmap_flush(struct mddev *mddev)
   1709{
   1710	struct bitmap *bitmap = mddev->bitmap;
   1711	long sleep;
   1712
   1713	if (!bitmap) /* there was no bitmap */
   1714		return;
   1715
   1716	/* run the daemon_work three time to ensure everything is flushed
   1717	 * that can be
   1718	 */
   1719	sleep = mddev->bitmap_info.daemon_sleep * 2;
   1720	bitmap->daemon_lastrun -= sleep;
   1721	md_bitmap_daemon_work(mddev);
   1722	bitmap->daemon_lastrun -= sleep;
   1723	md_bitmap_daemon_work(mddev);
   1724	bitmap->daemon_lastrun -= sleep;
   1725	md_bitmap_daemon_work(mddev);
   1726	if (mddev->bitmap_info.external)
   1727		md_super_wait(mddev);
   1728	md_bitmap_update_sb(bitmap);
   1729}
   1730
   1731/*
   1732 * free memory that was allocated
   1733 */
   1734void md_bitmap_free(struct bitmap *bitmap)
   1735{
   1736	unsigned long k, pages;
   1737	struct bitmap_page *bp;
   1738
   1739	if (!bitmap) /* there was no bitmap */
   1740		return;
   1741
   1742	if (bitmap->sysfs_can_clear)
   1743		sysfs_put(bitmap->sysfs_can_clear);
   1744
   1745	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
   1746		bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
   1747		md_cluster_stop(bitmap->mddev);
   1748
   1749	/* Shouldn't be needed - but just in case.... */
   1750	wait_event(bitmap->write_wait,
   1751		   atomic_read(&bitmap->pending_writes) == 0);
   1752
   1753	/* release the bitmap file  */
   1754	md_bitmap_file_unmap(&bitmap->storage);
   1755
   1756	bp = bitmap->counts.bp;
   1757	pages = bitmap->counts.pages;
   1758
   1759	/* free all allocated memory */
   1760
   1761	if (bp) /* deallocate the page memory */
   1762		for (k = 0; k < pages; k++)
   1763			if (bp[k].map && !bp[k].hijacked)
   1764				kfree(bp[k].map);
   1765	kfree(bp);
   1766	kfree(bitmap);
   1767}
   1768EXPORT_SYMBOL(md_bitmap_free);
   1769
   1770void md_bitmap_wait_behind_writes(struct mddev *mddev)
   1771{
   1772	struct bitmap *bitmap = mddev->bitmap;
   1773
   1774	/* wait for behind writes to complete */
   1775	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
   1776		pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
   1777			 mdname(mddev));
   1778		/* need to kick something here to make sure I/O goes? */
   1779		wait_event(bitmap->behind_wait,
   1780			   atomic_read(&bitmap->behind_writes) == 0);
   1781	}
   1782}
   1783
   1784void md_bitmap_destroy(struct mddev *mddev)
   1785{
   1786	struct bitmap *bitmap = mddev->bitmap;
   1787
   1788	if (!bitmap) /* there was no bitmap */
   1789		return;
   1790
   1791	md_bitmap_wait_behind_writes(mddev);
   1792	if (!mddev->serialize_policy)
   1793		mddev_destroy_serial_pool(mddev, NULL, true);
   1794
   1795	mutex_lock(&mddev->bitmap_info.mutex);
   1796	spin_lock(&mddev->lock);
   1797	mddev->bitmap = NULL; /* disconnect from the md device */
   1798	spin_unlock(&mddev->lock);
   1799	mutex_unlock(&mddev->bitmap_info.mutex);
   1800	if (mddev->thread)
   1801		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
   1802
   1803	md_bitmap_free(bitmap);
   1804}
   1805
   1806/*
   1807 * initialize the bitmap structure
   1808 * if this returns an error, bitmap_destroy must be called to do clean up
   1809 * once mddev->bitmap is set
   1810 */
   1811struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
   1812{
   1813	struct bitmap *bitmap;
   1814	sector_t blocks = mddev->resync_max_sectors;
   1815	struct file *file = mddev->bitmap_info.file;
   1816	int err;
   1817	struct kernfs_node *bm = NULL;
   1818
   1819	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
   1820
   1821	BUG_ON(file && mddev->bitmap_info.offset);
   1822
   1823	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
   1824		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
   1825			  mdname(mddev));
   1826		return ERR_PTR(-EBUSY);
   1827	}
   1828
   1829	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
   1830	if (!bitmap)
   1831		return ERR_PTR(-ENOMEM);
   1832
   1833	spin_lock_init(&bitmap->counts.lock);
   1834	atomic_set(&bitmap->pending_writes, 0);
   1835	init_waitqueue_head(&bitmap->write_wait);
   1836	init_waitqueue_head(&bitmap->overflow_wait);
   1837	init_waitqueue_head(&bitmap->behind_wait);
   1838
   1839	bitmap->mddev = mddev;
   1840	bitmap->cluster_slot = slot;
   1841
   1842	if (mddev->kobj.sd)
   1843		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
   1844	if (bm) {
   1845		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
   1846		sysfs_put(bm);
   1847	} else
   1848		bitmap->sysfs_can_clear = NULL;
   1849
   1850	bitmap->storage.file = file;
   1851	if (file) {
   1852		get_file(file);
   1853		/* As future accesses to this file will use bmap,
   1854		 * and bypass the page cache, we must sync the file
   1855		 * first.
   1856		 */
   1857		vfs_fsync(file, 1);
   1858	}
   1859	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
   1860	if (!mddev->bitmap_info.external) {
   1861		/*
   1862		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
   1863		 * instructing us to create a new on-disk bitmap instance.
   1864		 */
   1865		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
   1866			err = md_bitmap_new_disk_sb(bitmap);
   1867		else
   1868			err = md_bitmap_read_sb(bitmap);
   1869	} else {
   1870		err = 0;
   1871		if (mddev->bitmap_info.chunksize == 0 ||
   1872		    mddev->bitmap_info.daemon_sleep == 0)
   1873			/* chunksize and time_base need to be
   1874			 * set first. */
   1875			err = -EINVAL;
   1876	}
   1877	if (err)
   1878		goto error;
   1879
   1880	bitmap->daemon_lastrun = jiffies;
   1881	err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
   1882	if (err)
   1883		goto error;
   1884
   1885	pr_debug("created bitmap (%lu pages) for device %s\n",
   1886		 bitmap->counts.pages, bmname(bitmap));
   1887
   1888	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
   1889	if (err)
   1890		goto error;
   1891
   1892	return bitmap;
   1893 error:
   1894	md_bitmap_free(bitmap);
   1895	return ERR_PTR(err);
   1896}
   1897
   1898int md_bitmap_load(struct mddev *mddev)
   1899{
   1900	int err = 0;
   1901	sector_t start = 0;
   1902	sector_t sector = 0;
   1903	struct bitmap *bitmap = mddev->bitmap;
   1904	struct md_rdev *rdev;
   1905
   1906	if (!bitmap)
   1907		goto out;
   1908
   1909	rdev_for_each(rdev, mddev)
   1910		mddev_create_serial_pool(mddev, rdev, true);
   1911
   1912	if (mddev_is_clustered(mddev))
   1913		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
   1914
   1915	/* Clear out old bitmap info first:  Either there is none, or we
   1916	 * are resuming after someone else has possibly changed things,
   1917	 * so we should forget old cached info.
   1918	 * All chunks should be clean, but some might need_sync.
   1919	 */
   1920	while (sector < mddev->resync_max_sectors) {
   1921		sector_t blocks;
   1922		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
   1923		sector += blocks;
   1924	}
   1925	md_bitmap_close_sync(bitmap);
   1926
   1927	if (mddev->degraded == 0
   1928	    || bitmap->events_cleared == mddev->events)
   1929		/* no need to keep dirty bits to optimise a
   1930		 * re-add of a missing device */
   1931		start = mddev->recovery_cp;
   1932
   1933	mutex_lock(&mddev->bitmap_info.mutex);
   1934	err = md_bitmap_init_from_disk(bitmap, start);
   1935	mutex_unlock(&mddev->bitmap_info.mutex);
   1936
   1937	if (err)
   1938		goto out;
   1939	clear_bit(BITMAP_STALE, &bitmap->flags);
   1940
   1941	/* Kick recovery in case any bits were set */
   1942	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
   1943
   1944	mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
   1945	md_wakeup_thread(mddev->thread);
   1946
   1947	md_bitmap_update_sb(bitmap);
   1948
   1949	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
   1950		err = -EIO;
   1951out:
   1952	return err;
   1953}
   1954EXPORT_SYMBOL_GPL(md_bitmap_load);
   1955
   1956/* caller need to free returned bitmap with md_bitmap_free() */
   1957struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
   1958{
   1959	int rv = 0;
   1960	struct bitmap *bitmap;
   1961
   1962	bitmap = md_bitmap_create(mddev, slot);
   1963	if (IS_ERR(bitmap)) {
   1964		rv = PTR_ERR(bitmap);
   1965		return ERR_PTR(rv);
   1966	}
   1967
   1968	rv = md_bitmap_init_from_disk(bitmap, 0);
   1969	if (rv) {
   1970		md_bitmap_free(bitmap);
   1971		return ERR_PTR(rv);
   1972	}
   1973
   1974	return bitmap;
   1975}
   1976EXPORT_SYMBOL(get_bitmap_from_slot);
   1977
   1978/* Loads the bitmap associated with slot and copies the resync information
   1979 * to our bitmap
   1980 */
   1981int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
   1982		sector_t *low, sector_t *high, bool clear_bits)
   1983{
   1984	int rv = 0, i, j;
   1985	sector_t block, lo = 0, hi = 0;
   1986	struct bitmap_counts *counts;
   1987	struct bitmap *bitmap;
   1988
   1989	bitmap = get_bitmap_from_slot(mddev, slot);
   1990	if (IS_ERR(bitmap)) {
   1991		pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
   1992		return -1;
   1993	}
   1994
   1995	counts = &bitmap->counts;
   1996	for (j = 0; j < counts->chunks; j++) {
   1997		block = (sector_t)j << counts->chunkshift;
   1998		if (md_bitmap_file_test_bit(bitmap, block)) {
   1999			if (!lo)
   2000				lo = block;
   2001			hi = block;
   2002			md_bitmap_file_clear_bit(bitmap, block);
   2003			md_bitmap_set_memory_bits(mddev->bitmap, block, 1);
   2004			md_bitmap_file_set_bit(mddev->bitmap, block);
   2005		}
   2006	}
   2007
   2008	if (clear_bits) {
   2009		md_bitmap_update_sb(bitmap);
   2010		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
   2011		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
   2012		for (i = 0; i < bitmap->storage.file_pages; i++)
   2013			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
   2014				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
   2015		md_bitmap_unplug(bitmap);
   2016	}
   2017	md_bitmap_unplug(mddev->bitmap);
   2018	*low = lo;
   2019	*high = hi;
   2020	md_bitmap_free(bitmap);
   2021
   2022	return rv;
   2023}
   2024EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot);
   2025
   2026
   2027void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
   2028{
   2029	unsigned long chunk_kb;
   2030	struct bitmap_counts *counts;
   2031
   2032	if (!bitmap)
   2033		return;
   2034
   2035	counts = &bitmap->counts;
   2036
   2037	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
   2038	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
   2039		   "%lu%s chunk",
   2040		   counts->pages - counts->missing_pages,
   2041		   counts->pages,
   2042		   (counts->pages - counts->missing_pages)
   2043		   << (PAGE_SHIFT - 10),
   2044		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
   2045		   chunk_kb ? "KB" : "B");
   2046	if (bitmap->storage.file) {
   2047		seq_printf(seq, ", file: ");
   2048		seq_file_path(seq, bitmap->storage.file, " \t\n");
   2049	}
   2050
   2051	seq_printf(seq, "\n");
   2052}
   2053
   2054int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
   2055		  int chunksize, int init)
   2056{
   2057	/* If chunk_size is 0, choose an appropriate chunk size.
   2058	 * Then possibly allocate new storage space.
   2059	 * Then quiesce, copy bits, replace bitmap, and re-start
   2060	 *
   2061	 * This function is called both to set up the initial bitmap
   2062	 * and to resize the bitmap while the array is active.
   2063	 * If this happens as a result of the array being resized,
   2064	 * chunksize will be zero, and we need to choose a suitable
   2065	 * chunksize, otherwise we use what we are given.
   2066	 */
   2067	struct bitmap_storage store;
   2068	struct bitmap_counts old_counts;
   2069	unsigned long chunks;
   2070	sector_t block;
   2071	sector_t old_blocks, new_blocks;
   2072	int chunkshift;
   2073	int ret = 0;
   2074	long pages;
   2075	struct bitmap_page *new_bp;
   2076
   2077	if (bitmap->storage.file && !init) {
   2078		pr_info("md: cannot resize file-based bitmap\n");
   2079		return -EINVAL;
   2080	}
   2081
   2082	if (chunksize == 0) {
   2083		/* If there is enough space, leave the chunk size unchanged,
   2084		 * else increase by factor of two until there is enough space.
   2085		 */
   2086		long bytes;
   2087		long space = bitmap->mddev->bitmap_info.space;
   2088
   2089		if (space == 0) {
   2090			/* We don't know how much space there is, so limit
   2091			 * to current size - in sectors.
   2092			 */
   2093			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
   2094			if (!bitmap->mddev->bitmap_info.external)
   2095				bytes += sizeof(bitmap_super_t);
   2096			space = DIV_ROUND_UP(bytes, 512);
   2097			bitmap->mddev->bitmap_info.space = space;
   2098		}
   2099		chunkshift = bitmap->counts.chunkshift;
   2100		chunkshift--;
   2101		do {
   2102			/* 'chunkshift' is shift from block size to chunk size */
   2103			chunkshift++;
   2104			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
   2105			bytes = DIV_ROUND_UP(chunks, 8);
   2106			if (!bitmap->mddev->bitmap_info.external)
   2107				bytes += sizeof(bitmap_super_t);
   2108		} while (bytes > (space << 9));
   2109	} else
   2110		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
   2111
   2112	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
   2113	memset(&store, 0, sizeof(store));
   2114	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
   2115		ret = md_bitmap_storage_alloc(&store, chunks,
   2116					      !bitmap->mddev->bitmap_info.external,
   2117					      mddev_is_clustered(bitmap->mddev)
   2118					      ? bitmap->cluster_slot : 0);
   2119	if (ret) {
   2120		md_bitmap_file_unmap(&store);
   2121		goto err;
   2122	}
   2123
   2124	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
   2125
   2126	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
   2127	ret = -ENOMEM;
   2128	if (!new_bp) {
   2129		md_bitmap_file_unmap(&store);
   2130		goto err;
   2131	}
   2132
   2133	if (!init)
   2134		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
   2135
   2136	store.file = bitmap->storage.file;
   2137	bitmap->storage.file = NULL;
   2138
   2139	if (store.sb_page && bitmap->storage.sb_page)
   2140		memcpy(page_address(store.sb_page),
   2141		       page_address(bitmap->storage.sb_page),
   2142		       sizeof(bitmap_super_t));
   2143	spin_lock_irq(&bitmap->counts.lock);
   2144	md_bitmap_file_unmap(&bitmap->storage);
   2145	bitmap->storage = store;
   2146
   2147	old_counts = bitmap->counts;
   2148	bitmap->counts.bp = new_bp;
   2149	bitmap->counts.pages = pages;
   2150	bitmap->counts.missing_pages = pages;
   2151	bitmap->counts.chunkshift = chunkshift;
   2152	bitmap->counts.chunks = chunks;
   2153	bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
   2154						     BITMAP_BLOCK_SHIFT);
   2155
   2156	blocks = min(old_counts.chunks << old_counts.chunkshift,
   2157		     chunks << chunkshift);
   2158
   2159	/* For cluster raid, need to pre-allocate bitmap */
   2160	if (mddev_is_clustered(bitmap->mddev)) {
   2161		unsigned long page;
   2162		for (page = 0; page < pages; page++) {
   2163			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
   2164			if (ret) {
   2165				unsigned long k;
   2166
   2167				/* deallocate the page memory */
   2168				for (k = 0; k < page; k++) {
   2169					kfree(new_bp[k].map);
   2170				}
   2171				kfree(new_bp);
   2172
   2173				/* restore some fields from old_counts */
   2174				bitmap->counts.bp = old_counts.bp;
   2175				bitmap->counts.pages = old_counts.pages;
   2176				bitmap->counts.missing_pages = old_counts.pages;
   2177				bitmap->counts.chunkshift = old_counts.chunkshift;
   2178				bitmap->counts.chunks = old_counts.chunks;
   2179				bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
   2180									     BITMAP_BLOCK_SHIFT);
   2181				blocks = old_counts.chunks << old_counts.chunkshift;
   2182				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
   2183				break;
   2184			} else
   2185				bitmap->counts.bp[page].count += 1;
   2186		}
   2187	}
   2188
   2189	for (block = 0; block < blocks; ) {
   2190		bitmap_counter_t *bmc_old, *bmc_new;
   2191		int set;
   2192
   2193		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
   2194		set = bmc_old && NEEDED(*bmc_old);
   2195
   2196		if (set) {
   2197			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
   2198			if (*bmc_new == 0) {
   2199				/* need to set on-disk bits too. */
   2200				sector_t end = block + new_blocks;
   2201				sector_t start = block >> chunkshift;
   2202				start <<= chunkshift;
   2203				while (start < end) {
   2204					md_bitmap_file_set_bit(bitmap, block);
   2205					start += 1 << chunkshift;
   2206				}
   2207				*bmc_new = 2;
   2208				md_bitmap_count_page(&bitmap->counts, block, 1);
   2209				md_bitmap_set_pending(&bitmap->counts, block);
   2210			}
   2211			*bmc_new |= NEEDED_MASK;
   2212			if (new_blocks < old_blocks)
   2213				old_blocks = new_blocks;
   2214		}
   2215		block += old_blocks;
   2216	}
   2217
   2218	if (bitmap->counts.bp != old_counts.bp) {
   2219		unsigned long k;
   2220		for (k = 0; k < old_counts.pages; k++)
   2221			if (!old_counts.bp[k].hijacked)
   2222				kfree(old_counts.bp[k].map);
   2223		kfree(old_counts.bp);
   2224	}
   2225
   2226	if (!init) {
   2227		int i;
   2228		while (block < (chunks << chunkshift)) {
   2229			bitmap_counter_t *bmc;
   2230			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
   2231			if (bmc) {
   2232				/* new space.  It needs to be resynced, so
   2233				 * we set NEEDED_MASK.
   2234				 */
   2235				if (*bmc == 0) {
   2236					*bmc = NEEDED_MASK | 2;
   2237					md_bitmap_count_page(&bitmap->counts, block, 1);
   2238					md_bitmap_set_pending(&bitmap->counts, block);
   2239				}
   2240			}
   2241			block += new_blocks;
   2242		}
   2243		for (i = 0; i < bitmap->storage.file_pages; i++)
   2244			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
   2245	}
   2246	spin_unlock_irq(&bitmap->counts.lock);
   2247
   2248	if (!init) {
   2249		md_bitmap_unplug(bitmap);
   2250		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
   2251	}
   2252	ret = 0;
   2253err:
   2254	return ret;
   2255}
   2256EXPORT_SYMBOL_GPL(md_bitmap_resize);
   2257
   2258static ssize_t
   2259location_show(struct mddev *mddev, char *page)
   2260{
   2261	ssize_t len;
   2262	if (mddev->bitmap_info.file)
   2263		len = sprintf(page, "file");
   2264	else if (mddev->bitmap_info.offset)
   2265		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
   2266	else
   2267		len = sprintf(page, "none");
   2268	len += sprintf(page+len, "\n");
   2269	return len;
   2270}
   2271
   2272static ssize_t
   2273location_store(struct mddev *mddev, const char *buf, size_t len)
   2274{
   2275	int rv;
   2276
   2277	rv = mddev_lock(mddev);
   2278	if (rv)
   2279		return rv;
   2280	if (mddev->pers) {
   2281		if (!mddev->pers->quiesce) {
   2282			rv = -EBUSY;
   2283			goto out;
   2284		}
   2285		if (mddev->recovery || mddev->sync_thread) {
   2286			rv = -EBUSY;
   2287			goto out;
   2288		}
   2289	}
   2290
   2291	if (mddev->bitmap || mddev->bitmap_info.file ||
   2292	    mddev->bitmap_info.offset) {
   2293		/* bitmap already configured.  Only option is to clear it */
   2294		if (strncmp(buf, "none", 4) != 0) {
   2295			rv = -EBUSY;
   2296			goto out;
   2297		}
   2298		if (mddev->pers) {
   2299			mddev_suspend(mddev);
   2300			md_bitmap_destroy(mddev);
   2301			mddev_resume(mddev);
   2302		}
   2303		mddev->bitmap_info.offset = 0;
   2304		if (mddev->bitmap_info.file) {
   2305			struct file *f = mddev->bitmap_info.file;
   2306			mddev->bitmap_info.file = NULL;
   2307			fput(f);
   2308		}
   2309	} else {
   2310		/* No bitmap, OK to set a location */
   2311		long long offset;
   2312		if (strncmp(buf, "none", 4) == 0)
   2313			/* nothing to be done */;
   2314		else if (strncmp(buf, "file:", 5) == 0) {
   2315			/* Not supported yet */
   2316			rv = -EINVAL;
   2317			goto out;
   2318		} else {
   2319			if (buf[0] == '+')
   2320				rv = kstrtoll(buf+1, 10, &offset);
   2321			else
   2322				rv = kstrtoll(buf, 10, &offset);
   2323			if (rv)
   2324				goto out;
   2325			if (offset == 0) {
   2326				rv = -EINVAL;
   2327				goto out;
   2328			}
   2329			if (mddev->bitmap_info.external == 0 &&
   2330			    mddev->major_version == 0 &&
   2331			    offset != mddev->bitmap_info.default_offset) {
   2332				rv = -EINVAL;
   2333				goto out;
   2334			}
   2335			mddev->bitmap_info.offset = offset;
   2336			if (mddev->pers) {
   2337				struct bitmap *bitmap;
   2338				bitmap = md_bitmap_create(mddev, -1);
   2339				mddev_suspend(mddev);
   2340				if (IS_ERR(bitmap))
   2341					rv = PTR_ERR(bitmap);
   2342				else {
   2343					mddev->bitmap = bitmap;
   2344					rv = md_bitmap_load(mddev);
   2345					if (rv)
   2346						mddev->bitmap_info.offset = 0;
   2347				}
   2348				if (rv) {
   2349					md_bitmap_destroy(mddev);
   2350					mddev_resume(mddev);
   2351					goto out;
   2352				}
   2353				mddev_resume(mddev);
   2354			}
   2355		}
   2356	}
   2357	if (!mddev->external) {
   2358		/* Ensure new bitmap info is stored in
   2359		 * metadata promptly.
   2360		 */
   2361		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
   2362		md_wakeup_thread(mddev->thread);
   2363	}
   2364	rv = 0;
   2365out:
   2366	mddev_unlock(mddev);
   2367	if (rv)
   2368		return rv;
   2369	return len;
   2370}
   2371
   2372static struct md_sysfs_entry bitmap_location =
   2373__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
   2374
   2375/* 'bitmap/space' is the space available at 'location' for the
   2376 * bitmap.  This allows the kernel to know when it is safe to
   2377 * resize the bitmap to match a resized array.
   2378 */
   2379static ssize_t
   2380space_show(struct mddev *mddev, char *page)
   2381{
   2382	return sprintf(page, "%lu\n", mddev->bitmap_info.space);
   2383}
   2384
   2385static ssize_t
   2386space_store(struct mddev *mddev, const char *buf, size_t len)
   2387{
   2388	unsigned long sectors;
   2389	int rv;
   2390
   2391	rv = kstrtoul(buf, 10, &sectors);
   2392	if (rv)
   2393		return rv;
   2394
   2395	if (sectors == 0)
   2396		return -EINVAL;
   2397
   2398	if (mddev->bitmap &&
   2399	    sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
   2400		return -EFBIG; /* Bitmap is too big for this small space */
   2401
   2402	/* could make sure it isn't too big, but that isn't really
   2403	 * needed - user-space should be careful.
   2404	 */
   2405	mddev->bitmap_info.space = sectors;
   2406	return len;
   2407}
   2408
   2409static struct md_sysfs_entry bitmap_space =
   2410__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
   2411
   2412static ssize_t
   2413timeout_show(struct mddev *mddev, char *page)
   2414{
   2415	ssize_t len;
   2416	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
   2417	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
   2418
   2419	len = sprintf(page, "%lu", secs);
   2420	if (jifs)
   2421		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
   2422	len += sprintf(page+len, "\n");
   2423	return len;
   2424}
   2425
   2426static ssize_t
   2427timeout_store(struct mddev *mddev, const char *buf, size_t len)
   2428{
   2429	/* timeout can be set at any time */
   2430	unsigned long timeout;
   2431	int rv = strict_strtoul_scaled(buf, &timeout, 4);
   2432	if (rv)
   2433		return rv;
   2434
   2435	/* just to make sure we don't overflow... */
   2436	if (timeout >= LONG_MAX / HZ)
   2437		return -EINVAL;
   2438
   2439	timeout = timeout * HZ / 10000;
   2440
   2441	if (timeout >= MAX_SCHEDULE_TIMEOUT)
   2442		timeout = MAX_SCHEDULE_TIMEOUT-1;
   2443	if (timeout < 1)
   2444		timeout = 1;
   2445	mddev->bitmap_info.daemon_sleep = timeout;
   2446	if (mddev->thread) {
   2447		/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
   2448		 * the bitmap is all clean and we don't need to
   2449		 * adjust the timeout right now
   2450		 */
   2451		if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
   2452			mddev->thread->timeout = timeout;
   2453			md_wakeup_thread(mddev->thread);
   2454		}
   2455	}
   2456	return len;
   2457}
   2458
   2459static struct md_sysfs_entry bitmap_timeout =
   2460__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
   2461
   2462static ssize_t
   2463backlog_show(struct mddev *mddev, char *page)
   2464{
   2465	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
   2466}
   2467
   2468static ssize_t
   2469backlog_store(struct mddev *mddev, const char *buf, size_t len)
   2470{
   2471	unsigned long backlog;
   2472	unsigned long old_mwb = mddev->bitmap_info.max_write_behind;
   2473	struct md_rdev *rdev;
   2474	bool has_write_mostly = false;
   2475	int rv = kstrtoul(buf, 10, &backlog);
   2476	if (rv)
   2477		return rv;
   2478	if (backlog > COUNTER_MAX)
   2479		return -EINVAL;
   2480
   2481	/*
   2482	 * Without write mostly device, it doesn't make sense to set
   2483	 * backlog for max_write_behind.
   2484	 */
   2485	rdev_for_each(rdev, mddev) {
   2486		if (test_bit(WriteMostly, &rdev->flags)) {
   2487			has_write_mostly = true;
   2488			break;
   2489		}
   2490	}
   2491	if (!has_write_mostly) {
   2492		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
   2493				    mdname(mddev));
   2494		return -EINVAL;
   2495	}
   2496
   2497	mddev->bitmap_info.max_write_behind = backlog;
   2498	if (!backlog && mddev->serial_info_pool) {
   2499		/* serial_info_pool is not needed if backlog is zero */
   2500		if (!mddev->serialize_policy)
   2501			mddev_destroy_serial_pool(mddev, NULL, false);
   2502	} else if (backlog && !mddev->serial_info_pool) {
   2503		/* serial_info_pool is needed since backlog is not zero */
   2504		struct md_rdev *rdev;
   2505
   2506		rdev_for_each(rdev, mddev)
   2507			mddev_create_serial_pool(mddev, rdev, false);
   2508	}
   2509	if (old_mwb != backlog)
   2510		md_bitmap_update_sb(mddev->bitmap);
   2511	return len;
   2512}
   2513
   2514static struct md_sysfs_entry bitmap_backlog =
   2515__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
   2516
   2517static ssize_t
   2518chunksize_show(struct mddev *mddev, char *page)
   2519{
   2520	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
   2521}
   2522
   2523static ssize_t
   2524chunksize_store(struct mddev *mddev, const char *buf, size_t len)
   2525{
   2526	/* Can only be changed when no bitmap is active */
   2527	int rv;
   2528	unsigned long csize;
   2529	if (mddev->bitmap)
   2530		return -EBUSY;
   2531	rv = kstrtoul(buf, 10, &csize);
   2532	if (rv)
   2533		return rv;
   2534	if (csize < 512 ||
   2535	    !is_power_of_2(csize))
   2536		return -EINVAL;
   2537	mddev->bitmap_info.chunksize = csize;
   2538	return len;
   2539}
   2540
   2541static struct md_sysfs_entry bitmap_chunksize =
   2542__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
   2543
   2544static ssize_t metadata_show(struct mddev *mddev, char *page)
   2545{
   2546	if (mddev_is_clustered(mddev))
   2547		return sprintf(page, "clustered\n");
   2548	return sprintf(page, "%s\n", (mddev->bitmap_info.external
   2549				      ? "external" : "internal"));
   2550}
   2551
   2552static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
   2553{
   2554	if (mddev->bitmap ||
   2555	    mddev->bitmap_info.file ||
   2556	    mddev->bitmap_info.offset)
   2557		return -EBUSY;
   2558	if (strncmp(buf, "external", 8) == 0)
   2559		mddev->bitmap_info.external = 1;
   2560	else if ((strncmp(buf, "internal", 8) == 0) ||
   2561			(strncmp(buf, "clustered", 9) == 0))
   2562		mddev->bitmap_info.external = 0;
   2563	else
   2564		return -EINVAL;
   2565	return len;
   2566}
   2567
   2568static struct md_sysfs_entry bitmap_metadata =
   2569__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
   2570
   2571static ssize_t can_clear_show(struct mddev *mddev, char *page)
   2572{
   2573	int len;
   2574	spin_lock(&mddev->lock);
   2575	if (mddev->bitmap)
   2576		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
   2577					     "false" : "true"));
   2578	else
   2579		len = sprintf(page, "\n");
   2580	spin_unlock(&mddev->lock);
   2581	return len;
   2582}
   2583
   2584static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
   2585{
   2586	if (mddev->bitmap == NULL)
   2587		return -ENOENT;
   2588	if (strncmp(buf, "false", 5) == 0)
   2589		mddev->bitmap->need_sync = 1;
   2590	else if (strncmp(buf, "true", 4) == 0) {
   2591		if (mddev->degraded)
   2592			return -EBUSY;
   2593		mddev->bitmap->need_sync = 0;
   2594	} else
   2595		return -EINVAL;
   2596	return len;
   2597}
   2598
   2599static struct md_sysfs_entry bitmap_can_clear =
   2600__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
   2601
   2602static ssize_t
   2603behind_writes_used_show(struct mddev *mddev, char *page)
   2604{
   2605	ssize_t ret;
   2606	spin_lock(&mddev->lock);
   2607	if (mddev->bitmap == NULL)
   2608		ret = sprintf(page, "0\n");
   2609	else
   2610		ret = sprintf(page, "%lu\n",
   2611			      mddev->bitmap->behind_writes_used);
   2612	spin_unlock(&mddev->lock);
   2613	return ret;
   2614}
   2615
   2616static ssize_t
   2617behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
   2618{
   2619	if (mddev->bitmap)
   2620		mddev->bitmap->behind_writes_used = 0;
   2621	return len;
   2622}
   2623
   2624static struct md_sysfs_entry max_backlog_used =
   2625__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
   2626       behind_writes_used_show, behind_writes_used_reset);
   2627
/* NULL-terminated list of the attributes exposed through md_bitmap_group. */
static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
	&bitmap_space.attr,
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
	&max_backlog_used.attr,
	NULL
};
/* Attribute group named "bitmap" that bundles md_bitmap_attrs. */
const struct attribute_group md_bitmap_group = {
	.name = "bitmap",
	.attrs = md_bitmap_attrs,
};