cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dm-snap-persistent.c (23121B)


      1/*
      2 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
      3 * Copyright (C) 2006-2008 Red Hat GmbH
      4 *
      5 * This file is released under the GPL.
      6 */
      7
      8#include "dm-exception-store.h"
      9
     10#include <linux/ctype.h>
     11#include <linux/mm.h>
     12#include <linux/pagemap.h>
     13#include <linux/vmalloc.h>
     14#include <linux/export.h>
     15#include <linux/slab.h>
     16#include <linux/dm-io.h>
     17#include <linux/dm-bufio.h>
     18
     19#define DM_MSG_PREFIX "persistent snapshot"
     20#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U	/* 16KB */
     21
     22#define DM_PREFETCH_CHUNKS		12
     23
     24/*-----------------------------------------------------------------
      25 * Persistent snapshots: by persistent we mean that the snapshot
     26 * will survive a reboot.
     27 *---------------------------------------------------------------*/
     28
     29/*
     30 * We need to store a record of which parts of the origin have
     31 * been copied to the snapshot device.  The snapshot code
     32 * requires that we copy exception chunks to chunk aligned areas
     33 * of the COW store.  It makes sense therefore, to store the
     34 * metadata in chunk size blocks.
     35 *
     36 * There is no backward or forward compatibility implemented,
     37 * snapshots with different disk versions than the kernel will
     38 * not be usable.  It is expected that "lvcreate" will blank out
     39 * the start of a fresh COW device before calling the snapshot
     40 * constructor.
     41 *
     42 * The first chunk of the COW device just contains the header.
     43 * After this there is a chunk filled with exception metadata,
     44 * followed by as many exception chunks as can fit in the
     45 * metadata areas.
     46 *
     47 * All on disk structures are in little-endian format.  The end
     48 * of the exceptions info is indicated by an exception with a
     49 * new_chunk of 0, which is invalid since it would point to the
     50 * header chunk.
     51 */
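       /*
        * Worked example of the layout above (assuming the default chunk
        * size of 32 sectors, i.e. 16KiB, and the 16-byte on-disk exception
        * record defined below): each metadata area holds 16384 / 16 = 1024
        * exceptions, so the COW device layout repeats with a stride of
        * 1025 chunks:
        *
        *   chunk 0           header
        *   chunk 1           metadata area 0
        *   chunks 2..1025    exception data referenced by area 0
        *   chunk 1026        metadata area 1
        *   ...
        */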
     52
     53/*
      54 * Magic for persistent snapshots: "SnAp" - Feeble, isn't it?
     55 */
     56#define SNAP_MAGIC 0x70416e53
     57
     58/*
     59 * The on-disk version of the metadata.
     60 */
     61#define SNAPSHOT_DISK_VERSION 1
     62
     63#define NUM_SNAPSHOT_HDR_CHUNKS 1
     64
     65struct disk_header {
     66	__le32 magic;
     67
     68	/*
      69	 * Is this snapshot valid?  There is no way of recovering
     70	 * an invalid snapshot.
     71	 */
     72	__le32 valid;
     73
     74	/*
      75	 * Simple, incrementing version. No backward
     76	 * compatibility.
     77	 */
     78	__le32 version;
     79
     80	/* In sectors */
     81	__le32 chunk_size;
     82} __packed;
     83
     84struct disk_exception {
     85	__le64 old_chunk;
     86	__le64 new_chunk;
     87} __packed;
     88
     89struct core_exception {
     90	uint64_t old_chunk;
     91	uint64_t new_chunk;
     92};
     93
     94struct commit_callback {
     95	void (*callback)(void *, int success);
     96	void *context;
     97};
     98
     99/*
    100 * The top level structure for a persistent exception store.
    101 */
    102struct pstore {
    103	struct dm_exception_store *store;
    104	int version;
    105	int valid;
    106	uint32_t exceptions_per_area;
    107
    108	/*
    109	 * Now that we have an asynchronous kcopyd there is no
     110	 * need for large chunk sizes, so it won't hurt to have a
     111	 * whole chunk's worth of metadata in memory at once.
    112	 */
    113	void *area;
    114
    115	/*
    116	 * An area of zeros used to clear the next area.
    117	 */
    118	void *zero_area;
    119
    120	/*
     121	 * An area used for the header. The header can be written
    122	 * concurrently with metadata (when invalidating the snapshot),
    123	 * so it needs a separate buffer.
    124	 */
    125	void *header_area;
    126
    127	/*
    128	 * Used to keep track of which metadata area the data in
    129	 * 'chunk' refers to.
    130	 */
    131	chunk_t current_area;
    132
    133	/*
    134	 * The next free chunk for an exception.
    135	 *
    136	 * When creating exceptions, all the chunks here and above are
    137	 * free.  It holds the next chunk to be allocated.  On rare
    138	 * occasions (e.g. after a system crash) holes can be left in
    139	 * the exception store because chunks can be committed out of
    140	 * order.
    141	 *
    142	 * When merging exceptions, it does not necessarily mean all the
    143	 * chunks here and above are free.  It holds the value it would
    144	 * have held if all chunks had been committed in order of
    145	 * allocation.  Consequently the value may occasionally be
    146	 * slightly too low, but since it's only used for 'status' and
     147	 * it can never reach its minimum value too early, this doesn't
    148	 * matter.
    149	 */
    150
    151	chunk_t next_free;
    152
    153	/*
    154	 * The index of next free exception in the current
    155	 * metadata area.
    156	 */
    157	uint32_t current_committed;
    158
    159	atomic_t pending_count;
    160	uint32_t callback_count;
    161	struct commit_callback *callbacks;
    162	struct dm_io_client *io_client;
    163
    164	struct workqueue_struct *metadata_wq;
    165};
    166
    167static int alloc_area(struct pstore *ps)
    168{
    169	int r = -ENOMEM;
    170	size_t len;
    171
    172	len = ps->store->chunk_size << SECTOR_SHIFT;
    173
    174	/*
    175	 * Allocate the chunk_size block of memory that will hold
    176	 * a single metadata area.
    177	 */
    178	ps->area = vmalloc(len);
    179	if (!ps->area)
    180		goto err_area;
    181
    182	ps->zero_area = vzalloc(len);
    183	if (!ps->zero_area)
    184		goto err_zero_area;
    185
    186	ps->header_area = vmalloc(len);
    187	if (!ps->header_area)
    188		goto err_header_area;
    189
    190	return 0;
    191
    192err_header_area:
    193	vfree(ps->zero_area);
    194
    195err_zero_area:
    196	vfree(ps->area);
    197
    198err_area:
    199	return r;
    200}
    201
    202static void free_area(struct pstore *ps)
    203{
    204	vfree(ps->area);
    205	ps->area = NULL;
    206	vfree(ps->zero_area);
    207	ps->zero_area = NULL;
    208	vfree(ps->header_area);
    209	ps->header_area = NULL;
    210}
    211
    212struct mdata_req {
    213	struct dm_io_region *where;
    214	struct dm_io_request *io_req;
    215	struct work_struct work;
    216	int result;
    217};
    218
    219static void do_metadata(struct work_struct *work)
    220{
    221	struct mdata_req *req = container_of(work, struct mdata_req, work);
    222
    223	req->result = dm_io(req->io_req, 1, req->where, NULL);
    224}
    225
    226/*
    227 * Read or write a chunk aligned and sized block of data from a device.
    228 */
    229static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
    230		    int op_flags, int metadata)
    231{
    232	struct dm_io_region where = {
    233		.bdev = dm_snap_cow(ps->store->snap)->bdev,
    234		.sector = ps->store->chunk_size * chunk,
    235		.count = ps->store->chunk_size,
    236	};
    237	struct dm_io_request io_req = {
    238		.bi_op = op,
    239		.bi_op_flags = op_flags,
    240		.mem.type = DM_IO_VMA,
    241		.mem.ptr.vma = area,
    242		.client = ps->io_client,
    243		.notify.fn = NULL,
    244	};
    245	struct mdata_req req;
    246
    247	if (!metadata)
    248		return dm_io(&io_req, 1, &where, NULL);
    249
    250	req.where = &where;
    251	req.io_req = &io_req;
    252
    253	/*
    254	 * Issue the synchronous I/O from a different thread
    255	 * to avoid submit_bio_noacct recursion.
    256	 */
    257	INIT_WORK_ONSTACK(&req.work, do_metadata);
    258	queue_work(ps->metadata_wq, &req.work);
    259	flush_workqueue(ps->metadata_wq);
    260	destroy_work_on_stack(&req.work);
    261
    262	return req.result;
    263}
    264
    265/*
    266 * Convert a metadata area index to a chunk index.
    267 */
    268static chunk_t area_location(struct pstore *ps, chunk_t area)
    269{
    270	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
    271}
    272
    273static void skip_metadata(struct pstore *ps)
    274{
    275	uint32_t stride = ps->exceptions_per_area + 1;
    276	chunk_t next_free = ps->next_free;
    277	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
    278		ps->next_free++;
    279}
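       /*
        * For illustration: with 1024 exceptions per area the stride is 1025,
        * so chunks 1, 1026, 2051, ... hold metadata.  sector_div() replaces
        * next_free with the quotient and returns the remainder; a remainder
        * equal to NUM_SNAPSHOT_HDR_CHUNKS means ps->next_free has landed on
        * a metadata chunk and must be stepped past.
        */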
    280
    281/*
     282 * Read or write a metadata area, remembering to skip the first
    283 * chunk which holds the header.
    284 */
    285static int area_io(struct pstore *ps, int op, int op_flags)
    286{
    287	chunk_t chunk = area_location(ps, ps->current_area);
    288
    289	return chunk_io(ps, ps->area, chunk, op, op_flags, 0);
    290}
    291
    292static void zero_memory_area(struct pstore *ps)
    293{
    294	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
    295}
    296
    297static int zero_disk_area(struct pstore *ps, chunk_t area)
    298{
    299	return chunk_io(ps, ps->zero_area, area_location(ps, area),
    300			REQ_OP_WRITE, 0, 0);
    301}
    302
    303static int read_header(struct pstore *ps, int *new_snapshot)
    304{
    305	int r;
    306	struct disk_header *dh;
    307	unsigned chunk_size;
    308	int chunk_size_supplied = 1;
    309	char *chunk_err;
    310
    311	/*
    312	 * Use default chunk size (or logical_block_size, if larger)
    313	 * if none supplied
    314	 */
    315	if (!ps->store->chunk_size) {
    316		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
    317		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
    318					    bdev) >> 9);
    319		ps->store->chunk_mask = ps->store->chunk_size - 1;
    320		ps->store->chunk_shift = __ffs(ps->store->chunk_size);
    321		chunk_size_supplied = 0;
    322	}
    323
    324	ps->io_client = dm_io_client_create();
    325	if (IS_ERR(ps->io_client))
    326		return PTR_ERR(ps->io_client);
    327
    328	r = alloc_area(ps);
    329	if (r)
    330		return r;
    331
    332	r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1);
    333	if (r)
    334		goto bad;
    335
    336	dh = ps->header_area;
    337
    338	if (le32_to_cpu(dh->magic) == 0) {
    339		*new_snapshot = 1;
    340		return 0;
    341	}
    342
    343	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
    344		DMWARN("Invalid or corrupt snapshot");
    345		r = -ENXIO;
    346		goto bad;
    347	}
    348
    349	*new_snapshot = 0;
    350	ps->valid = le32_to_cpu(dh->valid);
    351	ps->version = le32_to_cpu(dh->version);
    352	chunk_size = le32_to_cpu(dh->chunk_size);
    353
    354	if (ps->store->chunk_size == chunk_size)
    355		return 0;
    356
    357	if (chunk_size_supplied)
    358		DMWARN("chunk size %u in device metadata overrides "
    359		       "table chunk size of %u.",
    360		       chunk_size, ps->store->chunk_size);
    361
    362	/* We had a bogus chunk_size. Fix stuff up. */
    363	free_area(ps);
    364
    365	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
    366					      &chunk_err);
    367	if (r) {
    368		DMERR("invalid on-disk chunk size %u: %s.",
    369		      chunk_size, chunk_err);
    370		return r;
    371	}
    372
    373	r = alloc_area(ps);
    374	return r;
    375
    376bad:
    377	free_area(ps);
    378	return r;
    379}
    380
    381static int write_header(struct pstore *ps)
    382{
    383	struct disk_header *dh;
    384
    385	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
    386
    387	dh = ps->header_area;
    388	dh->magic = cpu_to_le32(SNAP_MAGIC);
    389	dh->valid = cpu_to_le32(ps->valid);
    390	dh->version = cpu_to_le32(ps->version);
    391	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
    392
    393	return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1);
    394}
    395
    396/*
    397 * Access functions for the disk exceptions, these do the endian conversions.
    398 */
    399static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
    400					    uint32_t index)
    401{
    402	BUG_ON(index >= ps->exceptions_per_area);
    403
    404	return ((struct disk_exception *) ps_area) + index;
    405}
    406
    407static void read_exception(struct pstore *ps, void *ps_area,
    408			   uint32_t index, struct core_exception *result)
    409{
    410	struct disk_exception *de = get_exception(ps, ps_area, index);
    411
    412	/* copy it */
    413	result->old_chunk = le64_to_cpu(de->old_chunk);
    414	result->new_chunk = le64_to_cpu(de->new_chunk);
    415}
    416
    417static void write_exception(struct pstore *ps,
    418			    uint32_t index, struct core_exception *e)
    419{
    420	struct disk_exception *de = get_exception(ps, ps->area, index);
    421
    422	/* copy it */
    423	de->old_chunk = cpu_to_le64(e->old_chunk);
    424	de->new_chunk = cpu_to_le64(e->new_chunk);
    425}
    426
    427static void clear_exception(struct pstore *ps, uint32_t index)
    428{
    429	struct disk_exception *de = get_exception(ps, ps->area, index);
    430
    431	/* clear it */
    432	de->old_chunk = 0;
    433	de->new_chunk = 0;
    434}
    435
    436/*
    437 * Registers the exceptions that are present in the current area.
    438 * 'full' is filled in to indicate if the area has been
    439 * filled.
    440 */
    441static int insert_exceptions(struct pstore *ps, void *ps_area,
    442			     int (*callback)(void *callback_context,
    443					     chunk_t old, chunk_t new),
    444			     void *callback_context,
    445			     int *full)
    446{
    447	int r;
    448	unsigned int i;
    449	struct core_exception e;
    450
    451	/* presume the area is full */
    452	*full = 1;
    453
    454	for (i = 0; i < ps->exceptions_per_area; i++) {
    455		read_exception(ps, ps_area, i, &e);
    456
    457		/*
    458		 * If the new_chunk is pointing at the start of
    459		 * the COW device, where the first metadata area
     460		 * is, we know that we've hit the end of the
    461		 * exceptions.  Therefore the area is not full.
    462		 */
    463		if (e.new_chunk == 0LL) {
    464			ps->current_committed = i;
    465			*full = 0;
    466			break;
    467		}
    468
    469		/*
    470		 * Keep track of the start of the free chunks.
    471		 */
    472		if (ps->next_free <= e.new_chunk)
    473			ps->next_free = e.new_chunk + 1;
    474
    475		/*
    476		 * Otherwise we add the exception to the snapshot.
    477		 */
    478		r = callback(callback_context, e.old_chunk, e.new_chunk);
    479		if (r)
    480			return r;
    481	}
    482
    483	return 0;
    484}
    485
    486static int read_exceptions(struct pstore *ps,
    487			   int (*callback)(void *callback_context, chunk_t old,
    488					   chunk_t new),
    489			   void *callback_context)
    490{
    491	int r, full = 1;
    492	struct dm_bufio_client *client;
    493	chunk_t prefetch_area = 0;
    494
    495	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
    496					ps->store->chunk_size << SECTOR_SHIFT,
    497					1, 0, NULL, NULL);
    498
    499	if (IS_ERR(client))
    500		return PTR_ERR(client);
    501
    502	/*
    503	 * Setup for one current buffer + desired readahead buffers.
    504	 */
    505	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);
    506
    507	/*
     508	 * Keep reading chunks and inserting exceptions until
    509	 * we find a partially full area.
    510	 */
    511	for (ps->current_area = 0; full; ps->current_area++) {
    512		struct dm_buffer *bp;
    513		void *area;
    514		chunk_t chunk;
    515
    516		if (unlikely(prefetch_area < ps->current_area))
    517			prefetch_area = ps->current_area;
    518
    519		if (DM_PREFETCH_CHUNKS) do {
    520			chunk_t pf_chunk = area_location(ps, prefetch_area);
    521			if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
    522				break;
    523			dm_bufio_prefetch(client, pf_chunk, 1);
    524			prefetch_area++;
    525			if (unlikely(!prefetch_area))
    526				break;
    527		} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);
    528
    529		chunk = area_location(ps, ps->current_area);
    530
    531		area = dm_bufio_read(client, chunk, &bp);
    532		if (IS_ERR(area)) {
    533			r = PTR_ERR(area);
    534			goto ret_destroy_bufio;
    535		}
    536
    537		r = insert_exceptions(ps, area, callback, callback_context,
    538				      &full);
    539
    540		if (!full)
    541			memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);
    542
    543		dm_bufio_release(bp);
    544
    545		dm_bufio_forget(client, chunk);
    546
    547		if (unlikely(r))
    548			goto ret_destroy_bufio;
    549	}
    550
    551	ps->current_area--;
    552
    553	skip_metadata(ps);
    554
    555	r = 0;
    556
    557ret_destroy_bufio:
    558	dm_bufio_client_destroy(client);
    559
    560	return r;
    561}
    562
    563static struct pstore *get_info(struct dm_exception_store *store)
    564{
    565	return (struct pstore *) store->context;
    566}
    567
    568static void persistent_usage(struct dm_exception_store *store,
    569			     sector_t *total_sectors,
    570			     sector_t *sectors_allocated,
    571			     sector_t *metadata_sectors)
    572{
    573	struct pstore *ps = get_info(store);
    574
    575	*sectors_allocated = ps->next_free * store->chunk_size;
    576	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
    577
    578	/*
    579	 * First chunk is the fixed header.
    580	 * Then there are (ps->current_area + 1) metadata chunks, each one
    581	 * separated from the next by ps->exceptions_per_area data chunks.
    582	 */
    583	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
    584			    store->chunk_size;
    585}
    586
    587static void persistent_dtr(struct dm_exception_store *store)
    588{
    589	struct pstore *ps = get_info(store);
    590
    591	destroy_workqueue(ps->metadata_wq);
    592
    593	/* Created in read_header */
    594	if (ps->io_client)
    595		dm_io_client_destroy(ps->io_client);
    596	free_area(ps);
    597
    598	/* Allocated in persistent_read_metadata */
    599	kvfree(ps->callbacks);
    600
    601	kfree(ps);
    602}
    603
    604static int persistent_read_metadata(struct dm_exception_store *store,
    605				    int (*callback)(void *callback_context,
    606						    chunk_t old, chunk_t new),
    607				    void *callback_context)
    608{
    609	int r, new_snapshot;
    610	struct pstore *ps = get_info(store);
    611
    612	/*
    613	 * Read the snapshot header.
    614	 */
    615	r = read_header(ps, &new_snapshot);
    616	if (r)
    617		return r;
    618
    619	/*
    620	 * Now we know correct chunk_size, complete the initialisation.
    621	 */
    622	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
    623				  sizeof(struct disk_exception);
    624	ps->callbacks = kvcalloc(ps->exceptions_per_area,
    625				 sizeof(*ps->callbacks), GFP_KERNEL);
    626	if (!ps->callbacks)
    627		return -ENOMEM;
    628
    629	/*
    630	 * Do we need to setup a new snapshot ?
    631	 */
    632	if (new_snapshot) {
    633		r = write_header(ps);
    634		if (r) {
    635			DMWARN("write_header failed");
    636			return r;
    637		}
    638
    639		ps->current_area = 0;
    640		zero_memory_area(ps);
    641		r = zero_disk_area(ps, 0);
    642		if (r)
    643			DMWARN("zero_disk_area(0) failed");
    644		return r;
    645	}
    646	/*
    647	 * Sanity checks.
    648	 */
    649	if (ps->version != SNAPSHOT_DISK_VERSION) {
    650		DMWARN("unable to handle snapshot disk version %d",
    651		       ps->version);
    652		return -EINVAL;
    653	}
    654
    655	/*
    656	 * Metadata are valid, but snapshot is invalidated
    657	 */
    658	if (!ps->valid)
    659		return 1;
    660
    661	/*
    662	 * Read the metadata.
    663	 */
    664	r = read_exceptions(ps, callback, callback_context);
    665
    666	return r;
    667}
    668
    669static int persistent_prepare_exception(struct dm_exception_store *store,
    670					struct dm_exception *e)
    671{
    672	struct pstore *ps = get_info(store);
    673	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
    674
    675	/* Is there enough room ? */
    676	if (size < ((ps->next_free + 1) * store->chunk_size))
    677		return -ENOSPC;
    678
    679	e->new_chunk = ps->next_free;
    680
    681	/*
    682	 * Move onto the next free pending, making sure to take
    683	 * into account the location of the metadata chunks.
    684	 */
    685	ps->next_free++;
    686	skip_metadata(ps);
    687
    688	atomic_inc(&ps->pending_count);
    689	return 0;
    690}
    691
    692static void persistent_commit_exception(struct dm_exception_store *store,
    693					struct dm_exception *e, int valid,
    694					void (*callback) (void *, int success),
    695					void *callback_context)
    696{
    697	unsigned int i;
    698	struct pstore *ps = get_info(store);
    699	struct core_exception ce;
    700	struct commit_callback *cb;
    701
    702	if (!valid)
    703		ps->valid = 0;
    704
    705	ce.old_chunk = e->old_chunk;
    706	ce.new_chunk = e->new_chunk;
    707	write_exception(ps, ps->current_committed++, &ce);
    708
    709	/*
    710	 * Add the callback to the back of the array.  This code
    711	 * is the only place where the callback array is
    712	 * manipulated, and we know that it will never be called
    713	 * multiple times concurrently.
    714	 */
    715	cb = ps->callbacks + ps->callback_count++;
    716	cb->callback = callback;
    717	cb->context = callback_context;
    718
    719	/*
    720	 * If there are exceptions in flight and we have not yet
    721	 * filled this metadata area there's nothing more to do.
    722	 */
    723	if (!atomic_dec_and_test(&ps->pending_count) &&
    724	    (ps->current_committed != ps->exceptions_per_area))
    725		return;
    726
    727	/*
    728	 * If we completely filled the current area, then wipe the next one.
    729	 */
    730	if ((ps->current_committed == ps->exceptions_per_area) &&
    731	    zero_disk_area(ps, ps->current_area + 1))
    732		ps->valid = 0;
    733
    734	/*
    735	 * Commit exceptions to disk.
    736	 */
    737	if (ps->valid && area_io(ps, REQ_OP_WRITE,
    738				 REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
    739		ps->valid = 0;
    740
    741	/*
    742	 * Advance to the next area if this one is full.
    743	 */
    744	if (ps->current_committed == ps->exceptions_per_area) {
    745		ps->current_committed = 0;
    746		ps->current_area++;
    747		zero_memory_area(ps);
    748	}
    749
    750	for (i = 0; i < ps->callback_count; i++) {
    751		cb = ps->callbacks + i;
    752		cb->callback(cb->context, ps->valid);
    753	}
    754
    755	ps->callback_count = 0;
    756}
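       /*
        * Note on the flow above: commits are batched.  The in-memory area is
        * only written to disk (with PREFLUSH | FUA) once there are no
        * exceptions left in flight or the current area has filled, and the
        * queued callbacks fire only after that write completes, so callers
        * do not see an exception as committed until its metadata has reached
        * the media.
        */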
    757
    758static int persistent_prepare_merge(struct dm_exception_store *store,
    759				    chunk_t *last_old_chunk,
    760				    chunk_t *last_new_chunk)
    761{
    762	struct pstore *ps = get_info(store);
    763	struct core_exception ce;
    764	int nr_consecutive;
    765	int r;
    766
    767	/*
    768	 * When current area is empty, move back to preceding area.
    769	 */
    770	if (!ps->current_committed) {
    771		/*
    772		 * Have we finished?
    773		 */
    774		if (!ps->current_area)
    775			return 0;
    776
    777		ps->current_area--;
    778		r = area_io(ps, REQ_OP_READ, 0);
    779		if (r < 0)
    780			return r;
    781		ps->current_committed = ps->exceptions_per_area;
    782	}
    783
    784	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
    785	*last_old_chunk = ce.old_chunk;
    786	*last_new_chunk = ce.new_chunk;
    787
    788	/*
    789	 * Find number of consecutive chunks within the current area,
    790	 * working backwards.
    791	 */
    792	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
    793	     nr_consecutive++) {
    794		read_exception(ps, ps->area,
    795			       ps->current_committed - 1 - nr_consecutive, &ce);
    796		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
    797		    ce.new_chunk != *last_new_chunk - nr_consecutive)
    798			break;
    799	}
    800
    801	return nr_consecutive;
    802}
    803
    804static int persistent_commit_merge(struct dm_exception_store *store,
    805				   int nr_merged)
    806{
    807	int r, i;
    808	struct pstore *ps = get_info(store);
    809
    810	BUG_ON(nr_merged > ps->current_committed);
    811
    812	for (i = 0; i < nr_merged; i++)
    813		clear_exception(ps, ps->current_committed - 1 - i);
    814
    815	r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
    816	if (r < 0)
    817		return r;
    818
    819	ps->current_committed -= nr_merged;
    820
    821	/*
    822	 * At this stage, only persistent_usage() uses ps->next_free, so
    823	 * we make no attempt to keep ps->next_free strictly accurate
    824	 * as exceptions may have been committed out-of-order originally.
    825	 * Once a snapshot has become merging, we set it to the value it
    826	 * would have held had all the exceptions been committed in order.
    827	 *
    828	 * ps->current_area does not get reduced by prepare_merge() until
    829	 * after commit_merge() has removed the nr_merged previous exceptions.
    830	 */
    831	ps->next_free = area_location(ps, ps->current_area) +
    832			ps->current_committed + 1;
    833
    834	return 0;
    835}
    836
    837static void persistent_drop_snapshot(struct dm_exception_store *store)
    838{
    839	struct pstore *ps = get_info(store);
    840
    841	ps->valid = 0;
    842	if (write_header(ps))
    843		DMWARN("write header failed");
    844}
    845
    846static int persistent_ctr(struct dm_exception_store *store, char *options)
    847{
    848	struct pstore *ps;
    849	int r;
    850
    851	/* allocate the pstore */
    852	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
    853	if (!ps)
    854		return -ENOMEM;
    855
    856	ps->store = store;
    857	ps->valid = 1;
    858	ps->version = SNAPSHOT_DISK_VERSION;
    859	ps->area = NULL;
    860	ps->zero_area = NULL;
    861	ps->header_area = NULL;
    862	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
    863	ps->current_committed = 0;
    864
    865	ps->callback_count = 0;
    866	atomic_set(&ps->pending_count, 0);
    867	ps->callbacks = NULL;
    868
    869	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
    870	if (!ps->metadata_wq) {
    871		DMERR("couldn't start header metadata update thread");
    872		r = -ENOMEM;
    873		goto err_workqueue;
    874	}
    875
    876	if (options) {
    877		char overflow = toupper(options[0]);
    878		if (overflow == 'O')
    879			store->userspace_supports_overflow = true;
    880		else {
    881			DMERR("Unsupported persistent store option: %s", options);
    882			r = -EINVAL;
    883			goto err_options;
    884		}
    885	}
    886
    887	store->context = ps;
    888
    889	return 0;
    890
    891err_options:
    892	destroy_workqueue(ps->metadata_wq);
    893err_workqueue:
    894	kfree(ps);
    895
    896	return r;
    897}
    898
    899static unsigned persistent_status(struct dm_exception_store *store,
    900				  status_type_t status, char *result,
    901				  unsigned maxlen)
    902{
    903	unsigned sz = 0;
    904
    905	switch (status) {
    906	case STATUSTYPE_INFO:
    907		break;
    908	case STATUSTYPE_TABLE:
    909		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
    910		       (unsigned long long)store->chunk_size);
    911		break;
    912	case STATUSTYPE_IMA:
    913		*result = '\0';
    914		break;
    915	}
    916
    917	return sz;
    918}
    919
    920static struct dm_exception_store_type _persistent_type = {
    921	.name = "persistent",
    922	.module = THIS_MODULE,
    923	.ctr = persistent_ctr,
    924	.dtr = persistent_dtr,
    925	.read_metadata = persistent_read_metadata,
    926	.prepare_exception = persistent_prepare_exception,
    927	.commit_exception = persistent_commit_exception,
    928	.prepare_merge = persistent_prepare_merge,
    929	.commit_merge = persistent_commit_merge,
    930	.drop_snapshot = persistent_drop_snapshot,
    931	.usage = persistent_usage,
    932	.status = persistent_status,
    933};
    934
    935static struct dm_exception_store_type _persistent_compat_type = {
    936	.name = "P",
    937	.module = THIS_MODULE,
    938	.ctr = persistent_ctr,
    939	.dtr = persistent_dtr,
    940	.read_metadata = persistent_read_metadata,
    941	.prepare_exception = persistent_prepare_exception,
    942	.commit_exception = persistent_commit_exception,
    943	.prepare_merge = persistent_prepare_merge,
    944	.commit_merge = persistent_commit_merge,
    945	.drop_snapshot = persistent_drop_snapshot,
    946	.usage = persistent_usage,
    947	.status = persistent_status,
    948};
    949
    950int dm_persistent_snapshot_init(void)
    951{
    952	int r;
    953
    954	r = dm_exception_store_type_register(&_persistent_type);
    955	if (r) {
    956		DMERR("Unable to register persistent exception store type");
    957		return r;
    958	}
    959
    960	r = dm_exception_store_type_register(&_persistent_compat_type);
    961	if (r) {
    962		DMERR("Unable to register old-style persistent exception "
    963		      "store type");
    964		dm_exception_store_type_unregister(&_persistent_type);
    965		return r;
    966	}
    967
    968	return r;
    969}
    970
    971void dm_persistent_snapshot_exit(void)
    972{
    973	dm_exception_store_type_unregister(&_persistent_type);
    974	dm_exception_store_type_unregister(&_persistent_compat_type);
    975}