cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dm-ebs-target.c (12538B)


/*
 * Copyright (C) 2020 Red Hat GmbH
 *
 * This file is released under the GPL.
 *
 * Device-mapper target to emulate smaller logical block
 * size on backing devices exposing (natively) larger ones.
 *
 * E.g. 512-byte sector emulation on 4K native disks.
 */

#include "dm.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "ebs"

static void ebs_dtr(struct dm_target *ti);

/* Emulated block size context. */
struct ebs_c {
	struct dm_dev *dev;		/* Underlying device to emulate block size on. */
	struct dm_bufio_client *bufio;	/* Use dm-bufio for read and read-modify-write processing. */
	struct workqueue_struct *wq;	/* Workqueue for ^ processing of bios. */
	struct work_struct ws;		/* Work item used for ^. */
	struct bio_list bios_in;	/* Worker bios input list. */
	spinlock_t lock;		/* Guard bios input list above. */
	sector_t start;			/* <start> table line argument, see ebs_ctr below. */
	unsigned int e_bs;		/* Emulated block size in sectors exposed to upper layer. */
	unsigned int u_bs;		/* Underlying block size in sectors retrieved from/set on lower layer device. */
	unsigned char block_shift;	/* bitshift sectors -> blocks used in dm-bufio API. */
	bool u_bs_set:1;		/* Flag to indicate underlying block size is set on table line. */
};
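
/*
 * Illustrative field values (assumed for illustration, not mandated by the code):
 * emulating 512-byte logical blocks on a 4 KiB device gives e_bs = 1 sector,
 * u_bs = 8 sectors and block_shift = __ffs(8) = 3, i.e. sector -> block is a shift by 3.
 */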

static inline sector_t __sector_to_block(struct ebs_c *ec, sector_t sector)
{
	return sector >> ec->block_shift;
}

static inline sector_t __block_mod(sector_t sector, unsigned int bs)
{
	return sector & (bs - 1);
}
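
/*
 * Worked example for __block_mod() above (bs assumed to be a power of two, as
 * __ebs_check_bs() enforces): sector & (bs - 1) equals sector % bs,
 * e.g. __block_mod(13, 8) == 13 & 7 == 5.
 */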

/* Return number of blocks for a bio, accounting for misalignment of start and end sectors. */
static inline unsigned int __nr_blocks(struct ebs_c *ec, struct bio *bio)
{
	sector_t end_sector = __block_mod(bio->bi_iter.bi_sector, ec->u_bs) + bio_sectors(bio);

	return __sector_to_block(ec, end_sector) + (__block_mod(end_sector, ec->u_bs) ? 1 : 0);
}
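
/*
 * Worked example for __nr_blocks() above (assuming u_bs = 8 sectors, i.e. 4 KiB
 * underlying blocks): a bio of 4 sectors starting at sector 6 gives
 * end_sector = (6 & 7) + 4 = 10, hence (10 >> 3) + 1 = 2 underlying blocks,
 * matching the two blocks the bio actually touches.
 */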

static inline bool __ebs_check_bs(unsigned int bs)
{
	return bs && is_power_of_2(bs);
}

/*
 * READ/WRITE:
 *
 * copy blocks between bufio blocks and bio vector's (partial/overlapping) pages.
 */
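/*
 * Illustrative offset math (values assumed, u_bs = 8 sectors / 4 KiB blocks):
 * a 512-byte bvec at sector 3 maps to block 3 >> 3 = 0 with buf_off = 3 * 512 = 1536,
 * so cur_len = min(4096 - 1536, 512) = 512 bytes are copied within that block.
 */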
static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter)
{
	int r = 0;
	unsigned char *ba, *pa;
	unsigned int cur_len;
	unsigned int bv_len = bv->bv_len;
	unsigned int buf_off = to_bytes(__block_mod(iter->bi_sector, ec->u_bs));
	sector_t block = __sector_to_block(ec, iter->bi_sector);
	struct dm_buffer *b;

	if (unlikely(!bv->bv_page || !bv_len))
		return -EIO;

	pa = bvec_virt(bv);

	/* Handle overlapping page <-> blocks */
	while (bv_len) {
		cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len);

		/* Avoid reading for writes in case bio vector's page overwrites block completely. */
		if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
			ba = dm_bufio_read(ec->bufio, block, &b);
		else
			ba = dm_bufio_new(ec->bufio, block, &b);

		if (IS_ERR(ba)) {
			/*
			 * Carry on with next buffer, if any, to issue all possible
			 * data but return error.
			 */
			r = PTR_ERR(ba);
		} else {
			/* Copy data between bio page and buffer if read/new was successful above. */
			ba += buf_off;
			if (rw == READ) {
				memcpy(pa, ba, cur_len);
				flush_dcache_page(bv->bv_page);
			} else {
				flush_dcache_page(bv->bv_page);
				memcpy(ba, pa, cur_len);
				dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len);
			}

			dm_bufio_release(b);
		}

		pa += cur_len;
		bv_len -= cur_len;
		buf_off = 0;
		block++;
	}

	return r;
}

/* READ/WRITE: iterate bio vectors, copying between (partial) pages and bufio blocks. */
static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio)
{
	int r = 0, rr;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_bvec(bv, bio, iter) {
		rr = __ebs_rw_bvec(ec, rw, &bv, &iter);
		if (rr)
			r = rr;
	}

	return r;
}

/*
 * Discard bio's blocks, i.e. pass discards down.
 *
 * Avoid discarding partial blocks at beginning and end;
 * return 0 in case no blocks can be discarded as a result.
 */
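/*
 * Worked example (assuming u_bs = 8 sectors): a discard of sectors 4..15 starts in
 * the middle of block 0 and ends exactly at the end of block 1, so the partial first
 * block is skipped and only block 1 (sectors 8..15) is passed down for discard.
 */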
static int __ebs_discard_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t block, blocks, sector = bio->bi_iter.bi_sector;

	block = __sector_to_block(ec, sector);
	blocks = __nr_blocks(ec, bio);

	/*
	 * Partial first underlying block (__nr_blocks() may have
	 * resulted in one block).
	 */
	if (__block_mod(sector, ec->u_bs)) {
		block++;
		blocks--;
	}

	/* Partial last underlying block if any. */
	if (blocks && __block_mod(bio_end_sector(bio), ec->u_bs))
		blocks--;

	return blocks ? dm_bufio_issue_discard(ec->bufio, block, blocks) : 0;
}

/* Release a bio's blocks from the bufio cache. */
static void __ebs_forget_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t blocks, sector = bio->bi_iter.bi_sector;

	blocks = __nr_blocks(ec, bio);

	dm_bufio_forget_buffers(ec->bufio, __sector_to_block(ec, sector), blocks);
}

/* Worker function to process incoming bios. */
static void __ebs_process_bios(struct work_struct *ws)
{
	int r;
	bool write = false;
	sector_t block1, block2;
	struct ebs_c *ec = container_of(ws, struct ebs_c, ws);
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&ec->lock);
	bios = ec->bios_in;
	bio_list_init(&ec->bios_in);
	spin_unlock_irq(&ec->lock);

	/* Prefetch all read and any mis-aligned write buffers */
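	/*
	 * Illustrative case (values assumed, u_bs = 8): a write covering sectors 2..29
	 * spans blocks 0..3, but only blocks 0 and 3 are partially overwritten, so only
	 * those two are prefetched; blocks 1 and 2 are rewritten completely anyway.
	 */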
	bio_list_for_each(bio, &bios) {
		block1 = __sector_to_block(ec, bio->bi_iter.bi_sector);
		if (bio_op(bio) == REQ_OP_READ)
			dm_bufio_prefetch(ec->bufio, block1, __nr_blocks(ec, bio));
		else if (bio_op(bio) == REQ_OP_WRITE && !(bio->bi_opf & REQ_PREFLUSH)) {
			block2 = __sector_to_block(ec, bio_end_sector(bio));
			if (__block_mod(bio->bi_iter.bi_sector, ec->u_bs))
				dm_bufio_prefetch(ec->bufio, block1, 1);
			if (__block_mod(bio_end_sector(bio), ec->u_bs) && block2 != block1)
				dm_bufio_prefetch(ec->bufio, block2, 1);
		}
	}

	bio_list_for_each(bio, &bios) {
		r = -EIO;
		if (bio_op(bio) == REQ_OP_READ)
			r = __ebs_rw_bio(ec, READ, bio);
		else if (bio_op(bio) == REQ_OP_WRITE) {
			write = true;
			r = __ebs_rw_bio(ec, WRITE, bio);
		} else if (bio_op(bio) == REQ_OP_DISCARD) {
			__ebs_forget_bio(ec, bio);
			r = __ebs_discard_bio(ec, bio);
		}

		if (r < 0)
			bio->bi_status = errno_to_blk_status(r);
	}

	/*
	 * We write dirty buffers after processing I/O on them
	 * but before we endio thus addressing REQ_FUA/REQ_SYNC.
	 */
	r = write ? dm_bufio_write_dirty_buffers(ec->bufio) : 0;

	while ((bio = bio_list_pop(&bios))) {
		/* Fail writes if writing dirty buffers failed; any other request is endioed. */
		if (unlikely(r && bio_op(bio) == REQ_OP_WRITE))
			bio_io_error(bio);
		else
			bio_endio(bio);
	}
}

/*
 * Construct an emulated block size mapping: <dev_path> <offset> <ebs> [<ubs>]
 *
 * <dev_path>: path of the underlying device
 * <offset>: offset in 512-byte sectors into <dev_path>
 * <ebs>: emulated block size in units of 512-byte sectors exposed to the upper layer
 * [<ubs>]: underlying block size in units of 512-byte sectors imposed on the lower layer;
 *	    optional, if not supplied, retrieve logical block size from underlying device
 */
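/*
 * Illustrative dm table line (example values, not mandated by this file): exposing
 * 512-byte logical blocks on a 4 KiB-sector device means passing <ebs> = 1 and
 * <ubs> = 8, e.g. "0 <dev_size_in_sectors> ebs <dev_path> 0 1 8" fed to dmsetup create.
 */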
static int ebs_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned short tmp1;
	unsigned long long tmp;
	char dummy;
	struct ebs_c *ec;

	if (argc < 3 || argc > 4) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	ec = ti->private = kzalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec) {
		ti->error = "Cannot allocate ebs context";
		return -ENOMEM;
	}

	r = -EINVAL;
	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 ||
	    tmp != (sector_t)tmp ||
	    (sector_t)tmp >= ti->len) {
		ti->error = "Invalid device offset sector";
		goto bad;
	}
	ec->start = tmp;

	if (sscanf(argv[2], "%hu%c", &tmp1, &dummy) != 1 ||
	    !__ebs_check_bs(tmp1) ||
	    to_bytes(tmp1) > PAGE_SIZE) {
		ti->error = "Invalid emulated block size";
		goto bad;
	}
	ec->e_bs = tmp1;

	if (argc > 3) {
		if (sscanf(argv[3], "%hu%c", &tmp1, &dummy) != 1 || !__ebs_check_bs(tmp1)) {
			ti->error = "Invalid underlying block size";
			goto bad;
		}
		ec->u_bs = tmp1;
		ec->u_bs_set = true;
	} else
		ec->u_bs_set = false;

	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ec->dev);
	if (r) {
		ti->error = "Device lookup failed";
		ec->dev = NULL;
		goto bad;
	}

	r = -EINVAL;
	if (!ec->u_bs_set) {
		ec->u_bs = to_sector(bdev_logical_block_size(ec->dev->bdev));
		if (!__ebs_check_bs(ec->u_bs)) {
			ti->error = "Invalid retrieved underlying block size";
			goto bad;
		}
	}

	if (!ec->u_bs_set && ec->e_bs == ec->u_bs)
		DMINFO("Emulation superfluous: emulated equal to underlying block size");

	if (__block_mod(ec->start, ec->u_bs)) {
		ti->error = "Device offset must be multiple of underlying block size";
		goto bad;
	}

	ec->bufio = dm_bufio_client_create(ec->dev->bdev, to_bytes(ec->u_bs), 1, 0, NULL, NULL);
	if (IS_ERR(ec->bufio)) {
		ti->error = "Cannot create dm bufio client";
		r = PTR_ERR(ec->bufio);
		ec->bufio = NULL;
		goto bad;
	}

	ec->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!ec->wq) {
		ti->error = "Cannot create dm-" DM_MSG_PREFIX " workqueue";
		r = -ENOMEM;
		goto bad;
	}

	ec->block_shift = __ffs(ec->u_bs);
	INIT_WORK(&ec->ws, &__ebs_process_bios);
	bio_list_init(&ec->bios_in);
	spin_lock_init(&ec->lock);

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_secure_erase_bios = 0;
	ti->num_write_zeroes_bios = 0;
	return 0;
bad:
	ebs_dtr(ti);
	return r;
}

static void ebs_dtr(struct dm_target *ti)
{
	struct ebs_c *ec = ti->private;

	if (ec->wq)
		destroy_workqueue(ec->wq);
	if (ec->bufio)
		dm_bufio_client_destroy(ec->bufio);
	if (ec->dev)
		dm_put_device(ti, ec->dev);
	kfree(ec);
}

static int ebs_map(struct dm_target *ti, struct bio *bio)
{
	struct ebs_c *ec = ti->private;

	bio_set_dev(bio, ec->dev->bdev);
	bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	if (unlikely(bio_op(bio) == REQ_OP_FLUSH))
		return DM_MAPIO_REMAPPED;
	/*
	 * Only queue for bufio processing in case of partial or overlapping buffers,
	 * or when emulating with ebs == ubs to exercise dm-bufio overhead in tests.
	 */
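	/*
	 * Illustrative routing (values assumed, u_bs = 8, e_bs != u_bs): a remapped bio
	 * covering sectors 8..15 is block aligned and is remapped straight to the device
	 * below, while one covering sectors 2..5 is misaligned and is queued to the worker.
	 */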
	if (likely(__block_mod(bio->bi_iter.bi_sector, ec->u_bs) ||
		   __block_mod(bio_end_sector(bio), ec->u_bs) ||
		   ec->e_bs == ec->u_bs)) {
		spin_lock_irq(&ec->lock);
		bio_list_add(&ec->bios_in, bio);
		spin_unlock_irq(&ec->lock);

		queue_work(ec->wq, &ec->ws);

		return DM_MAPIO_SUBMITTED;
	}

	/* Forget any buffer content relative to this direct backing device I/O. */
	__ebs_forget_bio(ec, bio);

	return DM_MAPIO_REMAPPED;
}

static void ebs_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	struct ebs_c *ec = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		*result = '\0';
		break;
	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u",
			 ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs);
		break;
	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct ebs_c *ec = ti->private;
	struct dm_dev *dev = ec->dev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	*bdev = dev->bdev;
	return !!(ec->start || ti->len != bdev_nr_sectors(dev->bdev));
}

static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct ebs_c *ec = ti->private;

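	/*
	 * Illustrative result (values assumed): with e_bs = 1 and u_bs = 8 this reports
	 * a 512-byte logical and a 4096-byte physical block size to the upper layer.
	 */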
	limits->logical_block_size = to_bytes(ec->e_bs);
	limits->physical_block_size = to_bytes(ec->u_bs);
	limits->alignment_offset = limits->physical_block_size;
	blk_limits_io_min(limits, limits->logical_block_size);
}

static int ebs_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct ebs_c *ec = ti->private;

	return fn(ti, ec->dev, ec->start, ti->len, data);
}

static struct target_type ebs_target = {
	.name		 = "ebs",
	.version	 = {1, 0, 1},
	.features	 = DM_TARGET_PASSES_INTEGRITY,
	.module		 = THIS_MODULE,
	.ctr		 = ebs_ctr,
	.dtr		 = ebs_dtr,
	.map		 = ebs_map,
	.status		 = ebs_status,
	.io_hints	 = ebs_io_hints,
	.prepare_ioctl	 = ebs_prepare_ioctl,
	.iterate_devices = ebs_iterate_devices,
};

static int __init dm_ebs_init(void)
{
	int r = dm_register_target(&ebs_target);

	if (r < 0)
		DMERR("register failed %d", r);

	return r;
}

static void dm_ebs_exit(void)
{
	dm_unregister_target(&ebs_target);
}

module_init(dm_ebs_init);
module_exit(dm_ebs_exit);

MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " emulated block size target");
MODULE_LICENSE("GPL");