cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dm-switch.c (15714B)


/*
 * Copyright (C) 2010-2012 by Dell Inc.  All rights reserved.
 * Copyright (C) 2011-2013 Red Hat, Inc.
 *
 * This file is released under the GPL.
 *
 * dm-switch is a device-mapper target that maps IO to underlying block
 * devices efficiently when there are a large number of fixed-size
 * address regions but no simple pattern would allow a compact mapping
 * representation such as dm-stripe.
 */

#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "switch"

/*
 * One region_table_slot_t holds <region_entries_per_slot> region table
 * entries each of which is <region_table_entry_bits> in size.
 */
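/*
 * For example (illustrative): on a 64-bit kernel with two paths each entry
 * needs 1 bit, so one 64-bit slot packs 64 entries; with up to 16 paths,
 * entries are 4 bits wide and a slot packs 16.
 */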
typedef unsigned long region_table_slot_t;

/*
 * A device with the offset to its start sector.
 */
struct switch_path {
	struct dm_dev *dmdev;
	sector_t start;
};

/*
 * Context block for a dm switch device.
 */
struct switch_ctx {
	struct dm_target *ti;

	unsigned nr_paths;		/* Number of paths in path_list. */

	unsigned region_size;		/* Region size in 512-byte sectors */
	unsigned long nr_regions;	/* Number of regions making up the device */
	signed char region_size_bits;	/* log2 of region_size or -1 */

	unsigned char region_table_entry_bits;	/* Number of bits in one region table entry */
	unsigned char region_entries_per_slot;	/* Number of entries in one region table slot */
	signed char region_entries_per_slot_bits;	/* log2 of region_entries_per_slot or -1 */

	region_table_slot_t *region_table;	/* Region table */

	/*
	 * Array of dm devices to switch between.
	 */
	struct switch_path path_list[];
};

static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_paths,
					   unsigned region_size)
{
	struct switch_ctx *sctx;

	sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL);
	if (!sctx)
		return NULL;

	sctx->ti = ti;
	sctx->region_size = region_size;

	ti->private = sctx;

	return sctx;
}

static int alloc_region_table(struct dm_target *ti, unsigned nr_paths)
{
	struct switch_ctx *sctx = ti->private;
	sector_t nr_regions = ti->len;
	sector_t nr_slots;

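	/*
	 * (x & (x - 1)) == 0 tests whether x is a power of 2; if region_size
	 * is one, the hot path can use a shift instead of a division.
	 */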
	if (!(sctx->region_size & (sctx->region_size - 1)))
		sctx->region_size_bits = __ffs(sctx->region_size);
	else
		sctx->region_size_bits = -1;

	sctx->region_table_entry_bits = 1;
	while (sctx->region_table_entry_bits < sizeof(region_table_slot_t) * 8 &&
	       (region_table_slot_t)1 << sctx->region_table_entry_bits < nr_paths)
		sctx->region_table_entry_bits++;

	sctx->region_entries_per_slot = (sizeof(region_table_slot_t) * 8) / sctx->region_table_entry_bits;
	if (!(sctx->region_entries_per_slot & (sctx->region_entries_per_slot - 1)))
		sctx->region_entries_per_slot_bits = __ffs(sctx->region_entries_per_slot);
	else
		sctx->region_entries_per_slot_bits = -1;

	if (sector_div(nr_regions, sctx->region_size))
		nr_regions++;

	if (nr_regions >= ULONG_MAX) {
		ti->error = "Region table too large";
		return -EINVAL;
	}
	sctx->nr_regions = nr_regions;

	nr_slots = nr_regions;
	if (sector_div(nr_slots, sctx->region_entries_per_slot))
		nr_slots++;

	if (nr_slots > ULONG_MAX / sizeof(region_table_slot_t)) {
		ti->error = "Region table too large";
		return -EINVAL;
	}

	sctx->region_table = vmalloc(array_size(nr_slots,
						sizeof(region_table_slot_t)));
	if (!sctx->region_table) {
		ti->error = "Cannot allocate region table";
		return -ENOMEM;
	}

	return 0;
}

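/*
 * Convert a region number into the index of the slot that holds its entry
 * and the bit offset of that entry within the slot.
 */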
static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr,
				unsigned long *region_index, unsigned *bit)
{
	if (sctx->region_entries_per_slot_bits >= 0) {
		*region_index = region_nr >> sctx->region_entries_per_slot_bits;
		*bit = region_nr & (sctx->region_entries_per_slot - 1);
	} else {
		*region_index = region_nr / sctx->region_entries_per_slot;
		*bit = region_nr % sctx->region_entries_per_slot;
	}

	*bit *= sctx->region_table_entry_bits;
}

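/*
 * Read one region table entry without locking; READ_ONCE keeps the compiler
 * from tearing or re-reading the slot while a concurrent message updates it.
 */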
static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
{
	unsigned long region_index;
	unsigned bit;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	return (READ_ONCE(sctx->region_table[region_index]) >> bit) &
		((1 << sctx->region_table_entry_bits) - 1);
}

/*
 * Find which path to use at a given offset.
 */
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{
	unsigned path_nr;
	sector_t p;

	p = offset;
	if (sctx->region_size_bits >= 0)
		p >>= sctx->region_size_bits;
	else
		sector_div(p, sctx->region_size);

	path_nr = switch_region_table_read(sctx, p);

	/* This can only happen if the processor uses non-atomic stores. */
	if (unlikely(path_nr >= sctx->nr_paths))
		path_nr = 0;

	return path_nr;
}

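/*
 * Update one region table entry with a read-modify-write of its slot.
 * Writers are serialized (table construction, or message_mutex), so a plain
 * store suffices; lockless readers pair with the READ_ONCE above.
 */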
static void switch_region_table_write(struct switch_ctx *sctx, unsigned long region_nr,
				      unsigned value)
{
	unsigned long region_index;
	unsigned bit;
	region_table_slot_t pte;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	pte = sctx->region_table[region_index];
	pte &= ~((((region_table_slot_t)1 << sctx->region_table_entry_bits) - 1) << bit);
	pte |= (region_table_slot_t)value << bit;
	sctx->region_table[region_index] = pte;
}

/*
 * Fill the region table with an initial round-robin pattern.
 */
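/* e.g. with 3 paths: region 0 -> path 0, 1 -> 1, 2 -> 2, 3 -> 0, ... */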
static void initialise_region_table(struct switch_ctx *sctx)
{
	unsigned path_nr = 0;
	unsigned long region_nr;

	for (region_nr = 0; region_nr < sctx->nr_regions; region_nr++) {
		switch_region_table_write(sctx, region_nr, path_nr);
		if (++path_nr >= sctx->nr_paths)
			path_nr = 0;
	}
}

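/*
 * Parse one <dev_path> <offset> argument pair, taking a reference on the
 * underlying device and recording its start offset.
 */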
static int parse_path(struct dm_arg_set *as, struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;
	unsigned long long start;
	int r;

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &sctx->path_list[sctx->nr_paths].dmdev);
	if (r) {
		ti->error = "Device lookup failed";
		return r;
	}

	if (kstrtoull(dm_shift_arg(as), 10, &start) || start != (sector_t)start) {
		ti->error = "Invalid device starting offset";
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);
		return -EINVAL;
	}

	sctx->path_list[sctx->nr_paths].start = start;

	sctx->nr_paths++;

	return 0;
}

/*
 * Destructor: Don't free the dm_target, just the ti->private data (if any).
 */
static void switch_dtr(struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;

	while (sctx->nr_paths--)
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);

	vfree(sctx->region_table);
	kfree(sctx);
}

/*
 * Constructor arguments:
 *   <num_paths> <region_size> <num_optional_args> [<optional_args>...]
 *   [<dev_path> <offset>]+
 *
 * Optional args allow for future extension; currently <num_optional_args>
 * must be 0.
 */
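/*
 * Example table line (illustrative; device names and sizes are placeholders):
 *   dmsetup create sw --table "0 <dev_sectors> switch 2 65536 0 /dev/sda 0 /dev/sdb 0"
 * builds a two-path switch with 65536-sector (32 MiB) regions, both paths
 * starting at offset 0.
 */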
static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	static const struct dm_arg _args[] = {
		{1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"},
		{1, UINT_MAX, "Invalid region size"},
		{0, 0, "Invalid number of optional args"},
	};

	struct switch_ctx *sctx;
	struct dm_arg_set as;
	unsigned nr_paths, region_size, nr_optional_args;
	int r;

	as.argc = argc;
	as.argv = argv;

	r = dm_read_arg(_args, &as, &nr_paths, &ti->error);
	if (r)
		return -EINVAL;

	r = dm_read_arg(_args + 1, &as, &region_size, &ti->error);
	if (r)
		return r;

	r = dm_read_arg_group(_args + 2, &as, &nr_optional_args, &ti->error);
	if (r)
		return r;
	/* parse optional arguments here, if we add any */

	if (as.argc != nr_paths * 2) {
		ti->error = "Incorrect number of path arguments";
		return -EINVAL;
	}

	sctx = alloc_switch_ctx(ti, nr_paths, region_size);
	if (!sctx) {
		ti->error = "Cannot allocate redirection context";
		return -ENOMEM;
	}

	r = dm_set_target_max_io_len(ti, region_size);
	if (r)
		goto error;

	while (as.argc) {
		r = parse_path(&as, ti);
		if (r)
			goto error;
	}

	r = alloc_region_table(ti, nr_paths);
	if (r)
		goto error;

	initialise_region_table(sctx);

	/* For UNMAP, sending the request down any path is sufficient */
	ti->num_discard_bios = 1;

	return 0;

error:
	switch_dtr(ti);

	return r;
}

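/*
 * Map a bio: look up the path for the region containing its first sector,
 * then redirect the bio to that device at the path's start offset.
 */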
static int switch_map(struct dm_target *ti, struct bio *bio)
{
	struct switch_ctx *sctx = ti->private;
	sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
	unsigned path_nr = switch_get_path_nr(sctx, offset);

	bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev);
	bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;

	return DM_MAPIO_REMAPPED;
}

/*
 * We need to parse hex numbers in the message as quickly as possible.
 *
 * This table-based hex parser improves performance: it reduces the time
 * needed to load 1,000,000 entries compared to a condition-based parser.
 *		table-based parser	condition-based parser
 * PA-RISC	0.29s			0.31s
 * Opteron	0.0495s			0.0498s
 */
static const unsigned char hex_table[256] = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};

static __always_inline unsigned long parse_hex(const char **string)
{
	unsigned char d;
	unsigned long r = 0;

	while ((d = hex_table[(unsigned char)**string]) < 16) {
		r = (r << 4) | d;
		(*string)++;
	}

	return r;
}

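/*
 * Argument syntax (all numbers are hex, as parsed by parse_hex() above):
 *   <index>:<path_nr>	set region <index> to map to path <path_nr>
 *   :<path_nr>		set the region after the previous one
 *   R<n>,<m>		write <m> further regions, each copying the mapping
 *			from <n> regions back, i.e. repeat the preceding
 *			<n>-entry cycle
 */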
static int process_set_region_mappings(struct switch_ctx *sctx,
				       unsigned argc, char **argv)
{
	unsigned i;
	unsigned long region_index = 0;

	for (i = 1; i < argc; i++) {
		unsigned long path_nr;
		const char *string = argv[i];

		if ((*string & 0xdf) == 'R') {
			unsigned long cycle_length, num_write;

			string++;
			if (unlikely(*string == ',')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			cycle_length = parse_hex(&string);
			if (unlikely(*string != ',')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			string++;
			if (unlikely(!*string)) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			num_write = parse_hex(&string);
			if (unlikely(*string)) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}

			if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
				DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
				       cycle_length - 1, region_index);
				return -EINVAL;
			}
			if (unlikely(region_index + num_write < region_index) ||
			    unlikely(region_index + num_write >= sctx->nr_regions)) {
				DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
				       region_index, num_write, sctx->nr_regions);
				return -EINVAL;
			}

			while (num_write--) {
				region_index++;
				path_nr = switch_region_table_read(sctx, region_index - cycle_length);
				switch_region_table_write(sctx, region_index, path_nr);
			}

			continue;
		}

		if (*string == ':')
			region_index++;
		else {
			region_index = parse_hex(&string);
			if (unlikely(*string != ':')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
		}

		string++;
		if (unlikely(!*string)) {
			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
			return -EINVAL;
		}

		path_nr = parse_hex(&string);
		if (unlikely(*string)) {
			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
			return -EINVAL;
		}
		if (unlikely(region_index >= sctx->nr_regions)) {
			DMWARN("invalid set_region_mappings region number: %lu >= %lu", region_index, sctx->nr_regions);
			return -EINVAL;
		}
		if (unlikely(path_nr >= sctx->nr_paths)) {
			DMWARN("invalid set_region_mappings device: %lu >= %u", path_nr, sctx->nr_paths);
			return -EINVAL;
		}

		switch_region_table_write(sctx, region_index, path_nr);
	}

	return 0;
}

/*
 * Messages are processed one-at-a-time.
 *
 * Only set_region_mappings is supported.
 */
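/*
 * Example (illustrative; "sw" is a placeholder device name):
 *   dmsetup message sw 0 set_region_mappings 0:0 :1 R2,a
 * maps region 0 to path 0, region 1 to path 1, then repeats that 2-entry
 * cycle for the next 0xa (10) regions.
 */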
static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
			  char *result, unsigned maxlen)
{
	static DEFINE_MUTEX(message_mutex);

	struct switch_ctx *sctx = ti->private;
	int r = -EINVAL;

	mutex_lock(&message_mutex);

	if (!strcasecmp(argv[0], "set_region_mappings"))
		r = process_set_region_mappings(sctx, argc, argv);
	else
		DMWARN("Unrecognised message received.");

	mutex_unlock(&message_mutex);

	return r;
}

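/*
 * STATUSTYPE_TABLE re-emits the constructor arguments so the table line can
 * be reloaded as-is; STATUSTYPE_INFO and STATUSTYPE_IMA report nothing.
 */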
static void switch_status(struct dm_target *ti, status_type_t type,
			  unsigned status_flags, char *result, unsigned maxlen)
{
	struct switch_ctx *sctx = ti->private;
	unsigned sz = 0;
	int path_nr;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%u %u 0", sctx->nr_paths, sctx->region_size);
		for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++)
			DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
			       (unsigned long long)sctx->path_list[path_nr].start);
		break;

	case STATUSTYPE_IMA:
		result[0] = '\0';
		break;
	}
}

/*
 * Switch ioctl:
 *
 * Pass all ioctls through to the path for sector 0.
 */
static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct switch_ctx *sctx = ti->private;
	unsigned path_nr;

	path_nr = switch_get_path_nr(sctx, 0);

	*bdev = sctx->path_list[path_nr].dmdev->bdev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (ti->len + sctx->path_list[path_nr].start !=
	    bdev_nr_sectors((*bdev)))
		return 1;
	return 0;
}

static int switch_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct switch_ctx *sctx = ti->private;
	int path_nr;
	int r;

	for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) {
		r = fn(ti, sctx->path_list[path_nr].dmdev,
			 sctx->path_list[path_nr].start, ti->len, data);
		if (r)
			return r;
	}

	return 0;
}

static struct target_type switch_target = {
	.name = "switch",
	.version = {1, 1, 0},
	.features = DM_TARGET_NOWAIT,
	.module = THIS_MODULE,
	.ctr = switch_ctr,
	.dtr = switch_dtr,
	.map = switch_map,
	.message = switch_message,
	.status = switch_status,
	.prepare_ioctl = switch_prepare_ioctl,
	.iterate_devices = switch_iterate_devices,
};

static int __init dm_switch_init(void)
{
	int r;

	r = dm_register_target(&switch_target);
	if (r < 0)
		DMERR("dm_register_target() failed %d", r);

	return r;
}

static void __exit dm_switch_exit(void)
{
	dm_unregister_target(&switch_target);
}

module_init(dm_switch_init);
module_exit(dm_switch_exit);

MODULE_DESCRIPTION(DM_NAME " dynamic path switching target");
MODULE_AUTHOR("Kevin D. O'Kelley <Kevin_OKelley@dell.com>");
MODULE_AUTHOR("Narendran Ganapathy <Narendran_Ganapathy@dell.com>");
MODULE_AUTHOR("Jim Ramsay <Jim_Ramsay@dell.com>");
MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
MODULE_LICENSE("GPL");