cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

map_benchmark.c (8834B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 HiSilicon Limited.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/map_benchmark.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>

struct map_benchmark_data {
	struct map_benchmark bparam;
	struct device *dev;
	struct dentry *debugfs;
	enum dma_data_direction dir;
	atomic64_t sum_map_100ns;
	atomic64_t sum_unmap_100ns;
	atomic64_t sum_sq_map;
	atomic64_t sum_sq_unmap;
	atomic64_t loops;
};
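
/*
 * Latencies are accumulated in units of 100ns so that both the sums and
 * the sums of squares stay comfortably within 64 bits. Keeping sum(x) and
 * sum(x^2) per direction lets do_map_benchmark() derive mean and variance
 * in a single pass: Var(X) = E[X^2] - (E[X])^2.
 */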

static int map_benchmark_thread(void *data)
{
	void *buf;
	dma_addr_t dma_addr;
	struct map_benchmark_data *map = data;
	int npages = map->bparam.granule;
	u64 size = npages * PAGE_SIZE;
	int ret = 0;

	buf = alloc_pages_exact(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

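	/*
	 * Each thread repeatedly maps and unmaps its own buffer of
	 * 'granule' pages until do_map_benchmark() calls kthread_stop().
	 */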
	while (!kthread_should_stop()) {
		u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
		ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
		ktime_t map_delta, unmap_delta;

		/*
		 * For a non-coherent device, if we don't dirty the buffer
		 * in the cache, this will give an underestimate of the
		 * real-world overhead of BIDIRECTIONAL or TO_DEVICE
		 * mappings; 66 means everything goes well! 66 is lucky.
		 */
		if (map->dir != DMA_FROM_DEVICE)
			memset(buf, 0x66, size);

		map_stime = ktime_get();
		dma_addr = dma_map_single(map->dev, buf, size, map->dir);
		if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
			pr_err("dma_map_single failed on %s\n",
				dev_name(map->dev));
			ret = -ENOMEM;
			goto out;
		}
		map_etime = ktime_get();
		map_delta = ktime_sub(map_etime, map_stime);

		/* Pretend DMA is transmitting */
		ndelay(map->bparam.dma_trans_ns);

		unmap_stime = ktime_get();
		dma_unmap_single(map->dev, dma_addr, size, map->dir);
		unmap_etime = ktime_get();
		unmap_delta = ktime_sub(unmap_etime, unmap_stime);

		/* calculate sum and sum of squares */

		map_100ns = div64_ul(map_delta, 100);
		unmap_100ns = div64_ul(unmap_delta, 100);
		map_sq = map_100ns * map_100ns;
		unmap_sq = unmap_100ns * unmap_100ns;

		atomic64_add(map_100ns, &map->sum_map_100ns);
		atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
		atomic64_add(map_sq, &map->sum_sq_map);
		atomic64_add(unmap_sq, &map->sum_sq_unmap);
		atomic64_inc(&map->loops);
	}

out:
	free_pages_exact(buf, size);
	return ret;
}

static int do_map_benchmark(struct map_benchmark_data *map)
{
	struct task_struct **tsk;
	int threads = map->bparam.threads;
	int node = map->bparam.node;
	u64 loops;
	int ret = 0;
	int i;

	tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
	if (!tsk)
		return -ENOMEM;

	get_device(map->dev);

	for (i = 0; i < threads; i++) {
		tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
				map->bparam.node, "dma-map-benchmark/%d", i);
		if (IS_ERR(tsk[i])) {
			pr_err("create dma_map thread failed\n");
			ret = PTR_ERR(tsk[i]);
			while (--i >= 0)
				kthread_stop(tsk[i]);
			goto out;
		}

		if (node != NUMA_NO_NODE)
			kthread_bind_mask(tsk[i], cpumask_of_node(node));
	}

	/* clear the old values from the previous benchmark */
	atomic64_set(&map->sum_map_100ns, 0);
	atomic64_set(&map->sum_unmap_100ns, 0);
	atomic64_set(&map->sum_sq_map, 0);
	atomic64_set(&map->sum_sq_unmap, 0);
	atomic64_set(&map->loops, 0);

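	/*
	 * Take a reference before waking each thread: a thread can exit on
	 * its own (e.g. on allocation failure), and kthread_stop() below
	 * needs the task_struct to remain valid until it has been called.
	 */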
	for (i = 0; i < threads; i++) {
		get_task_struct(tsk[i]);
		wake_up_process(tsk[i]);
	}

	msleep_interruptible(map->bparam.seconds * 1000);

	/* wait for the completion of all started benchmark threads */
	for (i = 0; i < threads; i++) {
		int kthread_ret = kthread_stop(tsk[i]);

		if (kthread_ret)
			ret = kthread_ret;

		put_task_struct(tsk[i]);
	}

	if (ret)
		goto out;

	loops = atomic64_read(&map->loops);
	if (likely(loops > 0)) {
		u64 map_variance, unmap_variance;
		u64 sum_map = atomic64_read(&map->sum_map_100ns);
		u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
		u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
		u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);

		/* average latency */
		map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
		map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);

		/* standard deviation of latency */
		map_variance = div64_u64(sum_sq_map, loops) -
				map->bparam.avg_map_100ns *
				map->bparam.avg_map_100ns;
		unmap_variance = div64_u64(sum_sq_unmap, loops) -
				map->bparam.avg_unmap_100ns *
				map->bparam.avg_unmap_100ns;
		map->bparam.map_stddev = int_sqrt64(map_variance);
		map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
	}

out:
	put_device(map->dev);
	kfree(tsk);
	return ret;
}

static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	struct map_benchmark_data *map = file->private_data;
	void __user *argp = (void __user *)arg;
	u64 old_dma_mask;
	int ret;

	if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
		return -EFAULT;

	switch (cmd) {
	case DMA_MAP_BENCHMARK:
		if (map->bparam.threads == 0 ||
		    map->bparam.threads > DMA_MAP_MAX_THREADS) {
			pr_err("invalid thread number\n");
			return -EINVAL;
		}

		if (map->bparam.seconds == 0 ||
		    map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
			pr_err("invalid duration seconds\n");
			return -EINVAL;
		}

		if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
			pr_err("invalid transmission delay\n");
			return -EINVAL;
		}

		if (map->bparam.node != NUMA_NO_NODE &&
		    !node_possible(map->bparam.node)) {
			pr_err("invalid numa node\n");
			return -EINVAL;
		}

		if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
			pr_err("invalid granule size\n");
			return -EINVAL;
		}
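
		/*
		 * granule is the number of pages mapped per operation; with
		 * the 1024-page cap each map_benchmark_thread() buffer is
		 * at most 4 MiB on a 4 KiB-page system.
		 */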

		switch (map->bparam.dma_dir) {
		case DMA_MAP_BIDIRECTIONAL:
			map->dir = DMA_BIDIRECTIONAL;
			break;
		case DMA_MAP_FROM_DEVICE:
			map->dir = DMA_FROM_DEVICE;
			break;
		case DMA_MAP_TO_DEVICE:
			map->dir = DMA_TO_DEVICE;
			break;
		default:
			pr_err("invalid DMA direction\n");
			return -EINVAL;
		}

		old_dma_mask = dma_get_mask(map->dev);

		ret = dma_set_mask(map->dev,
				   DMA_BIT_MASK(map->bparam.dma_bits));
		if (ret) {
			pr_err("failed to set dma_mask on device %s\n",
				dev_name(map->dev));
			return -EINVAL;
		}

		ret = do_map_benchmark(map);

		/*
		 * Restore the original dma_mask: many devices' dma_mask is
		 * set by architecture code, ACPI or the bus. When a device
		 * is bound back to its original driver, that driver should
		 * not see a dma_mask changed by the benchmark.
		 */
		dma_set_mask(map->dev, old_dma_mask);
		break;
	default:
		return -EINVAL;
	}

	if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
		return -EFAULT;

	return ret;
}
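
/*
 * Userspace usage sketch (assumes debugfs is mounted at /sys/kernel/debug
 * and the UAPI definitions from <linux/map_benchmark.h>; field layout per
 * that header):
 *
 *	struct map_benchmark bp = {
 *		.threads = 1,
 *		.seconds = 20,
 *		.node = -1,			// NUMA_NO_NODE: no binding
 *		.dma_bits = 32,
 *		.dma_dir = DMA_MAP_BIDIRECTIONAL,
 *		.granule = 1,			// pages per map/unmap
 *	};
 *	int fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDONLY);
 *
 *	if (fd >= 0 && ioctl(fd, DMA_MAP_BENCHMARK, &bp) == 0)
 *		printf("map: avg %llu x 100ns, stddev %llu\n",
 *		       (unsigned long long)bp.avg_map_100ns,
 *		       (unsigned long long)bp.map_stddev);
 */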

static const struct file_operations map_benchmark_fops = {
	.open			= simple_open,
	.unlocked_ioctl		= map_benchmark_ioctl,
};

static void map_benchmark_remove_debugfs(void *data)
{
	struct map_benchmark_data *map = (struct map_benchmark_data *)data;

	debugfs_remove(map->debugfs);
}

static int __map_benchmark_probe(struct device *dev)
{
	struct dentry *entry;
	struct map_benchmark_data *map;
	int ret;

	map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
	if (!map)
		return -ENOMEM;
	map->dev = dev;

	ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
	if (ret) {
		pr_err("Can't add debugfs remove action\n");
		return ret;
	}

	/*
	 * Only one device may be bound to this driver; a second probe
	 * fails because the debugfs file already exists.
	 */
	entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
			&map_benchmark_fops);
	if (IS_ERR(entry))
		return PTR_ERR(entry);
	map->debugfs = entry;

	return 0;
}

static int map_benchmark_platform_probe(struct platform_device *pdev)
{
	return __map_benchmark_probe(&pdev->dev);
}

static struct platform_driver map_benchmark_platform_driver = {
	.driver		= {
		.name	= "dma_map_benchmark",
	},
	.probe = map_benchmark_platform_probe,
};

static int
map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	return __map_benchmark_probe(&pdev->dev);
}

static struct pci_driver map_benchmark_pci_driver = {
	.name	= "dma_map_benchmark",
	.probe	= map_benchmark_pci_probe,
};
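
/*
 * Neither driver has a match/id table, so no device binds automatically.
 * One common route is the sysfs driver_override mechanism (the device
 * path below is illustrative), e.g. for a PCI function:
 *
 *	echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override
 *	echo 0000:00:01.0 > /sys/bus/pci/devices/0000:00:01.0/driver/unbind
 *	echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind
 */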

static int __init map_benchmark_init(void)
{
	int ret;

	ret = pci_register_driver(&map_benchmark_pci_driver);
	if (ret)
		return ret;

	ret = platform_driver_register(&map_benchmark_platform_driver);
	if (ret) {
		pci_unregister_driver(&map_benchmark_pci_driver);
		return ret;
	}

	return 0;
}

static void __exit map_benchmark_cleanup(void)
{
	platform_driver_unregister(&map_benchmark_platform_driver);
	pci_unregister_driver(&map_benchmark_pci_driver);
}

module_init(map_benchmark_init);
module_exit(map_benchmark_cleanup);

MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
MODULE_DESCRIPTION("dma_map benchmark driver");
MODULE_LICENSE("GPL");