cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

qemu_fw_cfg.c (25738B)


      1/*
      2 * drivers/firmware/qemu_fw_cfg.c
      3 *
      4 * Copyright 2015 Carnegie Mellon University
      5 *
      6 * Expose entries from QEMU's firmware configuration (fw_cfg) device in
      7 * sysfs (read-only, under "/sys/firmware/qemu_fw_cfg/...").
      8 *
      9 * The fw_cfg device may be instantiated via either an ACPI node (on x86
     10 * and select subsets of aarch64), a Device Tree node (on arm), or using
     11 * a kernel module (or command line) parameter with the following syntax:
     12 *
     13 *      [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
     14 * or
     15 *      [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
     16 *
     17 * where:
     18 *      <size>     := size of ioport or mmio range
     19 *      <base>     := physical base address of ioport or mmio range
     20 *      <ctrl_off> := (optional) offset of control register
     21 *      <data_off> := (optional) offset of data register
     22 *      <dma_off> := (optional) offset of dma register
     23 *
     24 * e.g.:
     25 *      qemu_fw_cfg.ioport=12@0x510:0:1:4	(the default on x86)
     26 * or
     27 *      qemu_fw_cfg.mmio=16@0x9020000:8:0:16	(the default on arm)
     28 */
     29
     30#include <linux/module.h>
     31#include <linux/mod_devicetable.h>
     32#include <linux/platform_device.h>
     33#include <linux/acpi.h>
     34#include <linux/slab.h>
     35#include <linux/io.h>
     36#include <linux/ioport.h>
     37#include <uapi/linux/qemu_fw_cfg.h>
     38#include <linux/delay.h>
     39#include <linux/crash_dump.h>
     40#include <linux/crash_core.h>
     41
     42MODULE_AUTHOR("Gabriel L. Somlo <somlo@cmu.edu>");
     43MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
     44MODULE_LICENSE("GPL");
     45
     46/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
     47static u32 fw_cfg_rev;
     48
     49/* fw_cfg device i/o register addresses */
     50static bool fw_cfg_is_mmio;
     51static phys_addr_t fw_cfg_p_base;
     52static resource_size_t fw_cfg_p_size;
     53static void __iomem *fw_cfg_dev_base;
     54static void __iomem *fw_cfg_reg_ctrl;
     55static void __iomem *fw_cfg_reg_data;
     56static void __iomem *fw_cfg_reg_dma;
     57
     58/* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
     59static DEFINE_MUTEX(fw_cfg_dev_lock);
     60
     61/* pick appropriate endianness for selector key */
     62static void fw_cfg_sel_endianness(u16 key)
     63{
     64	if (fw_cfg_is_mmio)
     65		iowrite16be(key, fw_cfg_reg_ctrl);
     66	else
     67		iowrite16(key, fw_cfg_reg_ctrl);
     68}
     69
     70#ifdef CONFIG_CRASH_CORE
     71static inline bool fw_cfg_dma_enabled(void)
     72{
     73	return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
     74}
     75
     76/* qemu fw_cfg device is sync today, but spec says it may become async */
     77static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
     78{
     79	for (;;) {
     80		u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
     81
     82		/* do not reorder the read to d->control */
     83		rmb();
     84		if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
     85			return;
     86
     87		cpu_relax();
     88	}
     89}
     90
     91static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
     92{
     93	phys_addr_t dma;
     94	struct fw_cfg_dma_access *d = NULL;
     95	ssize_t ret = length;
     96
     97	d = kmalloc(sizeof(*d), GFP_KERNEL);
     98	if (!d) {
     99		ret = -ENOMEM;
    100		goto end;
    101	}
    102
    103	/* fw_cfg device does not need IOMMU protection, so use physical addresses */
    104	*d = (struct fw_cfg_dma_access) {
    105		.address = cpu_to_be64(address ? virt_to_phys(address) : 0),
    106		.length = cpu_to_be32(length),
    107		.control = cpu_to_be32(control)
    108	};
    109
    110	dma = virt_to_phys(d);
    111
    112	iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
    113	/* force memory to sync before notifying device via MMIO */
    114	wmb();
    115	iowrite32be(dma, fw_cfg_reg_dma + 4);
    116
    117	fw_cfg_wait_for_control(d);
    118
    119	if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
    120		ret = -EIO;
    121	}
    122
    123end:
    124	kfree(d);
    125
    126	return ret;
    127}
    128#endif
    129
    130/* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
    131static ssize_t fw_cfg_read_blob(u16 key,
    132				void *buf, loff_t pos, size_t count)
    133{
    134	u32 glk = -1U;
    135	acpi_status status;
    136
    137	/* If we have ACPI, ensure mutual exclusion against any potential
    138	 * device access by the firmware, e.g. via AML methods:
    139	 */
    140	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
    141	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
    142		/* Should never get here */
    143		WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
    144		memset(buf, 0, count);
    145		return -EINVAL;
    146	}
    147
    148	mutex_lock(&fw_cfg_dev_lock);
    149	fw_cfg_sel_endianness(key);
    150	while (pos-- > 0)
    151		ioread8(fw_cfg_reg_data);
    152	ioread8_rep(fw_cfg_reg_data, buf, count);
    153	mutex_unlock(&fw_cfg_dev_lock);
    154
    155	acpi_release_global_lock(glk);
    156	return count;
    157}
    158
    159#ifdef CONFIG_CRASH_CORE
    160/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
    161static ssize_t fw_cfg_write_blob(u16 key,
    162				 void *buf, loff_t pos, size_t count)
    163{
    164	u32 glk = -1U;
    165	acpi_status status;
    166	ssize_t ret = count;
    167
    168	/* If we have ACPI, ensure mutual exclusion against any potential
    169	 * device access by the firmware, e.g. via AML methods:
    170	 */
    171	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
    172	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
    173		/* Should never get here */
    174		WARN(1, "%s: Failed to lock ACPI!\n", __func__);
    175		return -EINVAL;
    176	}
    177
    178	mutex_lock(&fw_cfg_dev_lock);
    179	if (pos == 0) {
    180		ret = fw_cfg_dma_transfer(buf, count, key << 16
    181					  | FW_CFG_DMA_CTL_SELECT
    182					  | FW_CFG_DMA_CTL_WRITE);
    183	} else {
    184		fw_cfg_sel_endianness(key);
    185		ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
    186		if (ret < 0)
    187			goto end;
    188		ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
    189	}
    190
    191end:
    192	mutex_unlock(&fw_cfg_dev_lock);
    193
    194	acpi_release_global_lock(glk);
    195
    196	return ret;
    197}
    198#endif /* CONFIG_CRASH_CORE */
    199
    200/* clean up fw_cfg device i/o */
    201static void fw_cfg_io_cleanup(void)
    202{
    203	if (fw_cfg_is_mmio) {
    204		iounmap(fw_cfg_dev_base);
    205		release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
    206	} else {
    207		ioport_unmap(fw_cfg_dev_base);
    208		release_region(fw_cfg_p_base, fw_cfg_p_size);
    209	}
    210}
    211
    212/* arch-specific ctrl & data register offsets are not available in ACPI, DT */
    213#if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
    214# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
    215#  define FW_CFG_CTRL_OFF 0x08
    216#  define FW_CFG_DATA_OFF 0x00
    217#  define FW_CFG_DMA_OFF 0x10
    218# elif defined(CONFIG_PARISC)	/* parisc */
    219#  define FW_CFG_CTRL_OFF 0x00
    220#  define FW_CFG_DATA_OFF 0x04
    221# elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m */
    222#  define FW_CFG_CTRL_OFF 0x00
    223#  define FW_CFG_DATA_OFF 0x02
    224# elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
    225#  define FW_CFG_CTRL_OFF 0x00
    226#  define FW_CFG_DATA_OFF 0x01
    227#  define FW_CFG_DMA_OFF 0x04
    228# else
    229#  error "QEMU FW_CFG not available on this architecture!"
    230# endif
    231#endif
    232
    233/* initialize fw_cfg device i/o from platform data */
    234static int fw_cfg_do_platform_probe(struct platform_device *pdev)
    235{
    236	char sig[FW_CFG_SIG_SIZE];
    237	struct resource *range, *ctrl, *data, *dma;
    238
    239	/* acquire i/o range details */
    240	fw_cfg_is_mmio = false;
    241	range = platform_get_resource(pdev, IORESOURCE_IO, 0);
    242	if (!range) {
    243		fw_cfg_is_mmio = true;
    244		range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    245		if (!range)
    246			return -EINVAL;
    247	}
    248	fw_cfg_p_base = range->start;
    249	fw_cfg_p_size = resource_size(range);
    250
    251	if (fw_cfg_is_mmio) {
    252		if (!request_mem_region(fw_cfg_p_base,
    253					fw_cfg_p_size, "fw_cfg_mem"))
    254			return -EBUSY;
    255		fw_cfg_dev_base = ioremap(fw_cfg_p_base, fw_cfg_p_size);
    256		if (!fw_cfg_dev_base) {
    257			release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
    258			return -EFAULT;
    259		}
    260	} else {
    261		if (!request_region(fw_cfg_p_base,
    262				    fw_cfg_p_size, "fw_cfg_io"))
    263			return -EBUSY;
    264		fw_cfg_dev_base = ioport_map(fw_cfg_p_base, fw_cfg_p_size);
    265		if (!fw_cfg_dev_base) {
    266			release_region(fw_cfg_p_base, fw_cfg_p_size);
    267			return -EFAULT;
    268		}
    269	}
    270
    271	/* were custom register offsets provided (e.g. on the command line)? */
    272	ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
    273	data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
    274	dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
    275	if (ctrl && data) {
    276		fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
    277		fw_cfg_reg_data = fw_cfg_dev_base + data->start;
    278	} else {
    279		/* use architecture-specific offsets */
    280		fw_cfg_reg_ctrl = fw_cfg_dev_base + FW_CFG_CTRL_OFF;
    281		fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
    282	}
    283
    284	if (dma)
    285		fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
    286#ifdef FW_CFG_DMA_OFF
    287	else
    288		fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
    289#endif
    290
    291	/* verify fw_cfg device signature */
    292	if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
    293				0, FW_CFG_SIG_SIZE) < 0 ||
    294		memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
    295		fw_cfg_io_cleanup();
    296		return -ENODEV;
    297	}
    298
    299	return 0;
    300}
    301
    302static ssize_t fw_cfg_showrev(struct kobject *k, struct kobj_attribute *a,
    303			      char *buf)
    304{
    305	return sprintf(buf, "%u\n", fw_cfg_rev);
    306}
    307
    308static const struct kobj_attribute fw_cfg_rev_attr = {
    309	.attr = { .name = "rev", .mode = S_IRUSR },
    310	.show = fw_cfg_showrev,
    311};
    312
    313/* fw_cfg_sysfs_entry type */
    314struct fw_cfg_sysfs_entry {
    315	struct kobject kobj;
    316	u32 size;
    317	u16 select;
    318	char name[FW_CFG_MAX_FILE_PATH];
    319	struct list_head list;
    320};
    321
    322#ifdef CONFIG_CRASH_CORE
    323static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f)
    324{
    325	static struct fw_cfg_vmcoreinfo *data;
    326	ssize_t ret;
    327
    328	data = kmalloc(sizeof(struct fw_cfg_vmcoreinfo), GFP_KERNEL);
    329	if (!data)
    330		return -ENOMEM;
    331
    332	*data = (struct fw_cfg_vmcoreinfo) {
    333		.guest_format = cpu_to_le16(FW_CFG_VMCOREINFO_FORMAT_ELF),
    334		.size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
    335		.paddr = cpu_to_le64(paddr_vmcoreinfo_note())
    336	};
    337	/* spare ourself reading host format support for now since we
    338	 * don't know what else to format - host may ignore ours
    339	 */
    340	ret = fw_cfg_write_blob(be16_to_cpu(f->select), data,
    341				0, sizeof(struct fw_cfg_vmcoreinfo));
    342
    343	kfree(data);
    344	return ret;
    345}
    346#endif /* CONFIG_CRASH_CORE */
    347
    348/* get fw_cfg_sysfs_entry from kobject member */
    349static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
    350{
    351	return container_of(kobj, struct fw_cfg_sysfs_entry, kobj);
    352}
    353
    354/* fw_cfg_sysfs_attribute type */
    355struct fw_cfg_sysfs_attribute {
    356	struct attribute attr;
    357	ssize_t (*show)(struct fw_cfg_sysfs_entry *entry, char *buf);
    358};
    359
    360/* get fw_cfg_sysfs_attribute from attribute member */
    361static inline struct fw_cfg_sysfs_attribute *to_attr(struct attribute *attr)
    362{
    363	return container_of(attr, struct fw_cfg_sysfs_attribute, attr);
    364}
    365
    366/* global cache of fw_cfg_sysfs_entry objects */
    367static LIST_HEAD(fw_cfg_entry_cache);
    368
    369/* kobjects removed lazily by kernel, mutual exclusion needed */
    370static DEFINE_SPINLOCK(fw_cfg_cache_lock);
    371
    372static inline void fw_cfg_sysfs_cache_enlist(struct fw_cfg_sysfs_entry *entry)
    373{
    374	spin_lock(&fw_cfg_cache_lock);
    375	list_add_tail(&entry->list, &fw_cfg_entry_cache);
    376	spin_unlock(&fw_cfg_cache_lock);
    377}
    378
    379static inline void fw_cfg_sysfs_cache_delist(struct fw_cfg_sysfs_entry *entry)
    380{
    381	spin_lock(&fw_cfg_cache_lock);
    382	list_del(&entry->list);
    383	spin_unlock(&fw_cfg_cache_lock);
    384}
    385
    386static void fw_cfg_sysfs_cache_cleanup(void)
    387{
    388	struct fw_cfg_sysfs_entry *entry, *next;
    389
    390	list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) {
    391		fw_cfg_sysfs_cache_delist(entry);
    392		kobject_del(&entry->kobj);
    393		kobject_put(&entry->kobj);
    394	}
    395}
    396
    397/* per-entry attributes and show methods */
    398
    399#define FW_CFG_SYSFS_ATTR(_attr) \
    400struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = { \
    401	.attr = { .name = __stringify(_attr), .mode = S_IRUSR }, \
    402	.show = fw_cfg_sysfs_show_##_attr, \
    403}
    404
    405static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
    406{
    407	return sprintf(buf, "%u\n", e->size);
    408}
    409
    410static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
    411{
    412	return sprintf(buf, "%u\n", e->select);
    413}
    414
    415static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
    416{
    417	return sprintf(buf, "%s\n", e->name);
    418}
    419
    420static FW_CFG_SYSFS_ATTR(size);
    421static FW_CFG_SYSFS_ATTR(key);
    422static FW_CFG_SYSFS_ATTR(name);
    423
    424static struct attribute *fw_cfg_sysfs_entry_attrs[] = {
    425	&fw_cfg_sysfs_attr_size.attr,
    426	&fw_cfg_sysfs_attr_key.attr,
    427	&fw_cfg_sysfs_attr_name.attr,
    428	NULL,
    429};
    430ATTRIBUTE_GROUPS(fw_cfg_sysfs_entry);
    431
    432/* sysfs_ops: find fw_cfg_[entry, attribute] and call appropriate show method */
    433static ssize_t fw_cfg_sysfs_attr_show(struct kobject *kobj, struct attribute *a,
    434				      char *buf)
    435{
    436	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
    437	struct fw_cfg_sysfs_attribute *attr = to_attr(a);
    438
    439	return attr->show(entry, buf);
    440}
    441
    442static const struct sysfs_ops fw_cfg_sysfs_attr_ops = {
    443	.show = fw_cfg_sysfs_attr_show,
    444};
    445
    446/* release: destructor, to be called via kobject_put() */
    447static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
    448{
    449	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
    450
    451	kfree(entry);
    452}
    453
    454/* kobj_type: ties together all properties required to register an entry */
    455static struct kobj_type fw_cfg_sysfs_entry_ktype = {
    456	.default_groups = fw_cfg_sysfs_entry_groups,
    457	.sysfs_ops = &fw_cfg_sysfs_attr_ops,
    458	.release = fw_cfg_sysfs_release_entry,
    459};
    460
    461/* raw-read method and attribute */
    462static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj,
    463				     struct bin_attribute *bin_attr,
    464				     char *buf, loff_t pos, size_t count)
    465{
    466	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
    467
    468	if (pos > entry->size)
    469		return -EINVAL;
    470
    471	if (count > entry->size - pos)
    472		count = entry->size - pos;
    473
    474	return fw_cfg_read_blob(entry->select, buf, pos, count);
    475}
    476
    477static struct bin_attribute fw_cfg_sysfs_attr_raw = {
    478	.attr = { .name = "raw", .mode = S_IRUSR },
    479	.read = fw_cfg_sysfs_read_raw,
    480};
    481
    482/*
    483 * Create a kset subdirectory matching each '/' delimited dirname token
    484 * in 'name', starting with sysfs kset/folder 'dir'; At the end, create
    485 * a symlink directed at the given 'target'.
    486 * NOTE: We do this on a best-effort basis, since 'name' is not guaranteed
    487 * to be a well-behaved path name. Whenever a symlink vs. kset directory
    488 * name collision occurs, the kernel will issue big scary warnings while
    489 * refusing to add the offending link or directory. We follow up with our
    490 * own, slightly less scary error messages explaining the situation :)
    491 */
    492static int fw_cfg_build_symlink(struct kset *dir,
    493				struct kobject *target, const char *name)
    494{
    495	int ret;
    496	struct kset *subdir;
    497	struct kobject *ko;
    498	char *name_copy, *p, *tok;
    499
    500	if (!dir || !target || !name || !*name)
    501		return -EINVAL;
    502
    503	/* clone a copy of name for parsing */
    504	name_copy = p = kstrdup(name, GFP_KERNEL);
    505	if (!name_copy)
    506		return -ENOMEM;
    507
    508	/* create folders for each dirname token, then symlink for basename */
    509	while ((tok = strsep(&p, "/")) && *tok) {
    510
    511		/* last (basename) token? If so, add symlink here */
    512		if (!p || !*p) {
    513			ret = sysfs_create_link(&dir->kobj, target, tok);
    514			break;
    515		}
    516
    517		/* does the current dir contain an item named after tok ? */
    518		ko = kset_find_obj(dir, tok);
    519		if (ko) {
    520			/* drop reference added by kset_find_obj */
    521			kobject_put(ko);
    522
    523			/* ko MUST be a kset - we're about to use it as one ! */
    524			if (ko->ktype != dir->kobj.ktype) {
    525				ret = -EINVAL;
    526				break;
    527			}
    528
    529			/* descend into already existing subdirectory */
    530			dir = to_kset(ko);
    531		} else {
    532			/* create new subdirectory kset */
    533			subdir = kzalloc(sizeof(struct kset), GFP_KERNEL);
    534			if (!subdir) {
    535				ret = -ENOMEM;
    536				break;
    537			}
    538			subdir->kobj.kset = dir;
    539			subdir->kobj.ktype = dir->kobj.ktype;
    540			ret = kobject_set_name(&subdir->kobj, "%s", tok);
    541			if (ret) {
    542				kfree(subdir);
    543				break;
    544			}
    545			ret = kset_register(subdir);
    546			if (ret) {
    547				kfree(subdir);
    548				break;
    549			}
    550
    551			/* descend into newly created subdirectory */
    552			dir = subdir;
    553		}
    554	}
    555
    556	/* we're done with cloned copy of name */
    557	kfree(name_copy);
    558	return ret;
    559}
    560
    561/* recursively unregister fw_cfg/by_name/ kset directory tree */
    562static void fw_cfg_kset_unregister_recursive(struct kset *kset)
    563{
    564	struct kobject *k, *next;
    565
    566	list_for_each_entry_safe(k, next, &kset->list, entry)
    567		/* all set members are ksets too, but check just in case... */
    568		if (k->ktype == kset->kobj.ktype)
    569			fw_cfg_kset_unregister_recursive(to_kset(k));
    570
    571	/* symlinks are cleanly and automatically removed with the directory */
    572	kset_unregister(kset);
    573}
    574
    575/* kobjects & kset representing top-level, by_key, and by_name folders */
    576static struct kobject *fw_cfg_top_ko;
    577static struct kobject *fw_cfg_sel_ko;
    578static struct kset *fw_cfg_fname_kset;
    579
    580/* register an individual fw_cfg file */
    581static int fw_cfg_register_file(const struct fw_cfg_file *f)
    582{
    583	int err;
    584	struct fw_cfg_sysfs_entry *entry;
    585
    586#ifdef CONFIG_CRASH_CORE
    587	if (fw_cfg_dma_enabled() &&
    588		strcmp(f->name, FW_CFG_VMCOREINFO_FILENAME) == 0 &&
    589		!is_kdump_kernel()) {
    590		if (fw_cfg_write_vmcoreinfo(f) < 0)
    591			pr_warn("fw_cfg: failed to write vmcoreinfo");
    592	}
    593#endif
    594
    595	/* allocate new entry */
    596	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    597	if (!entry)
    598		return -ENOMEM;
    599
    600	/* set file entry information */
    601	entry->size = be32_to_cpu(f->size);
    602	entry->select = be16_to_cpu(f->select);
    603	strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
    604
    605	/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
    606	err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
    607				   fw_cfg_sel_ko, "%d", entry->select);
    608	if (err)
    609		goto err_put_entry;
    610
    611	/* add raw binary content access */
    612	err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
    613	if (err)
    614		goto err_del_entry;
    615
    616	/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
    617	fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
    618
    619	/* success, add entry to global cache */
    620	fw_cfg_sysfs_cache_enlist(entry);
    621	return 0;
    622
    623err_del_entry:
    624	kobject_del(&entry->kobj);
    625err_put_entry:
    626	kobject_put(&entry->kobj);
    627	return err;
    628}
    629
    630/* iterate over all fw_cfg directory entries, registering each one */
    631static int fw_cfg_register_dir_entries(void)
    632{
    633	int ret = 0;
    634	__be32 files_count;
    635	u32 count, i;
    636	struct fw_cfg_file *dir;
    637	size_t dir_size;
    638
    639	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
    640			0, sizeof(files_count));
    641	if (ret < 0)
    642		return ret;
    643
    644	count = be32_to_cpu(files_count);
    645	dir_size = count * sizeof(struct fw_cfg_file);
    646
    647	dir = kmalloc(dir_size, GFP_KERNEL);
    648	if (!dir)
    649		return -ENOMEM;
    650
    651	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
    652			sizeof(files_count), dir_size);
    653	if (ret < 0)
    654		goto end;
    655
    656	for (i = 0; i < count; i++) {
    657		ret = fw_cfg_register_file(&dir[i]);
    658		if (ret)
    659			break;
    660	}
    661
    662end:
    663	kfree(dir);
    664	return ret;
    665}
    666
    667/* unregister top-level or by_key folder */
    668static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
    669{
    670	kobject_del(kobj);
    671	kobject_put(kobj);
    672}
    673
    674static int fw_cfg_sysfs_probe(struct platform_device *pdev)
    675{
    676	int err;
    677	__le32 rev;
    678
    679	/* NOTE: If we supported multiple fw_cfg devices, we'd first create
    680	 * a subdirectory named after e.g. pdev->id, then hang per-device
    681	 * by_key (and by_name) subdirectories underneath it. However, only
    682	 * one fw_cfg device exist system-wide, so if one was already found
    683	 * earlier, we might as well stop here.
    684	 */
    685	if (fw_cfg_sel_ko)
    686		return -EBUSY;
    687
    688	/* create by_key and by_name subdirs of /sys/firmware/qemu_fw_cfg/ */
    689	err = -ENOMEM;
    690	fw_cfg_sel_ko = kobject_create_and_add("by_key", fw_cfg_top_ko);
    691	if (!fw_cfg_sel_ko)
    692		goto err_sel;
    693	fw_cfg_fname_kset = kset_create_and_add("by_name", NULL, fw_cfg_top_ko);
    694	if (!fw_cfg_fname_kset)
    695		goto err_name;
    696
    697	/* initialize fw_cfg device i/o from platform data */
    698	err = fw_cfg_do_platform_probe(pdev);
    699	if (err)
    700		goto err_probe;
    701
    702	/* get revision number, add matching top-level attribute */
    703	err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
    704	if (err < 0)
    705		goto err_probe;
    706
    707	fw_cfg_rev = le32_to_cpu(rev);
    708	err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
    709	if (err)
    710		goto err_rev;
    711
    712	/* process fw_cfg file directory entry, registering each file */
    713	err = fw_cfg_register_dir_entries();
    714	if (err)
    715		goto err_dir;
    716
    717	/* success */
    718	pr_debug("fw_cfg: loaded.\n");
    719	return 0;
    720
    721err_dir:
    722	fw_cfg_sysfs_cache_cleanup();
    723	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
    724err_rev:
    725	fw_cfg_io_cleanup();
    726err_probe:
    727	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
    728err_name:
    729	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
    730err_sel:
    731	return err;
    732}
    733
    734static int fw_cfg_sysfs_remove(struct platform_device *pdev)
    735{
    736	pr_debug("fw_cfg: unloading.\n");
    737	fw_cfg_sysfs_cache_cleanup();
    738	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
    739	fw_cfg_io_cleanup();
    740	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
    741	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
    742	return 0;
    743}
    744
    745static const struct of_device_id fw_cfg_sysfs_mmio_match[] = {
    746	{ .compatible = "qemu,fw-cfg-mmio", },
    747	{},
    748};
    749MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
    750
    751#ifdef CONFIG_ACPI
    752static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
    753	{ FW_CFG_ACPI_DEVICE_ID, },
    754	{},
    755};
    756MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
    757#endif
    758
    759static struct platform_driver fw_cfg_sysfs_driver = {
    760	.probe = fw_cfg_sysfs_probe,
    761	.remove = fw_cfg_sysfs_remove,
    762	.driver = {
    763		.name = "fw_cfg",
    764		.of_match_table = fw_cfg_sysfs_mmio_match,
    765		.acpi_match_table = ACPI_PTR(fw_cfg_sysfs_acpi_match),
    766	},
    767};
    768
    769#ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
    770
    771static struct platform_device *fw_cfg_cmdline_dev;
    772
    773/* this probably belongs in e.g. include/linux/types.h,
    774 * but right now we are the only ones doing it...
    775 */
    776#ifdef CONFIG_PHYS_ADDR_T_64BIT
    777#define __PHYS_ADDR_PREFIX "ll"
    778#else
    779#define __PHYS_ADDR_PREFIX ""
    780#endif
    781
    782/* use special scanf/printf modifier for phys_addr_t, resource_size_t */
    783#define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
    784			 ":%" __PHYS_ADDR_PREFIX "i" \
    785			 ":%" __PHYS_ADDR_PREFIX "i%n" \
    786			 ":%" __PHYS_ADDR_PREFIX "i%n"
    787
    788#define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
    789			 "0x%" __PHYS_ADDR_PREFIX "x"
    790
    791#define PH_ADDR_PR_3_FMT PH_ADDR_PR_1_FMT \
    792			 ":%" __PHYS_ADDR_PREFIX "u" \
    793			 ":%" __PHYS_ADDR_PREFIX "u"
    794
    795#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
    796			 ":%" __PHYS_ADDR_PREFIX "u"
    797
    798static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
    799{
    800	struct resource res[4] = {};
    801	char *str;
    802	phys_addr_t base;
    803	resource_size_t size, ctrl_off, data_off, dma_off;
    804	int processed, consumed = 0;
    805
    806	/* only one fw_cfg device can exist system-wide, so if one
    807	 * was processed on the command line already, we might as
    808	 * well stop here.
    809	 */
    810	if (fw_cfg_cmdline_dev) {
    811		/* avoid leaking previously registered device */
    812		platform_device_unregister(fw_cfg_cmdline_dev);
    813		return -EINVAL;
    814	}
    815
    816	/* consume "<size>" portion of command line argument */
    817	size = memparse(arg, &str);
    818
    819	/* get "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]" chunks */
    820	processed = sscanf(str, PH_ADDR_SCAN_FMT,
    821			   &base, &consumed,
    822			   &ctrl_off, &data_off, &consumed,
    823			   &dma_off, &consumed);
    824
    825	/* sscanf() must process precisely 1, 3 or 4 chunks:
    826	 * <base> is mandatory, optionally followed by <ctrl_off>
    827	 * and <data_off>, and <dma_off>;
    828	 * there must be no extra characters after the last chunk,
    829	 * so str[consumed] must be '\0'.
    830	 */
    831	if (str[consumed] ||
    832	    (processed != 1 && processed != 3 && processed != 4))
    833		return -EINVAL;
    834
    835	res[0].start = base;
    836	res[0].end = base + size - 1;
    837	res[0].flags = !strcmp(kp->name, "mmio") ? IORESOURCE_MEM :
    838						   IORESOURCE_IO;
    839
    840	/* insert register offsets, if provided */
    841	if (processed > 1) {
    842		res[1].name = "ctrl";
    843		res[1].start = ctrl_off;
    844		res[1].flags = IORESOURCE_REG;
    845		res[2].name = "data";
    846		res[2].start = data_off;
    847		res[2].flags = IORESOURCE_REG;
    848	}
    849	if (processed > 3) {
    850		res[3].name = "dma";
    851		res[3].start = dma_off;
    852		res[3].flags = IORESOURCE_REG;
    853	}
    854
    855	/* "processed" happens to nicely match the number of resources
    856	 * we need to pass in to this platform device.
    857	 */
    858	fw_cfg_cmdline_dev = platform_device_register_simple("fw_cfg",
    859					PLATFORM_DEVID_NONE, res, processed);
    860
    861	return PTR_ERR_OR_ZERO(fw_cfg_cmdline_dev);
    862}
    863
    864static int fw_cfg_cmdline_get(char *buf, const struct kernel_param *kp)
    865{
    866	/* stay silent if device was not configured via the command
    867	 * line, or if the parameter name (ioport/mmio) doesn't match
    868	 * the device setting
    869	 */
    870	if (!fw_cfg_cmdline_dev ||
    871	    (!strcmp(kp->name, "mmio") ^
    872	     (fw_cfg_cmdline_dev->resource[0].flags == IORESOURCE_MEM)))
    873		return 0;
    874
    875	switch (fw_cfg_cmdline_dev->num_resources) {
    876	case 1:
    877		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_1_FMT,
    878				resource_size(&fw_cfg_cmdline_dev->resource[0]),
    879				fw_cfg_cmdline_dev->resource[0].start);
    880	case 3:
    881		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_3_FMT,
    882				resource_size(&fw_cfg_cmdline_dev->resource[0]),
    883				fw_cfg_cmdline_dev->resource[0].start,
    884				fw_cfg_cmdline_dev->resource[1].start,
    885				fw_cfg_cmdline_dev->resource[2].start);
    886	case 4:
    887		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_4_FMT,
    888				resource_size(&fw_cfg_cmdline_dev->resource[0]),
    889				fw_cfg_cmdline_dev->resource[0].start,
    890				fw_cfg_cmdline_dev->resource[1].start,
    891				fw_cfg_cmdline_dev->resource[2].start,
    892				fw_cfg_cmdline_dev->resource[3].start);
    893	}
    894
    895	/* Should never get here */
    896	WARN(1, "Unexpected number of resources: %d\n",
    897		fw_cfg_cmdline_dev->num_resources);
    898	return 0;
    899}
    900
    901static const struct kernel_param_ops fw_cfg_cmdline_param_ops = {
    902	.set = fw_cfg_cmdline_set,
    903	.get = fw_cfg_cmdline_get,
    904};
    905
    906device_param_cb(ioport, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
    907device_param_cb(mmio, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
    908
    909#endif /* CONFIG_FW_CFG_SYSFS_CMDLINE */
    910
    911static int __init fw_cfg_sysfs_init(void)
    912{
    913	int ret;
    914
    915	/* create /sys/firmware/qemu_fw_cfg/ top level directory */
    916	fw_cfg_top_ko = kobject_create_and_add("qemu_fw_cfg", firmware_kobj);
    917	if (!fw_cfg_top_ko)
    918		return -ENOMEM;
    919
    920	ret = platform_driver_register(&fw_cfg_sysfs_driver);
    921	if (ret)
    922		fw_cfg_kobj_cleanup(fw_cfg_top_ko);
    923
    924	return ret;
    925}
    926
    927static void __exit fw_cfg_sysfs_exit(void)
    928{
    929	platform_driver_unregister(&fw_cfg_sysfs_driver);
    930
    931#ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
    932	platform_device_unregister(fw_cfg_cmdline_dev);
    933#endif
    934
    935	/* clean up /sys/firmware/qemu_fw_cfg/ */
    936	fw_cfg_kobj_cleanup(fw_cfg_top_ko);
    937}
    938
    939module_init(fw_cfg_sysfs_init);
    940module_exit(fw_cfg_sysfs_exit);