cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

iommu.c (83373B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
      4 * Author: Joerg Roedel <jroedel@suse.de>
      5 */
      6
      7#define pr_fmt(fmt)    "iommu: " fmt
      8
      9#include <linux/device.h>
     10#include <linux/dma-iommu.h>
     11#include <linux/kernel.h>
     12#include <linux/bits.h>
     13#include <linux/bug.h>
     14#include <linux/types.h>
     15#include <linux/init.h>
     16#include <linux/export.h>
     17#include <linux/slab.h>
     18#include <linux/errno.h>
     19#include <linux/iommu.h>
     20#include <linux/idr.h>
     21#include <linux/err.h>
     22#include <linux/pci.h>
     23#include <linux/bitops.h>
     24#include <linux/property.h>
     25#include <linux/fsl/mc.h>
     26#include <linux/module.h>
     27#include <linux/cc_platform.h>
     28#include <trace/events/iommu.h>
     29
     30static struct kset *iommu_group_kset;
     31static DEFINE_IDA(iommu_group_ida);
     32
     33static unsigned int iommu_def_domain_type __read_mostly;
     34static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
     35static u32 iommu_cmd_line __read_mostly;
     36
     37struct iommu_group {
     38	struct kobject kobj;
     39	struct kobject *devices_kobj;
     40	struct list_head devices;
     41	struct mutex mutex;
     42	void *iommu_data;
     43	void (*iommu_data_release)(void *iommu_data);
     44	char *name;
     45	int id;
     46	struct iommu_domain *default_domain;
     47	struct iommu_domain *blocking_domain;
     48	struct iommu_domain *domain;
     49	struct list_head entry;
     50	unsigned int owner_cnt;
     51	void *owner;
     52};
     53
     54struct group_device {
     55	struct list_head list;
     56	struct device *dev;
     57	char *name;
     58};
     59
     60struct iommu_group_attribute {
     61	struct attribute attr;
     62	ssize_t (*show)(struct iommu_group *group, char *buf);
     63	ssize_t (*store)(struct iommu_group *group,
     64			 const char *buf, size_t count);
     65};
     66
     67static const char * const iommu_group_resv_type_string[] = {
     68	[IOMMU_RESV_DIRECT]			= "direct",
     69	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
     70	[IOMMU_RESV_RESERVED]			= "reserved",
     71	[IOMMU_RESV_MSI]			= "msi",
     72	[IOMMU_RESV_SW_MSI]			= "msi",
     73};
     74
     75#define IOMMU_CMD_LINE_DMA_API		BIT(0)
     76#define IOMMU_CMD_LINE_STRICT		BIT(1)
     77
     78static int iommu_alloc_default_domain(struct iommu_group *group,
     79				      struct device *dev);
     80static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
     81						 unsigned type);
     82static int __iommu_attach_device(struct iommu_domain *domain,
     83				 struct device *dev);
     84static int __iommu_attach_group(struct iommu_domain *domain,
     85				struct iommu_group *group);
     86static int __iommu_group_set_domain(struct iommu_group *group,
     87				    struct iommu_domain *new_domain);
     88static int iommu_create_device_direct_mappings(struct iommu_group *group,
     89					       struct device *dev);
     90static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
     91static ssize_t iommu_group_store_type(struct iommu_group *group,
     92				      const char *buf, size_t count);
     93
     94#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
     95struct iommu_group_attribute iommu_group_attr_##_name =		\
     96	__ATTR(_name, _mode, _show, _store)
     97
     98#define to_iommu_group_attr(_attr)	\
     99	container_of(_attr, struct iommu_group_attribute, attr)
    100#define to_iommu_group(_kobj)		\
    101	container_of(_kobj, struct iommu_group, kobj)
    102
    103static LIST_HEAD(iommu_device_list);
    104static DEFINE_SPINLOCK(iommu_device_lock);
    105
    106/*
    107 * Use a function instead of an array here because the domain-type is a
    108 * bit-field, so an array would waste memory.
    109 */
    110static const char *iommu_domain_type_str(unsigned int t)
    111{
    112	switch (t) {
    113	case IOMMU_DOMAIN_BLOCKED:
    114		return "Blocked";
    115	case IOMMU_DOMAIN_IDENTITY:
    116		return "Passthrough";
    117	case IOMMU_DOMAIN_UNMANAGED:
    118		return "Unmanaged";
    119	case IOMMU_DOMAIN_DMA:
    120	case IOMMU_DOMAIN_DMA_FQ:
    121		return "Translated";
    122	default:
    123		return "Unknown";
    124	}
    125}
    126
    127static int __init iommu_subsys_init(void)
    128{
    129	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
    130		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
    131			iommu_set_default_passthrough(false);
    132		else
    133			iommu_set_default_translated(false);
    134
    135		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
    136			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
    137			iommu_set_default_translated(false);
    138		}
    139	}
    140
    141	if (!iommu_default_passthrough() && !iommu_dma_strict)
    142		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
    143
    144	pr_info("Default domain type: %s %s\n",
    145		iommu_domain_type_str(iommu_def_domain_type),
    146		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
    147			"(set via kernel command line)" : "");
    148
    149	if (!iommu_default_passthrough())
    150		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
    151			iommu_dma_strict ? "strict" : "lazy",
    152			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
    153				"(set via kernel command line)" : "");
    154
    155	return 0;
    156}
    157subsys_initcall(iommu_subsys_init);
    158
    159/**
    160 * iommu_device_register() - Register an IOMMU hardware instance
    161 * @iommu: IOMMU handle for the instance
    162 * @ops:   IOMMU ops to associate with the instance
    163 * @hwdev: (optional) actual instance device, used for fwnode lookup
    164 *
    165 * Return: 0 on success, or an error.
    166 */
    167int iommu_device_register(struct iommu_device *iommu,
    168			  const struct iommu_ops *ops, struct device *hwdev)
    169{
    170	/* We need to be able to take module references appropriately */
    171	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
    172		return -EINVAL;
    173
    174	iommu->ops = ops;
    175	if (hwdev)
    176		iommu->fwnode = hwdev->fwnode;
    177
    178	spin_lock(&iommu_device_lock);
    179	list_add_tail(&iommu->list, &iommu_device_list);
    180	spin_unlock(&iommu_device_lock);
    181	return 0;
    182}
    183EXPORT_SYMBOL_GPL(iommu_device_register);
    184
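As a usage illustration (not part of this file): an IOMMU driver typically embeds a struct iommu_device in its per-instance data and registers it from its own probe path. The smmu pointer and my_iommu_ops below are hypothetical driver-side names.

	/* driver probe path (sketch): register this hardware instance */
	err = iommu_device_register(&smmu->iommu, &my_iommu_ops, dev);
	if (err)
		return err;

	/* ... and on driver removal: iommu_device_unregister(&smmu->iommu); */
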
    185void iommu_device_unregister(struct iommu_device *iommu)
    186{
    187	spin_lock(&iommu_device_lock);
    188	list_del(&iommu->list);
    189	spin_unlock(&iommu_device_lock);
    190}
    191EXPORT_SYMBOL_GPL(iommu_device_unregister);
    192
    193static struct dev_iommu *dev_iommu_get(struct device *dev)
    194{
    195	struct dev_iommu *param = dev->iommu;
    196
    197	if (param)
    198		return param;
    199
    200	param = kzalloc(sizeof(*param), GFP_KERNEL);
    201	if (!param)
    202		return NULL;
    203
    204	mutex_init(&param->lock);
    205	dev->iommu = param;
    206	return param;
    207}
    208
    209static void dev_iommu_free(struct device *dev)
    210{
    211	struct dev_iommu *param = dev->iommu;
    212
    213	dev->iommu = NULL;
    214	if (param->fwspec) {
    215		fwnode_handle_put(param->fwspec->iommu_fwnode);
    216		kfree(param->fwspec);
    217	}
    218	kfree(param);
    219}
    220
    221static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
    222{
    223	const struct iommu_ops *ops = dev->bus->iommu_ops;
    224	struct iommu_device *iommu_dev;
    225	struct iommu_group *group;
    226	int ret;
    227
    228	if (!ops)
    229		return -ENODEV;
    230
    231	if (!dev_iommu_get(dev))
    232		return -ENOMEM;
    233
    234	if (!try_module_get(ops->owner)) {
    235		ret = -EINVAL;
    236		goto err_free;
    237	}
    238
    239	iommu_dev = ops->probe_device(dev);
    240	if (IS_ERR(iommu_dev)) {
    241		ret = PTR_ERR(iommu_dev);
    242		goto out_module_put;
    243	}
    244
    245	dev->iommu->iommu_dev = iommu_dev;
    246
    247	group = iommu_group_get_for_dev(dev);
    248	if (IS_ERR(group)) {
    249		ret = PTR_ERR(group);
    250		goto out_release;
    251	}
    252	iommu_group_put(group);
    253
    254	if (group_list && !group->default_domain && list_empty(&group->entry))
    255		list_add_tail(&group->entry, group_list);
    256
    257	iommu_device_link(iommu_dev, dev);
    258
    259	return 0;
    260
    261out_release:
    262	ops->release_device(dev);
    263
    264out_module_put:
    265	module_put(ops->owner);
    266
    267err_free:
    268	dev_iommu_free(dev);
    269
    270	return ret;
    271}
    272
    273int iommu_probe_device(struct device *dev)
    274{
    275	const struct iommu_ops *ops = dev->bus->iommu_ops;
    276	struct iommu_group *group;
    277	int ret;
    278
    279	ret = __iommu_probe_device(dev, NULL);
    280	if (ret)
    281		goto err_out;
    282
    283	group = iommu_group_get(dev);
    284	if (!group) {
    285		ret = -ENODEV;
    286		goto err_release;
    287	}
    288
    289	/*
    290	 * Try to allocate a default domain - needs support from the
    291	 * IOMMU driver. There are still some drivers which don't
    292	 * support default domains, so the return value is not yet
    293	 * checked.
    294	 */
    295	mutex_lock(&group->mutex);
    296	iommu_alloc_default_domain(group, dev);
    297
    298	/*
    299	 * If device joined an existing group which has been claimed, don't
    300	 * attach the default domain.
    301	 */
    302	if (group->default_domain && !group->owner) {
    303		ret = __iommu_attach_device(group->default_domain, dev);
    304		if (ret) {
    305			mutex_unlock(&group->mutex);
    306			iommu_group_put(group);
    307			goto err_release;
    308		}
    309	}
    310
    311	iommu_create_device_direct_mappings(group, dev);
    312
    313	mutex_unlock(&group->mutex);
    314	iommu_group_put(group);
    315
    316	if (ops->probe_finalize)
    317		ops->probe_finalize(dev);
    318
    319	return 0;
    320
    321err_release:
    322	iommu_release_device(dev);
    323
    324err_out:
    325	return ret;
    326
    327}
    328
    329void iommu_release_device(struct device *dev)
    330{
    331	const struct iommu_ops *ops;
    332
    333	if (!dev->iommu)
    334		return;
    335
    336	iommu_device_unlink(dev->iommu->iommu_dev, dev);
    337
    338	ops = dev_iommu_ops(dev);
    339	ops->release_device(dev);
    340
    341	iommu_group_remove_device(dev);
    342	module_put(ops->owner);
    343	dev_iommu_free(dev);
    344}
    345
    346static int __init iommu_set_def_domain_type(char *str)
    347{
    348	bool pt;
    349	int ret;
    350
    351	ret = kstrtobool(str, &pt);
    352	if (ret)
    353		return ret;
    354
    355	if (pt)
    356		iommu_set_default_passthrough(true);
    357	else
    358		iommu_set_default_translated(true);
    359
    360	return 0;
    361}
    362early_param("iommu.passthrough", iommu_set_def_domain_type);
    363
    364static int __init iommu_dma_setup(char *str)
    365{
    366	int ret = kstrtobool(str, &iommu_dma_strict);
    367
    368	if (!ret)
    369		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
    370	return ret;
    371}
    372early_param("iommu.strict", iommu_dma_setup);
    373
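For example, booting with iommu.passthrough=1 on the kernel command line requests identity (passthrough) default domains, while iommu.passthrough=0 iommu.strict=0 requests translated default domains with lazy (flush-queue) TLB invalidation; iommu.strict=1 forces strict invalidation. Both parameters accept the usual kstrtobool spellings (0/1, y/n, on/off).
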
    374void iommu_set_dma_strict(void)
    375{
    376	iommu_dma_strict = true;
    377	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
    378		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
    379}
    380
    381static ssize_t iommu_group_attr_show(struct kobject *kobj,
    382				     struct attribute *__attr, char *buf)
    383{
    384	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
    385	struct iommu_group *group = to_iommu_group(kobj);
    386	ssize_t ret = -EIO;
    387
    388	if (attr->show)
    389		ret = attr->show(group, buf);
    390	return ret;
    391}
    392
    393static ssize_t iommu_group_attr_store(struct kobject *kobj,
    394				      struct attribute *__attr,
    395				      const char *buf, size_t count)
    396{
    397	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
    398	struct iommu_group *group = to_iommu_group(kobj);
    399	ssize_t ret = -EIO;
    400
    401	if (attr->store)
    402		ret = attr->store(group, buf, count);
    403	return ret;
    404}
    405
    406static const struct sysfs_ops iommu_group_sysfs_ops = {
    407	.show = iommu_group_attr_show,
    408	.store = iommu_group_attr_store,
    409};
    410
    411static int iommu_group_create_file(struct iommu_group *group,
    412				   struct iommu_group_attribute *attr)
    413{
    414	return sysfs_create_file(&group->kobj, &attr->attr);
    415}
    416
    417static void iommu_group_remove_file(struct iommu_group *group,
    418				    struct iommu_group_attribute *attr)
    419{
    420	sysfs_remove_file(&group->kobj, &attr->attr);
    421}
    422
    423static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
    424{
    425	return sprintf(buf, "%s\n", group->name);
    426}
    427
    428/**
    429 * iommu_insert_resv_region - Insert a new region in the
    430 * list of reserved regions.
    431 * @new: new region to insert
    432 * @regions: list of regions
    433 *
    434 * Elements are sorted by start address and overlapping segments
    435 * of the same type are merged.
    436 */
    437static int iommu_insert_resv_region(struct iommu_resv_region *new,
    438				    struct list_head *regions)
    439{
    440	struct iommu_resv_region *iter, *tmp, *nr, *top;
    441	LIST_HEAD(stack);
    442
    443	nr = iommu_alloc_resv_region(new->start, new->length,
    444				     new->prot, new->type);
    445	if (!nr)
    446		return -ENOMEM;
    447
    448	/* First add the new element based on start address sorting */
    449	list_for_each_entry(iter, regions, list) {
    450		if (nr->start < iter->start ||
    451		    (nr->start == iter->start && nr->type <= iter->type))
    452			break;
    453	}
    454	list_add_tail(&nr->list, &iter->list);
    455
    456	/* Merge overlapping segments of type nr->type in @regions, if any */
    457	list_for_each_entry_safe(iter, tmp, regions, list) {
    458		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
    459
    460		/* no merge needed on elements of different types than @new */
    461		if (iter->type != new->type) {
    462			list_move_tail(&iter->list, &stack);
    463			continue;
    464		}
    465
    466		/* look for the last stack element of same type as @iter */
    467		list_for_each_entry_reverse(top, &stack, list)
    468			if (top->type == iter->type)
    469				goto check_overlap;
    470
    471		list_move_tail(&iter->list, &stack);
    472		continue;
    473
    474check_overlap:
    475		top_end = top->start + top->length - 1;
    476
    477		if (iter->start > top_end + 1) {
    478			list_move_tail(&iter->list, &stack);
    479		} else {
    480			top->length = max(top_end, iter_end) - top->start + 1;
    481			list_del(&iter->list);
    482			kfree(iter);
    483		}
    484	}
    485	list_splice(&stack, regions);
    486	return 0;
    487}
    488
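To illustrate the merge rule: if the list already holds an IOMMU_RESV_DIRECT region covering 0x1000-0x1fff and a new direct region 0x1800-0x2fff is inserted, the two are combined into a single 0x1000-0x2fff entry. An IOMMU_RESV_MSI region overlapping the same range would stay a separate entry, since only overlapping or directly adjacent segments of the same type are merged.
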
    489static int
    490iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
    491				 struct list_head *group_resv_regions)
    492{
    493	struct iommu_resv_region *entry;
    494	int ret = 0;
    495
    496	list_for_each_entry(entry, dev_resv_regions, list) {
    497		ret = iommu_insert_resv_region(entry, group_resv_regions);
    498		if (ret)
    499			break;
    500	}
    501	return ret;
    502}
    503
    504int iommu_get_group_resv_regions(struct iommu_group *group,
    505				 struct list_head *head)
    506{
    507	struct group_device *device;
    508	int ret = 0;
    509
    510	mutex_lock(&group->mutex);
    511	list_for_each_entry(device, &group->devices, list) {
    512		struct list_head dev_resv_regions;
    513
    514		/*
    515		 * Non-API groups still expose reserved_regions in sysfs,
    516		 * so filter out calls that get here that way.
    517		 */
    518		if (!device->dev->iommu)
    519			break;
    520
    521		INIT_LIST_HEAD(&dev_resv_regions);
    522		iommu_get_resv_regions(device->dev, &dev_resv_regions);
    523		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
    524		iommu_put_resv_regions(device->dev, &dev_resv_regions);
    525		if (ret)
    526			break;
    527	}
    528	mutex_unlock(&group->mutex);
    529	return ret;
    530}
    531EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
    532
    533static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
    534					     char *buf)
    535{
    536	struct iommu_resv_region *region, *next;
    537	struct list_head group_resv_regions;
    538	char *str = buf;
    539
    540	INIT_LIST_HEAD(&group_resv_regions);
    541	iommu_get_group_resv_regions(group, &group_resv_regions);
    542
    543	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
    544		str += sprintf(str, "0x%016llx 0x%016llx %s\n",
    545			       (long long int)region->start,
    546			       (long long int)(region->start +
    547						region->length - 1),
    548			       iommu_group_resv_type_string[region->type]);
    549		kfree(region);
    550	}
    551
    552	return (str - buf);
    553}
    554
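The resulting reserved_regions attribute (under the group's directory in /sys/kernel/iommu_groups/) prints one region per line as start address, end address and type. A typical line might look like the following; the addresses are of course platform specific:

0x00000000fee00000 0x00000000feefffff msi
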
    555static ssize_t iommu_group_show_type(struct iommu_group *group,
    556				     char *buf)
    557{
    558	char *type = "unknown\n";
    559
    560	mutex_lock(&group->mutex);
    561	if (group->default_domain) {
    562		switch (group->default_domain->type) {
    563		case IOMMU_DOMAIN_BLOCKED:
    564			type = "blocked\n";
    565			break;
    566		case IOMMU_DOMAIN_IDENTITY:
    567			type = "identity\n";
    568			break;
    569		case IOMMU_DOMAIN_UNMANAGED:
    570			type = "unmanaged\n";
    571			break;
    572		case IOMMU_DOMAIN_DMA:
    573			type = "DMA\n";
    574			break;
    575		case IOMMU_DOMAIN_DMA_FQ:
    576			type = "DMA-FQ\n";
    577			break;
    578		}
    579	}
    580	mutex_unlock(&group->mutex);
    581	strcpy(buf, type);
    582
    583	return strlen(type);
    584}
    585
    586static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
    587
    588static IOMMU_GROUP_ATTR(reserved_regions, 0444,
    589			iommu_group_show_resv_regions, NULL);
    590
    591static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
    592			iommu_group_store_type);
    593
    594static void iommu_group_release(struct kobject *kobj)
    595{
    596	struct iommu_group *group = to_iommu_group(kobj);
    597
    598	pr_debug("Releasing group %d\n", group->id);
    599
    600	if (group->iommu_data_release)
    601		group->iommu_data_release(group->iommu_data);
    602
    603	ida_simple_remove(&iommu_group_ida, group->id);
    604
    605	if (group->default_domain)
    606		iommu_domain_free(group->default_domain);
    607	if (group->blocking_domain)
    608		iommu_domain_free(group->blocking_domain);
    609
    610	kfree(group->name);
    611	kfree(group);
    612}
    613
    614static struct kobj_type iommu_group_ktype = {
    615	.sysfs_ops = &iommu_group_sysfs_ops,
    616	.release = iommu_group_release,
    617};
    618
    619/**
    620 * iommu_group_alloc - Allocate a new group
    621 *
    622 * This function is called by an iommu driver to allocate a new iommu
    623 * group.  The iommu group represents the minimum granularity of the iommu.
    624 * Upon successful return, the caller holds a reference to the supplied
    625 * group in order to hold the group until devices are added.  Use
    626 * iommu_group_put() to release this extra reference count, allowing the
    627 * group to be automatically reclaimed once it has no devices or external
    628 * references.
    629 */
    630struct iommu_group *iommu_group_alloc(void)
    631{
    632	struct iommu_group *group;
    633	int ret;
    634
    635	group = kzalloc(sizeof(*group), GFP_KERNEL);
    636	if (!group)
    637		return ERR_PTR(-ENOMEM);
    638
    639	group->kobj.kset = iommu_group_kset;
    640	mutex_init(&group->mutex);
    641	INIT_LIST_HEAD(&group->devices);
    642	INIT_LIST_HEAD(&group->entry);
    643
    644	ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
    645	if (ret < 0) {
    646		kfree(group);
    647		return ERR_PTR(ret);
    648	}
    649	group->id = ret;
    650
    651	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
    652				   NULL, "%d", group->id);
    653	if (ret) {
    654		ida_simple_remove(&iommu_group_ida, group->id);
    655		kobject_put(&group->kobj);
    656		return ERR_PTR(ret);
    657	}
    658
    659	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
    660	if (!group->devices_kobj) {
    661		kobject_put(&group->kobj); /* triggers .release & free */
    662		return ERR_PTR(-ENOMEM);
    663	}
    664
    665	/*
    666	 * The devices_kobj holds a reference on the group kobject, so
    667	 * as long as that exists so will the group.  We can therefore
    668	 * use the devices_kobj for reference counting.
    669	 */
    670	kobject_put(&group->kobj);
    671
    672	ret = iommu_group_create_file(group,
    673				      &iommu_group_attr_reserved_regions);
    674	if (ret)
    675		return ERR_PTR(ret);
    676
    677	ret = iommu_group_create_file(group, &iommu_group_attr_type);
    678	if (ret)
    679		return ERR_PTR(ret);
    680
    681	pr_debug("Allocated group %d\n", group->id);
    682
    683	return group;
    684}
    685EXPORT_SYMBOL_GPL(iommu_group_alloc);
    686
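A minimal sketch of a driver-side ->device_group() callback that wants one group per device (my_device_group is a hypothetical name; compare generic_device_group() further down in this file):

static struct iommu_group *my_device_group(struct device *dev)
{
	struct iommu_group *group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return group;

	/* the reference returned here is dropped by the core once the
	 * device has been added to the group */
	return group;
}
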
    687struct iommu_group *iommu_group_get_by_id(int id)
    688{
    689	struct kobject *group_kobj;
    690	struct iommu_group *group;
    691	const char *name;
    692
    693	if (!iommu_group_kset)
    694		return NULL;
    695
    696	name = kasprintf(GFP_KERNEL, "%d", id);
    697	if (!name)
    698		return NULL;
    699
    700	group_kobj = kset_find_obj(iommu_group_kset, name);
    701	kfree(name);
    702
    703	if (!group_kobj)
    704		return NULL;
    705
    706	group = container_of(group_kobj, struct iommu_group, kobj);
    707	BUG_ON(group->id != id);
    708
    709	kobject_get(group->devices_kobj);
    710	kobject_put(&group->kobj);
    711
    712	return group;
    713}
    714EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
    715
    716/**
    717 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
    718 * @group: the group
    719 *
    720 * iommu drivers can store data in the group for use when doing iommu
    721 * operations.  This function provides a way to retrieve it.  Caller
    722 * should hold a group reference.
    723 */
    724void *iommu_group_get_iommudata(struct iommu_group *group)
    725{
    726	return group->iommu_data;
    727}
    728EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
    729
    730/**
    731 * iommu_group_set_iommudata - set iommu_data for a group
    732 * @group: the group
    733 * @iommu_data: new data
    734 * @release: release function for iommu_data
    735 *
    736 * iommu drivers can store data in the group for use when doing iommu
    737 * operations.  This function provides a way to set the data after
    738 * the group has been allocated.  Caller should hold a group reference.
    739 */
    740void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
    741			       void (*release)(void *iommu_data))
    742{
    743	group->iommu_data = iommu_data;
    744	group->iommu_data_release = release;
    745}
    746EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
    747
    748/**
    749 * iommu_group_set_name - set name for a group
    750 * @group: the group
    751 * @name: name
    752 *
    753 * Allow iommu driver to set a name for a group.  When set it will
    754 * appear in a name attribute file under the group in sysfs.
    755 */
    756int iommu_group_set_name(struct iommu_group *group, const char *name)
    757{
    758	int ret;
    759
    760	if (group->name) {
    761		iommu_group_remove_file(group, &iommu_group_attr_name);
    762		kfree(group->name);
    763		group->name = NULL;
    764		if (!name)
    765			return 0;
    766	}
    767
    768	group->name = kstrdup(name, GFP_KERNEL);
    769	if (!group->name)
    770		return -ENOMEM;
    771
    772	ret = iommu_group_create_file(group, &iommu_group_attr_name);
    773	if (ret) {
    774		kfree(group->name);
    775		group->name = NULL;
    776		return ret;
    777	}
    778
    779	return 0;
    780}
    781EXPORT_SYMBOL_GPL(iommu_group_set_name);
    782
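A hypothetical driver could hang its own per-group state and a human-readable name onto a freshly allocated group; my_group_data and my_group_release are illustrative names:

	iommu_group_set_iommudata(group, my_group_data, my_group_release);
	if (iommu_group_set_name(group, "my-iommu-unit"))
		pr_warn("failed to name iommu group\n");
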
    783static int iommu_create_device_direct_mappings(struct iommu_group *group,
    784					       struct device *dev)
    785{
    786	struct iommu_domain *domain = group->default_domain;
    787	struct iommu_resv_region *entry;
    788	struct list_head mappings;
    789	unsigned long pg_size;
    790	int ret = 0;
    791
    792	if (!domain || !iommu_is_dma_domain(domain))
    793		return 0;
    794
    795	BUG_ON(!domain->pgsize_bitmap);
    796
    797	pg_size = 1UL << __ffs(domain->pgsize_bitmap);
    798	INIT_LIST_HEAD(&mappings);
    799
    800	iommu_get_resv_regions(dev, &mappings);
    801
    802	/* We need to consider overlapping regions for different devices */
    803	list_for_each_entry(entry, &mappings, list) {
    804		dma_addr_t start, end, addr;
    805		size_t map_size = 0;
    806
    807		start = ALIGN(entry->start, pg_size);
    808		end   = ALIGN(entry->start + entry->length, pg_size);
    809
    810		if (entry->type != IOMMU_RESV_DIRECT &&
    811		    entry->type != IOMMU_RESV_DIRECT_RELAXABLE)
    812			continue;
    813
    814		for (addr = start; addr <= end; addr += pg_size) {
    815			phys_addr_t phys_addr;
    816
    817			if (addr == end)
    818				goto map_end;
    819
    820			phys_addr = iommu_iova_to_phys(domain, addr);
    821			if (!phys_addr) {
    822				map_size += pg_size;
    823				continue;
    824			}
    825
    826map_end:
    827			if (map_size) {
    828				ret = iommu_map(domain, addr - map_size,
    829						addr - map_size, map_size,
    830						entry->prot);
    831				if (ret)
    832					goto out;
    833				map_size = 0;
    834			}
    835		}
    836
    837	}
    838
    839	iommu_flush_iotlb_all(domain);
    840
    841out:
    842	iommu_put_resv_regions(dev, &mappings);
    843
    844	return ret;
    845}
    846
    847static bool iommu_is_attach_deferred(struct device *dev)
    848{
    849	const struct iommu_ops *ops = dev_iommu_ops(dev);
    850
    851	if (ops->is_attach_deferred)
    852		return ops->is_attach_deferred(dev);
    853
    854	return false;
    855}
    856
    857/**
    858 * iommu_group_add_device - add a device to an iommu group
    859 * @group: the group into which to add the device (reference should be held)
    860 * @dev: the device
    861 *
    862 * This function is called by an iommu driver to add a device into a
    863 * group.  Adding a device increments the group reference count.
    864 */
    865int iommu_group_add_device(struct iommu_group *group, struct device *dev)
    866{
    867	int ret, i = 0;
    868	struct group_device *device;
    869
    870	device = kzalloc(sizeof(*device), GFP_KERNEL);
    871	if (!device)
    872		return -ENOMEM;
    873
    874	device->dev = dev;
    875
    876	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
    877	if (ret)
    878		goto err_free_device;
    879
    880	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
    881rename:
    882	if (!device->name) {
    883		ret = -ENOMEM;
    884		goto err_remove_link;
    885	}
    886
    887	ret = sysfs_create_link_nowarn(group->devices_kobj,
    888				       &dev->kobj, device->name);
    889	if (ret) {
    890		if (ret == -EEXIST && i >= 0) {
    891			/*
    892			 * Account for the slim chance of collision
    893			 * and append an instance to the name.
    894			 */
    895			kfree(device->name);
    896			device->name = kasprintf(GFP_KERNEL, "%s.%d",
    897						 kobject_name(&dev->kobj), i++);
    898			goto rename;
    899		}
    900		goto err_free_name;
    901	}
    902
    903	kobject_get(group->devices_kobj);
    904
    905	dev->iommu_group = group;
    906
    907	mutex_lock(&group->mutex);
    908	list_add_tail(&device->list, &group->devices);
    909	if (group->domain && !iommu_is_attach_deferred(dev))
    910		ret = __iommu_attach_device(group->domain, dev);
    911	mutex_unlock(&group->mutex);
    912	if (ret)
    913		goto err_put_group;
    914
    915	trace_add_device_to_group(group->id, dev);
    916
    917	dev_info(dev, "Adding to iommu group %d\n", group->id);
    918
    919	return 0;
    920
    921err_put_group:
    922	mutex_lock(&group->mutex);
    923	list_del(&device->list);
    924	mutex_unlock(&group->mutex);
    925	dev->iommu_group = NULL;
    926	kobject_put(group->devices_kobj);
    927	sysfs_remove_link(group->devices_kobj, device->name);
    928err_free_name:
    929	kfree(device->name);
    930err_remove_link:
    931	sysfs_remove_link(&dev->kobj, "iommu_group");
    932err_free_device:
    933	kfree(device);
    934	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
    935	return ret;
    936}
    937EXPORT_SYMBOL_GPL(iommu_group_add_device);
    938
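Sketch of driver-side use: a driver that manages group membership itself adds the device and then drops its own reference, since iommu_group_add_device() takes a reference of its own:

	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);
	if (ret)
		return ret;
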
    939/**
    940 * iommu_group_remove_device - remove a device from its current group
    941 * @dev: device to be removed
    942 *
    943 * This function is called by an iommu driver to remove the device from
    944 * its current group.  This decrements the iommu group reference count.
    945 */
    946void iommu_group_remove_device(struct device *dev)
    947{
    948	struct iommu_group *group = dev->iommu_group;
    949	struct group_device *tmp_device, *device = NULL;
    950
    951	if (!group)
    952		return;
    953
    954	dev_info(dev, "Removing from iommu group %d\n", group->id);
    955
    956	mutex_lock(&group->mutex);
    957	list_for_each_entry(tmp_device, &group->devices, list) {
    958		if (tmp_device->dev == dev) {
    959			device = tmp_device;
    960			list_del(&device->list);
    961			break;
    962		}
    963	}
    964	mutex_unlock(&group->mutex);
    965
    966	if (!device)
    967		return;
    968
    969	sysfs_remove_link(group->devices_kobj, device->name);
    970	sysfs_remove_link(&dev->kobj, "iommu_group");
    971
    972	trace_remove_device_from_group(group->id, dev);
    973
    974	kfree(device->name);
    975	kfree(device);
    976	dev->iommu_group = NULL;
    977	kobject_put(group->devices_kobj);
    978}
    979EXPORT_SYMBOL_GPL(iommu_group_remove_device);
    980
    981static int iommu_group_device_count(struct iommu_group *group)
    982{
    983	struct group_device *entry;
    984	int ret = 0;
    985
    986	list_for_each_entry(entry, &group->devices, list)
    987		ret++;
    988
    989	return ret;
    990}
    991
    992static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
    993				      int (*fn)(struct device *, void *))
    994{
    995	struct group_device *device;
    996	int ret = 0;
    997
    998	list_for_each_entry(device, &group->devices, list) {
    999		ret = fn(device->dev, data);
   1000		if (ret)
   1001			break;
   1002	}
   1003	return ret;
   1004}
   1005
   1006/**
   1007 * iommu_group_for_each_dev - iterate over each device in the group
   1008 * @group: the group
   1009 * @data: caller opaque data to be passed to callback function
   1010 * @fn: caller supplied callback function
   1011 *
   1012 * This function is called by group users to iterate over group devices.
   1013 * Callers should hold a reference count to the group during callback.
   1014 * The group->mutex is held across callbacks, which will block calls to
   1015 * iommu_group_add/remove_device.
   1016 */
   1017int iommu_group_for_each_dev(struct iommu_group *group, void *data,
   1018			     int (*fn)(struct device *, void *))
   1019{
   1020	int ret;
   1021
   1022	mutex_lock(&group->mutex);
   1023	ret = __iommu_group_for_each_dev(group, data, fn);
   1024	mutex_unlock(&group->mutex);
   1025
   1026	return ret;
   1027}
   1028EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
   1029
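For illustration, a caller could count the devices in a group with a trivial callback (count_device is a hypothetical helper):

static int count_device(struct device *dev, void *data)
{
	int *count = data;

	(*count)++;
	return 0;	/* a non-zero return value would stop the walk */
}

	/* ... in the caller ... */
	int count = 0;

	iommu_group_for_each_dev(group, &count, count_device);
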
   1030/**
   1031 * iommu_group_get - Return the group for a device and increment reference
   1032 * @dev: get the group that this device belongs to
   1033 *
   1034 * This function is called by iommu drivers and users to get the group
   1035 * for the specified device.  If found, the group is returned and the group
   1036 * reference is incremented, else NULL.
   1037 */
   1038struct iommu_group *iommu_group_get(struct device *dev)
   1039{
   1040	struct iommu_group *group = dev->iommu_group;
   1041
   1042	if (group)
   1043		kobject_get(group->devices_kobj);
   1044
   1045	return group;
   1046}
   1047EXPORT_SYMBOL_GPL(iommu_group_get);
   1048
   1049/**
   1050 * iommu_group_ref_get - Increment reference on a group
   1051 * @group: the group to use, must not be NULL
   1052 *
   1053 * This function is called by iommu drivers to take additional references on an
   1054 * existing group.  Returns the given group for convenience.
   1055 */
   1056struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
   1057{
   1058	kobject_get(group->devices_kobj);
   1059	return group;
   1060}
   1061EXPORT_SYMBOL_GPL(iommu_group_ref_get);
   1062
   1063/**
   1064 * iommu_group_put - Decrement group reference
   1065 * @group: the group to use
   1066 *
   1067 * This function is called by iommu drivers and users to release the
   1068 * iommu group.  Once the reference count is zero, the group is released.
   1069 */
   1070void iommu_group_put(struct iommu_group *group)
   1071{
   1072	if (group)
   1073		kobject_put(group->devices_kobj);
   1074}
   1075EXPORT_SYMBOL_GPL(iommu_group_put);
   1076
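The usual lookup pattern pairs the two calls so the reference taken by iommu_group_get() is always released:

	struct iommu_group *group = iommu_group_get(dev);

	if (group) {
		dev_info(dev, "in iommu group %d\n", iommu_group_id(group));
		iommu_group_put(group);
	}
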
   1077/**
   1078 * iommu_register_device_fault_handler() - Register a device fault handler
   1079 * @dev: the device
   1080 * @handler: the fault handler
   1081 * @data: private data passed as argument to the handler
   1082 *
   1083 * When an IOMMU fault event is received, this handler gets called with the
   1084 * fault event and data as argument. The handler should return 0 on success. If
   1085 * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also
   1086 * complete the fault by calling iommu_page_response() with one of the following
   1087 * response codes:
   1088 * - IOMMU_PAGE_RESP_SUCCESS: retry the translation
   1089 * - IOMMU_PAGE_RESP_INVALID: terminate the fault
   1090 * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting
   1091 *   page faults if possible.
   1092 *
   1093 * Return 0 if the fault handler was installed successfully, or an error.
   1094 */
   1095int iommu_register_device_fault_handler(struct device *dev,
   1096					iommu_dev_fault_handler_t handler,
   1097					void *data)
   1098{
   1099	struct dev_iommu *param = dev->iommu;
   1100	int ret = 0;
   1101
   1102	if (!param)
   1103		return -EINVAL;
   1104
   1105	mutex_lock(&param->lock);
   1106	/* Only allow one fault handler registered for each device */
   1107	if (param->fault_param) {
   1108		ret = -EBUSY;
   1109		goto done_unlock;
   1110	}
   1111
   1112	get_device(dev);
   1113	param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL);
   1114	if (!param->fault_param) {
   1115		put_device(dev);
   1116		ret = -ENOMEM;
   1117		goto done_unlock;
   1118	}
   1119	param->fault_param->handler = handler;
   1120	param->fault_param->data = data;
   1121	mutex_init(&param->fault_param->lock);
   1122	INIT_LIST_HEAD(&param->fault_param->faults);
   1123
   1124done_unlock:
   1125	mutex_unlock(&param->lock);
   1126
   1127	return ret;
   1128}
   1129EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
   1130
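A minimal sketch of a consumer registering a handler; my_fault_handler and cookie are hypothetical names. A real handler for recoverable IOMMU_FAULT_PAGE_REQ faults would additionally complete them via iommu_page_response():

static int my_fault_handler(struct iommu_fault *fault, void *data)
{
	/* inspect fault->type / fault->prm here; 0 means the fault was handled */
	return 0;
}

	/* ... during device setup ... */
	ret = iommu_register_device_fault_handler(dev, my_fault_handler, cookie);
	if (ret)
		return ret;
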
   1131/**
   1132 * iommu_unregister_device_fault_handler() - Unregister the device fault handler
   1133 * @dev: the device
   1134 *
   1135 * Remove the device fault handler installed with
   1136 * iommu_register_device_fault_handler().
   1137 *
   1138 * Return 0 on success, or an error.
   1139 */
   1140int iommu_unregister_device_fault_handler(struct device *dev)
   1141{
   1142	struct dev_iommu *param = dev->iommu;
   1143	int ret = 0;
   1144
   1145	if (!param)
   1146		return -EINVAL;
   1147
   1148	mutex_lock(&param->lock);
   1149
   1150	if (!param->fault_param)
   1151		goto unlock;
   1152
   1153	/* we cannot unregister the handler if there are pending faults */
   1154	if (!list_empty(&param->fault_param->faults)) {
   1155		ret = -EBUSY;
   1156		goto unlock;
   1157	}
   1158
   1159	kfree(param->fault_param);
   1160	param->fault_param = NULL;
   1161	put_device(dev);
   1162unlock:
   1163	mutex_unlock(&param->lock);
   1164
   1165	return ret;
   1166}
   1167EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler);
   1168
   1169/**
   1170 * iommu_report_device_fault() - Report fault event to device driver
   1171 * @dev: the device
   1172 * @evt: fault event data
   1173 *
   1174 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
   1175 * handler. When this function fails and the fault is recoverable, it is the
   1176 * caller's responsibility to complete the fault.
   1177 *
   1178 * Return 0 on success, or an error.
   1179 */
   1180int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
   1181{
   1182	struct dev_iommu *param = dev->iommu;
   1183	struct iommu_fault_event *evt_pending = NULL;
   1184	struct iommu_fault_param *fparam;
   1185	int ret = 0;
   1186
   1187	if (!param || !evt)
   1188		return -EINVAL;
   1189
   1190	/* we only report device fault if there is a handler registered */
   1191	mutex_lock(&param->lock);
   1192	fparam = param->fault_param;
   1193	if (!fparam || !fparam->handler) {
   1194		ret = -EINVAL;
   1195		goto done_unlock;
   1196	}
   1197
   1198	if (evt->fault.type == IOMMU_FAULT_PAGE_REQ &&
   1199	    (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
   1200		evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event),
   1201				      GFP_KERNEL);
   1202		if (!evt_pending) {
   1203			ret = -ENOMEM;
   1204			goto done_unlock;
   1205		}
   1206		mutex_lock(&fparam->lock);
   1207		list_add_tail(&evt_pending->list, &fparam->faults);
   1208		mutex_unlock(&fparam->lock);
   1209	}
   1210
   1211	ret = fparam->handler(&evt->fault, fparam->data);
   1212	if (ret && evt_pending) {
   1213		mutex_lock(&fparam->lock);
   1214		list_del(&evt_pending->list);
   1215		mutex_unlock(&fparam->lock);
   1216		kfree(evt_pending);
   1217	}
   1218done_unlock:
   1219	mutex_unlock(&param->lock);
   1220	return ret;
   1221}
   1222EXPORT_SYMBOL_GPL(iommu_report_device_fault);
   1223
   1224int iommu_page_response(struct device *dev,
   1225			struct iommu_page_response *msg)
   1226{
   1227	bool needs_pasid;
   1228	int ret = -EINVAL;
   1229	struct iommu_fault_event *evt;
   1230	struct iommu_fault_page_request *prm;
   1231	struct dev_iommu *param = dev->iommu;
   1232	const struct iommu_ops *ops = dev_iommu_ops(dev);
   1233	bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
   1234
   1235	if (!ops->page_response)
   1236		return -ENODEV;
   1237
   1238	if (!param || !param->fault_param)
   1239		return -EINVAL;
   1240
   1241	if (msg->version != IOMMU_PAGE_RESP_VERSION_1 ||
   1242	    msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID)
   1243		return -EINVAL;
   1244
   1245	/* Only send response if there is a fault report pending */
   1246	mutex_lock(&param->fault_param->lock);
   1247	if (list_empty(&param->fault_param->faults)) {
   1248		dev_warn_ratelimited(dev, "no pending PRQ, drop response\n");
   1249		goto done_unlock;
   1250	}
   1251	/*
   1252	 * Check if we have a matching page request pending to respond,
   1253	 * otherwise return -EINVAL
   1254	 */
   1255	list_for_each_entry(evt, &param->fault_param->faults, list) {
   1256		prm = &evt->fault.prm;
   1257		if (prm->grpid != msg->grpid)
   1258			continue;
   1259
   1260		/*
   1261		 * If the PASID is required, the corresponding request is
   1262		 * matched using the group ID, the PASID valid bit and the PASID
   1263 * value. Otherwise, the request and response are matched on the
   1264 * group ID alone.
   1265		 */
   1266		needs_pasid = prm->flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
   1267		if (needs_pasid && (!has_pasid || msg->pasid != prm->pasid))
   1268			continue;
   1269
   1270		if (!needs_pasid && has_pasid) {
   1271			/* No big deal, just clear it. */
   1272			msg->flags &= ~IOMMU_PAGE_RESP_PASID_VALID;
   1273			msg->pasid = 0;
   1274		}
   1275
   1276		ret = ops->page_response(dev, evt, msg);
   1277		list_del(&evt->list);
   1278		kfree(evt);
   1279		break;
   1280	}
   1281
   1282done_unlock:
   1283	mutex_unlock(&param->fault_param->lock);
   1284	return ret;
   1285}
   1286EXPORT_SYMBOL_GPL(iommu_page_response);
   1287
   1288/**
   1289 * iommu_group_id - Return ID for a group
   1290 * @group: the group to ID
   1291 *
   1292 * Return the unique ID for the group matching the sysfs group number.
   1293 */
   1294int iommu_group_id(struct iommu_group *group)
   1295{
   1296	return group->id;
   1297}
   1298EXPORT_SYMBOL_GPL(iommu_group_id);
   1299
   1300static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
   1301					       unsigned long *devfns);
   1302
   1303/*
   1304 * To consider a PCI device isolated, we require ACS to support Source
   1305 * Validation, Request Redirection, Completer Redirection, and Upstream
   1306 * Forwarding.  This effectively means that devices cannot spoof their
   1307 * requester ID, requests and completions cannot be redirected, and all
   1308 * transactions are forwarded upstream, even as they pass through a
   1309 * bridge where the target device is downstream.
   1310 */
   1311#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
   1312
   1313/*
   1314 * For multifunction devices which are not isolated from each other, find
   1315 * all the other non-isolated functions and look for existing groups.  For
   1316 * each function, we also need to look for aliases to or from other devices
   1317 * that may already have a group.
   1318 */
   1319static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
   1320							unsigned long *devfns)
   1321{
   1322	struct pci_dev *tmp = NULL;
   1323	struct iommu_group *group;
   1324
   1325	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
   1326		return NULL;
   1327
   1328	for_each_pci_dev(tmp) {
   1329		if (tmp == pdev || tmp->bus != pdev->bus ||
   1330		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
   1331		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
   1332			continue;
   1333
   1334		group = get_pci_alias_group(tmp, devfns);
   1335		if (group) {
   1336			pci_dev_put(tmp);
   1337			return group;
   1338		}
   1339	}
   1340
   1341	return NULL;
   1342}
   1343
   1344/*
   1345 * Look for aliases to or from the given device for existing groups. DMA
   1346 * aliases are only supported on the same bus, therefore the search
   1347 * space is quite small (especially since we're really only looking at pcie
   1348 * device, and therefore only expect multiple slots on the root complex or
   1349 * downstream switch ports).  It's conceivable though that a pair of
   1350 * multifunction devices could have aliases between them that would cause a
   1351 * loop.  To prevent this, we use a bitmap to track where we've been.
   1352 */
   1353static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
   1354					       unsigned long *devfns)
   1355{
   1356	struct pci_dev *tmp = NULL;
   1357	struct iommu_group *group;
   1358
   1359	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
   1360		return NULL;
   1361
   1362	group = iommu_group_get(&pdev->dev);
   1363	if (group)
   1364		return group;
   1365
   1366	for_each_pci_dev(tmp) {
   1367		if (tmp == pdev || tmp->bus != pdev->bus)
   1368			continue;
   1369
   1370		/* We alias them or they alias us */
   1371		if (pci_devs_are_dma_aliases(pdev, tmp)) {
   1372			group = get_pci_alias_group(tmp, devfns);
   1373			if (group) {
   1374				pci_dev_put(tmp);
   1375				return group;
   1376			}
   1377
   1378			group = get_pci_function_alias_group(tmp, devfns);
   1379			if (group) {
   1380				pci_dev_put(tmp);
   1381				return group;
   1382			}
   1383		}
   1384	}
   1385
   1386	return NULL;
   1387}
   1388
   1389struct group_for_pci_data {
   1390	struct pci_dev *pdev;
   1391	struct iommu_group *group;
   1392};
   1393
   1394/*
   1395 * DMA alias iterator callback, return the last seen device.  Stop and return
   1396 * the IOMMU group if we find one along the way.
   1397 */
   1398static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
   1399{
   1400	struct group_for_pci_data *data = opaque;
   1401
   1402	data->pdev = pdev;
   1403	data->group = iommu_group_get(&pdev->dev);
   1404
   1405	return data->group != NULL;
   1406}
   1407
   1408/*
   1409 * Generic device_group call-back function. It just allocates one
   1410 * iommu-group per device.
   1411 */
   1412struct iommu_group *generic_device_group(struct device *dev)
   1413{
   1414	return iommu_group_alloc();
   1415}
   1416EXPORT_SYMBOL_GPL(generic_device_group);
   1417
   1418/*
   1419 * Use standard PCI bus topology, isolation features, and DMA alias quirks
   1420 * to find or create an IOMMU group for a device.
   1421 */
   1422struct iommu_group *pci_device_group(struct device *dev)
   1423{
   1424	struct pci_dev *pdev = to_pci_dev(dev);
   1425	struct group_for_pci_data data;
   1426	struct pci_bus *bus;
   1427	struct iommu_group *group = NULL;
   1428	u64 devfns[4] = { 0 };
   1429
   1430	if (WARN_ON(!dev_is_pci(dev)))
   1431		return ERR_PTR(-EINVAL);
   1432
   1433	/*
   1434	 * Find the upstream DMA alias for the device.  A device must not
   1435	 * be aliased due to topology in order to have its own IOMMU group.
   1436	 * If we find an alias along the way that already belongs to a
   1437	 * group, use it.
   1438	 */
   1439	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
   1440		return data.group;
   1441
   1442	pdev = data.pdev;
   1443
   1444	/*
   1445	 * Continue upstream from the point of minimum IOMMU granularity
   1446	 * due to aliases to the point where devices are protected from
   1447	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
   1448	 * group, use it.
   1449	 */
   1450	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
   1451		if (!bus->self)
   1452			continue;
   1453
   1454		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
   1455			break;
   1456
   1457		pdev = bus->self;
   1458
   1459		group = iommu_group_get(&pdev->dev);
   1460		if (group)
   1461			return group;
   1462	}
   1463
   1464	/*
   1465	 * Look for existing groups on device aliases.  If we alias another
   1466	 * device or another device aliases us, use the same group.
   1467	 */
   1468	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
   1469	if (group)
   1470		return group;
   1471
   1472	/*
   1473	 * Look for existing groups on non-isolated functions on the same
   1474 * slot and aliases of those functions, if any.  No need to clear
   1475	 * the search bitmap, the tested devfns are still valid.
   1476	 */
   1477	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
   1478	if (group)
   1479		return group;
   1480
   1481	/* No shared group found, allocate new */
   1482	return iommu_group_alloc();
   1483}
   1484EXPORT_SYMBOL_GPL(pci_device_group);
   1485
   1486/* Get the IOMMU group for device on fsl-mc bus */
   1487struct iommu_group *fsl_mc_device_group(struct device *dev)
   1488{
   1489	struct device *cont_dev = fsl_mc_cont_dev(dev);
   1490	struct iommu_group *group;
   1491
   1492	group = iommu_group_get(cont_dev);
   1493	if (!group)
   1494		group = iommu_group_alloc();
   1495	return group;
   1496}
   1497EXPORT_SYMBOL_GPL(fsl_mc_device_group);
   1498
   1499static int iommu_get_def_domain_type(struct device *dev)
   1500{
   1501	const struct iommu_ops *ops = dev_iommu_ops(dev);
   1502
   1503	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
   1504		return IOMMU_DOMAIN_DMA;
   1505
   1506	if (ops->def_domain_type)
   1507		return ops->def_domain_type(dev);
   1508
   1509	return 0;
   1510}
   1511
   1512static int iommu_group_alloc_default_domain(struct bus_type *bus,
   1513					    struct iommu_group *group,
   1514					    unsigned int type)
   1515{
   1516	struct iommu_domain *dom;
   1517
   1518	dom = __iommu_domain_alloc(bus, type);
   1519	if (!dom && type != IOMMU_DOMAIN_DMA) {
   1520		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
   1521		if (dom)
   1522			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
   1523				type, group->name);
   1524	}
   1525
   1526	if (!dom)
   1527		return -ENOMEM;
   1528
   1529	group->default_domain = dom;
   1530	if (!group->domain)
   1531		group->domain = dom;
   1532	return 0;
   1533}
   1534
   1535static int iommu_alloc_default_domain(struct iommu_group *group,
   1536				      struct device *dev)
   1537{
   1538	unsigned int type;
   1539
   1540	if (group->default_domain)
   1541		return 0;
   1542
   1543	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
   1544
   1545	return iommu_group_alloc_default_domain(dev->bus, group, type);
   1546}
   1547
   1548/**
   1549 * iommu_group_get_for_dev - Find or create the IOMMU group for a device
   1550 * @dev: target device
   1551 *
   1552 * This function is intended to be called by IOMMU drivers and extended to
   1553 * support common, bus-defined algorithms when determining or creating the
   1554 * IOMMU group for a device.  On success, the caller will hold a reference
   1555 * to the returned IOMMU group, which will already include the provided
   1556 * device.  The reference should be released with iommu_group_put().
   1557 */
   1558static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
   1559{
   1560	const struct iommu_ops *ops = dev_iommu_ops(dev);
   1561	struct iommu_group *group;
   1562	int ret;
   1563
   1564	group = iommu_group_get(dev);
   1565	if (group)
   1566		return group;
   1567
   1568	group = ops->device_group(dev);
   1569	if (WARN_ON_ONCE(group == NULL))
   1570		return ERR_PTR(-EINVAL);
   1571
   1572	if (IS_ERR(group))
   1573		return group;
   1574
   1575	ret = iommu_group_add_device(group, dev);
   1576	if (ret)
   1577		goto out_put_group;
   1578
   1579	return group;
   1580
   1581out_put_group:
   1582	iommu_group_put(group);
   1583
   1584	return ERR_PTR(ret);
   1585}
   1586
   1587struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
   1588{
   1589	return group->default_domain;
   1590}
   1591
   1592static int probe_iommu_group(struct device *dev, void *data)
   1593{
   1594	struct list_head *group_list = data;
   1595	struct iommu_group *group;
   1596	int ret;
   1597
   1598	/* Device is probed already if in a group */
   1599	group = iommu_group_get(dev);
   1600	if (group) {
   1601		iommu_group_put(group);
   1602		return 0;
   1603	}
   1604
   1605	ret = __iommu_probe_device(dev, group_list);
   1606	if (ret == -ENODEV)
   1607		ret = 0;
   1608
   1609	return ret;
   1610}
   1611
   1612static int remove_iommu_group(struct device *dev, void *data)
   1613{
   1614	iommu_release_device(dev);
   1615
   1616	return 0;
   1617}
   1618
   1619static int iommu_bus_notifier(struct notifier_block *nb,
   1620			      unsigned long action, void *data)
   1621{
   1622	struct device *dev = data;
   1623
   1624	if (action == BUS_NOTIFY_ADD_DEVICE) {
   1625		int ret;
   1626
   1627		ret = iommu_probe_device(dev);
   1628		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
   1629	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
   1630		iommu_release_device(dev);
   1631		return NOTIFY_OK;
   1632	}
   1633
   1634	return 0;
   1635}
   1636
   1637struct __group_domain_type {
   1638	struct device *dev;
   1639	unsigned int type;
   1640};
   1641
   1642static int probe_get_default_domain_type(struct device *dev, void *data)
   1643{
   1644	struct __group_domain_type *gtype = data;
   1645	unsigned int type = iommu_get_def_domain_type(dev);
   1646
   1647	if (type) {
   1648		if (gtype->type && gtype->type != type) {
   1649			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
   1650				 iommu_domain_type_str(type),
   1651				 dev_name(gtype->dev),
   1652				 iommu_domain_type_str(gtype->type));
   1653			gtype->type = 0;
   1654		}
   1655
   1656		if (!gtype->dev) {
   1657			gtype->dev  = dev;
   1658			gtype->type = type;
   1659		}
   1660	}
   1661
   1662	return 0;
   1663}
   1664
   1665static void probe_alloc_default_domain(struct bus_type *bus,
   1666				       struct iommu_group *group)
   1667{
   1668	struct __group_domain_type gtype;
   1669
   1670	memset(&gtype, 0, sizeof(gtype));
   1671
   1672	/* Ask for default domain requirements of all devices in the group */
   1673	__iommu_group_for_each_dev(group, &gtype,
   1674				   probe_get_default_domain_type);
   1675
   1676	if (!gtype.type)
   1677		gtype.type = iommu_def_domain_type;
   1678
   1679	iommu_group_alloc_default_domain(bus, group, gtype.type);
   1680
   1681}
   1682
   1683static int iommu_group_do_dma_attach(struct device *dev, void *data)
   1684{
   1685	struct iommu_domain *domain = data;
   1686	int ret = 0;
   1687
   1688	if (!iommu_is_attach_deferred(dev))
   1689		ret = __iommu_attach_device(domain, dev);
   1690
   1691	return ret;
   1692}
   1693
   1694static int __iommu_group_dma_attach(struct iommu_group *group)
   1695{
   1696	return __iommu_group_for_each_dev(group, group->default_domain,
   1697					  iommu_group_do_dma_attach);
   1698}
   1699
   1700static int iommu_group_do_probe_finalize(struct device *dev, void *data)
   1701{
   1702	const struct iommu_ops *ops = dev_iommu_ops(dev);
   1703
   1704	if (ops->probe_finalize)
   1705		ops->probe_finalize(dev);
   1706
   1707	return 0;
   1708}
   1709
   1710static void __iommu_group_dma_finalize(struct iommu_group *group)
   1711{
   1712	__iommu_group_for_each_dev(group, group->default_domain,
   1713				   iommu_group_do_probe_finalize);
   1714}
   1715
   1716static int iommu_do_create_direct_mappings(struct device *dev, void *data)
   1717{
   1718	struct iommu_group *group = data;
   1719
   1720	iommu_create_device_direct_mappings(group, dev);
   1721
   1722	return 0;
   1723}
   1724
   1725static int iommu_group_create_direct_mappings(struct iommu_group *group)
   1726{
   1727	return __iommu_group_for_each_dev(group, group,
   1728					  iommu_do_create_direct_mappings);
   1729}
   1730
   1731int bus_iommu_probe(struct bus_type *bus)
   1732{
   1733	struct iommu_group *group, *next;
   1734	LIST_HEAD(group_list);
   1735	int ret;
   1736
   1737	/*
   1738	 * This code-path does not allocate the default domain when
   1739	 * creating the iommu group, so do it after the groups are
   1740	 * created.
   1741	 */
   1742	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
   1743	if (ret)
   1744		return ret;
   1745
   1746	list_for_each_entry_safe(group, next, &group_list, entry) {
   1747		/* Remove item from the list */
   1748		list_del_init(&group->entry);
   1749
   1750		mutex_lock(&group->mutex);
   1751
   1752		/* Try to allocate default domain */
   1753		probe_alloc_default_domain(bus, group);
   1754
   1755		if (!group->default_domain) {
   1756			mutex_unlock(&group->mutex);
   1757			continue;
   1758		}
   1759
   1760		iommu_group_create_direct_mappings(group);
   1761
   1762		ret = __iommu_group_dma_attach(group);
   1763
   1764		mutex_unlock(&group->mutex);
   1765
   1766		if (ret)
   1767			break;
   1768
   1769		__iommu_group_dma_finalize(group);
   1770	}
   1771
   1772	return ret;
   1773}
   1774
   1775static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
   1776{
   1777	struct notifier_block *nb;
   1778	int err;
   1779
   1780	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
   1781	if (!nb)
   1782		return -ENOMEM;
   1783
   1784	nb->notifier_call = iommu_bus_notifier;
   1785
   1786	err = bus_register_notifier(bus, nb);
   1787	if (err)
   1788		goto out_free;
   1789
   1790	err = bus_iommu_probe(bus);
   1791	if (err)
   1792		goto out_err;
   1793
   1794
   1795	return 0;
   1796
   1797out_err:
   1798	/* Clean up */
   1799	bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
   1800	bus_unregister_notifier(bus, nb);
   1801
   1802out_free:
   1803	kfree(nb);
   1804
   1805	return err;
   1806}
   1807
   1808/**
   1809 * bus_set_iommu - set iommu-callbacks for the bus
   1810 * @bus: bus.
   1811 * @ops: the callbacks provided by the iommu-driver
   1812 *
   1813 * This function is called by an iommu driver to set the iommu methods
   1814 * used for a particular bus. Drivers for devices on that bus can use
   1815 * the iommu-api after these ops are registered.
   1816 * This special function is needed because IOMMUs are usually devices on
   1817 * the bus itself, so the iommu drivers are not initialized when the bus
   1818 * is set up. With this function the iommu-driver can set the iommu-ops
   1819 * afterwards.
   1820 */
   1821int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
   1822{
   1823	int err;
   1824
   1825	if (ops == NULL) {
   1826		bus->iommu_ops = NULL;
   1827		return 0;
   1828	}
   1829
   1830	if (bus->iommu_ops != NULL)
   1831		return -EBUSY;
   1832
   1833	bus->iommu_ops = ops;
   1834
   1835	/* Do IOMMU specific setup for this bus-type */
   1836	err = iommu_bus_init(bus, ops);
   1837	if (err)
   1838		bus->iommu_ops = NULL;
   1839
   1840	return err;
   1841}
   1842EXPORT_SYMBOL_GPL(bus_set_iommu);
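
/*
 * Usage sketch (hypothetical "foo" driver): an IOMMU driver typically
 * registers its ops for each bus type it can translate once its own
 * hardware probe has succeeded, e.g.
 *
 *	static const struct iommu_ops foo_iommu_ops = { ... };
 *
 *	err = bus_set_iommu(&pci_bus_type, &foo_iommu_ops);
 *	if (err)
 *		goto err_teardown;	// -EBUSY if ops were already set
 */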
   1843
   1844bool iommu_present(struct bus_type *bus)
   1845{
   1846	return bus->iommu_ops != NULL;
   1847}
   1848EXPORT_SYMBOL_GPL(iommu_present);
   1849
   1850/**
   1851 * device_iommu_capable() - check for a general IOMMU capability
   1852 * @dev: device to which the capability would be relevant, if available
   1853 * @cap: IOMMU capability
   1854 *
   1855 * Return: true if an IOMMU is present and supports the given capability
   1856 * for the given device, otherwise false.
   1857 */
   1858bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
   1859{
   1860	const struct iommu_ops *ops;
   1861
   1862	if (!dev->iommu || !dev->iommu->iommu_dev)
   1863		return false;
   1864
   1865	ops = dev_iommu_ops(dev);
   1866	if (!ops->capable)
   1867		return false;
   1868
   1869	return ops->capable(cap);
   1870}
   1871EXPORT_SYMBOL_GPL(device_iommu_capable);
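
/*
 * Usage sketch (hypothetical caller): a driver that needs coherent DMA
 * semantics might check its own device before relying on them, e.g.
 *
 *	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
 *		return -EINVAL;	// or fall back to a non-coherent path
 */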
   1872
   1873bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
   1874{
   1875	if (!bus->iommu_ops || !bus->iommu_ops->capable)
   1876		return false;
   1877
   1878	return bus->iommu_ops->capable(cap);
   1879}
   1880EXPORT_SYMBOL_GPL(iommu_capable);
   1881
   1882/**
   1883 * iommu_set_fault_handler() - set a fault handler for an iommu domain
   1884 * @domain: iommu domain
   1885 * @handler: fault handler
   1886 * @token: user data, will be passed back to the fault handler
   1887 *
   1888 * This function should be used by IOMMU users which want to be notified
   1889 * whenever an IOMMU fault happens.
   1890 *
   1891 * The fault handler itself should return 0 on success, and an appropriate
   1892 * error code otherwise.
   1893 */
   1894void iommu_set_fault_handler(struct iommu_domain *domain,
   1895					iommu_fault_handler_t handler,
   1896					void *token)
   1897{
   1898	BUG_ON(!domain);
   1899
   1900	domain->handler = handler;
   1901	domain->handler_token = token;
   1902}
   1903EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
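
/*
 * Usage sketch (hypothetical handler): a domain owner that only wants the
 * fault logged could install something like
 *
 *	static int foo_fault(struct iommu_domain *domain, struct device *dev,
 *			     unsigned long iova, int flags, void *token)
 *	{
 *		dev_err(dev, "iommu fault at 0x%lx, flags 0x%x\n", iova, flags);
 *		return -ENOSYS;	// keep the IOMMU driver's default behavior
 *	}
 *
 *	iommu_set_fault_handler(domain, foo_fault, NULL);
 */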
   1904
   1905static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
   1906						 unsigned type)
   1907{
   1908	struct iommu_domain *domain;
   1909
   1910	if (bus == NULL || bus->iommu_ops == NULL)
   1911		return NULL;
   1912
   1913	domain = bus->iommu_ops->domain_alloc(type);
   1914	if (!domain)
   1915		return NULL;
   1916
   1917	domain->type = type;
   1918	/* Assume all sizes by default; the driver may override this later */
   1919	domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
   1920	if (!domain->ops)
   1921		domain->ops = bus->iommu_ops->default_domain_ops;
   1922
   1923	if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
   1924		iommu_domain_free(domain);
   1925		domain = NULL;
   1926	}
   1927	return domain;
   1928}
   1929
   1930struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
   1931{
   1932	return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
   1933}
   1934EXPORT_SYMBOL_GPL(iommu_domain_alloc);
   1935
   1936void iommu_domain_free(struct iommu_domain *domain)
   1937{
   1938	iommu_put_dma_cookie(domain);
   1939	domain->ops->free(domain);
   1940}
   1941EXPORT_SYMBOL_GPL(iommu_domain_free);
   1942
   1943/*
   1944 * Put the group's domain back to the appropriate core-owned domain - either the
   1945 * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
   1946 */
   1947static void __iommu_group_set_core_domain(struct iommu_group *group)
   1948{
   1949	struct iommu_domain *new_domain;
   1950	int ret;
   1951
   1952	if (group->owner)
   1953		new_domain = group->blocking_domain;
   1954	else
   1955		new_domain = group->default_domain;
   1956
   1957	ret = __iommu_group_set_domain(group, new_domain);
   1958	WARN(ret, "iommu driver failed to attach the default/blocking domain");
   1959}
   1960
   1961static int __iommu_attach_device(struct iommu_domain *domain,
   1962				 struct device *dev)
   1963{
   1964	int ret;
   1965
   1966	if (unlikely(domain->ops->attach_dev == NULL))
   1967		return -ENODEV;
   1968
   1969	ret = domain->ops->attach_dev(domain, dev);
   1970	if (!ret)
   1971		trace_attach_device_to_domain(dev);
   1972	return ret;
   1973}
   1974
   1975int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
   1976{
   1977	struct iommu_group *group;
   1978	int ret;
   1979
   1980	group = iommu_group_get(dev);
   1981	if (!group)
   1982		return -ENODEV;
   1983
   1984	/*
   1985	 * Lock the group to make sure the device-count doesn't
   1986	 * change while we are attaching
   1987	 */
   1988	mutex_lock(&group->mutex);
   1989	ret = -EINVAL;
   1990	if (iommu_group_device_count(group) != 1)
   1991		goto out_unlock;
   1992
   1993	ret = __iommu_attach_group(domain, group);
   1994
   1995out_unlock:
   1996	mutex_unlock(&group->mutex);
   1997	iommu_group_put(group);
   1998
   1999	return ret;
   2000}
   2001EXPORT_SYMBOL_GPL(iommu_attach_device);
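
/*
 * Usage sketch (hypothetical caller): the usual lifecycle for a driver that
 * manages its own translations for a single-device group is roughly
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(dev->bus);
 *
 *	if (!domain)
 *		return -ENOMEM;
 *	ret = iommu_attach_device(domain, dev);	// -EINVAL unless the group
 *						// has exactly one device
 *	...					// iommu_map()/iommu_unmap()
 *	iommu_detach_device(domain, dev);
 *	iommu_domain_free(domain);
 */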
   2002
   2003int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
   2004{
   2005	if (iommu_is_attach_deferred(dev))
   2006		return __iommu_attach_device(domain, dev);
   2007
   2008	return 0;
   2009}
   2010
   2011static void __iommu_detach_device(struct iommu_domain *domain,
   2012				  struct device *dev)
   2013{
   2014	if (iommu_is_attach_deferred(dev))
   2015		return;
   2016
   2017	domain->ops->detach_dev(domain, dev);
   2018	trace_detach_device_from_domain(dev);
   2019}
   2020
   2021void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
   2022{
   2023	struct iommu_group *group;
   2024
   2025	group = iommu_group_get(dev);
   2026	if (!group)
   2027		return;
   2028
   2029	mutex_lock(&group->mutex);
   2030	if (WARN_ON(domain != group->domain) ||
   2031	    WARN_ON(iommu_group_device_count(group) != 1))
   2032		goto out_unlock;
   2033	__iommu_group_set_core_domain(group);
   2034
   2035out_unlock:
   2036	mutex_unlock(&group->mutex);
   2037	iommu_group_put(group);
   2038}
   2039EXPORT_SYMBOL_GPL(iommu_detach_device);
   2040
   2041struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
   2042{
   2043	struct iommu_domain *domain;
   2044	struct iommu_group *group;
   2045
   2046	group = iommu_group_get(dev);
   2047	if (!group)
   2048		return NULL;
   2049
   2050	domain = group->domain;
   2051
   2052	iommu_group_put(group);
   2053
   2054	return domain;
   2055}
   2056EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
   2057
   2058/*
    2059 * For use by IOMMU_DOMAIN_DMA implementations, which already provide their
    2060 * own guarantees that the group and its default domain are valid and correct.
   2061 */
   2062struct iommu_domain *iommu_get_dma_domain(struct device *dev)
   2063{
   2064	return dev->iommu_group->default_domain;
   2065}
   2066
   2067/*
   2068 * IOMMU groups are really the natural working unit of the IOMMU, but
   2069 * the IOMMU API works on domains and devices.  Bridge that gap by
   2070 * iterating over the devices in a group.  Ideally we'd have a single
   2071 * device which represents the requestor ID of the group, but we also
   2072 * allow IOMMU drivers to create policy defined minimum sets, where
    2073 * the physical hardware may be able to distinguish members, but we
   2074 * wish to group them at a higher level (ex. untrusted multi-function
   2075 * PCI devices).  Thus we attach each device.
   2076 */
   2077static int iommu_group_do_attach_device(struct device *dev, void *data)
   2078{
   2079	struct iommu_domain *domain = data;
   2080
   2081	return __iommu_attach_device(domain, dev);
   2082}
   2083
   2084static int __iommu_attach_group(struct iommu_domain *domain,
   2085				struct iommu_group *group)
   2086{
   2087	int ret;
   2088
   2089	if (group->domain && group->domain != group->default_domain &&
   2090	    group->domain != group->blocking_domain)
   2091		return -EBUSY;
   2092
   2093	ret = __iommu_group_for_each_dev(group, domain,
   2094					 iommu_group_do_attach_device);
   2095	if (ret == 0)
   2096		group->domain = domain;
   2097
   2098	return ret;
   2099}
   2100
   2101int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
   2102{
   2103	int ret;
   2104
   2105	mutex_lock(&group->mutex);
   2106	ret = __iommu_attach_group(domain, group);
   2107	mutex_unlock(&group->mutex);
   2108
   2109	return ret;
   2110}
   2111EXPORT_SYMBOL_GPL(iommu_attach_group);
   2112
   2113static int iommu_group_do_detach_device(struct device *dev, void *data)
   2114{
   2115	struct iommu_domain *domain = data;
   2116
   2117	__iommu_detach_device(domain, dev);
   2118
   2119	return 0;
   2120}
   2121
   2122static int __iommu_group_set_domain(struct iommu_group *group,
   2123				    struct iommu_domain *new_domain)
   2124{
   2125	int ret;
   2126
   2127	if (group->domain == new_domain)
   2128		return 0;
   2129
   2130	/*
   2131	 * New drivers should support default domains and so the detach_dev() op
   2132	 * will never be called. Otherwise the NULL domain represents some
   2133	 * platform specific behavior.
    2134	 * platform-specific behavior.
   2135	if (!new_domain) {
   2136		if (WARN_ON(!group->domain->ops->detach_dev))
   2137			return -EINVAL;
   2138		__iommu_group_for_each_dev(group, group->domain,
   2139					   iommu_group_do_detach_device);
   2140		group->domain = NULL;
   2141		return 0;
   2142	}
   2143
   2144	/*
   2145	 * Changing the domain is done by calling attach_dev() on the new
   2146	 * domain. This switch does not have to be atomic and DMA can be
   2147	 * discarded during the transition. DMA must only be able to access
   2148	 * either new_domain or group->domain, never something else.
   2149	 *
    2150	 * Note that this is called in error unwind paths; attaching to a
   2151	 * domain that has already been attached cannot fail.
   2152	 */
   2153	ret = __iommu_group_for_each_dev(group, new_domain,
   2154					 iommu_group_do_attach_device);
   2155	if (ret)
   2156		return ret;
   2157	group->domain = new_domain;
   2158	return 0;
   2159}
   2160
   2161void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
   2162{
   2163	mutex_lock(&group->mutex);
   2164	__iommu_group_set_core_domain(group);
   2165	mutex_unlock(&group->mutex);
   2166}
   2167EXPORT_SYMBOL_GPL(iommu_detach_group);
   2168
   2169phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
   2170{
   2171	if (domain->type == IOMMU_DOMAIN_IDENTITY)
   2172		return iova;
   2173
   2174	if (domain->type == IOMMU_DOMAIN_BLOCKED)
   2175		return 0;
   2176
   2177	return domain->ops->iova_to_phys(domain, iova);
   2178}
   2179EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
   2180
   2181static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
   2182			   phys_addr_t paddr, size_t size, size_t *count)
   2183{
   2184	unsigned int pgsize_idx, pgsize_idx_next;
   2185	unsigned long pgsizes;
   2186	size_t offset, pgsize, pgsize_next;
   2187	unsigned long addr_merge = paddr | iova;
   2188
   2189	/* Page sizes supported by the hardware and small enough for @size */
   2190	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
   2191
   2192	/* Constrain the page sizes further based on the maximum alignment */
   2193	if (likely(addr_merge))
   2194		pgsizes &= GENMASK(__ffs(addr_merge), 0);
   2195
   2196	/* Make sure we have at least one suitable page size */
   2197	BUG_ON(!pgsizes);
   2198
   2199	/* Pick the biggest page size remaining */
   2200	pgsize_idx = __fls(pgsizes);
   2201	pgsize = BIT(pgsize_idx);
   2202	if (!count)
   2203		return pgsize;
   2204
    2205	/* Find the next biggest supported page size, if it exists */
   2206	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
   2207	if (!pgsizes)
   2208		goto out_set_count;
   2209
   2210	pgsize_idx_next = __ffs(pgsizes);
   2211	pgsize_next = BIT(pgsize_idx_next);
   2212
   2213	/*
   2214	 * There's no point trying a bigger page size unless the virtual
   2215	 * and physical addresses are similarly offset within the larger page.
   2216	 */
   2217	if ((iova ^ paddr) & (pgsize_next - 1))
   2218		goto out_set_count;
   2219
   2220	/* Calculate the offset to the next page size alignment boundary */
   2221	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
   2222
   2223	/*
   2224	 * If size is big enough to accommodate the larger page, reduce
   2225	 * the number of smaller pages.
   2226	 */
   2227	if (offset + pgsize_next <= size)
   2228		size = offset;
   2229
   2230out_set_count:
   2231	*count = size >> pgsize_idx;
   2232	return pgsize;
   2233}
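
/*
 * Worked example (illustrative values, assuming a pgsize_bitmap of
 * SZ_4K | SZ_2M): for iova 0x1000, paddr 0xa001000 and size 0x400000 the
 * addresses are only 4K aligned, so pgsize is SZ_4K. The next supported
 * size is SZ_2M and iova/paddr share the same offset within a 2M page, so
 * the count is clamped to the 0x1ff000 bytes up to the next 2M boundary:
 * the function returns pgsize SZ_4K with count 0x1ff, and a later call can
 * switch to 2M mappings for the remainder.
 */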
   2234
   2235static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
   2236			     phys_addr_t paddr, size_t size, int prot,
   2237			     gfp_t gfp, size_t *mapped)
   2238{
   2239	const struct iommu_domain_ops *ops = domain->ops;
   2240	size_t pgsize, count;
   2241	int ret;
   2242
   2243	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
   2244
   2245	pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
   2246		 iova, &paddr, pgsize, count);
   2247
   2248	if (ops->map_pages) {
   2249		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
   2250				     gfp, mapped);
   2251	} else {
   2252		ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
   2253		*mapped = ret ? 0 : pgsize;
   2254	}
   2255
   2256	return ret;
   2257}
   2258
   2259static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   2260		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
   2261{
   2262	const struct iommu_domain_ops *ops = domain->ops;
   2263	unsigned long orig_iova = iova;
   2264	unsigned int min_pagesz;
   2265	size_t orig_size = size;
   2266	phys_addr_t orig_paddr = paddr;
   2267	int ret = 0;
   2268
   2269	if (unlikely(!(ops->map || ops->map_pages) ||
   2270		     domain->pgsize_bitmap == 0UL))
   2271		return -ENODEV;
   2272
   2273	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
   2274		return -EINVAL;
   2275
   2276	/* find out the minimum page size supported */
   2277	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
   2278
   2279	/*
   2280	 * both the virtual address and the physical one, as well as
   2281	 * the size of the mapping, must be aligned (at least) to the
   2282	 * size of the smallest page supported by the hardware
   2283	 */
   2284	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
   2285		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
   2286		       iova, &paddr, size, min_pagesz);
   2287		return -EINVAL;
   2288	}
   2289
   2290	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
   2291
   2292	while (size) {
   2293		size_t mapped = 0;
   2294
   2295		ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
   2296					&mapped);
   2297		/*
   2298		 * Some pages may have been mapped, even if an error occurred,
   2299		 * so we should account for those so they can be unmapped.
   2300		 */
   2301		size -= mapped;
   2302
   2303		if (ret)
   2304			break;
   2305
   2306		iova += mapped;
   2307		paddr += mapped;
   2308	}
   2309
   2310	/* unroll mapping in case something went wrong */
   2311	if (ret)
   2312		iommu_unmap(domain, orig_iova, orig_size - size);
   2313	else
   2314		trace_map(orig_iova, orig_paddr, orig_size);
   2315
   2316	return ret;
   2317}
   2318
   2319static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
   2320		      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
   2321{
   2322	const struct iommu_domain_ops *ops = domain->ops;
   2323	int ret;
   2324
   2325	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
   2326	if (ret == 0 && ops->iotlb_sync_map)
   2327		ops->iotlb_sync_map(domain, iova, size);
   2328
   2329	return ret;
   2330}
   2331
   2332int iommu_map(struct iommu_domain *domain, unsigned long iova,
   2333	      phys_addr_t paddr, size_t size, int prot)
   2334{
   2335	might_sleep();
   2336	return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
   2337}
   2338EXPORT_SYMBOL_GPL(iommu_map);
   2339
   2340int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
   2341	      phys_addr_t paddr, size_t size, int prot)
   2342{
   2343	return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
   2344}
   2345EXPORT_SYMBOL_GPL(iommu_map_atomic);
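
/*
 * Usage sketch (hypothetical caller, sleepable context): mapping a single
 * physically contiguous page into an unmanaged domain and tearing the
 * mapping down again looks roughly like
 *
 *	ret = iommu_map(domain, iova, page_to_phys(page), SZ_4K,
 *			IOMMU_READ | IOMMU_WRITE);
 *	if (ret)
 *		return ret;
 *	...				// device DMA to/from iova
 *	iommu_unmap(domain, iova, SZ_4K);
 *
 * iommu_map_atomic() above is the GFP_ATOMIC variant for callers that
 * cannot sleep.
 */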
   2346
   2347static size_t __iommu_unmap_pages(struct iommu_domain *domain,
   2348				  unsigned long iova, size_t size,
   2349				  struct iommu_iotlb_gather *iotlb_gather)
   2350{
   2351	const struct iommu_domain_ops *ops = domain->ops;
   2352	size_t pgsize, count;
   2353
   2354	pgsize = iommu_pgsize(domain, iova, iova, size, &count);
   2355	return ops->unmap_pages ?
   2356	       ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
   2357	       ops->unmap(domain, iova, pgsize, iotlb_gather);
   2358}
   2359
   2360static size_t __iommu_unmap(struct iommu_domain *domain,
   2361			    unsigned long iova, size_t size,
   2362			    struct iommu_iotlb_gather *iotlb_gather)
   2363{
   2364	const struct iommu_domain_ops *ops = domain->ops;
   2365	size_t unmapped_page, unmapped = 0;
   2366	unsigned long orig_iova = iova;
   2367	unsigned int min_pagesz;
   2368
   2369	if (unlikely(!(ops->unmap || ops->unmap_pages) ||
   2370		     domain->pgsize_bitmap == 0UL))
   2371		return 0;
   2372
   2373	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
   2374		return 0;
   2375
   2376	/* find out the minimum page size supported */
   2377	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
   2378
   2379	/*
   2380	 * The virtual address, as well as the size of the mapping, must be
   2381	 * aligned (at least) to the size of the smallest page supported
   2382	 * by the hardware
   2383	 */
   2384	if (!IS_ALIGNED(iova | size, min_pagesz)) {
   2385		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
   2386		       iova, size, min_pagesz);
   2387		return 0;
   2388	}
   2389
   2390	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
   2391
   2392	/*
   2393	 * Keep iterating until we either unmap 'size' bytes (or more)
   2394	 * or we hit an area that isn't mapped.
   2395	 */
   2396	while (unmapped < size) {
   2397		unmapped_page = __iommu_unmap_pages(domain, iova,
   2398						    size - unmapped,
   2399						    iotlb_gather);
   2400		if (!unmapped_page)
   2401			break;
   2402
   2403		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
   2404			 iova, unmapped_page);
   2405
   2406		iova += unmapped_page;
   2407		unmapped += unmapped_page;
   2408	}
   2409
   2410	trace_unmap(orig_iova, size, unmapped);
   2411	return unmapped;
   2412}
   2413
   2414size_t iommu_unmap(struct iommu_domain *domain,
   2415		   unsigned long iova, size_t size)
   2416{
   2417	struct iommu_iotlb_gather iotlb_gather;
   2418	size_t ret;
   2419
   2420	iommu_iotlb_gather_init(&iotlb_gather);
   2421	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
   2422	iommu_iotlb_sync(domain, &iotlb_gather);
   2423
   2424	return ret;
   2425}
   2426EXPORT_SYMBOL_GPL(iommu_unmap);
   2427
   2428size_t iommu_unmap_fast(struct iommu_domain *domain,
   2429			unsigned long iova, size_t size,
   2430			struct iommu_iotlb_gather *iotlb_gather)
   2431{
   2432	return __iommu_unmap(domain, iova, size, iotlb_gather);
   2433}
   2434EXPORT_SYMBOL_GPL(iommu_unmap_fast);
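
/*
 * Usage sketch (hypothetical caller): iommu_unmap_fast() lets several
 * unmaps share one TLB flush via the gather cookie, e.g.
 *
 *	struct iommu_iotlb_gather gather;
 *
 *	iommu_iotlb_gather_init(&gather);
 *	iommu_unmap_fast(domain, iova_a, SZ_4K, &gather);
 *	iommu_unmap_fast(domain, iova_b, SZ_4K, &gather);
 *	iommu_iotlb_sync(domain, &gather);
 */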
   2435
   2436static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
   2437		struct scatterlist *sg, unsigned int nents, int prot,
   2438		gfp_t gfp)
   2439{
   2440	const struct iommu_domain_ops *ops = domain->ops;
   2441	size_t len = 0, mapped = 0;
   2442	phys_addr_t start;
   2443	unsigned int i = 0;
   2444	int ret;
   2445
   2446	while (i <= nents) {
   2447		phys_addr_t s_phys = sg_phys(sg);
   2448
   2449		if (len && s_phys != start + len) {
   2450			ret = __iommu_map(domain, iova + mapped, start,
   2451					len, prot, gfp);
   2452
   2453			if (ret)
   2454				goto out_err;
   2455
   2456			mapped += len;
   2457			len = 0;
   2458		}
   2459
   2460		if (len) {
   2461			len += sg->length;
   2462		} else {
   2463			len = sg->length;
   2464			start = s_phys;
   2465		}
   2466
   2467		if (++i < nents)
   2468			sg = sg_next(sg);
   2469	}
   2470
   2471	if (ops->iotlb_sync_map)
   2472		ops->iotlb_sync_map(domain, iova, mapped);
   2473	return mapped;
   2474
   2475out_err:
   2476	/* undo mappings already done */
   2477	iommu_unmap(domain, iova, mapped);
   2478
   2479	return ret;
   2480}
   2481
   2482ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
   2483		     struct scatterlist *sg, unsigned int nents, int prot)
   2484{
   2485	might_sleep();
   2486	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
   2487}
   2488EXPORT_SYMBOL_GPL(iommu_map_sg);
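
/*
 * Usage sketch (hypothetical caller): with a scatterlist already built,
 * e.g. via sg_alloc_table_from_pages(), the whole table can be mapped at
 * one IOVA range; physically contiguous entries are merged above:
 *
 *	ssize_t mapped;
 *
 *	mapped = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents,
 *			      IOMMU_READ | IOMMU_WRITE);
 *	if (mapped < 0)
 *		return mapped;	// nothing is left mapped on failure
 */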
   2489
   2490ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
   2491		    struct scatterlist *sg, unsigned int nents, int prot)
   2492{
   2493	return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
   2494}
   2495
   2496/**
   2497 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
   2498 * @domain: the iommu domain where the fault has happened
   2499 * @dev: the device where the fault has happened
   2500 * @iova: the faulting address
   2501 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
   2502 *
   2503 * This function should be called by the low-level IOMMU implementations
   2504 * whenever IOMMU faults happen, to allow high-level users, that are
   2505 * interested in such events, to know about them.
   2506 *
   2507 * This event may be useful for several possible use cases:
   2508 * - mere logging of the event
   2509 * - dynamic TLB/PTE loading
    2510 * - restarting the faulting device, if required
   2511 *
   2512 * Returns 0 on success and an appropriate error code otherwise (if dynamic
   2513 * PTE/TLB loading will one day be supported, implementations will be able
   2514 * to tell whether it succeeded or not according to this return value).
   2515 *
   2516 * Specifically, -ENOSYS is returned if a fault handler isn't installed
   2517 * (though fault handlers can also return -ENOSYS, in case they want to
   2518 * elicit the default behavior of the IOMMU drivers).
   2519 */
   2520int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
   2521		       unsigned long iova, int flags)
   2522{
   2523	int ret = -ENOSYS;
   2524
   2525	/*
   2526	 * if upper layers showed interest and installed a fault handler,
   2527	 * invoke it.
   2528	 */
   2529	if (domain->handler)
   2530		ret = domain->handler(domain, dev, iova, flags,
   2531						domain->handler_token);
   2532
   2533	trace_io_page_fault(dev, iova, flags);
   2534	return ret;
   2535}
   2536EXPORT_SYMBOL_GPL(report_iommu_fault);
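
/*
 * Usage sketch (hypothetical low-level driver): report_iommu_fault() is
 * meant to be called from the hardware fault path, roughly
 *
 *	static irqreturn_t foo_iommu_irq(int irq, void *data)
 *	{
 *		...	// decode the fault record into domain, dev, iova, flags
 *		if (report_iommu_fault(domain, dev, iova, flags))
 *			...	// no handler, or handler declined: driver policy
 *		return IRQ_HANDLED;
 *	}
 */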
   2537
   2538static int __init iommu_init(void)
   2539{
   2540	iommu_group_kset = kset_create_and_add("iommu_groups",
   2541					       NULL, kernel_kobj);
   2542	BUG_ON(!iommu_group_kset);
   2543
   2544	iommu_debugfs_setup();
   2545
   2546	return 0;
   2547}
   2548core_initcall(iommu_init);
   2549
   2550int iommu_enable_nesting(struct iommu_domain *domain)
   2551{
   2552	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
   2553		return -EINVAL;
   2554	if (!domain->ops->enable_nesting)
   2555		return -EINVAL;
   2556	return domain->ops->enable_nesting(domain);
   2557}
   2558EXPORT_SYMBOL_GPL(iommu_enable_nesting);
   2559
   2560int iommu_set_pgtable_quirks(struct iommu_domain *domain,
   2561		unsigned long quirk)
   2562{
   2563	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
   2564		return -EINVAL;
   2565	if (!domain->ops->set_pgtable_quirks)
   2566		return -EINVAL;
   2567	return domain->ops->set_pgtable_quirks(domain, quirk);
   2568}
   2569EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
   2570
   2571void iommu_get_resv_regions(struct device *dev, struct list_head *list)
   2572{
   2573	const struct iommu_ops *ops = dev_iommu_ops(dev);
   2574
   2575	if (ops->get_resv_regions)
   2576		ops->get_resv_regions(dev, list);
   2577}
   2578
   2579void iommu_put_resv_regions(struct device *dev, struct list_head *list)
   2580{
   2581	const struct iommu_ops *ops = dev_iommu_ops(dev);
   2582
   2583	if (ops->put_resv_regions)
   2584		ops->put_resv_regions(dev, list);
   2585}
   2586
   2587/**
   2588 * generic_iommu_put_resv_regions - Reserved region driver helper
   2589 * @dev: device for which to free reserved regions
   2590 * @list: reserved region list for device
   2591 *
   2592 * IOMMU drivers can use this to implement their .put_resv_regions() callback
   2593 * for simple reservations. Memory allocated for each reserved region will be
   2594 * freed. If an IOMMU driver allocates additional resources per region, it is
   2595 * going to have to implement a custom callback.
   2596 */
   2597void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list)
   2598{
   2599	struct iommu_resv_region *entry, *next;
   2600
   2601	list_for_each_entry_safe(entry, next, list, list)
   2602		kfree(entry);
   2603}
   2604EXPORT_SYMBOL(generic_iommu_put_resv_regions);
   2605
   2606struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
   2607						  size_t length, int prot,
   2608						  enum iommu_resv_type type)
   2609{
   2610	struct iommu_resv_region *region;
   2611
   2612	region = kzalloc(sizeof(*region), GFP_KERNEL);
   2613	if (!region)
   2614		return NULL;
   2615
   2616	INIT_LIST_HEAD(&region->list);
   2617	region->start = start;
   2618	region->length = length;
   2619	region->prot = prot;
   2620	region->type = type;
   2621	return region;
   2622}
   2623EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
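
/*
 * Usage sketch (hypothetical driver callback): an IOMMU driver's
 * .get_resv_regions() implementation typically allocates regions with this
 * helper and appends them to the caller's list, e.g.
 *
 *	static void foo_get_resv_regions(struct device *dev,
 *					 struct list_head *head)
 *	{
 *		struct iommu_resv_region *region;
 *
 *		// FOO_MSI_BASE is a made-up, driver-specific constant
 *		region = iommu_alloc_resv_region(FOO_MSI_BASE, SZ_1M,
 *						 IOMMU_WRITE | IOMMU_MMIO,
 *						 IOMMU_RESV_MSI);
 *		if (region)
 *			list_add_tail(&region->list, head);
 *	}
 */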
   2624
   2625void iommu_set_default_passthrough(bool cmd_line)
   2626{
   2627	if (cmd_line)
   2628		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
   2629	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
   2630}
   2631
   2632void iommu_set_default_translated(bool cmd_line)
   2633{
   2634	if (cmd_line)
   2635		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
   2636	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
   2637}
   2638
   2639bool iommu_default_passthrough(void)
   2640{
   2641	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
   2642}
   2643EXPORT_SYMBOL_GPL(iommu_default_passthrough);
   2644
   2645const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
   2646{
   2647	const struct iommu_ops *ops = NULL;
   2648	struct iommu_device *iommu;
   2649
   2650	spin_lock(&iommu_device_lock);
   2651	list_for_each_entry(iommu, &iommu_device_list, list)
   2652		if (iommu->fwnode == fwnode) {
   2653			ops = iommu->ops;
   2654			break;
   2655		}
   2656	spin_unlock(&iommu_device_lock);
   2657	return ops;
   2658}
   2659
   2660int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
   2661		      const struct iommu_ops *ops)
   2662{
   2663	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2664
   2665	if (fwspec)
   2666		return ops == fwspec->ops ? 0 : -EINVAL;
   2667
   2668	if (!dev_iommu_get(dev))
   2669		return -ENOMEM;
   2670
   2671	/* Preallocate for the overwhelmingly common case of 1 ID */
   2672	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
   2673	if (!fwspec)
   2674		return -ENOMEM;
   2675
   2676	of_node_get(to_of_node(iommu_fwnode));
   2677	fwspec->iommu_fwnode = iommu_fwnode;
   2678	fwspec->ops = ops;
   2679	dev_iommu_fwspec_set(dev, fwspec);
   2680	return 0;
   2681}
   2682EXPORT_SYMBOL_GPL(iommu_fwspec_init);
   2683
   2684void iommu_fwspec_free(struct device *dev)
   2685{
   2686	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2687
   2688	if (fwspec) {
   2689		fwnode_handle_put(fwspec->iommu_fwnode);
   2690		kfree(fwspec);
   2691		dev_iommu_fwspec_set(dev, NULL);
   2692	}
   2693}
   2694EXPORT_SYMBOL_GPL(iommu_fwspec_free);
   2695
   2696int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
   2697{
   2698	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
   2699	int i, new_num;
   2700
   2701	if (!fwspec)
   2702		return -EINVAL;
   2703
   2704	new_num = fwspec->num_ids + num_ids;
   2705	if (new_num > 1) {
   2706		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
   2707				  GFP_KERNEL);
   2708		if (!fwspec)
   2709			return -ENOMEM;
   2710
   2711		dev_iommu_fwspec_set(dev, fwspec);
   2712	}
   2713
   2714	for (i = 0; i < num_ids; i++)
   2715		fwspec->ids[fwspec->num_ids + i] = ids[i];
   2716
   2717	fwspec->num_ids = new_num;
   2718	return 0;
   2719}
   2720EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
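
/*
 * Usage sketch (firmware glue, hypothetical values): bus code parsing a
 * firmware description typically initializes the fwspec once and then adds
 * the device's stream/requester IDs:
 *
 *	u32 sid = ...;	// ID parsed from DT/ACPI
 *
 *	ret = iommu_fwspec_init(dev, iommu_fwnode, ops);
 *	if (!ret)
 *		ret = iommu_fwspec_add_ids(dev, &sid, 1);
 *	if (ret)
 *		iommu_fwspec_free(dev);
 */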
   2721
   2722/*
   2723 * Per device IOMMU features.
   2724 */
   2725int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
   2726{
   2727	if (dev->iommu && dev->iommu->iommu_dev) {
   2728		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
   2729
   2730		if (ops->dev_enable_feat)
   2731			return ops->dev_enable_feat(dev, feat);
   2732	}
   2733
   2734	return -ENODEV;
   2735}
   2736EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
   2737
   2738/*
   2739 * The device drivers should do the necessary cleanups before calling this.
   2740 */
   2741int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
   2742{
   2743	if (dev->iommu && dev->iommu->iommu_dev) {
   2744		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
   2745
   2746		if (ops->dev_disable_feat)
   2747			return ops->dev_disable_feat(dev, feat);
   2748	}
   2749
   2750	return -EBUSY;
   2751}
   2752EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
   2753
   2754bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
   2755{
   2756	if (dev->iommu && dev->iommu->iommu_dev) {
   2757		const struct iommu_ops *ops = dev->iommu->iommu_dev->ops;
   2758
   2759		if (ops->dev_feat_enabled)
   2760			return ops->dev_feat_enabled(dev, feat);
   2761	}
   2762
   2763	return false;
   2764}
   2765EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
   2766
   2767/**
   2768 * iommu_sva_bind_device() - Bind a process address space to a device
   2769 * @dev: the device
   2770 * @mm: the mm to bind, caller must hold a reference to it
   2771 * @drvdata: opaque data pointer to pass to bind callback
   2772 *
   2773 * Create a bond between device and address space, allowing the device to access
    2774 * the mm using the returned PASID. If a bond already exists between @dev and
   2775 * @mm, it is returned and an additional reference is taken. Caller must call
   2776 * iommu_sva_unbind_device() to release each reference.
   2777 *
   2778 * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
   2779 * initialize the required SVA features.
   2780 *
   2781 * On error, returns an ERR_PTR value.
   2782 */
   2783struct iommu_sva *
   2784iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
   2785{
   2786	struct iommu_group *group;
   2787	struct iommu_sva *handle = ERR_PTR(-EINVAL);
   2788	const struct iommu_ops *ops = dev_iommu_ops(dev);
   2789
   2790	if (!ops->sva_bind)
   2791		return ERR_PTR(-ENODEV);
   2792
   2793	group = iommu_group_get(dev);
   2794	if (!group)
   2795		return ERR_PTR(-ENODEV);
   2796
   2797	/* Ensure device count and domain don't change while we're binding */
   2798	mutex_lock(&group->mutex);
   2799
   2800	/*
   2801	 * To keep things simple, SVA currently doesn't support IOMMU groups
   2802	 * with more than one device. Existing SVA-capable systems are not
   2803	 * affected by the problems that required IOMMU groups (lack of ACS
   2804	 * isolation, device ID aliasing and other hardware issues).
   2805	 */
   2806	if (iommu_group_device_count(group) != 1)
   2807		goto out_unlock;
   2808
   2809	handle = ops->sva_bind(dev, mm, drvdata);
   2810
   2811out_unlock:
   2812	mutex_unlock(&group->mutex);
   2813	iommu_group_put(group);
   2814
   2815	return handle;
   2816}
   2817EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
   2818
   2819/**
   2820 * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
   2821 * @handle: the handle returned by iommu_sva_bind_device()
   2822 *
    2823 * Put a reference to a bond between device and address space. The device should
    2824 * not be issuing any more transactions for this PASID. All outstanding page
   2825 * requests for this PASID must have been flushed to the IOMMU.
   2826 */
   2827void iommu_sva_unbind_device(struct iommu_sva *handle)
   2828{
   2829	struct iommu_group *group;
   2830	struct device *dev = handle->dev;
   2831	const struct iommu_ops *ops = dev_iommu_ops(dev);
   2832
   2833	if (!ops->sva_unbind)
   2834		return;
   2835
   2836	group = iommu_group_get(dev);
   2837	if (!group)
   2838		return;
   2839
   2840	mutex_lock(&group->mutex);
   2841	ops->sva_unbind(handle);
   2842	mutex_unlock(&group->mutex);
   2843
   2844	iommu_group_put(group);
   2845}
   2846EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
   2847
   2848u32 iommu_sva_get_pasid(struct iommu_sva *handle)
   2849{
   2850	const struct iommu_ops *ops = dev_iommu_ops(handle->dev);
   2851
   2852	if (!ops->sva_get_pasid)
   2853		return IOMMU_PASID_INVALID;
   2854
   2855	return ops->sva_get_pasid(handle);
   2856}
   2857EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
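
/*
 * Usage sketch (hypothetical driver): the SVA flow documented above, end to
 * end, is roughly
 *
 *	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
 *	if (ret)
 *		return ret;
 *	handle = iommu_sva_bind_device(dev, current->mm, NULL);
 *	if (IS_ERR(handle))
 *		goto err_disable;
 *	pasid = iommu_sva_get_pasid(handle);	// program into the device
 *	...
 *	iommu_sva_unbind_device(handle);
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 */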
   2858
   2859/*
   2860 * Changes the default domain of an iommu group that has *only* one device
   2861 *
   2862 * @group: The group for which the default domain should be changed
    2863 * @prev_dev: The device in the group (used to make sure that the device
    2864 *	 hasn't changed since the caller looked it up)
   2865 * @type: The type of the new default domain that gets associated with the group
   2866 *
   2867 * Returns 0 on success and error code on failure
   2868 *
   2869 * Note:
    2870 * 1. Presently, this function is called only when the user requests to change the
   2871 *    group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
    2872 *    Please take a closer look before using it for any other purpose.
   2873 */
   2874static int iommu_change_dev_def_domain(struct iommu_group *group,
   2875				       struct device *prev_dev, int type)
   2876{
   2877	struct iommu_domain *prev_dom;
   2878	struct group_device *grp_dev;
   2879	int ret, dev_def_dom;
   2880	struct device *dev;
   2881
   2882	mutex_lock(&group->mutex);
   2883
   2884	if (group->default_domain != group->domain) {
   2885		dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n");
   2886		ret = -EBUSY;
   2887		goto out;
   2888	}
   2889
   2890	/*
    2891	 * The iommu group wasn't locked while the device lock was acquired in
    2892	 * iommu_group_store_type(), so make sure that the device count hasn't
    2893	 * changed in the meantime.
   2894	 *
   2895	 * Changing default domain of an iommu group with two or more devices
   2896	 * isn't supported because there could be a potential deadlock. Consider
   2897	 * the following scenario. T1 is trying to acquire device locks of all
   2898	 * the devices in the group and before it could acquire all of them,
    2899	 * there could be another thread T2 (from a different sub-system and use
   2900	 * case) that has already acquired some of the device locks and might be
   2901	 * waiting for T1 to release other device locks.
   2902	 */
   2903	if (iommu_group_device_count(group) != 1) {
   2904		dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n");
   2905		ret = -EINVAL;
   2906		goto out;
   2907	}
   2908
   2909	/* Since group has only one device */
   2910	grp_dev = list_first_entry(&group->devices, struct group_device, list);
   2911	dev = grp_dev->dev;
   2912
   2913	if (prev_dev != dev) {
   2914		dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n");
   2915		ret = -EBUSY;
   2916		goto out;
   2917	}
   2918
   2919	prev_dom = group->default_domain;
   2920	if (!prev_dom) {
   2921		ret = -EINVAL;
   2922		goto out;
   2923	}
   2924
   2925	dev_def_dom = iommu_get_def_domain_type(dev);
   2926	if (!type) {
   2927		/*
   2928		 * If the user hasn't requested any specific type of domain and
   2929		 * if the device supports both the domains, then default to the
   2930		 * domain the device was booted with
   2931		 */
   2932		type = dev_def_dom ? : iommu_def_domain_type;
   2933	} else if (dev_def_dom && type != dev_def_dom) {
   2934		dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n",
   2935				    iommu_domain_type_str(type));
   2936		ret = -EINVAL;
   2937		goto out;
   2938	}
   2939
   2940	/*
   2941	 * Switch to a new domain only if the requested domain type is different
   2942	 * from the existing default domain type
   2943	 */
   2944	if (prev_dom->type == type) {
   2945		ret = 0;
   2946		goto out;
   2947	}
   2948
   2949	/* We can bring up a flush queue without tearing down the domain */
   2950	if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
   2951		ret = iommu_dma_init_fq(prev_dom);
   2952		if (!ret)
   2953			prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
   2954		goto out;
   2955	}
   2956
   2957	/* Sets group->default_domain to the newly allocated domain */
   2958	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
   2959	if (ret)
   2960		goto out;
   2961
   2962	ret = iommu_create_device_direct_mappings(group, dev);
   2963	if (ret)
   2964		goto free_new_domain;
   2965
   2966	ret = __iommu_attach_device(group->default_domain, dev);
   2967	if (ret)
   2968		goto free_new_domain;
   2969
   2970	group->domain = group->default_domain;
   2971
   2972	/*
    2973	 * Release the mutex here because the ops->probe_finalize() callback of
    2974	 * some vendor IOMMU drivers calls arm_iommu_attach_device(), which
    2975	 * in turn might call back into IOMMU core code, where it tries to take
   2976	 * group->mutex, resulting in a deadlock.
   2977	 */
   2978	mutex_unlock(&group->mutex);
   2979
    2980	/* Make sure dma_ops is appropriately set */
   2981	iommu_group_do_probe_finalize(dev, group->default_domain);
   2982	iommu_domain_free(prev_dom);
   2983	return 0;
   2984
   2985free_new_domain:
   2986	iommu_domain_free(group->default_domain);
   2987	group->default_domain = prev_dom;
   2988	group->domain = prev_dom;
   2989
   2990out:
   2991	mutex_unlock(&group->mutex);
   2992
   2993	return ret;
   2994}
   2995
   2996/*
    2997 * Changing the default domain through sysfs requires the user to unbind the
   2998 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
   2999 * transition. Return failure if this isn't met.
   3000 *
   3001 * We need to consider the race between this and the device release path.
   3002 * device_lock(dev) is used here to guarantee that the device release path
   3003 * will not be entered at the same time.
   3004 */
   3005static ssize_t iommu_group_store_type(struct iommu_group *group,
   3006				      const char *buf, size_t count)
   3007{
   3008	struct group_device *grp_dev;
   3009	struct device *dev;
   3010	int ret, req_type;
   3011
   3012	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
   3013		return -EACCES;
   3014
   3015	if (WARN_ON(!group) || !group->default_domain)
   3016		return -EINVAL;
   3017
   3018	if (sysfs_streq(buf, "identity"))
   3019		req_type = IOMMU_DOMAIN_IDENTITY;
   3020	else if (sysfs_streq(buf, "DMA"))
   3021		req_type = IOMMU_DOMAIN_DMA;
   3022	else if (sysfs_streq(buf, "DMA-FQ"))
   3023		req_type = IOMMU_DOMAIN_DMA_FQ;
   3024	else if (sysfs_streq(buf, "auto"))
   3025		req_type = 0;
   3026	else
   3027		return -EINVAL;
   3028
   3029	/*
   3030	 * Lock/Unlock the group mutex here before device lock to
   3031	 * 1. Make sure that the iommu group has only one device (this is a
   3032	 *    prerequisite for step 2)
   3033	 * 2. Get struct *dev which is needed to lock device
    3034	 * 2. Get the struct device pointer which is needed to lock the device
   3035	mutex_lock(&group->mutex);
   3036	if (iommu_group_device_count(group) != 1) {
   3037		mutex_unlock(&group->mutex);
   3038		pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
   3039		return -EINVAL;
   3040	}
   3041
   3042	/* Since group has only one device */
   3043	grp_dev = list_first_entry(&group->devices, struct group_device, list);
   3044	dev = grp_dev->dev;
   3045	get_device(dev);
   3046
   3047	/*
   3048	 * Don't hold the group mutex because taking group mutex first and then
   3049	 * the device lock could potentially cause a deadlock as below. Assume
   3050	 * two threads T1 and T2. T1 is trying to change default domain of an
   3051	 * iommu group and T2 is trying to hot unplug a device or release [1] VF
   3052	 * of a PCIe device which is in the same iommu group. T1 takes group
   3053	 * mutex and before it could take device lock assume T2 has taken device
   3054	 * lock and is yet to take group mutex. Now, both the threads will be
    3055	 * waiting for the other thread to release its lock. The lock order
    3056	 * below avoids this:
   3057	 * device_lock(dev);
   3058	 *	mutex_lock(&group->mutex);
   3059	 *		iommu_change_dev_def_domain();
   3060	 *	mutex_unlock(&group->mutex);
   3061	 * device_unlock(dev);
   3062	 *
   3063	 * [1] Typical device release path
   3064	 * device_lock() from device/driver core code
   3065	 *  -> bus_notifier()
   3066	 *   -> iommu_bus_notifier()
   3067	 *    -> iommu_release_device()
   3068	 *     -> ops->release_device() vendor driver calls back iommu core code
   3069	 *      -> mutex_lock() from iommu core code
   3070	 */
   3071	mutex_unlock(&group->mutex);
   3072
   3073	/* Check if the device in the group still has a driver bound to it */
   3074	device_lock(dev);
   3075	if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
   3076	    group->default_domain->type == IOMMU_DOMAIN_DMA)) {
   3077		pr_err_ratelimited("Device is still bound to driver\n");
   3078		ret = -EBUSY;
   3079		goto out;
   3080	}
   3081
   3082	ret = iommu_change_dev_def_domain(group, dev, req_type);
   3083	ret = ret ?: count;
   3084
   3085out:
   3086	device_unlock(dev);
   3087	put_device(dev);
   3088
   3089	return ret;
   3090}
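
/*
 * Usage sketch (from user space, hypothetical group number): with the
 * device unbound from its driver, the default domain type can be switched
 * through sysfs, e.g.
 *
 *	# echo DMA-FQ > /sys/kernel/iommu_groups/7/type
 *
 * "identity", "DMA", "DMA-FQ" and "auto" are the accepted values, matching
 * the parsing above.
 */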
   3091
   3092/**
   3093 * iommu_device_use_default_domain() - Device driver wants to handle device
   3094 *                                     DMA through the kernel DMA API.
   3095 * @dev: The device.
   3096 *
   3097 * The device driver about to bind @dev wants to do DMA through the kernel
   3098 * DMA API. Return 0 if it is allowed, otherwise an error.
   3099 */
   3100int iommu_device_use_default_domain(struct device *dev)
   3101{
   3102	struct iommu_group *group = iommu_group_get(dev);
   3103	int ret = 0;
   3104
   3105	if (!group)
   3106		return 0;
   3107
   3108	mutex_lock(&group->mutex);
   3109	if (group->owner_cnt) {
   3110		if (group->domain != group->default_domain ||
   3111		    group->owner) {
   3112			ret = -EBUSY;
   3113			goto unlock_out;
   3114		}
   3115	}
   3116
   3117	group->owner_cnt++;
   3118
   3119unlock_out:
   3120	mutex_unlock(&group->mutex);
   3121	iommu_group_put(group);
   3122
   3123	return ret;
   3124}
   3125
   3126/**
   3127 * iommu_device_unuse_default_domain() - Device driver stops handling device
   3128 *                                       DMA through the kernel DMA API.
   3129 * @dev: The device.
   3130 *
    3131 * The device driver doesn't want to do DMA through the kernel DMA API anymore.
   3132 * It must be called after iommu_device_use_default_domain().
   3133 */
   3134void iommu_device_unuse_default_domain(struct device *dev)
   3135{
   3136	struct iommu_group *group = iommu_group_get(dev);
   3137
   3138	if (!group)
   3139		return;
   3140
   3141	mutex_lock(&group->mutex);
   3142	if (!WARN_ON(!group->owner_cnt))
   3143		group->owner_cnt--;
   3144
   3145	mutex_unlock(&group->mutex);
   3146	iommu_group_put(group);
   3147}
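
/*
 * Usage sketch: these two helpers bracket a driver bind; conceptually the
 * driver core does something like
 *
 *	ret = iommu_device_use_default_domain(dev);
 *	if (ret)
 *		return ret;	// group is claimed by another DMA owner
 *	ret = bind_the_driver(dev);	// hypothetical stand-in for probe
 *	if (ret)
 *		iommu_device_unuse_default_domain(dev);
 */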
   3148
   3149static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
   3150{
   3151	struct group_device *dev =
   3152		list_first_entry(&group->devices, struct group_device, list);
   3153
   3154	if (group->blocking_domain)
   3155		return 0;
   3156
   3157	group->blocking_domain =
   3158		__iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
   3159	if (!group->blocking_domain) {
   3160		/*
    3161		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED,
   3162		 * create an empty domain instead.
   3163		 */
   3164		group->blocking_domain = __iommu_domain_alloc(
   3165			dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
   3166		if (!group->blocking_domain)
   3167			return -EINVAL;
   3168	}
   3169	return 0;
   3170}
   3171
   3172/**
   3173 * iommu_group_claim_dma_owner() - Set DMA ownership of a group
   3174 * @group: The group.
   3175 * @owner: Caller specified pointer. Used for exclusive ownership.
   3176 *
    3177 * This is to support backward compatibility for vfio, which manages
    3178 * DMA ownership at the iommu_group level. New users should not rely on
    3179 * this interface.
   3180 */
   3181int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
   3182{
   3183	int ret = 0;
   3184
   3185	mutex_lock(&group->mutex);
   3186	if (group->owner_cnt) {
   3187		ret = -EPERM;
   3188		goto unlock_out;
   3189	} else {
   3190		if (group->domain && group->domain != group->default_domain) {
   3191			ret = -EBUSY;
   3192			goto unlock_out;
   3193		}
   3194
   3195		ret = __iommu_group_alloc_blocking_domain(group);
   3196		if (ret)
   3197			goto unlock_out;
   3198
   3199		ret = __iommu_group_set_domain(group, group->blocking_domain);
   3200		if (ret)
   3201			goto unlock_out;
   3202		group->owner = owner;
   3203	}
   3204
   3205	group->owner_cnt++;
   3206unlock_out:
   3207	mutex_unlock(&group->mutex);
   3208
   3209	return ret;
   3210}
   3211EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
   3212
   3213/**
   3214 * iommu_group_release_dma_owner() - Release DMA ownership of a group
   3215 * @group: The group.
   3216 *
   3217 * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
   3218 */
   3219void iommu_group_release_dma_owner(struct iommu_group *group)
   3220{
   3221	int ret;
   3222
   3223	mutex_lock(&group->mutex);
   3224	if (WARN_ON(!group->owner_cnt || !group->owner))
   3225		goto unlock_out;
   3226
   3227	group->owner_cnt = 0;
   3228	group->owner = NULL;
   3229	ret = __iommu_group_set_domain(group, group->default_domain);
   3230	WARN(ret, "iommu driver failed to attach the default domain");
   3231
   3232unlock_out:
   3233	mutex_unlock(&group->mutex);
   3234}
   3235EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
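
/*
 * Usage sketch (hypothetical vfio-like owner): a user-space driver
 * framework claims the group before exposing it, attaches its own domain,
 * and releases ownership when done:
 *
 *	ret = iommu_group_claim_dma_owner(group, owner_cookie);
 *	if (ret)
 *		return ret;	// -EPERM/-EBUSY if already owned or in use
 *	ret = iommu_attach_group(user_domain, group);
 *	...
 *	iommu_detach_group(user_domain, group);
 *	iommu_group_release_dma_owner(group);
 */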
   3236
   3237/**
   3238 * iommu_group_dma_owner_claimed() - Query group dma ownership status
   3239 * @group: The group.
   3240 *
    3241 * This provides a status query on a given group. It is racy and only for
   3242 * non-binding status reporting.
   3243 */
   3244bool iommu_group_dma_owner_claimed(struct iommu_group *group)
   3245{
   3246	unsigned int user;
   3247
   3248	mutex_lock(&group->mutex);
   3249	user = group->owner_cnt;
   3250	mutex_unlock(&group->mutex);
   3251
   3252	return user;
   3253}
   3254EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);