dma-iommu.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
dma-iommu.c (44460B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * A fairly generic DMA-API to IOMMU-API glue layer.
      4 *
      5 * Copyright (C) 2014-2015 ARM Ltd.
      6 *
      7 * based in part on arch/arm/mm/dma-mapping.c:
      8 * Copyright (C) 2000-2004 Russell King
      9 */
     10
     11#include <linux/acpi_iort.h>
     12#include <linux/atomic.h>
     13#include <linux/crash_dump.h>
     14#include <linux/device.h>
     15#include <linux/dma-direct.h>
     16#include <linux/dma-iommu.h>
     17#include <linux/dma-map-ops.h>
     18#include <linux/gfp.h>
     19#include <linux/huge_mm.h>
     20#include <linux/iommu.h>
     21#include <linux/iova.h>
     22#include <linux/irq.h>
     23#include <linux/list_sort.h>
     24#include <linux/mm.h>
     25#include <linux/mutex.h>
     26#include <linux/pci.h>
     27#include <linux/scatterlist.h>
     28#include <linux/spinlock.h>
     29#include <linux/swiotlb.h>
     30#include <linux/vmalloc.h>
     31
/*
 * One MSI page tracked on a cookie's msi_page_list: the IOVA it is exposed
 * at and the physical address it corresponds to.
 */
struct iommu_dma_msi_page {
	struct list_head	list;	/* Link in iommu_dma_cookie::msi_page_list */
	dma_addr_t		iova;
	phys_addr_t		phys;
};
     37
/* Selects which arm of the union in struct iommu_dma_cookie is in use. */
enum iommu_dma_cookie_type {
	IOMMU_DMA_IOVA_COOKIE,	/* Full IOVA allocator (iovad plus flush queue state) */
	IOMMU_DMA_MSI_COOKIE,	/* Linear MSI page allocator (msi_iova) */
};
     42
/*
 * Per-domain DMA state stored in iommu_domain::iova_cookie. @type selects
 * which arm of the union below is valid.
 */
struct iommu_dma_cookie {
	enum iommu_dma_cookie_type	type;
	union {
		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
		struct {
			struct iova_domain	iovad;

			struct iova_fq __percpu *fq;	/* Flush queue */
			/* Number of TLB flushes that have been started */
			atomic64_t		fq_flush_start_cnt;
			/* Number of TLB flushes that have been finished */
			atomic64_t		fq_flush_finish_cnt;
			/* Timer to regularly empty the flush queues */
			struct timer_list	fq_timer;
			/* 1 when timer is active, 0 when not */
			atomic_t		fq_timer_on;
		};
		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
		dma_addr_t		msi_iova;
	};
	/* List of struct iommu_dma_msi_page entries, freed with the cookie */
	struct list_head		msi_page_list;

	/* Domain for flush queue callback; NULL if flush queue not in use */
	struct iommu_domain		*fq_domain;
};
     68
/* Gates the iommu_deferred_attach() calls on the map/alloc paths in this file */
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
/* Set by "iommu.forcedac"; when true, PCI devices skip the 32-bit IOVA attempt */
bool iommu_dma_forcedac __read_mostly;
     71
     72static int __init iommu_dma_forcedac_setup(char *str)
     73{
     74	int ret = kstrtobool(str, &iommu_dma_forcedac);
     75
     76	if (!ret && iommu_dma_forcedac)
     77		pr_info("Forcing DAC for PCI devices\n");
     78	return ret;
     79}
     80early_param("iommu.forcedac", iommu_dma_forcedac_setup);
     81
/*
 * Number of slots in each per-CPU flush queue ring. fq_full() reports the
 * ring as full when one slot is still unused, so at most IOVA_FQ_SIZE - 1
 * entries are queued at a time.
 */
#define IOVA_FQ_SIZE	256

/* Timeout (in ms) after which entries are flushed from the queue */
#define IOVA_FQ_TIMEOUT	10
     87
/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
	unsigned long iova_pfn;		/* PFN passed back to free_iova_fast() */
	unsigned long pages;		/* Size passed back to free_iova_fast() */
	struct list_head freelist;	/* Pages given to put_pages_list() on release */
	u64 counter; /* Flush counter when this entry was added */
};
     95
/*
 * Per-CPU flush queue structure: a fixed-size ring of entries. @head indexes
 * the oldest queued entry and @tail the next slot to fill; head == tail means
 * the ring is empty. fq_full(), fq_ring_add() and fq_ring_free() assert that
 * @lock is held.
 */
struct iova_fq {
	struct iova_fq_entry entries[IOVA_FQ_SIZE];
	unsigned int head, tail;
	spinlock_t lock;
};
    102
/*
 * Iterate @i over the ring indices from @fq->head up to, but not including,
 * @fq->tail, wrapping at IOVA_FQ_SIZE. @fq->tail is re-read on every pass.
 */
#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
    105
    106static inline bool fq_full(struct iova_fq *fq)
    107{
    108	assert_spin_locked(&fq->lock);
    109	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
    110}
    111
    112static inline unsigned int fq_ring_add(struct iova_fq *fq)
    113{
    114	unsigned int idx = fq->tail;
    115
    116	assert_spin_locked(&fq->lock);
    117
    118	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
    119
    120	return idx;
    121}
    122
    123static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
    124{
    125	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
    126	unsigned int idx;
    127
    128	assert_spin_locked(&fq->lock);
    129
    130	fq_ring_for_each(idx, fq) {
    131
    132		if (fq->entries[idx].counter >= counter)
    133			break;
    134
    135		put_pages_list(&fq->entries[idx].freelist);
    136		free_iova_fast(&cookie->iovad,
    137			       fq->entries[idx].iova_pfn,
    138			       fq->entries[idx].pages);
    139
    140		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
    141	}
    142}
    143
    144static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
    145{
    146	atomic64_inc(&cookie->fq_flush_start_cnt);
    147	cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
    148	atomic64_inc(&cookie->fq_flush_finish_cnt);
    149}
    150
    151static void fq_flush_timeout(struct timer_list *t)
    152{
    153	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
    154	int cpu;
    155
    156	atomic_set(&cookie->fq_timer_on, 0);
    157	fq_flush_iotlb(cookie);
    158
    159	for_each_possible_cpu(cpu) {
    160		unsigned long flags;
    161		struct iova_fq *fq;
    162
    163		fq = per_cpu_ptr(cookie->fq, cpu);
    164		spin_lock_irqsave(&fq->lock, flags);
    165		fq_ring_free(cookie, fq);
    166		spin_unlock_irqrestore(&fq->lock, flags);
    167	}
    168}
    169
/*
 * Defer the release of an unmapped IOVA range: record @pfn, @pages and the
 * pages on @freelist in the current CPU's flush queue so that fq_ring_free()
 * can release them after a later IOTLB flush has finished. If the queue is
 * full an IOTLB flush is issued on the spot, and the flush timer is armed if
 * it is not already marked as running.
 */
static void queue_iova(struct iommu_dma_cookie *cookie,
		unsigned long pfn, unsigned long pages,
		struct list_head *freelist)
{
	struct iova_fq *fq;
	unsigned long flags;
	unsigned int idx;

	/*
	 * Order against the IOMMU driver's pagetable update from unmapping
	 * @pte, to guarantee that fq_flush_iotlb() observes that if called
	 * from a different CPU before we release the lock below. Full barrier
	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
	 * written fq state here.
	 */
	smp_mb();

	fq = raw_cpu_ptr(cookie->fq);
	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(cookie, fq);

	if (fq_full(fq)) {
		fq_flush_iotlb(cookie);
		fq_ring_free(cookie, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages    = pages;
	/* Snapshot of started flushes; fq_ring_free() compares it to finished ones */
	fq->entries[idx].counter  = atomic64_read(&cookie->fq_flush_start_cnt);
	list_splice(freelist, &fq->entries[idx].freelist);

	spin_unlock_irqrestore(&fq->lock, flags);

	/* Avoid false sharing as much as possible. */
	if (!atomic_read(&cookie->fq_timer_on) &&
	    !atomic_xchg(&cookie->fq_timer_on, 1))
		mod_timer(&cookie->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
    217
    218static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
    219{
    220	int cpu, idx;
    221
    222	if (!cookie->fq)
    223		return;
    224
    225	del_timer_sync(&cookie->fq_timer);
    226	/* The IOVAs will be torn down separately, so just free our queued pages */
    227	for_each_possible_cpu(cpu) {
    228		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
    229
    230		fq_ring_for_each(idx, fq)
    231			put_pages_list(&fq->entries[idx].freelist);
    232	}
    233
    234	free_percpu(cookie->fq);
    235}
    236
/*
 * Allocate and initialise the per-CPU flush queues for @domain's cookie, then
 * publish them by setting cookie->fq_domain.
 *
 * Returns 0 on success or when cookie->fq_domain is already set, -ENOMEM if
 * the per-CPU allocation fails.
 *
 * sysfs updates are serialised by the mutex of the group owning @domain
 */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_fq __percpu *queue;
	int i, cpu;

	/* Already set up: nothing to do */
	if (cookie->fq_domain)
		return 0;

	atomic64_set(&cookie->fq_flush_start_cnt,  0);
	atomic64_set(&cookie->fq_flush_finish_cnt, 0);

	queue = alloc_percpu(struct iova_fq);
	if (!queue) {
		pr_warn("iova flush queue initialization failed\n");
		return -ENOMEM;
	}

	/* Start every CPU's ring empty, with each slot's freelist initialised */
	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(queue, cpu);

		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);

		for (i = 0; i < IOVA_FQ_SIZE; i++)
			INIT_LIST_HEAD(&fq->entries[i].freelist);
	}

	cookie->fq = queue;

	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
	atomic_set(&cookie->fq_timer_on, 0);
	/*
	 * Prevent incomplete fq state being observable. Pairs with path from
	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
	 */
	smp_wmb();
	WRITE_ONCE(cookie->fq_domain, domain);
	return 0;
}
    280
    281static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
    282{
    283	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
    284		return cookie->iovad.granule;
    285	return PAGE_SIZE;
    286}
    287
    288static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
    289{
    290	struct iommu_dma_cookie *cookie;
    291
    292	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
    293	if (cookie) {
    294		INIT_LIST_HEAD(&cookie->msi_page_list);
    295		cookie->type = type;
    296	}
    297	return cookie;
    298}
    299
    300/**
    301 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
    302 * @domain: IOMMU domain to prepare for DMA-API usage
    303 */
    304int iommu_get_dma_cookie(struct iommu_domain *domain)
    305{
    306	if (domain->iova_cookie)
    307		return -EEXIST;
    308
    309	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
    310	if (!domain->iova_cookie)
    311		return -ENOMEM;
    312
    313	return 0;
    314}
    315
    316/**
    317 * iommu_get_msi_cookie - Acquire just MSI remapping resources
    318 * @domain: IOMMU domain to prepare
    319 * @base: Start address of IOVA region for MSI mappings
    320 *
    321 * Users who manage their own IOVA allocation and do not want DMA API support,
    322 * but would still like to take advantage of automatic MSI remapping, can use
    323 * this to initialise their own domain appropriately. Users should reserve a
    324 * contiguous IOVA region, starting at @base, large enough to accommodate the
    325 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
    326 * used by the devices attached to @domain.
    327 */
    328int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
    329{
    330	struct iommu_dma_cookie *cookie;
    331
    332	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
    333		return -EINVAL;
    334
    335	if (domain->iova_cookie)
    336		return -EEXIST;
    337
    338	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
    339	if (!cookie)
    340		return -ENOMEM;
    341
    342	cookie->msi_iova = base;
    343	domain->iova_cookie = cookie;
    344	return 0;
    345}
    346EXPORT_SYMBOL(iommu_get_msi_cookie);
    347
    348/**
    349 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
    350 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
    351 *          iommu_get_msi_cookie()
    352 */
    353void iommu_put_dma_cookie(struct iommu_domain *domain)
    354{
    355	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    356	struct iommu_dma_msi_page *msi, *tmp;
    357
    358	if (!cookie)
    359		return;
    360
    361	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
    362		iommu_dma_free_fq(cookie);
    363		put_iova_domain(&cookie->iovad);
    364	}
    365
    366	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
    367		list_del(&msi->list);
    368		kfree(msi);
    369	}
    370	kfree(cookie);
    371	domain->iova_cookie = NULL;
    372}
    373
    374/**
    375 * iommu_dma_get_resv_regions - Reserved region driver helper
    376 * @dev: Device from iommu_get_resv_regions()
    377 * @list: Reserved region list from iommu_get_resv_regions()
    378 *
    379 * IOMMU drivers can use this to implement their .get_resv_regions callback
    380 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
    381 * ITS region reservation on ACPI based ARM platforms that may require HW MSI
    382 * reservation.
    383 */
    384void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
    385{
    386
    387	if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
    388		iort_iommu_msi_get_resv_regions(dev, list);
    389
    390}
    391EXPORT_SYMBOL(iommu_dma_get_resv_regions);
    392
    393static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
    394		phys_addr_t start, phys_addr_t end)
    395{
    396	struct iova_domain *iovad = &cookie->iovad;
    397	struct iommu_dma_msi_page *msi_page;
    398	int i, num_pages;
    399
    400	start -= iova_offset(iovad, start);
    401	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
    402
    403	for (i = 0; i < num_pages; i++) {
    404		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
    405		if (!msi_page)
    406			return -ENOMEM;
    407
    408		msi_page->phys = start;
    409		msi_page->iova = start;
    410		INIT_LIST_HEAD(&msi_page->list);
    411		list_add(&msi_page->list, &cookie->msi_page_list);
    412		start += iovad->granule;
    413	}
    414
    415	return 0;
    416}
    417
    418static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
    419		const struct list_head *b)
    420{
    421	struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
    422	struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);
    423
    424	return res_a->res->start > res_b->res->start;
    425}
    426
/*
 * Reserve the IOVA ranges a PCI device must not be handed: every memory
 * window of its host bridge, plus the address space lying outside the
 * bridge's dma_ranges (the gap before each range and the remainder after the
 * last one). The bridge's dma_ranges list is sorted in place by start address.
 *
 * Returns 0 on success, or -EINVAL if the sorted dma_ranges overlap.
 */
static int iova_reserve_pci_windows(struct pci_dev *dev,
		struct iova_domain *iovad)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
	struct resource_entry *window;
	unsigned long lo, hi;
	phys_addr_t start = 0, end;

	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM)
			continue;

		lo = iova_pfn(iovad, window->res->start - window->offset);
		hi = iova_pfn(iovad, window->res->end - window->offset);
		reserve_iova(iovad, lo, hi);
	}

	/* Get reserved DMA windows from host bridge */
	list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
	resource_list_for_each_entry(window, &bridge->dma_ranges) {
		/* @start is the end of the previous range + 1 (0 for the first) */
		end = window->res->start - window->offset;
resv_iova:
		if (end > start) {
			lo = iova_pfn(iovad, start);
			hi = iova_pfn(iovad, end);
			reserve_iova(iovad, lo, hi);
		} else if (end < start) {
			/* DMA ranges should be non-overlapping */
			dev_err(&dev->dev,
				"Failed to reserve IOVA [%pa-%pa]\n",
				&start, &end);
			return -EINVAL;
		}

		start = window->res->end - window->offset + 1;
		/* If window is last entry */
		if (window->node.next == &bridge->dma_ranges &&
		    end != ~(phys_addr_t)0) {
			/* Jump back once more to reserve everything above it */
			end = ~(phys_addr_t)0;
			goto resv_iova;
		}
	}

	return 0;
}
    472
    473static int iova_reserve_iommu_regions(struct device *dev,
    474		struct iommu_domain *domain)
    475{
    476	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    477	struct iova_domain *iovad = &cookie->iovad;
    478	struct iommu_resv_region *region;
    479	LIST_HEAD(resv_regions);
    480	int ret = 0;
    481
    482	if (dev_is_pci(dev)) {
    483		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
    484		if (ret)
    485			return ret;
    486	}
    487
    488	iommu_get_resv_regions(dev, &resv_regions);
    489	list_for_each_entry(region, &resv_regions, list) {
    490		unsigned long lo, hi;
    491
    492		/* We ARE the software that manages these! */
    493		if (region->type == IOMMU_RESV_SW_MSI)
    494			continue;
    495
    496		lo = iova_pfn(iovad, region->start);
    497		hi = iova_pfn(iovad, region->start + region->length - 1);
    498		reserve_iova(iovad, lo, hi);
    499
    500		if (region->type == IOMMU_RESV_MSI)
    501			ret = cookie_init_hw_msi_region(cookie, region->start,
    502					region->start + region->length);
    503		if (ret)
    504			break;
    505	}
    506	iommu_put_resv_regions(dev, &resv_regions);
    507
    508	return ret;
    509}
    510
    511static bool dev_is_untrusted(struct device *dev)
    512{
    513	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
    514}
    515
    516static bool dev_use_swiotlb(struct device *dev)
    517{
    518	return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
    519}
    520
/**
 * iommu_dma_init_domain - Initialise a DMA mapping domain
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 * @base: IOVA at which the mappable address space starts
 * @limit: Last address of the IOVA space
 * @dev: Device the domain is being initialised for
 *
 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
 * avoid rounding surprises. If necessary, we reserve the page at address 0
 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 * any change which could make prior IOVAs invalid will fail.
 *
 * Return: 0 on success; -EINVAL if @domain has no IOVA cookie; -EFAULT if the
 * requested range lies outside a forced aperture or is incompatible with an
 * already-initialised domain; otherwise the error from setting up the IOVA
 * caches or from reserving regions.
 */
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
				 dma_addr_t limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	unsigned long order, base_pfn;
	struct iova_domain *iovad;
	int ret;

	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
		return -EINVAL;

	iovad = &cookie->iovad;

	/* Use the smallest supported page size for IOVA granularity */
	order = __ffs(domain->pgsize_bitmap);
	/* Never start below PFN 1, so that PFN 0 is not handed out */
	base_pfn = max_t(unsigned long, 1, base >> order);

	/* Check the domain allows at least some access to the device... */
	if (domain->geometry.force_aperture) {
		if (base > domain->geometry.aperture_end ||
		    limit < domain->geometry.aperture_start) {
			pr_warn("specified DMA range outside IOMMU capability\n");
			return -EFAULT;
		}
		/* ...then finally give it a kicking to make sure it fits */
		base_pfn = max_t(unsigned long, base_pfn,
				domain->geometry.aperture_start >> order);
	}

	/* start_pfn is always nonzero for an already-initialised domain */
	if (iovad->start_pfn) {
		if (1UL << order != iovad->granule ||
		    base_pfn != iovad->start_pfn) {
			pr_warn("Incompatible range for DMA domain\n");
			return -EFAULT;
		}

		return 0;
	}

	init_iova_domain(iovad, 1UL << order, base_pfn);
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		return ret;

	/* If the FQ fails we can simply fall back to strict mode */
	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
		domain->type = IOMMU_DOMAIN_DMA;

	return iova_reserve_iommu_regions(dev, domain);
}
    584
    585/**
    586 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
    587 *                    page flags.
    588 * @dir: Direction of DMA transfer
    589 * @coherent: Is the DMA master cache-coherent?
    590 * @attrs: DMA attributes for the mapping
    591 *
    592 * Return: corresponding IOMMU API page protection flags
    593 */
    594static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
    595		     unsigned long attrs)
    596{
    597	int prot = coherent ? IOMMU_CACHE : 0;
    598
    599	if (attrs & DMA_ATTR_PRIVILEGED)
    600		prot |= IOMMU_PRIV;
    601
    602	switch (dir) {
    603	case DMA_BIDIRECTIONAL:
    604		return prot | IOMMU_READ | IOMMU_WRITE;
    605	case DMA_TO_DEVICE:
    606		return prot | IOMMU_READ;
    607	case DMA_FROM_DEVICE:
    608		return prot | IOMMU_WRITE;
    609	default:
    610		return 0;
    611	}
    612}
    613
    614static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
    615		size_t size, u64 dma_limit, struct device *dev)
    616{
    617	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    618	struct iova_domain *iovad = &cookie->iovad;
    619	unsigned long shift, iova_len, iova = 0;
    620
    621	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
    622		cookie->msi_iova += size;
    623		return cookie->msi_iova - size;
    624	}
    625
    626	shift = iova_shift(iovad);
    627	iova_len = size >> shift;
    628
    629	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
    630
    631	if (domain->geometry.force_aperture)
    632		dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
    633
    634	/* Try to get PCI devices a SAC address */
    635	if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
    636		iova = alloc_iova_fast(iovad, iova_len,
    637				       DMA_BIT_MASK(32) >> shift, false);
    638
    639	if (!iova)
    640		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
    641				       true);
    642
    643	return (dma_addr_t)iova << shift;
    644}
    645
    646static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
    647		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
    648{
    649	struct iova_domain *iovad = &cookie->iovad;
    650
    651	/* The MSI case is only ever cleaning up its most recent allocation */
    652	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
    653		cookie->msi_iova -= size;
    654	else if (gather && gather->queued)
    655		queue_iova(cookie, iova_pfn(iovad, iova),
    656				size >> iova_shift(iovad),
    657				&gather->freelist);
    658	else
    659		free_iova_fast(iovad, iova_pfn(iovad, iova),
    660				size >> iova_shift(iovad));
    661}
    662
    663static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
    664		size_t size)
    665{
    666	struct iommu_domain *domain = iommu_get_dma_domain(dev);
    667	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    668	struct iova_domain *iovad = &cookie->iovad;
    669	size_t iova_off = iova_offset(iovad, dma_addr);
    670	struct iommu_iotlb_gather iotlb_gather;
    671	size_t unmapped;
    672
    673	dma_addr -= iova_off;
    674	size = iova_align(iovad, size + iova_off);
    675	iommu_iotlb_gather_init(&iotlb_gather);
    676	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
    677
    678	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
    679	WARN_ON(unmapped != size);
    680
    681	if (!iotlb_gather.queued)
    682		iommu_iotlb_sync(domain, &iotlb_gather);
    683	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
    684}
    685
    686static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
    687		size_t size, int prot, u64 dma_mask)
    688{
    689	struct iommu_domain *domain = iommu_get_dma_domain(dev);
    690	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    691	struct iova_domain *iovad = &cookie->iovad;
    692	size_t iova_off = iova_offset(iovad, phys);
    693	dma_addr_t iova;
    694
    695	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
    696	    iommu_deferred_attach(dev, domain))
    697		return DMA_MAPPING_ERROR;
    698
    699	size = iova_align(iovad, size + iova_off);
    700
    701	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
    702	if (!iova)
    703		return DMA_MAPPING_ERROR;
    704
    705	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
    706		iommu_dma_free_iova(cookie, iova, size, NULL);
    707		return DMA_MAPPING_ERROR;
    708	}
    709	return iova + iova_off;
    710}
    711
    712static void __iommu_dma_free_pages(struct page **pages, int count)
    713{
    714	while (count--)
    715		__free_page(pages[count]);
    716	kvfree(pages);
    717}
    718
/*
 * Allocate @count pages on @dev's NUMA node, preferring the largest orders
 * allowed by @order_mask (bit n set means order-n allocations may be used)
 * and splitting every higher-order allocation into individual pages.
 *
 * Returns an array of @count page pointers, or NULL if @order_mask permits no
 * usable order or an allocation fails; on failure the pages obtained so far
 * and the array are freed.
 */
static struct page **__iommu_dma_alloc_pages(struct device *dev,
		unsigned int count, unsigned long order_mask, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, nid = dev_to_node(dev);

	/* Discard orders above MAX_ORDER */
	order_mask &= (2U << MAX_ORDER) - 1;
	if (!order_mask)
		return NULL;

	pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so himem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	/* It makes no sense to muck about with huge pages */
	gfp &= ~__GFP_COMP;

	while (count) {
		struct page *page = NULL;
		unsigned int order_size;

		/*
		 * Higher-order allocations are a convenience rather
		 * than a necessity, hence using __GFP_NORETRY until
		 * falling back to minimum-order allocations.
		 */
		/* Drop orders larger than what is still needed, then go big to small */
		for (order_mask &= (2U << __fls(count)) - 1;
		     order_mask; order_mask &= ~order_size) {
			unsigned int order = __fls(order_mask);
			gfp_t alloc_flags = gfp;

			order_size = 1U << order;
			/* A smaller order is still available as a fallback */
			if (order_mask > order_size)
				alloc_flags |= __GFP_NORETRY;
			page = alloc_pages_node(nid, alloc_flags, order);
			if (!page)
				continue;
			if (order)
				split_page(page, order);
			break;
		}
		if (!page) {
			/* Only the first @i slots of @pages have been filled */
			__iommu_dma_free_pages(pages, i);
			return NULL;
		}
		count -= order_size;
		while (order_size--)
			pages[i++] = page++;
	}
	return pages;
}
    773
/*
 * Allocate pages for a buffer of @size bytes, build @sgt from them and map
 * the whole table at a single contiguous IOVA in @dev's DMA domain. On
 * success the IOVA and mapped length are stored in the first entry of @sgt
 * and the page array is returned; on any failure everything acquired so far
 * is released and NULL is returned. @prot is not used by this function.
 *
 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 * but an IOMMU which supports smaller pages might not map the whole thing.
 */
static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
	struct page **pages;
	dma_addr_t iova;
	ssize_t ret;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
		return NULL;

	/* Lowest set bit of the bitmap, i.e. the smallest IOMMU page size */
	min_size = alloc_sizes & -alloc_sizes;
	if (min_size < PAGE_SIZE) {
		min_size = PAGE_SIZE;
		alloc_sizes |= PAGE_SIZE;
	} else {
		size = ALIGN(size, min_size);
	}
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		alloc_sizes = min_size;

	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
					gfp);
	if (!pages)
		return NULL;

	size = iova_align(iovad, size);
	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
	if (!iova)
		goto out_free_pages;

	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
		goto out_free_iova;

	/* Mapping is not IOMMU_CACHE: prepare each segment via the arch hook */
	if (!(ioprot & IOMMU_CACHE)) {
		struct scatterlist *sg;
		int i;

		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
			arch_dma_prep_coherent(sg_page(sg), sg->length);
	}

	ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
	/* Treat both an error and a short mapping as failure */
	if (ret < 0 || ret < size)
		goto out_free_sg;

	sgt->sgl->dma_address = iova;
	sgt->sgl->dma_length = size;
	return pages;

out_free_sg:
	sg_free_table(sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
}
    844
    845static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
    846		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
    847		unsigned long attrs)
    848{
    849	struct page **pages;
    850	struct sg_table sgt;
    851	void *vaddr;
    852
    853	pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
    854						attrs);
    855	if (!pages)
    856		return NULL;
    857	*dma_handle = sgt.sgl->dma_address;
    858	sg_free_table(&sgt);
    859	vaddr = dma_common_pages_remap(pages, size, prot,
    860			__builtin_return_address(0));
    861	if (!vaddr)
    862		goto out_unmap;
    863	return vaddr;
    864
    865out_unmap:
    866	__iommu_dma_unmap(dev, *dma_handle, size);
    867	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
    868	return NULL;
    869}
    870
    871static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
    872		size_t size, enum dma_data_direction dir, gfp_t gfp,
    873		unsigned long attrs)
    874{
    875	struct dma_sgt_handle *sh;
    876
    877	sh = kmalloc(sizeof(*sh), gfp);
    878	if (!sh)
    879		return NULL;
    880
    881	sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
    882						    PAGE_KERNEL, attrs);
    883	if (!sh->pages) {
    884		kfree(sh);
    885		return NULL;
    886	}
    887	return &sh->sgt;
    888}
    889
    890static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
    891		struct sg_table *sgt, enum dma_data_direction dir)
    892{
    893	struct dma_sgt_handle *sh = sgt_handle(sgt);
    894
    895	__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
    896	__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
    897	sg_free_table(&sh->sgt);
    898	kfree(sh);
    899}
    900
    901static void iommu_dma_sync_single_for_cpu(struct device *dev,
    902		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
    903{
    904	phys_addr_t phys;
    905
    906	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
    907		return;
    908
    909	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
    910	if (!dev_is_dma_coherent(dev))
    911		arch_sync_dma_for_cpu(phys, size, dir);
    912
    913	if (is_swiotlb_buffer(dev, phys))
    914		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
    915}
    916
    917static void iommu_dma_sync_single_for_device(struct device *dev,
    918		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
    919{
    920	phys_addr_t phys;
    921
    922	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
    923		return;
    924
    925	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
    926	if (is_swiotlb_buffer(dev, phys))
    927		swiotlb_sync_single_for_device(dev, phys, size, dir);
    928
    929	if (!dev_is_dma_coherent(dev))
    930		arch_sync_dma_for_device(phys, size, dir);
    931}
    932
    933static void iommu_dma_sync_sg_for_cpu(struct device *dev,
    934		struct scatterlist *sgl, int nelems,
    935		enum dma_data_direction dir)
    936{
    937	struct scatterlist *sg;
    938	int i;
    939
    940	if (dev_use_swiotlb(dev))
    941		for_each_sg(sgl, sg, nelems, i)
    942			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
    943						      sg->length, dir);
    944	else if (!dev_is_dma_coherent(dev))
    945		for_each_sg(sgl, sg, nelems, i)
    946			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
    947}
    948
    949static void iommu_dma_sync_sg_for_device(struct device *dev,
    950		struct scatterlist *sgl, int nelems,
    951		enum dma_data_direction dir)
    952{
    953	struct scatterlist *sg;
    954	int i;
    955
    956	if (dev_use_swiotlb(dev))
    957		for_each_sg(sgl, sg, nelems, i)
    958			iommu_dma_sync_single_for_device(dev,
    959							 sg_dma_address(sg),
    960							 sg->length, dir);
    961	else if (!dev_is_dma_coherent(dev))
    962		for_each_sg(sgl, sg, nelems, i)
    963			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
    964}
    965
    966static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
    967		unsigned long offset, size_t size, enum dma_data_direction dir,
    968		unsigned long attrs)
    969{
    970	phys_addr_t phys = page_to_phys(page) + offset;
    971	bool coherent = dev_is_dma_coherent(dev);
    972	int prot = dma_info_to_prot(dir, coherent, attrs);
    973	struct iommu_domain *domain = iommu_get_dma_domain(dev);
    974	struct iommu_dma_cookie *cookie = domain->iova_cookie;
    975	struct iova_domain *iovad = &cookie->iovad;
    976	dma_addr_t iova, dma_mask = dma_get_mask(dev);
    977
    978	/*
    979	 * If both the physical buffer start address and size are
    980	 * page aligned, we don't need to use a bounce page.
    981	 */
    982	if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
    983		void *padding_start;
    984		size_t padding_size, aligned_size;
    985
    986		if (!is_swiotlb_active(dev)) {
    987			dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
    988			return DMA_MAPPING_ERROR;
    989		}
    990
    991		aligned_size = iova_align(iovad, size);
    992		phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
    993					      iova_mask(iovad), dir, attrs);
    994
    995		if (phys == DMA_MAPPING_ERROR)
    996			return DMA_MAPPING_ERROR;
    997
    998		/* Cleanup the padding area. */
    999		padding_start = phys_to_virt(phys);
   1000		padding_size = aligned_size;
   1001
   1002		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
   1003		    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
   1004			padding_start += size;
   1005			padding_size -= size;
   1006		}
   1007
   1008		memset(padding_start, 0, padding_size);
   1009	}
   1010
   1011	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
   1012		arch_sync_dma_for_device(phys, size, dir);
   1013
   1014	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
   1015	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
   1016		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
   1017	return iova;
   1018}
   1019
   1020static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
   1021		size_t size, enum dma_data_direction dir, unsigned long attrs)
   1022{
   1023	struct iommu_domain *domain = iommu_get_dma_domain(dev);
   1024	phys_addr_t phys;
   1025
   1026	phys = iommu_iova_to_phys(domain, dma_handle);
   1027	if (WARN_ON(!phys))
   1028		return;
   1029
   1030	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
   1031		arch_sync_dma_for_cpu(phys, size, dir);
   1032
   1033	__iommu_dma_unmap(dev, dma_handle, size);
   1034
   1035	if (unlikely(is_swiotlb_buffer(dev, phys)))
   1036		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
   1037}
   1038
   1039/*
   1040 * Prepare a successfully-mapped scatterlist to give back to the caller.
   1041 *
   1042 * At this point the segments are already laid out by iommu_dma_map_sg() to
   1043 * avoid individually crossing any boundaries, so we merely need to check a
   1044 * segment's start address to avoid concatenating across one.
   1045 */
   1046static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
   1047		dma_addr_t dma_addr)
   1048{
   1049	struct scatterlist *s, *cur = sg;
   1050	unsigned long seg_mask = dma_get_seg_boundary(dev);
   1051	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
   1052	int i, count = 0;
   1053
   1054	for_each_sg(sg, s, nents, i) {
   1055		/* Restore this segment's original unaligned fields first */
   1056		unsigned int s_iova_off = sg_dma_address(s);
   1057		unsigned int s_length = sg_dma_len(s);
   1058		unsigned int s_iova_len = s->length;
   1059
   1060		s->offset += s_iova_off;
   1061		s->length = s_length;
   1062		sg_dma_address(s) = DMA_MAPPING_ERROR;
   1063		sg_dma_len(s) = 0;
   1064
   1065		/*
   1066		 * Now fill in the real DMA data. If...
   1067		 * - there is a valid output segment to append to
   1068		 * - and this segment starts on an IOVA page boundary
   1069		 * - but doesn't fall at a segment boundary
   1070		 * - and wouldn't make the resulting output segment too long
   1071		 */
   1072		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
   1073		    (max_len - cur_len >= s_length)) {
   1074			/* ...then concatenate it with the previous one */
   1075			cur_len += s_length;
   1076		} else {
   1077			/* Otherwise start the next output segment */
   1078			if (i > 0)
   1079				cur = sg_next(cur);
   1080			cur_len = s_length;
   1081			count++;
   1082
   1083			sg_dma_address(cur) = dma_addr + s_iova_off;
   1084		}
   1085
   1086		sg_dma_len(cur) = cur_len;
   1087		dma_addr += s_iova_len;
   1088
   1089		if (s_length + s_iova_off < s_iova_len)
   1090			cur_len = 0;
   1091	}
   1092	return count;
   1093}
   1094
   1095/*
   1096 * If mapping failed, then just restore the original list,
   1097 * but making sure the DMA fields are invalidated.
   1098 */
   1099static void __invalidate_sg(struct scatterlist *sg, int nents)
   1100{
   1101	struct scatterlist *s;
   1102	int i;
   1103
   1104	for_each_sg(sg, s, nents, i) {
   1105		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
   1106			s->offset += sg_dma_address(s);
   1107		if (sg_dma_len(s))
   1108			s->length = sg_dma_len(s);
   1109		sg_dma_address(s) = DMA_MAPPING_ERROR;
   1110		sg_dma_len(s) = 0;
   1111	}
   1112}
   1113
   1114static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
   1115		int nents, enum dma_data_direction dir, unsigned long attrs)
   1116{
   1117	struct scatterlist *s;
   1118	int i;
   1119
   1120	for_each_sg(sg, s, nents, i)
   1121		iommu_dma_unmap_page(dev, sg_dma_address(s),
   1122				sg_dma_len(s), dir, attrs);
   1123}
   1124
   1125static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
   1126		int nents, enum dma_data_direction dir, unsigned long attrs)
   1127{
   1128	struct scatterlist *s;
   1129	int i;
   1130
   1131	for_each_sg(sg, s, nents, i) {
   1132		sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
   1133				s->offset, s->length, dir, attrs);
   1134		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
   1135			goto out_unmap;
   1136		sg_dma_len(s) = s->length;
   1137	}
   1138
   1139	return nents;
   1140
   1141out_unmap:
   1142	iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
   1143	return -EIO;
   1144}
   1145
   1146/*
   1147 * The DMA API client is passing in a scatterlist which could describe
   1148 * any old buffer layout, but the IOMMU API requires everything to be
   1149 * aligned to IOMMU pages. Hence the need for this complicated bit of
   1150 * impedance-matching, to be able to hand off a suitably-aligned list,
   1151 * but still preserve the original offsets and sizes for the caller.
   1152 */
   1153static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
   1154		int nents, enum dma_data_direction dir, unsigned long attrs)
   1155{
   1156	struct iommu_domain *domain = iommu_get_dma_domain(dev);
   1157	struct iommu_dma_cookie *cookie = domain->iova_cookie;
   1158	struct iova_domain *iovad = &cookie->iovad;
   1159	struct scatterlist *s, *prev = NULL;
   1160	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
   1161	dma_addr_t iova;
   1162	size_t iova_len = 0;
   1163	unsigned long mask = dma_get_seg_boundary(dev);
   1164	ssize_t ret;
   1165	int i;
   1166
   1167	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
   1168		ret = iommu_deferred_attach(dev, domain);
   1169		if (ret)
   1170			goto out;
   1171	}
   1172
   1173	if (dev_use_swiotlb(dev))
   1174		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
   1175
   1176	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
   1177		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
   1178
   1179	/*
   1180	 * Work out how much IOVA space we need, and align the segments to
   1181	 * IOVA granules for the IOMMU driver to handle. With some clever
   1182	 * trickery we can modify the list in-place, but reversibly, by
   1183	 * stashing the unaligned parts in the as-yet-unused DMA fields.
   1184	 */
   1185	for_each_sg(sg, s, nents, i) {
   1186		size_t s_iova_off = iova_offset(iovad, s->offset);
   1187		size_t s_length = s->length;
   1188		size_t pad_len = (mask - iova_len + 1) & mask;
   1189
   1190		sg_dma_address(s) = s_iova_off;
   1191		sg_dma_len(s) = s_length;
   1192		s->offset -= s_iova_off;
   1193		s_length = iova_align(iovad, s_length + s_iova_off);
   1194		s->length = s_length;
   1195
   1196		/*
   1197		 * Due to the alignment of our single IOVA allocation, we can
   1198		 * depend on these assumptions about the segment boundary mask:
   1199		 * - If mask size >= IOVA size, then the IOVA range cannot
   1200		 *   possibly fall across a boundary, so we don't care.
   1201		 * - If mask size < IOVA size, then the IOVA range must start
   1202		 *   exactly on a boundary, therefore we can lay things out
   1203		 *   based purely on segment lengths without needing to know
   1204		 *   the actual addresses beforehand.
   1205		 * - The mask must be a power of 2, so pad_len == 0 if
   1206		 *   iova_len == 0, thus we cannot dereference prev the first
   1207		 *   time through here (i.e. before it has a meaningful value).
   1208		 */
   1209		if (pad_len && pad_len < s_length - 1) {
   1210			prev->length += pad_len;
   1211			iova_len += pad_len;
   1212		}
   1213
   1214		iova_len += s_length;
   1215		prev = s;
   1216	}
   1217
   1218	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
   1219	if (!iova) {
   1220		ret = -ENOMEM;
   1221		goto out_restore_sg;
   1222	}
   1223
   1224	/*
   1225	 * We'll leave any physical concatenation to the IOMMU driver's
   1226	 * implementation - it knows better than we do.
   1227	 */
   1228	ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
   1229	if (ret < 0 || ret < iova_len)
   1230		goto out_free_iova;
   1231
   1232	return __finalise_sg(dev, sg, nents, iova);
   1233
   1234out_free_iova:
   1235	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
   1236out_restore_sg:
   1237	__invalidate_sg(sg, nents);
   1238out:
   1239	if (ret != -ENOMEM)
   1240		return -EINVAL;
   1241	return ret;
   1242}
   1243
   1244static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
   1245		int nents, enum dma_data_direction dir, unsigned long attrs)
   1246{
   1247	dma_addr_t start, end;
   1248	struct scatterlist *tmp;
   1249	int i;
   1250
   1251	if (dev_use_swiotlb(dev)) {
   1252		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
   1253		return;
   1254	}
   1255
   1256	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
   1257		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
   1258
   1259	/*
   1260	 * The scatterlist segments are mapped into a single
   1261	 * contiguous IOVA allocation, so this is incredibly easy.
   1262	 */
   1263	start = sg_dma_address(sg);
   1264	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
   1265		if (sg_dma_len(tmp) == 0)
   1266			break;
   1267		sg = tmp;
   1268	}
   1269	end = sg_dma_address(sg) + sg_dma_len(sg);
   1270	__iommu_dma_unmap(dev, start, end - start);
   1271}
   1272
   1273static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
   1274		size_t size, enum dma_data_direction dir, unsigned long attrs)
   1275{
   1276	return __iommu_dma_map(dev, phys, size,
   1277			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
   1278			dma_get_mask(dev));
   1279}
   1280
   1281static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
   1282		size_t size, enum dma_data_direction dir, unsigned long attrs)
   1283{
   1284	__iommu_dma_unmap(dev, handle, size);
   1285}
   1286
   1287static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
   1288{
   1289	size_t alloc_size = PAGE_ALIGN(size);
   1290	int count = alloc_size >> PAGE_SHIFT;
   1291	struct page *page = NULL, **pages = NULL;
   1292
   1293	/* Non-coherent atomic allocation? Easy */
   1294	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
   1295	    dma_free_from_pool(dev, cpu_addr, alloc_size))
   1296		return;
   1297
   1298	if (is_vmalloc_addr(cpu_addr)) {
   1299		/*
   1300		 * If it the address is remapped, then it's either non-coherent
   1301		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
   1302		 */
   1303		pages = dma_common_find_pages(cpu_addr);
   1304		if (!pages)
   1305			page = vmalloc_to_page(cpu_addr);
   1306		dma_common_free_remap(cpu_addr, alloc_size);
   1307	} else {
   1308		/* Lowmem means a coherent atomic or CMA allocation */
   1309		page = virt_to_page(cpu_addr);
   1310	}
   1311
   1312	if (pages)
   1313		__iommu_dma_free_pages(pages, count);
   1314	if (page)
   1315		dma_free_contiguous(dev, page, alloc_size);
   1316}
   1317
   1318static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
   1319		dma_addr_t handle, unsigned long attrs)
   1320{
   1321	__iommu_dma_unmap(dev, handle, size);
   1322	__iommu_dma_free(dev, size, cpu_addr);
   1323}
   1324
   1325static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
   1326		struct page **pagep, gfp_t gfp, unsigned long attrs)
   1327{
   1328	bool coherent = dev_is_dma_coherent(dev);
   1329	size_t alloc_size = PAGE_ALIGN(size);
   1330	int node = dev_to_node(dev);
   1331	struct page *page = NULL;
   1332	void *cpu_addr;
   1333
   1334	page = dma_alloc_contiguous(dev, alloc_size, gfp);
   1335	if (!page)
   1336		page = alloc_pages_node(node, gfp, get_order(alloc_size));
   1337	if (!page)
   1338		return NULL;
   1339
   1340	if (!coherent || PageHighMem(page)) {
   1341		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
   1342
   1343		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
   1344				prot, __builtin_return_address(0));
   1345		if (!cpu_addr)
   1346			goto out_free_pages;
   1347
   1348		if (!coherent)
   1349			arch_dma_prep_coherent(page, size);
   1350	} else {
   1351		cpu_addr = page_address(page);
   1352	}
   1353
   1354	*pagep = page;
   1355	memset(cpu_addr, 0, alloc_size);
   1356	return cpu_addr;
   1357out_free_pages:
   1358	dma_free_contiguous(dev, page, alloc_size);
   1359	return NULL;
   1360}
   1361
   1362static void *iommu_dma_alloc(struct device *dev, size_t size,
   1363		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
   1364{
   1365	bool coherent = dev_is_dma_coherent(dev);
   1366	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
   1367	struct page *page = NULL;
   1368	void *cpu_addr;
   1369
   1370	gfp |= __GFP_ZERO;
   1371
   1372	if (gfpflags_allow_blocking(gfp) &&
   1373	    !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
   1374		return iommu_dma_alloc_remap(dev, size, handle, gfp,
   1375				dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
   1376	}
   1377
   1378	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
   1379	    !gfpflags_allow_blocking(gfp) && !coherent)
   1380		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
   1381					       gfp, NULL);
   1382	else
   1383		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
   1384	if (!cpu_addr)
   1385		return NULL;
   1386
   1387	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
   1388			dev->coherent_dma_mask);
   1389	if (*handle == DMA_MAPPING_ERROR) {
   1390		__iommu_dma_free(dev, size, cpu_addr);
   1391		return NULL;
   1392	}
   1393
   1394	return cpu_addr;
   1395}
   1396
   1397static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
   1398		void *cpu_addr, dma_addr_t dma_addr, size_t size,
   1399		unsigned long attrs)
   1400{
   1401	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
   1402	unsigned long pfn, off = vma->vm_pgoff;
   1403	int ret;
   1404
   1405	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
   1406
   1407	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
   1408		return ret;
   1409
   1410	if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
   1411		return -ENXIO;
   1412
   1413	if (is_vmalloc_addr(cpu_addr)) {
   1414		struct page **pages = dma_common_find_pages(cpu_addr);
   1415
   1416		if (pages)
   1417			return vm_map_pages(vma, pages, nr_pages);
   1418		pfn = vmalloc_to_pfn(cpu_addr);
   1419	} else {
   1420		pfn = page_to_pfn(virt_to_page(cpu_addr));
   1421	}
   1422
   1423	return remap_pfn_range(vma, vma->vm_start, pfn + off,
   1424			       vma->vm_end - vma->vm_start,
   1425			       vma->vm_page_prot);
   1426}
   1427
   1428static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
   1429		void *cpu_addr, dma_addr_t dma_addr, size_t size,
   1430		unsigned long attrs)
   1431{
   1432	struct page *page;
   1433	int ret;
   1434
   1435	if (is_vmalloc_addr(cpu_addr)) {
   1436		struct page **pages = dma_common_find_pages(cpu_addr);
   1437
   1438		if (pages) {
   1439			return sg_alloc_table_from_pages(sgt, pages,
   1440					PAGE_ALIGN(size) >> PAGE_SHIFT,
   1441					0, size, GFP_KERNEL);
   1442		}
   1443
   1444		page = vmalloc_to_page(cpu_addr);
   1445	} else {
   1446		page = virt_to_page(cpu_addr);
   1447	}
   1448
   1449	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
   1450	if (!ret)
   1451		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
   1452	return ret;
   1453}
   1454
   1455static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
   1456{
   1457	struct iommu_domain *domain = iommu_get_dma_domain(dev);
   1458
   1459	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
   1460}
   1461
   1462static const struct dma_map_ops iommu_dma_ops = {
   1463	.alloc			= iommu_dma_alloc,
   1464	.free			= iommu_dma_free,
   1465	.alloc_pages		= dma_common_alloc_pages,
   1466	.free_pages		= dma_common_free_pages,
   1467	.alloc_noncontiguous	= iommu_dma_alloc_noncontiguous,
   1468	.free_noncontiguous	= iommu_dma_free_noncontiguous,
   1469	.mmap			= iommu_dma_mmap,
   1470	.get_sgtable		= iommu_dma_get_sgtable,
   1471	.map_page		= iommu_dma_map_page,
   1472	.unmap_page		= iommu_dma_unmap_page,
   1473	.map_sg			= iommu_dma_map_sg,
   1474	.unmap_sg		= iommu_dma_unmap_sg,
   1475	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
   1476	.sync_single_for_device	= iommu_dma_sync_single_for_device,
   1477	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
   1478	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
   1479	.map_resource		= iommu_dma_map_resource,
   1480	.unmap_resource		= iommu_dma_unmap_resource,
   1481	.get_merge_boundary	= iommu_dma_get_merge_boundary,
   1482};
   1483
   1484/*
   1485 * The IOMMU core code allocates the default DMA domain, which the underlying
   1486 * IOMMU driver needs to support via the dma-iommu layer.
   1487 */
   1488void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
   1489{
   1490	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
   1491
   1492	if (!domain)
   1493		goto out_err;
   1494
   1495	/*
   1496	 * The IOMMU core code allocates the default DMA domain, which the
   1497	 * underlying IOMMU driver needs to support via the dma-iommu layer.
   1498	 */
   1499	if (iommu_is_dma_domain(domain)) {
   1500		if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
   1501			goto out_err;
   1502		dev->dma_ops = &iommu_dma_ops;
   1503	}
   1504
   1505	return;
   1506out_err:
   1507	 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
   1508		 dev_name(dev));
   1509}
   1510EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
   1511
   1512static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
   1513		phys_addr_t msi_addr, struct iommu_domain *domain)
   1514{
   1515	struct iommu_dma_cookie *cookie = domain->iova_cookie;
   1516	struct iommu_dma_msi_page *msi_page;
   1517	dma_addr_t iova;
   1518	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
   1519	size_t size = cookie_msi_granule(cookie);
   1520
   1521	msi_addr &= ~(phys_addr_t)(size - 1);
   1522	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
   1523		if (msi_page->phys == msi_addr)
   1524			return msi_page;
   1525
   1526	msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
   1527	if (!msi_page)
   1528		return NULL;
   1529
   1530	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
   1531	if (!iova)
   1532		goto out_free_page;
   1533
   1534	if (iommu_map(domain, iova, msi_addr, size, prot))
   1535		goto out_free_iova;
   1536
   1537	INIT_LIST_HEAD(&msi_page->list);
   1538	msi_page->phys = msi_addr;
   1539	msi_page->iova = iova;
   1540	list_add(&msi_page->list, &cookie->msi_page_list);
   1541	return msi_page;
   1542
   1543out_free_iova:
   1544	iommu_dma_free_iova(cookie, iova, size, NULL);
   1545out_free_page:
   1546	kfree(msi_page);
   1547	return NULL;
   1548}
   1549
   1550int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
   1551{
   1552	struct device *dev = msi_desc_to_dev(desc);
   1553	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
   1554	struct iommu_dma_msi_page *msi_page;
   1555	static DEFINE_MUTEX(msi_prepare_lock); /* see below */
   1556
   1557	if (!domain || !domain->iova_cookie) {
   1558		desc->iommu_cookie = NULL;
   1559		return 0;
   1560	}
   1561
   1562	/*
   1563	 * In fact the whole prepare operation should already be serialised by
   1564	 * irq_domain_mutex further up the callchain, but that's pretty subtle
   1565	 * on its own, so consider this locking as failsafe documentation...
   1566	 */
   1567	mutex_lock(&msi_prepare_lock);
   1568	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
   1569	mutex_unlock(&msi_prepare_lock);
   1570
   1571	msi_desc_set_iommu_cookie(desc, msi_page);
   1572
   1573	if (!msi_page)
   1574		return -ENOMEM;
   1575	return 0;
   1576}
   1577
   1578void iommu_dma_compose_msi_msg(struct msi_desc *desc,
   1579			       struct msi_msg *msg)
   1580{
   1581	struct device *dev = msi_desc_to_dev(desc);
   1582	const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
   1583	const struct iommu_dma_msi_page *msi_page;
   1584
   1585	msi_page = msi_desc_get_iommu_cookie(desc);
   1586
   1587	if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
   1588		return;
   1589
   1590	msg->address_hi = upper_32_bits(msi_page->iova);
   1591	msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
   1592	msg->address_lo += lower_32_bits(msi_page->iova);
   1593}
   1594
   1595static int iommu_dma_init(void)
   1596{
   1597	if (is_kdump_kernel())
   1598		static_branch_enable(&iommu_deferred_attach_enabled);
   1599
   1600	return iova_cache_get();
   1601}
   1602arch_initcall(iommu_dma_init);