cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

iova.c (24666B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright © 2006-2009, Intel Corporation.
      4 *
      5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
      6 */
      7
      8#include <linux/iova.h>
      9#include <linux/module.h>
     10#include <linux/slab.h>
     11#include <linux/smp.h>
     12#include <linux/bitops.h>
     13#include <linux/cpu.h>
     14
     15/* The anchor node sits above the top of the usable address space */
     16#define IOVA_ANCHOR	~0UL
     17
     18#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
     19
     20static bool iova_rcache_insert(struct iova_domain *iovad,
     21			       unsigned long pfn,
     22			       unsigned long size);
     23static unsigned long iova_rcache_get(struct iova_domain *iovad,
     24				     unsigned long size,
     25				     unsigned long limit_pfn);
     26static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
     27static void free_iova_rcaches(struct iova_domain *iovad);
     28
     29static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
     30{
     31	struct iova_domain *iovad;
     32
     33	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
     34
     35	free_cpu_cached_iovas(cpu, iovad);
     36	return 0;
     37}
     38
     39static void free_global_cached_iovas(struct iova_domain *iovad);
     40
     41static struct iova *to_iova(struct rb_node *node)
     42{
     43	return rb_entry(node, struct iova, node);
     44}
     45
     46void
     47init_iova_domain(struct iova_domain *iovad, unsigned long granule,
     48	unsigned long start_pfn)
     49{
     50	/*
     51	 * IOVA granularity will normally be equal to the smallest
     52	 * supported IOMMU page size; both *must* be capable of
     53	 * representing individual CPU pages exactly.
     54	 */
     55	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
     56
     57	spin_lock_init(&iovad->iova_rbtree_lock);
     58	iovad->rbroot = RB_ROOT;
     59	iovad->cached_node = &iovad->anchor.node;
     60	iovad->cached32_node = &iovad->anchor.node;
     61	iovad->granule = granule;
     62	iovad->start_pfn = start_pfn;
     63	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
     64	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
     65	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
     66	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
     67	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
     68}
     69EXPORT_SYMBOL_GPL(init_iova_domain);
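/*
 * Illustrative sketch added for this annotated listing (not part of the
 * original iova.c): how a user of this allocator typically brings up a
 * domain. iova_cache_get() and iova_domain_init_rcaches() are declared in
 * <linux/iova.h>; the example_* name and the PAGE_SIZE granule are
 * assumptions made for the example.
 */
static int __maybe_unused example_domain_setup(struct iova_domain *iovad,
					       dma_addr_t base)
{
	int ret;

	/* Take a reference on the global 'struct iova' kmem_cache first. */
	ret = iova_cache_get();
	if (ret)
		return ret;

	/* With a PAGE_SIZE granule the starting pfn is just base >> PAGE_SHIFT. */
	init_iova_domain(iovad, PAGE_SIZE, base >> PAGE_SHIFT);

	/* Optional: per-CPU and global caches used by alloc_iova_fast(). */
	return iova_domain_init_rcaches(iovad);
}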
     70
     71static struct rb_node *
     72__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
     73{
     74	if (limit_pfn <= iovad->dma_32bit_pfn)
     75		return iovad->cached32_node;
     76
     77	return iovad->cached_node;
     78}
     79
     80static void
     81__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
     82{
     83	if (new->pfn_hi < iovad->dma_32bit_pfn)
     84		iovad->cached32_node = &new->node;
     85	else
     86		iovad->cached_node = &new->node;
     87}
     88
     89static void
     90__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
     91{
     92	struct iova *cached_iova;
     93
     94	cached_iova = to_iova(iovad->cached32_node);
     95	if (free == cached_iova ||
     96	    (free->pfn_hi < iovad->dma_32bit_pfn &&
     97	     free->pfn_lo >= cached_iova->pfn_lo))
     98		iovad->cached32_node = rb_next(&free->node);
     99
    100	if (free->pfn_lo < iovad->dma_32bit_pfn)
    101		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
    102
    103	cached_iova = to_iova(iovad->cached_node);
    104	if (free->pfn_lo >= cached_iova->pfn_lo)
    105		iovad->cached_node = rb_next(&free->node);
    106}
    107
    108static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
    109{
    110	struct rb_node *node, *next;
    111	/*
    112	 * Ideally what we'd like to judge here is whether limit_pfn is close
    113	 * enough to the highest-allocated IOVA that starting the allocation
    114	 * walk from the anchor node will be quicker than this initial work to
    115	 * find an exact starting point (especially if that ends up being the
    116	 * anchor node anyway). This is an incredibly crude approximation which
    117	 * only really helps the most likely case, but is at least trivially easy.
    118	 */
    119	if (limit_pfn > iovad->dma_32bit_pfn)
    120		return &iovad->anchor.node;
    121
    122	node = iovad->rbroot.rb_node;
    123	while (to_iova(node)->pfn_hi < limit_pfn)
    124		node = node->rb_right;
    125
    126search_left:
    127	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
    128		node = node->rb_left;
    129
    130	if (!node->rb_left)
    131		return node;
    132
    133	next = node->rb_left;
    134	while (next->rb_right) {
    135		next = next->rb_right;
    136		if (to_iova(next)->pfn_lo >= limit_pfn) {
    137			node = next;
    138			goto search_left;
    139		}
    140	}
    141
    142	return node;
    143}
    144
     145/* Insert the iova into the domain rbtree while holding the writer lock */
    146static void
    147iova_insert_rbtree(struct rb_root *root, struct iova *iova,
    148		   struct rb_node *start)
    149{
    150	struct rb_node **new, *parent = NULL;
    151
    152	new = (start) ? &start : &(root->rb_node);
    153	/* Figure out where to put new node */
    154	while (*new) {
    155		struct iova *this = to_iova(*new);
    156
    157		parent = *new;
    158
    159		if (iova->pfn_lo < this->pfn_lo)
    160			new = &((*new)->rb_left);
    161		else if (iova->pfn_lo > this->pfn_lo)
    162			new = &((*new)->rb_right);
    163		else {
    164			WARN_ON(1); /* this should not happen */
    165			return;
    166		}
    167	}
    168	/* Add new node and rebalance tree. */
    169	rb_link_node(&iova->node, parent, new);
    170	rb_insert_color(&iova->node, root);
    171}
    172
    173static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
    174		unsigned long size, unsigned long limit_pfn,
    175			struct iova *new, bool size_aligned)
    176{
    177	struct rb_node *curr, *prev;
    178	struct iova *curr_iova;
    179	unsigned long flags;
    180	unsigned long new_pfn, retry_pfn;
    181	unsigned long align_mask = ~0UL;
    182	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
    183
    184	if (size_aligned)
    185		align_mask <<= fls_long(size - 1);
    186
    187	/* Walk the tree backwards */
    188	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    189	if (limit_pfn <= iovad->dma_32bit_pfn &&
    190			size >= iovad->max32_alloc_size)
    191		goto iova32_full;
    192
    193	curr = __get_cached_rbnode(iovad, limit_pfn);
    194	curr_iova = to_iova(curr);
    195	retry_pfn = curr_iova->pfn_hi + 1;
    196
    197retry:
    198	do {
    199		high_pfn = min(high_pfn, curr_iova->pfn_lo);
    200		new_pfn = (high_pfn - size) & align_mask;
    201		prev = curr;
    202		curr = rb_prev(curr);
    203		curr_iova = to_iova(curr);
    204	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
    205
    206	if (high_pfn < size || new_pfn < low_pfn) {
    207		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
    208			high_pfn = limit_pfn;
    209			low_pfn = retry_pfn;
    210			curr = iova_find_limit(iovad, limit_pfn);
    211			curr_iova = to_iova(curr);
    212			goto retry;
    213		}
    214		iovad->max32_alloc_size = size;
    215		goto iova32_full;
    216	}
    217
    218	/* pfn_lo will point to size aligned address if size_aligned is set */
    219	new->pfn_lo = new_pfn;
    220	new->pfn_hi = new->pfn_lo + size - 1;
    221
    222	/* If we have 'prev', it's a valid place to start the insertion. */
    223	iova_insert_rbtree(&iovad->rbroot, new, prev);
    224	__cached_rbnode_insert_update(iovad, new);
    225
    226	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    227	return 0;
    228
    229iova32_full:
    230	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    231	return -ENOMEM;
    232}
    233
    234static struct kmem_cache *iova_cache;
    235static unsigned int iova_cache_users;
    236static DEFINE_MUTEX(iova_cache_mutex);
    237
    238static struct iova *alloc_iova_mem(void)
    239{
    240	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
    241}
    242
    243static void free_iova_mem(struct iova *iova)
    244{
    245	if (iova->pfn_lo != IOVA_ANCHOR)
    246		kmem_cache_free(iova_cache, iova);
    247}
    248
    249int iova_cache_get(void)
    250{
    251	mutex_lock(&iova_cache_mutex);
    252	if (!iova_cache_users) {
    253		int ret;
    254
    255		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
    256					iova_cpuhp_dead);
    257		if (ret) {
    258			mutex_unlock(&iova_cache_mutex);
    259			pr_err("Couldn't register cpuhp handler\n");
    260			return ret;
    261		}
    262
    263		iova_cache = kmem_cache_create(
    264			"iommu_iova", sizeof(struct iova), 0,
    265			SLAB_HWCACHE_ALIGN, NULL);
    266		if (!iova_cache) {
    267			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
    268			mutex_unlock(&iova_cache_mutex);
    269			pr_err("Couldn't create iova cache\n");
    270			return -ENOMEM;
    271		}
    272	}
    273
    274	iova_cache_users++;
    275	mutex_unlock(&iova_cache_mutex);
    276
    277	return 0;
    278}
    279EXPORT_SYMBOL_GPL(iova_cache_get);
    280
    281void iova_cache_put(void)
    282{
    283	mutex_lock(&iova_cache_mutex);
    284	if (WARN_ON(!iova_cache_users)) {
    285		mutex_unlock(&iova_cache_mutex);
    286		return;
    287	}
    288	iova_cache_users--;
    289	if (!iova_cache_users) {
    290		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
    291		kmem_cache_destroy(iova_cache);
    292	}
    293	mutex_unlock(&iova_cache_mutex);
    294}
    295EXPORT_SYMBOL_GPL(iova_cache_put);
    296
    297/**
    298 * alloc_iova - allocates an iova
    299 * @iovad: - iova domain in question
     300 * @size: - number of page frames to allocate
    301 * @limit_pfn: - max limit address
    302 * @size_aligned: - set if size_aligned address range is required
    303 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
    304 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
    305 * flag is set then the allocated address iova->pfn_lo will be naturally
    306 * aligned on roundup_power_of_two(size).
    307 */
    308struct iova *
    309alloc_iova(struct iova_domain *iovad, unsigned long size,
    310	unsigned long limit_pfn,
    311	bool size_aligned)
    312{
    313	struct iova *new_iova;
    314	int ret;
    315
    316	new_iova = alloc_iova_mem();
    317	if (!new_iova)
    318		return NULL;
    319
    320	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
    321			new_iova, size_aligned);
    322
    323	if (ret) {
    324		free_iova_mem(new_iova);
    325		return NULL;
    326	}
    327
    328	return new_iova;
    329}
    330EXPORT_SYMBOL_GPL(alloc_iova);
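/*
 * Illustrative sketch (not part of the original file): a size-aligned
 * allocation below a 32-bit limit, converted to a bus address with the
 * helpers from <linux/iova.h>. example_map() is a hypothetical caller;
 * the matching free would be free_iova(iovad, iova_pfn(iovad, addr)).
 */
static dma_addr_t __maybe_unused example_map(struct iova_domain *iovad,
					     size_t size)
{
	unsigned long npages = iova_align(iovad, size) >> iova_shift(iovad);
	struct iova *iova;

	iova = alloc_iova(iovad, npages,
			  DMA_BIT_MASK(32) >> iova_shift(iovad), true);
	if (!iova)
		return 0;

	/* ... set up the IOMMU mappings for the range here ... */
	return iova_dma_addr(iovad, iova);
}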
    331
    332static struct iova *
    333private_find_iova(struct iova_domain *iovad, unsigned long pfn)
    334{
    335	struct rb_node *node = iovad->rbroot.rb_node;
    336
    337	assert_spin_locked(&iovad->iova_rbtree_lock);
    338
    339	while (node) {
    340		struct iova *iova = to_iova(node);
    341
    342		if (pfn < iova->pfn_lo)
    343			node = node->rb_left;
    344		else if (pfn > iova->pfn_hi)
    345			node = node->rb_right;
    346		else
    347			return iova;	/* pfn falls within iova's range */
    348	}
    349
    350	return NULL;
    351}
    352
    353static void remove_iova(struct iova_domain *iovad, struct iova *iova)
    354{
    355	assert_spin_locked(&iovad->iova_rbtree_lock);
    356	__cached_rbnode_delete_update(iovad, iova);
    357	rb_erase(&iova->node, &iovad->rbroot);
    358}
    359
    360/**
    361 * find_iova - finds an iova for a given pfn
    362 * @iovad: - iova domain in question.
    363 * @pfn: - page frame number
    364 * This function finds and returns an iova belonging to the
    365 * given domain which matches the given pfn.
    366 */
    367struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
    368{
    369	unsigned long flags;
    370	struct iova *iova;
    371
    372	/* Take the lock so that no other thread is manipulating the rbtree */
    373	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    374	iova = private_find_iova(iovad, pfn);
    375	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    376	return iova;
    377}
    378EXPORT_SYMBOL_GPL(find_iova);
    379
    380/**
    381 * __free_iova - frees the given iova
    382 * @iovad: iova domain in question.
    383 * @iova: iova in question.
     384 * Frees the given iova belonging to the given domain
    385 */
    386void
    387__free_iova(struct iova_domain *iovad, struct iova *iova)
    388{
    389	unsigned long flags;
    390
    391	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    392	remove_iova(iovad, iova);
    393	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    394	free_iova_mem(iova);
    395}
    396EXPORT_SYMBOL_GPL(__free_iova);
    397
    398/**
    399 * free_iova - finds and frees the iova for a given pfn
    400 * @iovad: - iova domain in question.
     401 * @pfn: - pfn that was previously allocated
     402 * This function finds the iova for a given pfn and then
    403 * frees the iova from that domain.
    404 */
    405void
    406free_iova(struct iova_domain *iovad, unsigned long pfn)
    407{
    408	unsigned long flags;
    409	struct iova *iova;
    410
    411	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    412	iova = private_find_iova(iovad, pfn);
    413	if (!iova) {
    414		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    415		return;
    416	}
    417	remove_iova(iovad, iova);
    418	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    419	free_iova_mem(iova);
    420}
    421EXPORT_SYMBOL_GPL(free_iova);
    422
    423/**
    424 * alloc_iova_fast - allocates an iova from rcache
    425 * @iovad: - iova domain in question
     426 * @size: - number of page frames to allocate
    427 * @limit_pfn: - max limit address
    428 * @flush_rcache: - set to flush rcache on regular allocation failure
    429 * This function tries to satisfy an iova allocation from the rcache,
    430 * and falls back to regular allocation on failure. If regular allocation
    431 * fails too and the flush_rcache flag is set then the rcache will be flushed.
     432 */
    433unsigned long
    434alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
    435		unsigned long limit_pfn, bool flush_rcache)
    436{
    437	unsigned long iova_pfn;
    438	struct iova *new_iova;
    439
    440	/*
    441	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
    442	 * will come back to bite us badly, so we have to waste a bit of space
    443	 * rounding up anything cacheable to make sure that can't happen. The
    444	 * order of the unadjusted size will still match upon freeing.
    445	 */
    446	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
    447		size = roundup_pow_of_two(size);
    448
    449	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
    450	if (iova_pfn)
    451		return iova_pfn;
    452
    453retry:
    454	new_iova = alloc_iova(iovad, size, limit_pfn, true);
    455	if (!new_iova) {
    456		unsigned int cpu;
    457
    458		if (!flush_rcache)
    459			return 0;
    460
    461		/* Try replenishing IOVAs by flushing rcache. */
    462		flush_rcache = false;
    463		for_each_online_cpu(cpu)
    464			free_cpu_cached_iovas(cpu, iovad);
    465		free_global_cached_iovas(iovad);
    466		goto retry;
    467	}
    468
    469	return new_iova->pfn_lo;
    470}
    471EXPORT_SYMBOL_GPL(alloc_iova_fast);
    472
    473/**
    474 * free_iova_fast - free iova pfn range into rcache
    475 * @iovad: - iova domain in question.
     476 * @pfn: - pfn that was previously allocated
    477 * @size: - # of pages in range
     479 * This function frees an iova range by trying to put it into the rcache,
    479 * falling back to regular iova deallocation via free_iova() if this fails.
    480 */
    481void
    482free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
    483{
    484	if (iova_rcache_insert(iovad, pfn, size))
    485		return;
    486
    487	free_iova(iovad, pfn);
    488}
    489EXPORT_SYMBOL_GPL(free_iova_fast);
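/*
 * Illustrative sketch (not part of the original file): the cached fast
 * path as used on DMA map/unmap hot paths. The same page count must be
 * used on free so that both ends hit the same rcache size class; the
 * example_* names are hypothetical.
 */
static unsigned long __maybe_unused example_fast_map(struct iova_domain *iovad,
						     unsigned long npages,
						     u64 dma_limit)
{
	/* Returns a pfn, or 0 on failure after flushing the rcaches. */
	return alloc_iova_fast(iovad, npages,
			       dma_limit >> iova_shift(iovad), true);
}

static void __maybe_unused example_fast_unmap(struct iova_domain *iovad,
					      unsigned long pfn,
					      unsigned long npages)
{
	/* Returns the range to the rcache, or to the rbtree if the cache is full. */
	free_iova_fast(iovad, pfn, npages);
}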
    490
    491static void iova_domain_free_rcaches(struct iova_domain *iovad)
    492{
    493	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
    494					    &iovad->cpuhp_dead);
    495	free_iova_rcaches(iovad);
    496}
    497
    498/**
    499 * put_iova_domain - destroys the iova domain
    500 * @iovad: - iova domain in question.
     501 * All the iovas in that domain are destroyed.
    502 */
    503void put_iova_domain(struct iova_domain *iovad)
    504{
    505	struct iova *iova, *tmp;
    506
    507	if (iovad->rcaches)
    508		iova_domain_free_rcaches(iovad);
    509
    510	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
    511		free_iova_mem(iova);
    512}
    513EXPORT_SYMBOL_GPL(put_iova_domain);
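/*
 * Illustrative sketch (not part of the original file): tearing the domain
 * down again; pairs with the setup sketch next to init_iova_domain().
 * example_domain_teardown() is a hypothetical name.
 */
static void __maybe_unused example_domain_teardown(struct iova_domain *iovad)
{
	/* Frees the rcaches (if initialized) and every remaining iova. */
	put_iova_domain(iovad);

	/* Drop the reference on the global iova kmem_cache taken at setup. */
	iova_cache_put();
}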
    514
    515static int
    516__is_range_overlap(struct rb_node *node,
    517	unsigned long pfn_lo, unsigned long pfn_hi)
    518{
    519	struct iova *iova = to_iova(node);
    520
    521	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
    522		return 1;
    523	return 0;
    524}
    525
    526static inline struct iova *
    527alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
    528{
    529	struct iova *iova;
    530
    531	iova = alloc_iova_mem();
    532	if (iova) {
    533		iova->pfn_lo = pfn_lo;
    534		iova->pfn_hi = pfn_hi;
    535	}
    536
    537	return iova;
    538}
    539
    540static struct iova *
    541__insert_new_range(struct iova_domain *iovad,
    542	unsigned long pfn_lo, unsigned long pfn_hi)
    543{
    544	struct iova *iova;
    545
    546	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
    547	if (iova)
    548		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
    549
    550	return iova;
    551}
    552
    553static void
    554__adjust_overlap_range(struct iova *iova,
    555	unsigned long *pfn_lo, unsigned long *pfn_hi)
    556{
    557	if (*pfn_lo < iova->pfn_lo)
    558		iova->pfn_lo = *pfn_lo;
    559	if (*pfn_hi > iova->pfn_hi)
    560		*pfn_lo = iova->pfn_hi + 1;
    561}
    562
    563/**
    564 * reserve_iova - reserves an iova in the given range
    565 * @iovad: - iova domain pointer
    566 * @pfn_lo: - lower page frame address
     567 * @pfn_hi: - higher pfn address
     568 * This function reserves the address range from pfn_lo to pfn_hi so
     569 * that these addresses are never handed out by alloc_iova().
    570 */
    571struct iova *
    572reserve_iova(struct iova_domain *iovad,
    573	unsigned long pfn_lo, unsigned long pfn_hi)
    574{
    575	struct rb_node *node;
    576	unsigned long flags;
    577	struct iova *iova;
    578	unsigned int overlap = 0;
    579
    580	/* Don't allow nonsensical pfns */
    581	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
    582		return NULL;
    583
    584	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    585	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
    586		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
    587			iova = to_iova(node);
    588			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
    589			if ((pfn_lo >= iova->pfn_lo) &&
    590				(pfn_hi <= iova->pfn_hi))
    591				goto finish;
    592			overlap = 1;
    593
    594		} else if (overlap)
    595				break;
    596	}
    597
     598	/* We get here either because this is the first reserved range in the
     599	 * domain or we still need to insert the remaining non-overlapping range.
     600	 */
    601	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
    602finish:
    603
    604	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    605	return iova;
    606}
    607EXPORT_SYMBOL_GPL(reserve_iova);
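/*
 * Illustrative sketch (not part of the original file): carving a hole out
 * of the allocatable space, e.g. for an MSI doorbell page or a PCI window,
 * so that alloc_iova() never hands those addresses out. The range here is
 * hypothetical.
 */
static void __maybe_unused example_reserve_window(struct iova_domain *iovad,
						  dma_addr_t start,
						  size_t length)
{
	unsigned long lo = iova_pfn(iovad, start);
	unsigned long hi = iova_pfn(iovad, start + length - 1);

	/* Returns the covering struct iova, or NULL on allocation failure. */
	reserve_iova(iovad, lo, hi);
}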
    608
    609/*
    610 * Magazine caches for IOVA ranges.  For an introduction to magazines,
    611 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
    612 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
    613 * For simplicity, we use a static magazine size and don't implement the
    614 * dynamic size tuning described in the paper.
    615 */
    616
    617#define IOVA_MAG_SIZE 128
    618#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
    619
    620struct iova_magazine {
    621	unsigned long size;
    622	unsigned long pfns[IOVA_MAG_SIZE];
    623};
    624
    625struct iova_cpu_rcache {
    626	spinlock_t lock;
    627	struct iova_magazine *loaded;
    628	struct iova_magazine *prev;
    629};
    630
    631struct iova_rcache {
    632	spinlock_t lock;
    633	unsigned long depot_size;
    634	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
    635	struct iova_cpu_rcache __percpu *cpu_rcaches;
    636};
    637
    638static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
    639{
    640	return kzalloc(sizeof(struct iova_magazine), flags);
    641}
    642
    643static void iova_magazine_free(struct iova_magazine *mag)
    644{
    645	kfree(mag);
    646}
    647
    648static void
    649iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
    650{
    651	unsigned long flags;
    652	int i;
    653
    654	if (!mag)
    655		return;
    656
    657	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    658
    659	for (i = 0 ; i < mag->size; ++i) {
    660		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
    661
    662		if (WARN_ON(!iova))
    663			continue;
    664
    665		remove_iova(iovad, iova);
    666		free_iova_mem(iova);
    667	}
    668
    669	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
    670
    671	mag->size = 0;
    672}
    673
    674static bool iova_magazine_full(struct iova_magazine *mag)
    675{
    676	return (mag && mag->size == IOVA_MAG_SIZE);
    677}
    678
    679static bool iova_magazine_empty(struct iova_magazine *mag)
    680{
    681	return (!mag || mag->size == 0);
    682}
    683
    684static unsigned long iova_magazine_pop(struct iova_magazine *mag,
    685				       unsigned long limit_pfn)
    686{
    687	int i;
    688	unsigned long pfn;
    689
    690	BUG_ON(iova_magazine_empty(mag));
    691
    692	/* Only fall back to the rbtree if we have no suitable pfns at all */
    693	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
    694		if (i == 0)
    695			return 0;
    696
    697	/* Swap it to pop it */
    698	pfn = mag->pfns[i];
    699	mag->pfns[i] = mag->pfns[--mag->size];
    700
    701	return pfn;
    702}
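/*
 * Worked example for the swap-to-pop above (annotation, not upstream code):
 * with pfns[] = { 0x100, 0x400, 0x500 }, size == 3 and limit_pfn == 0x300,
 * the backwards scan rejects 0x500 and 0x400 and stops at i == 0. 0x100 is
 * returned, the last entry is swapped into its slot, and the magazine is
 * left holding { 0x500, 0x400 } with size == 2. Order inside a magazine is
 * not preserved, which is fine: all entries of a size class are
 * interchangeable.
 */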
    703
    704static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
    705{
    706	BUG_ON(iova_magazine_full(mag));
    707
    708	mag->pfns[mag->size++] = pfn;
    709}
    710
    711int iova_domain_init_rcaches(struct iova_domain *iovad)
    712{
    713	unsigned int cpu;
    714	int i, ret;
    715
    716	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
    717				 sizeof(struct iova_rcache),
    718				 GFP_KERNEL);
    719	if (!iovad->rcaches)
    720		return -ENOMEM;
    721
    722	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
    723		struct iova_cpu_rcache *cpu_rcache;
    724		struct iova_rcache *rcache;
    725
    726		rcache = &iovad->rcaches[i];
    727		spin_lock_init(&rcache->lock);
    728		rcache->depot_size = 0;
    729		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
    730						     cache_line_size());
    731		if (!rcache->cpu_rcaches) {
    732			ret = -ENOMEM;
    733			goto out_err;
    734		}
    735		for_each_possible_cpu(cpu) {
    736			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
    737
    738			spin_lock_init(&cpu_rcache->lock);
    739			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
    740			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
    741			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
    742				ret = -ENOMEM;
    743				goto out_err;
    744			}
    745		}
    746	}
    747
    748	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
    749					       &iovad->cpuhp_dead);
    750	if (ret)
    751		goto out_err;
    752	return 0;
    753
    754out_err:
    755	free_iova_rcaches(iovad);
    756	return ret;
    757}
    758EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
    759
    760/*
    761 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
     762 * return true on success.  Can fail if the rcache is full and we can't
     763 * free space, in which case free_iova_fast() (our only caller) falls
     764 * back to free_iova() and returns the IOVA range to the rbtree instead.
    765 */
    766static bool __iova_rcache_insert(struct iova_domain *iovad,
    767				 struct iova_rcache *rcache,
    768				 unsigned long iova_pfn)
    769{
    770	struct iova_magazine *mag_to_free = NULL;
    771	struct iova_cpu_rcache *cpu_rcache;
    772	bool can_insert = false;
    773	unsigned long flags;
    774
    775	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
    776	spin_lock_irqsave(&cpu_rcache->lock, flags);
    777
    778	if (!iova_magazine_full(cpu_rcache->loaded)) {
    779		can_insert = true;
    780	} else if (!iova_magazine_full(cpu_rcache->prev)) {
    781		swap(cpu_rcache->prev, cpu_rcache->loaded);
    782		can_insert = true;
    783	} else {
    784		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
    785
    786		if (new_mag) {
    787			spin_lock(&rcache->lock);
    788			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
    789				rcache->depot[rcache->depot_size++] =
    790						cpu_rcache->loaded;
    791			} else {
    792				mag_to_free = cpu_rcache->loaded;
    793			}
    794			spin_unlock(&rcache->lock);
    795
    796			cpu_rcache->loaded = new_mag;
    797			can_insert = true;
    798		}
    799	}
    800
    801	if (can_insert)
    802		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
    803
    804	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
    805
    806	if (mag_to_free) {
    807		iova_magazine_free_pfns(mag_to_free, iovad);
    808		iova_magazine_free(mag_to_free);
    809	}
    810
    811	return can_insert;
    812}
    813
    814static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
    815			       unsigned long size)
    816{
    817	unsigned int log_size = order_base_2(size);
    818
    819	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
    820		return false;
    821
    822	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
    823}
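/*
 * Worked example for the size-class bucketing above (annotation, not
 * upstream code): a 5-page range maps to bucket order_base_2(5) == 3.
 * alloc_iova_fast() rounded the request up to 8 pages, and
 * order_base_2(8) == 3 as well, so the allocation and the free land in
 * the same rcache, as the comment in alloc_iova_fast() promises.
 */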
    824
    825/*
    826 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
     827 * satisfy the request, return the pfn of a matching range and remove
     828 * it from the 'rcache'; otherwise return 0.
    829 */
    830static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
    831				       unsigned long limit_pfn)
    832{
    833	struct iova_cpu_rcache *cpu_rcache;
    834	unsigned long iova_pfn = 0;
    835	bool has_pfn = false;
    836	unsigned long flags;
    837
    838	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
    839	spin_lock_irqsave(&cpu_rcache->lock, flags);
    840
    841	if (!iova_magazine_empty(cpu_rcache->loaded)) {
    842		has_pfn = true;
    843	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
    844		swap(cpu_rcache->prev, cpu_rcache->loaded);
    845		has_pfn = true;
    846	} else {
    847		spin_lock(&rcache->lock);
    848		if (rcache->depot_size > 0) {
    849			iova_magazine_free(cpu_rcache->loaded);
    850			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
    851			has_pfn = true;
    852		}
    853		spin_unlock(&rcache->lock);
    854	}
    855
    856	if (has_pfn)
    857		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
    858
    859	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
    860
    861	return iova_pfn;
    862}
    863
    864/*
    865 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
    866 * size is too big or the DMA limit we are given isn't satisfied by the
    867 * top element in the magazine.
    868 */
    869static unsigned long iova_rcache_get(struct iova_domain *iovad,
    870				     unsigned long size,
    871				     unsigned long limit_pfn)
    872{
    873	unsigned int log_size = order_base_2(size);
    874
    875	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
    876		return 0;
    877
    878	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
    879}
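/*
 * Annotation (not upstream code): the "limit_pfn - size" above undoes the
 * +1 that alloc_iova_fast() adds and accounts for the length of the range.
 * For an original limit L and size S the caller passes L + 1, so the
 * magazine only returns a pfn_lo <= (L + 1) - S, which guarantees
 * pfn_hi = pfn_lo + S - 1 <= L.
 */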
    880
    881/*
    882 * free rcache data structures.
    883 */
    884static void free_iova_rcaches(struct iova_domain *iovad)
    885{
    886	struct iova_rcache *rcache;
    887	struct iova_cpu_rcache *cpu_rcache;
    888	unsigned int cpu;
    889	int i, j;
    890
    891	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
    892		rcache = &iovad->rcaches[i];
    893		if (!rcache->cpu_rcaches)
    894			break;
    895		for_each_possible_cpu(cpu) {
    896			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
    897			iova_magazine_free(cpu_rcache->loaded);
    898			iova_magazine_free(cpu_rcache->prev);
    899		}
    900		free_percpu(rcache->cpu_rcaches);
    901		for (j = 0; j < rcache->depot_size; ++j)
    902			iova_magazine_free(rcache->depot[j]);
    903	}
    904
    905	kfree(iovad->rcaches);
    906	iovad->rcaches = NULL;
    907}
    908
    909/*
    910 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
    911 */
    912static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
    913{
    914	struct iova_cpu_rcache *cpu_rcache;
    915	struct iova_rcache *rcache;
    916	unsigned long flags;
    917	int i;
    918
    919	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
    920		rcache = &iovad->rcaches[i];
    921		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
    922		spin_lock_irqsave(&cpu_rcache->lock, flags);
    923		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
    924		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
    925		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
    926	}
    927}
    928
    929/*
    930 * free all the IOVA ranges of global cache
    931 */
    932static void free_global_cached_iovas(struct iova_domain *iovad)
    933{
    934	struct iova_rcache *rcache;
    935	unsigned long flags;
    936	int i, j;
    937
    938	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
    939		rcache = &iovad->rcaches[i];
    940		spin_lock_irqsave(&rcache->lock, flags);
    941		for (j = 0; j < rcache->depot_size; ++j) {
    942			iova_magazine_free_pfns(rcache->depot[j], iovad);
    943			iova_magazine_free(rcache->depot[j]);
    944		}
    945		rcache->depot_size = 0;
    946		spin_unlock_irqrestore(&rcache->lock, flags);
    947	}
    948}
    949MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
    950MODULE_LICENSE("GPL");