cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

grant-table.c (42475B)


/******************************************************************************
 * grant_table.c
 *
 * Granting foreign access to our memory reservation.
 *
 * Copyright (c) 2005-2006, Christopher Clark
 * Copyright (c) 2004-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/workqueue.h>
#include <linux/ratelimit.h>
#include <linux/moduleparam.h>
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
#include <linux/dma-mapping.h>
#endif

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
#include <xen/swiotlb-xen.h>
#include <xen/balloon.h>
#ifdef CONFIG_X86
#include <asm/xen/cpuid.h>
#endif
#include <xen/mem-reservation.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>

#include <asm/sync_bitops.h>

#define GNTTAB_LIST_END 0xffffffff

static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;

/*
 * Handling of free grants:
 *
 * Free grants are kept in a simple list anchored in gnttab_free_head. They
 * are linked by grant ref, and the last element contains GNTTAB_LIST_END.
 * The number of free entries is stored in gnttab_free_count.
 * Additionally there is a bitmap of free entries anchored in
 * gnttab_free_bitmap. It is used to simplify the allocation of multiple
 * consecutive grants, which is needed e.g. for virtio support.
 * gnttab_last_free is used to add free entries of new frames at the end of
 * the free list.
 * gnttab_free_tail_ptr points to the variable which references the start of
 * the run of consecutive free grants ending with gnttab_last_free. This
 * pointer is updated in a rather defensive way, in order to avoid
 * performance hits in hot paths.
 * All of these variables are protected by gnttab_list_lock.
 */
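
/*
 * A minimal sketch of the consecutive-grant case mentioned above
 * (illustrative only; `n` is a placeholder count chosen by the caller):
 *
 *	grant_ref_t first;
 *
 *	if (gnttab_alloc_grant_reference_seq(n, &first))
 *		return -ENOSPC;
 *	...
 *	gnttab_free_grant_reference_seq(first, n);
 *
 * On success, grants first .. first + n - 1 belong to the caller.
 */
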
static int gnttab_free_count;
static unsigned int gnttab_size;
static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
static grant_ref_t *gnttab_free_tail_ptr;
static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock);

struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);

static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;

/* This is a structure of function pointers for the grant table. */
struct gnttab_ops {
	/*
	 * Version of the grant interface.
	 */
	unsigned int version;
	/*
	 * Grant refs per grant frame.
	 */
	unsigned int grefs_per_grant_frame;
	/*
	 * Map a list of frames for storing grant entries. The frames
	 * parameter holds the grant table frame addresses when the grant
	 * table is set up; nr_gframes is the number of grant table frames
	 * to map. Returning GNTST_okay means success; a negative value
	 * means failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release the list of frames that were mapped by map_frames for
	 * grant entry status.
	 */
	void (*unmap_frames)(void);
	/*
	 * Introduce a valid entry into the grant table, granting the frame
	 * of this grant entry to a domain for access. The ref parameter is
	 * the reference of the introduced grant entry, domid is the id of
	 * the domain being granted access, frame is the page frame to be
	 * granted, and flags is the status to set on the grant entry.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to a domain. The ref parameter is the
	 * reference of the grant entry whose access is to be revoked.
	 * If the grant entry is currently mapped for reading or writing,
	 * just return failure (== 0) directly and don't tear down the
	 * grant access. Otherwise, stop grant access for this entry and
	 * return success (== 1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref);
	/*
	 * Read the frame number related to a given grant reference.
	 */
	unsigned long (*read_frame)(grant_ref_t ref);
};

struct unmap_refs_callback_data {
	struct completion completion;
	int result;
};

static const struct gnttab_ops *gnttab_interface;

/* This reflects the status of grant entries, so it acts as a global value. */
static grant_status_t *grstatus;

static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

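/*
 * Grant references per page of the free-list frames (RPP) and grant
 * status entries per status page (SPP).
 */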
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define SPP (PAGE_SIZE / sizeof(grant_status_t))

static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
	return &gnttab_list[(entry) / RPP][(entry) % RPP];
}
/* This can be used as an l-value */
#define gnttab_entry(entry) (*__gnttab_entry(entry))

static int get_free_entries(unsigned count)
{
	unsigned long flags;
	int ref, rc = 0;
	grant_ref_t head;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if ((gnttab_free_count < count) &&
	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		return rc;
	}

	ref = head = gnttab_free_head;
	gnttab_free_count -= count;
	while (count--) {
		bitmap_clear(gnttab_free_bitmap, head, 1);
		if (gnttab_free_tail_ptr == __gnttab_entry(head))
			gnttab_free_tail_ptr = &gnttab_free_head;
		if (count)
			head = gnttab_entry(head);
	}
	gnttab_free_head = gnttab_entry(head);
	gnttab_entry(head) = GNTTAB_LIST_END;

	if (!gnttab_free_count) {
		gnttab_last_free = GNTTAB_LIST_END;
		gnttab_free_tail_ptr = NULL;
	}

	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ref;
}

static int get_seq_entry_count(void)
{
	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
		return 0;

	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
}

/* Rebuilds the free grant list and tries to find count consecutive entries. */
static int get_free_seq(unsigned int count)
{
	int ret = -ENOSPC;
	unsigned int from, to;
	grant_ref_t *last;

	gnttab_free_tail_ptr = &gnttab_free_head;
	last = &gnttab_free_head;

	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
	     from < gnttab_size;
	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
					from + 1);
		if (ret < 0 && to - from >= count) {
			ret = from;
			bitmap_clear(gnttab_free_bitmap, ret, count);
			from += count;
			gnttab_free_count -= count;
			if (from == to)
				continue;
		}

		/*
		 * Recreate the free list in order to have it properly sorted.
		 * This is needed to make sure that the free tail has the
		 * maximum possible size.
		 */
		while (from < to) {
			*last = from;
			last = __gnttab_entry(from);
			gnttab_last_free = from;
			from++;
		}
		if (to < gnttab_size)
			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
	}

	*last = GNTTAB_LIST_END;
	if (gnttab_last_free != gnttab_size - 1)
		gnttab_free_tail_ptr = NULL;

	return ret;
}

static int get_free_entries_seq(unsigned int count)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if (gnttab_free_count < count) {
		ret = gnttab_expand(count - gnttab_free_count);
		if (ret < 0)
			goto out;
	}

	if (get_seq_entry_count() < count) {
		ret = get_free_seq(count);
		if (ret >= 0)
			goto out;
		ret = gnttab_expand(count - get_seq_entry_count());
		if (ret < 0)
			goto out;
	}

	ret = *gnttab_free_tail_ptr;
	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
	gnttab_free_count -= count;
	if (!gnttab_free_count)
		gnttab_free_tail_ptr = NULL;
	bitmap_clear(gnttab_free_bitmap, ret, count);

 out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ret;
}

static void do_free_callbacks(void)
{
	struct gnttab_free_callback *callback, *next;

	callback = gnttab_free_callback_list;
	gnttab_free_callback_list = NULL;

	while (callback != NULL) {
		next = callback->next;
		if (gnttab_free_count >= callback->count) {
			callback->next = NULL;
			callback->fn(callback->arg);
		} else {
			callback->next = gnttab_free_callback_list;
			gnttab_free_callback_list = callback;
		}
		callback = next;
	}
}

static inline void check_free_callbacks(void)
{
	if (unlikely(gnttab_free_callback_list))
		do_free_callbacks();
}

static void put_free_entry_locked(grant_ref_t ref)
{
	if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
		return;

	gnttab_entry(ref) = gnttab_free_head;
	gnttab_free_head = ref;
	if (!gnttab_free_count)
		gnttab_last_free = ref;
	if (gnttab_free_tail_ptr == &gnttab_free_head)
		gnttab_free_tail_ptr = __gnttab_entry(ref);
	gnttab_free_count++;
	bitmap_set(gnttab_free_bitmap, ref, 1);
}

static void put_free_entry(grant_ref_t ref)
{
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	put_free_entry_locked(ref);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}

static void gnttab_set_free(unsigned int start, unsigned int n)
{
	unsigned int i;

	for (i = start; i < start + n - 1; i++)
		gnttab_entry(i) = i + 1;

	gnttab_entry(i) = GNTTAB_LIST_END;
	if (!gnttab_free_count) {
		gnttab_free_head = start;
		gnttab_free_tail_ptr = &gnttab_free_head;
	} else {
		gnttab_entry(gnttab_last_free) = start;
	}
	gnttab_free_count += n;
	gnttab_last_free = i;

	bitmap_set(gnttab_free_bitmap, start, n);
}

/*
 * The following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
 * Introducing a valid entry into the grant table:
 *  1. Write ent->domid.
 *  2. Write ent->frame: Frame to which access is permitted.
 *  3. Write memory barrier (WMB).
 *  4. Write ent->flags, inc. valid type.
 */
static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned flags)
{
	gnttab_shared.v1[ref].domid = domid;
	gnttab_shared.v1[ref].frame = frame;
	wmb();
	gnttab_shared.v1[ref].flags = flags;
}

static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned int flags)
{
	gnttab_shared.v2[ref].hdr.domid = domid;
	gnttab_shared.v2[ref].full_page.frame = frame;
	wmb();	/* Hypervisor concurrent accesses. */
	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
}

/*
 * Public grant-issuing interface functions
 */
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int readonly)
{
	gnttab_interface->update_entry(ref, domid, frame,
			   GTF_permit_access | (readonly ? GTF_readonly : 0));
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);

int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
				int readonly)
{
	int ref;

	ref = get_free_entries(1);
	if (unlikely(ref < 0))
		return -ENOSPC;

	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);

	return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
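
/*
 * A minimal sketch of the grant/revoke cycle (illustrative only, not
 * called anywhere in this file; `otherend_id` and `page` are placeholders
 * a driver would supply):
 */
static int __maybe_unused example_grant_cycle(domid_t otherend_id,
					      struct page *page)
{
	int ref;

	/* Pick a free entry and fill in domid/frame/flags. */
	ref = gnttab_grant_foreign_access(otherend_id,
					  xen_page_to_gfn(page), 1);
	if (ref < 0)
		return ref;

	/* ... advertise ref to the other domain and wait for it to finish ... */

	/*
	 * Revoke and free the entry; this drops the page reference, or
	 * defers everything if the peer still has the grant mapped.
	 */
	gnttab_end_foreign_access(ref, page);
	return 0;
}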

static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
{
	u16 flags, nflags;
	u16 *pflags;

	pflags = &gnttab_shared.v1[ref].flags;
	nflags = *pflags;
	do {
		flags = nflags;
		if (flags & (GTF_reading|GTF_writing))
			return 0;
	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);

	return 1;
}

static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref)
{
	gnttab_shared.v2[ref].hdr.flags = 0;
	mb();	/* Concurrent access by hypervisor. */
	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
		return 0;
	} else {
		/*
		 * The read of grstatus needs to have acquire semantics.
		 * On x86, reads already have that, and we just need to
		 * protect against compiler reorderings.
		 * On other architectures we may need a full barrier.
		 */
#ifdef CONFIG_X86
		barrier();
#else
		mb();
#endif
	}

	return 1;
}

static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	return gnttab_interface->end_foreign_access_ref(ref);
}

int gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	if (_gnttab_end_foreign_access_ref(ref))
		return 1;
	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);

static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
{
	return gnttab_shared.v1[ref].frame;
}

static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
{
	return gnttab_shared.v2[ref].full_page.frame;
}

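/*
 * Deferred freeing of grant entries: if ending foreign access fails
 * because the remote domain still has the grant mapped, the entry is
 * queued on deferred_list and retried from a timer roughly once per
 * second, logging a complaint once an entry has been pending for about
 * a minute.
 */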
struct deferred_entry {
	struct list_head list;
	grant_ref_t ref;
	uint16_t warn_delay;
	struct page *page;
};
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);

static void gnttab_handle_deferred(struct timer_list *unused)
{
	unsigned int nr = 10;
	struct deferred_entry *first = NULL;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (nr--) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);

		if (entry == first)
			break;
		list_del(&entry->list);
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref)) {
			put_free_entry(entry->ref);
			pr_debug("freeing g.e. %#x (pfn %#lx)\n",
				 entry->ref, page_to_pfn(entry->page));
			put_page(entry->page);
			kfree(entry);
			entry = NULL;
		} else {
			if (!--entry->warn_delay)
				pr_info("g.e. %#x still pending\n", entry->ref);
			if (!first)
				first = entry;
		}
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
		else if (list_empty(&deferred_list))
			break;
	}
	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}

static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
	struct deferred_entry *entry;
	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
	const char *what = KERN_WARNING "leaking";

	entry = kmalloc(sizeof(*entry), gfp);
	if (!page) {
		unsigned long gfn = gnttab_interface->read_frame(ref);

		page = pfn_to_page(gfn_to_pfn(gfn));
		get_page(page);
	}

	if (entry) {
		unsigned long flags;

		entry->ref = ref;
		entry->page = page;
		entry->warn_delay = 60;
		spin_lock_irqsave(&gnttab_list_lock, flags);
		list_add_tail(&entry->list, &deferred_list);
		if (!timer_pending(&deferred_timer)) {
			deferred_timer.expires = jiffies + HZ;
			add_timer(&deferred_timer);
		}
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		what = KERN_DEBUG "deferring";
	}
	printk("%s g.e. %#x (pfn %#lx)\n",
	       what, ref, page ? page_to_pfn(page) : -1);
}

int gnttab_try_end_foreign_access(grant_ref_t ref)
{
	int ret = _gnttab_end_foreign_access_ref(ref);

	if (ret)
		put_free_entry(ref);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);

void gnttab_end_foreign_access(grant_ref_t ref, struct page *page)
{
	if (gnttab_try_end_foreign_access(ref)) {
		if (page)
			put_page(page);
	} else
		gnttab_add_deferred(ref, page);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);

void gnttab_free_grant_reference(grant_ref_t ref)
{
	put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);

void gnttab_free_grant_references(grant_ref_t head)
{
	grant_ref_t ref;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (head != GNTTAB_LIST_END) {
		ref = gnttab_entry(head);
		put_free_entry_locked(head);
		head = ref;
	}
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);

void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (i = count; i > 0; i--)
		put_free_entry_locked(head + i - 1);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);

int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
	int h = get_free_entries(count);

	if (h < 0)
		return -ENOSPC;

	*head = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);

int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
{
	int h;

	if (count == 1)
		h = get_free_entries(1);
	else
		h = get_free_entries_seq(count);

	if (h < 0)
		return -ENOSPC;

	*first = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);

int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
	return (*private_head == GNTTAB_LIST_END);
}
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);

int gnttab_claim_grant_reference(grant_ref_t *private_head)
{
	grant_ref_t g = *private_head;
	if (unlikely(g == GNTTAB_LIST_END))
		return -ENOSPC;
	*private_head = gnttab_entry(g);
	return g;
}
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);

void gnttab_release_grant_reference(grant_ref_t *private_head,
				    grant_ref_t release)
{
	gnttab_entry(release) = *private_head;
	*private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);

void gnttab_request_free_callback(struct gnttab_free_callback *callback,
				  void (*fn)(void *), void *arg, u16 count)
{
	unsigned long flags;
	struct gnttab_free_callback *cb;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	/* Check if the callback is already on the list */
	cb = gnttab_free_callback_list;
	while (cb) {
		if (cb == callback)
			goto out;
		cb = cb->next;
	}

	callback->fn = fn;
	callback->arg = arg;
	callback->count = count;
	callback->next = gnttab_free_callback_list;
	gnttab_free_callback_list = callback;
	check_free_callbacks();
out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
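
/*
 * A sketch of the callback pattern (illustrative only; the example_*
 * names are placeholders): a driver that ran out of grant references
 * parks a callback and retries its work once enough entries are free.
 */
static struct gnttab_free_callback example_cb;

static void example_have_grants(void *arg)
{
	/* e.g. re-kick the driver's transmit queue */
}

static void __maybe_unused example_wait_for_grants(u16 needed)
{
	gnttab_request_free_callback(&example_cb, example_have_grants,
				     NULL, needed);
}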

void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
{
	struct gnttab_free_callback **pcb;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
		if (*pcb == callback) {
			*pcb = callback->next;
			break;
		}
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);

static unsigned int gnttab_frames(unsigned int frames, unsigned int align)
{
	return (frames * gnttab_interface->grefs_per_grant_frame + align - 1) /
	       align;
}

static int grow_gnttab_list(unsigned int more_frames)
{
	unsigned int new_nr_grant_frames, extra_entries, i;
	unsigned int nr_glist_frames, new_nr_glist_frames;
	unsigned int grefs_per_frame;

	grefs_per_frame = gnttab_interface->grefs_per_grant_frame;

	new_nr_grant_frames = nr_grant_frames + more_frames;
	extra_entries = more_frames * grefs_per_frame;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	new_nr_glist_frames = gnttab_frames(new_nr_grant_frames, RPP);
	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
		if (!gnttab_list[i])
			goto grow_nomem;
	}

	gnttab_set_free(gnttab_size, extra_entries);

	if (!gnttab_free_tail_ptr)
		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);

	nr_grant_frames = new_nr_grant_frames;
	gnttab_size += extra_entries;

	check_free_callbacks();

	return 0;

grow_nomem:
	while (i-- > nr_glist_frames)
		free_page((unsigned long) gnttab_list[i]);
	return -ENOMEM;
}

static unsigned int __max_nr_grant_frames(void)
{
	struct gnttab_query_size query;
	int rc;

	query.dom = DOMID_SELF;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		return 4; /* Legacy max supported number of frames */

	return query.max_nr_frames;
}

unsigned int gnttab_max_grant_frames(void)
{
	unsigned int xen_max = __max_nr_grant_frames();
	static unsigned int boot_max_nr_grant_frames;

	/* First time, initialize it properly. */
	if (!boot_max_nr_grant_frames)
		boot_max_nr_grant_frames = __max_nr_grant_frames();

	if (xen_max > boot_max_nr_grant_frames)
		return boot_max_nr_grant_frames;
	return xen_max;
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);

int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
{
	xen_pfn_t *pfn;
	unsigned int max_nr_gframes = __max_nr_grant_frames();
	unsigned int i;
	void *vaddr;

	if (xen_auto_xlat_grant_frames.count)
		return -EINVAL;

	vaddr = memremap(addr, XEN_PAGE_SIZE * max_nr_gframes, MEMREMAP_WB);
	if (vaddr == NULL) {
		pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
			&addr);
		return -ENOMEM;
	}
	pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
	if (!pfn) {
		memunmap(vaddr);
		return -ENOMEM;
	}
	for (i = 0; i < max_nr_gframes; i++)
		pfn[i] = XEN_PFN_DOWN(addr) + i;

	xen_auto_xlat_grant_frames.vaddr = vaddr;
	xen_auto_xlat_grant_frames.pfn = pfn;
	xen_auto_xlat_grant_frames.count = max_nr_gframes;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);

void gnttab_free_auto_xlat_frames(void)
{
	if (!xen_auto_xlat_grant_frames.count)
		return;
	kfree(xen_auto_xlat_grant_frames.pfn);
	memunmap(xen_auto_xlat_grant_frames.vaddr);

	xen_auto_xlat_grant_frames.pfn = NULL;
	xen_auto_xlat_grant_frames.count = 0;
	xen_auto_xlat_grant_frames.vaddr = NULL;
}
EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);

int gnttab_pages_set_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
#if BITS_PER_LONG < 64
		struct xen_page_foreign *foreign;

		foreign = kzalloc(sizeof(*foreign), GFP_KERNEL);
		if (!foreign)
			return -ENOMEM;

		set_page_private(pages[i], (unsigned long)foreign);
#endif
		SetPagePrivate(pages[i]);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_pages_set_private);

/**
 * gnttab_alloc_pages - alloc pages suitable for grant mapping into
 * @nr_pages: number of pages to alloc
 * @pages: returns the pages
 */
int gnttab_alloc_pages(int nr_pages, struct page **pages)
{
	int ret;

	ret = xen_alloc_unpopulated_pages(nr_pages, pages);
	if (ret < 0)
		return ret;

	ret = gnttab_pages_set_private(nr_pages, pages);
	if (ret < 0)
		gnttab_free_pages(nr_pages, pages);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
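
/*
 * A sketch of the expected pairing with gnttab_free_pages() (illustrative
 * only; a backend would map foreign grants into the pages in between):
 */
static int __maybe_unused example_backend_pages(void)
{
	struct page *pages[4];
	int ret;

	ret = gnttab_alloc_pages(ARRAY_SIZE(pages), pages);
	if (ret < 0)
		return ret;

	/* ... gnttab_map_refs() foreign grants into these pages ... */

	gnttab_free_pages(ARRAY_SIZE(pages), pages);
	return 0;
}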

#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
static inline void cache_init(struct gnttab_page_cache *cache)
{
	cache->pages = NULL;
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return !cache->pages;
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = cache->pages;
	cache->pages = page->zone_device_data;

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	page->zone_device_data = cache->pages;
	cache->pages = page;
}
#else
static inline void cache_init(struct gnttab_page_cache *cache)
{
	INIT_LIST_HEAD(&cache->pages);
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return list_empty(&cache->pages);
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = list_first_entry(&cache->pages, struct page, lru);
	list_del(&page->lru);

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	list_add(&page->lru, &cache->pages);
}
#endif

void gnttab_page_cache_init(struct gnttab_page_cache *cache)
{
	spin_lock_init(&cache->lock);
	cache_init(cache);
	cache->num_pages = 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_init);

int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	if (cache_empty(cache)) {
		spin_unlock_irqrestore(&cache->lock, flags);
		return gnttab_alloc_pages(1, page);
	}

	page[0] = cache_deq(cache);
	cache->num_pages--;

	spin_unlock_irqrestore(&cache->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_get);

void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
			   unsigned int num)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&cache->lock, flags);

	for (i = 0; i < num; i++)
		cache_enq(cache, page[i]);
	cache->num_pages += num;

	spin_unlock_irqrestore(&cache->lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_put);

void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
{
	struct page *page[10];
	unsigned int i = 0;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	while (cache->num_pages > num) {
		page[i] = cache_deq(cache);
		cache->num_pages--;
		if (++i == ARRAY_SIZE(page)) {
			spin_unlock_irqrestore(&cache->lock, flags);
			gnttab_free_pages(i, page);
			i = 0;
			spin_lock_irqsave(&cache->lock, flags);
		}
	}

	spin_unlock_irqrestore(&cache->lock, flags);

	if (i != 0)
		gnttab_free_pages(i, page);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
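
/*
 * A sketch of a get/put cycle on the page cache (illustrative only; the
 * shrink threshold of 16 spare pages is an arbitrary example value):
 */
static int __maybe_unused example_cache_cycle(struct gnttab_page_cache *cache)
{
	struct page *page;
	int ret;

	ret = gnttab_page_cache_get(cache, &page);
	if (ret < 0)
		return ret;

	/* ... use the page for a grant mapping ... */

	gnttab_page_cache_put(cache, &page, 1);
	gnttab_page_cache_shrink(cache, 16);
	return 0;
}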

void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		if (PagePrivate(pages[i])) {
#if BITS_PER_LONG < 64
			kfree((void *)page_private(pages[i]));
#endif
			ClearPagePrivate(pages[i]);
		}
	}
}
EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);

/**
 * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
 * @nr_pages: number of pages to free
 * @pages: the pages
 */
void gnttab_free_pages(int nr_pages, struct page **pages)
{
	gnttab_pages_clear_private(nr_pages, pages);
	xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/**
 * gnttab_dma_alloc_pages - alloc DMAable pages suitable for grant mapping into
 * @args: arguments to the function
 */
int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
{
	unsigned long pfn, start_pfn;
	size_t size;
	int i, ret;

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		args->vaddr = dma_alloc_coherent(args->dev, size,
						 &args->dev_bus_addr,
						 GFP_KERNEL | __GFP_NOWARN);
	else
		args->vaddr = dma_alloc_wc(args->dev, size,
					   &args->dev_bus_addr,
					   GFP_KERNEL | __GFP_NOWARN);
	if (!args->vaddr) {
		pr_debug("Failed to allocate DMA buffer of size %zu\n", size);
		return -ENOMEM;
	}

	start_pfn = __phys_to_pfn(args->dev_bus_addr);
	for (pfn = start_pfn, i = 0; pfn < start_pfn + args->nr_pages;
			pfn++, i++) {
		struct page *page = pfn_to_page(pfn);

		args->pages[i] = page;
		args->frames[i] = xen_page_to_gfn(page);
		xenmem_reservation_scrub_page(page);
	}

	xenmem_reservation_va_mapping_reset(args->nr_pages, args->pages);

	ret = xenmem_reservation_decrease(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to decrease reservation for DMA buffer\n");
		ret = -EFAULT;
		goto fail;
	}

	ret = gnttab_pages_set_private(args->nr_pages, args->pages);
	if (ret < 0)
		goto fail;

	return 0;

fail:
	gnttab_dma_free_pages(args);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_alloc_pages);

/**
 * gnttab_dma_free_pages - free DMAable pages
 * @args: arguments to the function
 */
int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
{
	size_t size;
	int i, ret;

	gnttab_pages_clear_private(args->nr_pages, args->pages);

	for (i = 0; i < args->nr_pages; i++)
		args->frames[i] = page_to_xen_pfn(args->pages[i]);

	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to increase reservation for DMA buffer\n");
		ret = -EFAULT;
	} else {
		ret = 0;
	}

	xenmem_reservation_va_mapping_update(args->nr_pages, args->pages,
					     args->frames);

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		dma_free_coherent(args->dev, size,
				  args->vaddr, args->dev_bus_addr);
	else
		dma_free_wc(args->dev, size,
			    args->vaddr, args->dev_bus_addr);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_free_pages);
#endif

/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
						const char *func)
{
	unsigned delay = 1;

	do {
		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
		if (*status == GNTST_eagain)
			msleep(delay++);
	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));

	if (delay >= MAX_DELAY) {
		pr_err("%s: %s eagain grant\n", func, current->comm);
		*status = GNTST_bad_page;
	}
}

void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
{
	struct gnttab_map_grant_ref *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_map);

void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
{
	struct gnttab_copy *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_copy);
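
/*
 * A sketch of a single eagain-tolerant copy from a foreign grant into a
 * local frame (illustrative only; ref, domid, local_gfn and len are
 * placeholders supplied by the caller):
 */
static int16_t __maybe_unused example_copy_from_gref(grant_ref_t ref,
						     domid_t domid,
						     xen_pfn_t local_gfn,
						     uint16_t len)
{
	struct gnttab_copy op = {
		.source.u.ref = ref,
		.source.domid = domid,
		.dest.u.gmfn = local_gfn,
		.dest.domid = DOMID_SELF,
		.len = len,
		.flags = GNTCOPY_source_gref,
	};

	gnttab_batch_copy(&op, 1);
	return op.status;
}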

void gnttab_foreach_grant_in_range(struct page *page,
				   unsigned int offset,
				   unsigned int len,
				   xen_grant_fn_t fn,
				   void *data)
{
	unsigned int goffset;
	unsigned int glen;
	unsigned long xen_pfn;

	len = min_t(unsigned int, PAGE_SIZE - offset, len);
	goffset = xen_offset_in_page(offset);

	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset);

	while (len) {
		glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len);
		fn(pfn_to_gfn(xen_pfn), goffset, glen, data);

		goffset = 0;
		xen_pfn++;
		len -= glen;
	}
}
EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range);
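
/*
 * A sketch of a callback for the helper above (illustrative only): count
 * the grant-sized chunks covering a buffer, as a ring driver might when
 * sizing a request.
 */
static void example_count_chunk(unsigned long gfn, unsigned int offset,
				unsigned int len, void *data)
{
	(*(unsigned int *)data)++;
}

static unsigned int __maybe_unused example_count_grants(struct page *page,
							unsigned int offset,
							unsigned int len)
{
	unsigned int chunks = 0;

	gnttab_foreach_grant_in_range(page, offset, len, example_count_chunk,
				      &chunks);
	return chunks;
}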

void gnttab_foreach_grant(struct page **pages,
			  unsigned int nr_grefs,
			  xen_grant_fn_t fn,
			  void *data)
{
	unsigned int goffset = 0;
	unsigned long xen_pfn = 0;
	unsigned int i;

	for (i = 0; i < nr_grefs; i++) {
		if ((i % XEN_PFN_PER_PAGE) == 0) {
			xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
			goffset = 0;
		}

		fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data);

		goffset += XEN_PAGE_SIZE;
		xen_pfn++;
	}
}

int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
		    struct gnttab_map_grant_ref *kmap_ops,
		    struct page **pages, unsigned int count)
{
	int i, ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++) {
		switch (map_ops[i].status) {
		case GNTST_okay:
		{
			struct xen_page_foreign *foreign;

			SetPageForeign(pages[i]);
			foreign = xen_page_foreign(pages[i]);
			foreign->domid = map_ops[i].dom;
			foreign->gref = map_ops[i].ref;
			break;
		}

		case GNTST_no_device_space:
			pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
			break;

		case GNTST_eagain:
			/* Retry eagain maps */
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
						map_ops + i,
						&map_ops[i].status, __func__);
			/* Test status in next loop iteration. */
			i--;
			break;

		default:
			break;
		}
	}

	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_map_refs);

int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
		      struct gnttab_unmap_grant_ref *kunmap_ops,
		      struct page **pages, unsigned int count)
{
	unsigned int i;
	int ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++)
		ClearPageForeign(pages[i]);

	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);

#define GNTTAB_UNMAP_REFS_DELAY 5

static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);

static void gnttab_unmap_work(struct work_struct *work)
{
	struct gntab_unmap_queue_data
		*unmap_data = container_of(work,
					   struct gntab_unmap_queue_data,
					   gnttab_work.work);
	if (unmap_data->age != UINT_MAX)
		unmap_data->age++;
	__gnttab_unmap_refs_async(unmap_data);
}

static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
{
	int ret;
	int pc;

	for (pc = 0; pc < item->count; pc++) {
		if (page_count(item->pages[pc]) > 1) {
			unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1);
			schedule_delayed_work(&item->gnttab_work,
					      msecs_to_jiffies(delay));
			return;
		}
	}

	ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
				item->pages, item->count);
	item->done(ret, item);
}

void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
{
	INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work);
	item->age = 0;

	__gnttab_unmap_refs_async(item);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);

static void unmap_refs_callback(int result,
		struct gntab_unmap_queue_data *data)
{
	struct unmap_refs_callback_data *d = data->data;

	d->result = result;
	complete(&d->completion);
}

int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
{
	struct unmap_refs_callback_data data;

	init_completion(&data.completion);
	item->data = &data;
	item->done = &unmap_refs_callback;
	gnttab_unmap_refs_async(item);
	wait_for_completion(&data.completion);

	return data.result;
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);

static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
	return gnttab_frames(nr_grant_frames, SPP);
}

static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
{
	int rc;

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v1(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}

static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
{
	uint64_t *sframes;
	unsigned int nr_sframes;
	struct gnttab_get_status_frames getframes;
	int rc;

	nr_sframes = nr_status_frames(nr_gframes);

	/* No need for kzalloc as the array is initialized by the following
	 * GNTTABOP_get_status_frames hypercall.
	 */
	sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC);
	if (!sframes)
		return -ENOMEM;

	getframes.dom        = DOMID_SELF;
	getframes.nr_frames  = nr_sframes;
	set_xen_guest_handle(getframes.frame_list, sframes);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
				       &getframes, 1);
	if (rc == -ENOSYS) {
		kfree(sframes);
		return -ENOSYS;
	}

	BUG_ON(rc || getframes.status);

	rc = arch_gnttab_map_status(sframes, nr_sframes,
				    nr_status_frames(gnttab_max_grant_frames()),
				    &grstatus);
	BUG_ON(rc);
	kfree(sframes);

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v2(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
}

static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	xen_pfn_t *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		struct xen_add_to_physmap xatp;
		unsigned int i = end_idx;
		rc = 0;
		BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
		/*
		 * Loop backwards, so that the first hypercall has the largest
		 * index, ensuring that the table will grow only once.
		 */
		do {
			xatp.domid = DOMID_SELF;
			xatp.idx = i;
			xatp.space = XENMAPSPACE_grant_table;
			xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
			if (rc != 0) {
				pr_warn("grant table add_to_physmap failed, err=%d\n",
					rc);
				break;
			}
		} while (i-- > start_idx);

		return rc;
	}

	/* No need for kzalloc as the array is initialized by the following
	 * GNTTABOP_setup_table hypercall.
	 */
	frames = kmalloc_array(nr_gframes, sizeof(unsigned long), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom        = DOMID_SELF;
	setup.nr_frames  = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	BUG_ON(rc || setup.status);

	rc = gnttab_interface->map_frames(frames, nr_gframes);

	kfree(frames);

	return rc;
}

static const struct gnttab_ops gnttab_v1_ops = {
	.version			= 1,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(struct grant_entry_v1),
	.map_frames			= gnttab_map_frames_v1,
	.unmap_frames			= gnttab_unmap_frames_v1,
	.update_entry			= gnttab_update_entry_v1,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
	.read_frame			= gnttab_read_frame_v1,
};

static const struct gnttab_ops gnttab_v2_ops = {
	.version			= 2,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(union grant_entry_v2),
	.map_frames			= gnttab_map_frames_v2,
	.unmap_frames			= gnttab_unmap_frames_v2,
	.update_entry			= gnttab_update_entry_v2,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
	.read_frame			= gnttab_read_frame_v2,
};

static bool gnttab_need_v2(void)
{
#ifdef CONFIG_X86
	uint32_t base, width;

	if (xen_pv_domain()) {
		base = xen_cpuid_base();
		if (cpuid_eax(base) < 5)
			return false;	/* Information not available, use V1. */
		width = cpuid_ebx(base + 5) &
			XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK;
		return width > 32 + PAGE_SHIFT;
	}
#endif
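	/*
	 * Grant v1 entries store the frame number in a 32-bit field, so
	 * guests with page frames that do not fit in 32 bits (i.e. memory
	 * above 16 TiB with 4 KiB pages) need the v2 layout.
	 */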
	return !!(max_possible_pfn >> 32);
}

static void gnttab_request_version(void)
{
	long rc;
	struct gnttab_set_version gsv;

	if (gnttab_need_v2())
		gsv.version = 2;
	else
		gsv.version = 1;

	/* Boot parameter overrides automatic selection. */
	if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
		gsv.version = xen_gnttab_version;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
	if (rc == 0 && gsv.version == 2)
		gnttab_interface = &gnttab_v2_ops;
	else
		gnttab_interface = &gnttab_v1_ops;
	pr_info("Grant tables using version %d layout\n",
		gnttab_interface->version);
}

static int gnttab_setup(void)
{
	unsigned int max_nr_gframes;

	max_nr_gframes = gnttab_max_grant_frames();
	if (max_nr_gframes < nr_grant_frames)
		return -ENOSYS;

	if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
		if (gnttab_shared.addr == NULL) {
			pr_warn("gnttab share frames is not mapped!\n");
			return -ENOMEM;
		}
	}
	return gnttab_map(0, nr_grant_frames - 1);
}

int gnttab_resume(void)
{
	gnttab_request_version();
	return gnttab_setup();
}

int gnttab_suspend(void)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		gnttab_interface->unmap_frames();
	return 0;
}

static int gnttab_expand(unsigned int req_entries)
{
	int rc;
	unsigned int cur, extra;

	cur = nr_grant_frames;
	extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
		 gnttab_interface->grefs_per_grant_frame);
	if (cur + extra > gnttab_max_grant_frames()) {
		pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
				    " cur=%u extra=%u limit=%u"
				    " gnttab_free_count=%u req_entries=%u\n",
				    cur, extra, gnttab_max_grant_frames(),
				    gnttab_free_count, req_entries);
		return -ENOSPC;
	}

	rc = gnttab_map(cur, cur + extra - 1);
	if (rc == 0)
		rc = grow_gnttab_list(extra);

	return rc;
}

int gnttab_init(void)
{
	int i;
	unsigned long max_nr_grant_frames, max_nr_grefs;
	unsigned int max_nr_glist_frames, nr_glist_frames;
	int ret;

	gnttab_request_version();
	max_nr_grant_frames = gnttab_max_grant_frames();
	max_nr_grefs = max_nr_grant_frames *
			gnttab_interface->grefs_per_grant_frame;
	nr_grant_frames = 1;

	/* Determine the maximum number of frames required for the
	 * grant reference free list on the current hypervisor.
	 */
	max_nr_glist_frames = max_nr_grefs / RPP;

	gnttab_list = kmalloc_array(max_nr_glist_frames,
				    sizeof(grant_ref_t *),
				    GFP_KERNEL);
	if (gnttab_list == NULL)
		return -ENOMEM;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	for (i = 0; i < nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
		if (gnttab_list[i] == NULL) {
			ret = -ENOMEM;
			goto ini_nomem;
		}
	}

	gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
	if (!gnttab_free_bitmap) {
		ret = -ENOMEM;
		goto ini_nomem;
	}

	ret = arch_gnttab_init(max_nr_grant_frames,
			       nr_status_frames(max_nr_grant_frames));
	if (ret < 0)
		goto ini_nomem;

	if (gnttab_setup() < 0) {
		ret = -ENODEV;
		goto ini_nomem;
	}

	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;

	gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
			gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);

	printk("Grant table initialized\n");
	return 0;

 ini_nomem:
	for (i--; i >= 0; i--)
		free_page((unsigned long)gnttab_list[i]);
	kfree(gnttab_list);
	bitmap_free(gnttab_free_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_init);

static int __gnttab_init(void)
{
	if (!xen_domain())
		return -ENODEV;

	/* Delay grant-table initialization in the PV on HVM case */
	if (xen_hvm_domain() && !xen_pvh_domain())
		return 0;

	return gnttab_init();
}
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
 * beforehand to initialize xen_auto_xlat_grant_frames. */
core_initcall_sync(__gnttab_init);