cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kvm_util.c (66908B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * tools/testing/selftests/kvm/lib/kvm_util.c
      4 *
      5 * Copyright (C) 2018, Google LLC.
      6 */
      7
      8#define _GNU_SOURCE /* for program_invocation_name */
      9#include "test_util.h"
     10#include "kvm_util.h"
     11#include "kvm_util_internal.h"
     12#include "processor.h"
     13
     14#include <assert.h>
     15#include <sys/mman.h>
     16#include <sys/types.h>
     17#include <sys/stat.h>
     18#include <unistd.h>
     19#include <linux/kernel.h>
     20
     21#define KVM_UTIL_MIN_PFN	2
     22
     23static int vcpu_mmap_sz(void);
     24
     25int open_path_or_exit(const char *path, int flags)
     26{
     27	int fd;
     28
     29	fd = open(path, flags);
     30	if (fd < 0) {
     31		print_skip("%s not available (errno: %d)", path, errno);
     32		exit(KSFT_SKIP);
     33	}
     34
     35	return fd;
     36}
     37
     38/*
     39 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
     40 *
     41 * Input Args:
     42 *   flags - The flags to pass when opening KVM_DEV_PATH.
     43 *
     44 * Return:
     45 *   The opened file descriptor of /dev/kvm.
     46 */
     47static int _open_kvm_dev_path_or_exit(int flags)
     48{
     49	return open_path_or_exit(KVM_DEV_PATH, flags);
     50}
     51
     52int open_kvm_dev_path_or_exit(void)
     53{
     54	return _open_kvm_dev_path_or_exit(O_RDONLY);
     55}
     56
     57/*
     58 * Capability
     59 *
     60 * Input Args:
     61 *   cap - Capability
     62 *
     63 * Output Args: None
     64 *
     65 * Return:
      66 *   On success, the value corresponding to the capability (KVM_CAP_*)
     67 *   specified by the value of cap.  On failure a TEST_ASSERT failure
     68 *   is produced.
     69 *
     70 * Looks up and returns the value corresponding to the capability
     71 * (KVM_CAP_*) given by cap.
     72 */
     73int kvm_check_cap(long cap)
     74{
     75	int ret;
     76	int kvm_fd;
     77
     78	kvm_fd = open_kvm_dev_path_or_exit();
     79	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
     80	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
     81		"  rc: %i errno: %i", ret, errno);
     82
     83	close(kvm_fd);
     84
     85	return ret;
     86}
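
/*
 * Usage sketch (annotation, not part of the upstream file): callers
 * typically gate a test on a capability before doing any setup, just as
 * vm_open() below does for KVM_CAP_IMMEDIATE_EXIT.  The helper name is
 * hypothetical.
 */
static inline void example_require_cap(long cap, const char *name)
{
	/* kvm_check_cap() returns 0 when the capability is absent. */
	if (!kvm_check_cap(cap)) {
		print_skip("%s not available", name);
		exit(KSFT_SKIP);
	}
}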
     87
     88/* VM Check Capability
     89 *
     90 * Input Args:
     91 *   vm - Virtual Machine
     92 *   cap - Capability
     93 *
     94 * Output Args: None
     95 *
     96 * Return:
      97 *   On success, the value corresponding to the capability (KVM_CAP_*)
     98 *   specified by the value of cap.  On failure a TEST_ASSERT failure
     99 *   is produced.
    100 *
    101 * Looks up and returns the value corresponding to the capability
    102 * (KVM_CAP_*) given by cap.
    103 */
    104int vm_check_cap(struct kvm_vm *vm, long cap)
    105{
    106	int ret;
    107
    108	ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
    109	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
    110		"  rc: %i errno: %i", ret, errno);
    111
    112	return ret;
    113}
    114
    115/* VM Enable Capability
    116 *
    117 * Input Args:
    118 *   vm - Virtual Machine
    119 *   cap - Capability
    120 *
    121 * Output Args: None
    122 *
    123 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
    124 *
    125 * Enables a capability (KVM_CAP_*) on the VM.
    126 */
    127int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
    128{
    129	int ret;
    130
    131	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
    132	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
    133		"  rc: %i errno: %i", ret, errno);
    134
    135	return ret;
    136}
    137
    138/* VCPU Enable Capability
    139 *
    140 * Input Args:
    141 *   vm - Virtual Machine
    142 *   vcpu_id - VCPU
    143 *   cap - Capability
    144 *
    145 * Output Args: None
    146 *
    147 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
    148 *
    149 * Enables a capability (KVM_CAP_*) on the VCPU.
    150 */
    151int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
    152		    struct kvm_enable_cap *cap)
    153{
    154	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
    155	int r;
    156
    157	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
    158
    159	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
    160	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
    161			"  rc: %i, errno: %i", r, errno);
    162
    163	return r;
    164}
    165
    166void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
    167{
    168	struct kvm_enable_cap cap = { 0 };
    169
    170	cap.cap = KVM_CAP_DIRTY_LOG_RING;
    171	cap.args[0] = ring_size;
    172	vm_enable_cap(vm, &cap);
    173	vm->dirty_ring_size = ring_size;
    174}
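
/*
 * Usage sketch (annotation, not part of the upstream file): enable the
 * dirty ring only when the host advertises it.  The 4096-entry count is
 * an arbitrary example; per the KVM API the ring size is given in bytes.
 */
static inline void example_enable_dirty_ring(struct kvm_vm *vm)
{
	if (kvm_check_cap(KVM_CAP_DIRTY_LOG_RING))
		vm_enable_dirty_ring(vm, 4096 * sizeof(struct kvm_dirty_gfn));
}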
    175
    176static void vm_open(struct kvm_vm *vm, int perm)
    177{
    178	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
    179
    180	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
    181		print_skip("immediate_exit not available");
    182		exit(KSFT_SKIP);
    183	}
    184
    185	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
    186	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
    187		"rc: %i errno: %i", vm->fd, errno);
    188}
    189
    190const char *vm_guest_mode_string(uint32_t i)
    191{
    192	static const char * const strings[] = {
    193		[VM_MODE_P52V48_4K]	= "PA-bits:52,  VA-bits:48,  4K pages",
    194		[VM_MODE_P52V48_64K]	= "PA-bits:52,  VA-bits:48, 64K pages",
    195		[VM_MODE_P48V48_4K]	= "PA-bits:48,  VA-bits:48,  4K pages",
    196		[VM_MODE_P48V48_16K]	= "PA-bits:48,  VA-bits:48, 16K pages",
    197		[VM_MODE_P48V48_64K]	= "PA-bits:48,  VA-bits:48, 64K pages",
    198		[VM_MODE_P40V48_4K]	= "PA-bits:40,  VA-bits:48,  4K pages",
    199		[VM_MODE_P40V48_16K]	= "PA-bits:40,  VA-bits:48, 16K pages",
    200		[VM_MODE_P40V48_64K]	= "PA-bits:40,  VA-bits:48, 64K pages",
    201		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48,  4K pages",
    202		[VM_MODE_P47V64_4K]	= "PA-bits:47,  VA-bits:64,  4K pages",
    203		[VM_MODE_P44V64_4K]	= "PA-bits:44,  VA-bits:64,  4K pages",
    204		[VM_MODE_P36V48_4K]	= "PA-bits:36,  VA-bits:48,  4K pages",
    205		[VM_MODE_P36V48_16K]	= "PA-bits:36,  VA-bits:48, 16K pages",
    206		[VM_MODE_P36V48_64K]	= "PA-bits:36,  VA-bits:48, 64K pages",
    207		[VM_MODE_P36V47_16K]	= "PA-bits:36,  VA-bits:47, 16K pages",
    208	};
    209	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
    210		       "Missing new mode strings?");
    211
    212	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
    213
    214	return strings[i];
    215}
    216
    217const struct vm_guest_mode_params vm_guest_mode_params[] = {
    218	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
    219	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
    220	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
    221	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
    222	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
    223	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
    224	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
    225	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
    226	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
    227	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
    228	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
    229	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
    230	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
    231	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
    232	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
    233};
    234_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
    235	       "Missing new mode params?");
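
/*
 * Sketch (annotation, not part of the upstream file): the table above is
 * expected to keep page_size == 1 << page_shift for every mode; a
 * hypothetical self-check of that invariant.
 */
static inline void example_check_mode_params(void)
{
	int i;

	for (i = 0; i < NUM_VM_MODES; i++)
		TEST_ASSERT(vm_guest_mode_params[i].page_size ==
			    1u << vm_guest_mode_params[i].page_shift,
			    "page_size/page_shift mismatch for mode %d", i);
}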
    236
    237/*
    238 * VM Create
    239 *
    240 * Input Args:
    241 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
    242 *   phy_pages - Physical memory pages
    243 *   perm - permission
    244 *
    245 * Output Args: None
    246 *
    247 * Return:
    248 *   Pointer to opaque structure that describes the created VM.
    249 *
    250 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
    251 * When phy_pages is non-zero, a memory region of phy_pages physical pages
    252 * is created and mapped starting at guest physical address 0.  The file
    253 * descriptor to control the created VM is created with the permissions
    254 * given by perm (e.g. O_RDWR).
    255 */
    256struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
    257{
    258	struct kvm_vm *vm;
    259
    260	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
    261		 vm_guest_mode_string(mode), phy_pages, perm);
    262
    263	vm = calloc(1, sizeof(*vm));
    264	TEST_ASSERT(vm != NULL, "Insufficient Memory");
    265
    266	INIT_LIST_HEAD(&vm->vcpus);
    267	vm->regions.gpa_tree = RB_ROOT;
    268	vm->regions.hva_tree = RB_ROOT;
    269	hash_init(vm->regions.slot_hash);
    270
    271	vm->mode = mode;
    272	vm->type = 0;
    273
    274	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
    275	vm->va_bits = vm_guest_mode_params[mode].va_bits;
    276	vm->page_size = vm_guest_mode_params[mode].page_size;
    277	vm->page_shift = vm_guest_mode_params[mode].page_shift;
    278
    279	/* Setup mode specific traits. */
    280	switch (vm->mode) {
    281	case VM_MODE_P52V48_4K:
    282		vm->pgtable_levels = 4;
    283		break;
    284	case VM_MODE_P52V48_64K:
    285		vm->pgtable_levels = 3;
    286		break;
    287	case VM_MODE_P48V48_4K:
    288		vm->pgtable_levels = 4;
    289		break;
    290	case VM_MODE_P48V48_64K:
    291		vm->pgtable_levels = 3;
    292		break;
    293	case VM_MODE_P40V48_4K:
    294	case VM_MODE_P36V48_4K:
    295		vm->pgtable_levels = 4;
    296		break;
    297	case VM_MODE_P40V48_64K:
    298	case VM_MODE_P36V48_64K:
    299		vm->pgtable_levels = 3;
    300		break;
    301	case VM_MODE_P48V48_16K:
    302	case VM_MODE_P40V48_16K:
    303	case VM_MODE_P36V48_16K:
    304		vm->pgtable_levels = 4;
    305		break;
    306	case VM_MODE_P36V47_16K:
    307		vm->pgtable_levels = 3;
    308		break;
    309	case VM_MODE_PXXV48_4K:
    310#ifdef __x86_64__
    311		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
    312		/*
     313		 * Ignore KVM support for 5-level paging (vm->va_bits == 57);
     314		 * it doesn't take effect unless CR4.LA57 is set, which it
     315		 * isn't for this VM_MODE.
    316		 */
    317		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
    318			    "Linear address width (%d bits) not supported",
    319			    vm->va_bits);
    320		pr_debug("Guest physical address width detected: %d\n",
    321			 vm->pa_bits);
    322		vm->pgtable_levels = 4;
    323		vm->va_bits = 48;
    324#else
    325		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
    326#endif
    327		break;
    328	case VM_MODE_P47V64_4K:
    329		vm->pgtable_levels = 5;
    330		break;
    331	case VM_MODE_P44V64_4K:
    332		vm->pgtable_levels = 5;
    333		break;
    334	default:
    335		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
    336	}
    337
    338#ifdef __aarch64__
    339	if (vm->pa_bits != 40)
    340		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
    341#endif
    342
    343	vm_open(vm, perm);
    344
    345	/* Limit to VA-bit canonical virtual addresses. */
    346	vm->vpages_valid = sparsebit_alloc();
    347	sparsebit_set_num(vm->vpages_valid,
    348		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
    349	sparsebit_set_num(vm->vpages_valid,
    350		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
    351		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
    352
    353	/* Limit physical addresses to PA-bits. */
    354	vm->max_gfn = vm_compute_max_gfn(vm);
    355
    356	/* Allocate and setup memory for guest. */
    357	vm->vpages_mapped = sparsebit_alloc();
    358	if (phy_pages != 0)
    359		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
    360					    0, 0, phy_pages, 0);
    361
    362	return vm;
    363}
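
/*
 * Usage sketch (annotation, not part of the upstream file): create a bare
 * VM in the default mode with the default amount of slot0 memory, exactly
 * as described above.
 */
static inline struct kvm_vm *example_create_vm(void)
{
	return vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
}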
    364
    365struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages)
    366{
    367	struct kvm_vm *vm;
    368
    369	vm = vm_create(mode, pages, O_RDWR);
    370
    371	kvm_vm_elf_load(vm, program_invocation_name);
    372
    373#ifdef __x86_64__
    374	vm_create_irqchip(vm);
    375#endif
    376	return vm;
    377}
    378
    379/*
    380 * VM Create with customized parameters
    381 *
    382 * Input Args:
    383 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
    384 *   nr_vcpus - VCPU count
    385 *   slot0_mem_pages - Slot0 physical memory size
    386 *   extra_mem_pages - Non-slot0 physical memory total size
    387 *   num_percpu_pages - Per-cpu physical memory pages
    388 *   guest_code - Guest entry point
    389 *   vcpuids - VCPU IDs
    390 *
    391 * Output Args: None
    392 *
    393 * Return:
    394 *   Pointer to opaque structure that describes the created VM.
    395 *
    396 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
     397 * with a customized slot0 memory size of at least 512 pages currently.
     398 * extra_mem_pages is used only to calculate the maximum page table size;
     399 * this function performs no real memory allocation for non-slot0 memory.
    400 */
    401struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
    402				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
    403				    uint32_t num_percpu_pages, void *guest_code,
    404				    uint32_t vcpuids[])
    405{
    406	uint64_t vcpu_pages, extra_pg_pages, pages;
    407	struct kvm_vm *vm;
    408	int i;
    409
     410	/* Force slot0 memory size to be at least DEFAULT_GUEST_PHY_PAGES */
    411	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
    412		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
    413
     414	/* The maximum page table size for a memory region occurs when the
     415	 * smallest pages are used. Considering each page holds x page
     416	 * table descriptors, the total extra size for page tables (for N
     417	 * extra pages) is the geometric series N/x+N/x^2+N/x^3+... = N/(x-1),
     418	 * which is bounded by N/x*2 for any x >= 2.
    419	 */
    420	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
     421	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
    422	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
    423
    424	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
    425		    "nr_vcpus = %d too large for host, max-vcpus = %d",
    426		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
    427
    428	pages = vm_adjust_num_guest_pages(mode, pages);
    429
    430	vm = vm_create_without_vcpus(mode, pages);
    431
    432	for (i = 0; i < nr_vcpus; ++i) {
    433		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
    434
    435		vm_vcpu_add_default(vm, vcpuid, guest_code);
    436	}
    437
    438	return vm;
    439}
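
/*
 * Worked example (annotation, not part of the upstream file): assuming a
 * 4 KiB minimum page with 8-byte descriptors (x = 512), backing 1 GiB of
 * data (N = 262144 pages) needs N/511 ~= 513 page-table pages, safely
 * inside the 2*N/x = 1024 pages reserved by the estimate above.
 */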
    440
    441struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
    442					    uint32_t num_percpu_pages, void *guest_code,
    443					    uint32_t vcpuids[])
    444{
    445	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
    446				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
    447}
    448
    449struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
    450				 void *guest_code)
    451{
    452	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
    453					    (uint32_t []){ vcpuid });
    454}
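
/*
 * Usage sketch (annotation, not part of the upstream file): the common
 * single-vCPU setup.  example_guest_code is a hypothetical placeholder;
 * a real guest would signal completion via ucall instead of spinning.
 */
static void example_guest_code(void)
{
	for (;;)
		;
}

static inline struct kvm_vm *example_create_single_vcpu_vm(void)
{
	return vm_create_default(0 /* vcpuid */, 0 /* extra_mem_pages */,
				 example_guest_code);
}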
    455
    456/*
    457 * VM Restart
    458 *
    459 * Input Args:
    460 *   vm - VM that has been released before
    461 *   perm - permission
    462 *
    463 * Output Args: None
    464 *
     465 * Reopens the file descriptors associated with the VM and reinstates the
    466 * global state, such as the irqchip and the memory regions that are mapped
    467 * into the guest.
    468 */
    469void kvm_vm_restart(struct kvm_vm *vmp, int perm)
    470{
    471	int ctr;
    472	struct userspace_mem_region *region;
    473
    474	vm_open(vmp, perm);
    475	if (vmp->has_irqchip)
    476		vm_create_irqchip(vmp);
    477
    478	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
    479		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
    480		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
    481			    "  rc: %i errno: %i\n"
    482			    "  slot: %u flags: 0x%x\n"
    483			    "  guest_phys_addr: 0x%llx size: 0x%llx",
    484			    ret, errno, region->region.slot,
    485			    region->region.flags,
    486			    region->region.guest_phys_addr,
    487			    region->region.memory_size);
    488	}
    489}
    490
    491void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
    492{
    493	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
    494	int ret;
    495
    496	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
    497	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
     498		    __func__, strerror(errno));
    499}
    500
    501void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
    502			    uint64_t first_page, uint32_t num_pages)
    503{
    504	struct kvm_clear_dirty_log args = {
    505		.dirty_bitmap = log, .slot = slot,
    506		.first_page = first_page,
    507		.num_pages = num_pages
    508	};
    509	int ret;
    510
    511	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
    512	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
     513		    __func__, strerror(errno));
    514}
    515
    516uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
    517{
    518	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
    519}
    520
    521/*
    522 * Userspace Memory Region Find
    523 *
    524 * Input Args:
    525 *   vm - Virtual Machine
    526 *   start - Starting VM physical address
    527 *   end - Ending VM physical address, inclusive.
    528 *
    529 * Output Args: None
    530 *
    531 * Return:
    532 *   Pointer to overlapping region, NULL if no such region.
    533 *
    534 * Searches for a region with any physical memory that overlaps with
    535 * any portion of the guest physical addresses from start to end
    536 * inclusive.  If multiple overlapping regions exist, a pointer to any
     537 * of the regions is returned.  NULL is returned only when no overlapping
    538 * region exists.
    539 */
    540static struct userspace_mem_region *
    541userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
    542{
    543	struct rb_node *node;
    544
    545	for (node = vm->regions.gpa_tree.rb_node; node; ) {
    546		struct userspace_mem_region *region =
    547			container_of(node, struct userspace_mem_region, gpa_node);
    548		uint64_t existing_start = region->region.guest_phys_addr;
    549		uint64_t existing_end = region->region.guest_phys_addr
    550			+ region->region.memory_size - 1;
    551		if (start <= existing_end && end >= existing_start)
    552			return region;
    553
    554		if (start < existing_start)
    555			node = node->rb_left;
    556		else
    557			node = node->rb_right;
    558	}
    559
    560	return NULL;
    561}
    562
    563/*
    564 * KVM Userspace Memory Region Find
    565 *
    566 * Input Args:
    567 *   vm - Virtual Machine
    568 *   start - Starting VM physical address
    569 *   end - Ending VM physical address, inclusive.
    570 *
    571 * Output Args: None
    572 *
    573 * Return:
    574 *   Pointer to overlapping region, NULL if no such region.
    575 *
    576 * Public interface to userspace_mem_region_find. Allows tests to look up
     577 * the memslot data structure for a given range of guest physical memory.
    578 */
    579struct kvm_userspace_memory_region *
    580kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
    581				 uint64_t end)
    582{
    583	struct userspace_mem_region *region;
    584
    585	region = userspace_mem_region_find(vm, start, end);
    586	if (!region)
    587		return NULL;
    588
    589	return &region->region;
    590}
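
/*
 * Usage sketch (annotation, not part of the upstream file): look up the
 * memslot backing GPA 0 and read back its flags.
 */
static inline uint32_t example_flags_at_gpa0(struct kvm_vm *vm)
{
	struct kvm_userspace_memory_region *r;

	r = kvm_userspace_memory_region_find(vm, 0, 0);
	TEST_ASSERT(r, "no memslot backs GPA 0");
	return r->flags;
}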
    591
    592/*
    593 * VCPU Find
    594 *
    595 * Input Args:
    596 *   vm - Virtual Machine
    597 *   vcpuid - VCPU ID
    598 *
    599 * Output Args: None
    600 *
    601 * Return:
    602 *   Pointer to VCPU structure
    603 *
    604 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
    605 * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
    606 * for the specified vcpuid.
    607 */
    608struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
    609{
    610	struct vcpu *vcpu;
    611
    612	list_for_each_entry(vcpu, &vm->vcpus, list) {
    613		if (vcpu->id == vcpuid)
    614			return vcpu;
    615	}
    616
    617	return NULL;
    618}
    619
    620/*
    621 * VM VCPU Remove
    622 *
    623 * Input Args:
    624 *   vcpu - VCPU to remove
    625 *
    626 * Output Args: None
    627 *
    628 * Return: None, TEST_ASSERT failures for all error conditions
    629 *
    630 * Removes a vCPU from a VM and frees its resources.
    631 */
    632static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
    633{
    634	int ret;
    635
    636	if (vcpu->dirty_gfns) {
    637		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
    638		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
    639			    "rc: %i errno: %i", ret, errno);
    640		vcpu->dirty_gfns = NULL;
    641	}
    642
    643	ret = munmap(vcpu->state, vcpu_mmap_sz());
    644	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
    645		"errno: %i", ret, errno);
    646	ret = close(vcpu->fd);
    647	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
    648		"errno: %i", ret, errno);
    649
    650	list_del(&vcpu->list);
    651	free(vcpu);
    652}
    653
    654void kvm_vm_release(struct kvm_vm *vmp)
    655{
    656	struct vcpu *vcpu, *tmp;
    657	int ret;
    658
    659	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
    660		vm_vcpu_rm(vmp, vcpu);
    661
    662	ret = close(vmp->fd);
    663	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
    664		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
    665
    666	ret = close(vmp->kvm_fd);
    667	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
    668		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
    669}
    670
    671static void __vm_mem_region_delete(struct kvm_vm *vm,
    672				   struct userspace_mem_region *region,
    673				   bool unlink)
    674{
    675	int ret;
    676
    677	if (unlink) {
    678		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
    679		rb_erase(&region->hva_node, &vm->regions.hva_tree);
    680		hash_del(&region->slot_node);
    681	}
    682
    683	region->region.memory_size = 0;
    684	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
    685	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
    686		    "rc: %i errno: %i", ret, errno);
    687
    688	sparsebit_free(&region->unused_phy_pages);
    689	ret = munmap(region->mmap_start, region->mmap_size);
    690	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
    691
    692	free(region);
    693}
    694
    695/*
    696 * Destroys and frees the VM pointed to by vmp.
    697 */
    698void kvm_vm_free(struct kvm_vm *vmp)
    699{
    700	int ctr;
    701	struct hlist_node *node;
    702	struct userspace_mem_region *region;
    703
    704	if (vmp == NULL)
    705		return;
    706
    707	/* Free userspace_mem_regions. */
    708	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
    709		__vm_mem_region_delete(vmp, region, false);
    710
    711	/* Free sparsebit arrays. */
    712	sparsebit_free(&vmp->vpages_valid);
    713	sparsebit_free(&vmp->vpages_mapped);
    714
    715	kvm_vm_release(vmp);
    716
    717	/* Free the structure describing the VM. */
    718	free(vmp);
    719}
    720
    721int kvm_memfd_alloc(size_t size, bool hugepages)
    722{
    723	int memfd_flags = MFD_CLOEXEC;
    724	int fd, r;
    725
    726	if (hugepages)
    727		memfd_flags |= MFD_HUGETLB;
    728
    729	fd = memfd_create("kvm_selftest", memfd_flags);
    730	TEST_ASSERT(fd != -1, "memfd_create() failed, errno: %i (%s)",
    731		    errno, strerror(errno));
    732
    733	r = ftruncate(fd, size);
    734	TEST_ASSERT(!r, "ftruncate() failed, errno: %i (%s)", errno, strerror(errno));
    735
    736	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
    737	TEST_ASSERT(!r, "fallocate() failed, errno: %i (%s)", errno, strerror(errno));
    738
    739	return fd;
    740}
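
/*
 * Usage sketch (annotation, not part of the upstream file): map the memfd
 * with MAP_SHARED so additional mappings of the same fd (such as the
 * alias created below in vm_userspace_mem_region_add()) observe the same
 * pages.
 */
static inline void *example_map_memfd(size_t size)
{
	int fd = kvm_memfd_alloc(size, false);
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	TEST_ASSERT(mem != MAP_FAILED, "mmap of memfd failed, errno: %i", errno);
	return mem;
}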
    741
    742/*
    743 * Memory Compare, host virtual to guest virtual
    744 *
    745 * Input Args:
    746 *   hva - Starting host virtual address
    747 *   vm - Virtual Machine
    748 *   gva - Starting guest virtual address
    749 *   len - number of bytes to compare
    750 *
    751 * Output Args: None
    752 *
    753 * Input/Output Args: None
    754 *
    755 * Return:
    756 *   Returns 0 if the bytes starting at hva for a length of len
     757 *   are equal to the guest virtual bytes starting at gva.  Returns
     758 *   a value < 0 if the bytes at hva are less than those at gva.
    759 *   Otherwise a value > 0 is returned.
    760 *
    761 * Compares the bytes starting at the host virtual address hva, for
    762 * a length of len, to the guest bytes starting at the guest virtual
    763 * address given by gva.
    764 */
    765int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
    766{
    767	size_t amt;
    768
    769	/*
     770	 * Compare a batch of bytes until either a mismatch is found
    771	 * or all the bytes have been compared.
    772	 */
    773	for (uintptr_t offset = 0; offset < len; offset += amt) {
    774		uintptr_t ptr1 = (uintptr_t)hva + offset;
    775
    776		/*
    777		 * Determine host address for guest virtual address
    778		 * at offset.
    779		 */
    780		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
    781
    782		/*
    783		 * Determine amount to compare on this pass.
     784		 * Don't allow the comparison to cross a page boundary.
    785		 */
    786		amt = len - offset;
    787		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
    788			amt = vm->page_size - (ptr1 % vm->page_size);
    789		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
    790			amt = vm->page_size - (ptr2 % vm->page_size);
    791
    792		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
    793		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
    794
    795		/*
    796		 * Perform the comparison.  If there is a difference
     797		 * return that result to the caller, otherwise continue
     798		 * looking for a mismatch.
    799		 */
    800		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
    801		if (ret != 0)
    802			return ret;
    803	}
    804
    805	/*
    806	 * No mismatch found.  Let the caller know the two memory
    807	 * areas are equal.
    808	 */
    809	return 0;
    810}
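
/*
 * Usage sketch (annotation, not part of the upstream file): assert that
 * guest memory at gva matches a host-side reference buffer.
 */
static inline void example_assert_guest_matches(struct kvm_vm *vm,
						vm_vaddr_t gva,
						void *host_copy, size_t len)
{
	TEST_ASSERT(!kvm_memcmp_hva_gva(host_copy, vm, gva, len),
		    "guest memory at 0x%lx diverged from host copy", gva);
}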
    811
    812static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
    813					       struct userspace_mem_region *region)
    814{
    815	struct rb_node **cur, *parent;
    816
    817	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
    818		struct userspace_mem_region *cregion;
    819
    820		cregion = container_of(*cur, typeof(*cregion), gpa_node);
    821		parent = *cur;
    822		if (region->region.guest_phys_addr <
    823		    cregion->region.guest_phys_addr)
    824			cur = &(*cur)->rb_left;
    825		else {
    826			TEST_ASSERT(region->region.guest_phys_addr !=
    827				    cregion->region.guest_phys_addr,
    828				    "Duplicate GPA in region tree");
    829
    830			cur = &(*cur)->rb_right;
    831		}
    832	}
    833
    834	rb_link_node(&region->gpa_node, parent, cur);
    835	rb_insert_color(&region->gpa_node, gpa_tree);
    836}
    837
    838static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
    839					       struct userspace_mem_region *region)
    840{
    841	struct rb_node **cur, *parent;
    842
    843	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
    844		struct userspace_mem_region *cregion;
    845
    846		cregion = container_of(*cur, typeof(*cregion), hva_node);
    847		parent = *cur;
    848		if (region->host_mem < cregion->host_mem)
    849			cur = &(*cur)->rb_left;
    850		else {
    851			TEST_ASSERT(region->host_mem !=
    852				    cregion->host_mem,
    853				    "Duplicate HVA in region tree");
    854
    855			cur = &(*cur)->rb_right;
    856		}
    857	}
    858
    859	rb_link_node(&region->hva_node, parent, cur);
    860	rb_insert_color(&region->hva_node, hva_tree);
    861}
    862
    863
    864int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
    865				uint64_t gpa, uint64_t size, void *hva)
    866{
    867	struct kvm_userspace_memory_region region = {
    868		.slot = slot,
    869		.flags = flags,
    870		.guest_phys_addr = gpa,
    871		.memory_size = size,
    872		.userspace_addr = (uintptr_t)hva,
    873	};
    874
    875	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
    876}
    877
    878void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
    879			       uint64_t gpa, uint64_t size, void *hva)
    880{
    881	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
    882
    883	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
    884		    errno, strerror(errno));
    885}
    886
    887/*
    888 * VM Userspace Memory Region Add
    889 *
    890 * Input Args:
    891 *   vm - Virtual Machine
     892 *   src_type - Storage source for this region
     893 *              (e.g. VM_MEM_SRC_ANONYMOUS for anonymous memory).
    894 *   guest_paddr - Starting guest physical address
    895 *   slot - KVM region slot
    896 *   npages - Number of physical pages
    897 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
    898 *
    899 * Output Args: None
    900 *
    901 * Return: None
    902 *
    903 * Allocates a memory area of the number of pages specified by npages
    904 * and maps it to the VM specified by vm, at a starting physical address
    905 * given by guest_paddr.  The region is created with a KVM region slot
    906 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
    907 * region is created with the flags given by flags.
    908 */
    909void vm_userspace_mem_region_add(struct kvm_vm *vm,
    910	enum vm_mem_backing_src_type src_type,
    911	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
    912	uint32_t flags)
    913{
    914	int ret;
    915	struct userspace_mem_region *region;
    916	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
    917	size_t alignment;
    918
    919	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
    920		"Number of guest pages is not compatible with the host. "
     921		"Try npages=%lu", vm_adjust_num_guest_pages(vm->mode, npages));
    922
    923	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
    924		"address not on a page boundary.\n"
    925		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
    926		guest_paddr, vm->page_size);
    927	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
    928		<= vm->max_gfn, "Physical range beyond maximum "
    929		"supported physical address,\n"
    930		"  guest_paddr: 0x%lx npages: 0x%lx\n"
    931		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
    932		guest_paddr, npages, vm->max_gfn, vm->page_size);
    933
    934	/*
    935	 * Confirm a mem region with an overlapping address doesn't
    936	 * already exist.
    937	 */
    938	region = (struct userspace_mem_region *) userspace_mem_region_find(
    939		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
    940	if (region != NULL)
    941		TEST_FAIL("overlapping userspace_mem_region already "
    942			"exists\n"
    943			"  requested guest_paddr: 0x%lx npages: 0x%lx "
    944			"page_size: 0x%x\n"
    945			"  existing guest_paddr: 0x%lx size: 0x%lx",
    946			guest_paddr, npages, vm->page_size,
    947			(uint64_t) region->region.guest_phys_addr,
    948			(uint64_t) region->region.memory_size);
    949
    950	/* Confirm no region with the requested slot already exists. */
    951	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
    952			       slot) {
    953		if (region->region.slot != slot)
    954			continue;
    955
    956		TEST_FAIL("A mem region with the requested slot "
    957			"already exists.\n"
    958			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
    959			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
    960			slot, guest_paddr, npages,
    961			region->region.slot,
    962			(uint64_t) region->region.guest_phys_addr,
    963			(uint64_t) region->region.memory_size);
    964	}
    965
    966	/* Allocate and initialize new mem region structure. */
    967	region = calloc(1, sizeof(*region));
    968	TEST_ASSERT(region != NULL, "Insufficient Memory");
    969	region->mmap_size = npages * vm->page_size;
    970
    971#ifdef __s390x__
    972	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
    973	alignment = 0x100000;
    974#else
    975	alignment = 1;
    976#endif
    977
    978	/*
     979	 * When using THP, mmap is not guaranteed to return a hugepage-aligned
    980	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
    981	 * because mmap will always return an address aligned to the HugeTLB
    982	 * page size.
    983	 */
    984	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
    985		alignment = max(backing_src_pagesz, alignment);
    986
    987	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
    988
    989	/* Add enough memory to align up if necessary */
    990	if (alignment > 1)
    991		region->mmap_size += alignment;
    992
    993	region->fd = -1;
    994	if (backing_src_is_shared(src_type))
    995		region->fd = kvm_memfd_alloc(region->mmap_size,
    996					     src_type == VM_MEM_SRC_SHARED_HUGETLB);
    997
    998	region->mmap_start = mmap(NULL, region->mmap_size,
    999				  PROT_READ | PROT_WRITE,
   1000				  vm_mem_backing_src_alias(src_type)->flag,
   1001				  region->fd, 0);
   1002	TEST_ASSERT(region->mmap_start != MAP_FAILED,
    1003		    "mmap failed, mmap_start: %p errno: %i",
   1004		    region->mmap_start, errno);
   1005
   1006	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
   1007		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
   1008		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
   1009		    region->mmap_start, backing_src_pagesz);
   1010
   1011	/* Align host address */
   1012	region->host_mem = align_ptr_up(region->mmap_start, alignment);
   1013
   1014	/* As needed perform madvise */
   1015	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
   1016	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
   1017		ret = madvise(region->host_mem, npages * vm->page_size,
   1018			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
   1019		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
   1020			    region->host_mem, npages * vm->page_size,
   1021			    vm_mem_backing_src_alias(src_type)->name);
   1022	}
   1023
   1024	region->unused_phy_pages = sparsebit_alloc();
   1025	sparsebit_set_num(region->unused_phy_pages,
   1026		guest_paddr >> vm->page_shift, npages);
   1027	region->region.slot = slot;
   1028	region->region.flags = flags;
   1029	region->region.guest_phys_addr = guest_paddr;
   1030	region->region.memory_size = npages * vm->page_size;
   1031	region->region.userspace_addr = (uintptr_t) region->host_mem;
   1032	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
   1033	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
   1034		"  rc: %i errno: %i\n"
   1035		"  slot: %u flags: 0x%x\n"
   1036		"  guest_phys_addr: 0x%lx size: 0x%lx",
   1037		ret, errno, slot, flags,
   1038		guest_paddr, (uint64_t) region->region.memory_size);
   1039
   1040	/* Add to quick lookup data structures */
   1041	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
   1042	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
   1043	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
   1044
   1045	/* If shared memory, create an alias. */
   1046	if (region->fd >= 0) {
   1047		region->mmap_alias = mmap(NULL, region->mmap_size,
   1048					  PROT_READ | PROT_WRITE,
   1049					  vm_mem_backing_src_alias(src_type)->flag,
   1050					  region->fd, 0);
   1051		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
   1052			    "mmap of alias failed, errno: %i", errno);
   1053
   1054		/* Align host alias address */
   1055		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
   1056	}
   1057}
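
/*
 * Usage sketch (annotation, not part of the upstream file): add a 16-page
 * anonymous region at 1 GiB in slot 1 with dirty logging enabled.  The
 * address, slot and page count are illustrative only.
 */
static inline void example_add_logged_region(struct kvm_vm *vm)
{
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    1ul << 30 /* guest_paddr */,
				    1 /* slot */, 16 /* npages */,
				    KVM_MEM_LOG_DIRTY_PAGES);
}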
   1058
   1059/*
   1060 * Memslot to region
   1061 *
   1062 * Input Args:
   1063 *   vm - Virtual Machine
   1064 *   memslot - KVM memory slot ID
   1065 *
   1066 * Output Args: None
   1067 *
   1068 * Return:
    1069 *   Pointer to the memory region structure that describes the memory
    1070 *   region using the KVM memory slot ID given by memslot.  TEST_ASSERT failure
   1071 *   on error (e.g. currently no memory region using memslot as a KVM
   1072 *   memory slot ID).
   1073 */
   1074struct userspace_mem_region *
   1075memslot2region(struct kvm_vm *vm, uint32_t memslot)
   1076{
   1077	struct userspace_mem_region *region;
   1078
   1079	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
   1080			       memslot)
   1081		if (region->region.slot == memslot)
   1082			return region;
   1083
   1084	fprintf(stderr, "No mem region with the requested slot found,\n"
   1085		"  requested slot: %u\n", memslot);
   1086	fputs("---- vm dump ----\n", stderr);
   1087	vm_dump(stderr, vm, 2);
   1088	TEST_FAIL("Mem region not found");
   1089	return NULL;
   1090}
   1091
   1092/*
   1093 * VM Memory Region Flags Set
   1094 *
   1095 * Input Args:
   1096 *   vm - Virtual Machine
    1097 *   slot - Memory slot ID; flags - Flags to set (e.g. KVM_MEM_LOG_DIRTY_PAGES)
   1098 *
   1099 * Output Args: None
   1100 *
   1101 * Return: None
   1102 *
   1103 * Sets the flags of the memory region specified by the value of slot,
   1104 * to the values given by flags.
   1105 */
   1106void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
   1107{
   1108	int ret;
   1109	struct userspace_mem_region *region;
   1110
   1111	region = memslot2region(vm, slot);
   1112
   1113	region->region.flags = flags;
   1114
   1115	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
   1116
   1117	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
   1118		"  rc: %i errno: %i slot: %u flags: 0x%x",
   1119		ret, errno, slot, flags);
   1120}
   1121
   1122/*
   1123 * VM Memory Region Move
   1124 *
   1125 * Input Args:
   1126 *   vm - Virtual Machine
   1127 *   slot - Slot of the memory region to move
   1128 *   new_gpa - Starting guest physical address
   1129 *
   1130 * Output Args: None
   1131 *
   1132 * Return: None
   1133 *
   1134 * Change the gpa of a memory region.
   1135 */
   1136void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
   1137{
   1138	struct userspace_mem_region *region;
   1139	int ret;
   1140
   1141	region = memslot2region(vm, slot);
   1142
   1143	region->region.guest_phys_addr = new_gpa;
   1144
   1145	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
   1146
   1147	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
   1148		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
   1149		    ret, errno, slot, new_gpa);
   1150}
   1151
   1152/*
   1153 * VM Memory Region Delete
   1154 *
   1155 * Input Args:
   1156 *   vm - Virtual Machine
   1157 *   slot - Slot of the memory region to delete
   1158 *
   1159 * Output Args: None
   1160 *
   1161 * Return: None
   1162 *
   1163 * Delete a memory region.
   1164 */
   1165void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
   1166{
   1167	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
   1168}
   1169
   1170/*
   1171 * VCPU mmap Size
   1172 *
   1173 * Input Args: None
   1174 *
   1175 * Output Args: None
   1176 *
   1177 * Return:
   1178 *   Size of VCPU state
   1179 *
   1180 * Returns the size of the structure pointed to by the return value
   1181 * of vcpu_state().
   1182 */
   1183static int vcpu_mmap_sz(void)
   1184{
   1185	int dev_fd, ret;
   1186
   1187	dev_fd = open_kvm_dev_path_or_exit();
   1188
   1189	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
   1190	TEST_ASSERT(ret >= sizeof(struct kvm_run),
   1191		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
   1192		__func__, ret, errno);
   1193
   1194	close(dev_fd);
   1195
   1196	return ret;
   1197}
   1198
   1199/*
   1200 * VM VCPU Add
   1201 *
   1202 * Input Args:
   1203 *   vm - Virtual Machine
   1204 *   vcpuid - VCPU ID
   1205 *
   1206 * Output Args: None
   1207 *
   1208 * Return: None
   1209 *
   1210 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
   1211 * No additional VCPU setup is done.
   1212 */
   1213void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
   1214{
   1215	struct vcpu *vcpu;
   1216
   1217	/* Confirm a vcpu with the specified id doesn't already exist. */
   1218	vcpu = vcpu_find(vm, vcpuid);
   1219	if (vcpu != NULL)
   1220		TEST_FAIL("vcpu with the specified id "
   1221			"already exists,\n"
   1222			"  requested vcpuid: %u\n"
   1223			"  existing vcpuid: %u state: %p",
   1224			vcpuid, vcpu->id, vcpu->state);
   1225
   1226	/* Allocate and initialize new vcpu structure. */
   1227	vcpu = calloc(1, sizeof(*vcpu));
   1228	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
   1229	vcpu->id = vcpuid;
   1230	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
   1231	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
   1232		vcpu->fd, errno);
   1233
   1234	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
   1235		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
   1236		vcpu_mmap_sz(), sizeof(*vcpu->state));
   1237	vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
   1238		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
   1239	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
   1240		"vcpu id: %u errno: %i", vcpuid, errno);
   1241
   1242	/* Add to linked-list of VCPUs. */
   1243	list_add(&vcpu->list, &vm->vcpus);
   1244}
   1245
   1246/*
   1247 * VM Virtual Address Unused Gap
   1248 *
   1249 * Input Args:
   1250 *   vm - Virtual Machine
   1251 *   sz - Size (bytes)
   1252 *   vaddr_min - Minimum Virtual Address
   1253 *
   1254 * Output Args: None
   1255 *
   1256 * Return:
    1257 *   Lowest virtual address at or above vaddr_min, with at least
   1258 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
   1259 *   size sz is available.
   1260 *
   1261 * Within the VM specified by vm, locates the lowest starting virtual
   1262 * address >= vaddr_min, that has at least sz unallocated bytes.  A
    1263 * TEST_ASSERT failure occurs for invalid input or if no area of at
    1264 * least sz unallocated bytes >= vaddr_min is available.
   1265 */
   1266static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
   1267				      vm_vaddr_t vaddr_min)
   1268{
   1269	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
   1270
   1271	/* Determine lowest permitted virtual page index. */
   1272	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
   1273	if ((pgidx_start * vm->page_size) < vaddr_min)
   1274		goto no_va_found;
   1275
   1276	/* Loop over section with enough valid virtual page indexes. */
   1277	if (!sparsebit_is_set_num(vm->vpages_valid,
   1278		pgidx_start, pages))
   1279		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
   1280			pgidx_start, pages);
   1281	do {
   1282		/*
   1283		 * Are there enough unused virtual pages available at
    1284		 * the currently proposed starting virtual page index?
   1285		 * If not, adjust proposed starting index to next
   1286		 * possible.
   1287		 */
   1288		if (sparsebit_is_clear_num(vm->vpages_mapped,
   1289			pgidx_start, pages))
   1290			goto va_found;
   1291		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
   1292			pgidx_start, pages);
   1293		if (pgidx_start == 0)
   1294			goto no_va_found;
   1295
   1296		/*
    1297		 * If needed, adjust the proposed starting virtual address
    1298		 * to the next range of valid virtual addresses.
   1299		 */
   1300		if (!sparsebit_is_set_num(vm->vpages_valid,
   1301			pgidx_start, pages)) {
   1302			pgidx_start = sparsebit_next_set_num(
   1303				vm->vpages_valid, pgidx_start, pages);
   1304			if (pgidx_start == 0)
   1305				goto no_va_found;
   1306		}
   1307	} while (pgidx_start != 0);
   1308
   1309no_va_found:
   1310	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
   1311
   1312	/* NOT REACHED */
   1313	return -1;
   1314
   1315va_found:
   1316	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
   1317		pgidx_start, pages),
   1318		"Unexpected, invalid virtual page index range,\n"
   1319		"  pgidx_start: 0x%lx\n"
   1320		"  pages: 0x%lx",
   1321		pgidx_start, pages);
   1322	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
   1323		pgidx_start, pages),
   1324		"Unexpected, pages already mapped,\n"
   1325		"  pgidx_start: 0x%lx\n"
   1326		"  pages: 0x%lx",
   1327		pgidx_start, pages);
   1328
   1329	return pgidx_start * vm->page_size;
   1330}
   1331
   1332/*
   1333 * VM Virtual Address Allocate
   1334 *
   1335 * Input Args:
   1336 *   vm - Virtual Machine
   1337 *   sz - Size in bytes
   1338 *   vaddr_min - Minimum starting virtual address
    1339 *   (data pages and any new page tables are allocated from
    1340 *   memslot 0)
   1341 *
   1342 * Output Args: None
   1343 *
   1344 * Return:
   1345 *   Starting guest virtual address
   1346 *
   1347 * Allocates at least sz bytes within the virtual address space of the vm
   1348 * given by vm.  The allocated bytes are mapped to a virtual address >=
   1349 * the address given by vaddr_min.  Note that each allocation uses a
    1350 * unique set of pages, with the minimum real allocation being at least
   1351 * a page.
   1352 */
   1353vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
   1354{
   1355	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
   1356
   1357	virt_pgd_alloc(vm);
   1358	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
   1359					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
   1360
   1361	/*
   1362	 * Find an unused range of virtual page addresses of at least
   1363	 * pages in length.
   1364	 */
   1365	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
   1366
   1367	/* Map the virtual pages. */
   1368	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
   1369		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
   1370
   1371		virt_pg_map(vm, vaddr, paddr);
   1372
   1373		sparsebit_set(vm->vpages_mapped,
   1374			vaddr >> vm->page_shift);
   1375	}
   1376
   1377	return vaddr_start;
   1378}
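
/*
 * Usage sketch (annotation, not part of the upstream file): allocate one
 * guest page and zero it through its host mapping.
 */
static inline vm_vaddr_t example_alloc_zeroed_page(struct kvm_vm *vm)
{
	vm_vaddr_t gva = vm_vaddr_alloc(vm, vm->page_size, KVM_UTIL_MIN_VADDR);

	memset(addr_gva2hva(vm, gva), 0, vm->page_size);
	return gva;
}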
   1379
   1380/*
   1381 * VM Virtual Address Allocate Pages
   1382 *
   1383 * Input Args:
    1384 *   vm - Virtual Machine; nr_pages - Number of system pages to allocate
   1385 *
   1386 * Output Args: None
   1387 *
   1388 * Return:
   1389 *   Starting guest virtual address
   1390 *
   1391 * Allocates at least N system pages worth of bytes within the virtual address
   1392 * space of the vm.
   1393 */
   1394vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
   1395{
   1396	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
   1397}
   1398
   1399/*
   1400 * VM Virtual Address Allocate Page
   1401 *
   1402 * Input Args:
   1403 *   vm - Virtual Machine
   1404 *
   1405 * Output Args: None
   1406 *
   1407 * Return:
   1408 *   Starting guest virtual address
   1409 *
   1410 * Allocates at least one system page worth of bytes within the virtual address
   1411 * space of the vm.
   1412 */
   1413vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
   1414{
   1415	return vm_vaddr_alloc_pages(vm, 1);
   1416}
   1417
   1418/*
   1419 * Map a range of VM virtual address to the VM's physical address
   1420 *
   1421 * Input Args:
   1422 *   vm - Virtual Machine
    1423 *   vaddr - Virtual address to map
   1424 *   paddr - VM Physical Address
   1425 *   npages - The number of pages to map
    1426 *   (new page tables are allocated from memslot 0)
   1427 *
   1428 * Output Args: None
   1429 *
   1430 * Return: None
   1431 *
   1432 * Within the VM given by @vm, creates a virtual translation for
   1433 * @npages starting at @vaddr to the page range starting at @paddr.
   1434 */
   1435void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
   1436	      unsigned int npages)
   1437{
   1438	size_t page_size = vm->page_size;
   1439	size_t size = npages * page_size;
   1440
   1441	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
   1442	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
   1443
   1444	while (npages--) {
   1445		virt_pg_map(vm, vaddr, paddr);
   1446		vaddr += page_size;
   1447		paddr += page_size;
   1448	}
   1449}
   1450
   1451/*
   1452 * Address VM Physical to Host Virtual
   1453 *
   1454 * Input Args:
   1455 *   vm - Virtual Machine
   1456 *   gpa - VM physical address
   1457 *
   1458 * Output Args: None
   1459 *
   1460 * Return:
   1461 *   Equivalent host virtual address
   1462 *
   1463 * Locates the memory region containing the VM physical address given
   1464 * by gpa, within the VM given by vm.  When found, the host virtual
   1465 * address providing the memory to the vm physical address is returned.
   1466 * A TEST_ASSERT failure occurs if no region containing gpa exists.
   1467 */
   1468void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
   1469{
   1470	struct userspace_mem_region *region;
   1471
   1472	region = userspace_mem_region_find(vm, gpa, gpa);
   1473	if (!region) {
   1474		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
   1475		return NULL;
   1476	}
   1477
   1478	return (void *)((uintptr_t)region->host_mem
   1479		+ (gpa - region->region.guest_phys_addr));
   1480}
   1481
   1482/*
   1483 * Address Host Virtual to VM Physical
   1484 *
   1485 * Input Args:
   1486 *   vm - Virtual Machine
   1487 *   hva - Host virtual address
   1488 *
   1489 * Output Args: None
   1490 *
   1491 * Return:
   1492 *   Equivalent VM physical address
   1493 *
   1494 * Locates the memory region containing the host virtual address given
   1495 * by hva, within the VM given by vm.  When found, the equivalent
   1496 * VM physical address is returned. A TEST_ASSERT failure occurs if no
   1497 * region containing hva exists.
   1498 */
   1499vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
   1500{
   1501	struct rb_node *node;
   1502
   1503	for (node = vm->regions.hva_tree.rb_node; node; ) {
   1504		struct userspace_mem_region *region =
   1505			container_of(node, struct userspace_mem_region, hva_node);
   1506
   1507		if (hva >= region->host_mem) {
   1508			if (hva <= (region->host_mem
   1509				+ region->region.memory_size - 1))
   1510				return (vm_paddr_t)((uintptr_t)
   1511					region->region.guest_phys_addr
   1512					+ (hva - (uintptr_t)region->host_mem));
   1513
   1514			node = node->rb_right;
   1515		} else
   1516			node = node->rb_left;
   1517	}
   1518
   1519	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
   1520	return -1;
   1521}
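
/*
 * Sketch (annotation, not part of the upstream file): for any GPA inside
 * a memslot, addr_gpa2hva() and addr_hva2gpa() invert each other; a
 * hypothetical round-trip check.
 */
static inline void example_check_gpa_round_trip(struct kvm_vm *vm,
						vm_paddr_t gpa)
{
	TEST_ASSERT(addr_hva2gpa(vm, addr_gpa2hva(vm, gpa)) == gpa,
		    "gpa -> hva -> gpa round trip broke for 0x%lx", gpa);
}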
   1522
   1523/*
   1524 * Address VM physical to Host Virtual *alias*.
   1525 *
   1526 * Input Args:
   1527 *   vm - Virtual Machine
   1528 *   gpa - VM physical address
   1529 *
   1530 * Output Args: None
   1531 *
   1532 * Return:
   1533 *   Equivalent address within the host virtual *alias* area, or NULL
   1534 *   (without failing the test) if the guest memory is not shared (so
   1535 *   no alias exists).
   1536 *
   1537 * When vm_create() and related functions are called with a shared memory
   1538 * src_type, we also create a writable, shared alias mapping of the
   1539 * underlying guest memory. This allows the host to manipulate guest memory
   1540 * without mapping that memory in the guest's address space. And, for
   1541 * userfaultfd-based demand paging, we can do so without triggering userfaults.
   1542 */
   1543void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
   1544{
   1545	struct userspace_mem_region *region;
   1546	uintptr_t offset;
   1547
   1548	region = userspace_mem_region_find(vm, gpa, gpa);
   1549	if (!region)
   1550		return NULL;
   1551
   1552	if (!region->host_alias)
   1553		return NULL;
   1554
   1555	offset = gpa - region->region.guest_phys_addr;
   1556	return (void *) ((uintptr_t) region->host_alias + offset);
   1557}
   1558
   1559/*
   1560 * VM Create IRQ Chip
   1561 *
   1562 * Input Args:
   1563 *   vm - Virtual Machine
   1564 *
   1565 * Output Args: None
   1566 *
   1567 * Return: None
   1568 *
   1569 * Creates an interrupt controller chip for the VM specified by vm.
   1570 */
   1571void vm_create_irqchip(struct kvm_vm *vm)
   1572{
   1573	int ret;
   1574
   1575	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
   1576	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
   1577		"rc: %i errno: %i", ret, errno);
   1578
   1579	vm->has_irqchip = true;
   1580}
   1581
   1582/*
   1583 * VM VCPU State
   1584 *
   1585 * Input Args:
   1586 *   vm - Virtual Machine
   1587 *   vcpuid - VCPU ID
   1588 *
   1589 * Output Args: None
   1590 *
   1591 * Return:
   1592 *   Pointer to structure that describes the state of the VCPU.
   1593 *
   1594 * Locates and returns a pointer to a structure that describes the
   1595 * state of the VCPU with the given vcpuid.
   1596 */
   1597struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
   1598{
   1599	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1600	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1601
   1602	return vcpu->state;
   1603}
   1604
   1605/*
   1606 * VM VCPU Run
   1607 *
   1608 * Input Args:
   1609 *   vm - Virtual Machine
   1610 *   vcpuid - VCPU ID
   1611 *
   1612 * Output Args: None
   1613 *
   1614 * Return: None
   1615 *
   1616 * Switch to executing the code for the VCPU given by vcpuid, within the VM
   1617 * given by vm.
   1618 */
   1619void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
   1620{
   1621	int ret = _vcpu_run(vm, vcpuid);
   1622	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
   1623		"rc: %i errno: %i", ret, errno);
   1624}
   1625
   1626int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
   1627{
   1628	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1629	int rc;
   1630
   1631	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1632	do {
   1633		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
   1634	} while (rc == -1 && errno == EINTR);
   1635
   1636	assert_on_unhandled_exception(vm, vcpuid);
   1637
   1638	return rc;
   1639}
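
/*
 * Usage sketch (annotation, not part of the upstream file): a typical run
 * loop calls KVM_RUN until the guest triggers port I/O (e.g. a ucall on
 * x86).
 */
static inline void example_run_until_io(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_run *run = vcpu_state(vm, vcpuid);

	do {
		vcpu_run(vm, vcpuid);
	} while (run->exit_reason != KVM_EXIT_IO);
}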
   1640
   1641int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
   1642{
   1643	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1644
   1645	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1646
   1647	return vcpu->fd;
   1648}
   1649
   1650void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
   1651{
   1652	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1653	int ret;
   1654
   1655	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1656
   1657	vcpu->state->immediate_exit = 1;
   1658	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
   1659	vcpu->state->immediate_exit = 0;
   1660
   1661	TEST_ASSERT(ret == -1 && errno == EINTR,
   1662		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
   1663		    ret, errno);
   1664}
   1665
   1666void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
   1667			  struct kvm_guest_debug *debug)
   1668{
   1669	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1670	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
   1671
   1672	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
   1673}
   1674
   1675/*
   1676 * VM VCPU Set MP State
   1677 *
   1678 * Input Args:
   1679 *   vm - Virtual Machine
   1680 *   vcpuid - VCPU ID
   1681 *   mp_state - mp_state to be set
   1682 *
   1683 * Output Args: None
   1684 *
   1685 * Return: None
   1686 *
   1687 * Sets the MP state of the VCPU given by vcpuid, to the state given
   1688 * by mp_state.
   1689 */
   1690void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
   1691		       struct kvm_mp_state *mp_state)
   1692{
   1693	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1694	int ret;
   1695
   1696	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1697
   1698	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
   1699	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
   1700		"rc: %i errno: %i", ret, errno);
   1701}
   1702
   1703/*
   1704 * VM VCPU Get Reg List
   1705 *
   1706 * Input Args:
   1707 *   vm - Virtual Machine
   1708 *   vcpuid - VCPU ID
   1709 *
   1710 * Output Args:
   1711 *   None
   1712 *
   1713 * Return:
   1714 *   A pointer to an allocated struct kvm_reg_list
   1715 *
   1716 * Get the list of guest registers which are supported for
   1717 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
   1718 */
   1719struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
   1720{
   1721	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
   1722	int ret;
   1723
   1724	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
    1725	TEST_ASSERT(ret == -1 && errno == E2BIG,
        		    "KVM_GET_REG_LIST with n=0 should fail with E2BIG");
    1726	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
        	TEST_ASSERT(reg_list, "Failed to allocate register list");
    1727	reg_list->n = reg_list_n.n;
   1728	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
   1729	return reg_list;
   1730}
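
        /*
         * Usage sketch (illustrative): enumerate the supported register IDs
         * and read each one back via vcpu_get_reg(). A plain uint64_t datum
         * assumes 64-bit registers; wider registers need a buffer sized from
         * the register ID's size field.
         *
         *	struct kvm_reg_list *list = vcpu_get_reg_list(vm, VCPU_ID);
         *	uint64_t datum;
         *	struct kvm_one_reg reg = { .addr = (uint64_t)&datum };
         *	__u64 i;
         *
         *	for (i = 0; i < list->n; i++) {
         *		reg.id = list->reg[i];
         *		vcpu_get_reg(vm, VCPU_ID, &reg);
         *	}
         *	free(list);
         */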
   1731
   1732/*
   1733 * VM VCPU Regs Get
   1734 *
   1735 * Input Args:
   1736 *   vm - Virtual Machine
   1737 *   vcpuid - VCPU ID
   1738 *
   1739 * Output Args:
   1740 *   regs - current state of VCPU regs
   1741 *
   1742 * Return: None
   1743 *
   1744 * Obtains the current register state for the VCPU specified by vcpuid
   1745 * and stores it at the location given by regs.
   1746 */
   1747void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
   1748{
   1749	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1750	int ret;
   1751
   1752	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1753
   1754	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
   1755	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
   1756		ret, errno);
   1757}
   1758
   1759/*
   1760 * VM VCPU Regs Set
   1761 *
   1762 * Input Args:
   1763 *   vm - Virtual Machine
   1764 *   vcpuid - VCPU ID
   1765 *   regs - Values to set VCPU regs to
   1766 *
   1767 * Output Args: None
   1768 *
   1769 * Return: None
   1770 *
   1771 * Sets the regs of the VCPU specified by vcpuid to the values
   1772 * given by regs.
   1773 */
   1774void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
   1775{
   1776	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1777	int ret;
   1778
   1779	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1780
   1781	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
   1782	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
   1783		ret, errno);
   1784}
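
        /*
         * Usage sketch (illustrative, x86): redirect a VCPU to a new guest
         * entry point by rewriting RIP. GUEST_ENTRY_GVA is hypothetical.
         *
         *	struct kvm_regs regs;
         *
         *	vcpu_regs_get(vm, VCPU_ID, &regs);
         *	regs.rip = GUEST_ENTRY_GVA;
         *	vcpu_regs_set(vm, VCPU_ID, &regs);
         */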
   1785
   1786#ifdef __KVM_HAVE_VCPU_EVENTS
   1787void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
   1788		     struct kvm_vcpu_events *events)
   1789{
   1790	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1791	int ret;
   1792
   1793	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1794
   1795	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
    1796	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i",
   1797		ret, errno);
   1798}
   1799
   1800void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
   1801		     struct kvm_vcpu_events *events)
   1802{
   1803	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1804	int ret;
   1805
   1806	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1807
   1808	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
    1809	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i",
   1810		ret, errno);
   1811}
   1812#endif
   1813
   1814#ifdef __x86_64__
   1815void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
   1816			   struct kvm_nested_state *state)
   1817{
   1818	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1819	int ret;
   1820
   1821	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1822
   1823	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
   1824	TEST_ASSERT(ret == 0,
    1825		"KVM_GET_NESTED_STATE failed, ret: %i errno: %i",
   1826		ret, errno);
   1827}
   1828
   1829int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
   1830			  struct kvm_nested_state *state, bool ignore_error)
   1831{
   1832	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1833	int ret;
   1834
   1835	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1836
   1837	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
   1838	if (!ignore_error) {
   1839		TEST_ASSERT(ret == 0,
   1840			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
   1841			ret, errno);
   1842	}
   1843
   1844	return ret;
   1845}
   1846#endif
   1847
   1848/*
   1849 * VM VCPU System Regs Get
   1850 *
   1851 * Input Args:
   1852 *   vm - Virtual Machine
   1853 *   vcpuid - VCPU ID
   1854 *
   1855 * Output Args:
   1856 *   sregs - current state of VCPU system regs
   1857 *
   1858 * Return: None
   1859 *
   1860 * Obtains the current system register state for the VCPU specified by
   1861 * vcpuid and stores it at the location given by sregs.
   1862 */
   1863void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
   1864{
   1865	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1866	int ret;
   1867
   1868	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1869
   1870	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
   1871	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
   1872		ret, errno);
   1873}
   1874
   1875/*
   1876 * VM VCPU System Regs Set
   1877 *
   1878 * Input Args:
   1879 *   vm - Virtual Machine
   1880 *   vcpuid - VCPU ID
   1881 *   sregs - Values to set VCPU system regs to
   1882 *
   1883 * Output Args: None
   1884 *
   1885 * Return: None
   1886 *
   1887 * Sets the system regs of the VCPU specified by vcpuid to the values
   1888 * given by sregs.
   1889 */
   1890void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
   1891{
   1892	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
   1893	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
   1894		"rc: %i errno: %i", ret, errno);
   1895}
   1896
   1897int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
   1898{
   1899	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1900
   1901	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1902
   1903	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
   1904}
   1905
   1906void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
   1907{
   1908	int ret;
   1909
   1910	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
   1911	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
   1912		    ret, errno, strerror(errno));
   1913}
   1914
   1915void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
   1916{
   1917	int ret;
   1918
   1919	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
   1920	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
   1921		    ret, errno, strerror(errno));
   1922}
   1923
   1924void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
   1925{
   1926	int ret;
   1927
   1928	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
   1929	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
   1930		    ret, errno, strerror(errno));
   1931}
   1932
   1933void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
   1934{
   1935	int ret;
   1936
   1937	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
   1938	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
   1939		    ret, errno, strerror(errno));
   1940}
   1941
   1942/*
   1943 * VCPU Ioctl
   1944 *
   1945 * Input Args:
   1946 *   vm - Virtual Machine
   1947 *   vcpuid - VCPU ID
   1948 *   cmd - Ioctl number
   1949 *   arg - Argument to pass to the ioctl
   1950 *
   1951 * Return: None
   1952 *
   1953 * Issues an arbitrary ioctl on a VCPU fd.
   1954 */
   1955void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
   1956		unsigned long cmd, void *arg)
   1957{
   1958	int ret;
   1959
   1960	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
   1961	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
   1962		cmd, ret, errno, strerror(errno));
   1963}
   1964
   1965int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
   1966		unsigned long cmd, void *arg)
   1967{
   1968	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   1969	int ret;
   1970
   1971	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
   1972
   1973	ret = ioctl(vcpu->fd, cmd, arg);
   1974
   1975	return ret;
   1976}
   1977
   1978void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
   1979{
   1980	struct vcpu *vcpu;
   1981	uint32_t size = vm->dirty_ring_size;
   1982
    1983	TEST_ASSERT(size > 0, "Dirty ring must be enabled before it can be mapped");
   1984
   1985	vcpu = vcpu_find(vm, vcpuid);
   1986
   1987	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
   1988
   1989	if (!vcpu->dirty_gfns) {
   1990		void *addr;
   1991
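        		/*
        		 * The dirty ring may only be mapped shared and
        		 * non-executable; probe that private and executable
        		 * mappings are rejected before creating the real
        		 * mapping below.
        		 */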
   1992		addr = mmap(NULL, size, PROT_READ,
   1993			    MAP_PRIVATE, vcpu->fd,
   1994			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
    1995		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring unexpectedly mapped MAP_PRIVATE");
   1996
   1997		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
   1998			    MAP_PRIVATE, vcpu->fd,
   1999			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
    2000		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring unexpectedly mapped PROT_EXEC");
   2001
   2002		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
   2003			    MAP_SHARED, vcpu->fd,
   2004			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
   2005		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
   2006
   2007		vcpu->dirty_gfns = addr;
   2008		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
   2009	}
   2010
   2011	return vcpu->dirty_gfns;
   2012}
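
        /*
         * Harvesting sketch (illustrative): walk the mapped ring, collect
         * dirty GFNs, and flag the harvested entries for reset. "fetch" is
         * the caller's persistent ring cursor and collect() a hypothetical
         * consumer; KVM_RESET_DIRTY_RINGS recycles the flagged entries.
         *
         *	struct kvm_dirty_gfn *gfns = vcpu_map_dirty_ring(vm, VCPU_ID);
         *	uint32_t mask = vm->dirty_ring_size / sizeof(*gfns) - 1;
         *
         *	while (gfns[fetch & mask].flags & KVM_DIRTY_GFN_F_DIRTY) {
         *		collect(gfns[fetch & mask].slot,
         *			gfns[fetch & mask].offset);
         *		gfns[fetch & mask].flags |= KVM_DIRTY_GFN_F_RESET;
         *		fetch++;
         *	}
         *	vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
         */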
   2013
   2014/*
   2015 * VM Ioctl
   2016 *
   2017 * Input Args:
   2018 *   vm - Virtual Machine
   2019 *   cmd - Ioctl number
   2020 *   arg - Argument to pass to the ioctl
   2021 *
   2022 * Return: None
   2023 *
   2024 * Issues an arbitrary ioctl on a VM fd.
   2025 */
   2026void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
   2027{
   2028	int ret;
   2029
   2030	ret = _vm_ioctl(vm, cmd, arg);
   2031	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
   2032		cmd, ret, errno, strerror(errno));
   2033}
   2034
   2035int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
   2036{
   2037	return ioctl(vm->fd, cmd, arg);
   2038}
   2039
   2040/*
   2041 * KVM system ioctl
   2042 *
   2043 * Input Args:
   2044 *   vm - Virtual Machine
   2045 *   cmd - Ioctl number
   2046 *   arg - Argument to pass to the ioctl
   2047 *
   2048 * Return: None
   2049 *
   2050 * Issues an arbitrary ioctl on a KVM fd.
   2051 */
   2052void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
   2053{
   2054	int ret;
   2055
    2056	ret = _kvm_ioctl(vm, cmd, arg);
   2057	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
   2058		cmd, ret, errno, strerror(errno));
   2059}
   2060
   2061int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
   2062{
   2063	return ioctl(vm->kvm_fd, cmd, arg);
   2064}
   2065
   2066/*
   2067 * Device Ioctl
   2068 */
   2069
   2070int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
   2071{
   2072	struct kvm_device_attr attribute = {
   2073		.group = group,
   2074		.attr = attr,
   2075		.flags = 0,
   2076	};
   2077
   2078	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
   2079}
   2080
   2081int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
   2082{
   2083	int ret = _kvm_device_check_attr(dev_fd, group, attr);
   2084
   2085	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
   2086	return ret;
   2087}
   2088
   2089int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
   2090{
   2091	struct kvm_create_device create_dev;
   2092	int ret;
   2093
   2094	create_dev.type = type;
   2095	create_dev.fd = -1;
   2096	create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
   2097	ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
   2098	*fd = create_dev.fd;
   2099	return ret;
   2100}
   2101
   2102int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
   2103{
   2104	int fd, ret;
   2105
   2106	ret = _kvm_create_device(vm, type, test, &fd);
   2107
   2108	if (!test) {
   2109		TEST_ASSERT(!ret,
   2110			    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
   2111		return fd;
   2112	}
   2113	return ret;
   2114}
   2115
   2116int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
   2117		      void *val, bool write)
   2118{
   2119	struct kvm_device_attr kvmattr = {
   2120		.group = group,
   2121		.attr = attr,
   2122		.flags = 0,
   2123		.addr = (uintptr_t)val,
   2124	};
   2125	int ret;
   2126
   2127	ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
   2128		    &kvmattr);
   2129	return ret;
   2130}
   2131
   2132int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
   2133		      void *val, bool write)
   2134{
   2135	int ret = _kvm_device_access(dev_fd, group, attr, val, write);
   2136
   2137	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
   2138	return ret;
   2139}
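
        /*
         * Usage sketch (illustrative, aarch64): create an in-kernel vGICv3
         * and program its distributor base. The 0x8000000 base is a
         * hypothetical guest-physical address.
         *
         *	int gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
         *	uint64_t addr = 0x8000000;
         *
         *	kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
         *			  KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
         */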
   2140
   2141int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
   2142			  uint64_t attr)
   2143{
   2144	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   2145
    2146	TEST_ASSERT(vcpu, "nonexistent vcpu id: %u", vcpuid);
   2147
   2148	return _kvm_device_check_attr(vcpu->fd, group, attr);
   2149}
   2150
   2151int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
   2152				 uint64_t attr)
   2153{
   2154	int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr);
   2155
   2156	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
   2157	return ret;
   2158}
   2159
   2160int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
   2161			     uint64_t attr, void *val, bool write)
   2162{
   2163	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   2164
    2165	TEST_ASSERT(vcpu, "nonexistent vcpu id: %u", vcpuid);
   2166
   2167	return _kvm_device_access(vcpu->fd, group, attr, val, write);
   2168}
   2169
   2170int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
   2171			    uint64_t attr, void *val, bool write)
   2172{
   2173	int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write);
   2174
   2175	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
   2176	return ret;
   2177}
   2178
   2179/*
   2180 * IRQ related functions.
   2181 */
   2182
   2183int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
   2184{
   2185	struct kvm_irq_level irq_level = {
   2186		.irq    = irq,
   2187		.level  = level,
   2188	};
   2189
   2190	return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
   2191}
   2192
   2193void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
   2194{
   2195	int ret = _kvm_irq_line(vm, irq, level);
   2196
   2197	TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
   2198}
   2199
   2200struct kvm_irq_routing *kvm_gsi_routing_create(void)
   2201{
   2202	struct kvm_irq_routing *routing;
   2203	size_t size;
   2204
   2205	size = sizeof(struct kvm_irq_routing);
    2206	/* Allocate space for the max number of entries: this wastes ~196 KB. */
   2207	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
   2208	routing = calloc(1, size);
   2209	assert(routing);
   2210
   2211	return routing;
   2212}
   2213
   2214void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
   2215		uint32_t gsi, uint32_t pin)
   2216{
   2217	int i;
   2218
   2219	assert(routing);
   2220	assert(routing->nr < KVM_MAX_IRQ_ROUTES);
   2221
   2222	i = routing->nr;
   2223	routing->entries[i].gsi = gsi;
   2224	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
   2225	routing->entries[i].flags = 0;
   2226	routing->entries[i].u.irqchip.irqchip = 0;
   2227	routing->entries[i].u.irqchip.pin = pin;
   2228	routing->nr++;
   2229}
   2230
   2231int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
   2232{
   2233	int ret;
   2234
   2235	assert(routing);
   2236	ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
   2237	free(routing);
   2238
   2239	return ret;
   2240}
   2241
   2242void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
   2243{
   2244	int ret;
   2245
   2246	ret = _kvm_gsi_routing_write(vm, routing);
   2247	TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
   2248				ret, errno);
   2249}
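
        /*
         * Usage sketch (illustrative): route two GSIs to irqchip pins,
         * commit the table, then pulse one of the lines. Note that
         * kvm_gsi_routing_write() frees the routing table, so it must not
         * be reused afterwards.
         *
         *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
         *
         *	kvm_gsi_routing_irqchip_add(routing, 0, 0);
         *	kvm_gsi_routing_irqchip_add(routing, 1, 1);
         *	kvm_gsi_routing_write(vm, routing);
         *
         *	kvm_irq_line(vm, 0, 1);	/* raise GSI 0 */
         *	kvm_irq_line(vm, 0, 0);	/* lower it again */
         */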
   2250
   2251/*
   2252 * VM Dump
   2253 *
   2254 * Input Args:
   2255 *   vm - Virtual Machine
   2256 *   indent - Left margin indent amount
   2257 *
   2258 * Output Args:
   2259 *   stream - Output FILE stream
   2260 *
   2261 * Return: None
   2262 *
   2263 * Dumps the current state of the VM given by vm, to the FILE stream
   2264 * given by stream.
   2265 */
   2266void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
   2267{
   2268	int ctr;
   2269	struct userspace_mem_region *region;
   2270	struct vcpu *vcpu;
   2271
   2272	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
   2273	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
   2274	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
   2275	fprintf(stream, "%*sMem Regions:\n", indent, "");
   2276	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
   2277		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
   2278			"host_virt: %p\n", indent + 2, "",
   2279			(uint64_t) region->region.guest_phys_addr,
   2280			(uint64_t) region->region.memory_size,
   2281			region->host_mem);
   2282		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
   2283		sparsebit_dump(stream, region->unused_phy_pages, 0);
   2284	}
   2285	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
   2286	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
   2287	fprintf(stream, "%*spgd_created: %u\n", indent, "",
   2288		vm->pgd_created);
   2289	if (vm->pgd_created) {
   2290		fprintf(stream, "%*sVirtual Translation Tables:\n",
   2291			indent + 2, "");
   2292		virt_dump(stream, vm, indent + 4);
   2293	}
   2294	fprintf(stream, "%*sVCPUs:\n", indent, "");
   2295	list_for_each_entry(vcpu, &vm->vcpus, list)
   2296		vcpu_dump(stream, vm, vcpu->id, indent + 2);
   2297}
   2298
   2299/* Known KVM exit reasons */
   2300static struct exit_reason {
   2301	unsigned int reason;
   2302	const char *name;
   2303} exit_reasons_known[] = {
   2304	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
   2305	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
   2306	{KVM_EXIT_IO, "IO"},
   2307	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
   2308	{KVM_EXIT_DEBUG, "DEBUG"},
   2309	{KVM_EXIT_HLT, "HLT"},
   2310	{KVM_EXIT_MMIO, "MMIO"},
   2311	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
   2312	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
   2313	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
   2314	{KVM_EXIT_INTR, "INTR"},
   2315	{KVM_EXIT_SET_TPR, "SET_TPR"},
   2316	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
   2317	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
   2318	{KVM_EXIT_S390_RESET, "S390_RESET"},
   2319	{KVM_EXIT_DCR, "DCR"},
   2320	{KVM_EXIT_NMI, "NMI"},
   2321	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
   2322	{KVM_EXIT_OSI, "OSI"},
   2323	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
   2324	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
   2325	{KVM_EXIT_X86_RDMSR, "RDMSR"},
   2326	{KVM_EXIT_X86_WRMSR, "WRMSR"},
   2327	{KVM_EXIT_XEN, "XEN"},
   2328#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
   2329	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
   2330#endif
   2331};
   2332
   2333/*
   2334 * Exit Reason String
   2335 *
   2336 * Input Args:
   2337 *   exit_reason - Exit reason
   2338 *
   2339 * Output Args: None
   2340 *
   2341 * Return:
   2342 *   Constant string pointer describing the exit reason.
   2343 *
   2344 * Locates and returns a constant string that describes the KVM exit
   2345 * reason given by exit_reason.  If no such string is found, a constant
   2346 * string of "Unknown" is returned.
   2347 */
   2348const char *exit_reason_str(unsigned int exit_reason)
   2349{
   2350	unsigned int n1;
   2351
   2352	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
   2353		if (exit_reason == exit_reasons_known[n1].reason)
   2354			return exit_reasons_known[n1].name;
   2355	}
   2356
   2357	return "Unknown";
   2358}
   2359
   2360/*
   2361 * Physical Contiguous Page Allocator
   2362 *
   2363 * Input Args:
   2364 *   vm - Virtual Machine
   2365 *   num - number of pages
   2366 *   paddr_min - Physical address minimum
   2367 *   memslot - Memory region to allocate page from
   2368 *
   2369 * Output Args: None
   2370 *
   2371 * Return:
   2372 *   Starting physical address
   2373 *
   2374 * Within the VM specified by vm, locates a range of available physical
   2375 * pages at or above paddr_min. If found, the pages are marked as in use
   2376 * and their base address is returned. A TEST_ASSERT failure occurs if
   2377 * not enough pages are available at or above paddr_min.
   2378 */
   2379vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
   2380			      vm_paddr_t paddr_min, uint32_t memslot)
   2381{
   2382	struct userspace_mem_region *region;
   2383	sparsebit_idx_t pg, base;
   2384
   2385	TEST_ASSERT(num > 0, "Must allocate at least one page");
   2386
   2387	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
   2388		"not divisible by page size.\n"
   2389		"  paddr_min: 0x%lx page_size: 0x%x",
   2390		paddr_min, vm->page_size);
   2391
   2392	region = memslot2region(vm, memslot);
   2393	base = pg = paddr_min >> vm->page_shift;
   2394
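        	/*
        	 * Scan for "num" contiguous unused pages starting at "base";
        	 * hitting an already-used page restarts the search at the next
        	 * unused page. sparsebit_next_set() returning 0 means the
        	 * memslot has no further unused pages.
        	 */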
   2395	do {
   2396		for (; pg < base + num; ++pg) {
   2397			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
   2398				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
   2399				break;
   2400			}
   2401		}
   2402	} while (pg && pg != base + num);
   2403
   2404	if (pg == 0) {
   2405		fprintf(stderr, "No guest physical page available, "
   2406			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
   2407			paddr_min, vm->page_size, memslot);
   2408		fputs("---- vm dump ----\n", stderr);
   2409		vm_dump(stderr, vm, 2);
   2410		abort();
   2411	}
   2412
   2413	for (pg = base; pg < base + num; ++pg)
   2414		sparsebit_clear(region->unused_phy_pages, pg);
   2415
   2416	return base * vm->page_size;
   2417}
   2418
   2419vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
   2420			     uint32_t memslot)
   2421{
   2422	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
   2423}
   2424
   2425/* Arbitrary minimum physical address used for virtual translation tables. */
   2426#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
   2427
   2428vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
   2429{
   2430	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
   2431}
   2432
   2433/*
   2434 * Address Guest Virtual to Host Virtual
   2435 *
   2436 * Input Args:
   2437 *   vm - Virtual Machine
   2438 *   gva - VM virtual address
   2439 *
   2440 * Output Args: None
   2441 *
   2442 * Return:
   2443 *   Equivalent host virtual address
   2444 */
   2445void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
   2446{
   2447	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
   2448}
   2449
   2450/*
   2451 * Is Unrestricted Guest
   2452 *
   2453 * Input Args:
   2454 *   vm - Virtual Machine
   2455 *
   2456 * Output Args: None
   2457 *
    2458 * Return: True if kvm_intel's unrestricted_guest parameter is set to 'Y';
         *   false otherwise, including when the parameter file is absent (e.g.
         *   the kvm_intel module is not loaded).
    2459 *
    2460 * Check if the unrestricted guest flag is enabled.
   2461 */
   2462bool vm_is_unrestricted_guest(struct kvm_vm *vm)
   2463{
   2464	char val = 'N';
   2465	size_t count;
   2466	FILE *f;
   2467
   2468	if (vm == NULL) {
   2469		/* Ensure that the KVM vendor-specific module is loaded. */
   2470		close(open_kvm_dev_path_or_exit());
   2471	}
   2472
   2473	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
   2474	if (f) {
   2475		count = fread(&val, sizeof(char), 1, f);
   2476		TEST_ASSERT(count == 1, "Unable to read from param file.");
   2477		fclose(f);
   2478	}
   2479
   2480	return val == 'Y';
   2481}
   2482
   2483unsigned int vm_get_page_size(struct kvm_vm *vm)
   2484{
   2485	return vm->page_size;
   2486}
   2487
   2488unsigned int vm_get_page_shift(struct kvm_vm *vm)
   2489{
   2490	return vm->page_shift;
   2491}
   2492
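        /*
         * Weak default for the highest usable guest page frame number: a flat
         * limit derived from the guest physical address width. Architectures
         * that reserve or repurpose high guest-physical bits can override it.
         */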
   2493unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
   2494{
   2495	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
   2496}
   2497
   2498uint64_t vm_get_max_gfn(struct kvm_vm *vm)
   2499{
   2500	return vm->max_gfn;
   2501}
   2502
   2503int vm_get_fd(struct kvm_vm *vm)
   2504{
   2505	return vm->fd;
   2506}
   2507
   2508static unsigned int vm_calc_num_pages(unsigned int num_pages,
   2509				      unsigned int page_shift,
   2510				      unsigned int new_page_shift,
   2511				      bool ceil)
   2512{
    2513	unsigned int n;
    2514
    2515	if (page_shift >= new_page_shift)
    2516		return num_pages * (1 << (page_shift - new_page_shift));
    2517
        	/* Only compute the shift once new_page_shift > page_shift is known. */
    2518	n = 1 << (new_page_shift - page_shift);
        	return num_pages / n + !!(ceil && num_pages % n);
   2519}
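
        /*
         * Worked example: 5 guest pages of 64 KiB (shift 16) become
         * 5 * (1 << 4) = 80 host pages of 4 KiB (shift 12); in the other
         * direction, 5 pages of 4 KiB become 5 / 16 = 0 pages of 64 KiB,
         * or 1 with ceil set.
         */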
   2520
   2521static inline int getpageshift(void)
   2522{
   2523	return __builtin_ffs(getpagesize()) - 1;
   2524}
   2525
   2526unsigned int
   2527vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
   2528{
   2529	return vm_calc_num_pages(num_guest_pages,
   2530				 vm_guest_mode_params[mode].page_shift,
   2531				 getpageshift(), true);
   2532}
   2533
   2534unsigned int
   2535vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
   2536{
   2537	return vm_calc_num_pages(num_host_pages, getpageshift(),
   2538				 vm_guest_mode_params[mode].page_shift, false);
   2539}
   2540
   2541unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
   2542{
   2543	unsigned int n;
   2544	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
   2545	return vm_adjust_num_guest_pages(mode, n);
   2546}
   2547
   2548int vm_get_stats_fd(struct kvm_vm *vm)
   2549{
   2550	return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
   2551}
   2552
   2553int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
   2554{
   2555	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
   2556
   2557	return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
   2558}