virt.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
virt.c (11423B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Device driver to expose SGX enclave memory to KVM guests.
      4 *
      5 * Copyright(c) 2021 Intel Corporation.
      6 */
      7
      8#include <linux/miscdevice.h>
      9#include <linux/mm.h>
     10#include <linux/mman.h>
     11#include <linux/sched/mm.h>
     12#include <linux/sched/signal.h>
     13#include <linux/slab.h>
     14#include <linux/xarray.h>
     15#include <asm/sgx.h>
     16#include <uapi/asm/sgx.h>
     17
     18#include "encls.h"
     19#include "sgx.h"
     20
     21struct sgx_vepc {
     22	struct xarray page_array;
     23	struct mutex lock;
     24};
     25
     26/*
     27 * Temporary SECS pages that cannot be EREMOVE'd due to having child in other
     28 * virtual EPC instances, and the lock to protect it.
     29 */
     30static struct mutex zombie_secs_pages_lock;
     31static struct list_head zombie_secs_pages;
     32
     33static int __sgx_vepc_fault(struct sgx_vepc *vepc,
     34			    struct vm_area_struct *vma, unsigned long addr)
     35{
     36	struct sgx_epc_page *epc_page;
     37	unsigned long index, pfn;
     38	int ret;
     39
     40	WARN_ON(!mutex_is_locked(&vepc->lock));
     41
     42	/* Calculate index of EPC page in virtual EPC's page_array */
     43	index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
     44
     45	epc_page = xa_load(&vepc->page_array, index);
     46	if (epc_page)
     47		return 0;
     48
     49	epc_page = sgx_alloc_epc_page(vepc, false);
     50	if (IS_ERR(epc_page))
     51		return PTR_ERR(epc_page);
     52
     53	ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
     54	if (ret)
     55		goto err_free;
     56
     57	pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
     58
     59	ret = vmf_insert_pfn(vma, addr, pfn);
     60	if (ret != VM_FAULT_NOPAGE) {
     61		ret = -EFAULT;
     62		goto err_delete;
     63	}
     64
     65	return 0;
     66
     67err_delete:
     68	xa_erase(&vepc->page_array, index);
     69err_free:
     70	sgx_free_epc_page(epc_page);
     71	return ret;
     72}
     73
     74static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
     75{
     76	struct vm_area_struct *vma = vmf->vma;
     77	struct sgx_vepc *vepc = vma->vm_private_data;
     78	int ret;
     79
     80	mutex_lock(&vepc->lock);
     81	ret = __sgx_vepc_fault(vepc, vma, vmf->address);
     82	mutex_unlock(&vepc->lock);
     83
     84	if (!ret)
     85		return VM_FAULT_NOPAGE;
     86
     87	if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
     88		mmap_read_unlock(vma->vm_mm);
     89		return VM_FAULT_RETRY;
     90	}
     91
     92	return VM_FAULT_SIGBUS;
     93}
     94
     95static const struct vm_operations_struct sgx_vepc_vm_ops = {
     96	.fault = sgx_vepc_fault,
     97};
     98
     99static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
    100{
    101	struct sgx_vepc *vepc = file->private_data;
    102
    103	if (!(vma->vm_flags & VM_SHARED))
    104		return -EINVAL;
    105
    106	vma->vm_ops = &sgx_vepc_vm_ops;
    107	/* Don't copy VMA in fork() */
    108	vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
    109	vma->vm_private_data = vepc;
    110
    111	return 0;
    112}
    113
    114static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
    115{
    116	/*
    117	 * Take a previously guest-owned EPC page and return it to the
    118	 * general EPC page pool.
    119	 *
    120	 * Guests can not be trusted to have left this page in a good
    121	 * state, so run EREMOVE on the page unconditionally.  In the
    122	 * case that a guest properly EREMOVE'd this page, a superfluous
    123	 * EREMOVE is harmless.
    124	 */
    125	return __eremove(sgx_get_epc_virt_addr(epc_page));
    126}
    127
    128static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
    129{
    130	int ret = sgx_vepc_remove_page(epc_page);
    131	if (ret) {
    132		/*
    133		 * Only SGX_CHILD_PRESENT is expected, which is because of
    134		 * EREMOVE'ing an SECS still with child, in which case it can
    135		 * be handled by EREMOVE'ing the SECS again after all pages in
    136		 * virtual EPC have been EREMOVE'd. See comments in below in
    137		 * sgx_vepc_release().
    138		 *
    139		 * The user of virtual EPC (KVM) needs to guarantee there's no
    140		 * logical processor is still running in the enclave in guest,
    141		 * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
    142		 * handled here.
    143		 */
    144		WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
    145			  ret, ret);
    146		return ret;
    147	}
    148
    149	sgx_free_epc_page(epc_page);
    150	return 0;
    151}
    152
    153static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
    154{
    155	struct sgx_epc_page *entry;
    156	unsigned long index;
    157	long failures = 0;
    158
    159	xa_for_each(&vepc->page_array, index, entry) {
    160		int ret = sgx_vepc_remove_page(entry);
    161		if (ret) {
    162			if (ret == SGX_CHILD_PRESENT) {
    163				/* The page is a SECS, userspace will retry.  */
    164				failures++;
    165			} else {
    166				/*
    167				 * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
    168				 * WARN, as userspace can induce said failures by
    169				 * calling the ioctl concurrently on multiple vEPCs or
    170				 * while one or more CPUs is running the enclave.  Only
    171				 * a #PF on EREMOVE indicates a kernel/hardware issue.
    172				 */
    173				WARN_ON_ONCE(encls_faulted(ret) &&
    174					     ENCLS_TRAPNR(ret) != X86_TRAP_GP);
    175				return -EBUSY;
    176			}
    177		}
    178		cond_resched();
    179	}
    180
    181	/*
    182	 * Return the number of SECS pages that failed to be removed, so
    183	 * userspace knows that it has to retry.
    184	 */
    185	return failures;
    186}
    187
    188static int sgx_vepc_release(struct inode *inode, struct file *file)
    189{
    190	struct sgx_vepc *vepc = file->private_data;
    191	struct sgx_epc_page *epc_page, *tmp, *entry;
    192	unsigned long index;
    193
    194	LIST_HEAD(secs_pages);
    195
    196	xa_for_each(&vepc->page_array, index, entry) {
    197		/*
    198		 * Remove all normal, child pages.  sgx_vepc_free_page()
    199		 * will fail if EREMOVE fails, but this is OK and expected on
    200		 * SECS pages.  Those can only be EREMOVE'd *after* all their
    201		 * child pages. Retries below will clean them up.
    202		 */
    203		if (sgx_vepc_free_page(entry))
    204			continue;
    205
    206		xa_erase(&vepc->page_array, index);
    207	}
    208
    209	/*
    210	 * Retry EREMOVE'ing pages.  This will clean up any SECS pages that
    211	 * only had children in this 'epc' area.
    212	 */
    213	xa_for_each(&vepc->page_array, index, entry) {
    214		epc_page = entry;
    215		/*
    216		 * An EREMOVE failure here means that the SECS page still
    217		 * has children.  But, since all children in this 'sgx_vepc'
    218		 * have been removed, the SECS page must have a child on
    219		 * another instance.
    220		 */
    221		if (sgx_vepc_free_page(epc_page))
    222			list_add_tail(&epc_page->list, &secs_pages);
    223
    224		xa_erase(&vepc->page_array, index);
    225	}
    226
    227	/*
    228	 * SECS pages are "pinned" by child pages, and "unpinned" once all
    229	 * children have been EREMOVE'd.  A child page in this instance
    230	 * may have pinned an SECS page encountered in an earlier release(),
    231	 * creating a zombie.  Since some children were EREMOVE'd above,
    232	 * try to EREMOVE all zombies in the hopes that one was unpinned.
    233	 */
    234	mutex_lock(&zombie_secs_pages_lock);
    235	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
    236		/*
    237		 * Speculatively remove the page from the list of zombies,
    238		 * if the page is successfully EREMOVE'd it will be added to
    239		 * the list of free pages.  If EREMOVE fails, throw the page
    240		 * on the local list, which will be spliced on at the end.
    241		 */
    242		list_del(&epc_page->list);
    243
    244		if (sgx_vepc_free_page(epc_page))
    245			list_add_tail(&epc_page->list, &secs_pages);
    246	}
    247
    248	if (!list_empty(&secs_pages))
    249		list_splice_tail(&secs_pages, &zombie_secs_pages);
    250	mutex_unlock(&zombie_secs_pages_lock);
    251
    252	xa_destroy(&vepc->page_array);
    253	kfree(vepc);
    254
    255	return 0;
    256}
    257
    258static int sgx_vepc_open(struct inode *inode, struct file *file)
    259{
    260	struct sgx_vepc *vepc;
    261
    262	vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
    263	if (!vepc)
    264		return -ENOMEM;
    265	mutex_init(&vepc->lock);
    266	xa_init(&vepc->page_array);
    267
    268	file->private_data = vepc;
    269
    270	return 0;
    271}
    272
    273static long sgx_vepc_ioctl(struct file *file,
    274			   unsigned int cmd, unsigned long arg)
    275{
    276	struct sgx_vepc *vepc = file->private_data;
    277
    278	switch (cmd) {
    279	case SGX_IOC_VEPC_REMOVE_ALL:
    280		if (arg)
    281			return -EINVAL;
    282		return sgx_vepc_remove_all(vepc);
    283
    284	default:
    285		return -ENOTTY;
    286	}
    287}
    288
    289static const struct file_operations sgx_vepc_fops = {
    290	.owner		= THIS_MODULE,
    291	.open		= sgx_vepc_open,
    292	.unlocked_ioctl	= sgx_vepc_ioctl,
    293	.compat_ioctl	= sgx_vepc_ioctl,
    294	.release	= sgx_vepc_release,
    295	.mmap		= sgx_vepc_mmap,
    296};
    297
    298static struct miscdevice sgx_vepc_dev = {
    299	.minor		= MISC_DYNAMIC_MINOR,
    300	.name		= "sgx_vepc",
    301	.nodename	= "sgx_vepc",
    302	.fops		= &sgx_vepc_fops,
    303};
    304
    305int __init sgx_vepc_init(void)
    306{
    307	/* SGX virtualization requires KVM to work */
    308	if (!cpu_feature_enabled(X86_FEATURE_VMX))
    309		return -ENODEV;
    310
    311	INIT_LIST_HEAD(&zombie_secs_pages);
    312	mutex_init(&zombie_secs_pages_lock);
    313
    314	return misc_register(&sgx_vepc_dev);
    315}
    316
    317/**
    318 * sgx_virt_ecreate() - Run ECREATE on behalf of guest
    319 * @pageinfo:	Pointer to PAGEINFO structure
    320 * @secs:	Userspace pointer to SECS page
    321 * @trapnr:	trap number injected to guest in case of ECREATE error
    322 *
    323 * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
    324 * of enforcing policies of guest's enclaves, and return the trap number
    325 * which should be injected to guest in case of any ECREATE error.
    326 *
    327 * Return:
    328 * -  0:	ECREATE was successful.
    329 * - <0:	on error.
    330 */
    331int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
    332		     int *trapnr)
    333{
    334	int ret;
    335
    336	/*
    337	 * @secs is an untrusted, userspace-provided address.  It comes from
    338	 * KVM and is assumed to be a valid pointer which points somewhere in
    339	 * userspace.  This can fault and call SGX or other fault handlers when
    340	 * userspace mapping @secs doesn't exist.
    341	 *
    342	 * Add a WARN() to make sure @secs is already valid userspace pointer
    343	 * from caller (KVM), who should already have handled invalid pointer
    344	 * case (for instance, made by malicious guest).  All other checks,
    345	 * such as alignment of @secs, are deferred to ENCLS itself.
    346	 */
    347	if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
    348		return -EINVAL;
    349
    350	__uaccess_begin();
    351	ret = __ecreate(pageinfo, (void *)secs);
    352	__uaccess_end();
    353
    354	if (encls_faulted(ret)) {
    355		*trapnr = ENCLS_TRAPNR(ret);
    356		return -EFAULT;
    357	}
    358
    359	/* ECREATE doesn't return an error code, it faults or succeeds. */
    360	WARN_ON_ONCE(ret);
    361	return 0;
    362}
    363EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
    364
    365static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
    366			    void __user *secs)
    367{
    368	int ret;
    369
    370	/*
    371	 * Make sure all userspace pointers from caller (KVM) are valid.
    372	 * All other checks deferred to ENCLS itself.  Also see comment
    373	 * for @secs in sgx_virt_ecreate().
    374	 */
    375#define SGX_EINITTOKEN_SIZE	304
    376	if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
    377			 !access_ok(token, SGX_EINITTOKEN_SIZE) ||
    378			 !access_ok(secs, PAGE_SIZE)))
    379		return -EINVAL;
    380
    381	__uaccess_begin();
    382	ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
    383	__uaccess_end();
    384
    385	return ret;
    386}
    387
    388/**
    389 * sgx_virt_einit() - Run EINIT on behalf of guest
    390 * @sigstruct:		Userspace pointer to SIGSTRUCT structure
    391 * @token:		Userspace pointer to EINITTOKEN structure
    392 * @secs:		Userspace pointer to SECS page
    393 * @lepubkeyhash:	Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
    394 * @trapnr:		trap number injected to guest in case of EINIT error
    395 *
    396 * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
    397 * in host, SGX driver may rewrite the hardware values at wish, therefore KVM
    398 * needs to update hardware values to guest's virtual MSR values in order to
    399 * ensure EINIT is executed with expected hardware values.
    400 *
    401 * Return:
    402 * -  0:	EINIT was successful.
    403 * - <0:	on error.
    404 */
    405int sgx_virt_einit(void __user *sigstruct, void __user *token,
    406		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
    407{
    408	int ret;
    409
    410	if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
    411		ret = __sgx_virt_einit(sigstruct, token, secs);
    412	} else {
    413		preempt_disable();
    414
    415		sgx_update_lepubkeyhash(lepubkeyhash);
    416
    417		ret = __sgx_virt_einit(sigstruct, token, secs);
    418		preempt_enable();
    419	}
    420
    421	/* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
    422	if (ret == -EINVAL)
    423		return ret;
    424
    425	if (encls_faulted(ret)) {
    426		*trapnr = ENCLS_TRAPNR(ret);
    427		return -EFAULT;
    428	}
    429
    430	return ret;
    431}
    432EXPORT_SYMBOL_GPL(sgx_virt_einit);