cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sgx.c (14756B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*  Copyright(c) 2021 Intel Corporation. */
      3
      4#include <asm/sgx.h>
      5
      6#include "cpuid.h"
      7#include "kvm_cache_regs.h"
      8#include "nested.h"
      9#include "sgx.h"
     10#include "vmx.h"
     11#include "x86.h"
     12
     13bool __read_mostly enable_sgx = 1;
     14module_param_named(sgx, enable_sgx, bool, 0444);
     15
     16/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
     17static u64 sgx_pubkey_hash[4] __ro_after_init;
     18
     19/*
     20 * ENCLS's memory operands use a fixed segment (DS) and a fixed
     21 * address size based on the mode.  Related prefixes are ignored.
     22 */
     23static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
     24			     int size, int alignment, gva_t *gva)
     25{
     26	struct kvm_segment s;
     27	bool fault;
     28
     29	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
     30	*gva = offset;
     31	if (!is_long_mode(vcpu)) {
     32		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
     33		*gva += s.base;
     34	}
     35
     36	if (!IS_ALIGNED(*gva, alignment)) {
     37		fault = true;
     38	} else if (likely(is_long_mode(vcpu))) {
     39		fault = is_noncanonical_address(*gva, vcpu);
     40	} else {
     41		*gva &= 0xffffffff;
     42		fault = (s.unusable) ||
     43			(s.type != 2 && s.type != 3) ||
     44			(*gva > s.limit) ||
     45			((s.base != 0 || s.limit != 0xffffffff) &&
     46			(((u64)*gva + size - 1) > s.limit + 1));
     47	}
     48	if (fault)
     49		kvm_inject_gp(vcpu, 0);
     50	return fault ? -EINVAL : 0;
     51}
     52
     53static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
     54					 unsigned int size)
     55{
     56	uint64_t data[2] = { addr, size };
     57
     58	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
     59}
     60
     61static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
     62			unsigned int size)
     63{
     64	if (__copy_from_user(data, (void __user *)hva, size)) {
     65		sgx_handle_emulation_failure(vcpu, hva, size);
     66		return -EFAULT;
     67	}
     68
     69	return 0;
     70}
     71
     72static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
     73			  gpa_t *gpa)
     74{
     75	struct x86_exception ex;
     76
     77	if (write)
     78		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
     79	else
     80		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
     81
     82	if (*gpa == UNMAPPED_GVA) {
     83		kvm_inject_emulated_page_fault(vcpu, &ex);
     84		return -EFAULT;
     85	}
     86
     87	return 0;
     88}
     89
     90static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
     91{
     92	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
     93	if (kvm_is_error_hva(*hva)) {
     94		sgx_handle_emulation_failure(vcpu, gpa, 1);
     95		return -EFAULT;
     96	}
     97
     98	*hva |= gpa & ~PAGE_MASK;
     99
    100	return 0;
    101}
    102
    103static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
    104{
    105	struct x86_exception ex;
    106
    107	/*
    108	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
    109	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
    110	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
    111	 */
    112	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
    113		kvm_prepare_emulation_failure_exit(vcpu);
    114		return 0;
    115	}
    116
    117	/*
    118	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
    119	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
    120	 * #PF on SGX2).  The assumption is that EPCM faults are much more
    121	 * likely than a bad userspace address.
    122	 */
    123	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
    124	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
    125		memset(&ex, 0, sizeof(ex));
    126		ex.vector = PF_VECTOR;
    127		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
    128				PFERR_SGX_MASK;
    129		ex.address = gva;
    130		ex.error_code_valid = true;
    131		ex.nested_page_fault = false;
    132		kvm_inject_page_fault(vcpu, &ex);
    133	} else {
    134		kvm_inject_gp(vcpu, 0);
    135	}
    136	return 1;
    137}
    138
    139static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
    140				  struct sgx_pageinfo *pageinfo,
    141				  unsigned long secs_hva,
    142				  gva_t secs_gva)
    143{
    144	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
    145	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
    146	u64 attributes, xfrm, size;
    147	u32 miscselect;
    148	u8 max_size_log2;
    149	int trapnr, ret;
    150
    151	sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
    152	sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
    153	if (!sgx_12_0 || !sgx_12_1) {
    154		kvm_prepare_emulation_failure_exit(vcpu);
    155		return 0;
    156	}
    157
    158	miscselect = contents->miscselect;
    159	attributes = contents->attributes;
    160	xfrm = contents->xfrm;
    161	size = contents->size;
    162
    163	/* Enforce restriction of access to the PROVISIONKEY. */
    164	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
    165	    (attributes & SGX_ATTR_PROVISIONKEY)) {
    166		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
    167			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
    168		kvm_inject_gp(vcpu, 0);
    169		return 1;
    170	}
    171
    172	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
    173	if ((u32)miscselect & ~sgx_12_0->ebx ||
    174	    (u32)attributes & ~sgx_12_1->eax ||
    175	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
    176	    (u32)xfrm & ~sgx_12_1->ecx ||
    177	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
    178		kvm_inject_gp(vcpu, 0);
    179		return 1;
    180	}
    181
    182	/* Enforce CPUID restriction on max enclave size. */
    183	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
    184							    sgx_12_0->edx;
    185	if (size >= BIT_ULL(max_size_log2))
    186		kvm_inject_gp(vcpu, 0);
    187
    188	/*
    189	 * sgx_virt_ecreate() returns:
    190	 *  1) 0:	ECREATE was successful
    191	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
    192	 *		exception number.
    193	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
    194	 *		happen as KVM checks host addresses at memslot creation.
    195	 *		sgx_virt_ecreate() has already warned in this case.
    196	 */
    197	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
    198	if (!ret)
    199		return kvm_skip_emulated_instruction(vcpu);
    200	if (ret == -EFAULT)
    201		return sgx_inject_fault(vcpu, secs_gva, trapnr);
    202
    203	return ret;
    204}
    205
    206static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
    207{
    208	gva_t pageinfo_gva, secs_gva;
    209	gva_t metadata_gva, contents_gva;
    210	gpa_t metadata_gpa, contents_gpa, secs_gpa;
    211	unsigned long metadata_hva, contents_hva, secs_hva;
    212	struct sgx_pageinfo pageinfo;
    213	struct sgx_secs *contents;
    214	struct x86_exception ex;
    215	int r;
    216
    217	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
    218	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
    219		return 1;
    220
    221	/*
    222	 * Copy the PAGEINFO to local memory, its pointers need to be
    223	 * translated, i.e. we need to do a deep copy/translate.
    224	 */
    225	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
    226				sizeof(pageinfo), &ex);
    227	if (r == X86EMUL_PROPAGATE_FAULT) {
    228		kvm_inject_emulated_page_fault(vcpu, &ex);
    229		return 1;
    230	} else if (r != X86EMUL_CONTINUE) {
    231		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
    232					     sizeof(pageinfo));
    233		return 0;
    234	}
    235
    236	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
    237	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
    238			      &contents_gva))
    239		return 1;
    240
    241	/*
    242	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
    243	 * Resume the guest on failure to inject a #PF.
    244	 */
    245	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
    246	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
    247	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
    248		return 1;
    249
    250	/*
    251	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
    252	 * KVM doesn't have to fully process one address at a time.  Exit to
    253	 * userspace if a GPA is invalid.
    254	 */
    255	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
    256	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
    257	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
    258		return 0;
    259
    260	/*
    261	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
    262	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
    263	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
    264	 * enforce restriction of access to the PROVISIONKEY.
    265	 */
    266	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
    267	if (!contents)
    268		return -ENOMEM;
    269
    270	/* Exit to userspace if copying from a host userspace address fails. */
    271	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
    272		free_page((unsigned long)contents);
    273		return 0;
    274	}
    275
    276	pageinfo.metadata = metadata_hva;
    277	pageinfo.contents = (u64)contents;
    278
    279	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
    280
    281	free_page((unsigned long)contents);
    282
    283	return r;
    284}
    285
    286static int handle_encls_einit(struct kvm_vcpu *vcpu)
    287{
    288	unsigned long sig_hva, secs_hva, token_hva, rflags;
    289	struct vcpu_vmx *vmx = to_vmx(vcpu);
    290	gva_t sig_gva, secs_gva, token_gva;
    291	gpa_t sig_gpa, secs_gpa, token_gpa;
    292	int ret, trapnr;
    293
    294	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
    295	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
    296	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
    297		return 1;
    298
    299	/*
    300	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
    301	 * Resume the guest on failure to inject a #PF.
    302	 */
    303	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
    304	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
    305	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
    306		return 1;
    307
    308	/*
    309	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
    310	 * KVM doesn't have to fully process one address at a time.  Exit to
    311	 * userspace if a GPA is invalid.  Note, all structures are aligned and
    312	 * cannot split pages.
    313	 */
    314	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
    315	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
    316	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
    317		return 0;
    318
    319	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
    320			     (void __user *)secs_hva,
    321			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
    322
    323	if (ret == -EFAULT)
    324		return sgx_inject_fault(vcpu, secs_gva, trapnr);
    325
    326	/*
    327	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
    328	 * @token_hva or @secs_hva. This should never happen as KVM checks host
    329	 * addresses at memslot creation. sgx_virt_einit() has already warned
    330	 * in this case, so just return.
    331	 */
    332	if (ret < 0)
    333		return ret;
    334
    335	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
    336					  X86_EFLAGS_AF | X86_EFLAGS_SF |
    337					  X86_EFLAGS_OF);
    338	if (ret)
    339		rflags |= X86_EFLAGS_ZF;
    340	else
    341		rflags &= ~X86_EFLAGS_ZF;
    342	vmx_set_rflags(vcpu, rflags);
    343
    344	kvm_rax_write(vcpu, ret);
    345	return kvm_skip_emulated_instruction(vcpu);
    346}
    347
    348static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
    349{
    350	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
    351		return false;
    352
    353	if (leaf >= ECREATE && leaf <= ETRACK)
    354		return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
    355
    356	if (leaf >= EAUG && leaf <= EMODT)
    357		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
    358
    359	return false;
    360}
    361
    362static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
    363{
    364	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
    365
    366	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
    367}
    368
    369int handle_encls(struct kvm_vcpu *vcpu)
    370{
    371	u32 leaf = (u32)kvm_rax_read(vcpu);
    372
    373	if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
    374		kvm_queue_exception(vcpu, UD_VECTOR);
    375	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
    376		kvm_inject_gp(vcpu, 0);
    377	} else {
    378		if (leaf == ECREATE)
    379			return handle_encls_ecreate(vcpu);
    380		if (leaf == EINIT)
    381			return handle_encls_einit(vcpu);
    382		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
    383		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
    384		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
    385		return 0;
    386	}
    387	return 1;
    388}
    389
    390void setup_default_sgx_lepubkeyhash(void)
    391{
    392	/*
    393	 * Use Intel's default value for Skylake hardware if Launch Control is
    394	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
    395	 * Launch Control is supported and enabled, i.e. mimic the reset value
    396	 * and let the guest write the MSRs at will.  If Launch Control is
    397	 * supported but disabled, then use the current MSR values as the hash
    398	 * MSRs exist but are read-only (locked and not writable).
    399	 */
    400	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
    401	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
    402		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
    403		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
    404		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
    405		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
    406	} else {
    407		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
    408		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
    409		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
    410		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
    411	}
    412}
    413
    414void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
    415{
    416	struct vcpu_vmx *vmx = to_vmx(vcpu);
    417
    418	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
    419	       sizeof(sgx_pubkey_hash));
    420}
    421
    422/*
    423 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
    424 * restrictions if the guest's allowed-1 settings diverge from hardware.
    425 */
    426static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
    427{
    428	struct kvm_cpuid_entry2 *guest_cpuid;
    429	u32 eax, ebx, ecx, edx;
    430
    431	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
    432		return true;
    433
    434	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
    435	if (!guest_cpuid)
    436		return true;
    437
    438	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
    439	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
    440		return true;
    441
    442	guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
    443	if (!guest_cpuid)
    444		return true;
    445
    446	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
    447	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
    448	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
    449		return true;
    450
    451	return false;
    452}
    453
    454void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
    455{
    456	/*
    457	 * There is no software enable bit for SGX that is virtualized by
    458	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
    459	 * guest (either by the host or by the guest's BIOS) but enabled in the
    460	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
    461	 * the expected system behavior for ENCLS.
    462	 */
    463	u64 bitmap = -1ull;
    464
    465	/* Nothing to do if hardware doesn't support SGX */
    466	if (!cpu_has_vmx_encls_vmexit())
    467		return;
    468
    469	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
    470	    sgx_enabled_in_guest_bios(vcpu)) {
    471		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
    472			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
    473			if (sgx_intercept_encls_ecreate(vcpu))
    474				bitmap |= (1 << ECREATE);
    475		}
    476
    477		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
    478			bitmap &= ~GENMASK_ULL(EMODT, EAUG);
    479
    480		/*
    481		 * Trap and execute EINIT if launch control is enabled in the
    482		 * host using the guest's values for launch control MSRs, even
    483		 * if the guest's values are fixed to hardware default values.
    484		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
    485		 * the MSRs is extraordinarily expensive.
    486		 */
    487		if (boot_cpu_has(X86_FEATURE_SGX_LC))
    488			bitmap |= (1 << EINIT);
    489
    490		if (!vmcs12 && is_guest_mode(vcpu))
    491			vmcs12 = get_vmcs12(vcpu);
    492		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
    493			bitmap |= vmcs12->encls_exiting_bitmap;
    494	}
    495	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
    496}