cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sev.c (119804B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Kernel-based Virtual Machine driver for Linux
      4 *
      5 * AMD SVM-SEV support
      6 *
      7 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
      8 */
      9
     10#include <linux/kvm_types.h>
     11#include <linux/kvm_host.h>
     12#include <linux/kernel.h>
     13#include <linux/highmem.h>
     14#include <linux/psp-sev.h>
     15#include <linux/pagemap.h>
     16#include <linux/swap.h>
     17#include <linux/misc_cgroup.h>
     18#include <linux/processor.h>
     19#include <linux/trace_events.h>
     20#include <linux/hugetlb.h>
     21#include <linux/sev.h>
     22#include <linux/ksm.h>
     23
     24#include <asm/pkru.h>
     25#include <asm/trapnr.h>
     26#include <asm/fpu/xcr.h>
     27#include <asm/sev.h>
     28#include <asm/mman.h>
     29
     30#include "mmu.h"
     31#include "x86.h"
     32#include "svm.h"
     33#include "svm_ops.h"
     34#include "cpuid.h"
     35#include "trace.h"
     36#include "mmu.h"
     37
     38#include "asm/set_memory.h"
     39#include "cachepc/cachepc.h"
     40
     41#ifndef CONFIG_KVM_AMD_SEV
     42/*
     43 * When this config is not defined, the SEV feature is not supported and the
     44 * APIs in this file are not used, but the file still gets compiled into the
     45 * KVM AMD module.
     46 *
     47 * The MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries will then be missing
     48 * from the enum misc_res_type {} defined in linux/misc_cgroup.h.
     49 *
     50 * The macros below allow compilation to succeed.
     51 */
     52#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
     53#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
     54#endif
     55
     56#ifdef CONFIG_KVM_AMD_SEV
     57/* enable/disable SEV support */
     58static bool sev_enabled = true;
     59module_param_named(sev, sev_enabled, bool, 0444);
     60
     61/* enable/disable SEV-ES support */
     62static bool sev_es_enabled = true;
     63module_param_named(sev_es, sev_es_enabled, bool, 0444);
     64
     65/* enable/disable SEV-SNP support */
     66static bool sev_snp_enabled = true;
     67module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
     68#else
     69#define sev_enabled false
     70#define sev_es_enabled false
     71#endif /* CONFIG_KVM_AMD_SEV */
     72
     73#define AP_RESET_HOLD_NONE		0
     74#define AP_RESET_HOLD_NAE_EVENT		1
     75#define AP_RESET_HOLD_MSR_PROTO		2
     76
     77static u8 sev_enc_bit;
     78static DECLARE_RWSEM(sev_deactivate_lock);
     79static DEFINE_MUTEX(sev_bitmap_lock);
     80unsigned int max_sev_asid;
     81static unsigned int min_sev_asid;
     82static unsigned long sev_me_mask;
     83static unsigned int nr_asids;
     84static unsigned long *sev_asid_bitmap;
     85static unsigned long *sev_reclaim_asid_bitmap;
     86
     87static int snp_decommission_context(struct kvm *kvm);
     88
     89struct enc_region {
     90	struct list_head list;
     91	unsigned long npages;
     92	struct page **pages;
     93	unsigned long uaddr;
     94	unsigned long size;
     95};
     96
     97/* Called with the sev_bitmap_lock held, or on shutdown  */
     98static int sev_flush_asids(int min_asid, int max_asid)
     99{
    100	int ret, asid, error = 0;
    101
    102	/* Check if there are any ASIDs to reclaim before performing a flush */
    103	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
    104	if (asid > max_asid)
    105		return -EBUSY;
    106
    107	/*
    108	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
    109	 * so it must be guarded.
    110	 */
    111	down_write(&sev_deactivate_lock);
    112
    113	wbinvd_on_all_cpus();
    114
    115	if (sev_snp_enabled)
    116		ret = snp_guest_df_flush(&error);
    117	else
    118		ret = sev_guest_df_flush(&error);
    119
    120	up_write(&sev_deactivate_lock);
    121
    122	if (ret)
    123		pr_err("SEV%s: DF_FLUSH failed, ret=%d, error=%#x\n",
    124			sev_snp_enabled ? "-SNP" : "", ret, error);
    125
    126	return ret;
    127}
    128
    129static inline bool is_mirroring_enc_context(struct kvm *kvm)
    130{
    131	return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
    132}
    133
    134/* Must be called with the sev_bitmap_lock held */
    135static bool __sev_recycle_asids(int min_asid, int max_asid)
    136{
    137	if (sev_flush_asids(min_asid, max_asid))
    138		return false;
    139
    140	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
    141	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
    142		   nr_asids);
    143	bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
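        	/*
        	 * Illustrative note (not in the original source): a reclaimed ASID
        	 * is set in both bitmaps, so the XOR above clears exactly those
        	 * bits from sev_asid_bitmap. E.g. sev_asid_bitmap = 0b1110 with
        	 * sev_reclaim_asid_bitmap = 0b0100 becomes 0b1010, returning
        	 * ASID 2 to the free pool.
        	 */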
    144
    145	return true;
    146}
    147
    148static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
    149{
    150	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
    151	return misc_cg_try_charge(type, sev->misc_cg, 1);
    152}
    153
    154static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
    155{
    156	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
    157	misc_cg_uncharge(type, sev->misc_cg, 1);
    158}
    159
    160static int sev_asid_new(struct kvm_sev_info *sev)
    161{
    162	int asid, min_asid, max_asid, ret;
    163	bool retry = true;
    164
    165	WARN_ON(sev->misc_cg);
    166	sev->misc_cg = get_current_misc_cg();
    167	ret = sev_misc_cg_try_charge(sev);
    168	if (ret) {
    169		put_misc_cg(sev->misc_cg);
    170		sev->misc_cg = NULL;
    171		return ret;
    172	}
    173
    174	mutex_lock(&sev_bitmap_lock);
    175
    176	/*
     177	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
     178	 * SEV-ES-enabled guests must use ASIDs from 1 to min_sev_asid - 1.
    179	 */
    180	min_asid = sev->es_active ? 1 : min_sev_asid;
    181	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
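        	/*
        	 * Illustrative example (not in the original source): with
        	 * min_sev_asid = 100 and max_sev_asid = 509, an SEV-ES guest
        	 * allocates from ASIDs 1..99 while a plain SEV guest allocates
        	 * from ASIDs 100..509.
        	 */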
    182again:
    183	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
    184	if (asid > max_asid) {
    185		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
    186			retry = false;
    187			goto again;
    188		}
    189		mutex_unlock(&sev_bitmap_lock);
    190		ret = -EBUSY;
    191		goto e_uncharge;
    192	}
    193
    194	__set_bit(asid, sev_asid_bitmap);
    195
    196	mutex_unlock(&sev_bitmap_lock);
    197
    198	return asid;
    199e_uncharge:
    200	sev_misc_cg_uncharge(sev);
    201	put_misc_cg(sev->misc_cg);
    202	sev->misc_cg = NULL;
    203	return ret;
    204}
    205
    206static int sev_get_asid(struct kvm *kvm)
    207{
    208	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    209
    210	return sev->asid;
    211}
    212
    213static void sev_asid_free(struct kvm_sev_info *sev)
    214{
    215	struct svm_cpu_data *sd;
    216	int cpu;
    217
    218	mutex_lock(&sev_bitmap_lock);
    219
    220	__set_bit(sev->asid, sev_reclaim_asid_bitmap);
    221
    222	for_each_possible_cpu(cpu) {
    223		sd = per_cpu(svm_data, cpu);
    224		sd->sev_vmcbs[sev->asid] = NULL;
    225	}
    226
    227	mutex_unlock(&sev_bitmap_lock);
    228
    229	sev_misc_cg_uncharge(sev);
    230	put_misc_cg(sev->misc_cg);
    231	sev->misc_cg = NULL;
    232}
    233
    234static void sev_decommission(unsigned int handle)
    235{
    236	struct sev_data_decommission decommission;
    237
    238	if (!handle)
    239		return;
    240
    241	decommission.handle = handle;
    242	sev_guest_decommission(&decommission, NULL);
    243}
    244
    245static inline void snp_leak_pages(u64 pfn, enum pg_level level)
    246{
    247	unsigned int npages = page_level_size(level) >> PAGE_SHIFT;
    248
    249	WARN(1, "psc failed pfn 0x%llx pages %d (leaking)\n", pfn, npages);
    250
    251	while (npages) {
    252		memory_failure(pfn, 0);
    253		dump_rmpentry(pfn);
    254		npages--;
    255		pfn++;
    256	}
    257}
    258
    259static int snp_page_reclaim(u64 pfn)
    260{
    261	struct sev_data_snp_page_reclaim data = {0};
    262	int err, rc;
    263
    264	data.paddr = __sme_set(pfn << PAGE_SHIFT);
    265	rc = snp_guest_page_reclaim(&data, &err);
    266	if (rc) {
    267		/*
     268		 * If the reclaim failed, then the page is no longer
     269		 * safe to use.
    270		 */
    271		snp_leak_pages(pfn, PG_LEVEL_4K);
    272	}
    273
    274	return rc;
    275}
    276
    277static int host_rmp_make_shared(u64 pfn, enum pg_level level, bool leak)
    278{
    279	int rc;
    280
    281	rc = rmp_make_shared(pfn, level);
    282	if (rc && leak)
    283		snp_leak_pages(pfn, level);
    284
    285	return rc;
    286}
    287
    288static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
    289{
    290	struct sev_data_deactivate deactivate;
    291
    292	if (!handle)
    293		return;
    294
    295	deactivate.handle = handle;
    296
    297	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
    298	down_read(&sev_deactivate_lock);
    299	sev_guest_deactivate(&deactivate, NULL);
    300	up_read(&sev_deactivate_lock);
    301
    302	sev_decommission(handle);
    303}
    304
    305static int verify_snp_init_flags(struct kvm *kvm, struct kvm_sev_cmd *argp)
    306{
    307	struct kvm_snp_init params;
    308	int ret = 0;
    309
    310	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
    311		return -EFAULT;
    312
    313	if (params.flags & ~SEV_SNP_SUPPORTED_FLAGS)
    314		ret = -EOPNOTSUPP;
    315
    316	params.flags = SEV_SNP_SUPPORTED_FLAGS;
    317
    318	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
    319		ret = -EFAULT;
    320
    321	return ret;
    322}
    323
    324static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
    325{
    326	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    327	int asid, ret;
    328
    329	if (kvm->created_vcpus)
    330		return -EINVAL;
    331
    332	ret = -EBUSY;
    333	if (unlikely(sev->active))
    334		return ret;
    335
    336	sev->active = true;
    337	sev->es_active = (argp->id == KVM_SEV_ES_INIT || argp->id == KVM_SEV_SNP_INIT);
    338	sev->snp_active = argp->id == KVM_SEV_SNP_INIT;
    339	asid = sev_asid_new(sev);
    340	if (asid < 0)
    341		goto e_no_asid;
    342	sev->asid = asid;
    343
    344	if (sev->snp_active) {
    345		ret = verify_snp_init_flags(kvm, argp);
    346		if (ret)
    347			goto e_free;
    348
    349		spin_lock_init(&sev->psc_lock);
    350		ret = sev_snp_init(&argp->error);
    351		mutex_init(&sev->guest_req_lock);
    352	} else {
    353		ret = sev_platform_init(&argp->error);
    354	}
    355
    356	if (ret)
    357		goto e_free;
    358
    359	INIT_LIST_HEAD(&sev->regions_list);
    360	INIT_LIST_HEAD(&sev->mirror_vms);
    361
    362	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
    363
    364	return 0;
    365
    366e_free:
    367	sev_asid_free(sev);
    368	sev->asid = 0;
    369e_no_asid:
    370	sev->snp_active = false;
    371	sev->es_active = false;
    372	sev->active = false;
    373	return ret;
    374}
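
        /*
         * Illustrative note (not in the original source): this handler serves the
         * KVM_SEV_INIT, KVM_SEV_ES_INIT and KVM_SEV_SNP_INIT commands, which
         * userspace issues through the KVM_MEMORY_ENCRYPT_OP ioctl on the VM fd
         * (vm_fd below is a placeholder) before creating any vCPU, e.g. roughly:
         *
         *	struct kvm_sev_cmd cmd = { .id = KVM_SEV_INIT };
         *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
         */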
    375
    376static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
    377{
    378	struct sev_data_activate activate;
    379	int asid = sev_get_asid(kvm);
    380	int ret;
    381
    382	/* activate ASID on the given handle */
    383	activate.handle = handle;
    384	activate.asid   = asid;
    385	ret = sev_guest_activate(&activate, error);
    386
    387	return ret;
    388}
    389
    390static int __sev_issue_cmd(int fd, int id, void *data, int *error)
    391{
    392	struct fd f;
    393	int ret;
    394
    395	f = fdget(fd);
    396	if (!f.file)
    397		return -EBADF;
    398
    399	ret = sev_issue_cmd_external_user(f.file, id, data, error);
    400
    401	fdput(f);
    402	return ret;
    403}
    404
    405static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
    406{
    407	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    408
    409	return __sev_issue_cmd(sev->fd, id, data, error);
    410}
    411
    412static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
    413{
    414	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    415	struct sev_data_launch_start start;
    416	struct kvm_sev_launch_start params;
    417	void *dh_blob, *session_blob;
    418	int *error = &argp->error;
    419	int ret;
    420
    421	if (!sev_guest(kvm))
    422		return -ENOTTY;
    423
    424	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
    425		return -EFAULT;
    426
    427	memset(&start, 0, sizeof(start));
    428
    429	dh_blob = NULL;
    430	if (params.dh_uaddr) {
    431		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
    432		if (IS_ERR(dh_blob))
    433			return PTR_ERR(dh_blob);
    434
    435		start.dh_cert_address = __sme_set(__pa(dh_blob));
    436		start.dh_cert_len = params.dh_len;
    437	}
    438
    439	session_blob = NULL;
    440	if (params.session_uaddr) {
    441		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
    442		if (IS_ERR(session_blob)) {
    443			ret = PTR_ERR(session_blob);
    444			goto e_free_dh;
    445		}
    446
    447		start.session_address = __sme_set(__pa(session_blob));
    448		start.session_len = params.session_len;
    449	}
    450
    451	start.handle = params.handle;
    452	start.policy = params.policy;
    453
    454	/* create memory encryption context */
    455	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
    456	if (ret)
    457		goto e_free_session;
    458
    459	/* Bind ASID to this guest */
    460	ret = sev_bind_asid(kvm, start.handle, error);
    461	if (ret) {
    462		sev_decommission(start.handle);
    463		goto e_free_session;
    464	}
    465
    466	/* return handle to userspace */
    467	params.handle = start.handle;
    468	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
    469		sev_unbind_asid(kvm, start.handle);
    470		ret = -EFAULT;
    471		goto e_free_session;
    472	}
    473
    474	sev->handle = start.handle;
    475	sev->fd = argp->sev_fd;
    476
    477e_free_session:
    478	kfree(session_blob);
    479e_free_dh:
    480	kfree(dh_blob);
    481	return ret;
    482}
    483
    484static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
    485				    unsigned long ulen, unsigned long *n,
    486				    int write)
    487{
    488	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    489	unsigned long npages, size;
    490	int npinned;
    491	unsigned long locked, lock_limit;
    492	struct page **pages;
    493	unsigned long first, last;
    494	int ret;
    495
    496	lockdep_assert_held(&kvm->lock);
    497
    498	if (ulen == 0 || uaddr + ulen < uaddr)
    499		return ERR_PTR(-EINVAL);
    500
    501	/* Calculate number of pages. */
    502	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
    503	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
    504	npages = (last - first + 1);
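        	/*
        	 * Illustrative example (not in the original source): with 4 KiB
        	 * pages, uaddr = 0x1800 and ulen = 0x2000 give first = 1 and
        	 * last = 3, so npages = 3 even though ulen itself spans only two
        	 * page sizes.
        	 */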
    505
    506	locked = sev->pages_locked + npages;
    507	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
    508	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
    509		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
    510		return ERR_PTR(-ENOMEM);
    511	}
    512
    513	if (WARN_ON_ONCE(npages > INT_MAX))
    514		return ERR_PTR(-EINVAL);
    515
    516	/* Avoid using vmalloc for smaller buffers. */
    517	size = npages * sizeof(struct page *);
    518	if (size > PAGE_SIZE)
    519		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
    520	else
    521		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
    522
    523	if (!pages)
    524		return ERR_PTR(-ENOMEM);
    525
    526	/* Pin the user virtual address. */
    527	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
    528	if (npinned != npages) {
    529		pr_err("SEV: Failure locking %lu pages.\n", npages);
    530		ret = -ENOMEM;
    531		goto err;
    532	}
    533
    534	*n = npages;
    535	sev->pages_locked = locked;
    536
    537	return pages;
    538
    539err:
    540	if (npinned > 0)
    541		unpin_user_pages(pages, npinned);
    542
    543	kvfree(pages);
    544	return ERR_PTR(ret);
    545}
    546
    547static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
    548			     unsigned long npages)
    549{
    550	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    551
    552	unpin_user_pages(pages, npages);
    553	kvfree(pages);
    554	sev->pages_locked -= npages;
    555}
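
        /*
         * Illustrative usage sketch (not in the original source), mirroring how
         * the callers below use this pair of helpers:
         *
         *	pages = sev_pin_memory(kvm, uaddr, len, &npages, 1);
         *	if (IS_ERR(pages))
         *		return PTR_ERR(pages);
         *	... issue an SEV firmware command against __sme_page_pa(pages[0]) ...
         *	sev_unpin_memory(kvm, pages, npages);
         */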
    556
    557static void sev_clflush_pages(struct page *pages[], unsigned long npages)
    558{
    559	uint8_t *page_virtual;
    560	unsigned long i;
    561
    562	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
    563	    pages == NULL)
    564		return;
    565
    566	for (i = 0; i < npages; i++) {
    567		page_virtual = kmap_atomic(pages[i]);
    568		clflush_cache_range(page_virtual, PAGE_SIZE);
    569		kunmap_atomic(page_virtual);
    570		cond_resched();
    571	}
    572}
    573
    574static unsigned long get_num_contig_pages(unsigned long idx,
    575				struct page **inpages, unsigned long npages)
    576{
    577	unsigned long paddr, next_paddr;
    578	unsigned long i = idx + 1, pages = 1;
    579
    580	/* find the number of contiguous pages starting from idx */
    581	paddr = __sme_page_pa(inpages[idx]);
    582	while (i < npages) {
    583		next_paddr = __sme_page_pa(inpages[i++]);
    584		if ((paddr + PAGE_SIZE) == next_paddr) {
    585			pages++;
    586			paddr = next_paddr;
    587			continue;
    588		}
    589		break;
    590	}
    591
    592	return pages;
    593}
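
        /*
         * Illustrative example (not in the original source): if __sme_page_pa()
         * of inpages[0..2] is 0x1000, 0x2000 and 0x4000, then
         * get_num_contig_pages(0, inpages, 3) returns 2, since 0x1000 and
         * 0x2000 are adjacent but 0x4000 is not.
         */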
    594
    595static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
    596{
    597	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
    598	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    599	struct kvm_sev_launch_update_data params;
    600	struct sev_data_launch_update_data data;
    601	struct page **inpages;
    602	int ret;
    603
    604	if (!sev_guest(kvm))
    605		return -ENOTTY;
    606
    607	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
    608		return -EFAULT;
    609
    610	vaddr = params.uaddr;
    611	size = params.len;
    612	vaddr_end = vaddr + size;
    613
    614	/* Lock the user memory. */
    615	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
    616	if (IS_ERR(inpages))
    617		return PTR_ERR(inpages);
    618
    619	/*
    620	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
    621	 * place; the cache may contain the data that was written unencrypted.
    622	 */
    623	sev_clflush_pages(inpages, npages);
    624
    625	data.reserved = 0;
    626	data.handle = sev->handle;
    627
    628	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
    629		int offset, len;
    630
    631		/*
    632		 * If the user buffer is not page-aligned, calculate the offset
    633		 * within the page.
    634		 */
    635		offset = vaddr & (PAGE_SIZE - 1);
    636
    637		/* Calculate the number of pages that can be encrypted in one go. */
    638		pages = get_num_contig_pages(i, inpages, npages);
    639
    640		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
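        		/*
        		 * Illustrative example (not in the original source): for
        		 * vaddr = 0x1010 the offset is 0x10, and with two physically
        		 * contiguous pages pinned len = min(2 * 4096 - 0x10, size),
        		 * so each LAUNCH_UPDATE_DATA call covers one physically
        		 * contiguous run of pages.
        		 */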
    641
    642		data.len = len;
    643		data.address = __sme_page_pa(inpages[i]) + offset;
    644		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
    645		if (ret)
    646			goto e_unpin;
    647
    648		size -= len;
    649		next_vaddr = vaddr + len;
    650	}
    651
    652e_unpin:
    653	/* content of memory is updated, mark pages dirty */
    654	for (i = 0; i < npages; i++) {
    655		set_page_dirty_lock(inpages[i]);
    656		mark_page_accessed(inpages[i]);
    657	}
    658	/* unlock the user pages */
    659	sev_unpin_memory(kvm, inpages, npages);
    660	return ret;
    661}
    662
    663static int sev_es_sync_vmsa(struct vcpu_svm *svm)
    664{
    665	struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
    666	struct sev_es_save_area *save = svm->sev_es.vmsa;
    667
    668	/* Check some debug related fields before encrypting the VMSA */
    669	if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
    670		return -EINVAL;
    671
    672	/*
    673	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
    674	 * the traditional VMSA that is part of the VMCB. Copy the
    675	 * traditional VMSA as it has been built so far (in prep
    676	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
    677	 */
    678	memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
    679
     680	/* Sync registers */
    681	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
    682	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
    683	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
    684	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
    685	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
    686	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
    687	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
    688	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
    689#ifdef CONFIG_X86_64
    690	save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
    691	save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
    692	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
    693	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
    694	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
    695	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
    696	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
    697	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
    698#endif
    699	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
    700
    701	/* Sync some non-GPR registers before encrypting */
    702	save->xcr0 = svm->vcpu.arch.xcr0;
    703	save->pkru = svm->vcpu.arch.pkru;
    704	save->xss  = svm->vcpu.arch.ia32_xss;
    705	save->dr6  = svm->vcpu.arch.dr6;
    706
    707	/* Enable the SEV-SNP feature */
    708	if (sev_snp_guest(svm->vcpu.kvm))
    709		save->sev_features |= SVM_SEV_FEAT_SNP_ACTIVE;
    710
    711	/*
    712	 * Save the VMSA synced SEV features. For now, they are the same for
    713	 * all vCPUs, so just save each time.
    714	 */
    715	sev->sev_features = save->sev_features;
    716
    717	return 0;
    718}
    719
    720static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
    721				    int *error)
    722{
    723	struct sev_data_launch_update_vmsa vmsa;
    724	struct vcpu_svm *svm = to_svm(vcpu);
    725	int ret;
    726
    727	/* Perform some pre-encryption checks against the VMSA */
    728	ret = sev_es_sync_vmsa(svm);
    729	if (ret)
    730		return ret;
    731
    732	/*
    733	 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
     734	 * the VMSA memory content (i.e. it will write the same memory region
    735	 * with the guest's key), so invalidate it first.
    736	 */
    737	clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
    738
    739	vmsa.reserved = 0;
    740	vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
    741	vmsa.address = __sme_pa(svm->sev_es.vmsa);
    742	vmsa.len = PAGE_SIZE;
    743	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
    744	if (ret)
     745		return ret;
    746
    747	vcpu->arch.guest_state_protected = true;
    748	return 0;
    749}
    750
    751static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
    752{
    753	struct kvm_vcpu *vcpu;
    754	unsigned long i;
    755	int ret;
    756
    757	if (!sev_es_guest(kvm))
    758		return -ENOTTY;
    759
    760	kvm_for_each_vcpu(i, vcpu, kvm) {
    761		ret = mutex_lock_killable(&vcpu->mutex);
    762		if (ret)
    763			return ret;
    764
    765		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
    766
    767		mutex_unlock(&vcpu->mutex);
    768		if (ret)
    769			return ret;
    770	}
    771
    772	return 0;
    773}
    774
    775static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
    776{
    777	void __user *measure = (void __user *)(uintptr_t)argp->data;
    778	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    779	struct sev_data_launch_measure data;
    780	struct kvm_sev_launch_measure params;
    781	void __user *p = NULL;
    782	void *blob = NULL;
    783	int ret;
    784
    785	if (!sev_guest(kvm))
    786		return -ENOTTY;
    787
    788	if (copy_from_user(&params, measure, sizeof(params)))
    789		return -EFAULT;
    790
    791	memset(&data, 0, sizeof(data));
    792
    793	/* User wants to query the blob length */
    794	if (!params.len)
    795		goto cmd;
    796
    797	p = (void __user *)(uintptr_t)params.uaddr;
    798	if (p) {
    799		if (params.len > SEV_FW_BLOB_MAX_SIZE)
    800			return -EINVAL;
    801
    802		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
    803		if (!blob)
    804			return -ENOMEM;
    805
    806		data.address = __psp_pa(blob);
    807		data.len = params.len;
    808	}
    809
    810cmd:
    811	data.handle = sev->handle;
    812	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
    813
    814	/*
     815	 * If we only queried the blob length, the FW already returned the required length.
    816	 */
    817	if (!params.len)
    818		goto done;
    819
    820	if (ret)
    821		goto e_free_blob;
    822
    823	if (blob) {
    824		if (copy_to_user(p, blob, params.len))
    825			ret = -EFAULT;
    826	}
    827
    828done:
    829	params.len = data.len;
    830	if (copy_to_user(measure, &params, sizeof(params)))
    831		ret = -EFAULT;
    832e_free_blob:
    833	kfree(blob);
    834	return ret;
    835}
    836
    837static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
    838{
    839	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    840	struct sev_data_launch_finish data;
    841
    842	if (!sev_guest(kvm))
    843		return -ENOTTY;
    844
    845	data.handle = sev->handle;
    846	return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
    847}
    848
    849static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
    850{
    851	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    852	struct kvm_sev_guest_status params;
    853	struct sev_data_guest_status data;
    854	int ret;
    855
    856	if (!sev_guest(kvm))
    857		return -ENOTTY;
    858
    859	memset(&data, 0, sizeof(data));
    860
    861	data.handle = sev->handle;
    862	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
    863	if (ret)
    864		return ret;
    865
    866	params.policy = data.policy;
    867	params.state = data.state;
    868	params.handle = data.handle;
    869
    870	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
    871		ret = -EFAULT;
    872
    873	return ret;
    874}
    875
    876static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
    877			       unsigned long dst, int size,
    878			       int *error, bool enc)
    879{
    880	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
    881	struct sev_data_dbg data;
    882
    883	data.reserved = 0;
    884	data.handle = sev->handle;
    885	data.dst_addr = dst;
    886	data.src_addr = src;
    887	data.len = size;
    888
    889	return sev_issue_cmd(kvm,
    890			     enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
    891			     &data, error);
    892}
    893
    894static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
    895			     unsigned long dst_paddr, int sz, int *err)
    896{
    897	int offset;
    898
    899	/*
     900	 * It's safe to read more than we were asked for; the caller should
     901	 * ensure that the destination has enough space.
    902	 */
    903	offset = src_paddr & 15;
    904	src_paddr = round_down(src_paddr, 16);
    905	sz = round_up(sz + offset, 16);
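        	/*
        	 * Illustrative example (not in the original source): src_paddr =
        	 * 0x1009 and sz = 20 become src_paddr = 0x1000 and
        	 * sz = round_up(20 + 9, 16) = 32; the caller later skips the
        	 * leading 9 bytes of the decrypted output.
        	 */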
    906
    907	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
    908}
    909
    910static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
    911				  void __user *dst_uaddr,
    912				  unsigned long dst_paddr,
    913				  int size, int *err)
    914{
    915	struct page *tpage = NULL;
    916	struct vcpu_svm *svm;
    917	int ret, offset;
    918
     919	/* if inputs are not 16-byte aligned then use an intermediate buffer */
    920	if (!IS_ALIGNED(dst_paddr, 16) ||
    921	    !IS_ALIGNED(paddr,     16) ||
    922	    !IS_ALIGNED(size,      16)) {
    923		tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
    924		if (!tpage)
    925			return -ENOMEM;
    926
    927		dst_paddr = __sme_page_pa(tpage);
    928	}
    929
    930	if (dst_uaddr == CPC_VMSA_MAGIC_ADDR) {
    931		svm = to_svm(xa_load(&kvm->vcpu_array, 0));
    932		paddr = __pa(svm->sev_es.vmsa);
    933	}
    934
    935	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
    936	if (ret)
    937		goto e_free;
    938
    939	if (tpage) {
    940		offset = paddr & 15;
    941		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
    942			ret = -EFAULT;
    943	}
    944
    945e_free:
    946	if (tpage)
    947		__free_page(tpage);
    948
    949	return ret;
    950}
    951
    952static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
    953				  void __user *vaddr,
    954				  unsigned long dst_paddr,
    955				  void __user *dst_vaddr,
    956				  int size, int *error)
    957{
    958	struct page *src_tpage = NULL;
    959	struct page *dst_tpage = NULL;
    960	int ret, len = size;
    961
    962	/* If source buffer is not aligned then use an intermediate buffer */
    963	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
    964		src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
    965		if (!src_tpage)
    966			return -ENOMEM;
    967
    968		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
    969			__free_page(src_tpage);
    970			return -EFAULT;
    971		}
    972
    973		paddr = __sme_page_pa(src_tpage);
    974	}
    975
    976	/*
    977	 *  If destination buffer or length is not aligned then do read-modify-write:
    978	 *   - decrypt destination in an intermediate buffer
    979	 *   - copy the source buffer in an intermediate buffer
    980	 *   - use the intermediate buffer as source buffer
    981	 */
    982	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
    983		int dst_offset;
    984
    985		dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
    986		if (!dst_tpage) {
    987			ret = -ENOMEM;
    988			goto e_free;
    989		}
    990
    991		ret = __sev_dbg_decrypt(kvm, dst_paddr,
    992					__sme_page_pa(dst_tpage), size, error);
    993		if (ret)
    994			goto e_free;
    995
    996		/*
     997		 *  If the source is a kernel buffer then use memcpy(),
     998		 *  otherwise copy_from_user().
    999		 */
   1000		dst_offset = dst_paddr & 15;
   1001
   1002		if (src_tpage)
   1003			memcpy(page_address(dst_tpage) + dst_offset,
   1004			       page_address(src_tpage), size);
   1005		else {
   1006			if (copy_from_user(page_address(dst_tpage) + dst_offset,
   1007					   vaddr, size)) {
   1008				ret = -EFAULT;
   1009				goto e_free;
   1010			}
   1011		}
   1012
   1013		paddr = __sme_page_pa(dst_tpage);
   1014		dst_paddr = round_down(dst_paddr, 16);
   1015		len = round_up(size, 16);
   1016	}
   1017
   1018	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
   1019
   1020e_free:
   1021	if (src_tpage)
   1022		__free_page(src_tpage);
   1023	if (dst_tpage)
   1024		__free_page(dst_tpage);
   1025	return ret;
   1026}
   1027
   1028static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
   1029{
   1030	unsigned long vaddr, vaddr_end, next_vaddr;
   1031	unsigned long dst_vaddr;
   1032	struct page **src_p, **dst_p;
   1033	struct kvm_sev_dbg debug;
   1034	unsigned long n;
   1035	unsigned int size;
   1036	bool vmsa_dec;
   1037	int ret;
   1038
   1039	if (!sev_guest(kvm))
   1040		return -ENOTTY;
   1041
   1042	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
   1043		return -EFAULT;
   1044
   1045	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
   1046		return -EINVAL;
   1047	if (!debug.dst_uaddr)
   1048		return -EINVAL;
   1049
   1050	vmsa_dec = false;
   1051	if (debug.src_uaddr == (uintptr_t) CPC_VMSA_MAGIC_ADDR) {
   1052		debug.len = PAGE_SIZE;
   1053		debug.src_uaddr = debug.dst_uaddr;
   1054		vmsa_dec = true;
   1055	}
   1056
   1057	vaddr = debug.src_uaddr;
   1058	size = debug.len;
   1059	vaddr_end = vaddr + size;
   1060	dst_vaddr = debug.dst_uaddr;
   1061
   1062	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
   1063		int len, s_off, d_off;
   1064
   1065		/* lock userspace source and destination page */
   1066		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
   1067		if (IS_ERR(src_p))
   1068			return PTR_ERR(src_p);
   1069
   1070		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
   1071		if (IS_ERR(dst_p)) {
   1072			sev_unpin_memory(kvm, src_p, n);
   1073			return PTR_ERR(dst_p);
   1074		}
   1075
   1076		/*
   1077		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
   1078		 * the pages; flush the destination too so that future accesses do not
   1079		 * see stale data.
   1080		 */
   1081		sev_clflush_pages(src_p, 1);
   1082		sev_clflush_pages(dst_p, 1);
   1083
   1084		/*
   1085		 * Since user buffer may not be page aligned, calculate the
   1086		 * offset within the page.
   1087		 */
   1088		s_off = vaddr & ~PAGE_MASK;
   1089		d_off = dst_vaddr & ~PAGE_MASK;
   1090		len = min_t(size_t, (PAGE_SIZE - s_off), size);
   1091
   1092		if (dec)
   1093			ret = __sev_dbg_decrypt_user(kvm,
   1094						     __sme_page_pa(src_p[0]) + s_off,
   1095						     vmsa_dec ? CPC_VMSA_MAGIC_ADDR
   1096						     	: (void __user *)dst_vaddr,
   1097						     __sme_page_pa(dst_p[0]) + d_off,
   1098						     len, &argp->error);
   1099		else
   1100			ret = __sev_dbg_encrypt_user(kvm,
   1101						     __sme_page_pa(src_p[0]) + s_off,
   1102						     (void __user *)vaddr,
   1103						     __sme_page_pa(dst_p[0]) + d_off,
   1104						     (void __user *)dst_vaddr,
   1105						     len, &argp->error);
   1106
   1107		sev_unpin_memory(kvm, src_p, n);
   1108		sev_unpin_memory(kvm, dst_p, n);
   1109
   1110		if (ret)
   1111			goto err;
   1112
   1113		next_vaddr = vaddr + len;
   1114		dst_vaddr = dst_vaddr + len;
   1115		size -= len;
   1116	}
   1117err:
   1118	return ret;
   1119}
   1120
   1121static int snp_dbg_decrypt_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1122{
   1123	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1124	struct kvm_sev_dbg debug;
   1125	struct vcpu_svm *svm;
   1126	hpa_t src_paddr;
   1127	hpa_t dst_paddr;
   1128	void *vmsa;
   1129	int ret;
   1130
   1131	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
   1132		return -EFAULT;
   1133
   1134	if (debug.len != PAGE_SIZE || debug.src_uaddr != (uint64_t) CPC_VMSA_MAGIC_ADDR)
   1135		return -EINVAL;
   1136
    1137	vmsa = kzalloc(PAGE_SIZE, GFP_KERNEL);
    1138	if (!vmsa)
    1139		return -ENOMEM;
   1140
   1141	svm = to_svm(xa_load(&kvm->vcpu_array, 0));
   1142	src_paddr = __pa(svm->sev_es.vmsa);
   1143	dst_paddr = __pa(vmsa);
   1144	ret = snp_guest_dbg_decrypt_page(__pa(sev->snp_context) >> PAGE_SHIFT,
   1145		src_paddr >> PAGE_SHIFT, dst_paddr >> PAGE_SHIFT, &argp->error);
    1146	if (ret) {
        		kfree(vmsa);
        		return ret;
        	}
   1147
   1148	if (copy_to_user((void __user *) debug.dst_uaddr, vmsa, PAGE_SIZE))
   1149		ret = -EFAULT;
   1150
   1151	kfree(vmsa);
   1152
   1153	return ret;
   1154}
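
        /*
         * Illustrative note (not in the original source): CPC_VMSA_MAGIC_ADDR is
         * part of the CachePC modifications. From the handling here and in
         * sev_dbg_crypt()/__sev_dbg_decrypt_user(), userspace appears to issue
         * KVM_SEV_DBG_DECRYPT with src_uaddr == CPC_VMSA_MAGIC_ADDR and
         * len == PAGE_SIZE to have vCPU 0's (normally encrypted) VMSA decrypted
         * into dst_uaddr; snp_dbg_decrypt_vmsa() presumably serves the SEV-SNP
         * variant of that request via SNP_DBG_DECRYPT on the VMSA page.
         */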
   1155
   1156static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1157{
   1158	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1159	struct sev_data_launch_secret data;
   1160	struct kvm_sev_launch_secret params;
   1161	struct page **pages;
   1162	void *blob, *hdr;
   1163	unsigned long n, i;
   1164	int ret, offset;
   1165
   1166	if (!sev_guest(kvm))
   1167		return -ENOTTY;
   1168
   1169	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
   1170		return -EFAULT;
   1171
   1172	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
   1173	if (IS_ERR(pages))
   1174		return PTR_ERR(pages);
   1175
   1176	/*
   1177	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
   1178	 * place; the cache may contain the data that was written unencrypted.
   1179	 */
   1180	sev_clflush_pages(pages, n);
   1181
   1182	/*
    1183	 * The secret must be copied into a contiguous memory region, so verify
    1184	 * that the userspace memory pages are contiguous before issuing the command.
   1185	 */
   1186	if (get_num_contig_pages(0, pages, n) != n) {
   1187		ret = -EINVAL;
   1188		goto e_unpin_memory;
   1189	}
   1190
   1191	memset(&data, 0, sizeof(data));
   1192
   1193	offset = params.guest_uaddr & (PAGE_SIZE - 1);
   1194	data.guest_address = __sme_page_pa(pages[0]) + offset;
   1195	data.guest_len = params.guest_len;
   1196
   1197	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
   1198	if (IS_ERR(blob)) {
   1199		ret = PTR_ERR(blob);
   1200		goto e_unpin_memory;
   1201	}
   1202
   1203	data.trans_address = __psp_pa(blob);
   1204	data.trans_len = params.trans_len;
   1205
   1206	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
   1207	if (IS_ERR(hdr)) {
   1208		ret = PTR_ERR(hdr);
   1209		goto e_free_blob;
   1210	}
   1211	data.hdr_address = __psp_pa(hdr);
   1212	data.hdr_len = params.hdr_len;
   1213
   1214	data.handle = sev->handle;
   1215	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
   1216
   1217	kfree(hdr);
   1218
   1219e_free_blob:
   1220	kfree(blob);
   1221e_unpin_memory:
   1222	/* content of memory is updated, mark pages dirty */
   1223	for (i = 0; i < n; i++) {
   1224		set_page_dirty_lock(pages[i]);
   1225		mark_page_accessed(pages[i]);
   1226	}
   1227	sev_unpin_memory(kvm, pages, n);
   1228	return ret;
   1229}
   1230
   1231static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1232{
   1233	void __user *report = (void __user *)(uintptr_t)argp->data;
   1234	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1235	struct sev_data_attestation_report data;
   1236	struct kvm_sev_attestation_report params;
   1237	void __user *p;
   1238	void *blob = NULL;
   1239	int ret;
   1240
   1241	if (!sev_guest(kvm))
   1242		return -ENOTTY;
   1243
   1244	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
   1245		return -EFAULT;
   1246
   1247	memset(&data, 0, sizeof(data));
   1248
   1249	/* User wants to query the blob length */
   1250	if (!params.len)
   1251		goto cmd;
   1252
   1253	p = (void __user *)(uintptr_t)params.uaddr;
   1254	if (p) {
   1255		if (params.len > SEV_FW_BLOB_MAX_SIZE)
   1256			return -EINVAL;
   1257
   1258		blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
   1259		if (!blob)
   1260			return -ENOMEM;
   1261
   1262		data.address = __psp_pa(blob);
   1263		data.len = params.len;
   1264		memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
   1265	}
   1266cmd:
   1267	data.handle = sev->handle;
   1268	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
   1269	/*
    1270	 * If we only queried the blob length, the FW already returned the required length.
   1271	 */
   1272	if (!params.len)
   1273		goto done;
   1274
   1275	if (ret)
   1276		goto e_free_blob;
   1277
   1278	if (blob) {
   1279		if (copy_to_user(p, blob, params.len))
   1280			ret = -EFAULT;
   1281	}
   1282
   1283done:
   1284	params.len = data.len;
   1285	if (copy_to_user(report, &params, sizeof(params)))
   1286		ret = -EFAULT;
   1287e_free_blob:
   1288	kfree(blob);
   1289	return ret;
   1290}
   1291
   1292/* Userspace wants to query session length. */
   1293static int
   1294__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
   1295				      struct kvm_sev_send_start *params)
   1296{
   1297	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1298	struct sev_data_send_start data;
   1299	int ret;
   1300
   1301	memset(&data, 0, sizeof(data));
   1302	data.handle = sev->handle;
   1303	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
   1304
   1305	params->session_len = data.session_len;
   1306	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
   1307				sizeof(struct kvm_sev_send_start)))
   1308		ret = -EFAULT;
   1309
   1310	return ret;
   1311}
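
        /*
         * Illustrative usage sketch (not in the original source): migration
         * tooling typically calls KVM_SEV_SEND_START twice, first with
         * session_len == 0 so the helper above reports the required length,
         * then again with a buffer of that size, e.g. roughly:
         *
         *	struct kvm_sev_send_start params = { .session_len = 0 };
         *	struct kvm_sev_cmd cmd = { .id = KVM_SEV_SEND_START, .data = (__u64)&params };
         *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);	query required session_len
         *	params.session_uaddr = (__u64)buf;		buf of params.session_len bytes
         *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);	produce the session blob
         *
         * (the real call also needs the PDH and certificate fields filled in;
         * vm_fd and buf are placeholders.)
         */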
   1312
   1313static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1314{
   1315	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1316	struct sev_data_send_start data;
   1317	struct kvm_sev_send_start params;
   1318	void *amd_certs, *session_data;
   1319	void *pdh_cert, *plat_certs;
   1320	int ret;
   1321
   1322	if (!sev_guest(kvm))
   1323		return -ENOTTY;
   1324
   1325	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
   1326				sizeof(struct kvm_sev_send_start)))
   1327		return -EFAULT;
   1328
   1329	/* if session_len is zero, userspace wants to query the session length */
   1330	if (!params.session_len)
   1331		return __sev_send_start_query_session_length(kvm, argp,
   1332				&params);
   1333
   1334	/* some sanity checks */
   1335	if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
   1336	    !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
   1337		return -EINVAL;
   1338
   1339	/* allocate the memory to hold the session data blob */
   1340	session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
   1341	if (!session_data)
   1342		return -ENOMEM;
   1343
   1344	/* copy the certificate blobs from userspace */
   1345	pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
   1346				params.pdh_cert_len);
   1347	if (IS_ERR(pdh_cert)) {
   1348		ret = PTR_ERR(pdh_cert);
   1349		goto e_free_session;
   1350	}
   1351
   1352	plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
   1353				params.plat_certs_len);
   1354	if (IS_ERR(plat_certs)) {
   1355		ret = PTR_ERR(plat_certs);
   1356		goto e_free_pdh;
   1357	}
   1358
   1359	amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
   1360				params.amd_certs_len);
   1361	if (IS_ERR(amd_certs)) {
   1362		ret = PTR_ERR(amd_certs);
   1363		goto e_free_plat_cert;
   1364	}
   1365
   1366	/* populate the FW SEND_START field with system physical address */
   1367	memset(&data, 0, sizeof(data));
   1368	data.pdh_cert_address = __psp_pa(pdh_cert);
   1369	data.pdh_cert_len = params.pdh_cert_len;
   1370	data.plat_certs_address = __psp_pa(plat_certs);
   1371	data.plat_certs_len = params.plat_certs_len;
   1372	data.amd_certs_address = __psp_pa(amd_certs);
   1373	data.amd_certs_len = params.amd_certs_len;
   1374	data.session_address = __psp_pa(session_data);
   1375	data.session_len = params.session_len;
   1376	data.handle = sev->handle;
   1377
   1378	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
   1379
   1380	if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
   1381			session_data, params.session_len)) {
   1382		ret = -EFAULT;
   1383		goto e_free_amd_cert;
   1384	}
   1385
   1386	params.policy = data.policy;
   1387	params.session_len = data.session_len;
   1388	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
   1389				sizeof(struct kvm_sev_send_start)))
   1390		ret = -EFAULT;
   1391
   1392e_free_amd_cert:
   1393	kfree(amd_certs);
   1394e_free_plat_cert:
   1395	kfree(plat_certs);
   1396e_free_pdh:
   1397	kfree(pdh_cert);
   1398e_free_session:
   1399	kfree(session_data);
   1400	return ret;
   1401}
   1402
   1403/* Userspace wants to query either header or trans length. */
   1404static int
   1405__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
   1406				     struct kvm_sev_send_update_data *params)
   1407{
   1408	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1409	struct sev_data_send_update_data data;
   1410	int ret;
   1411
   1412	memset(&data, 0, sizeof(data));
   1413	data.handle = sev->handle;
   1414	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
   1415
   1416	params->hdr_len = data.hdr_len;
   1417	params->trans_len = data.trans_len;
   1418
   1419	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
   1420			 sizeof(struct kvm_sev_send_update_data)))
   1421		ret = -EFAULT;
   1422
   1423	return ret;
   1424}
   1425
   1426static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1427{
   1428	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1429	struct sev_data_send_update_data data;
   1430	struct kvm_sev_send_update_data params;
   1431	void *hdr, *trans_data;
   1432	struct page **guest_page;
   1433	unsigned long n;
   1434	int ret, offset;
   1435
   1436	if (!sev_guest(kvm))
   1437		return -ENOTTY;
   1438
   1439	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
   1440			sizeof(struct kvm_sev_send_update_data)))
   1441		return -EFAULT;
   1442
   1443	/* userspace wants to query either header or trans length */
   1444	if (!params.trans_len || !params.hdr_len)
   1445		return __sev_send_update_data_query_lengths(kvm, argp, &params);
   1446
   1447	if (!params.trans_uaddr || !params.guest_uaddr ||
   1448	    !params.guest_len || !params.hdr_uaddr)
   1449		return -EINVAL;
   1450
   1451	/* Check if we are crossing the page boundary */
   1452	offset = params.guest_uaddr & (PAGE_SIZE - 1);
   1453	if ((params.guest_len + offset > PAGE_SIZE))
   1454		return -EINVAL;
   1455
   1456	/* Pin guest memory */
   1457	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
   1458				    PAGE_SIZE, &n, 0);
   1459	if (IS_ERR(guest_page))
   1460		return PTR_ERR(guest_page);
   1461
   1462	/* allocate memory for header and transport buffer */
   1463	ret = -ENOMEM;
   1464	hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
   1465	if (!hdr)
   1466		goto e_unpin;
   1467
   1468	trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
   1469	if (!trans_data)
   1470		goto e_free_hdr;
   1471
   1472	memset(&data, 0, sizeof(data));
   1473	data.hdr_address = __psp_pa(hdr);
   1474	data.hdr_len = params.hdr_len;
   1475	data.trans_address = __psp_pa(trans_data);
   1476	data.trans_len = params.trans_len;
   1477
   1478	/* The SEND_UPDATE_DATA command requires C-bit to be always set. */
   1479	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
   1480	data.guest_address |= sev_me_mask;
   1481	data.guest_len = params.guest_len;
   1482	data.handle = sev->handle;
   1483
   1484	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
   1485
   1486	if (ret)
   1487		goto e_free_trans_data;
   1488
   1489	/* copy transport buffer to user space */
   1490	if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
   1491			 trans_data, params.trans_len)) {
   1492		ret = -EFAULT;
   1493		goto e_free_trans_data;
   1494	}
   1495
   1496	/* Copy packet header to userspace. */
   1497	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
   1498			 params.hdr_len))
   1499		ret = -EFAULT;
   1500
   1501e_free_trans_data:
   1502	kfree(trans_data);
   1503e_free_hdr:
   1504	kfree(hdr);
   1505e_unpin:
   1506	sev_unpin_memory(kvm, guest_page, n);
   1507
   1508	return ret;
   1509}
   1510
   1511static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1512{
   1513	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1514	struct sev_data_send_finish data;
   1515
   1516	if (!sev_guest(kvm))
   1517		return -ENOTTY;
   1518
   1519	data.handle = sev->handle;
   1520	return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
   1521}
   1522
   1523static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1524{
   1525	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1526	struct sev_data_send_cancel data;
   1527
   1528	if (!sev_guest(kvm))
   1529		return -ENOTTY;
   1530
   1531	data.handle = sev->handle;
   1532	return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
   1533}
   1534
   1535static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1536{
   1537	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1538	struct sev_data_receive_start start;
   1539	struct kvm_sev_receive_start params;
   1540	int *error = &argp->error;
   1541	void *session_data;
   1542	void *pdh_data;
   1543	int ret;
   1544
   1545	if (!sev_guest(kvm))
   1546		return -ENOTTY;
   1547
    1548	/* Get parameters from userspace */
   1549	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
   1550			sizeof(struct kvm_sev_receive_start)))
   1551		return -EFAULT;
   1552
   1553	/* some sanity checks */
   1554	if (!params.pdh_uaddr || !params.pdh_len ||
   1555	    !params.session_uaddr || !params.session_len)
   1556		return -EINVAL;
   1557
   1558	pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
   1559	if (IS_ERR(pdh_data))
   1560		return PTR_ERR(pdh_data);
   1561
   1562	session_data = psp_copy_user_blob(params.session_uaddr,
   1563			params.session_len);
   1564	if (IS_ERR(session_data)) {
   1565		ret = PTR_ERR(session_data);
   1566		goto e_free_pdh;
   1567	}
   1568
   1569	memset(&start, 0, sizeof(start));
   1570	start.handle = params.handle;
   1571	start.policy = params.policy;
   1572	start.pdh_cert_address = __psp_pa(pdh_data);
   1573	start.pdh_cert_len = params.pdh_len;
   1574	start.session_address = __psp_pa(session_data);
   1575	start.session_len = params.session_len;
   1576
   1577	/* create memory encryption context */
   1578	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
   1579				error);
   1580	if (ret)
   1581		goto e_free_session;
   1582
   1583	/* Bind ASID to this guest */
   1584	ret = sev_bind_asid(kvm, start.handle, error);
   1585	if (ret) {
   1586		sev_decommission(start.handle);
   1587		goto e_free_session;
   1588	}
   1589
   1590	params.handle = start.handle;
   1591	if (copy_to_user((void __user *)(uintptr_t)argp->data,
   1592			 &params, sizeof(struct kvm_sev_receive_start))) {
   1593		ret = -EFAULT;
   1594		sev_unbind_asid(kvm, start.handle);
   1595		goto e_free_session;
   1596	}
   1597
    1598	sev->handle = start.handle;
   1599	sev->fd = argp->sev_fd;
   1600
   1601e_free_session:
   1602	kfree(session_data);
   1603e_free_pdh:
   1604	kfree(pdh_data);
   1605
   1606	return ret;
   1607}
   1608
   1609static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1610{
   1611	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1612	struct kvm_sev_receive_update_data params;
   1613	struct sev_data_receive_update_data data;
   1614	void *hdr = NULL, *trans = NULL;
   1615	struct page **guest_page;
   1616	unsigned long n;
   1617	int ret, offset;
   1618
   1619	if (!sev_guest(kvm))
   1620		return -EINVAL;
   1621
   1622	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
   1623			sizeof(struct kvm_sev_receive_update_data)))
   1624		return -EFAULT;
   1625
   1626	if (!params.hdr_uaddr || !params.hdr_len ||
   1627	    !params.guest_uaddr || !params.guest_len ||
   1628	    !params.trans_uaddr || !params.trans_len)
   1629		return -EINVAL;
   1630
   1631	/* Check if we are crossing the page boundary */
   1632	offset = params.guest_uaddr & (PAGE_SIZE - 1);
   1633	if ((params.guest_len + offset > PAGE_SIZE))
   1634		return -EINVAL;
   1635
   1636	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
   1637	if (IS_ERR(hdr))
   1638		return PTR_ERR(hdr);
   1639
   1640	trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
   1641	if (IS_ERR(trans)) {
   1642		ret = PTR_ERR(trans);
   1643		goto e_free_hdr;
   1644	}
   1645
   1646	memset(&data, 0, sizeof(data));
   1647	data.hdr_address = __psp_pa(hdr);
   1648	data.hdr_len = params.hdr_len;
   1649	data.trans_address = __psp_pa(trans);
   1650	data.trans_len = params.trans_len;
   1651
   1652	/* Pin guest memory */
   1653	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
   1654				    PAGE_SIZE, &n, 1);
   1655	if (IS_ERR(guest_page)) {
   1656		ret = PTR_ERR(guest_page);
   1657		goto e_free_trans;
   1658	}
   1659
   1660	/*
    1661	 * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA; the PSP
   1662	 * encrypts the written data with the guest's key, and the cache may
   1663	 * contain dirty, unencrypted data.
   1664	 */
   1665	sev_clflush_pages(guest_page, n);
   1666
   1667	/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
   1668	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
   1669	data.guest_address |= sev_me_mask;
   1670	data.guest_len = params.guest_len;
   1671	data.handle = sev->handle;
   1672
   1673	ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
   1674				&argp->error);
   1675
   1676	sev_unpin_memory(kvm, guest_page, n);
   1677
   1678e_free_trans:
   1679	kfree(trans);
   1680e_free_hdr:
   1681	kfree(hdr);
   1682
   1683	return ret;
   1684}
   1685
   1686static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1687{
   1688	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1689	struct sev_data_receive_finish data;
   1690
   1691	if (!sev_guest(kvm))
   1692		return -ENOTTY;
   1693
   1694	data.handle = sev->handle;
   1695	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
   1696}
   1697
   1698static bool is_cmd_allowed_from_mirror(u32 cmd_id)
   1699{
   1700	/*
    1701	 * Allow mirror VMs to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
    1702	 * on active mirror VMs. Also allow the debugging and status commands.
   1703	 */
   1704	if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
   1705	    cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
   1706	    cmd_id == KVM_SEV_DBG_ENCRYPT)
   1707		return true;
   1708
   1709	return false;
   1710}
   1711
   1712static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
   1713{
   1714	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
   1715	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
   1716	int r = -EBUSY;
   1717
   1718	if (dst_kvm == src_kvm)
   1719		return -EINVAL;
   1720
   1721	/*
   1722	 * Bail if these VMs are already involved in a migration to avoid
   1723	 * deadlock between two VMs trying to migrate to/from each other.
   1724	 */
   1725	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
   1726		return -EBUSY;
   1727
   1728	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
   1729		goto release_dst;
   1730
   1731	r = -EINTR;
   1732	if (mutex_lock_killable(&dst_kvm->lock))
   1733		goto release_src;
   1734	if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
   1735		goto unlock_dst;
   1736	return 0;
   1737
   1738unlock_dst:
   1739	mutex_unlock(&dst_kvm->lock);
   1740release_src:
   1741	atomic_set_release(&src_sev->migration_in_progress, 0);
   1742release_dst:
   1743	atomic_set_release(&dst_sev->migration_in_progress, 0);
   1744	return r;
   1745}
   1746
   1747static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
   1748{
   1749	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
   1750	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
   1751
   1752	mutex_unlock(&dst_kvm->lock);
   1753	mutex_unlock(&src_kvm->lock);
   1754	atomic_set_release(&dst_sev->migration_in_progress, 0);
   1755	atomic_set_release(&src_sev->migration_in_progress, 0);
   1756}
   1757
   1758/* vCPU mutex subclasses.  */
   1759enum sev_migration_role {
   1760	SEV_MIGRATION_SOURCE = 0,
   1761	SEV_MIGRATION_TARGET,
   1762	SEV_NR_MIGRATION_ROLES,
   1763};
   1764
   1765static int sev_lock_vcpus_for_migration(struct kvm *kvm,
   1766					enum sev_migration_role role)
   1767{
   1768	struct kvm_vcpu *vcpu;
   1769	unsigned long i, j;
   1770	bool first = true;
   1771
   1772	kvm_for_each_vcpu(i, vcpu, kvm) {
   1773		if (mutex_lock_killable_nested(&vcpu->mutex, role))
   1774			goto out_unlock;
   1775
   1776		if (first) {
   1777			/*
   1778			 * Reset the role to one that avoids colliding with
   1779			 * the role used for the first vcpu mutex.
   1780			 */
   1781			role = SEV_NR_MIGRATION_ROLES;
   1782			first = false;
   1783		} else {
   1784			mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
   1785		}
   1786	}
   1787
   1788	return 0;
   1789
   1790out_unlock:
   1791
   1792	first = true;
   1793	kvm_for_each_vcpu(j, vcpu, kvm) {
   1794		if (i == j)
   1795			break;
   1796
   1797		if (first)
   1798			first = false;
   1799		else
   1800			mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
   1801
   1802
   1803		mutex_unlock(&vcpu->mutex);
   1804	}
   1805	return -EINTR;
   1806}
   1807
   1808static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
   1809{
   1810	struct kvm_vcpu *vcpu;
   1811	unsigned long i;
   1812	bool first = true;
   1813
   1814	kvm_for_each_vcpu(i, vcpu, kvm) {
   1815		if (first)
   1816			first = false;
   1817		else
   1818			mutex_acquire(&vcpu->mutex.dep_map,
   1819				      SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
   1820
   1821		mutex_unlock(&vcpu->mutex);
   1822	}
   1823}
   1824
   1825static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
   1826{
   1827	struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
   1828	struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
   1829	struct kvm_vcpu *dst_vcpu, *src_vcpu;
   1830	struct vcpu_svm *dst_svm, *src_svm;
   1831	struct kvm_sev_info *mirror;
   1832	unsigned long i;
   1833
   1834	dst->active = true;
   1835	dst->asid = src->asid;
   1836	dst->handle = src->handle;
   1837	dst->pages_locked = src->pages_locked;
   1838	dst->enc_context_owner = src->enc_context_owner;
   1839	dst->es_active = src->es_active;
   1840
   1841	src->asid = 0;
   1842	src->active = false;
   1843	src->handle = 0;
   1844	src->pages_locked = 0;
   1845	src->enc_context_owner = NULL;
   1846	src->es_active = false;
   1847
   1848	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
   1849
   1850	/*
   1851	 * If this VM has mirrors, "transfer" each mirror's refcount of the
   1852	 * source to the destination (this KVM).  The caller holds a reference
   1853	 * to the source, so there's no danger of use-after-free.
   1854	 */
   1855	list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
   1856	list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) {
   1857		kvm_get_kvm(dst_kvm);
   1858		kvm_put_kvm(src_kvm);
   1859		mirror->enc_context_owner = dst_kvm;
   1860	}
   1861
   1862	/*
   1863	 * If this VM is a mirror, remove the old mirror from the owners list
   1864	 * and add the new mirror to the list.
   1865	 */
   1866	if (is_mirroring_enc_context(dst_kvm)) {
   1867		struct kvm_sev_info *owner_sev_info =
   1868			&to_kvm_svm(dst->enc_context_owner)->sev_info;
   1869
   1870		list_del(&src->mirror_entry);
   1871		list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
   1872	}
   1873
   1874	kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
   1875		dst_svm = to_svm(dst_vcpu);
   1876
   1877		sev_init_vmcb(dst_svm);
   1878
   1879		if (!dst->es_active)
   1880			continue;
   1881
   1882		/*
   1883		 * Note, the source is not required to have the same number of
   1884		 * vCPUs as the destination when migrating a vanilla SEV VM.
   1885		 */
    1886		src_vcpu = kvm_get_vcpu(src_kvm, i);
   1887		src_svm = to_svm(src_vcpu);
   1888
   1889		/*
   1890		 * Transfer VMSA and GHCB state to the destination.  Nullify and
   1891		 * clear source fields as appropriate, the state now belongs to
   1892		 * the destination.
   1893		 */
   1894		memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
   1895		dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
   1896		dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
   1897		dst_vcpu->arch.guest_state_protected = true;
   1898
   1899		memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
   1900		src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
   1901		src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
   1902		src_vcpu->arch.guest_state_protected = false;
   1903	}
   1904}
   1905
   1906static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
   1907{
   1908	struct kvm_vcpu *src_vcpu;
   1909	unsigned long i;
   1910
   1911	if (!sev_es_guest(src))
   1912		return 0;
   1913
   1914	if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
   1915		return -EINVAL;
   1916
   1917	kvm_for_each_vcpu(i, src_vcpu, src) {
   1918		if (!src_vcpu->arch.guest_state_protected)
   1919			return -EINVAL;
   1920	}
   1921
   1922	return 0;
   1923}
   1924
   1925int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
   1926{
   1927	struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
   1928	struct kvm_sev_info *src_sev, *cg_cleanup_sev;
   1929	struct file *source_kvm_file;
   1930	struct kvm *source_kvm;
   1931	bool charged = false;
   1932	int ret;
   1933
   1934	source_kvm_file = fget(source_fd);
   1935	if (!file_is_kvm(source_kvm_file)) {
   1936		ret = -EBADF;
   1937		goto out_fput;
   1938	}
   1939
   1940	source_kvm = source_kvm_file->private_data;
   1941	ret = sev_lock_two_vms(kvm, source_kvm);
   1942	if (ret)
   1943		goto out_fput;
   1944
   1945	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
   1946		ret = -EINVAL;
   1947		goto out_unlock;
   1948	}
   1949
   1950	src_sev = &to_kvm_svm(source_kvm)->sev_info;
   1951
   1952	dst_sev->misc_cg = get_current_misc_cg();
   1953	cg_cleanup_sev = dst_sev;
   1954	if (dst_sev->misc_cg != src_sev->misc_cg) {
   1955		ret = sev_misc_cg_try_charge(dst_sev);
   1956		if (ret)
   1957			goto out_dst_cgroup;
   1958		charged = true;
   1959	}
   1960
   1961	ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
   1962	if (ret)
   1963		goto out_dst_cgroup;
   1964	ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
   1965	if (ret)
   1966		goto out_dst_vcpu;
   1967
   1968	ret = sev_check_source_vcpus(kvm, source_kvm);
   1969	if (ret)
   1970		goto out_source_vcpu;
   1971
   1972	sev_migrate_from(kvm, source_kvm);
   1973	kvm_vm_dead(source_kvm);
   1974	cg_cleanup_sev = src_sev;
   1975	ret = 0;
   1976
   1977out_source_vcpu:
   1978	sev_unlock_vcpus_for_migration(source_kvm);
   1979out_dst_vcpu:
   1980	sev_unlock_vcpus_for_migration(kvm);
   1981out_dst_cgroup:
   1982	/* Operates on the source on success, on the destination on failure.  */
   1983	if (charged)
   1984		sev_misc_cg_uncharge(cg_cleanup_sev);
   1985	put_misc_cg(cg_cleanup_sev->misc_cg);
   1986	cg_cleanup_sev->misc_cg = NULL;
   1987out_unlock:
   1988	sev_unlock_two_vms(kvm, source_kvm);
   1989out_fput:
   1990	if (source_kvm_file)
   1991		fput(source_kvm_file);
   1992	return ret;
   1993}
   1994
   1995static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
   1996{
   1997	void *context = NULL, *certs_data = NULL, *resp_page = NULL;
   1998	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   1999	struct sev_data_snp_gctx_create data = {};
   2000	int rc;
   2001
   2002	/* Allocate memory used for the certs data in SNP guest request */
   2003	certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT);
   2004	if (!certs_data)
   2005		return NULL;
   2006
   2007	/* Allocate memory for context page */
   2008	context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
   2009	if (!context)
   2010		goto e_free;
   2011
   2012	/* Allocate a firmware buffer used during the guest command handling. */
   2013	resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT);
   2014	if (!resp_page)
   2015		goto e_free;
   2016
   2017	data.gctx_paddr = __psp_pa(context);
   2018	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error);
   2019	if (rc)
   2020		goto e_free;
   2021
   2022	sev->snp_certs_data = certs_data;
   2023
   2024	return context;
   2025
   2026e_free:
   2027	snp_free_firmware_page(context);
   2028	kfree(certs_data);
   2029	return NULL;
   2030}
   2031
   2032static int snp_bind_asid(struct kvm *kvm, int *error)
   2033{
   2034	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2035	struct sev_data_snp_activate data = {0};
   2036
   2037	data.gctx_paddr = __psp_pa(sev->snp_context);
   2038	data.asid   = sev_get_asid(kvm);
   2039	return sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error);
   2040}
   2041
   2042static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
   2043{
   2044	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2045	struct sev_data_snp_launch_start start = {0};
   2046	struct kvm_sev_snp_launch_start params;
   2047	int rc;
   2048
   2049	if (!sev_snp_guest(kvm))
   2050		return -ENOTTY;
   2051
   2052	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
   2053		return -EFAULT;
   2054
   2055	sev->snp_context = snp_context_create(kvm, argp);
   2056	if (!sev->snp_context)
   2057		return -ENOTTY;
   2058
   2059	start.gctx_paddr = __psp_pa(sev->snp_context);
   2060	start.policy = params.policy;
   2061	memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
   2062	rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error);
   2063	if (rc)
   2064		goto e_free_context;
   2065
   2066	sev->fd = argp->sev_fd;
   2067	rc = snp_bind_asid(kvm, &argp->error);
   2068	if (rc)
   2069		goto e_free_context;
   2070
   2071	return 0;
   2072
   2073e_free_context:
   2074	snp_decommission_context(kvm);
   2075
   2076	return rc;
   2077}
   2078
   2079static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len)
   2080{
   2081	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2082	struct list_head *head = &sev->regions_list;
   2083	struct enc_region *i;
   2084
   2085	lockdep_assert_held(&kvm->lock);
   2086
   2087	list_for_each_entry(i, head, list) {
   2088		u64 start = i->uaddr;
   2089		u64 end = start + i->size;
   2090
   2091		if (start <= hva && end >= (hva + len))
   2092			return true;
   2093	}
   2094
   2095	return false;
   2096}
   2097
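        /*
         * SNP guest pages are assigned to the guest's ASID in the RMP table, so
         * the physical pages backing the registered range must not change under
         * the mapping.  Mark the VMAs unmergeable so that KSM cannot replace the
         * backing pages behind our back.
         */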
   2098static int snp_mark_unmergable(struct kvm *kvm, u64 start, u64 size)
   2099{
   2100	struct vm_area_struct *vma;
   2101	u64 end = start + size;
   2102	int ret;
   2103
   2104	do {
   2105		vma = find_vma_intersection(kvm->mm, start, end);
   2106		if (!vma) {
   2107			ret = -EINVAL;
   2108			break;
   2109		}
   2110
   2111		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
   2112				  MADV_UNMERGEABLE, &vma->vm_flags);
   2113		if (ret)
   2114			break;
   2115
   2116		start = vma->vm_end;
   2117	} while (end > vma->vm_end);
   2118
   2119	return ret;
   2120}
   2121
   2122static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
   2123{
   2124	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2125	struct sev_data_snp_launch_update data = {0};
   2126	struct kvm_sev_snp_launch_update params;
   2127	unsigned long npages, pfn, n = 0;
   2128	int *error = &argp->error;
   2129	struct page **inpages;
   2130	int ret, i, level;
   2131	u64 gfn;
   2132
   2133	if (!sev_snp_guest(kvm))
   2134		return -ENOTTY;
   2135
   2136	if (!sev->snp_context)
   2137		return -EINVAL;
   2138
   2139	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
   2140		return -EFAULT;
   2141
   2142	/* Verify that the specified address range is registered. */
   2143	if (!is_hva_registered(kvm, params.uaddr, params.len))
   2144		return -EINVAL;
   2145
   2146	mmap_write_lock(kvm->mm);
   2147	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
   2148	mmap_write_unlock(kvm->mm);
   2149	if (ret)
   2150		return -EFAULT;
   2151
   2152	/*
    2153	 * The userspace memory is already locked, so technically we don't
    2154	 * need to lock it again. However, a later part of this function needs
    2155	 * the pfns, so call sev_pin_memory() to obtain the list of pages to
    2156	 * iterate through.
   2157	 */
   2158	inpages = sev_pin_memory(kvm, params.uaddr, params.len, &npages, 1);
   2159	if (!inpages)
   2160		return -ENOMEM;
   2161
   2162	/*
   2163	 * Verify that all the pages are marked shared in the RMP table before
    2164	 * going further. This avoids the case where userspace tries to
    2165	 * update the same page twice.
   2166	 */
   2167	for (i = 0; i < npages; i++) {
   2168		if (snp_lookup_rmpentry(page_to_pfn(inpages[i]), &level) != 0) {
   2169			sev_unpin_memory(kvm, inpages, npages);
   2170			return -EFAULT;
   2171		}
   2172	}
   2173
   2174	gfn = params.start_gfn;
   2175	level = PG_LEVEL_4K;
   2176	data.gctx_paddr = __psp_pa(sev->snp_context);
   2177
   2178	for (i = 0; i < npages; i++) {
   2179		pfn = page_to_pfn(inpages[i]);
   2180
   2181		ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, level, sev_get_asid(kvm), true);
   2182		if (ret) {
   2183			ret = -EFAULT;
   2184			goto e_unpin;
   2185		}
   2186
   2187		n++;
   2188		data.address = __sme_page_pa(inpages[i]);
   2189		data.page_size = X86_TO_RMP_PG_LEVEL(level);
   2190		data.page_type = params.page_type;
   2191		data.vmpl3_perms = params.vmpl3_perms;
   2192		data.vmpl2_perms = params.vmpl2_perms;
   2193		data.vmpl1_perms = params.vmpl1_perms;
   2194		ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, &data, error);
   2195		if (ret) {
   2196			/*
    2197			 * If the command failed, the page needs to be reclaimed.
   2198			 */
   2199			snp_page_reclaim(pfn);
   2200			goto e_unpin;
   2201		}
   2202
   2203		gfn++;
   2204	}
   2205
   2206e_unpin:
   2207	/* Content of memory is updated, mark pages dirty */
   2208	for (i = 0; i < n; i++) {
   2209		set_page_dirty_lock(inpages[i]);
   2210		mark_page_accessed(inpages[i]);
   2211
   2212		/*
    2213		 * If an error occurred, update the RMP entry of each page that was
    2214		 * made private above to return ownership to the hypervisor.
    2215		 */
    2216		if (ret)
    2217			host_rmp_make_shared(page_to_pfn(inpages[i]), level, true);
   2218	}
   2219
   2220	/* Unlock the user pages */
   2221	sev_unpin_memory(kvm, inpages, npages);
   2222
   2223	return ret;
   2224}
   2225
   2226static int rmpupdate_noremap(u64 pfn, struct rmpupdate *val)
   2227{
   2228	unsigned long paddr = pfn << PAGE_SHIFT;
   2229	int ret, level, npages;
   2230	int retries = 0;
   2231
   2232	if (!pfn_valid(pfn))
   2233		return -EINVAL;
   2234
   2235	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
   2236		return -ENXIO;
   2237
   2238	level = RMP_TO_X86_PG_LEVEL(val->pagesize);
   2239	npages = page_level_size(level) / PAGE_SIZE;
   2240
   2241
   2242retry:
   2243	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
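        	/*
        	 * RMPUPDATE takes the physical address of the target page in RAX and
        	 * a pointer to the desired RMP entry state (struct rmpupdate) in RCX,
        	 * and returns a status code in EAX, matching the register constraints
        	 * of the inline asm below.
        	 */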
   2244	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
   2245		     : "=a"(ret)
   2246		     : "a"(paddr), "c"((unsigned long)val)
   2247		     : "memory", "cc");
   2248
   2249	if (ret) {
   2250		if (!retries) {
   2251			pr_err("rmpupdate failed, ret: %d, pfn: %llx, npages: %d, level: %d, retrying (max: %d)...\n",
   2252			       ret, pfn, npages, level, 2 * num_present_cpus());
   2253			dump_stack();
   2254		}
   2255		retries++;
   2256		if (retries < 2 * num_present_cpus())
   2257			goto retry;
   2258	} else if (retries > 0) {
   2259		pr_err("rmpupdate for pfn %llx succeeded after %d retries\n", pfn, retries);
   2260	}
   2261
   2262	return ret;
   2263}
   2264
   2265int rmp_make_private_noremap(u64 pfn, u64 gpa, enum pg_level level, int asid, bool immutable)
   2266{
   2267	struct rmpupdate val;
   2268
   2269	if (!pfn_valid(pfn))
   2270		return -EINVAL;
   2271
   2272	memset(&val, 0, sizeof(val));
   2273	val.assigned = 1;
   2274	val.asid = asid;
   2275	val.immutable = immutable;
   2276	val.gpa = gpa;
   2277	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
   2278
   2279	return rmpupdate_noremap(pfn, &val);
   2280}
   2281
   2282static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
   2283{
   2284	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2285	struct sev_data_snp_launch_update data = {};
   2286	int i, ret;
   2287
   2288	data.gctx_paddr = __psp_pa(sev->snp_context);
   2289	data.page_type = SNP_PAGE_TYPE_VMSA;
   2290
   2291	for (i = 0; i < kvm->created_vcpus; i++) {
   2292		struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));
   2293		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
   2294
   2295		/* Perform some pre-encryption checks against the VMSA */
   2296		ret = sev_es_sync_vmsa(svm);
   2297		if (ret)
   2298			return ret;
   2299
   2300		/* Transition the VMSA page to a firmware state. */
   2301		ret = rmp_make_private_noremap(pfn, -1, PG_LEVEL_4K, sev->asid, true);
   2302		if (ret)
   2303			return ret;
   2304
   2305		/* Issue the SNP command to encrypt the VMSA */
   2306		data.address = __sme_pa(svm->sev_es.vmsa);
   2307		ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
   2308				      &data, &argp->error);
   2309		if (ret) {
   2310			snp_page_reclaim(pfn);
   2311			return ret;
   2312		}
   2313
   2314		svm->vcpu.arch.guest_state_protected = true;
   2315	}
   2316
   2317	return 0;
   2318}
   2319
   2320static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
   2321{
   2322	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2323	struct sev_data_snp_launch_finish *data;
   2324	void *id_block = NULL, *id_auth = NULL;
   2325	struct kvm_sev_snp_launch_finish params;
   2326	int ret;
   2327
   2328	if (!sev_snp_guest(kvm))
   2329		return -ENOTTY;
   2330
   2331	if (!sev->snp_context)
   2332		return -EINVAL;
   2333
   2334	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
   2335		return -EFAULT;
   2336
   2337	/* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow. */
   2338	ret = snp_launch_update_vmsa(kvm, argp);
   2339	if (ret)
   2340		return ret;
   2341
   2342	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
   2343	if (!data)
   2344		return -ENOMEM;
   2345
   2346	if (params.id_block_en) {
   2347		id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE);
   2348		if (IS_ERR(id_block)) {
   2349			ret = PTR_ERR(id_block);
   2350			goto e_free;
   2351		}
   2352
   2353		data->id_block_en = 1;
   2354		data->id_block_paddr = __sme_pa(id_block);
   2355	}
   2356
   2357	if (params.auth_key_en) {
   2358		id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE);
   2359		if (IS_ERR(id_auth)) {
   2360			ret = PTR_ERR(id_auth);
   2361			goto e_free_id_block;
   2362		}
   2363
   2364		data->auth_key_en = 1;
   2365		data->id_auth_paddr = __sme_pa(id_auth);
   2366	}
   2367
   2368	data->gctx_paddr = __psp_pa(sev->snp_context);
   2369	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
   2370
   2371	kfree(id_auth);
   2372
   2373e_free_id_block:
   2374	kfree(id_block);
   2375
   2376e_free:
   2377	kfree(data);
   2378
   2379	return ret;
   2380}
   2381
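        /*
         * Read the current guest RIP of vCPU 0.  For SEV-ES/SEV-SNP guests the
         * register state lives in the encrypted VMSA, so it is decrypted into a
         * scratch page via the firmware debug-decrypt interface and RIP is read
         * from there; for plain SEV guests RIP is available directly from KVM's
         * register cache.
         */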
   2382uint64_t
   2383cpc_read_rip(struct kvm *kvm, uint64_t *rip)
   2384{
   2385	struct kvm_sev_info *sev;
   2386	struct vcpu_svm *svm;
   2387	struct kvm_vcpu *vcpu;
   2388	hpa_t src_pa, dst_pa;
   2389	void *vmsa;
   2390	int error;
   2391	int ret;
   2392
   2393	if (xa_empty(&kvm->vcpu_array))
   2394		return -EFAULT;
   2395
   2396	vcpu = xa_load(&kvm->vcpu_array, 0);
   2397
   2398	if (sev_es_guest(kvm)) {
   2399		sev = &to_kvm_svm(kvm)->sev_info;
   2400		svm = to_svm(vcpu);
   2401
    2402		vmsa = kzalloc(PAGE_SIZE, GFP_KERNEL);
    2403		if (!vmsa)
    2404			return -ENOMEM;
   2405
   2406		src_pa = __pa(svm->sev_es.vmsa);
   2407		dst_pa = __pa(vmsa);
   2408		if (sev->snp_active) {
   2409			ret = snp_guest_dbg_decrypt_page(
   2410				__pa(sev->snp_context) >> PAGE_SHIFT,
   2411				src_pa >> PAGE_SHIFT, dst_pa >> PAGE_SHIFT,
   2412				&error);
   2413		} else {
   2414			ret = __sev_dbg_decrypt(kvm, src_pa, dst_pa,
   2415				PAGE_SIZE, &error);
   2416		}
   2417
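        		/*
        		 * Offset 0x178 is the byte offset of the RIP field in the
        		 * SEV-ES VMSA save area layout.
        		 */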
   2418		*rip = *(uint64_t *)(vmsa + 0x178);
   2419
   2420		kfree(vmsa);
   2421
   2422		if (ret) return ret;
   2423	} else {
   2424		*rip = kvm_rip_read(vcpu);
   2425	}
   2426
   2427	return 0;
   2428}
   2429
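        /*
         * CachePC-specific commands are multiplexed through the regular SEV
         * command path: userspace issues KVM_MEMORY_ENCRYPT_OP on the VM fd with
         * kvm_sev_cmd.id == KVM_SEV_CACHEPC and kvm_sev_cmd.data pointing to a
         * struct cpc_sev_cmd.  A rough userspace sketch, assuming the CachePC
         * uapi definitions are available:
         *
         *   struct cpc_sev_cmd cmd = { .id = SEV_CPC_GET_RIP };
         *   struct kvm_sev_cmd sev_cmd = {
         *           .id = KVM_SEV_CACHEPC,
         *           .data = (__u64)(uintptr_t)&cmd,
         *   };
         *   ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &sev_cmd);
         *   // on success, cmd.data holds the guest RIP of vCPU 0
         */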
   2430static int
   2431sev_cachepc_ioctl(struct kvm *kvm, struct kvm_sev_cmd *sev_cmd)
   2432{
   2433	struct cpc_sev_cmd cmd;
   2434	int ret;
   2435
   2436	if (copy_from_user(&cmd, (void *)sev_cmd->data, sizeof(cmd)))
   2437		return -EFAULT;
   2438
   2439	if (cmd.id == SEV_CPC_GET_RIP) {
   2440		ret = cpc_read_rip(kvm, &cmd.data);
   2441		if (ret) return ret;
   2442	} else {
   2443		CPC_ERR("Unknown cachepc sev cmd: %i\n", cmd.id);
   2444	}
   2445
   2446	if (copy_to_user((void *)sev_cmd->data, &cmd, sizeof(cmd)))
   2447		return -EFAULT;
   2448
   2449	return 0;
   2450}
   2451
   2452int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
   2453{
   2454	struct kvm_sev_cmd sev_cmd;
   2455	int r;
   2456
   2457	if (!sev_enabled)
   2458		return -ENOTTY;
   2459
   2460	if (!argp)
   2461		return 0;
   2462
   2463	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
   2464		return -EFAULT;
   2465
   2466	mutex_lock(&kvm->lock);
   2467
   2468	/* Only the enc_context_owner handles some memory enc operations. */
   2469	if (is_mirroring_enc_context(kvm) &&
   2470	    !is_cmd_allowed_from_mirror(sev_cmd.id)) {
   2471		r = -EINVAL;
   2472		goto out;
   2473	}
   2474
   2475	switch (sev_cmd.id) {
   2476	case KVM_SEV_SNP_INIT:
   2477		if (!sev_snp_enabled) {
   2478			r = -ENOTTY;
   2479			goto out;
   2480		}
   2481		fallthrough;
   2482	case KVM_SEV_ES_INIT:
   2483		if (!sev_es_enabled) {
   2484			r = -ENOTTY;
   2485			goto out;
   2486		}
   2487		fallthrough;
   2488	case KVM_SEV_INIT:
   2489		r = sev_guest_init(kvm, &sev_cmd);
   2490		break;
   2491	case KVM_SEV_LAUNCH_START:
   2492		r = sev_launch_start(kvm, &sev_cmd);
   2493		break;
   2494	case KVM_SEV_LAUNCH_UPDATE_DATA:
   2495		r = sev_launch_update_data(kvm, &sev_cmd);
   2496		break;
   2497	case KVM_SEV_LAUNCH_UPDATE_VMSA:
   2498		r = sev_launch_update_vmsa(kvm, &sev_cmd);
   2499		break;
   2500	case KVM_SEV_LAUNCH_MEASURE:
   2501		r = sev_launch_measure(kvm, &sev_cmd);
   2502		break;
   2503	case KVM_SEV_LAUNCH_FINISH:
   2504		r = sev_launch_finish(kvm, &sev_cmd);
   2505		break;
   2506	case KVM_SEV_GUEST_STATUS:
   2507		r = sev_guest_status(kvm, &sev_cmd);
   2508		break;
   2509	case KVM_SEV_DBG_DECRYPT:
   2510		if (sev_snp_guest(kvm))
   2511			r = snp_dbg_decrypt_vmsa(kvm, &sev_cmd);
   2512		else
   2513			r = sev_dbg_crypt(kvm, &sev_cmd, true);
   2514		break;
   2515	case KVM_SEV_DBG_ENCRYPT:
   2516		r = sev_dbg_crypt(kvm, &sev_cmd, false);
   2517		break;
   2518	case KVM_SEV_LAUNCH_SECRET:
   2519		r = sev_launch_secret(kvm, &sev_cmd);
   2520		break;
   2521	case KVM_SEV_GET_ATTESTATION_REPORT:
   2522		r = sev_get_attestation_report(kvm, &sev_cmd);
   2523		break;
   2524	case KVM_SEV_SEND_START:
   2525		r = sev_send_start(kvm, &sev_cmd);
   2526		break;
   2527	case KVM_SEV_SEND_UPDATE_DATA:
   2528		r = sev_send_update_data(kvm, &sev_cmd);
   2529		break;
   2530	case KVM_SEV_SEND_FINISH:
   2531		r = sev_send_finish(kvm, &sev_cmd);
   2532		break;
   2533	case KVM_SEV_SEND_CANCEL:
   2534		r = sev_send_cancel(kvm, &sev_cmd);
   2535		break;
   2536	case KVM_SEV_RECEIVE_START:
   2537		r = sev_receive_start(kvm, &sev_cmd);
   2538		break;
   2539	case KVM_SEV_RECEIVE_UPDATE_DATA:
   2540		r = sev_receive_update_data(kvm, &sev_cmd);
   2541		break;
   2542	case KVM_SEV_RECEIVE_FINISH:
   2543		r = sev_receive_finish(kvm, &sev_cmd);
   2544		break;
   2545	case KVM_SEV_SNP_LAUNCH_START:
   2546		r = snp_launch_start(kvm, &sev_cmd);
   2547		break;
   2548	case KVM_SEV_SNP_LAUNCH_UPDATE:
   2549		r = snp_launch_update(kvm, &sev_cmd);
   2550		break;
   2551	case KVM_SEV_SNP_LAUNCH_FINISH:
   2552		r = snp_launch_finish(kvm, &sev_cmd);
   2553		break;
   2554	case KVM_SEV_CACHEPC:
   2555		r = sev_cachepc_ioctl(kvm, &sev_cmd);
   2556		break;
   2557	default:
   2558		r = -EINVAL;
   2559		goto out;
   2560	}
   2561
   2562	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
   2563		r = -EFAULT;
   2564
   2565out:
   2566	mutex_unlock(&kvm->lock);
   2567	return r;
   2568}
   2569
   2570static bool is_range_hugetlb(struct kvm *kvm, struct kvm_enc_region *range)
   2571{
   2572	struct vm_area_struct *vma;
   2573	u64 start, end;
   2574	bool ret = true;
   2575
   2576	start = range->addr;
   2577	end = start + range->size;
   2578
   2579	mmap_read_lock(kvm->mm);
   2580
   2581	do {
   2582		vma = find_vma_intersection(kvm->mm, start, end);
   2583		if (!vma)
   2584			goto unlock;
   2585
   2586		if (is_vm_hugetlb_page(vma))
   2587			goto unlock;
   2588
   2589		start = vma->vm_end;
   2590	} while (end > vma->vm_end);
   2591
   2592	ret = false;
   2593
   2594unlock:
   2595	mmap_read_unlock(kvm->mm);
   2596	return ret;
   2597}
   2598
   2599int sev_mem_enc_register_region(struct kvm *kvm,
   2600				struct kvm_enc_region *range)
   2601{
   2602	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2603	struct enc_region *region;
   2604	int ret = 0;
   2605
   2606	if (!sev_guest(kvm))
   2607		return -ENOTTY;
   2608
   2609	/* If kvm is mirroring encryption context it isn't responsible for it */
   2610	if (is_mirroring_enc_context(kvm))
   2611		return -EINVAL;
   2612
   2613	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
   2614		return -EINVAL;
   2615
   2616	/*
    2617	 * SEV-SNP does not support backing pages from HugeTLB. Verify that
    2618	 * the registered memory range is not backed by HugeTLB.
   2619	 */
   2620	if (sev_snp_guest(kvm) && is_range_hugetlb(kvm, range))
   2621		return -EINVAL;
   2622
   2623	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
   2624	if (!region)
   2625		return -ENOMEM;
   2626
   2627	mutex_lock(&kvm->lock);
   2628	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
   2629	if (IS_ERR(region->pages)) {
   2630		ret = PTR_ERR(region->pages);
   2631		mutex_unlock(&kvm->lock);
   2632		goto e_free;
   2633	}
   2634
   2635	region->uaddr = range->addr;
   2636	region->size = range->size;
   2637
   2638	list_add_tail(&region->list, &sev->regions_list);
   2639	mutex_unlock(&kvm->lock);
   2640
   2641	/*
   2642	 * The guest may change the memory encryption attribute from C=0 -> C=1
    2643	 * or vice versa for this memory range. Let's make sure caches are
    2644	 * flushed so that guest data gets written into memory with the
    2645	 * correct C-bit.
   2646	 */
   2647	sev_clflush_pages(region->pages, region->npages);
   2648
   2649	return ret;
   2650
   2651e_free:
   2652	kfree(region);
   2653	return ret;
   2654}
   2655
   2656static struct enc_region *
   2657find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
   2658{
   2659	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2660	struct list_head *head = &sev->regions_list;
   2661	struct enc_region *i;
   2662
   2663	list_for_each_entry(i, head, list) {
   2664		if (i->uaddr == range->addr &&
   2665		    i->size == range->size)
   2666			return i;
   2667	}
   2668
   2669	return NULL;
   2670}
   2671
   2672static void __unregister_enc_region_locked(struct kvm *kvm,
   2673					   struct enc_region *region)
   2674{
   2675	unsigned long i, pfn;
   2676	int level;
   2677
   2678	/*
    2679	 * The guest memory pages are assigned in the RMP table. Unassign them
    2680	 * before releasing the memory.
   2681	 */
   2682	if (sev_snp_guest(kvm)) {
   2683		for (i = 0; i < region->npages; i++) {
   2684			pfn = page_to_pfn(region->pages[i]);
   2685
   2686			if (!snp_lookup_rmpentry(pfn, &level))
   2687				continue;
   2688
   2689			cond_resched();
   2690
   2691			if (level > PG_LEVEL_4K)
   2692				pfn &= ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
   2693
   2694			host_rmp_make_shared(pfn, level, true);
   2695		}
   2696	}
   2697
   2698	sev_unpin_memory(kvm, region->pages, region->npages);
   2699	list_del(&region->list);
   2700	kfree(region);
   2701}
   2702
   2703int sev_mem_enc_unregister_region(struct kvm *kvm,
   2704				  struct kvm_enc_region *range)
   2705{
   2706	struct enc_region *region;
   2707	int ret;
   2708
   2709	/* If kvm is mirroring encryption context it isn't responsible for it */
   2710	if (is_mirroring_enc_context(kvm))
   2711		return -EINVAL;
   2712
   2713	mutex_lock(&kvm->lock);
   2714
   2715	if (!sev_guest(kvm)) {
   2716		ret = -ENOTTY;
   2717		goto failed;
   2718	}
   2719
   2720	region = find_enc_region(kvm, range);
   2721	if (!region) {
   2722		ret = -EINVAL;
   2723		goto failed;
   2724	}
   2725
   2726	/*
   2727	 * Ensure that all guest tagged cache entries are flushed before
   2728	 * releasing the pages back to the system for use. CLFLUSH will
   2729	 * not do this, so issue a WBINVD.
   2730	 */
   2731	wbinvd_on_all_cpus();
   2732
   2733	__unregister_enc_region_locked(kvm, region);
   2734
   2735	mutex_unlock(&kvm->lock);
   2736	return 0;
   2737
   2738failed:
   2739	mutex_unlock(&kvm->lock);
   2740	return ret;
   2741}
   2742
   2743int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
   2744{
   2745	struct file *source_kvm_file;
   2746	struct kvm *source_kvm;
   2747	struct kvm_sev_info *source_sev, *mirror_sev;
   2748	int ret;
   2749
   2750	source_kvm_file = fget(source_fd);
   2751	if (!file_is_kvm(source_kvm_file)) {
   2752		ret = -EBADF;
   2753		goto e_source_fput;
   2754	}
   2755
   2756	source_kvm = source_kvm_file->private_data;
   2757	ret = sev_lock_two_vms(kvm, source_kvm);
   2758	if (ret)
   2759		goto e_source_fput;
   2760
   2761	/*
   2762	 * Mirrors of mirrors should work, but let's not get silly.  Also
   2763	 * disallow out-of-band SEV/SEV-ES init if the target is already an
   2764	 * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
   2765	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
   2766	 */
   2767	if (sev_guest(kvm) || !sev_guest(source_kvm) ||
   2768	    is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
   2769		ret = -EINVAL;
   2770		goto e_unlock;
   2771	}
   2772
   2773	/*
   2774	 * The mirror kvm holds an enc_context_owner ref so its asid can't
   2775	 * disappear until we're done with it
   2776	 */
   2777	source_sev = &to_kvm_svm(source_kvm)->sev_info;
   2778	kvm_get_kvm(source_kvm);
   2779	mirror_sev = &to_kvm_svm(kvm)->sev_info;
   2780	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
   2781
   2782	/* Set enc_context_owner and copy its encryption context over */
   2783	mirror_sev->enc_context_owner = source_kvm;
   2784	mirror_sev->active = true;
   2785	mirror_sev->asid = source_sev->asid;
   2786	mirror_sev->fd = source_sev->fd;
   2787	mirror_sev->es_active = source_sev->es_active;
   2788	mirror_sev->handle = source_sev->handle;
   2789	INIT_LIST_HEAD(&mirror_sev->regions_list);
   2790	INIT_LIST_HEAD(&mirror_sev->mirror_vms);
   2791	ret = 0;
   2792
   2793	/*
    2794	 * Do not copy ap_jump_table: the mirror does not share the same KVM
    2795	 * context as the original, and the two may have different memory
    2796	 * views.
   2797	 */
   2798
   2799e_unlock:
   2800	sev_unlock_two_vms(kvm, source_kvm);
   2801e_source_fput:
   2802	if (source_kvm_file)
   2803		fput(source_kvm_file);
   2804	return ret;
   2805}
   2806
   2807static int snp_decommission_context(struct kvm *kvm)
   2808{
   2809	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2810	struct sev_data_snp_decommission data = {};
   2811	int ret;
   2812
    2813	/* If the context was not created then do nothing */
   2814	if (!sev->snp_context)
   2815		return 0;
   2816
   2817	data.gctx_paddr = __sme_pa(sev->snp_context);
   2818	ret = snp_guest_decommission(&data, NULL);
   2819	if (WARN_ONCE(ret, "failed to release guest context"))
   2820		return ret;
   2821
   2822	/* free the context page now */
   2823	snp_free_firmware_page(sev->snp_context);
   2824	sev->snp_context = NULL;
   2825
   2826	kfree(sev->snp_certs_data);
   2827
   2828	return 0;
   2829}
   2830
   2831void sev_vm_destroy(struct kvm *kvm)
   2832{
   2833	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   2834	struct list_head *head = &sev->regions_list;
   2835	struct list_head *pos, *q;
   2836
   2837	if (!sev_guest(kvm))
   2838		return;
   2839
   2840	WARN_ON(!list_empty(&sev->mirror_vms));
   2841
   2842	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
   2843	if (is_mirroring_enc_context(kvm)) {
   2844		struct kvm *owner_kvm = sev->enc_context_owner;
   2845
   2846		mutex_lock(&owner_kvm->lock);
   2847		list_del(&sev->mirror_entry);
   2848		mutex_unlock(&owner_kvm->lock);
   2849		kvm_put_kvm(owner_kvm);
   2850		return;
   2851	}
   2852
   2853	/*
   2854	 * Ensure that all guest tagged cache entries are flushed before
   2855	 * releasing the pages back to the system for use. CLFLUSH will
   2856	 * not do this, so issue a WBINVD.
   2857	 */
   2858	wbinvd_on_all_cpus();
   2859
   2860	/*
    2861	 * If userspace was terminated before unregistering the memory regions,
    2862	 * unpin all of the registered memory.
   2863	 */
   2864	if (!list_empty(head)) {
   2865		list_for_each_safe(pos, q, head) {
   2866			__unregister_enc_region_locked(kvm,
   2867				list_entry(pos, struct enc_region, list));
   2868			cond_resched();
   2869		}
   2870	}
   2871
   2872	if (sev_snp_guest(kvm)) {
   2873		if (snp_decommission_context(kvm)) {
   2874			WARN_ONCE(1, "Failed to free SNP guest context, leaking asid!\n");
   2875			return;
   2876		}
   2877	} else {
   2878		sev_unbind_asid(kvm, sev->handle);
   2879	}
   2880
   2881	sev_asid_free(sev);
   2882}
   2883
   2884void __init sev_set_cpu_caps(void)
   2885{
   2886	if (!sev_enabled)
   2887		kvm_cpu_cap_clear(X86_FEATURE_SEV);
   2888	if (!sev_es_enabled)
   2889		kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
   2890}
   2891
   2892void __init sev_hardware_setup(void)
   2893{
   2894#ifdef CONFIG_KVM_AMD_SEV
   2895	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
   2896	bool sev_snp_supported = false;
   2897	bool sev_es_supported = false;
   2898	bool sev_supported = false;
   2899
   2900	if (!sev_enabled || !npt_enabled)
   2901		goto out;
   2902
   2903	/*
   2904	 * SEV must obviously be supported in hardware.  Sanity check that the
   2905	 * CPU supports decode assists, which is mandatory for SEV guests to
   2906	 * support instruction emulation.
   2907	 */
   2908	if (!boot_cpu_has(X86_FEATURE_SEV) ||
   2909	    WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
   2910		goto out;
   2911
   2912	/* Retrieve SEV CPUID information */
   2913	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
   2914
   2915	/* Set encryption bit location for SEV-ES guests */
   2916	sev_enc_bit = ebx & 0x3f;
   2917
   2918	/* Maximum number of encrypted guests supported simultaneously */
   2919	max_sev_asid = ecx;
   2920	if (!max_sev_asid)
   2921		goto out;
   2922
   2923	/* Minimum ASID value that should be used for SEV guest */
   2924	min_sev_asid = edx;
   2925	sev_me_mask = 1UL << (ebx & 0x3f);
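        	/*
        	 * ASIDs in [1, min_sev_asid - 1] are reserved for SEV-ES/SEV-SNP
        	 * guests, while [min_sev_asid, max_sev_asid] is available for plain
        	 * SEV guests; this split is what the SEV and SEV-ES capacity
        	 * calculations below are based on.
        	 */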
   2926
   2927	/*
   2928	 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
   2929	 * even though it's never used, so that the bitmap is indexed by the
   2930	 * actual ASID.
   2931	 */
   2932	nr_asids = max_sev_asid + 1;
   2933	sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
   2934	if (!sev_asid_bitmap)
   2935		goto out;
   2936
   2937	sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
   2938	if (!sev_reclaim_asid_bitmap) {
   2939		bitmap_free(sev_asid_bitmap);
   2940		sev_asid_bitmap = NULL;
   2941		goto out;
   2942	}
   2943
   2944	sev_asid_count = max_sev_asid - min_sev_asid + 1;
   2945	if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
   2946		goto out;
   2947
   2948	pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
   2949	sev_supported = true;
   2950
   2951	/* SEV-ES support requested? */
   2952	if (!sev_es_enabled)
   2953		goto out;
   2954
   2955	/*
   2956	 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
   2957	 * instruction stream, i.e. can't emulate in response to a #NPF and
   2958	 * instead relies on #NPF(RSVD) being reflected into the guest as #VC
   2959	 * (the guest can then do a #VMGEXIT to request MMIO emulation).
   2960	 */
   2961	if (!enable_mmio_caching)
   2962		goto out;
   2963
   2964	/* Does the CPU support SEV-ES? */
   2965	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
   2966		goto out;
   2967
   2968	/* Has the system been allocated ASIDs for SEV-ES? */
   2969	if (min_sev_asid == 1)
   2970		goto out;
   2971
   2972	sev_es_asid_count = min_sev_asid - 1;
   2973	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
   2974		goto out;
   2975
   2976	sev_es_supported = true;
   2977	sev_snp_supported = sev_snp_enabled && cpu_feature_enabled(X86_FEATURE_SEV_SNP);
   2978
   2979	pr_info("SEV-ES %ssupported: %u ASIDs\n",
   2980		sev_snp_supported ? "and SEV-SNP " : "", sev_es_asid_count);
   2981
   2982out:
   2983	sev_enabled = sev_supported;
   2984	sev_es_enabled = sev_es_supported;
   2985	sev_snp_enabled = sev_snp_supported;
   2986#endif
   2987}
   2988
   2989void sev_hardware_unsetup(void)
   2990{
   2991	if (!sev_enabled)
   2992		return;
   2993
   2994	/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
   2995	sev_flush_asids(1, max_sev_asid);
   2996
   2997	bitmap_free(sev_asid_bitmap);
   2998	bitmap_free(sev_reclaim_asid_bitmap);
   2999
   3000	misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
   3001	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
   3002}
   3003
   3004int sev_cpu_init(struct svm_cpu_data *sd)
   3005{
   3006	if (!sev_enabled)
   3007		return 0;
   3008
   3009	sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
   3010	if (!sd->sev_vmcbs)
   3011		return -ENOMEM;
   3012
   3013	return 0;
   3014}
   3015
   3016/*
   3017 * Pages used by hardware to hold guest encrypted state must be flushed before
   3018 * returning them to the system.
   3019 */
   3020static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
   3021{
   3022	int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
   3023
   3024	/*
   3025	 * Note!  The address must be a kernel address, as regular page walk
   3026	 * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
   3027	 * address is non-deterministic and unsafe.  This function deliberately
   3028	 * takes a pointer to deter passing in a user address.
   3029	 */
   3030	unsigned long addr = (unsigned long)va;
   3031
   3032	/*
   3033	 * If CPU enforced cache coherency for encrypted mappings of the
   3034	 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
   3035	 * flush is still needed in order to work properly with DMA devices.
   3036	 */
   3037	if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
   3038		clflush_cache_range(va, PAGE_SIZE);
   3039		return;
   3040	}
   3041
   3042	/*
   3043	 * VM Page Flush takes a host virtual address and a guest ASID.  Fall
   3044	 * back to WBINVD if this faults so as not to make any problems worse
   3045	 * by leaving stale encrypted data in the cache.
   3046	 */
   3047	if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
   3048		goto do_wbinvd;
   3049
   3050	return;
   3051
   3052do_wbinvd:
   3053	wbinvd_on_all_cpus();
   3054}
   3055
   3056void sev_free_vcpu(struct kvm_vcpu *vcpu)
   3057{
   3058	struct vcpu_svm *svm;
   3059
   3060	if (!sev_es_guest(vcpu->kvm))
   3061		return;
   3062
   3063	svm = to_svm(vcpu);
   3064
   3065	/*
    3066	 * If it's an SNP guest, then the VMSA was added to the RMP table as
    3067	 * a guest-owned page. Transition the page to hypervisor state
    3068	 * before releasing it back to the system.
    3069	 * The page was also removed from the kernel direct map, so flush it
    3070	 * only after it has been transitioned back to hypervisor state and
    3071	 * restored in the direct map.
   3072	 */
   3073	if (sev_snp_guest(vcpu->kvm)) {
   3074		u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
   3075
   3076		if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false))
   3077			goto skip_vmsa_free;
   3078	}
   3079
   3080	if (vcpu->arch.guest_state_protected)
   3081		sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
   3082
   3083	__free_page(virt_to_page(svm->sev_es.vmsa));
   3084
   3085skip_vmsa_free:
   3086	kvfree(svm->sev_es.ghcb_sa);
   3087}
   3088
   3089static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
   3090{
   3091	struct vmcb_control_area *control = &svm->vmcb->control;
   3092	u64 gfn = gpa_to_gfn(control->ghcb_gpa);
   3093	struct kvm_vcpu *vcpu = &svm->vcpu;
   3094
   3095	if (kvm_vcpu_map(vcpu, gfn, map)) {
   3096		/* Unable to map GHCB from guest */
   3097		pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
   3098		return -EFAULT;
   3099	}
   3100
   3101	if (sev_post_map_gfn(vcpu->kvm, map->gfn, map->pfn)) {
   3102		kvm_vcpu_unmap(vcpu, map, false);
   3103		return -EBUSY;
   3104	}
   3105
   3106	return 0;
   3107}
   3108
   3109static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
   3110{
   3111	struct kvm_vcpu *vcpu = &svm->vcpu;
   3112
   3113	kvm_vcpu_unmap(vcpu, map, true);
   3114	sev_post_unmap_gfn(vcpu->kvm, map->gfn, map->pfn);
   3115}
   3116
   3117static void dump_ghcb(struct vcpu_svm *svm)
   3118{
   3119	struct kvm_host_map map;
   3120	unsigned int nbits;
   3121	struct ghcb *ghcb;
   3122
   3123	if (svm_map_ghcb(svm, &map))
   3124		return;
   3125
   3126	ghcb = map.hva;
   3127
   3128	/* Re-use the dump_invalid_vmcb module parameter */
   3129	if (!dump_invalid_vmcb) {
   3130		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
   3131		goto e_unmap;
   3132	}
   3133
   3134	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
   3135
   3136	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
   3137	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
   3138	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
   3139	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
   3140	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
   3141	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
   3142	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
   3143	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
   3144	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
   3145	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
   3146
   3147e_unmap:
   3148	svm_unmap_ghcb(svm, &map);
   3149}
   3150
   3151static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
   3152{
   3153	struct kvm_vcpu *vcpu = &svm->vcpu;
   3154	struct kvm_host_map map;
   3155	struct ghcb *ghcb;
   3156
   3157	if (svm_map_ghcb(svm, &map))
   3158		return false;
   3159
   3160	ghcb = map.hva;
   3161
   3162	/*
   3163	 * The GHCB protocol so far allows for the following data
   3164	 * to be returned:
   3165	 *   GPRs RAX, RBX, RCX, RDX
   3166	 *
   3167	 * Copy their values, even if they may not have been written during the
   3168	 * VM-Exit.  It's the guest's responsibility to not consume random data.
   3169	 */
   3170	ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
   3171	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
   3172	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
   3173	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
   3174
   3175	/*
   3176	 * Copy the return values from the exit_info_{1,2}.
   3177	 */
   3178	ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1);
   3179	ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2);
   3180
   3181	/* Sync the scratch buffer area. */
   3182	if (svm->sev_es.ghcb_sa_sync) {
   3183		if (svm->sev_es.ghcb_sa_contained) {
   3184			memcpy(ghcb->shared_buffer + svm->sev_es.ghcb_sa_offset,
   3185			       svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
   3186		} else {
   3187			int ret;
   3188
   3189			ret = kvm_write_guest(svm->vcpu.kvm,
   3190					      svm->sev_es.ghcb_sa_gpa,
   3191					      svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
   3192			if (ret)
   3193				pr_warn_ratelimited("unmap_ghcb: kvm_write_guest failed while syncing scratch area, gpa: %llx, ret: %d\n",
   3194						    svm->sev_es.ghcb_sa_gpa, ret);
   3195		}
   3196		svm->sev_es.ghcb_sa_sync = false;
   3197	}
   3198
   3199	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
   3200
   3201	svm_unmap_ghcb(svm, &map);
   3202
   3203	return true;
   3204}
   3205
   3206static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb)
   3207{
   3208	struct vmcb_control_area *control = &svm->vmcb->control;
   3209	struct kvm_vcpu *vcpu = &svm->vcpu;
   3210	u64 exit_code;
   3211
   3212	/*
   3213	 * The GHCB protocol so far allows for the following data
   3214	 * to be supplied:
   3215	 *   GPRs RAX, RBX, RCX, RDX
   3216	 *   XCR0
   3217	 *   CPL
   3218	 *
   3219	 * VMMCALL allows the guest to provide extra registers. KVM also
   3220	 * expects RSI for hypercalls, so include that, too.
   3221	 *
   3222	 * Copy their values to the appropriate location if supplied.
   3223	 */
   3224	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
   3225
   3226	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
   3227	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
   3228	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
   3229	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
   3230	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
   3231
   3232	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
   3233
   3234	if (ghcb_xcr0_is_valid(ghcb)) {
   3235		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
   3236		kvm_update_cpuid_runtime(vcpu);
   3237	}
   3238
   3239	/* Copy the GHCB exit information into the VMCB fields */
   3240	exit_code = ghcb_get_sw_exit_code(ghcb);
   3241	control->exit_code = lower_32_bits(exit_code);
   3242	control->exit_code_hi = upper_32_bits(exit_code);
   3243	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
   3244	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
   3245
   3246	/* Copy the GHCB scratch area GPA */
   3247	svm->sev_es.ghcb_sa_gpa = ghcb_get_sw_scratch(ghcb);
   3248
   3249	/* Clear the valid entries fields */
   3250	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
   3251}
   3252
   3253static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
   3254{
   3255	struct kvm_vcpu *vcpu = &svm->vcpu;
   3256	struct kvm_host_map map;
   3257	struct ghcb *ghcb;
   3258	u64 reason;
   3259
   3260	if (svm_map_ghcb(svm, &map))
   3261		return -EFAULT;
   3262
   3263	ghcb = map.hva;
   3264
   3265	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
   3266
   3267	/*
   3268	 * Retrieve the exit code now even though it may not be marked valid
   3269	 * as it could help with debugging.
   3270	 */
   3271	*exit_code = ghcb_get_sw_exit_code(ghcb);
   3272
   3273	/* Only GHCB Usage code 0 is supported */
   3274	if (ghcb->ghcb_usage) {
   3275		reason = GHCB_ERR_INVALID_USAGE;
   3276		goto vmgexit_err;
   3277	}
   3278
   3279	reason = GHCB_ERR_MISSING_INPUT;
   3280
   3281	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
   3282	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
   3283	    !ghcb_sw_exit_info_2_is_valid(ghcb))
   3284		goto vmgexit_err;
   3285
   3286	switch (ghcb_get_sw_exit_code(ghcb)) {
   3287	case SVM_EXIT_READ_DR7:
   3288		break;
   3289	case SVM_EXIT_WRITE_DR7:
   3290		if (!ghcb_rax_is_valid(ghcb))
   3291			goto vmgexit_err;
   3292		break;
   3293	case SVM_EXIT_RDTSC:
   3294		break;
   3295	case SVM_EXIT_RDPMC:
   3296		if (!ghcb_rcx_is_valid(ghcb))
   3297			goto vmgexit_err;
   3298		break;
   3299	case SVM_EXIT_CPUID:
   3300		if (!ghcb_rax_is_valid(ghcb) ||
   3301		    !ghcb_rcx_is_valid(ghcb))
   3302			goto vmgexit_err;
   3303		if (ghcb_get_rax(ghcb) == 0xd)
   3304			if (!ghcb_xcr0_is_valid(ghcb))
   3305				goto vmgexit_err;
   3306		break;
   3307	case SVM_EXIT_INVD:
   3308		break;
   3309	case SVM_EXIT_IOIO:
   3310		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
   3311			if (!ghcb_sw_scratch_is_valid(ghcb))
   3312				goto vmgexit_err;
   3313		} else {
   3314			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
   3315				if (!ghcb_rax_is_valid(ghcb))
   3316					goto vmgexit_err;
   3317		}
   3318		break;
   3319	case SVM_EXIT_MSR:
   3320		if (!ghcb_rcx_is_valid(ghcb))
   3321			goto vmgexit_err;
   3322		if (ghcb_get_sw_exit_info_1(ghcb)) {
   3323			if (!ghcb_rax_is_valid(ghcb) ||
   3324			    !ghcb_rdx_is_valid(ghcb))
   3325				goto vmgexit_err;
   3326		}
   3327		break;
   3328	case SVM_EXIT_VMMCALL:
   3329		CPC_DBG("SVM_EXIT_VMMCALL %llu", ghcb_get_rax(ghcb));
   3330		// if (!ghcb_rax_is_valid(ghcb) ||
   3331		//     !ghcb_cpl_is_valid(ghcb))
   3332		// 	goto vmgexit_err;
   3333		break;
   3334	case SVM_EXIT_RDTSCP:
   3335		break;
   3336	case SVM_EXIT_WBINVD:
   3337		break;
   3338	case SVM_EXIT_MONITOR:
   3339		if (!ghcb_rax_is_valid(ghcb) ||
   3340		    !ghcb_rcx_is_valid(ghcb) ||
   3341		    !ghcb_rdx_is_valid(ghcb))
   3342			goto vmgexit_err;
   3343		break;
   3344	case SVM_EXIT_MWAIT:
   3345		if (!ghcb_rax_is_valid(ghcb) ||
   3346		    !ghcb_rcx_is_valid(ghcb))
   3347			goto vmgexit_err;
   3348		break;
   3349	case SVM_VMGEXIT_MMIO_READ:
   3350	case SVM_VMGEXIT_MMIO_WRITE:
   3351		if (!ghcb_sw_scratch_is_valid(ghcb))
   3352			goto vmgexit_err;
   3353		break;
   3354	case SVM_VMGEXIT_AP_CREATION:
   3355		if (!ghcb_rax_is_valid(ghcb))
   3356			goto vmgexit_err;
   3357		break;
   3358	case SVM_VMGEXIT_NMI_COMPLETE:
   3359	case SVM_VMGEXIT_AP_HLT_LOOP:
   3360	case SVM_VMGEXIT_AP_JUMP_TABLE:
   3361	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
   3362	case SVM_VMGEXIT_HV_FEATURES:
   3363	case SVM_VMGEXIT_PSC:
   3364	case SVM_VMGEXIT_GUEST_REQUEST:
   3365	case SVM_VMGEXIT_EXT_GUEST_REQUEST:
   3366		break;
   3367	default:
   3368		reason = GHCB_ERR_INVALID_EVENT;
   3369		goto vmgexit_err;
   3370	}
   3371
   3372	sev_es_sync_from_ghcb(svm, ghcb);
   3373
   3374	svm_unmap_ghcb(svm, &map);
   3375	return 0;
   3376
   3377vmgexit_err:
   3378	vcpu = &svm->vcpu;
   3379
   3380	if (reason == GHCB_ERR_INVALID_USAGE) {
   3381		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
   3382			    ghcb->ghcb_usage);
   3383	} else if (reason == GHCB_ERR_INVALID_EVENT) {
   3384		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
   3385			    *exit_code);
   3386	} else {
   3387		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
   3388			    *exit_code);
   3389		dump_ghcb(svm);
   3390	}
   3391
   3392	/* Clear the valid entries fields */
   3393	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
   3394
   3395	ghcb_set_sw_exit_info_1(ghcb, 2);
   3396	ghcb_set_sw_exit_info_2(ghcb, reason);
   3397
   3398	svm_unmap_ghcb(svm, &map);
   3399
   3400	/* Resume the guest to "return" the error code. */
   3401	return 1;
   3402}
   3403
   3404void sev_es_unmap_ghcb(struct vcpu_svm *svm)
   3405{
   3406	if (svm->sev_es.ghcb_sa_alloc_len >= 2)
   3407		trace_kvm_sev_es_unmap_ghcb(svm->sev_es.ghcb_sa,
   3408					    svm->sev_es.ghcb_sa_gpa,
   3409					    svm->sev_es.ghcb_sa_len,
   3410					    svm->sev_es.ghcb_sa_alloc_len,
   3411					    svm->sev_es.ghcb_sa_sync,
   3412					    svm->sev_es.ghcb_in_use,
   3413					    ((u8 *)svm->sev_es.ghcb_sa)[0],
   3414					    ((u8 *)svm->sev_es.ghcb_sa)[1]);
   3415	else
   3416		trace_kvm_sev_es_unmap_ghcb(svm->sev_es.ghcb_sa,
   3417					    svm->sev_es.ghcb_sa_gpa,
   3418					    svm->sev_es.ghcb_sa_len,
   3419					    svm->sev_es.ghcb_sa_alloc_len,
   3420					    svm->sev_es.ghcb_sa_sync,
   3421					    svm->sev_es.ghcb_in_use,
   3422					    0, 0);
   3423
   3424	/* Clear any indication that the vCPU is in a type of AP Reset Hold */
   3425	svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE;
   3426
   3427	if (!svm->sev_es.ghcb_in_use)
   3428		return;
   3429
   3430	sev_es_sync_to_ghcb(svm);
   3431
   3432	svm->sev_es.ghcb_in_use = false;
   3433}
   3434
   3435void pre_sev_run(struct vcpu_svm *svm, int cpu)
   3436{
   3437	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
   3438	int asid = sev_get_asid(svm->vcpu.kvm);
   3439
   3440	/* Assign the asid allocated with this SEV guest */
   3441	svm->asid = asid;
   3442
   3443	/*
   3444	 * Flush guest TLB:
   3445	 *
    3446	 * 1) when a different VMCB for the same ASID is to be run on the same host CPU.
    3447	 * 2) or this VMCB was executed on a different host CPU in previous VMRUNs.
   3448	 */
   3449	if (sd->sev_vmcbs[asid] == svm->vmcb &&
   3450	    svm->vcpu.arch.last_vmentry_cpu == cpu)
   3451		return;
   3452
   3453	sd->sev_vmcbs[asid] = svm->vmcb;
   3454	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
   3455	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
   3456}
   3457
   3458#define GHCB_SCRATCH_AREA_LIMIT		(16ULL * PAGE_SIZE)
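        /*
         * The scratch GPA provided by the guest via the GHCB sw_scratch field
         * either points into the GHCB's shared_buffer, in which case it is used
         * in place via ghcb_sa_offset, or it points to separate shared guest
         * memory, in which case it is bounced through a kernel buffer capped at
         * GHCB_SCRATCH_AREA_LIMIT.
         */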
   3459static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
   3460{
   3461	struct vmcb_control_area *control = &svm->vmcb->control;
   3462	u64 ghcb_scratch_beg, ghcb_scratch_end;
   3463	u64 scratch_gpa_beg, scratch_gpa_end;
   3464
   3465	scratch_gpa_beg = svm->sev_es.ghcb_sa_gpa;
   3466	if (!scratch_gpa_beg) {
   3467		pr_err("vmgexit: scratch gpa not provided\n");
   3468		goto e_scratch;
   3469	}
   3470
   3471	scratch_gpa_end = scratch_gpa_beg + len;
   3472	if (scratch_gpa_end < scratch_gpa_beg) {
   3473		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
   3474		       len, scratch_gpa_beg);
   3475		goto e_scratch;
   3476	}
   3477
   3478	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
   3479		/* Scratch area begins within GHCB */
   3480		ghcb_scratch_beg = control->ghcb_gpa +
   3481				   offsetof(struct ghcb, shared_buffer);
   3482		ghcb_scratch_end = control->ghcb_gpa +
   3483				   offsetof(struct ghcb, reserved_1);
   3484
   3485		/*
   3486		 * If the scratch area begins within the GHCB, it must be
   3487		 * completely contained in the GHCB shared buffer area.
   3488		 */
   3489		if (scratch_gpa_beg < ghcb_scratch_beg ||
   3490		    scratch_gpa_end > ghcb_scratch_end) {
   3491			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
   3492			       scratch_gpa_beg, scratch_gpa_end);
   3493			goto e_scratch;
   3494		}
   3495		svm->sev_es.ghcb_sa_contained = true;
   3496		svm->sev_es.ghcb_sa_offset = scratch_gpa_beg - ghcb_scratch_beg;
   3497	} else {
   3498		/*
   3499		 * The guest memory must be read into a kernel buffer, so
   3500		 * limit the size
   3501		 */
   3502		if (len > GHCB_SCRATCH_AREA_LIMIT) {
   3503			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
   3504			       len, GHCB_SCRATCH_AREA_LIMIT);
   3505			goto e_scratch;
   3506		}
   3507		svm->sev_es.ghcb_sa_contained = false;
   3508	}
   3509
   3510	if (svm->sev_es.ghcb_sa_alloc_len < len) {
   3511		void *scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT);
   3512
   3513		if (!scratch_va)
   3514			return -ENOMEM;
   3515
   3516		/*
    3517		 * Free the old scratch area and switch to using the newly
    3518		 * allocated one.
   3519		 */
   3520		kvfree(svm->sev_es.ghcb_sa);
   3521
   3522		svm->sev_es.ghcb_sa_alloc_len = len;
   3523		svm->sev_es.ghcb_sa = scratch_va;
   3524	}
   3525
   3526	if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, svm->sev_es.ghcb_sa, len)) {
   3527		/* Unable to copy scratch area from guest */
   3528		pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
   3529		return -EFAULT;
   3530	}
   3531
   3532	/*
   3533	 * The operation will dictate whether the buffer needs to be synced
   3534	 * before running the vCPU next time (i.e. a read was requested so
   3535	 * the data must be written back to the guest memory).
   3536	 */
   3537	svm->sev_es.ghcb_sa_sync = sync;
   3538	svm->sev_es.ghcb_sa_len = len;
   3539
   3540	return 0;
   3541
   3542e_scratch:
   3543	svm_set_ghcb_sw_exit_info_1(&svm->vcpu, 2);
   3544	svm_set_ghcb_sw_exit_info_2(&svm->vcpu, GHCB_ERR_INVALID_SCRATCH_AREA);
   3545
   3546	return 1;
   3547}
   3548
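        /*
         * Helpers for the GHCB MSR protocol: the GHCB MSR value is built from
         * bit fields, so these set, get or replace a field at the given
         * mask/position within svm->vmcb->control.ghcb_gpa.
         */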
   3549static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
   3550			      unsigned int pos)
   3551{
   3552	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
   3553	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
   3554}
   3555
   3556static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
   3557{
   3558	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
   3559}
   3560
   3561static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
   3562{
   3563	svm->vmcb->control.ghcb_gpa = value;
   3564}
   3565
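        /*
         * Align the pfn down to a 2MB boundary and use PSMASH to split the
         * corresponding 2MB RMP entry into 512 4K RMP entries.
         */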
   3566static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn)
   3567{
   3568	pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
   3569
   3570	return psmash(pfn);
   3571}
   3572
   3573static int snp_make_page_shared(struct kvm *kvm, gpa_t gpa, kvm_pfn_t pfn, int level)
   3574{
   3575	int rc, rmp_level;
   3576
   3577	rc = snp_lookup_rmpentry(pfn, &rmp_level);
   3578	if (rc < 0)
   3579		return -EINVAL;
   3580
   3581	/* If page is not assigned then do nothing */
   3582	if (!rc)
   3583		return 0;
   3584
   3585	/*
    3586	 * If the page is part of an existing 2MB RMP entry, split it into
    3587	 * multiple 4K pages before making the memory shared.
   3588	 */
   3589	if (level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M) {
   3590		rc = snp_rmptable_psmash(kvm, pfn);
   3591		if (rc)
   3592			return rc;
   3593	}
   3594
   3595	return rmp_make_shared(pfn, level);
   3596}
   3597
   3598static int snp_check_and_build_npt(struct kvm_vcpu *vcpu, gpa_t gpa, int level)
   3599{
   3600	struct kvm *kvm = vcpu->kvm;
   3601	int rc, npt_level;
   3602	kvm_pfn_t pfn;
   3603
   3604	/*
   3605	 * Get the pfn and level for the gpa from the nested page table.
   3606	 *
    3607	 * If the tdp walk fails, then it's safe to say that there is no
   3608	 * valid mapping for this gpa. Create a fault to build the map.
   3609	 */
   3610	write_lock(&kvm->mmu_lock);
   3611	rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
   3612	write_unlock(&kvm->mmu_lock);
   3613	if (!rc) {
   3614		pfn = kvm_mmu_map_tdp_page(vcpu, gpa, PFERR_USER_MASK, level);
   3615		if (is_error_noslot_pfn(pfn))
   3616			return -EINVAL;
   3617	}
   3618
   3619	return 0;
   3620}
   3621
   3622static int snp_gpa_to_hva(struct kvm *kvm, gpa_t gpa, hva_t *hva)
   3623{
   3624	struct kvm_memory_slot *slot;
   3625	gfn_t gfn = gpa_to_gfn(gpa);
   3626	int idx;
   3627
   3628	idx = srcu_read_lock(&kvm->srcu);
   3629	slot = gfn_to_memslot(kvm, gfn);
   3630	if (!slot) {
   3631		srcu_read_unlock(&kvm->srcu, idx);
   3632		return -EINVAL;
   3633	}
   3634
   3635	/*
   3636	 * Note, using the __gfn_to_hva_memslot() is not solely for performance,
   3637	 * it's also necessary to avoid the "writable" check in __gfn_to_hva_many(),
   3638	 * which will always fail on read-only memslots due to gfn_to_hva() assuming
   3639	 * writes.
   3640	 */
   3641	*hva = __gfn_to_hva_memslot(slot, gfn);
   3642	srcu_read_unlock(&kvm->srcu, idx);
   3643
   3644	return 0;
   3645}
   3646
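        /*
         * Apply a page state change to the gpa range covered by 'level':
         * make sure an NPT mapping exists, then flip the RMP entries to
         * shared or private while holding psc_lock and mmu_lock.
         */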
   3647static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, enum psc_op op, gpa_t gpa,
   3648					  int level)
   3649{
   3650	struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
   3651	struct kvm *kvm = vcpu->kvm;
   3652	int rc, npt_level;
   3653	kvm_pfn_t pfn;
   3654	gpa_t gpa_end;
   3655
   3656	gpa_end = gpa + page_level_size(level);
   3657
   3658	while (gpa < gpa_end) {
   3659		/*
   3660		 * If the gpa is not present in the NPT then build the NPT.
   3661		 */
   3662		rc = snp_check_and_build_npt(vcpu, gpa, level);
   3663		if (rc)
   3664			return PSC_UNDEF_ERR;
   3665
   3666		if (op == SNP_PAGE_STATE_PRIVATE) {
   3667			hva_t hva;
   3668
   3669			if (snp_gpa_to_hva(kvm, gpa, &hva))
   3670				return PSC_UNDEF_ERR;
   3671
   3672			/*
   3673			 * Verify that the hva range is registered. This enforcement is
   3674			 * required to avoid the cases where a page is marked private
    3675			 * in the RMP table but never gets cleaned up during the VM
   3676			 * termination path.
   3677			 */
   3678			mutex_lock(&kvm->lock);
   3679			rc = is_hva_registered(kvm, hva, page_level_size(level));
   3680			mutex_unlock(&kvm->lock);
   3681			if (!rc)
   3682				return PSC_UNDEF_ERR;
   3683
   3684			/*
    3685			 * Mark the userspace range unmergeable before adding the pages
    3686			 * to the RMP table.
   3687			 */
   3688			mmap_write_lock(kvm->mm);
   3689			rc = snp_mark_unmergable(kvm, hva, page_level_size(level));
   3690			mmap_write_unlock(kvm->mm);
   3691			if (rc)
   3692				return PSC_UNDEF_ERR;
   3693		}
   3694
   3695		spin_lock(&sev->psc_lock);
   3696
   3697		write_lock(&kvm->mmu_lock);
   3698
   3699		rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
   3700		if (!rc) {
   3701			/*
   3702			 * This may happen if another vCPU unmapped the page
   3703			 * before we acquire the lock. Retry the PSC.
   3704			 */
   3705			write_unlock(&kvm->mmu_lock);
   3706			spin_unlock(&sev->psc_lock);
   3707			return 0;
   3708		}
   3709
   3710		/*
   3711		 * Adjust the level so that we don't go higher than the backing
   3712		 * page level.
   3713		 */
   3714		level = min_t(size_t, level, npt_level);
   3715
   3716		trace_kvm_snp_psc(vcpu->vcpu_id, pfn, gpa, op, level);
   3717
   3718		switch (op) {
   3719		case SNP_PAGE_STATE_SHARED:
   3720			rc = snp_make_page_shared(kvm, gpa, pfn, level);
   3721			break;
   3722		case SNP_PAGE_STATE_PRIVATE:
   3723			rc = rmp_make_private(pfn, gpa, level, sev->asid, false);
   3724			break;
   3725		default:
   3726			rc = PSC_INVALID_ENTRY;
   3727			break;
   3728		}
   3729
   3730		write_unlock(&kvm->mmu_lock);
   3731
   3732		spin_unlock(&sev->psc_lock);
   3733
   3734		if (rc) {
   3735			pr_err_ratelimited("Error op %d gpa %llx pfn %llx level %d rc %d\n",
   3736					   op, gpa, pfn, level, rc);
   3737			return rc;
   3738		}
   3739
   3740		gpa = gpa + page_level_size(level);
   3741	}
   3742
   3743	return 0;
   3744}
   3745
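        /*
         * Translate an internal PSC result code into the error value returned
         * to the guest in SW_EXITINFO2 for the Page State Change NAE event.
         */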
   3746static inline unsigned long map_to_psc_vmgexit_code(int rc)
   3747{
   3748	switch (rc) {
   3749	case PSC_INVALID_HDR:
   3750		return ((1ul << 32) | 1);
   3751	case PSC_INVALID_ENTRY:
   3752		return ((1ul << 32) | 2);
   3753	case RMPUPDATE_FAIL_OVERLAP:
   3754		return ((3ul << 32) | 2);
   3755	default: return (4ul << 32);
   3756	}
   3757}
   3758
   3759static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm)
   3760{
   3761	struct kvm_vcpu *vcpu = &svm->vcpu;
   3762	int level, op, rc = PSC_UNDEF_ERR;
   3763	struct snp_psc_desc *info;
   3764	struct psc_entry *entry;
   3765	u16 cur, end;
   3766	gpa_t gpa;
   3767
   3768	if (!sev_snp_guest(vcpu->kvm))
   3769		return PSC_INVALID_HDR;
   3770
   3771	if (setup_vmgexit_scratch(svm, true, sizeof(*info))) {
   3772		pr_err("vmgexit: scratch area is not setup.\n");
   3773		return PSC_INVALID_HDR;
   3774	}
   3775
   3776	info = (struct snp_psc_desc *)svm->sev_es.ghcb_sa;
   3777	cur = info->hdr.cur_entry;
   3778	end = info->hdr.end_entry;
   3779
   3780	if (cur >= VMGEXIT_PSC_MAX_ENTRY ||
   3781	    end >= VMGEXIT_PSC_MAX_ENTRY || cur > end)
   3782		return PSC_INVALID_ENTRY;
   3783
   3784	for (; cur <= end; cur++) {
   3785		entry = &info->entries[cur];
   3786		gpa = gfn_to_gpa(entry->gfn);
   3787		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
   3788		op = entry->operation;
   3789
   3790		if (!IS_ALIGNED(gpa, page_level_size(level))) {
   3791			rc = PSC_INVALID_ENTRY;
   3792			goto out;
   3793		}
   3794
   3795		rc = __snp_handle_page_state_change(vcpu, op, gpa, level);
   3796		if (rc)
   3797			goto out;
   3798	}
   3799
   3800out:
   3801	info->hdr.cur_entry = cur;
   3802	return rc ? map_to_psc_vmgexit_code(rc) : 0;
   3803}
   3804
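        /*
         * Resolve and pin the request/response pages for an SNP guest request
         * and make the response page private and immutable in the RMP before
         * the command is issued to the firmware.
         */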
   3805static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm,
   3806					 struct sev_data_snp_guest_request *data,
   3807					 gpa_t req_gpa, gpa_t resp_gpa)
   3808{
   3809	struct kvm_vcpu *vcpu = &svm->vcpu;
   3810	struct kvm *kvm = vcpu->kvm;
   3811	kvm_pfn_t req_pfn, resp_pfn;
   3812	struct kvm_sev_info *sev;
   3813
   3814	sev = &to_kvm_svm(kvm)->sev_info;
   3815
   3816	if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE))
   3817		return SEV_RET_INVALID_PARAM;
   3818
   3819	req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa));
   3820	if (is_error_noslot_pfn(req_pfn))
   3821		return SEV_RET_INVALID_ADDRESS;
   3822
   3823	resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa));
   3824	if (is_error_noslot_pfn(resp_pfn))
   3825		return SEV_RET_INVALID_ADDRESS;
   3826
   3827	if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true))
   3828		return SEV_RET_INVALID_ADDRESS;
   3829
   3830	data->gctx_paddr = __psp_pa(sev->snp_context);
   3831	data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT);
   3832	data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT);
   3833
   3834	return 0;
   3835}
   3836
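        /*
         * Reclaim the response page from the firmware and transition it back
         * to a shared page once the guest request has completed.
         */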
   3837static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc)
   3838{
   3839	u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT;
   3840	int ret;
   3841
   3842	ret = snp_page_reclaim(pfn);
   3843	if (ret)
   3844		*rc = SEV_RET_INVALID_ADDRESS;
   3845
   3846	ret = rmp_make_shared(pfn, PG_LEVEL_4K);
   3847	if (ret)
   3848		*rc = SEV_RET_INVALID_ADDRESS;
   3849}
   3850
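        /*
         * Handle the SNP Guest Request NAE event by forwarding the guest's
         * request/response pages to the firmware via SEV_CMD_SNP_GUEST_REQUEST
         * and returning the result in SW_EXITINFO2.
         */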
   3851static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
   3852{
   3853	struct sev_data_snp_guest_request data = {0};
   3854	struct kvm_vcpu *vcpu = &svm->vcpu;
   3855	struct kvm *kvm = vcpu->kvm;
   3856	struct kvm_sev_info *sev;
   3857	unsigned long rc;
   3858	int err;
   3859
   3860	if (!sev_snp_guest(vcpu->kvm)) {
   3861		rc = SEV_RET_INVALID_GUEST;
   3862		goto e_fail;
   3863	}
   3864
   3865	sev = &to_kvm_svm(kvm)->sev_info;
   3866
   3867	mutex_lock(&sev->guest_req_lock);
   3868
   3869	rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa);
   3870	if (rc)
   3871		goto unlock;
   3872
   3873	rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err);
   3874	if (rc)
   3875		/* use the firmware error code */
   3876		rc = err;
   3877
   3878	snp_cleanup_guest_buf(&data, &rc);
   3879
   3880unlock:
   3881	mutex_unlock(&sev->guest_req_lock);
   3882
   3883e_fail:
   3884	svm_set_ghcb_sw_exit_info_2(vcpu, rc);
   3885}
   3886
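        /*
         * The extended guest request additionally returns the cached
         * certificate blob (sev->snp_certs_data); the guest supplies the
         * destination GPA in RAX and the buffer size in pages in RBX.
         */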
   3887static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
   3888{
   3889	struct sev_data_snp_guest_request req = {0};
   3890	struct kvm_vcpu *vcpu = &svm->vcpu;
   3891	struct kvm *kvm = vcpu->kvm;
   3892	unsigned long data_npages;
   3893	struct kvm_sev_info *sev;
   3894	unsigned long rc, err;
   3895	u64 data_gpa;
   3896
   3897	if (!sev_snp_guest(vcpu->kvm)) {
   3898		rc = SEV_RET_INVALID_GUEST;
   3899		goto e_fail;
   3900	}
   3901
   3902	sev = &to_kvm_svm(kvm)->sev_info;
   3903
   3904	data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
   3905	data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
   3906
   3907	if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) {
   3908		rc = SEV_RET_INVALID_ADDRESS;
   3909		goto e_fail;
   3910	}
   3911
    3912	/* Verify that the requested blob will fit in the certificate buffer */
   3913	if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) {
   3914		rc = SEV_RET_INVALID_PARAM;
   3915		goto e_fail;
   3916	}
   3917
   3918	mutex_lock(&sev->guest_req_lock);
   3919
   3920	rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa);
   3921	if (rc)
   3922		goto unlock;
   3923
   3924	rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data,
   3925					 &data_npages, &err);
   3926	if (rc) {
   3927		/*
    3928		 * If the buffer length is too small then return the expected
   3929		 * length in rbx.
   3930		 */
   3931		if (err == SNP_GUEST_REQ_INVALID_LEN)
   3932			vcpu->arch.regs[VCPU_REGS_RBX] = data_npages;
   3933
   3934		/* pass the firmware error code */
   3935		rc = err;
   3936		goto cleanup;
   3937	}
   3938
    3939	/* Copy the certificate blob into the guest memory */
   3940	if (data_npages &&
   3941	    kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT))
   3942		rc = SEV_RET_INVALID_ADDRESS;
   3943
   3944cleanup:
   3945	snp_cleanup_guest_buf(&req, &rc);
   3946
   3947unlock:
   3948	mutex_unlock(&sev->guest_req_lock);
   3949
   3950e_fail:
   3951	svm_set_ghcb_sw_exit_info_2(vcpu, rc);
   3952}
   3953
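        /*
         * Switch the vCPU to the guest-supplied VMSA: un-pin the previously
         * used VMSA page (if any), then pin the new one and mark the vCPU
         * runnable, or leave it stopped when no new VMSA was supplied
         * (AP_DESTROY).
         */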
   3954static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
   3955{
   3956	struct vcpu_svm *svm = to_svm(vcpu);
   3957	kvm_pfn_t pfn;
   3958	hpa_t cur_pa;
   3959
   3960	WARN_ON(!mutex_is_locked(&svm->sev_es.snp_vmsa_mutex));
   3961
   3962	/* Save off the current VMSA PA for later checks */
   3963	cur_pa = svm->sev_es.vmsa_pa;
   3964
   3965	/* Mark the vCPU as offline and not runnable */
   3966	vcpu->arch.pv.pv_unhalted = false;
   3967	vcpu->arch.mp_state = KVM_MP_STATE_STOPPED;
   3968
   3969	/* Clear use of the VMSA */
   3970	svm->sev_es.vmsa_pa = INVALID_PAGE;
   3971	svm->vmcb->control.vmsa_pa = INVALID_PAGE;
   3972
   3973	if (cur_pa != __pa(svm->sev_es.vmsa) && VALID_PAGE(cur_pa)) {
   3974		/*
   3975		 * The svm->sev_es.vmsa_pa field holds the hypervisor physical
    3976		 * address of the VMSA that is about to be replaced and will no
    3977		 * longer be used or referenced, so un-pin it.
   3978		 */
   3979		kvm_release_pfn_dirty(__phys_to_pfn(cur_pa));
   3980	}
   3981
   3982	if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) {
   3983		/*
   3984		 * The VMSA is referenced by the hypervisor physical address,
   3985		 * so retrieve the PFN and pin it.
   3986		 */
   3987		pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(svm->sev_es.snp_vmsa_gpa));
   3988		if (is_error_pfn(pfn))
   3989			return -EINVAL;
   3990
   3991		/* Use the new VMSA */
   3992		svm->sev_es.vmsa_pa = pfn_to_hpa(pfn);
   3993		svm->vmcb->control.vmsa_pa = svm->sev_es.vmsa_pa;
   3994
   3995		/* Mark the vCPU as runnable */
   3996		vcpu->arch.pv.pv_unhalted = false;
   3997		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
   3998
   3999		svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
   4000	}
   4001
   4002	/*
   4003	 * When replacing the VMSA during SEV-SNP AP creation,
   4004	 * mark the VMCB dirty so that full state is always reloaded.
   4005	 */
   4006	vmcb_mark_all_dirty(svm->vmcb);
   4007
   4008	return 0;
   4009}
   4010
   4011/*
   4012 * Invoked as part of svm_vcpu_reset() processing of an init event.
   4013 */
   4014void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
   4015{
   4016	struct vcpu_svm *svm = to_svm(vcpu);
   4017	int ret;
   4018
   4019	if (!sev_snp_guest(vcpu->kvm))
   4020		return;
   4021
   4022	mutex_lock(&svm->sev_es.snp_vmsa_mutex);
   4023
   4024	if (!svm->sev_es.snp_ap_create)
   4025		goto unlock;
   4026
   4027	svm->sev_es.snp_ap_create = false;
   4028
   4029	ret = __sev_snp_update_protected_guest_state(vcpu);
   4030	if (ret)
   4031		vcpu_unimpl(vcpu, "snp: AP state update on init failed\n");
   4032
   4033unlock:
   4034	mutex_unlock(&svm->sev_es.snp_vmsa_mutex);
   4035}
   4036
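        /*
         * Handle the SNP AP Creation NAE event: record the new VMSA GPA for
         * the target vCPU and, unless the request is CREATE_ON_INIT, kick the
         * target vCPU so the VMSA switch is performed via
         * KVM_REQ_UPDATE_PROTECTED_GUEST_STATE on its next entry.
         */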
   4037static int sev_snp_ap_creation(struct vcpu_svm *svm)
   4038{
   4039	struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
   4040	struct kvm_vcpu *vcpu = &svm->vcpu;
   4041	struct kvm_vcpu *target_vcpu;
   4042	struct vcpu_svm *target_svm;
   4043	unsigned int request;
   4044	unsigned int apic_id;
   4045	bool kick;
   4046	int ret;
   4047
   4048	request = lower_32_bits(svm->vmcb->control.exit_info_1);
   4049	apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
   4050
   4051	/* Validate the APIC ID */
   4052	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
   4053	if (!target_vcpu) {
   4054		vcpu_unimpl(vcpu, "vmgexit: invalid AP APIC ID [%#x] from guest\n",
   4055			    apic_id);
   4056		return -EINVAL;
   4057	}
   4058
   4059	ret = 0;
   4060
   4061	target_svm = to_svm(target_vcpu);
   4062
   4063	/*
   4064	 * We have a valid target vCPU, so the vCPU will be kicked unless the
   4065	 * request is for CREATE_ON_INIT. For any errors at this stage, the
    4066	 * kick will place the vCPU in a non-runnable state.
   4067	 */
   4068	kick = true;
   4069
   4070	mutex_lock(&target_svm->sev_es.snp_vmsa_mutex);
   4071
   4072	target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
   4073	target_svm->sev_es.snp_ap_create = true;
   4074
   4075	/* Interrupt injection mode shouldn't change for AP creation */
   4076	if (request < SVM_VMGEXIT_AP_DESTROY) {
   4077		u64 sev_features;
   4078
   4079		sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
   4080		sev_features ^= sev->sev_features;
   4081		if (sev_features & SVM_SEV_FEAT_INT_INJ_MODES) {
   4082			vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n",
   4083				    vcpu->arch.regs[VCPU_REGS_RAX]);
   4084			ret = -EINVAL;
   4085			goto out;
   4086		}
   4087	}
   4088
   4089	switch (request) {
   4090	case SVM_VMGEXIT_AP_CREATE_ON_INIT:
   4091		kick = false;
   4092		fallthrough;
   4093	case SVM_VMGEXIT_AP_CREATE:
   4094		if (!page_address_valid(vcpu, svm->vmcb->control.exit_info_2)) {
   4095			vcpu_unimpl(vcpu, "vmgexit: invalid AP VMSA address [%#llx] from guest\n",
   4096				    svm->vmcb->control.exit_info_2);
   4097			ret = -EINVAL;
   4098			goto out;
   4099		}
   4100
   4101		target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
   4102		break;
   4103	case SVM_VMGEXIT_AP_DESTROY:
   4104		break;
   4105	default:
   4106		vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
   4107			    request);
   4108		ret = -EINVAL;
   4109		break;
   4110	}
   4111
   4112out:
   4113	if (kick) {
   4114		if (target_vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
   4115			target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
   4116
   4117		kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
   4118		kvm_vcpu_kick(target_vcpu);
   4119	}
   4120
   4121	mutex_unlock(&target_svm->sev_es.snp_vmsa_mutex);
   4122
   4123	return ret;
   4124}
   4125
   4126static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
   4127{
   4128	struct vmcb_control_area *control = &svm->vmcb->control;
   4129	struct kvm_vcpu *vcpu = &svm->vcpu;
   4130	u64 ghcb_info;
   4131	int ret = 1;
   4132
   4133	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
   4134
   4135	trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
   4136					     control->ghcb_gpa);
   4137
   4138	switch (ghcb_info) {
   4139	case GHCB_MSR_SEV_INFO_REQ:
   4140		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
   4141						    GHCB_VERSION_MIN,
   4142						    sev_enc_bit));
   4143		break;
   4144	case GHCB_MSR_CPUID_REQ: {
   4145		u64 cpuid_fn, cpuid_reg, cpuid_value;
   4146
   4147		cpuid_fn = get_ghcb_msr_bits(svm,
   4148					     GHCB_MSR_CPUID_FUNC_MASK,
   4149					     GHCB_MSR_CPUID_FUNC_POS);
   4150
   4151		/* Initialize the registers needed by the CPUID intercept */
   4152		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
   4153		vcpu->arch.regs[VCPU_REGS_RCX] = 0;
   4154
   4155		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
   4156		if (!ret) {
   4157			/* Error, keep GHCB MSR value as-is */
   4158			break;
   4159		}
   4160
   4161		cpuid_reg = get_ghcb_msr_bits(svm,
   4162					      GHCB_MSR_CPUID_REG_MASK,
   4163					      GHCB_MSR_CPUID_REG_POS);
   4164		if (cpuid_reg == 0)
   4165			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
   4166		else if (cpuid_reg == 1)
   4167			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
   4168		else if (cpuid_reg == 2)
   4169			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
   4170		else
   4171			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
   4172
   4173		set_ghcb_msr_bits(svm, cpuid_value,
   4174				  GHCB_MSR_CPUID_VALUE_MASK,
   4175				  GHCB_MSR_CPUID_VALUE_POS);
   4176
   4177		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
   4178				  GHCB_MSR_INFO_MASK,
   4179				  GHCB_MSR_INFO_POS);
   4180		break;
   4181	}
   4182	case GHCB_MSR_AP_RESET_HOLD_REQ:
   4183		svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
   4184		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
   4185
   4186		/*
   4187		 * Preset the result to a non-SIPI return and then only set
   4188		 * the result to non-zero when delivering a SIPI.
   4189		 */
   4190		set_ghcb_msr_bits(svm, 0,
   4191				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
   4192				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
   4193
   4194		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
   4195				  GHCB_MSR_INFO_MASK,
   4196				  GHCB_MSR_INFO_POS);
   4197		break;
   4198	case GHCB_MSR_HV_FT_REQ: {
   4199		set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED,
   4200				  GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS);
   4201		set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP,
   4202				  GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
   4203		break;
   4204	}
   4205	case GHCB_MSR_PREF_GPA_REQ: {
   4206		set_ghcb_msr_bits(svm, GHCB_MSR_PREF_GPA_NONE, GHCB_MSR_GPA_VALUE_MASK,
   4207				  GHCB_MSR_GPA_VALUE_POS);
   4208		set_ghcb_msr_bits(svm, GHCB_MSR_PREF_GPA_RESP, GHCB_MSR_INFO_MASK,
   4209				  GHCB_MSR_INFO_POS);
   4210		break;
   4211	}
   4212	case GHCB_MSR_REG_GPA_REQ: {
   4213		u64 gfn;
   4214
   4215		gfn = get_ghcb_msr_bits(svm, GHCB_MSR_GPA_VALUE_MASK,
   4216					GHCB_MSR_GPA_VALUE_POS);
   4217
   4218		svm->sev_es.ghcb_registered_gpa = gfn_to_gpa(gfn);
   4219
   4220		set_ghcb_msr_bits(svm, gfn, GHCB_MSR_GPA_VALUE_MASK,
   4221				  GHCB_MSR_GPA_VALUE_POS);
   4222		set_ghcb_msr_bits(svm, GHCB_MSR_REG_GPA_RESP, GHCB_MSR_INFO_MASK,
   4223				  GHCB_MSR_INFO_POS);
   4224		break;
   4225	}
   4226	case GHCB_MSR_PSC_REQ: {
   4227		gfn_t gfn;
   4228		int ret;
   4229		enum psc_op op;
   4230
   4231		gfn = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_GFN_MASK, GHCB_MSR_PSC_GFN_POS);
   4232		op = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_OP_MASK, GHCB_MSR_PSC_OP_POS);
   4233
   4234		ret = __snp_handle_page_state_change(vcpu, op, gfn_to_gpa(gfn), PG_LEVEL_4K);
   4235
   4236		if (ret)
   4237			set_ghcb_msr_bits(svm, GHCB_MSR_PSC_ERROR,
   4238					  GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
   4239		else
   4240			set_ghcb_msr_bits(svm, 0,
   4241					  GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
   4242
   4243		set_ghcb_msr_bits(svm, 0, GHCB_MSR_PSC_RSVD_MASK, GHCB_MSR_PSC_RSVD_POS);
   4244		set_ghcb_msr_bits(svm, GHCB_MSR_PSC_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
   4245		break;
   4246	}
   4247	case GHCB_MSR_TERM_REQ: {
   4248		u64 reason_set, reason_code;
   4249
   4250		reason_set = get_ghcb_msr_bits(svm,
   4251					       GHCB_MSR_TERM_REASON_SET_MASK,
   4252					       GHCB_MSR_TERM_REASON_SET_POS);
   4253		reason_code = get_ghcb_msr_bits(svm,
   4254						GHCB_MSR_TERM_REASON_MASK,
   4255						GHCB_MSR_TERM_REASON_POS);
   4256		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
   4257			reason_set, reason_code);
   4258
   4259		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
   4260		vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM;
   4261		vcpu->run->system_event.ndata = 1;
   4262		vcpu->run->system_event.data[0] = control->ghcb_gpa;
   4263
   4264		return 0;
   4265	}
   4266	default:
   4267		/* Error, keep GHCB MSR value as-is */
   4268		break;
   4269	}
   4270
   4271	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
   4272					    control->ghcb_gpa, ret);
   4273
   4274	return ret;
   4275}
   4276
   4277int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
   4278{
   4279	struct vcpu_svm *svm = to_svm(vcpu);
   4280	struct vmcb_control_area *control = &svm->vmcb->control;
   4281	u64 ghcb_gpa, exit_code;
   4282	int ret;
   4283
   4284	/* Validate the GHCB */
   4285	ghcb_gpa = control->ghcb_gpa;
   4286	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
   4287		return sev_handle_vmgexit_msr_protocol(svm);
   4288
   4289	if (!ghcb_gpa) {
   4290		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
   4291
   4292		/* Without a GHCB, just return right back to the guest */
   4293		return 1;
   4294	}
   4295
    4296	/* An SEV-SNP guest requires that the GHCB GPA be registered */
   4297	if (sev_snp_guest(svm->vcpu.kvm) && !ghcb_gpa_is_registered(svm, ghcb_gpa)) {
   4298		vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB GPA [%#llx] is not registered.\n", ghcb_gpa);
   4299		return -EINVAL;
   4300	}
   4301
   4302	ret = sev_es_validate_vmgexit(svm, &exit_code);
   4303	if (ret)
   4304		return ret;
   4305
   4306	svm->sev_es.ghcb_in_use = true;
   4307
   4308	svm_set_ghcb_sw_exit_info_1(vcpu, 0);
   4309	svm_set_ghcb_sw_exit_info_2(vcpu, 0);
   4310
   4311	switch (exit_code) {
   4312	case SVM_VMGEXIT_MMIO_READ:
   4313		ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
   4314		if (ret)
   4315			break;
   4316
   4317		ret = kvm_sev_es_mmio_read(vcpu,
   4318					   control->exit_info_1,
   4319					   control->exit_info_2,
   4320					   svm->sev_es.ghcb_sa);
   4321		break;
   4322	case SVM_VMGEXIT_MMIO_WRITE:
   4323		ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
   4324		if (ret)
   4325			break;
   4326
   4327		ret = kvm_sev_es_mmio_write(vcpu,
   4328					    control->exit_info_1,
   4329					    control->exit_info_2,
   4330					    svm->sev_es.ghcb_sa);
   4331		break;
   4332	case SVM_VMGEXIT_NMI_COMPLETE:
   4333		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
   4334		break;
   4335	case SVM_VMGEXIT_AP_HLT_LOOP:
   4336		svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
   4337		ret = kvm_emulate_ap_reset_hold(vcpu);
   4338		break;
   4339	case SVM_VMGEXIT_AP_JUMP_TABLE: {
   4340		struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
   4341
   4342		switch (control->exit_info_1) {
   4343		case 0:
   4344			/* Set AP jump table address */
   4345			sev->ap_jump_table = control->exit_info_2;
   4346			break;
   4347		case 1:
   4348			/* Get AP jump table address */
   4349			svm_set_ghcb_sw_exit_info_2(vcpu, sev->ap_jump_table);
   4350			break;
   4351		default:
   4352			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
   4353			       control->exit_info_1);
   4354			svm_set_ghcb_sw_exit_info_1(vcpu, 2);
   4355			svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_ERR_INVALID_INPUT);
   4356		}
   4357
   4358		ret = 1;
   4359		break;
   4360	}
   4361	case SVM_VMGEXIT_HV_FEATURES: {
   4362		svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_HV_FT_SUPPORTED);
   4363
   4364		ret = 1;
   4365		break;
   4366	}
   4367	case SVM_VMGEXIT_PSC: {
   4368		unsigned long rc;
   4369
   4370		ret = 1;
   4371
   4372		rc = snp_handle_page_state_change(svm);
   4373		svm_set_ghcb_sw_exit_info_2(vcpu, rc);
   4374		break;
   4375	}
   4376	case SVM_VMGEXIT_GUEST_REQUEST: {
   4377		snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2);
   4378
   4379		ret = 1;
   4380		break;
   4381	}
   4382	case SVM_VMGEXIT_EXT_GUEST_REQUEST: {
   4383		snp_handle_ext_guest_request(svm,
   4384					     control->exit_info_1,
   4385					     control->exit_info_2);
   4386
   4387		ret = 1;
   4388		break;
   4389	}
   4390	case SVM_VMGEXIT_AP_CREATION:
   4391		ret = sev_snp_ap_creation(svm);
   4392		if (ret) {
   4393			svm_set_ghcb_sw_exit_info_1(vcpu, 1);
   4394			svm_set_ghcb_sw_exit_info_2(vcpu,
   4395						    X86_TRAP_GP |
   4396						    SVM_EVTINJ_TYPE_EXEPT |
   4397						    SVM_EVTINJ_VALID);
   4398		}
   4399
   4400		ret = 1;
   4401		break;
   4402	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
   4403		vcpu_unimpl(vcpu,
   4404			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
   4405			    control->exit_info_1, control->exit_info_2);
   4406		ret = -EINVAL;
   4407		break;
   4408	default:
   4409		ret = svm_invoke_exit_handler(vcpu, exit_code);
   4410	}
   4411
   4412	return ret;
   4413}
   4414
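        /*
         * Handle string IO (INS/OUTS) for an SEV-ES guest: the repeat count
         * comes from exit_info_2 and the data is staged through the GHCB
         * scratch buffer.
         */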
   4415int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
   4416{
   4417	int count;
   4418	int bytes;
   4419	int r;
   4420
   4421	if (svm->vmcb->control.exit_info_2 > INT_MAX)
   4422		return -EINVAL;
   4423
   4424	count = svm->vmcb->control.exit_info_2;
   4425	if (unlikely(check_mul_overflow(count, size, &bytes)))
   4426		return -EINVAL;
   4427
   4428	r = setup_vmgexit_scratch(svm, in, bytes);
   4429	if (r) {
   4430		pr_err("failed to setup vmgexit scratch\n");
   4431		return r;
   4432	}
   4433
   4434	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
   4435				    count, in);
   4436}
   4437
   4438static void sev_es_init_vmcb(struct vcpu_svm *svm)
   4439{
   4440	struct kvm_vcpu *vcpu = &svm->vcpu;
   4441
   4442	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
   4443	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
   4444
   4445	/*
    4446	 * An SEV-ES guest requires a VMSA area that is separate from the
   4447	 * VMCB page.
   4448	 */
   4449	svm->vmcb->control.vmsa_pa = svm->sev_es.vmsa_pa;
   4450
   4451	/* Can't intercept CR register access, HV can't modify CR registers */
   4452	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
   4453	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
   4454	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
   4455	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
   4456	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
   4457	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
   4458
   4459	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
   4460
   4461	/* Track EFER/CR register changes */
   4462	svm_set_intercept(svm, TRAP_EFER_WRITE);
   4463	svm_set_intercept(svm, TRAP_CR0_WRITE);
   4464	svm_set_intercept(svm, TRAP_CR4_WRITE);
   4465	svm_set_intercept(svm, TRAP_CR8_WRITE);
   4466
   4467	/* No support for enable_vmware_backdoor */
   4468	clr_exception_intercept(svm, GP_VECTOR);
   4469
   4470	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
   4471	svm_clr_intercept(svm, INTERCEPT_XSETBV);
   4472
   4473	/* Clear intercepts on selected MSRs */
   4474	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
   4475	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
   4476	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
   4477	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
   4478	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
   4479	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
   4480
   4481	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
   4482	    (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
   4483	     guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
   4484		set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
   4485		if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
   4486			svm_clr_intercept(svm, INTERCEPT_RDTSCP);
   4487	}
   4488}
   4489
   4490void sev_init_vmcb(struct vcpu_svm *svm)
   4491{
   4492	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
   4493	clr_exception_intercept(svm, UD_VECTOR);
   4494
   4495	if (sev_es_guest(svm->vcpu.kvm))
   4496		sev_es_init_vmcb(svm);
   4497}
   4498
   4499void sev_es_vcpu_reset(struct vcpu_svm *svm)
   4500{
   4501	/*
   4502	 * Set the GHCB MSR value as per the GHCB specification when emulating
   4503	 * vCPU RESET for an SEV-ES guest.
   4504	 */
   4505	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
   4506					    GHCB_VERSION_MIN,
   4507					    sev_enc_bit));
   4508
   4509	mutex_init(&svm->sev_es.snp_vmsa_mutex);
   4510}
   4511
   4512void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
   4513{
   4514	/*
    4515	 * For an SEV-ES guest, hardware will restore the host state on VMEXIT,
    4516	 * one step of which is to perform a VMLOAD.  KVM performs the
   4517	 * corresponding VMSAVE in svm_prepare_guest_switch for both
   4518	 * traditional and SEV-ES guests.
   4519	 */
   4520
   4521	/* XCR0 is restored on VMEXIT, save the current host value */
   4522	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
   4523
   4524	/* PKRU is restored on VMEXIT, save the current host value */
   4525	hostsa->pkru = read_pkru();
   4526
    4527	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
   4528	hostsa->xss = host_xss;
   4529}
   4530
   4531void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
   4532{
   4533	struct vcpu_svm *svm = to_svm(vcpu);
   4534
   4535	/* First SIPI: Use the values as initially set by the VMM */
   4536	if (!svm->sev_es.received_first_sipi) {
   4537		svm->sev_es.received_first_sipi = true;
   4538		return;
   4539	}
   4540
   4541	/* Subsequent SIPI */
   4542	switch (svm->sev_es.ap_reset_hold_type) {
   4543	case AP_RESET_HOLD_NAE_EVENT:
   4544		/*
   4545		 * Return from an AP Reset Hold VMGEXIT, where the guest will
   4546		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
   4547		 */
   4548		svm_set_ghcb_sw_exit_info_2(vcpu, 1);
   4549		break;
   4550	case AP_RESET_HOLD_MSR_PROTO:
   4551		/*
   4552		 * Return from an AP Reset Hold VMGEXIT, where the guest will
   4553		 * set the CS and RIP. Set GHCB data field to a non-zero value.
   4554		 */
   4555		set_ghcb_msr_bits(svm, 1,
   4556				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
   4557				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
   4558
   4559		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
   4560				  GHCB_MSR_INFO_MASK,
   4561				  GHCB_MSR_INFO_POS);
   4562		break;
   4563	default:
   4564		break;
   4565	}
   4566}
   4567
   4568struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
   4569{
   4570	unsigned long pfn;
   4571	struct page *p;
   4572
   4573	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
   4574		return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
   4575
   4576	/*
    4577	 * Allocate an SNP-safe page to work around the SNP erratum where
    4578	 * the CPU will incorrectly signal an RMP violation #PF if a
    4579	 * hugepage (2MB or 1GB) collides with the RMP entry of a VMCB, VMSA
    4580	 * or AVIC backing page. The recommended workaround is to not use
    4581	 * such a hugepage.
    4582	 *
    4583	 * Allocate one extra page, use the page which is not 2MB-aligned
    4584	 * and free the other.
   4585	 */
   4586	p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
   4587	if (!p)
   4588		return NULL;
   4589
   4590	split_page(p, 1);
   4591
   4592	pfn = page_to_pfn(p);
   4593	if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) {
   4594		pfn++;
   4595		__free_page(p);
   4596	} else {
   4597		__free_page(pfn_to_page(pfn + 1));
   4598	}
   4599
   4600	return pfn_to_page(pfn);
   4601}
   4602
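        /*
         * Return true if every pfn in [start, end) is shared (not assigned)
         * in the RMP table; a failed RMP lookup is treated as not shared.
         */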
   4603static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)
   4604{
   4605	int level;
   4606
   4607	while (end > start) {
   4608		if (snp_lookup_rmpentry(start, &level) != 0)
   4609			return false;
   4610		start++;
   4611	}
   4612
   4613	return true;
   4614}
   4615
   4616void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level)
   4617{
   4618	int rmp_level, assigned;
   4619
   4620	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
   4621		return;
   4622
   4623	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
   4624	if (unlikely(assigned < 0))
   4625		return;
   4626
   4627	if (!assigned) {
   4628		/*
   4629		 * If all the pages are shared then no need to keep the RMP
   4630		 * and NPT in sync.
   4631		 */
   4632		pfn = pfn & ~(PTRS_PER_PMD - 1);
   4633		if (is_pfn_range_shared(pfn, pfn + PTRS_PER_PMD))
   4634			return;
   4635	}
   4636
   4637	/*
    4638	 * The hardware installs 2MB TLB entries to access 1GB pages,
    4639	 * therefore allow the NPT to use 1GB pages when the pfn was added as 2MB
   4640	 * in the RMP table.
   4641	 */
   4642	if (rmp_level == PG_LEVEL_2M && (*level == PG_LEVEL_1G))
   4643		return;
   4644
   4645	/* Adjust the level to keep the NPT and RMP in sync */
   4646	*level = min_t(size_t, *level, rmp_level);
   4647}
   4648
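        /*
         * sev_post_map_gfn() takes psc_lock and fails if the pfn is already
         * private in the RMP table; the lock is held until sev_post_unmap_gfn()
         * so that a page state change cannot race with the host mapping.
         */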
   4649int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
   4650{
   4651	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   4652	int level;
   4653
   4654	if (!sev_snp_guest(kvm))
   4655		return 0;
   4656
   4657	spin_lock(&sev->psc_lock);
   4658
    4659	/* If the pfn has been added as private in the RMP table then fail */
   4660	if (snp_lookup_rmpentry(pfn, &level) == 1) {
   4661		spin_unlock(&sev->psc_lock);
   4662		pr_err_ratelimited("failed to map private gfn 0x%llx pfn 0x%llx\n", gfn, pfn);
   4663		return -EBUSY;
   4664	}
   4665
   4666	return 0;
   4667}
   4668
   4669void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
   4670{
   4671	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
   4672
   4673	if (!sev_snp_guest(kvm))
   4674		return;
   4675
   4676	spin_unlock(&sev->psc_lock);
   4677}
   4678
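        /*
         * Handle an RMP violation #NPF: on a page-size mismatch, PSMASH the
         * 2MB RMP entry into 4K entries; if the RMP state and the access type
         * disagree, perform an implicit page state change. The surrounding 2MB
         * gfn range is then zapped so the NPT is rebuilt with the updated RMP
         * page size.
         */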
   4679void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
   4680{
   4681	int rmp_level, npt_level, rc, assigned;
   4682	struct kvm *kvm = vcpu->kvm;
   4683	gfn_t gfn = gpa_to_gfn(gpa);
   4684	bool need_psc = false;
   4685	enum psc_op psc_op;
   4686	kvm_pfn_t pfn;
   4687	bool private;
   4688
   4689	write_lock(&kvm->mmu_lock);
   4690
   4691	if (unlikely(!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level)))
   4692		goto unlock;
   4693
   4694	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
   4695	if (unlikely(assigned < 0))
   4696		goto unlock;
   4697
   4698	private = !!(error_code & PFERR_GUEST_ENC_MASK);
   4699
   4700	/*
    4701	 * If the fault was due to a size mismatch, or the NPT and RMP page
    4702	 * levels are not in sync, then use PSMASH to split the RMP entry into 4K.
   4703	 */
   4704	if ((error_code & PFERR_GUEST_SIZEM_MASK) ||
   4705	    (npt_level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M && private)) {
   4706		rc = snp_rmptable_psmash(kvm, pfn);
   4707		if (rc)
   4708			pr_err_ratelimited("psmash failed, gpa 0x%llx pfn 0x%llx rc %d\n",
   4709					   gpa, pfn, rc);
   4710		goto out;
   4711	}
   4712
   4713	/*
   4714	 * If it's a private access, and the page is not assigned in the
   4715	 * RMP table, create a new private RMP entry. This can happen if
    4716	 * the guest did not use the PSC VMGEXIT to transition the page state
   4717	 * before the access.
   4718	 */
   4719	if (!assigned && private) {
    4720		need_psc = true;
   4721		psc_op = SNP_PAGE_STATE_PRIVATE;
   4722		goto out;
   4723	}
   4724
   4725	/*
   4726	 * If it's a shared access, but the page is private in the RMP table
   4727	 * then make the page shared in the RMP table. This can happen if
   4728	 * the guest did not use the PSC VMGEXIT to transition the page
   4729	 * state before the access.
   4730	 */
   4731	if (assigned && !private) {
    4732		need_psc = true;
   4733		psc_op = SNP_PAGE_STATE_SHARED;
   4734	}
   4735
   4736out:
   4737	write_unlock(&kvm->mmu_lock);
   4738
   4739	if (need_psc)
   4740		rc = __snp_handle_page_state_change(vcpu, psc_op, gpa, PG_LEVEL_4K);
   4741
   4742	/*
    4743	 * The fault handler has updated the RMP pagesize; zap the existing
    4744	 * rmaps for large entry ranges so that the nested page table gets rebuilt
   4745	 * with the updated RMP pagesize.
   4746	 */
   4747	gfn = gpa_to_gfn(gpa) & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
   4748	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
   4749	return;
   4750
   4751unlock:
   4752	write_unlock(&kvm->mmu_lock);
   4753}