sev.c (119804B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
#include <linux/hugetlb.h>
#include <linux/sev.h>
#include <linux/ksm.h>

#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include <asm/sev.h>
#include <asm/mman.h>

#include "mmu.h"
#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
#include "cpuid.h"
#include "trace.h"

#include "asm/set_memory.h"
#include "cachepc/cachepc.h"

#ifndef CONFIG_KVM_AMD_SEV
/*
 * When this config is not defined, SEV feature is not supported and APIs in
 * this file are not used but this file still gets compiled into the KVM AMD
 * module.
 *
 * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
 * misc_res_type {} defined in linux/misc_cgroup.h.
 *
 * Below macros allow compilation to succeed.
 */
#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif

#ifdef CONFIG_KVM_AMD_SEV
/* enable/disable SEV support */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);

/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);

/* enable/disable SEV-SNP support */
static bool sev_snp_enabled = true;
module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#define sev_snp_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

#define AP_RESET_HOLD_NONE		0
#define AP_RESET_HOLD_NAE_EVENT		1
#define AP_RESET_HOLD_MSR_PROTO		2

static u8 sev_enc_bit;
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;

static int snp_decommission_context(struct kvm *kvm);

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
	int ret, asid, error = 0;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
	if (asid > max_asid)
		return -EBUSY;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();

	if (sev_snp_enabled)
		ret = snp_guest_df_flush(&error);
	else
		ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV%s: DF_FLUSH failed, ret=%d, error=%#x\n",
		       sev_snp_enabled ?
"-SNP" : "", ret, error); 125 126 return ret; 127} 128 129static inline bool is_mirroring_enc_context(struct kvm *kvm) 130{ 131 return !!to_kvm_svm(kvm)->sev_info.enc_context_owner; 132} 133 134/* Must be called with the sev_bitmap_lock held */ 135static bool __sev_recycle_asids(int min_asid, int max_asid) 136{ 137 if (sev_flush_asids(min_asid, max_asid)) 138 return false; 139 140 /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ 141 bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, 142 nr_asids); 143 bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); 144 145 return true; 146} 147 148static int sev_misc_cg_try_charge(struct kvm_sev_info *sev) 149{ 150 enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 151 return misc_cg_try_charge(type, sev->misc_cg, 1); 152} 153 154static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) 155{ 156 enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 157 misc_cg_uncharge(type, sev->misc_cg, 1); 158} 159 160static int sev_asid_new(struct kvm_sev_info *sev) 161{ 162 int asid, min_asid, max_asid, ret; 163 bool retry = true; 164 165 WARN_ON(sev->misc_cg); 166 sev->misc_cg = get_current_misc_cg(); 167 ret = sev_misc_cg_try_charge(sev); 168 if (ret) { 169 put_misc_cg(sev->misc_cg); 170 sev->misc_cg = NULL; 171 return ret; 172 } 173 174 mutex_lock(&sev_bitmap_lock); 175 176 /* 177 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. 178 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. 179 */ 180 min_asid = sev->es_active ? 1 : min_sev_asid; 181 max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; 182again: 183 asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); 184 if (asid > max_asid) { 185 if (retry && __sev_recycle_asids(min_asid, max_asid)) { 186 retry = false; 187 goto again; 188 } 189 mutex_unlock(&sev_bitmap_lock); 190 ret = -EBUSY; 191 goto e_uncharge; 192 } 193 194 __set_bit(asid, sev_asid_bitmap); 195 196 mutex_unlock(&sev_bitmap_lock); 197 198 return asid; 199e_uncharge: 200 sev_misc_cg_uncharge(sev); 201 put_misc_cg(sev->misc_cg); 202 sev->misc_cg = NULL; 203 return ret; 204} 205 206static int sev_get_asid(struct kvm *kvm) 207{ 208 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 209 210 return sev->asid; 211} 212 213static void sev_asid_free(struct kvm_sev_info *sev) 214{ 215 struct svm_cpu_data *sd; 216 int cpu; 217 218 mutex_lock(&sev_bitmap_lock); 219 220 __set_bit(sev->asid, sev_reclaim_asid_bitmap); 221 222 for_each_possible_cpu(cpu) { 223 sd = per_cpu(svm_data, cpu); 224 sd->sev_vmcbs[sev->asid] = NULL; 225 } 226 227 mutex_unlock(&sev_bitmap_lock); 228 229 sev_misc_cg_uncharge(sev); 230 put_misc_cg(sev->misc_cg); 231 sev->misc_cg = NULL; 232} 233 234static void sev_decommission(unsigned int handle) 235{ 236 struct sev_data_decommission decommission; 237 238 if (!handle) 239 return; 240 241 decommission.handle = handle; 242 sev_guest_decommission(&decommission, NULL); 243} 244 245static inline void snp_leak_pages(u64 pfn, enum pg_level level) 246{ 247 unsigned int npages = page_level_size(level) >> PAGE_SHIFT; 248 249 WARN(1, "psc failed pfn 0x%llx pages %d (leaking)\n", pfn, npages); 250 251 while (npages) { 252 memory_failure(pfn, 0); 253 dump_rmpentry(pfn); 254 npages--; 255 pfn++; 256 } 257} 258 259static int snp_page_reclaim(u64 pfn) 260{ 261 struct sev_data_snp_page_reclaim data = {0}; 262 int err, rc; 263 264 data.paddr = __sme_set(pfn << PAGE_SHIFT); 265 rc = 
snp_guest_page_reclaim(&data, &err); 266 if (rc) { 267 /* 268 * If the reclaim failed, then page is no longer safe 269 * to use. 270 */ 271 snp_leak_pages(pfn, PG_LEVEL_4K); 272 } 273 274 return rc; 275} 276 277static int host_rmp_make_shared(u64 pfn, enum pg_level level, bool leak) 278{ 279 int rc; 280 281 rc = rmp_make_shared(pfn, level); 282 if (rc && leak) 283 snp_leak_pages(pfn, level); 284 285 return rc; 286} 287 288static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) 289{ 290 struct sev_data_deactivate deactivate; 291 292 if (!handle) 293 return; 294 295 deactivate.handle = handle; 296 297 /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ 298 down_read(&sev_deactivate_lock); 299 sev_guest_deactivate(&deactivate, NULL); 300 up_read(&sev_deactivate_lock); 301 302 sev_decommission(handle); 303} 304 305static int verify_snp_init_flags(struct kvm *kvm, struct kvm_sev_cmd *argp) 306{ 307 struct kvm_snp_init params; 308 int ret = 0; 309 310 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) 311 return -EFAULT; 312 313 if (params.flags & ~SEV_SNP_SUPPORTED_FLAGS) 314 ret = -EOPNOTSUPP; 315 316 params.flags = SEV_SNP_SUPPORTED_FLAGS; 317 318 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) 319 ret = -EFAULT; 320 321 return ret; 322} 323 324static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) 325{ 326 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 327 int asid, ret; 328 329 if (kvm->created_vcpus) 330 return -EINVAL; 331 332 ret = -EBUSY; 333 if (unlikely(sev->active)) 334 return ret; 335 336 sev->active = true; 337 sev->es_active = (argp->id == KVM_SEV_ES_INIT || argp->id == KVM_SEV_SNP_INIT); 338 sev->snp_active = argp->id == KVM_SEV_SNP_INIT; 339 asid = sev_asid_new(sev); 340 if (asid < 0) 341 goto e_no_asid; 342 sev->asid = asid; 343 344 if (sev->snp_active) { 345 ret = verify_snp_init_flags(kvm, argp); 346 if (ret) 347 goto e_free; 348 349 spin_lock_init(&sev->psc_lock); 350 ret = sev_snp_init(&argp->error); 351 mutex_init(&sev->guest_req_lock); 352 } else { 353 ret = sev_platform_init(&argp->error); 354 } 355 356 if (ret) 357 goto e_free; 358 359 INIT_LIST_HEAD(&sev->regions_list); 360 INIT_LIST_HEAD(&sev->mirror_vms); 361 362 kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); 363 364 return 0; 365 366e_free: 367 sev_asid_free(sev); 368 sev->asid = 0; 369e_no_asid: 370 sev->snp_active = false; 371 sev->es_active = false; 372 sev->active = false; 373 return ret; 374} 375 376static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) 377{ 378 struct sev_data_activate activate; 379 int asid = sev_get_asid(kvm); 380 int ret; 381 382 /* activate ASID on the given handle */ 383 activate.handle = handle; 384 activate.asid = asid; 385 ret = sev_guest_activate(&activate, error); 386 387 return ret; 388} 389 390static int __sev_issue_cmd(int fd, int id, void *data, int *error) 391{ 392 struct fd f; 393 int ret; 394 395 f = fdget(fd); 396 if (!f.file) 397 return -EBADF; 398 399 ret = sev_issue_cmd_external_user(f.file, id, data, error); 400 401 fdput(f); 402 return ret; 403} 404 405static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) 406{ 407 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 408 409 return __sev_issue_cmd(sev->fd, id, data, error); 410} 411 412static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 413{ 414 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 415 struct sev_data_launch_start start; 416 struct 
kvm_sev_launch_start params; 417 void *dh_blob, *session_blob; 418 int *error = &argp->error; 419 int ret; 420 421 if (!sev_guest(kvm)) 422 return -ENOTTY; 423 424 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) 425 return -EFAULT; 426 427 memset(&start, 0, sizeof(start)); 428 429 dh_blob = NULL; 430 if (params.dh_uaddr) { 431 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len); 432 if (IS_ERR(dh_blob)) 433 return PTR_ERR(dh_blob); 434 435 start.dh_cert_address = __sme_set(__pa(dh_blob)); 436 start.dh_cert_len = params.dh_len; 437 } 438 439 session_blob = NULL; 440 if (params.session_uaddr) { 441 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len); 442 if (IS_ERR(session_blob)) { 443 ret = PTR_ERR(session_blob); 444 goto e_free_dh; 445 } 446 447 start.session_address = __sme_set(__pa(session_blob)); 448 start.session_len = params.session_len; 449 } 450 451 start.handle = params.handle; 452 start.policy = params.policy; 453 454 /* create memory encryption context */ 455 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error); 456 if (ret) 457 goto e_free_session; 458 459 /* Bind ASID to this guest */ 460 ret = sev_bind_asid(kvm, start.handle, error); 461 if (ret) { 462 sev_decommission(start.handle); 463 goto e_free_session; 464 } 465 466 /* return handle to userspace */ 467 params.handle = start.handle; 468 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { 469 sev_unbind_asid(kvm, start.handle); 470 ret = -EFAULT; 471 goto e_free_session; 472 } 473 474 sev->handle = start.handle; 475 sev->fd = argp->sev_fd; 476 477e_free_session: 478 kfree(session_blob); 479e_free_dh: 480 kfree(dh_blob); 481 return ret; 482} 483 484static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, 485 unsigned long ulen, unsigned long *n, 486 int write) 487{ 488 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 489 unsigned long npages, size; 490 int npinned; 491 unsigned long locked, lock_limit; 492 struct page **pages; 493 unsigned long first, last; 494 int ret; 495 496 lockdep_assert_held(&kvm->lock); 497 498 if (ulen == 0 || uaddr + ulen < uaddr) 499 return ERR_PTR(-EINVAL); 500 501 /* Calculate number of pages. */ 502 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; 503 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; 504 npages = (last - first + 1); 505 506 locked = sev->pages_locked + npages; 507 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 508 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { 509 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit); 510 return ERR_PTR(-ENOMEM); 511 } 512 513 if (WARN_ON_ONCE(npages > INT_MAX)) 514 return ERR_PTR(-EINVAL); 515 516 /* Avoid using vmalloc for smaller buffers. */ 517 size = npages * sizeof(struct page *); 518 if (size > PAGE_SIZE) 519 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); 520 else 521 pages = kmalloc(size, GFP_KERNEL_ACCOUNT); 522 523 if (!pages) 524 return ERR_PTR(-ENOMEM); 525 526 /* Pin the user virtual address. */ 527 npinned = pin_user_pages_fast(uaddr, npages, write ? 
FOLL_WRITE : 0, pages); 528 if (npinned != npages) { 529 pr_err("SEV: Failure locking %lu pages.\n", npages); 530 ret = -ENOMEM; 531 goto err; 532 } 533 534 *n = npages; 535 sev->pages_locked = locked; 536 537 return pages; 538 539err: 540 if (npinned > 0) 541 unpin_user_pages(pages, npinned); 542 543 kvfree(pages); 544 return ERR_PTR(ret); 545} 546 547static void sev_unpin_memory(struct kvm *kvm, struct page **pages, 548 unsigned long npages) 549{ 550 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 551 552 unpin_user_pages(pages, npages); 553 kvfree(pages); 554 sev->pages_locked -= npages; 555} 556 557static void sev_clflush_pages(struct page *pages[], unsigned long npages) 558{ 559 uint8_t *page_virtual; 560 unsigned long i; 561 562 if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 || 563 pages == NULL) 564 return; 565 566 for (i = 0; i < npages; i++) { 567 page_virtual = kmap_atomic(pages[i]); 568 clflush_cache_range(page_virtual, PAGE_SIZE); 569 kunmap_atomic(page_virtual); 570 cond_resched(); 571 } 572} 573 574static unsigned long get_num_contig_pages(unsigned long idx, 575 struct page **inpages, unsigned long npages) 576{ 577 unsigned long paddr, next_paddr; 578 unsigned long i = idx + 1, pages = 1; 579 580 /* find the number of contiguous pages starting from idx */ 581 paddr = __sme_page_pa(inpages[idx]); 582 while (i < npages) { 583 next_paddr = __sme_page_pa(inpages[i++]); 584 if ((paddr + PAGE_SIZE) == next_paddr) { 585 pages++; 586 paddr = next_paddr; 587 continue; 588 } 589 break; 590 } 591 592 return pages; 593} 594 595static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 596{ 597 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; 598 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 599 struct kvm_sev_launch_update_data params; 600 struct sev_data_launch_update_data data; 601 struct page **inpages; 602 int ret; 603 604 if (!sev_guest(kvm)) 605 return -ENOTTY; 606 607 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) 608 return -EFAULT; 609 610 vaddr = params.uaddr; 611 size = params.len; 612 vaddr_end = vaddr + size; 613 614 /* Lock the user memory. */ 615 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1); 616 if (IS_ERR(inpages)) 617 return PTR_ERR(inpages); 618 619 /* 620 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in 621 * place; the cache may contain the data that was written unencrypted. 622 */ 623 sev_clflush_pages(inpages, npages); 624 625 data.reserved = 0; 626 data.handle = sev->handle; 627 628 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { 629 int offset, len; 630 631 /* 632 * If the user buffer is not page-aligned, calculate the offset 633 * within the page. 634 */ 635 offset = vaddr & (PAGE_SIZE - 1); 636 637 /* Calculate the number of pages that can be encrypted in one go. 
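		 * Note: each LAUNCH_UPDATE_DATA command below covers a single
		 * physically contiguous range (one base address plus a length),
		 * so runs of contiguous pinned pages are batched into one
		 * firmware call instead of being issued page by page.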
		 */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data.len = len;
		data.address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
	return ret;
}

static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
	struct sev_es_save_area *save = svm->sev_es.vmsa;

	/* Check some debug related fields before encrypting the VMSA */
	if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
		return -EINVAL;

	/*
	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
	 * the traditional VMSA that is part of the VMCB. Copy the
	 * traditional VMSA as it has been built so far (in prep
	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
	 */
	memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));

	/* Sync registers */
	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
#ifdef CONFIG_X86_64
	save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
	save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
#endif
	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];

	/* Sync some non-GPR registers before encrypting */
	save->xcr0 = svm->vcpu.arch.xcr0;
	save->pkru = svm->vcpu.arch.pkru;
	save->xss  = svm->vcpu.arch.ia32_xss;
	save->dr6  = svm->vcpu.arch.dr6;

	/* Enable the SEV-SNP feature */
	if (sev_snp_guest(svm->vcpu.kvm))
		save->sev_features |= SVM_SEV_FEAT_SNP_ACTIVE;

	/*
	 * Save the VMSA synced SEV features. For now, they are the same for
	 * all vCPUs, so just save each time.
	 */
	sev->sev_features = save->sev_features;

	return 0;
}

static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
				    int *error)
{
	struct sev_data_launch_update_vmsa vmsa;
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	/* Perform some pre-encryption checks against the VMSA */
	ret = sev_es_sync_vmsa(svm);
	if (ret)
		return ret;

	/*
	 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
	 * the VMSA memory content (i.e. it will write the same memory region
	 * with the guest's key), so invalidate it first.
736 */ 737 clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); 738 739 vmsa.reserved = 0; 740 vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; 741 vmsa.address = __sme_pa(svm->sev_es.vmsa); 742 vmsa.len = PAGE_SIZE; 743 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); 744 if (ret) 745 return ret; 746 747 vcpu->arch.guest_state_protected = true; 748 return 0; 749} 750 751static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) 752{ 753 struct kvm_vcpu *vcpu; 754 unsigned long i; 755 int ret; 756 757 if (!sev_es_guest(kvm)) 758 return -ENOTTY; 759 760 kvm_for_each_vcpu(i, vcpu, kvm) { 761 ret = mutex_lock_killable(&vcpu->mutex); 762 if (ret) 763 return ret; 764 765 ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error); 766 767 mutex_unlock(&vcpu->mutex); 768 if (ret) 769 return ret; 770 } 771 772 return 0; 773} 774 775static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) 776{ 777 void __user *measure = (void __user *)(uintptr_t)argp->data; 778 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 779 struct sev_data_launch_measure data; 780 struct kvm_sev_launch_measure params; 781 void __user *p = NULL; 782 void *blob = NULL; 783 int ret; 784 785 if (!sev_guest(kvm)) 786 return -ENOTTY; 787 788 if (copy_from_user(¶ms, measure, sizeof(params))) 789 return -EFAULT; 790 791 memset(&data, 0, sizeof(data)); 792 793 /* User wants to query the blob length */ 794 if (!params.len) 795 goto cmd; 796 797 p = (void __user *)(uintptr_t)params.uaddr; 798 if (p) { 799 if (params.len > SEV_FW_BLOB_MAX_SIZE) 800 return -EINVAL; 801 802 blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); 803 if (!blob) 804 return -ENOMEM; 805 806 data.address = __psp_pa(blob); 807 data.len = params.len; 808 } 809 810cmd: 811 data.handle = sev->handle; 812 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error); 813 814 /* 815 * If we query the session length, FW responded with expected data. 
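	 * The measurement blob itself is only copied out on a later call: the
	 * usual userspace flow (illustrative, not enforced here) is to invoke
	 * KVM_SEV_LAUNCH_MEASURE once with len == 0 to learn the required
	 * size from params.len, allocate a buffer of that size, and then
	 * invoke the command again to fetch the measurement.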
816 */ 817 if (!params.len) 818 goto done; 819 820 if (ret) 821 goto e_free_blob; 822 823 if (blob) { 824 if (copy_to_user(p, blob, params.len)) 825 ret = -EFAULT; 826 } 827 828done: 829 params.len = data.len; 830 if (copy_to_user(measure, ¶ms, sizeof(params))) 831 ret = -EFAULT; 832e_free_blob: 833 kfree(blob); 834 return ret; 835} 836 837static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 838{ 839 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 840 struct sev_data_launch_finish data; 841 842 if (!sev_guest(kvm)) 843 return -ENOTTY; 844 845 data.handle = sev->handle; 846 return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error); 847} 848 849static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) 850{ 851 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 852 struct kvm_sev_guest_status params; 853 struct sev_data_guest_status data; 854 int ret; 855 856 if (!sev_guest(kvm)) 857 return -ENOTTY; 858 859 memset(&data, 0, sizeof(data)); 860 861 data.handle = sev->handle; 862 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error); 863 if (ret) 864 return ret; 865 866 params.policy = data.policy; 867 params.state = data.state; 868 params.handle = data.handle; 869 870 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) 871 ret = -EFAULT; 872 873 return ret; 874} 875 876static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, 877 unsigned long dst, int size, 878 int *error, bool enc) 879{ 880 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 881 struct sev_data_dbg data; 882 883 data.reserved = 0; 884 data.handle = sev->handle; 885 data.dst_addr = dst; 886 data.src_addr = src; 887 data.len = size; 888 889 return sev_issue_cmd(kvm, 890 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT, 891 &data, error); 892} 893 894static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, 895 unsigned long dst_paddr, int sz, int *err) 896{ 897 int offset; 898 899 /* 900 * Its safe to read more than we are asked, caller should ensure that 901 * destination has enough space. 
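	 * The firmware works at 16-byte granularity here, which is why the
	 * source address is rounded down and the length rounded up below; up
	 * to 15 extra bytes can therefore be decrypted on either side of the
	 * requested range.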
902 */ 903 offset = src_paddr & 15; 904 src_paddr = round_down(src_paddr, 16); 905 sz = round_up(sz + offset, 16); 906 907 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); 908} 909 910static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, 911 void __user *dst_uaddr, 912 unsigned long dst_paddr, 913 int size, int *err) 914{ 915 struct page *tpage = NULL; 916 struct vcpu_svm *svm; 917 int ret, offset; 918 919 /* if inputs are not 16-byte then use intermediate buffer */ 920 if (!IS_ALIGNED(dst_paddr, 16) || 921 !IS_ALIGNED(paddr, 16) || 922 !IS_ALIGNED(size, 16)) { 923 tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); 924 if (!tpage) 925 return -ENOMEM; 926 927 dst_paddr = __sme_page_pa(tpage); 928 } 929 930 if (dst_uaddr == CPC_VMSA_MAGIC_ADDR) { 931 svm = to_svm(xa_load(&kvm->vcpu_array, 0)); 932 paddr = __pa(svm->sev_es.vmsa); 933 } 934 935 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err); 936 if (ret) 937 goto e_free; 938 939 if (tpage) { 940 offset = paddr & 15; 941 if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size)) 942 ret = -EFAULT; 943 } 944 945e_free: 946 if (tpage) 947 __free_page(tpage); 948 949 return ret; 950} 951 952static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, 953 void __user *vaddr, 954 unsigned long dst_paddr, 955 void __user *dst_vaddr, 956 int size, int *error) 957{ 958 struct page *src_tpage = NULL; 959 struct page *dst_tpage = NULL; 960 int ret, len = size; 961 962 /* If source buffer is not aligned then use an intermediate buffer */ 963 if (!IS_ALIGNED((unsigned long)vaddr, 16)) { 964 src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); 965 if (!src_tpage) 966 return -ENOMEM; 967 968 if (copy_from_user(page_address(src_tpage), vaddr, size)) { 969 __free_page(src_tpage); 970 return -EFAULT; 971 } 972 973 paddr = __sme_page_pa(src_tpage); 974 } 975 976 /* 977 * If destination buffer or length is not aligned then do read-modify-write: 978 * - decrypt destination in an intermediate buffer 979 * - copy the source buffer in an intermediate buffer 980 * - use the intermediate buffer as source buffer 981 */ 982 if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { 983 int dst_offset; 984 985 dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); 986 if (!dst_tpage) { 987 ret = -ENOMEM; 988 goto e_free; 989 } 990 991 ret = __sev_dbg_decrypt(kvm, dst_paddr, 992 __sme_page_pa(dst_tpage), size, error); 993 if (ret) 994 goto e_free; 995 996 /* 997 * If source is kernel buffer then use memcpy() otherwise 998 * copy_from_user(). 
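		 * src_tpage is only set when the source user buffer was itself
		 * unaligned and has already been copied into the kernel, so it
		 * can be merged with memcpy(); otherwise the bytes still live
		 * in userspace and must be pulled in via copy_from_user().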
999 */ 1000 dst_offset = dst_paddr & 15; 1001 1002 if (src_tpage) 1003 memcpy(page_address(dst_tpage) + dst_offset, 1004 page_address(src_tpage), size); 1005 else { 1006 if (copy_from_user(page_address(dst_tpage) + dst_offset, 1007 vaddr, size)) { 1008 ret = -EFAULT; 1009 goto e_free; 1010 } 1011 } 1012 1013 paddr = __sme_page_pa(dst_tpage); 1014 dst_paddr = round_down(dst_paddr, 16); 1015 len = round_up(size, 16); 1016 } 1017 1018 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true); 1019 1020e_free: 1021 if (src_tpage) 1022 __free_page(src_tpage); 1023 if (dst_tpage) 1024 __free_page(dst_tpage); 1025 return ret; 1026} 1027 1028static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) 1029{ 1030 unsigned long vaddr, vaddr_end, next_vaddr; 1031 unsigned long dst_vaddr; 1032 struct page **src_p, **dst_p; 1033 struct kvm_sev_dbg debug; 1034 unsigned long n; 1035 unsigned int size; 1036 bool vmsa_dec; 1037 int ret; 1038 1039 if (!sev_guest(kvm)) 1040 return -ENOTTY; 1041 1042 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) 1043 return -EFAULT; 1044 1045 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) 1046 return -EINVAL; 1047 if (!debug.dst_uaddr) 1048 return -EINVAL; 1049 1050 vmsa_dec = false; 1051 if (debug.src_uaddr == (uintptr_t) CPC_VMSA_MAGIC_ADDR) { 1052 debug.len = PAGE_SIZE; 1053 debug.src_uaddr = debug.dst_uaddr; 1054 vmsa_dec = true; 1055 } 1056 1057 vaddr = debug.src_uaddr; 1058 size = debug.len; 1059 vaddr_end = vaddr + size; 1060 dst_vaddr = debug.dst_uaddr; 1061 1062 for (; vaddr < vaddr_end; vaddr = next_vaddr) { 1063 int len, s_off, d_off; 1064 1065 /* lock userspace source and destination page */ 1066 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0); 1067 if (IS_ERR(src_p)) 1068 return PTR_ERR(src_p); 1069 1070 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1); 1071 if (IS_ERR(dst_p)) { 1072 sev_unpin_memory(kvm, src_p, n); 1073 return PTR_ERR(dst_p); 1074 } 1075 1076 /* 1077 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify 1078 * the pages; flush the destination too so that future accesses do not 1079 * see stale data. 1080 */ 1081 sev_clflush_pages(src_p, 1); 1082 sev_clflush_pages(dst_p, 1); 1083 1084 /* 1085 * Since user buffer may not be page aligned, calculate the 1086 * offset within the page. 1087 */ 1088 s_off = vaddr & ~PAGE_MASK; 1089 d_off = dst_vaddr & ~PAGE_MASK; 1090 len = min_t(size_t, (PAGE_SIZE - s_off), size); 1091 1092 if (dec) 1093 ret = __sev_dbg_decrypt_user(kvm, 1094 __sme_page_pa(src_p[0]) + s_off, 1095 vmsa_dec ? 
CPC_VMSA_MAGIC_ADDR
						     : (void __user *)dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     (void __user *)dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

static int snp_dbg_decrypt_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_dbg debug;
	struct vcpu_svm *svm;
	hpa_t src_paddr;
	hpa_t dst_paddr;
	void *vmsa;
	int ret;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (debug.len != PAGE_SIZE || debug.src_uaddr != (uint64_t) CPC_VMSA_MAGIC_ADDR)
		return -EINVAL;

	/* Bounce page that receives the decrypted VMSA contents. */
	vmsa = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vmsa)
		return -ENOMEM;

	svm = to_svm(xa_load(&kvm->vcpu_array, 0));
	src_paddr = __pa(svm->sev_es.vmsa);
	dst_paddr = __pa(vmsa);
	ret = snp_guest_dbg_decrypt_page(__pa(sev->snp_context) >> PAGE_SHIFT,
					 src_paddr >> PAGE_SHIFT,
					 dst_paddr >> PAGE_SHIFT, &argp->error);
	if (ret)
		goto e_free;

	if (copy_to_user((void __user *) debug.dst_uaddr, vmsa, PAGE_SIZE))
		ret = -EFAULT;

e_free:
	kfree(vmsa);

	return ret;
}

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region, so
	 * verify that the userspace memory pages are contiguous before we
	 * issue the command.
1185 */ 1186 if (get_num_contig_pages(0, pages, n) != n) { 1187 ret = -EINVAL; 1188 goto e_unpin_memory; 1189 } 1190 1191 memset(&data, 0, sizeof(data)); 1192 1193 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1194 data.guest_address = __sme_page_pa(pages[0]) + offset; 1195 data.guest_len = params.guest_len; 1196 1197 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 1198 if (IS_ERR(blob)) { 1199 ret = PTR_ERR(blob); 1200 goto e_unpin_memory; 1201 } 1202 1203 data.trans_address = __psp_pa(blob); 1204 data.trans_len = params.trans_len; 1205 1206 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); 1207 if (IS_ERR(hdr)) { 1208 ret = PTR_ERR(hdr); 1209 goto e_free_blob; 1210 } 1211 data.hdr_address = __psp_pa(hdr); 1212 data.hdr_len = params.hdr_len; 1213 1214 data.handle = sev->handle; 1215 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error); 1216 1217 kfree(hdr); 1218 1219e_free_blob: 1220 kfree(blob); 1221e_unpin_memory: 1222 /* content of memory is updated, mark pages dirty */ 1223 for (i = 0; i < n; i++) { 1224 set_page_dirty_lock(pages[i]); 1225 mark_page_accessed(pages[i]); 1226 } 1227 sev_unpin_memory(kvm, pages, n); 1228 return ret; 1229} 1230 1231static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) 1232{ 1233 void __user *report = (void __user *)(uintptr_t)argp->data; 1234 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1235 struct sev_data_attestation_report data; 1236 struct kvm_sev_attestation_report params; 1237 void __user *p; 1238 void *blob = NULL; 1239 int ret; 1240 1241 if (!sev_guest(kvm)) 1242 return -ENOTTY; 1243 1244 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) 1245 return -EFAULT; 1246 1247 memset(&data, 0, sizeof(data)); 1248 1249 /* User wants to query the blob length */ 1250 if (!params.len) 1251 goto cmd; 1252 1253 p = (void __user *)(uintptr_t)params.uaddr; 1254 if (p) { 1255 if (params.len > SEV_FW_BLOB_MAX_SIZE) 1256 return -EINVAL; 1257 1258 blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); 1259 if (!blob) 1260 return -ENOMEM; 1261 1262 data.address = __psp_pa(blob); 1263 data.len = params.len; 1264 memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce)); 1265 } 1266cmd: 1267 data.handle = sev->handle; 1268 ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error); 1269 /* 1270 * If we query the session length, FW responded with expected data. 1271 */ 1272 if (!params.len) 1273 goto done; 1274 1275 if (ret) 1276 goto e_free_blob; 1277 1278 if (blob) { 1279 if (copy_to_user(p, blob, params.len)) 1280 ret = -EFAULT; 1281 } 1282 1283done: 1284 params.len = data.len; 1285 if (copy_to_user(report, ¶ms, sizeof(params))) 1286 ret = -EFAULT; 1287e_free_blob: 1288 kfree(blob); 1289 return ret; 1290} 1291 1292/* Userspace wants to query session length. 
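 * Issuing SEND_START with a zeroed data block makes the firmware report the
 * required session blob size in data.session_len; that value is copied back
 * so userspace can allocate a buffer and retry with session_len filled in.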
*/ 1293static int 1294__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, 1295 struct kvm_sev_send_start *params) 1296{ 1297 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1298 struct sev_data_send_start data; 1299 int ret; 1300 1301 memset(&data, 0, sizeof(data)); 1302 data.handle = sev->handle; 1303 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); 1304 1305 params->session_len = data.session_len; 1306 if (copy_to_user((void __user *)(uintptr_t)argp->data, params, 1307 sizeof(struct kvm_sev_send_start))) 1308 ret = -EFAULT; 1309 1310 return ret; 1311} 1312 1313static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 1314{ 1315 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1316 struct sev_data_send_start data; 1317 struct kvm_sev_send_start params; 1318 void *amd_certs, *session_data; 1319 void *pdh_cert, *plat_certs; 1320 int ret; 1321 1322 if (!sev_guest(kvm)) 1323 return -ENOTTY; 1324 1325 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, 1326 sizeof(struct kvm_sev_send_start))) 1327 return -EFAULT; 1328 1329 /* if session_len is zero, userspace wants to query the session length */ 1330 if (!params.session_len) 1331 return __sev_send_start_query_session_length(kvm, argp, 1332 ¶ms); 1333 1334 /* some sanity checks */ 1335 if (!params.pdh_cert_uaddr || !params.pdh_cert_len || 1336 !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE) 1337 return -EINVAL; 1338 1339 /* allocate the memory to hold the session data blob */ 1340 session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); 1341 if (!session_data) 1342 return -ENOMEM; 1343 1344 /* copy the certificate blobs from userspace */ 1345 pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr, 1346 params.pdh_cert_len); 1347 if (IS_ERR(pdh_cert)) { 1348 ret = PTR_ERR(pdh_cert); 1349 goto e_free_session; 1350 } 1351 1352 plat_certs = psp_copy_user_blob(params.plat_certs_uaddr, 1353 params.plat_certs_len); 1354 if (IS_ERR(plat_certs)) { 1355 ret = PTR_ERR(plat_certs); 1356 goto e_free_pdh; 1357 } 1358 1359 amd_certs = psp_copy_user_blob(params.amd_certs_uaddr, 1360 params.amd_certs_len); 1361 if (IS_ERR(amd_certs)) { 1362 ret = PTR_ERR(amd_certs); 1363 goto e_free_plat_cert; 1364 } 1365 1366 /* populate the FW SEND_START field with system physical address */ 1367 memset(&data, 0, sizeof(data)); 1368 data.pdh_cert_address = __psp_pa(pdh_cert); 1369 data.pdh_cert_len = params.pdh_cert_len; 1370 data.plat_certs_address = __psp_pa(plat_certs); 1371 data.plat_certs_len = params.plat_certs_len; 1372 data.amd_certs_address = __psp_pa(amd_certs); 1373 data.amd_certs_len = params.amd_certs_len; 1374 data.session_address = __psp_pa(session_data); 1375 data.session_len = params.session_len; 1376 data.handle = sev->handle; 1377 1378 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); 1379 1380 if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr, 1381 session_data, params.session_len)) { 1382 ret = -EFAULT; 1383 goto e_free_amd_cert; 1384 } 1385 1386 params.policy = data.policy; 1387 params.session_len = data.session_len; 1388 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, 1389 sizeof(struct kvm_sev_send_start))) 1390 ret = -EFAULT; 1391 1392e_free_amd_cert: 1393 kfree(amd_certs); 1394e_free_plat_cert: 1395 kfree(plat_certs); 1396e_free_pdh: 1397 kfree(pdh_cert); 1398e_free_session: 1399 kfree(session_data); 1400 return ret; 1401} 1402 1403/* Userspace wants to query either header or trans length. 
*/ 1404static int 1405__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, 1406 struct kvm_sev_send_update_data *params) 1407{ 1408 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1409 struct sev_data_send_update_data data; 1410 int ret; 1411 1412 memset(&data, 0, sizeof(data)); 1413 data.handle = sev->handle; 1414 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); 1415 1416 params->hdr_len = data.hdr_len; 1417 params->trans_len = data.trans_len; 1418 1419 if (copy_to_user((void __user *)(uintptr_t)argp->data, params, 1420 sizeof(struct kvm_sev_send_update_data))) 1421 ret = -EFAULT; 1422 1423 return ret; 1424} 1425 1426static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 1427{ 1428 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1429 struct sev_data_send_update_data data; 1430 struct kvm_sev_send_update_data params; 1431 void *hdr, *trans_data; 1432 struct page **guest_page; 1433 unsigned long n; 1434 int ret, offset; 1435 1436 if (!sev_guest(kvm)) 1437 return -ENOTTY; 1438 1439 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, 1440 sizeof(struct kvm_sev_send_update_data))) 1441 return -EFAULT; 1442 1443 /* userspace wants to query either header or trans length */ 1444 if (!params.trans_len || !params.hdr_len) 1445 return __sev_send_update_data_query_lengths(kvm, argp, ¶ms); 1446 1447 if (!params.trans_uaddr || !params.guest_uaddr || 1448 !params.guest_len || !params.hdr_uaddr) 1449 return -EINVAL; 1450 1451 /* Check if we are crossing the page boundary */ 1452 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1453 if ((params.guest_len + offset > PAGE_SIZE)) 1454 return -EINVAL; 1455 1456 /* Pin guest memory */ 1457 guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, 1458 PAGE_SIZE, &n, 0); 1459 if (IS_ERR(guest_page)) 1460 return PTR_ERR(guest_page); 1461 1462 /* allocate memory for header and transport buffer */ 1463 ret = -ENOMEM; 1464 hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); 1465 if (!hdr) 1466 goto e_unpin; 1467 1468 trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); 1469 if (!trans_data) 1470 goto e_free_hdr; 1471 1472 memset(&data, 0, sizeof(data)); 1473 data.hdr_address = __psp_pa(hdr); 1474 data.hdr_len = params.hdr_len; 1475 data.trans_address = __psp_pa(trans_data); 1476 data.trans_len = params.trans_len; 1477 1478 /* The SEND_UPDATE_DATA command requires C-bit to be always set. */ 1479 data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; 1480 data.guest_address |= sev_me_mask; 1481 data.guest_len = params.guest_len; 1482 data.handle = sev->handle; 1483 1484 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); 1485 1486 if (ret) 1487 goto e_free_trans_data; 1488 1489 /* copy transport buffer to user space */ 1490 if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr, 1491 trans_data, params.trans_len)) { 1492 ret = -EFAULT; 1493 goto e_free_trans_data; 1494 } 1495 1496 /* Copy packet header to userspace. 
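	 * Both blobs are needed on the other end: the migration target passes
	 * this header together with the transport buffer to
	 * RECEIVE_UPDATE_DATA (see sev_receive_update_data()) to import the
	 * guest page it carries.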
*/ 1497 if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, 1498 params.hdr_len)) 1499 ret = -EFAULT; 1500 1501e_free_trans_data: 1502 kfree(trans_data); 1503e_free_hdr: 1504 kfree(hdr); 1505e_unpin: 1506 sev_unpin_memory(kvm, guest_page, n); 1507 1508 return ret; 1509} 1510 1511static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 1512{ 1513 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1514 struct sev_data_send_finish data; 1515 1516 if (!sev_guest(kvm)) 1517 return -ENOTTY; 1518 1519 data.handle = sev->handle; 1520 return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error); 1521} 1522 1523static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp) 1524{ 1525 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1526 struct sev_data_send_cancel data; 1527 1528 if (!sev_guest(kvm)) 1529 return -ENOTTY; 1530 1531 data.handle = sev->handle; 1532 return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error); 1533} 1534 1535static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 1536{ 1537 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1538 struct sev_data_receive_start start; 1539 struct kvm_sev_receive_start params; 1540 int *error = &argp->error; 1541 void *session_data; 1542 void *pdh_data; 1543 int ret; 1544 1545 if (!sev_guest(kvm)) 1546 return -ENOTTY; 1547 1548 /* Get parameter from the userspace */ 1549 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, 1550 sizeof(struct kvm_sev_receive_start))) 1551 return -EFAULT; 1552 1553 /* some sanity checks */ 1554 if (!params.pdh_uaddr || !params.pdh_len || 1555 !params.session_uaddr || !params.session_len) 1556 return -EINVAL; 1557 1558 pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len); 1559 if (IS_ERR(pdh_data)) 1560 return PTR_ERR(pdh_data); 1561 1562 session_data = psp_copy_user_blob(params.session_uaddr, 1563 params.session_len); 1564 if (IS_ERR(session_data)) { 1565 ret = PTR_ERR(session_data); 1566 goto e_free_pdh; 1567 } 1568 1569 memset(&start, 0, sizeof(start)); 1570 start.handle = params.handle; 1571 start.policy = params.policy; 1572 start.pdh_cert_address = __psp_pa(pdh_data); 1573 start.pdh_cert_len = params.pdh_len; 1574 start.session_address = __psp_pa(session_data); 1575 start.session_len = params.session_len; 1576 1577 /* create memory encryption context */ 1578 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start, 1579 error); 1580 if (ret) 1581 goto e_free_session; 1582 1583 /* Bind ASID to this guest */ 1584 ret = sev_bind_asid(kvm, start.handle, error); 1585 if (ret) { 1586 sev_decommission(start.handle); 1587 goto e_free_session; 1588 } 1589 1590 params.handle = start.handle; 1591 if (copy_to_user((void __user *)(uintptr_t)argp->data, 1592 ¶ms, sizeof(struct kvm_sev_receive_start))) { 1593 ret = -EFAULT; 1594 sev_unbind_asid(kvm, start.handle); 1595 goto e_free_session; 1596 } 1597 1598 sev->handle = start.handle; 1599 sev->fd = argp->sev_fd; 1600 1601e_free_session: 1602 kfree(session_data); 1603e_free_pdh: 1604 kfree(pdh_data); 1605 1606 return ret; 1607} 1608 1609static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 1610{ 1611 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1612 struct kvm_sev_receive_update_data params; 1613 struct sev_data_receive_update_data data; 1614 void *hdr = NULL, *trans = NULL; 1615 struct page **guest_page; 1616 unsigned long n; 1617 int ret, offset; 1618 1619 if (!sev_guest(kvm)) 1620 return -EINVAL; 1621 1622 if 
(copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, 1623 sizeof(struct kvm_sev_receive_update_data))) 1624 return -EFAULT; 1625 1626 if (!params.hdr_uaddr || !params.hdr_len || 1627 !params.guest_uaddr || !params.guest_len || 1628 !params.trans_uaddr || !params.trans_len) 1629 return -EINVAL; 1630 1631 /* Check if we are crossing the page boundary */ 1632 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1633 if ((params.guest_len + offset > PAGE_SIZE)) 1634 return -EINVAL; 1635 1636 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); 1637 if (IS_ERR(hdr)) 1638 return PTR_ERR(hdr); 1639 1640 trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 1641 if (IS_ERR(trans)) { 1642 ret = PTR_ERR(trans); 1643 goto e_free_hdr; 1644 } 1645 1646 memset(&data, 0, sizeof(data)); 1647 data.hdr_address = __psp_pa(hdr); 1648 data.hdr_len = params.hdr_len; 1649 data.trans_address = __psp_pa(trans); 1650 data.trans_len = params.trans_len; 1651 1652 /* Pin guest memory */ 1653 guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, 1654 PAGE_SIZE, &n, 1); 1655 if (IS_ERR(guest_page)) { 1656 ret = PTR_ERR(guest_page); 1657 goto e_free_trans; 1658 } 1659 1660 /* 1661 * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP 1662 * encrypts the written data with the guest's key, and the cache may 1663 * contain dirty, unencrypted data. 1664 */ 1665 sev_clflush_pages(guest_page, n); 1666 1667 /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ 1668 data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; 1669 data.guest_address |= sev_me_mask; 1670 data.guest_len = params.guest_len; 1671 data.handle = sev->handle; 1672 1673 ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data, 1674 &argp->error); 1675 1676 sev_unpin_memory(kvm, guest_page, n); 1677 1678e_free_trans: 1679 kfree(trans); 1680e_free_hdr: 1681 kfree(hdr); 1682 1683 return ret; 1684} 1685 1686static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 1687{ 1688 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1689 struct sev_data_receive_finish data; 1690 1691 if (!sev_guest(kvm)) 1692 return -ENOTTY; 1693 1694 data.handle = sev->handle; 1695 return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); 1696} 1697 1698static bool is_cmd_allowed_from_mirror(u32 cmd_id) 1699{ 1700 /* 1701 * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES 1702 * active mirror VMs. Also allow the debugging and status commands. 1703 */ 1704 if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA || 1705 cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT || 1706 cmd_id == KVM_SEV_DBG_ENCRYPT) 1707 return true; 1708 1709 return false; 1710} 1711 1712static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) 1713{ 1714 struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; 1715 struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; 1716 int r = -EBUSY; 1717 1718 if (dst_kvm == src_kvm) 1719 return -EINVAL; 1720 1721 /* 1722 * Bail if these VMs are already involved in a migration to avoid 1723 * deadlock between two VMs trying to migrate to/from each other. 
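	 * The migration_in_progress flags are claimed with cmpxchg before
	 * either kvm->lock is taken, and the source lock below is acquired
	 * with SINGLE_DEPTH_NESTING so lockdep accepts the fixed
	 * destination-then-source ordering.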
1724 */ 1725 if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1)) 1726 return -EBUSY; 1727 1728 if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1)) 1729 goto release_dst; 1730 1731 r = -EINTR; 1732 if (mutex_lock_killable(&dst_kvm->lock)) 1733 goto release_src; 1734 if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING)) 1735 goto unlock_dst; 1736 return 0; 1737 1738unlock_dst: 1739 mutex_unlock(&dst_kvm->lock); 1740release_src: 1741 atomic_set_release(&src_sev->migration_in_progress, 0); 1742release_dst: 1743 atomic_set_release(&dst_sev->migration_in_progress, 0); 1744 return r; 1745} 1746 1747static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) 1748{ 1749 struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; 1750 struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; 1751 1752 mutex_unlock(&dst_kvm->lock); 1753 mutex_unlock(&src_kvm->lock); 1754 atomic_set_release(&dst_sev->migration_in_progress, 0); 1755 atomic_set_release(&src_sev->migration_in_progress, 0); 1756} 1757 1758/* vCPU mutex subclasses. */ 1759enum sev_migration_role { 1760 SEV_MIGRATION_SOURCE = 0, 1761 SEV_MIGRATION_TARGET, 1762 SEV_NR_MIGRATION_ROLES, 1763}; 1764 1765static int sev_lock_vcpus_for_migration(struct kvm *kvm, 1766 enum sev_migration_role role) 1767{ 1768 struct kvm_vcpu *vcpu; 1769 unsigned long i, j; 1770 bool first = true; 1771 1772 kvm_for_each_vcpu(i, vcpu, kvm) { 1773 if (mutex_lock_killable_nested(&vcpu->mutex, role)) 1774 goto out_unlock; 1775 1776 if (first) { 1777 /* 1778 * Reset the role to one that avoids colliding with 1779 * the role used for the first vcpu mutex. 1780 */ 1781 role = SEV_NR_MIGRATION_ROLES; 1782 first = false; 1783 } else { 1784 mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); 1785 } 1786 } 1787 1788 return 0; 1789 1790out_unlock: 1791 1792 first = true; 1793 kvm_for_each_vcpu(j, vcpu, kvm) { 1794 if (i == j) 1795 break; 1796 1797 if (first) 1798 first = false; 1799 else 1800 mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); 1801 1802 1803 mutex_unlock(&vcpu->mutex); 1804 } 1805 return -EINTR; 1806} 1807 1808static void sev_unlock_vcpus_for_migration(struct kvm *kvm) 1809{ 1810 struct kvm_vcpu *vcpu; 1811 unsigned long i; 1812 bool first = true; 1813 1814 kvm_for_each_vcpu(i, vcpu, kvm) { 1815 if (first) 1816 first = false; 1817 else 1818 mutex_acquire(&vcpu->mutex.dep_map, 1819 SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); 1820 1821 mutex_unlock(&vcpu->mutex); 1822 } 1823} 1824 1825static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) 1826{ 1827 struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; 1828 struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; 1829 struct kvm_vcpu *dst_vcpu, *src_vcpu; 1830 struct vcpu_svm *dst_svm, *src_svm; 1831 struct kvm_sev_info *mirror; 1832 unsigned long i; 1833 1834 dst->active = true; 1835 dst->asid = src->asid; 1836 dst->handle = src->handle; 1837 dst->pages_locked = src->pages_locked; 1838 dst->enc_context_owner = src->enc_context_owner; 1839 dst->es_active = src->es_active; 1840 1841 src->asid = 0; 1842 src->active = false; 1843 src->handle = 0; 1844 src->pages_locked = 0; 1845 src->enc_context_owner = NULL; 1846 src->es_active = false; 1847 1848 list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); 1849 1850 /* 1851 * If this VM has mirrors, "transfer" each mirror's refcount of the 1852 * source to the destination (this KVM). 
The caller holds a reference 1853 * to the source, so there's no danger of use-after-free. 1854 */ 1855 list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms); 1856 list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) { 1857 kvm_get_kvm(dst_kvm); 1858 kvm_put_kvm(src_kvm); 1859 mirror->enc_context_owner = dst_kvm; 1860 } 1861 1862 /* 1863 * If this VM is a mirror, remove the old mirror from the owners list 1864 * and add the new mirror to the list. 1865 */ 1866 if (is_mirroring_enc_context(dst_kvm)) { 1867 struct kvm_sev_info *owner_sev_info = 1868 &to_kvm_svm(dst->enc_context_owner)->sev_info; 1869 1870 list_del(&src->mirror_entry); 1871 list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); 1872 } 1873 1874 kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { 1875 dst_svm = to_svm(dst_vcpu); 1876 1877 sev_init_vmcb(dst_svm); 1878 1879 if (!dst->es_active) 1880 continue; 1881 1882 /* 1883 * Note, the source is not required to have the same number of 1884 * vCPUs as the destination when migrating a vanilla SEV VM. 1885 */ 1886 src_vcpu = kvm_get_vcpu(dst_kvm, i); 1887 src_svm = to_svm(src_vcpu); 1888 1889 /* 1890 * Transfer VMSA and GHCB state to the destination. Nullify and 1891 * clear source fields as appropriate, the state now belongs to 1892 * the destination. 1893 */ 1894 memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es)); 1895 dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa; 1896 dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa; 1897 dst_vcpu->arch.guest_state_protected = true; 1898 1899 memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es)); 1900 src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE; 1901 src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; 1902 src_vcpu->arch.guest_state_protected = false; 1903 } 1904} 1905 1906static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) 1907{ 1908 struct kvm_vcpu *src_vcpu; 1909 unsigned long i; 1910 1911 if (!sev_es_guest(src)) 1912 return 0; 1913 1914 if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) 1915 return -EINVAL; 1916 1917 kvm_for_each_vcpu(i, src_vcpu, src) { 1918 if (!src_vcpu->arch.guest_state_protected) 1919 return -EINVAL; 1920 } 1921 1922 return 0; 1923} 1924 1925int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) 1926{ 1927 struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; 1928 struct kvm_sev_info *src_sev, *cg_cleanup_sev; 1929 struct file *source_kvm_file; 1930 struct kvm *source_kvm; 1931 bool charged = false; 1932 int ret; 1933 1934 source_kvm_file = fget(source_fd); 1935 if (!file_is_kvm(source_kvm_file)) { 1936 ret = -EBADF; 1937 goto out_fput; 1938 } 1939 1940 source_kvm = source_kvm_file->private_data; 1941 ret = sev_lock_two_vms(kvm, source_kvm); 1942 if (ret) 1943 goto out_fput; 1944 1945 if (sev_guest(kvm) || !sev_guest(source_kvm)) { 1946 ret = -EINVAL; 1947 goto out_unlock; 1948 } 1949 1950 src_sev = &to_kvm_svm(source_kvm)->sev_info; 1951 1952 dst_sev->misc_cg = get_current_misc_cg(); 1953 cg_cleanup_sev = dst_sev; 1954 if (dst_sev->misc_cg != src_sev->misc_cg) { 1955 ret = sev_misc_cg_try_charge(dst_sev); 1956 if (ret) 1957 goto out_dst_cgroup; 1958 charged = true; 1959 } 1960 1961 ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); 1962 if (ret) 1963 goto out_dst_cgroup; 1964 ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); 1965 if (ret) 1966 goto out_dst_vcpu; 1967 1968 ret = sev_check_source_vcpus(kvm, source_kvm); 1969 if (ret) 1970 goto 
out_source_vcpu; 1971 1972 sev_migrate_from(kvm, source_kvm); 1973 kvm_vm_dead(source_kvm); 1974 cg_cleanup_sev = src_sev; 1975 ret = 0; 1976 1977out_source_vcpu: 1978 sev_unlock_vcpus_for_migration(source_kvm); 1979out_dst_vcpu: 1980 sev_unlock_vcpus_for_migration(kvm); 1981out_dst_cgroup: 1982 /* Operates on the source on success, on the destination on failure. */ 1983 if (charged) 1984 sev_misc_cg_uncharge(cg_cleanup_sev); 1985 put_misc_cg(cg_cleanup_sev->misc_cg); 1986 cg_cleanup_sev->misc_cg = NULL; 1987out_unlock: 1988 sev_unlock_two_vms(kvm, source_kvm); 1989out_fput: 1990 if (source_kvm_file) 1991 fput(source_kvm_file); 1992 return ret; 1993} 1994 1995static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) 1996{ 1997 void *context = NULL, *certs_data = NULL, *resp_page = NULL; 1998 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1999 struct sev_data_snp_gctx_create data = {}; 2000 int rc; 2001 2002 /* Allocate memory used for the certs data in SNP guest request */ 2003 certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT); 2004 if (!certs_data) 2005 return NULL; 2006 2007 /* Allocate memory for context page */ 2008 context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); 2009 if (!context) 2010 goto e_free; 2011 2012 /* Allocate a firmware buffer used during the guest command handling. */ 2013 resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); 2014 if (!resp_page) 2015 goto e_free; 2016 2017 data.gctx_paddr = __psp_pa(context); 2018 rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); 2019 if (rc) 2020 goto e_free; 2021 2022 sev->snp_certs_data = certs_data; 2023 2024 return context; 2025 2026e_free: 2027 snp_free_firmware_page(context); 2028 kfree(certs_data); 2029 return NULL; 2030} 2031 2032static int snp_bind_asid(struct kvm *kvm, int *error) 2033{ 2034 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2035 struct sev_data_snp_activate data = {0}; 2036 2037 data.gctx_paddr = __psp_pa(sev->snp_context); 2038 data.asid = sev_get_asid(kvm); 2039 return sev_issue_cmd(kvm, SEV_CMD_SNP_ACTIVATE, &data, error); 2040} 2041 2042static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 2043{ 2044 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2045 struct sev_data_snp_launch_start start = {0}; 2046 struct kvm_sev_snp_launch_start params; 2047 int rc; 2048 2049 if (!sev_snp_guest(kvm)) 2050 return -ENOTTY; 2051 2052 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) 2053 return -EFAULT; 2054 2055 sev->snp_context = snp_context_create(kvm, argp); 2056 if (!sev->snp_context) 2057 return -ENOTTY; 2058 2059 start.gctx_paddr = __psp_pa(sev->snp_context); 2060 start.policy = params.policy; 2061 memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw)); 2062 rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error); 2063 if (rc) 2064 goto e_free_context; 2065 2066 sev->fd = argp->sev_fd; 2067 rc = snp_bind_asid(kvm, &argp->error); 2068 if (rc) 2069 goto e_free_context; 2070 2071 return 0; 2072 2073e_free_context: 2074 snp_decommission_context(kvm); 2075 2076 return rc; 2077} 2078 2079static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len) 2080{ 2081 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2082 struct list_head *head = &sev->regions_list; 2083 struct enc_region *i; 2084 2085 lockdep_assert_held(&kvm->lock); 2086 2087 list_for_each_entry(i, head, list) { 2088 u64 start = i->uaddr; 2089 u64 end = start + i->size; 2090 2091 if 
(start <= hva && end >= (hva + len))
2092 return true;
2093 }
2094
2095 return false;
2096}
2097
2098static int snp_mark_unmergable(struct kvm *kvm, u64 start, u64 size)
2099{
2100 struct vm_area_struct *vma;
2101 u64 end = start + size;
2102 int ret;
2103
2104 do {
2105 vma = find_vma_intersection(kvm->mm, start, end);
2106 if (!vma) {
2107 ret = -EINVAL;
2108 break;
2109 }
2110
2111 ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
2112 MADV_UNMERGEABLE, &vma->vm_flags);
2113 if (ret)
2114 break;
2115
2116 start = vma->vm_end;
2117 } while (end > vma->vm_end);
2118
2119 return ret;
2120}
2121
2122static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
2123{
2124 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
2125 struct sev_data_snp_launch_update data = {0};
2126 struct kvm_sev_snp_launch_update params;
2127 unsigned long npages, pfn, n = 0;
2128 int *error = &argp->error;
2129 struct page **inpages;
2130 int ret, i, level;
2131 u64 gfn;
2132
2133 if (!sev_snp_guest(kvm))
2134 return -ENOTTY;
2135
2136 if (!sev->snp_context)
2137 return -EINVAL;
2138
2139 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
2140 return -EFAULT;
2141
2142 /* Verify that the specified address range is registered. */
2143 if (!is_hva_registered(kvm, params.uaddr, params.len))
2144 return -EINVAL;
2145
2146 mmap_write_lock(kvm->mm);
2147 ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
2148 mmap_write_unlock(kvm->mm);
2149 if (ret)
2150 return -EFAULT;
2151
2152 /*
2153 * The userspace memory is already locked, so technically we don't
2154 * need to lock it again. The later part of the function needs the
2155 * pfn, so call sev_pin_memory() to get the list of
2156 * pages to iterate through.
2157 */
2158 inpages = sev_pin_memory(kvm, params.uaddr, params.len, &npages, 1);
2159 if (!inpages)
2160 return -ENOMEM;
2161
2162 /*
2163 * Verify that all the pages are marked shared in the RMP table before
2164 * going further. This is to avoid cases where userspace may try
2165 * updating the same page twice.
2166 */
2167 for (i = 0; i < npages; i++) {
2168 if (snp_lookup_rmpentry(page_to_pfn(inpages[i]), &level) != 0) {
2169 sev_unpin_memory(kvm, inpages, npages);
2170 return -EFAULT;
2171 }
2172 }
2173
2174 gfn = params.start_gfn;
2175 level = PG_LEVEL_4K;
2176 data.gctx_paddr = __psp_pa(sev->snp_context);
2177
2178 for (i = 0; i < npages; i++) {
2179 pfn = page_to_pfn(inpages[i]);
2180
2181 ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, level, sev_get_asid(kvm), true);
2182 if (ret) {
2183 ret = -EFAULT;
2184 goto e_unpin;
2185 }
2186
2187 n++;
2188 data.address = __sme_page_pa(inpages[i]);
2189 data.page_size = X86_TO_RMP_PG_LEVEL(level);
2190 data.page_type = params.page_type;
2191 data.vmpl3_perms = params.vmpl3_perms;
2192 data.vmpl2_perms = params.vmpl2_perms;
2193 data.vmpl1_perms = params.vmpl1_perms;
2194 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, &data, error);
2195 if (ret) {
2196 /*
2197 * If the command failed then we need to reclaim the page.
2198 */
2199 snp_page_reclaim(pfn);
2200 goto e_unpin;
2201 }
2202
2203 gfn++;
2204 }
2205
2206e_unpin:
2207 /* Content of memory is updated, mark pages dirty */
2208 for (i = 0; i < n; i++) {
2209 set_page_dirty_lock(inpages[i]);
2210 mark_page_accessed(inpages[i]);
2211
2212 /*
2213 * If it's an error, then update the RMP entry to change page ownership
2214 * to the hypervisor.
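 *
 * (Illustrative note, not from the original sources: host_rmp_make_shared()
 * below is the inverse of the rmp_make_private() call in the launch loop
 * above, i.e. it returns ownership of the pfn to the hypervisor before the
 * pages are unpinned and released.)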
2215 */
2216 if (ret)
2217 host_rmp_make_shared(pfn, level, true);
2218 }
2219
2220 /* Unlock the user pages */
2221 sev_unpin_memory(kvm, inpages, npages);
2222
2223 return ret;
2224}
2225
2226static int rmpupdate_noremap(u64 pfn, struct rmpupdate *val)
2227{
2228 unsigned long paddr = pfn << PAGE_SHIFT;
2229 int ret, level, npages;
2230 int retries = 0;
2231
2232 if (!pfn_valid(pfn))
2233 return -EINVAL;
2234
2235 if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
2236 return -ENXIO;
2237
2238 level = RMP_TO_X86_PG_LEVEL(val->pagesize);
2239 npages = page_level_size(level) / PAGE_SIZE;
2240
2241
2242retry:
2243 /* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
2244 asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
2245 : "=a"(ret)
2246 : "a"(paddr), "c"((unsigned long)val)
2247 : "memory", "cc");
2248
2249 if (ret) {
2250 if (!retries) {
2251 pr_err("rmpupdate failed, ret: %d, pfn: %llx, npages: %d, level: %d, retrying (max: %d)...\n",
2252 ret, pfn, npages, level, 2 * num_present_cpus());
2253 dump_stack();
2254 }
2255 retries++;
2256 if (retries < 2 * num_present_cpus())
2257 goto retry;
2258 } else if (retries > 0) {
2259 pr_err("rmpupdate for pfn %llx succeeded after %d retries\n", pfn, retries);
2260 }
2261
2262 return ret;
2263}
2264
2265int rmp_make_private_noremap(u64 pfn, u64 gpa, enum pg_level level, int asid, bool immutable)
2266{
2267 struct rmpupdate val;
2268
2269 if (!pfn_valid(pfn))
2270 return -EINVAL;
2271
2272 memset(&val, 0, sizeof(val));
2273 val.assigned = 1;
2274 val.asid = asid;
2275 val.immutable = immutable;
2276 val.gpa = gpa;
2277 val.pagesize = X86_TO_RMP_PG_LEVEL(level);
2278
2279 return rmpupdate_noremap(pfn, &val);
2280}
2281
2282static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
2283{
2284 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
2285 struct sev_data_snp_launch_update data = {};
2286 int i, ret;
2287
2288 data.gctx_paddr = __psp_pa(sev->snp_context);
2289 data.page_type = SNP_PAGE_TYPE_VMSA;
2290
2291 for (i = 0; i < kvm->created_vcpus; i++) {
2292 struct vcpu_svm *svm = to_svm(xa_load(&kvm->vcpu_array, i));
2293 u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT;
2294
2295 /* Perform some pre-encryption checks against the VMSA */
2296 ret = sev_es_sync_vmsa(svm);
2297 if (ret)
2298 return ret;
2299
2300 /* Transition the VMSA page to a firmware state. */
2301 ret = rmp_make_private_noremap(pfn, -1, PG_LEVEL_4K, sev->asid, true);
2302 if (ret)
2303 return ret;
2304
2305 /* Issue the SNP command to encrypt the VMSA */
2306 data.address = __sme_pa(svm->sev_es.vmsa);
2307 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
2308 &data, &argp->error);
2309 if (ret) {
2310 snp_page_reclaim(pfn);
2311 return ret;
2312 }
2313
2314 svm->vcpu.arch.guest_state_protected = true;
2315 }
2316
2317 return 0;
2318}
2319
2320static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
2321{
2322 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
2323 struct sev_data_snp_launch_finish *data;
2324 void *id_block = NULL, *id_auth = NULL;
2325 struct kvm_sev_snp_launch_finish params;
2326 int ret;
2327
2328 if (!sev_snp_guest(kvm))
2329 return -ENOTTY;
2330
2331 if (!sev->snp_context)
2332 return -EINVAL;
2333
2334 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
2335 return -EFAULT;
2336
2337 /* Measure all vCPUs using LAUNCH_UPDATE before we finalize the launch flow.
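 *
 * A rough userspace sketch of this step, reusing the uapi struct and command
 * names seen in this file (illustrative only, not a complete launch flow):
 *
 *   struct kvm_sev_snp_launch_finish fin = { .id_block_en = 0 };
 *   struct kvm_sev_cmd cmd = {
 *           .id     = KVM_SEV_SNP_LAUNCH_FINISH,
 *           .data   = (__u64)&fin,
 *           .sev_fd = sev_fd,
 *   };
 *   ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
 *
 * snp_launch_update_vmsa() encrypts and measures each vCPU's VMSA with page
 * type SNP_PAGE_TYPE_VMSA before SNP_LAUNCH_FINISH finalizes the launch
 * measurement.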
*/ 2338 ret = snp_launch_update_vmsa(kvm, argp); 2339 if (ret) 2340 return ret; 2341 2342 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); 2343 if (!data) 2344 return -ENOMEM; 2345 2346 if (params.id_block_en) { 2347 id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE); 2348 if (IS_ERR(id_block)) { 2349 ret = PTR_ERR(id_block); 2350 goto e_free; 2351 } 2352 2353 data->id_block_en = 1; 2354 data->id_block_paddr = __sme_pa(id_block); 2355 } 2356 2357 if (params.auth_key_en) { 2358 id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE); 2359 if (IS_ERR(id_auth)) { 2360 ret = PTR_ERR(id_auth); 2361 goto e_free_id_block; 2362 } 2363 2364 data->auth_key_en = 1; 2365 data->id_auth_paddr = __sme_pa(id_auth); 2366 } 2367 2368 data->gctx_paddr = __psp_pa(sev->snp_context); 2369 ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); 2370 2371 kfree(id_auth); 2372 2373e_free_id_block: 2374 kfree(id_block); 2375 2376e_free: 2377 kfree(data); 2378 2379 return ret; 2380} 2381 2382uint64_t 2383cpc_read_rip(struct kvm *kvm, uint64_t *rip) 2384{ 2385 struct kvm_sev_info *sev; 2386 struct vcpu_svm *svm; 2387 struct kvm_vcpu *vcpu; 2388 hpa_t src_pa, dst_pa; 2389 void *vmsa; 2390 int error; 2391 int ret; 2392 2393 if (xa_empty(&kvm->vcpu_array)) 2394 return -EFAULT; 2395 2396 vcpu = xa_load(&kvm->vcpu_array, 0); 2397 2398 if (sev_es_guest(kvm)) { 2399 sev = &to_kvm_svm(kvm)->sev_info; 2400 svm = to_svm(vcpu); 2401 2402 vmsa = kmalloc(PAGE_SIZE, GFP_KERNEL); 2403 if (!vmsa) return -ENOMEM; 2404 memset(vmsa, 0, PAGE_SIZE); 2405 2406 src_pa = __pa(svm->sev_es.vmsa); 2407 dst_pa = __pa(vmsa); 2408 if (sev->snp_active) { 2409 ret = snp_guest_dbg_decrypt_page( 2410 __pa(sev->snp_context) >> PAGE_SHIFT, 2411 src_pa >> PAGE_SHIFT, dst_pa >> PAGE_SHIFT, 2412 &error); 2413 } else { 2414 ret = __sev_dbg_decrypt(kvm, src_pa, dst_pa, 2415 PAGE_SIZE, &error); 2416 } 2417 2418 *rip = *(uint64_t *)(vmsa + 0x178); 2419 2420 kfree(vmsa); 2421 2422 if (ret) return ret; 2423 } else { 2424 *rip = kvm_rip_read(vcpu); 2425 } 2426 2427 return 0; 2428} 2429 2430static int 2431sev_cachepc_ioctl(struct kvm *kvm, struct kvm_sev_cmd *sev_cmd) 2432{ 2433 struct cpc_sev_cmd cmd; 2434 int ret; 2435 2436 if (copy_from_user(&cmd, (void *)sev_cmd->data, sizeof(cmd))) 2437 return -EFAULT; 2438 2439 if (cmd.id == SEV_CPC_GET_RIP) { 2440 ret = cpc_read_rip(kvm, &cmd.data); 2441 if (ret) return ret; 2442 } else { 2443 CPC_ERR("Unknown cachepc sev cmd: %i\n", cmd.id); 2444 } 2445 2446 if (copy_to_user((void *)sev_cmd->data, &cmd, sizeof(cmd))) 2447 return -EFAULT; 2448 2449 return 0; 2450} 2451 2452int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) 2453{ 2454 struct kvm_sev_cmd sev_cmd; 2455 int r; 2456 2457 if (!sev_enabled) 2458 return -ENOTTY; 2459 2460 if (!argp) 2461 return 0; 2462 2463 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) 2464 return -EFAULT; 2465 2466 mutex_lock(&kvm->lock); 2467 2468 /* Only the enc_context_owner handles some memory enc operations. 
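 * Mirror VMs (created via KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, see
 * sev_vm_copy_enc_context_from() below) share the owner's ASID and firmware
 * context, so most commands must be issued on the owner;
 * is_cmd_allowed_from_mirror() whitelists the few per-vCPU operations
 * (e.g. the debug and VMSA-update commands) that a mirror may issue itself.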
*/ 2469 if (is_mirroring_enc_context(kvm) && 2470 !is_cmd_allowed_from_mirror(sev_cmd.id)) { 2471 r = -EINVAL; 2472 goto out; 2473 } 2474 2475 switch (sev_cmd.id) { 2476 case KVM_SEV_SNP_INIT: 2477 if (!sev_snp_enabled) { 2478 r = -ENOTTY; 2479 goto out; 2480 } 2481 fallthrough; 2482 case KVM_SEV_ES_INIT: 2483 if (!sev_es_enabled) { 2484 r = -ENOTTY; 2485 goto out; 2486 } 2487 fallthrough; 2488 case KVM_SEV_INIT: 2489 r = sev_guest_init(kvm, &sev_cmd); 2490 break; 2491 case KVM_SEV_LAUNCH_START: 2492 r = sev_launch_start(kvm, &sev_cmd); 2493 break; 2494 case KVM_SEV_LAUNCH_UPDATE_DATA: 2495 r = sev_launch_update_data(kvm, &sev_cmd); 2496 break; 2497 case KVM_SEV_LAUNCH_UPDATE_VMSA: 2498 r = sev_launch_update_vmsa(kvm, &sev_cmd); 2499 break; 2500 case KVM_SEV_LAUNCH_MEASURE: 2501 r = sev_launch_measure(kvm, &sev_cmd); 2502 break; 2503 case KVM_SEV_LAUNCH_FINISH: 2504 r = sev_launch_finish(kvm, &sev_cmd); 2505 break; 2506 case KVM_SEV_GUEST_STATUS: 2507 r = sev_guest_status(kvm, &sev_cmd); 2508 break; 2509 case KVM_SEV_DBG_DECRYPT: 2510 if (sev_snp_guest(kvm)) 2511 r = snp_dbg_decrypt_vmsa(kvm, &sev_cmd); 2512 else 2513 r = sev_dbg_crypt(kvm, &sev_cmd, true); 2514 break; 2515 case KVM_SEV_DBG_ENCRYPT: 2516 r = sev_dbg_crypt(kvm, &sev_cmd, false); 2517 break; 2518 case KVM_SEV_LAUNCH_SECRET: 2519 r = sev_launch_secret(kvm, &sev_cmd); 2520 break; 2521 case KVM_SEV_GET_ATTESTATION_REPORT: 2522 r = sev_get_attestation_report(kvm, &sev_cmd); 2523 break; 2524 case KVM_SEV_SEND_START: 2525 r = sev_send_start(kvm, &sev_cmd); 2526 break; 2527 case KVM_SEV_SEND_UPDATE_DATA: 2528 r = sev_send_update_data(kvm, &sev_cmd); 2529 break; 2530 case KVM_SEV_SEND_FINISH: 2531 r = sev_send_finish(kvm, &sev_cmd); 2532 break; 2533 case KVM_SEV_SEND_CANCEL: 2534 r = sev_send_cancel(kvm, &sev_cmd); 2535 break; 2536 case KVM_SEV_RECEIVE_START: 2537 r = sev_receive_start(kvm, &sev_cmd); 2538 break; 2539 case KVM_SEV_RECEIVE_UPDATE_DATA: 2540 r = sev_receive_update_data(kvm, &sev_cmd); 2541 break; 2542 case KVM_SEV_RECEIVE_FINISH: 2543 r = sev_receive_finish(kvm, &sev_cmd); 2544 break; 2545 case KVM_SEV_SNP_LAUNCH_START: 2546 r = snp_launch_start(kvm, &sev_cmd); 2547 break; 2548 case KVM_SEV_SNP_LAUNCH_UPDATE: 2549 r = snp_launch_update(kvm, &sev_cmd); 2550 break; 2551 case KVM_SEV_SNP_LAUNCH_FINISH: 2552 r = snp_launch_finish(kvm, &sev_cmd); 2553 break; 2554 case KVM_SEV_CACHEPC: 2555 r = sev_cachepc_ioctl(kvm, &sev_cmd); 2556 break; 2557 default: 2558 r = -EINVAL; 2559 goto out; 2560 } 2561 2562 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd))) 2563 r = -EFAULT; 2564 2565out: 2566 mutex_unlock(&kvm->lock); 2567 return r; 2568} 2569 2570static bool is_range_hugetlb(struct kvm *kvm, struct kvm_enc_region *range) 2571{ 2572 struct vm_area_struct *vma; 2573 u64 start, end; 2574 bool ret = true; 2575 2576 start = range->addr; 2577 end = start + range->size; 2578 2579 mmap_read_lock(kvm->mm); 2580 2581 do { 2582 vma = find_vma_intersection(kvm->mm, start, end); 2583 if (!vma) 2584 goto unlock; 2585 2586 if (is_vm_hugetlb_page(vma)) 2587 goto unlock; 2588 2589 start = vma->vm_end; 2590 } while (end > vma->vm_end); 2591 2592 ret = false; 2593 2594unlock: 2595 mmap_read_unlock(kvm->mm); 2596 return ret; 2597} 2598 2599int sev_mem_enc_register_region(struct kvm *kvm, 2600 struct kvm_enc_region *range) 2601{ 2602 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2603 struct enc_region *region; 2604 int ret = 0; 2605 2606 if (!sev_guest(kvm)) 2607 return -ENOTTY; 2608 2609 /* If kvm is mirroring encryption 
context it isn't responsible for it */
2610 if (is_mirroring_enc_context(kvm))
2611 return -EINVAL;
2612
2613 if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
2614 return -EINVAL;
2615
2616 /*
2617 * SEV-SNP does not support memory backed by HugeTLB pages. Verify
2618 * that the registered memory range is not backed by HugeTLB.
2619 */
2620 if (sev_snp_guest(kvm) && is_range_hugetlb(kvm, range))
2621 return -EINVAL;
2622
2623 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
2624 if (!region)
2625 return -ENOMEM;
2626
2627 mutex_lock(&kvm->lock);
2628 region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
2629 if (IS_ERR(region->pages)) {
2630 ret = PTR_ERR(region->pages);
2631 mutex_unlock(&kvm->lock);
2632 goto e_free;
2633 }
2634
2635 region->uaddr = range->addr;
2636 region->size = range->size;
2637
2638 list_add_tail(&region->list, &sev->regions_list);
2639 mutex_unlock(&kvm->lock);
2640
2641 /*
2642 * The guest may change the memory encryption attribute from C=0 -> C=1
2643 * or vice versa for this memory range. Let's make sure caches are
2644 * flushed to ensure that guest data gets written into memory with the
2645 * correct C-bit.
2646 */
2647 sev_clflush_pages(region->pages, region->npages);
2648
2649 return ret;
2650
2651e_free:
2652 kfree(region);
2653 return ret;
2654}
2655
2656static struct enc_region *
2657find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
2658{
2659 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
2660 struct list_head *head = &sev->regions_list;
2661 struct enc_region *i;
2662
2663 list_for_each_entry(i, head, list) {
2664 if (i->uaddr == range->addr &&
2665 i->size == range->size)
2666 return i;
2667 }
2668
2669 return NULL;
2670}
2671
2672static void __unregister_enc_region_locked(struct kvm *kvm,
2673 struct enc_region *region)
2674{
2675 unsigned long i, pfn;
2676 int level;
2677
2678 /*
2679 * The guest memory pages are assigned in the RMP table. Unassign it
2680 * before releasing the memory.
2681 */
2682 if (sev_snp_guest(kvm)) {
2683 for (i = 0; i < region->npages; i++) {
2684 pfn = page_to_pfn(region->pages[i]);
2685
2686 if (!snp_lookup_rmpentry(pfn, &level))
2687 continue;
2688
2689 cond_resched();
2690
2691 if (level > PG_LEVEL_4K)
2692 pfn &= ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
2693
2694 host_rmp_make_shared(pfn, level, true);
2695 }
2696 }
2697
2698 sev_unpin_memory(kvm, region->pages, region->npages);
2699 list_del(&region->list);
2700 kfree(region);
2701}
2702
2703int sev_mem_enc_unregister_region(struct kvm *kvm,
2704 struct kvm_enc_region *range)
2705{
2706 struct enc_region *region;
2707 int ret;
2708
2709 /* If kvm is mirroring encryption context it isn't responsible for it */
2710 if (is_mirroring_enc_context(kvm))
2711 return -EINVAL;
2712
2713 mutex_lock(&kvm->lock);
2714
2715 if (!sev_guest(kvm)) {
2716 ret = -ENOTTY;
2717 goto failed;
2718 }
2719
2720 region = find_enc_region(kvm, range);
2721 if (!region) {
2722 ret = -EINVAL;
2723 goto failed;
2724 }
2725
2726 /*
2727 * Ensure that all guest tagged cache entries are flushed before
2728 * releasing the pages back to the system for use. CLFLUSH will
2729 * not do this, so issue a WBINVD.
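 *
 * For reference, this path is reached from userspace roughly as follows
 * (sketch using the generic KVM memory-encryption ioctls):
 *
 *   struct kvm_enc_region region = { .addr = uaddr, .size = size };
 *   ioctl(vm_fd, KVM_MEMORY_ENCRYPT_UNREG_REGION, &region);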
2730 */
2731 wbinvd_on_all_cpus();
2732
2733 __unregister_enc_region_locked(kvm, region);
2734
2735 mutex_unlock(&kvm->lock);
2736 return 0;
2737
2738failed:
2739 mutex_unlock(&kvm->lock);
2740 return ret;
2741}
2742
2743int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
2744{
2745 struct file *source_kvm_file;
2746 struct kvm *source_kvm;
2747 struct kvm_sev_info *source_sev, *mirror_sev;
2748 int ret;
2749
2750 source_kvm_file = fget(source_fd);
2751 if (!file_is_kvm(source_kvm_file)) {
2752 ret = -EBADF;
2753 goto e_source_fput;
2754 }
2755
2756 source_kvm = source_kvm_file->private_data;
2757 ret = sev_lock_two_vms(kvm, source_kvm);
2758 if (ret)
2759 goto e_source_fput;
2760
2761 /*
2762 * Mirrors of mirrors should work, but let's not get silly. Also
2763 * disallow out-of-band SEV/SEV-ES init if the target is already an
2764 * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
2765 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
2766 */
2767 if (sev_guest(kvm) || !sev_guest(source_kvm) ||
2768 is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
2769 ret = -EINVAL;
2770 goto e_unlock;
2771 }
2772
2773 /*
2774 * The mirror kvm holds an enc_context_owner ref, so its ASID can't
2775 * disappear until we're done with it.
2776 */
2777 source_sev = &to_kvm_svm(source_kvm)->sev_info;
2778 kvm_get_kvm(source_kvm);
2779 mirror_sev = &to_kvm_svm(kvm)->sev_info;
2780 list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
2781
2782 /* Set enc_context_owner and copy its encryption context over */
2783 mirror_sev->enc_context_owner = source_kvm;
2784 mirror_sev->active = true;
2785 mirror_sev->asid = source_sev->asid;
2786 mirror_sev->fd = source_sev->fd;
2787 mirror_sev->es_active = source_sev->es_active;
2788 mirror_sev->handle = source_sev->handle;
2789 INIT_LIST_HEAD(&mirror_sev->regions_list);
2790 INIT_LIST_HEAD(&mirror_sev->mirror_vms);
2791 ret = 0;
2792
2793 /*
2794 * Do not copy ap_jump_table, since the mirror does not share the same
2795 * KVM context as the original and they may have different
2796 * memory views.
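 *
 * (Illustrative note: if the mirror's guest needs an AP jump table, it can
 * register its own through the SVM_VMGEXIT_AP_JUMP_TABLE VMGEXIT handled in
 * sev_handle_vmgexit() below.)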
2797 */ 2798 2799e_unlock: 2800 sev_unlock_two_vms(kvm, source_kvm); 2801e_source_fput: 2802 if (source_kvm_file) 2803 fput(source_kvm_file); 2804 return ret; 2805} 2806 2807static int snp_decommission_context(struct kvm *kvm) 2808{ 2809 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2810 struct sev_data_snp_decommission data = {}; 2811 int ret; 2812 2813 /* If context is not created then do nothing */ 2814 if (!sev->snp_context) 2815 return 0; 2816 2817 data.gctx_paddr = __sme_pa(sev->snp_context); 2818 ret = snp_guest_decommission(&data, NULL); 2819 if (WARN_ONCE(ret, "failed to release guest context")) 2820 return ret; 2821 2822 /* free the context page now */ 2823 snp_free_firmware_page(sev->snp_context); 2824 sev->snp_context = NULL; 2825 2826 kfree(sev->snp_certs_data); 2827 2828 return 0; 2829} 2830 2831void sev_vm_destroy(struct kvm *kvm) 2832{ 2833 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2834 struct list_head *head = &sev->regions_list; 2835 struct list_head *pos, *q; 2836 2837 if (!sev_guest(kvm)) 2838 return; 2839 2840 WARN_ON(!list_empty(&sev->mirror_vms)); 2841 2842 /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ 2843 if (is_mirroring_enc_context(kvm)) { 2844 struct kvm *owner_kvm = sev->enc_context_owner; 2845 2846 mutex_lock(&owner_kvm->lock); 2847 list_del(&sev->mirror_entry); 2848 mutex_unlock(&owner_kvm->lock); 2849 kvm_put_kvm(owner_kvm); 2850 return; 2851 } 2852 2853 /* 2854 * Ensure that all guest tagged cache entries are flushed before 2855 * releasing the pages back to the system for use. CLFLUSH will 2856 * not do this, so issue a WBINVD. 2857 */ 2858 wbinvd_on_all_cpus(); 2859 2860 /* 2861 * if userspace was terminated before unregistering the memory regions 2862 * then lets unpin all the registered memory. 2863 */ 2864 if (!list_empty(head)) { 2865 list_for_each_safe(pos, q, head) { 2866 __unregister_enc_region_locked(kvm, 2867 list_entry(pos, struct enc_region, list)); 2868 cond_resched(); 2869 } 2870 } 2871 2872 if (sev_snp_guest(kvm)) { 2873 if (snp_decommission_context(kvm)) { 2874 WARN_ONCE(1, "Failed to free SNP guest context, leaking asid!\n"); 2875 return; 2876 } 2877 } else { 2878 sev_unbind_asid(kvm, sev->handle); 2879 } 2880 2881 sev_asid_free(sev); 2882} 2883 2884void __init sev_set_cpu_caps(void) 2885{ 2886 if (!sev_enabled) 2887 kvm_cpu_cap_clear(X86_FEATURE_SEV); 2888 if (!sev_es_enabled) 2889 kvm_cpu_cap_clear(X86_FEATURE_SEV_ES); 2890} 2891 2892void __init sev_hardware_setup(void) 2893{ 2894#ifdef CONFIG_KVM_AMD_SEV 2895 unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; 2896 bool sev_snp_supported = false; 2897 bool sev_es_supported = false; 2898 bool sev_supported = false; 2899 2900 if (!sev_enabled || !npt_enabled) 2901 goto out; 2902 2903 /* 2904 * SEV must obviously be supported in hardware. Sanity check that the 2905 * CPU supports decode assists, which is mandatory for SEV guests to 2906 * support instruction emulation. 
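 *
 * A minimal sketch of the CPUID 0x8000001f decode performed below; the
 * local names are illustrative and simply mirror the assignments that
 * follow:
 *
 *   cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
 *   enc_bit  = ebx & 0x3f;   // position of the C-bit in a PTE
 *   max_asid = ecx;          // max simultaneously encrypted guests
 *   min_asid = edx;          // first ASID usable for plain SEV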
2907 */ 2908 if (!boot_cpu_has(X86_FEATURE_SEV) || 2909 WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS))) 2910 goto out; 2911 2912 /* Retrieve SEV CPUID information */ 2913 cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); 2914 2915 /* Set encryption bit location for SEV-ES guests */ 2916 sev_enc_bit = ebx & 0x3f; 2917 2918 /* Maximum number of encrypted guests supported simultaneously */ 2919 max_sev_asid = ecx; 2920 if (!max_sev_asid) 2921 goto out; 2922 2923 /* Minimum ASID value that should be used for SEV guest */ 2924 min_sev_asid = edx; 2925 sev_me_mask = 1UL << (ebx & 0x3f); 2926 2927 /* 2928 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, 2929 * even though it's never used, so that the bitmap is indexed by the 2930 * actual ASID. 2931 */ 2932 nr_asids = max_sev_asid + 1; 2933 sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2934 if (!sev_asid_bitmap) 2935 goto out; 2936 2937 sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2938 if (!sev_reclaim_asid_bitmap) { 2939 bitmap_free(sev_asid_bitmap); 2940 sev_asid_bitmap = NULL; 2941 goto out; 2942 } 2943 2944 sev_asid_count = max_sev_asid - min_sev_asid + 1; 2945 if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)) 2946 goto out; 2947 2948 pr_info("SEV supported: %u ASIDs\n", sev_asid_count); 2949 sev_supported = true; 2950 2951 /* SEV-ES support requested? */ 2952 if (!sev_es_enabled) 2953 goto out; 2954 2955 /* 2956 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest 2957 * instruction stream, i.e. can't emulate in response to a #NPF and 2958 * instead relies on #NPF(RSVD) being reflected into the guest as #VC 2959 * (the guest can then do a #VMGEXIT to request MMIO emulation). 2960 */ 2961 if (!enable_mmio_caching) 2962 goto out; 2963 2964 /* Does the CPU support SEV-ES? */ 2965 if (!boot_cpu_has(X86_FEATURE_SEV_ES)) 2966 goto out; 2967 2968 /* Has the system been allocated ASIDs for SEV-ES? */ 2969 if (min_sev_asid == 1) 2970 goto out; 2971 2972 sev_es_asid_count = min_sev_asid - 1; 2973 if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)) 2974 goto out; 2975 2976 sev_es_supported = true; 2977 sev_snp_supported = sev_snp_enabled && cpu_feature_enabled(X86_FEATURE_SEV_SNP); 2978 2979 pr_info("SEV-ES %ssupported: %u ASIDs\n", 2980 sev_snp_supported ? "and SEV-SNP " : "", sev_es_asid_count); 2981 2982out: 2983 sev_enabled = sev_supported; 2984 sev_es_enabled = sev_es_supported; 2985 sev_snp_enabled = sev_snp_supported; 2986#endif 2987} 2988 2989void sev_hardware_unsetup(void) 2990{ 2991 if (!sev_enabled) 2992 return; 2993 2994 /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ 2995 sev_flush_asids(1, max_sev_asid); 2996 2997 bitmap_free(sev_asid_bitmap); 2998 bitmap_free(sev_reclaim_asid_bitmap); 2999 3000 misc_cg_set_capacity(MISC_CG_RES_SEV, 0); 3001 misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0); 3002} 3003 3004int sev_cpu_init(struct svm_cpu_data *sd) 3005{ 3006 if (!sev_enabled) 3007 return 0; 3008 3009 sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); 3010 if (!sd->sev_vmcbs) 3011 return -ENOMEM; 3012 3013 return 0; 3014} 3015 3016/* 3017 * Pages used by hardware to hold guest encrypted state must be flushed before 3018 * returning them to the system. 3019 */ 3020static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) 3021{ 3022 int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; 3023 3024 /* 3025 * Note! The address must be a kernel address, as regular page walk 3026 * checks are performed by VM_PAGE_FLUSH, i.e. 
operating on a user 3027 * address is non-deterministic and unsafe. This function deliberately 3028 * takes a pointer to deter passing in a user address. 3029 */ 3030 unsigned long addr = (unsigned long)va; 3031 3032 /* 3033 * If CPU enforced cache coherency for encrypted mappings of the 3034 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache 3035 * flush is still needed in order to work properly with DMA devices. 3036 */ 3037 if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { 3038 clflush_cache_range(va, PAGE_SIZE); 3039 return; 3040 } 3041 3042 /* 3043 * VM Page Flush takes a host virtual address and a guest ASID. Fall 3044 * back to WBINVD if this faults so as not to make any problems worse 3045 * by leaving stale encrypted data in the cache. 3046 */ 3047 if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) 3048 goto do_wbinvd; 3049 3050 return; 3051 3052do_wbinvd: 3053 wbinvd_on_all_cpus(); 3054} 3055 3056void sev_free_vcpu(struct kvm_vcpu *vcpu) 3057{ 3058 struct vcpu_svm *svm; 3059 3060 if (!sev_es_guest(vcpu->kvm)) 3061 return; 3062 3063 svm = to_svm(vcpu); 3064 3065 /* 3066 * If its an SNP guest, then VMSA was added in the RMP entry as 3067 * a guest owned page. Transition the page to hypervisor state 3068 * before releasing it back to the system. 3069 * Also the page is removed from the kernel direct map, so flush it 3070 * later after it is transitioned back to hypervisor state and 3071 * restored in the direct map. 3072 */ 3073 if (sev_snp_guest(vcpu->kvm)) { 3074 u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; 3075 3076 if (host_rmp_make_shared(pfn, PG_LEVEL_4K, false)) 3077 goto skip_vmsa_free; 3078 } 3079 3080 if (vcpu->arch.guest_state_protected) 3081 sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); 3082 3083 __free_page(virt_to_page(svm->sev_es.vmsa)); 3084 3085skip_vmsa_free: 3086 kvfree(svm->sev_es.ghcb_sa); 3087} 3088 3089static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map) 3090{ 3091 struct vmcb_control_area *control = &svm->vmcb->control; 3092 u64 gfn = gpa_to_gfn(control->ghcb_gpa); 3093 struct kvm_vcpu *vcpu = &svm->vcpu; 3094 3095 if (kvm_vcpu_map(vcpu, gfn, map)) { 3096 /* Unable to map GHCB from guest */ 3097 pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn); 3098 return -EFAULT; 3099 } 3100 3101 if (sev_post_map_gfn(vcpu->kvm, map->gfn, map->pfn)) { 3102 kvm_vcpu_unmap(vcpu, map, false); 3103 return -EBUSY; 3104 } 3105 3106 return 0; 3107} 3108 3109static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map) 3110{ 3111 struct kvm_vcpu *vcpu = &svm->vcpu; 3112 3113 kvm_vcpu_unmap(vcpu, map, true); 3114 sev_post_unmap_gfn(vcpu->kvm, map->gfn, map->pfn); 3115} 3116 3117static void dump_ghcb(struct vcpu_svm *svm) 3118{ 3119 struct kvm_host_map map; 3120 unsigned int nbits; 3121 struct ghcb *ghcb; 3122 3123 if (svm_map_ghcb(svm, &map)) 3124 return; 3125 3126 ghcb = map.hva; 3127 3128 /* Re-use the dump_invalid_vmcb module parameter */ 3129 if (!dump_invalid_vmcb) { 3130 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); 3131 goto e_unmap; 3132 } 3133 3134 nbits = sizeof(ghcb->save.valid_bitmap) * 8; 3135 3136 pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); 3137 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", 3138 ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); 3139 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", 3140 ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); 3141 pr_err("%-20s%016llx 
is_valid: %u\n", "sw_exit_info_2", 3142 ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); 3143 pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", 3144 ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); 3145 pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); 3146 3147e_unmap: 3148 svm_unmap_ghcb(svm, &map); 3149} 3150 3151static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm) 3152{ 3153 struct kvm_vcpu *vcpu = &svm->vcpu; 3154 struct kvm_host_map map; 3155 struct ghcb *ghcb; 3156 3157 if (svm_map_ghcb(svm, &map)) 3158 return false; 3159 3160 ghcb = map.hva; 3161 3162 /* 3163 * The GHCB protocol so far allows for the following data 3164 * to be returned: 3165 * GPRs RAX, RBX, RCX, RDX 3166 * 3167 * Copy their values, even if they may not have been written during the 3168 * VM-Exit. It's the guest's responsibility to not consume random data. 3169 */ 3170 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); 3171 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); 3172 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); 3173 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); 3174 3175 /* 3176 * Copy the return values from the exit_info_{1,2}. 3177 */ 3178 ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1); 3179 ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2); 3180 3181 /* Sync the scratch buffer area. */ 3182 if (svm->sev_es.ghcb_sa_sync) { 3183 if (svm->sev_es.ghcb_sa_contained) { 3184 memcpy(ghcb->shared_buffer + svm->sev_es.ghcb_sa_offset, 3185 svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len); 3186 } else { 3187 int ret; 3188 3189 ret = kvm_write_guest(svm->vcpu.kvm, 3190 svm->sev_es.ghcb_sa_gpa, 3191 svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len); 3192 if (ret) 3193 pr_warn_ratelimited("unmap_ghcb: kvm_write_guest failed while syncing scratch area, gpa: %llx, ret: %d\n", 3194 svm->sev_es.ghcb_sa_gpa, ret); 3195 } 3196 svm->sev_es.ghcb_sa_sync = false; 3197 } 3198 3199 trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb); 3200 3201 svm_unmap_ghcb(svm, &map); 3202 3203 return true; 3204} 3205 3206static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb) 3207{ 3208 struct vmcb_control_area *control = &svm->vmcb->control; 3209 struct kvm_vcpu *vcpu = &svm->vcpu; 3210 u64 exit_code; 3211 3212 /* 3213 * The GHCB protocol so far allows for the following data 3214 * to be supplied: 3215 * GPRs RAX, RBX, RCX, RDX 3216 * XCR0 3217 * CPL 3218 * 3219 * VMMCALL allows the guest to provide extra registers. KVM also 3220 * expects RSI for hypercalls, so include that, too. 3221 * 3222 * Copy their values to the appropriate location if supplied. 
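 *
 * Registers the guest did not mark valid simply read back as zero: the
 * register array is cleared below and the ghcb_get_*_if_valid() helpers
 * return 0 when the corresponding valid_bitmap bit is not set.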
3223 */ 3224 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); 3225 3226 vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); 3227 vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); 3228 vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); 3229 vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); 3230 vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); 3231 3232 svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); 3233 3234 if (ghcb_xcr0_is_valid(ghcb)) { 3235 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); 3236 kvm_update_cpuid_runtime(vcpu); 3237 } 3238 3239 /* Copy the GHCB exit information into the VMCB fields */ 3240 exit_code = ghcb_get_sw_exit_code(ghcb); 3241 control->exit_code = lower_32_bits(exit_code); 3242 control->exit_code_hi = upper_32_bits(exit_code); 3243 control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); 3244 control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); 3245 3246 /* Copy the GHCB scratch area GPA */ 3247 svm->sev_es.ghcb_sa_gpa = ghcb_get_sw_scratch(ghcb); 3248 3249 /* Clear the valid entries fields */ 3250 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 3251} 3252 3253static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code) 3254{ 3255 struct kvm_vcpu *vcpu = &svm->vcpu; 3256 struct kvm_host_map map; 3257 struct ghcb *ghcb; 3258 u64 reason; 3259 3260 if (svm_map_ghcb(svm, &map)) 3261 return -EFAULT; 3262 3263 ghcb = map.hva; 3264 3265 trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb); 3266 3267 /* 3268 * Retrieve the exit code now even though it may not be marked valid 3269 * as it could help with debugging. 3270 */ 3271 *exit_code = ghcb_get_sw_exit_code(ghcb); 3272 3273 /* Only GHCB Usage code 0 is supported */ 3274 if (ghcb->ghcb_usage) { 3275 reason = GHCB_ERR_INVALID_USAGE; 3276 goto vmgexit_err; 3277 } 3278 3279 reason = GHCB_ERR_MISSING_INPUT; 3280 3281 if (!ghcb_sw_exit_code_is_valid(ghcb) || 3282 !ghcb_sw_exit_info_1_is_valid(ghcb) || 3283 !ghcb_sw_exit_info_2_is_valid(ghcb)) 3284 goto vmgexit_err; 3285 3286 switch (ghcb_get_sw_exit_code(ghcb)) { 3287 case SVM_EXIT_READ_DR7: 3288 break; 3289 case SVM_EXIT_WRITE_DR7: 3290 if (!ghcb_rax_is_valid(ghcb)) 3291 goto vmgexit_err; 3292 break; 3293 case SVM_EXIT_RDTSC: 3294 break; 3295 case SVM_EXIT_RDPMC: 3296 if (!ghcb_rcx_is_valid(ghcb)) 3297 goto vmgexit_err; 3298 break; 3299 case SVM_EXIT_CPUID: 3300 if (!ghcb_rax_is_valid(ghcb) || 3301 !ghcb_rcx_is_valid(ghcb)) 3302 goto vmgexit_err; 3303 if (ghcb_get_rax(ghcb) == 0xd) 3304 if (!ghcb_xcr0_is_valid(ghcb)) 3305 goto vmgexit_err; 3306 break; 3307 case SVM_EXIT_INVD: 3308 break; 3309 case SVM_EXIT_IOIO: 3310 if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { 3311 if (!ghcb_sw_scratch_is_valid(ghcb)) 3312 goto vmgexit_err; 3313 } else { 3314 if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) 3315 if (!ghcb_rax_is_valid(ghcb)) 3316 goto vmgexit_err; 3317 } 3318 break; 3319 case SVM_EXIT_MSR: 3320 if (!ghcb_rcx_is_valid(ghcb)) 3321 goto vmgexit_err; 3322 if (ghcb_get_sw_exit_info_1(ghcb)) { 3323 if (!ghcb_rax_is_valid(ghcb) || 3324 !ghcb_rdx_is_valid(ghcb)) 3325 goto vmgexit_err; 3326 } 3327 break; 3328 case SVM_EXIT_VMMCALL: 3329 CPC_DBG("SVM_EXIT_VMMCALL %llu", ghcb_get_rax(ghcb)); 3330 // if (!ghcb_rax_is_valid(ghcb) || 3331 // !ghcb_cpl_is_valid(ghcb)) 3332 // goto vmgexit_err; 3333 break; 3334 case SVM_EXIT_RDTSCP: 3335 break; 3336 case SVM_EXIT_WBINVD: 3337 break; 3338 case SVM_EXIT_MONITOR: 3339 if (!ghcb_rax_is_valid(ghcb) || 3340 !ghcb_rcx_is_valid(ghcb) || 3341 
!ghcb_rdx_is_valid(ghcb)) 3342 goto vmgexit_err; 3343 break; 3344 case SVM_EXIT_MWAIT: 3345 if (!ghcb_rax_is_valid(ghcb) || 3346 !ghcb_rcx_is_valid(ghcb)) 3347 goto vmgexit_err; 3348 break; 3349 case SVM_VMGEXIT_MMIO_READ: 3350 case SVM_VMGEXIT_MMIO_WRITE: 3351 if (!ghcb_sw_scratch_is_valid(ghcb)) 3352 goto vmgexit_err; 3353 break; 3354 case SVM_VMGEXIT_AP_CREATION: 3355 if (!ghcb_rax_is_valid(ghcb)) 3356 goto vmgexit_err; 3357 break; 3358 case SVM_VMGEXIT_NMI_COMPLETE: 3359 case SVM_VMGEXIT_AP_HLT_LOOP: 3360 case SVM_VMGEXIT_AP_JUMP_TABLE: 3361 case SVM_VMGEXIT_UNSUPPORTED_EVENT: 3362 case SVM_VMGEXIT_HV_FEATURES: 3363 case SVM_VMGEXIT_PSC: 3364 case SVM_VMGEXIT_GUEST_REQUEST: 3365 case SVM_VMGEXIT_EXT_GUEST_REQUEST: 3366 break; 3367 default: 3368 reason = GHCB_ERR_INVALID_EVENT; 3369 goto vmgexit_err; 3370 } 3371 3372 sev_es_sync_from_ghcb(svm, ghcb); 3373 3374 svm_unmap_ghcb(svm, &map); 3375 return 0; 3376 3377vmgexit_err: 3378 vcpu = &svm->vcpu; 3379 3380 if (reason == GHCB_ERR_INVALID_USAGE) { 3381 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", 3382 ghcb->ghcb_usage); 3383 } else if (reason == GHCB_ERR_INVALID_EVENT) { 3384 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", 3385 *exit_code); 3386 } else { 3387 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", 3388 *exit_code); 3389 dump_ghcb(svm); 3390 } 3391 3392 /* Clear the valid entries fields */ 3393 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 3394 3395 ghcb_set_sw_exit_info_1(ghcb, 2); 3396 ghcb_set_sw_exit_info_2(ghcb, reason); 3397 3398 svm_unmap_ghcb(svm, &map); 3399 3400 /* Resume the guest to "return" the error code. */ 3401 return 1; 3402} 3403 3404void sev_es_unmap_ghcb(struct vcpu_svm *svm) 3405{ 3406 if (svm->sev_es.ghcb_sa_alloc_len >= 2) 3407 trace_kvm_sev_es_unmap_ghcb(svm->sev_es.ghcb_sa, 3408 svm->sev_es.ghcb_sa_gpa, 3409 svm->sev_es.ghcb_sa_len, 3410 svm->sev_es.ghcb_sa_alloc_len, 3411 svm->sev_es.ghcb_sa_sync, 3412 svm->sev_es.ghcb_in_use, 3413 ((u8 *)svm->sev_es.ghcb_sa)[0], 3414 ((u8 *)svm->sev_es.ghcb_sa)[1]); 3415 else 3416 trace_kvm_sev_es_unmap_ghcb(svm->sev_es.ghcb_sa, 3417 svm->sev_es.ghcb_sa_gpa, 3418 svm->sev_es.ghcb_sa_len, 3419 svm->sev_es.ghcb_sa_alloc_len, 3420 svm->sev_es.ghcb_sa_sync, 3421 svm->sev_es.ghcb_in_use, 3422 0, 0); 3423 3424 /* Clear any indication that the vCPU is in a type of AP Reset Hold */ 3425 svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE; 3426 3427 if (!svm->sev_es.ghcb_in_use) 3428 return; 3429 3430 sev_es_sync_to_ghcb(svm); 3431 3432 svm->sev_es.ghcb_in_use = false; 3433} 3434 3435void pre_sev_run(struct vcpu_svm *svm, int cpu) 3436{ 3437 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3438 int asid = sev_get_asid(svm->vcpu.kvm); 3439 3440 /* Assign the asid allocated with this SEV guest */ 3441 svm->asid = asid; 3442 3443 /* 3444 * Flush guest TLB: 3445 * 3446 * 1) when different VMCB for the same ASID is to be run on the same host CPU. 3447 * 2) or this VMCB was executed on different host CPU in previous VMRUNs. 
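 *
 * In other words, the flush below is skipped only when this exact VMCB was
 * the last one run with this ASID on this CPU and the vCPU's last VMRUN was
 * also on this CPU; any other combination sets TLB_CONTROL_FLUSH_ASID before
 * entering the guest.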
3448 */ 3449 if (sd->sev_vmcbs[asid] == svm->vmcb && 3450 svm->vcpu.arch.last_vmentry_cpu == cpu) 3451 return; 3452 3453 sd->sev_vmcbs[asid] = svm->vmcb; 3454 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 3455 vmcb_mark_dirty(svm->vmcb, VMCB_ASID); 3456} 3457 3458#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) 3459static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) 3460{ 3461 struct vmcb_control_area *control = &svm->vmcb->control; 3462 u64 ghcb_scratch_beg, ghcb_scratch_end; 3463 u64 scratch_gpa_beg, scratch_gpa_end; 3464 3465 scratch_gpa_beg = svm->sev_es.ghcb_sa_gpa; 3466 if (!scratch_gpa_beg) { 3467 pr_err("vmgexit: scratch gpa not provided\n"); 3468 goto e_scratch; 3469 } 3470 3471 scratch_gpa_end = scratch_gpa_beg + len; 3472 if (scratch_gpa_end < scratch_gpa_beg) { 3473 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", 3474 len, scratch_gpa_beg); 3475 goto e_scratch; 3476 } 3477 3478 if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { 3479 /* Scratch area begins within GHCB */ 3480 ghcb_scratch_beg = control->ghcb_gpa + 3481 offsetof(struct ghcb, shared_buffer); 3482 ghcb_scratch_end = control->ghcb_gpa + 3483 offsetof(struct ghcb, reserved_1); 3484 3485 /* 3486 * If the scratch area begins within the GHCB, it must be 3487 * completely contained in the GHCB shared buffer area. 3488 */ 3489 if (scratch_gpa_beg < ghcb_scratch_beg || 3490 scratch_gpa_end > ghcb_scratch_end) { 3491 pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", 3492 scratch_gpa_beg, scratch_gpa_end); 3493 goto e_scratch; 3494 } 3495 svm->sev_es.ghcb_sa_contained = true; 3496 svm->sev_es.ghcb_sa_offset = scratch_gpa_beg - ghcb_scratch_beg; 3497 } else { 3498 /* 3499 * The guest memory must be read into a kernel buffer, so 3500 * limit the size 3501 */ 3502 if (len > GHCB_SCRATCH_AREA_LIMIT) { 3503 pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", 3504 len, GHCB_SCRATCH_AREA_LIMIT); 3505 goto e_scratch; 3506 } 3507 svm->sev_es.ghcb_sa_contained = false; 3508 } 3509 3510 if (svm->sev_es.ghcb_sa_alloc_len < len) { 3511 void *scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); 3512 3513 if (!scratch_va) 3514 return -ENOMEM; 3515 3516 /* 3517 * Free the old scratch area and switch to using newly 3518 * allocated. 3519 */ 3520 kvfree(svm->sev_es.ghcb_sa); 3521 3522 svm->sev_es.ghcb_sa_alloc_len = len; 3523 svm->sev_es.ghcb_sa = scratch_va; 3524 } 3525 3526 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, svm->sev_es.ghcb_sa, len)) { 3527 /* Unable to copy scratch area from guest */ 3528 pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); 3529 return -EFAULT; 3530 } 3531 3532 /* 3533 * The operation will dictate whether the buffer needs to be synced 3534 * before running the vCPU next time (i.e. a read was requested so 3535 * the data must be written back to the guest memory). 
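 *
 * For example, sev_handle_vmgexit() passes sync=true for
 * SVM_VMGEXIT_MMIO_READ, where KVM fills the buffer and it must be copied
 * back to guest memory on the next GHCB sync, and sync=false for
 * SVM_VMGEXIT_MMIO_WRITE, where the guest-supplied data is only consumed by
 * KVM.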
3536 */ 3537 svm->sev_es.ghcb_sa_sync = sync; 3538 svm->sev_es.ghcb_sa_len = len; 3539 3540 return 0; 3541 3542e_scratch: 3543 svm_set_ghcb_sw_exit_info_1(&svm->vcpu, 2); 3544 svm_set_ghcb_sw_exit_info_2(&svm->vcpu, GHCB_ERR_INVALID_SCRATCH_AREA); 3545 3546 return 1; 3547} 3548 3549static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, 3550 unsigned int pos) 3551{ 3552 svm->vmcb->control.ghcb_gpa &= ~(mask << pos); 3553 svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; 3554} 3555 3556static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) 3557{ 3558 return (svm->vmcb->control.ghcb_gpa >> pos) & mask; 3559} 3560 3561static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) 3562{ 3563 svm->vmcb->control.ghcb_gpa = value; 3564} 3565 3566static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn) 3567{ 3568 pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1); 3569 3570 return psmash(pfn); 3571} 3572 3573static int snp_make_page_shared(struct kvm *kvm, gpa_t gpa, kvm_pfn_t pfn, int level) 3574{ 3575 int rc, rmp_level; 3576 3577 rc = snp_lookup_rmpentry(pfn, &rmp_level); 3578 if (rc < 0) 3579 return -EINVAL; 3580 3581 /* If page is not assigned then do nothing */ 3582 if (!rc) 3583 return 0; 3584 3585 /* 3586 * Is the page part of an existing 2MB RMP entry ? Split the 2MB into 3587 * multiple of 4K-page before making the memory shared. 3588 */ 3589 if (level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M) { 3590 rc = snp_rmptable_psmash(kvm, pfn); 3591 if (rc) 3592 return rc; 3593 } 3594 3595 return rmp_make_shared(pfn, level); 3596} 3597 3598static int snp_check_and_build_npt(struct kvm_vcpu *vcpu, gpa_t gpa, int level) 3599{ 3600 struct kvm *kvm = vcpu->kvm; 3601 int rc, npt_level; 3602 kvm_pfn_t pfn; 3603 3604 /* 3605 * Get the pfn and level for the gpa from the nested page table. 3606 * 3607 * If the tdp walk fails, then its safe to say that there is no 3608 * valid mapping for this gpa. Create a fault to build the map. 3609 */ 3610 write_lock(&kvm->mmu_lock); 3611 rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level); 3612 write_unlock(&kvm->mmu_lock); 3613 if (!rc) { 3614 pfn = kvm_mmu_map_tdp_page(vcpu, gpa, PFERR_USER_MASK, level); 3615 if (is_error_noslot_pfn(pfn)) 3616 return -EINVAL; 3617 } 3618 3619 return 0; 3620} 3621 3622static int snp_gpa_to_hva(struct kvm *kvm, gpa_t gpa, hva_t *hva) 3623{ 3624 struct kvm_memory_slot *slot; 3625 gfn_t gfn = gpa_to_gfn(gpa); 3626 int idx; 3627 3628 idx = srcu_read_lock(&kvm->srcu); 3629 slot = gfn_to_memslot(kvm, gfn); 3630 if (!slot) { 3631 srcu_read_unlock(&kvm->srcu, idx); 3632 return -EINVAL; 3633 } 3634 3635 /* 3636 * Note, using the __gfn_to_hva_memslot() is not solely for performance, 3637 * it's also necessary to avoid the "writable" check in __gfn_to_hva_many(), 3638 * which will always fail on read-only memslots due to gfn_to_hva() assuming 3639 * writes. 3640 */ 3641 *hva = __gfn_to_hva_memslot(slot, gfn); 3642 srcu_read_unlock(&kvm->srcu, idx); 3643 3644 return 0; 3645} 3646 3647static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, enum psc_op op, gpa_t gpa, 3648 int level) 3649{ 3650 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 3651 struct kvm *kvm = vcpu->kvm; 3652 int rc, npt_level; 3653 kvm_pfn_t pfn; 3654 gpa_t gpa_end; 3655 3656 gpa_end = gpa + page_level_size(level); 3657 3658 while (gpa < gpa_end) { 3659 /* 3660 * If the gpa is not present in the NPT then build the NPT. 
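 *
 * Building the mapping up front means the later TDP walk under mmu_lock in
 * __snp_handle_page_state_change() can reliably return the pfn and level
 * that the RMP update operates on.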
3661 */
3662 rc = snp_check_and_build_npt(vcpu, gpa, level);
3663 if (rc)
3664 return PSC_UNDEF_ERR;
3665
3666 if (op == SNP_PAGE_STATE_PRIVATE) {
3667 hva_t hva;
3668
3669 if (snp_gpa_to_hva(kvm, gpa, &hva))
3670 return PSC_UNDEF_ERR;
3671
3672 /*
3673 * Verify that the hva range is registered. This enforcement is
3674 * required to avoid the cases where a page is marked private
3675 * in the RMP table but never gets cleaned up during the VM
3676 * termination path.
3677 */
3678 mutex_lock(&kvm->lock);
3679 rc = is_hva_registered(kvm, hva, page_level_size(level));
3680 mutex_unlock(&kvm->lock);
3681 if (!rc)
3682 return PSC_UNDEF_ERR;
3683
3684 /*
3685 * Mark the userspace range unmergeable before adding the pages
3686 * to the RMP table.
3687 */
3688 mmap_write_lock(kvm->mm);
3689 rc = snp_mark_unmergable(kvm, hva, page_level_size(level));
3690 mmap_write_unlock(kvm->mm);
3691 if (rc)
3692 return PSC_UNDEF_ERR;
3693 }
3694
3695 spin_lock(&sev->psc_lock);
3696
3697 write_lock(&kvm->mmu_lock);
3698
3699 rc = kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level);
3700 if (!rc) {
3701 /*
3702 * This may happen if another vCPU unmapped the page
3703 * before we acquire the lock. Retry the PSC.
3704 */
3705 write_unlock(&kvm->mmu_lock);
3706 spin_unlock(&sev->psc_lock);
3707 return 0;
3708 }
3709
3710 /*
3711 * Adjust the level so that we don't go higher than the backing
3712 * page level.
3713 */
3714 level = min_t(size_t, level, npt_level);
3715
3716 trace_kvm_snp_psc(vcpu->vcpu_id, pfn, gpa, op, level);
3717
3718 switch (op) {
3719 case SNP_PAGE_STATE_SHARED:
3720 rc = snp_make_page_shared(kvm, gpa, pfn, level);
3721 break;
3722 case SNP_PAGE_STATE_PRIVATE:
3723 rc = rmp_make_private(pfn, gpa, level, sev->asid, false);
3724 break;
3725 default:
3726 rc = PSC_INVALID_ENTRY;
3727 break;
3728 }
3729
3730 write_unlock(&kvm->mmu_lock);
3731
3732 spin_unlock(&sev->psc_lock);
3733
3734 if (rc) {
3735 pr_err_ratelimited("Error op %d gpa %llx pfn %llx level %d rc %d\n",
3736 op, gpa, pfn, level, rc);
3737 return rc;
3738 }
3739
3740 gpa = gpa + page_level_size(level);
3741 }
3742
3743 return 0;
3744}
3745
3746static inline unsigned long map_to_psc_vmgexit_code(int rc)
3747{
3748 switch (rc) {
3749 case PSC_INVALID_HDR:
3750 return ((1ul << 32) | 1);
3751 case PSC_INVALID_ENTRY:
3752 return ((1ul << 32) | 2);
3753 case RMPUPDATE_FAIL_OVERLAP:
3754 return ((3ul << 32) | 2);
3755 default: return (4ul << 32);
3756 }
3757}
3758
3759static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm)
3760{
3761 struct kvm_vcpu *vcpu = &svm->vcpu;
3762 int level, op, rc = PSC_UNDEF_ERR;
3763 struct snp_psc_desc *info;
3764 struct psc_entry *entry;
3765 u16 cur, end;
3766 gpa_t gpa;
3767
3768 if (!sev_snp_guest(vcpu->kvm))
3769 return PSC_INVALID_HDR;
3770
3771 if (setup_vmgexit_scratch(svm, true, sizeof(*info))) {
3772 pr_err("vmgexit: scratch area is not setup.\n");
3773 return PSC_INVALID_HDR;
3774 }
3775
3776 info = (struct snp_psc_desc *)svm->sev_es.ghcb_sa;
3777 cur = info->hdr.cur_entry;
3778 end = info->hdr.end_entry;
3779
3780 if (cur >= VMGEXIT_PSC_MAX_ENTRY ||
3781 end >= VMGEXIT_PSC_MAX_ENTRY || cur > end)
3782 return PSC_INVALID_ENTRY;
3783
3784 for (; cur <= end; cur++) {
3785 entry = &info->entries[cur];
3786 gpa = gfn_to_gpa(entry->gfn);
3787 level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
3788 op = entry->operation;
3789
3790 if (!IS_ALIGNED(gpa, page_level_size(level))) {
3791 rc = PSC_INVALID_ENTRY;
3792 goto out;
3793 }
3794
3795 rc = __snp_handle_page_state_change(vcpu, op, gpa, level);
3796 if
(rc) 3797 goto out; 3798 } 3799 3800out: 3801 info->hdr.cur_entry = cur; 3802 return rc ? map_to_psc_vmgexit_code(rc) : 0; 3803} 3804 3805static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm, 3806 struct sev_data_snp_guest_request *data, 3807 gpa_t req_gpa, gpa_t resp_gpa) 3808{ 3809 struct kvm_vcpu *vcpu = &svm->vcpu; 3810 struct kvm *kvm = vcpu->kvm; 3811 kvm_pfn_t req_pfn, resp_pfn; 3812 struct kvm_sev_info *sev; 3813 3814 sev = &to_kvm_svm(kvm)->sev_info; 3815 3816 if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE)) 3817 return SEV_RET_INVALID_PARAM; 3818 3819 req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa)); 3820 if (is_error_noslot_pfn(req_pfn)) 3821 return SEV_RET_INVALID_ADDRESS; 3822 3823 resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa)); 3824 if (is_error_noslot_pfn(resp_pfn)) 3825 return SEV_RET_INVALID_ADDRESS; 3826 3827 if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true)) 3828 return SEV_RET_INVALID_ADDRESS; 3829 3830 data->gctx_paddr = __psp_pa(sev->snp_context); 3831 data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT); 3832 data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT); 3833 3834 return 0; 3835} 3836 3837static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc) 3838{ 3839 u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT; 3840 int ret; 3841 3842 ret = snp_page_reclaim(pfn); 3843 if (ret) 3844 *rc = SEV_RET_INVALID_ADDRESS; 3845 3846 ret = rmp_make_shared(pfn, PG_LEVEL_4K); 3847 if (ret) 3848 *rc = SEV_RET_INVALID_ADDRESS; 3849} 3850 3851static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) 3852{ 3853 struct sev_data_snp_guest_request data = {0}; 3854 struct kvm_vcpu *vcpu = &svm->vcpu; 3855 struct kvm *kvm = vcpu->kvm; 3856 struct kvm_sev_info *sev; 3857 unsigned long rc; 3858 int err; 3859 3860 if (!sev_snp_guest(vcpu->kvm)) { 3861 rc = SEV_RET_INVALID_GUEST; 3862 goto e_fail; 3863 } 3864 3865 sev = &to_kvm_svm(kvm)->sev_info; 3866 3867 mutex_lock(&sev->guest_req_lock); 3868 3869 rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa); 3870 if (rc) 3871 goto unlock; 3872 3873 rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err); 3874 if (rc) 3875 /* use the firmware error code */ 3876 rc = err; 3877 3878 snp_cleanup_guest_buf(&data, &rc); 3879 3880unlock: 3881 mutex_unlock(&sev->guest_req_lock); 3882 3883e_fail: 3884 svm_set_ghcb_sw_exit_info_2(vcpu, rc); 3885} 3886 3887static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) 3888{ 3889 struct sev_data_snp_guest_request req = {0}; 3890 struct kvm_vcpu *vcpu = &svm->vcpu; 3891 struct kvm *kvm = vcpu->kvm; 3892 unsigned long data_npages; 3893 struct kvm_sev_info *sev; 3894 unsigned long rc, err; 3895 u64 data_gpa; 3896 3897 if (!sev_snp_guest(vcpu->kvm)) { 3898 rc = SEV_RET_INVALID_GUEST; 3899 goto e_fail; 3900 } 3901 3902 sev = &to_kvm_svm(kvm)->sev_info; 3903 3904 data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; 3905 data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; 3906 3907 if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { 3908 rc = SEV_RET_INVALID_ADDRESS; 3909 goto e_fail; 3910 } 3911 3912 /* Verify that requested blob will fit in certificate buffer */ 3913 if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { 3914 rc = SEV_RET_INVALID_PARAM; 3915 goto e_fail; 3916 } 3917 3918 mutex_lock(&sev->guest_req_lock); 3919 3920 rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); 3921 if (rc) 3922 goto unlock; 3923 3924 rc = snp_guest_ext_guest_request(&req, (unsigned 
long)sev->snp_certs_data, 3925 &data_npages, &err); 3926 if (rc) { 3927 /* 3928 * If buffer length is small then return the expected 3929 * length in rbx. 3930 */ 3931 if (err == SNP_GUEST_REQ_INVALID_LEN) 3932 vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; 3933 3934 /* pass the firmware error code */ 3935 rc = err; 3936 goto cleanup; 3937 } 3938 3939 /* Copy the certificate blob in the guest memory */ 3940 if (data_npages && 3941 kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) 3942 rc = SEV_RET_INVALID_ADDRESS; 3943 3944cleanup: 3945 snp_cleanup_guest_buf(&req, &rc); 3946 3947unlock: 3948 mutex_unlock(&sev->guest_req_lock); 3949 3950e_fail: 3951 svm_set_ghcb_sw_exit_info_2(vcpu, rc); 3952} 3953 3954static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu) 3955{ 3956 struct vcpu_svm *svm = to_svm(vcpu); 3957 kvm_pfn_t pfn; 3958 hpa_t cur_pa; 3959 3960 WARN_ON(!mutex_is_locked(&svm->sev_es.snp_vmsa_mutex)); 3961 3962 /* Save off the current VMSA PA for later checks */ 3963 cur_pa = svm->sev_es.vmsa_pa; 3964 3965 /* Mark the vCPU as offline and not runnable */ 3966 vcpu->arch.pv.pv_unhalted = false; 3967 vcpu->arch.mp_state = KVM_MP_STATE_STOPPED; 3968 3969 /* Clear use of the VMSA */ 3970 svm->sev_es.vmsa_pa = INVALID_PAGE; 3971 svm->vmcb->control.vmsa_pa = INVALID_PAGE; 3972 3973 if (cur_pa != __pa(svm->sev_es.vmsa) && VALID_PAGE(cur_pa)) { 3974 /* 3975 * The svm->sev_es.vmsa_pa field holds the hypervisor physical 3976 * address of the about to be replaced VMSA which will no longer 3977 * be used or referenced, so un-pin it. 3978 */ 3979 kvm_release_pfn_dirty(__phys_to_pfn(cur_pa)); 3980 } 3981 3982 if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) { 3983 /* 3984 * The VMSA is referenced by the hypervisor physical address, 3985 * so retrieve the PFN and pin it. 3986 */ 3987 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(svm->sev_es.snp_vmsa_gpa)); 3988 if (is_error_pfn(pfn)) 3989 return -EINVAL; 3990 3991 /* Use the new VMSA */ 3992 svm->sev_es.vmsa_pa = pfn_to_hpa(pfn); 3993 svm->vmcb->control.vmsa_pa = svm->sev_es.vmsa_pa; 3994 3995 /* Mark the vCPU as runnable */ 3996 vcpu->arch.pv.pv_unhalted = false; 3997 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3998 3999 svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; 4000 } 4001 4002 /* 4003 * When replacing the VMSA during SEV-SNP AP creation, 4004 * mark the VMCB dirty so that full state is always reloaded. 4005 */ 4006 vmcb_mark_all_dirty(svm->vmcb); 4007 4008 return 0; 4009} 4010 4011/* 4012 * Invoked as part of svm_vcpu_reset() processing of an init event. 
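 *
 * Flow sketch: sev_snp_ap_creation() records the new VMSA GPA, sets
 * snp_ap_create and kicks the target vCPU with
 * KVM_REQ_UPDATE_PROTECTED_GUEST_STATE; this path then calls
 * __sev_snp_update_protected_guest_state() to switch the VMCB over to the
 * new VMSA.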
4013 */ 4014void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) 4015{ 4016 struct vcpu_svm *svm = to_svm(vcpu); 4017 int ret; 4018 4019 if (!sev_snp_guest(vcpu->kvm)) 4020 return; 4021 4022 mutex_lock(&svm->sev_es.snp_vmsa_mutex); 4023 4024 if (!svm->sev_es.snp_ap_create) 4025 goto unlock; 4026 4027 svm->sev_es.snp_ap_create = false; 4028 4029 ret = __sev_snp_update_protected_guest_state(vcpu); 4030 if (ret) 4031 vcpu_unimpl(vcpu, "snp: AP state update on init failed\n"); 4032 4033unlock: 4034 mutex_unlock(&svm->sev_es.snp_vmsa_mutex); 4035} 4036 4037static int sev_snp_ap_creation(struct vcpu_svm *svm) 4038{ 4039 struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; 4040 struct kvm_vcpu *vcpu = &svm->vcpu; 4041 struct kvm_vcpu *target_vcpu; 4042 struct vcpu_svm *target_svm; 4043 unsigned int request; 4044 unsigned int apic_id; 4045 bool kick; 4046 int ret; 4047 4048 request = lower_32_bits(svm->vmcb->control.exit_info_1); 4049 apic_id = upper_32_bits(svm->vmcb->control.exit_info_1); 4050 4051 /* Validate the APIC ID */ 4052 target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id); 4053 if (!target_vcpu) { 4054 vcpu_unimpl(vcpu, "vmgexit: invalid AP APIC ID [%#x] from guest\n", 4055 apic_id); 4056 return -EINVAL; 4057 } 4058 4059 ret = 0; 4060 4061 target_svm = to_svm(target_vcpu); 4062 4063 /* 4064 * We have a valid target vCPU, so the vCPU will be kicked unless the 4065 * request is for CREATE_ON_INIT. For any errors at this stage, the 4066 * kick will place the vCPU in an non-runnable state. 4067 */ 4068 kick = true; 4069 4070 mutex_lock(&target_svm->sev_es.snp_vmsa_mutex); 4071 4072 target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; 4073 target_svm->sev_es.snp_ap_create = true; 4074 4075 /* Interrupt injection mode shouldn't change for AP creation */ 4076 if (request < SVM_VMGEXIT_AP_DESTROY) { 4077 u64 sev_features; 4078 4079 sev_features = vcpu->arch.regs[VCPU_REGS_RAX]; 4080 sev_features ^= sev->sev_features; 4081 if (sev_features & SVM_SEV_FEAT_INT_INJ_MODES) { 4082 vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n", 4083 vcpu->arch.regs[VCPU_REGS_RAX]); 4084 ret = -EINVAL; 4085 goto out; 4086 } 4087 } 4088 4089 switch (request) { 4090 case SVM_VMGEXIT_AP_CREATE_ON_INIT: 4091 kick = false; 4092 fallthrough; 4093 case SVM_VMGEXIT_AP_CREATE: 4094 if (!page_address_valid(vcpu, svm->vmcb->control.exit_info_2)) { 4095 vcpu_unimpl(vcpu, "vmgexit: invalid AP VMSA address [%#llx] from guest\n", 4096 svm->vmcb->control.exit_info_2); 4097 ret = -EINVAL; 4098 goto out; 4099 } 4100 4101 target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2; 4102 break; 4103 case SVM_VMGEXIT_AP_DESTROY: 4104 break; 4105 default: 4106 vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n", 4107 request); 4108 ret = -EINVAL; 4109 break; 4110 } 4111 4112out: 4113 if (kick) { 4114 if (target_vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) 4115 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4116 4117 kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu); 4118 kvm_vcpu_kick(target_vcpu); 4119 } 4120 4121 mutex_unlock(&target_svm->sev_es.snp_vmsa_mutex); 4122 4123 return ret; 4124} 4125 4126static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) 4127{ 4128 struct vmcb_control_area *control = &svm->vmcb->control; 4129 struct kvm_vcpu *vcpu = &svm->vcpu; 4130 u64 ghcb_info; 4131 int ret = 1; 4132 4133 ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; 4134 4135 trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, 
					     control->ghcb_gpa);

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
						    GHCB_VERSION_MIN,
						    sev_enc_bit));
		break;
	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
		if (!ret) {
			/* Error, keep GHCB MSR value as-is */
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_AP_RESET_HOLD_REQ:
		svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);

		/*
		 * Preset the result to a non-SIPI return and then only set
		 * the result to non-zero when delivering a SIPI.
		 */
		set_ghcb_msr_bits(svm, 0,
				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	case GHCB_MSR_HV_FT_REQ: {
		set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED,
				  GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS);
		set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP,
				  GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_PREF_GPA_REQ: {
		set_ghcb_msr_bits(svm, GHCB_MSR_PREF_GPA_NONE, GHCB_MSR_GPA_VALUE_MASK,
				  GHCB_MSR_GPA_VALUE_POS);
		set_ghcb_msr_bits(svm, GHCB_MSR_PREF_GPA_RESP, GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_REG_GPA_REQ: {
		u64 gfn;

		gfn = get_ghcb_msr_bits(svm, GHCB_MSR_GPA_VALUE_MASK,
					GHCB_MSR_GPA_VALUE_POS);

		svm->sev_es.ghcb_registered_gpa = gfn_to_gpa(gfn);

		set_ghcb_msr_bits(svm, gfn, GHCB_MSR_GPA_VALUE_MASK,
				  GHCB_MSR_GPA_VALUE_POS);
		set_ghcb_msr_bits(svm, GHCB_MSR_REG_GPA_RESP, GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_PSC_REQ: {
		gfn_t gfn;
		int ret;
		enum psc_op op;

		gfn = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_GFN_MASK, GHCB_MSR_PSC_GFN_POS);
		op = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_OP_MASK, GHCB_MSR_PSC_OP_POS);

		ret = __snp_handle_page_state_change(vcpu, op, gfn_to_gpa(gfn), PG_LEVEL_4K);

		if (ret)
			set_ghcb_msr_bits(svm, GHCB_MSR_PSC_ERROR,
					  GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
		else
			set_ghcb_msr_bits(svm, 0,
					  GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);

		set_ghcb_msr_bits(svm, 0, GHCB_MSR_PSC_RSVD_MASK, GHCB_MSR_PSC_RSVD_POS);
		set_ghcb_msr_bits(svm, GHCB_MSR_PSC_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_TERM_REQ: {
		u64 reason_set,
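		    /* reason-code set and reason code packed into the GHCB MSR by the guest */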
		    reason_code;

		reason_set = get_ghcb_msr_bits(svm,
					       GHCB_MSR_TERM_REASON_SET_MASK,
					       GHCB_MSR_TERM_REASON_SET_POS);
		reason_code = get_ghcb_msr_bits(svm,
						GHCB_MSR_TERM_REASON_MASK,
						GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);

		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
		vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM;
		vcpu->run->system_event.ndata = 1;
		vcpu->run->system_event.data[0] = control->ghcb_gpa;

		return 0;
	}
	default:
		/* Error, keep GHCB MSR value as-is */
		break;
	}

	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
					    control->ghcb_gpa, ret);

	return ret;
}

int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 ghcb_gpa, exit_code;
	int ret;

	/* Validate the GHCB */
	ghcb_gpa = control->ghcb_gpa;
	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
		return sev_handle_vmgexit_msr_protocol(svm);

	if (!ghcb_gpa) {
		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");

		/* Without a GHCB, just return right back to the guest */
		return 1;
	}

	/* An SEV-SNP guest requires the GHCB GPA to be registered */
	if (sev_snp_guest(svm->vcpu.kvm) && !ghcb_gpa_is_registered(svm, ghcb_gpa)) {
		vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB GPA [%#llx] is not registered.\n", ghcb_gpa);
		return -EINVAL;
	}

	ret = sev_es_validate_vmgexit(svm, &exit_code);
	if (ret)
		return ret;

	svm->sev_es.ghcb_in_use = true;

	svm_set_ghcb_sw_exit_info_1(vcpu, 0);
	svm_set_ghcb_sw_exit_info_2(vcpu, 0);

	switch (exit_code) {
	case SVM_VMGEXIT_MMIO_READ:
		ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
		if (ret)
			break;

		ret = kvm_sev_es_mmio_read(vcpu,
					   control->exit_info_1,
					   control->exit_info_2,
					   svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_MMIO_WRITE:
		ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
		if (ret)
			break;

		ret = kvm_sev_es_mmio_write(vcpu,
					    control->exit_info_1,
					    control->exit_info_2,
					    svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
		ret = kvm_emulate_ap_reset_hold(vcpu);
		break;
	case SVM_VMGEXIT_AP_JUMP_TABLE: {
		struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;

		switch (control->exit_info_1) {
		case 0:
			/* Set AP jump table address */
			sev->ap_jump_table = control->exit_info_2;
			break;
		case 1:
			/* Get AP jump table address */
			svm_set_ghcb_sw_exit_info_2(vcpu, sev->ap_jump_table);
			break;
		default:
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
			svm_set_ghcb_sw_exit_info_1(vcpu, 2);
			svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_ERR_INVALID_INPUT);
		}

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_HV_FEATURES: {
		svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_HV_FT_SUPPORTED);

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_PSC: {
		unsigned long rc;

		ret = 1;

		rc = snp_handle_page_state_change(svm);
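		/*
		 * Report the PSC result back to the guest in SW_EXITINFO2;
		 * a non-zero value indicates the request failed.
		 */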
		svm_set_ghcb_sw_exit_info_2(vcpu, rc);
		break;
	}
	case SVM_VMGEXIT_GUEST_REQUEST: {
		snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2);

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_EXT_GUEST_REQUEST: {
		snp_handle_ext_guest_request(svm,
					     control->exit_info_1,
					     control->exit_info_2);

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_AP_CREATION:
		ret = sev_snp_ap_creation(svm);
		if (ret) {
			svm_set_ghcb_sw_exit_info_1(vcpu, 1);
			svm_set_ghcb_sw_exit_info_2(vcpu,
						    X86_TRAP_GP |
						    SVM_EVTINJ_TYPE_EXEPT |
						    SVM_EVTINJ_VALID);
		}

		ret = 1;
		break;
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
		ret = -EINVAL;
		break;
	default:
		ret = svm_invoke_exit_handler(vcpu, exit_code);
	}

	return ret;
}

int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
	int count;
	int bytes;
	int r;

	if (svm->vmcb->control.exit_info_2 > INT_MAX)
		return -EINVAL;

	count = svm->vmcb->control.exit_info_2;
	if (unlikely(check_mul_overflow(count, size, &bytes)))
		return -EINVAL;

	r = setup_vmgexit_scratch(svm, in, bytes);
	if (r) {
		pr_err("failed to setup vmgexit scratch\n");
		return r;
	}

	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
				    count, in);
}

static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;

	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page.
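	 * The VMSA holds the guest register state and is encrypted with the
	 * guest's key, so only the plain-text VMCB control area stays under
	 * hypervisor control; vmsa_pa below points the hardware at that
	 * separate, per-vCPU page.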
	 */
	svm->vmcb->control.vmsa_pa = svm->sev_es.vmsa_pa;

	/* Can't intercept CR register access, HV can't modify CR registers */
	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);

	/* Track EFER/CR register changes */
	svm_set_intercept(svm, TRAP_EFER_WRITE);
	svm_set_intercept(svm, TRAP_CR0_WRITE);
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);

	/* Clear intercepts on selected MSRs */
	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);

	if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
	    (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
	     guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
		set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
		if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
			svm_clr_intercept(svm, INTERCEPT_RDTSCP);
	}
}

void sev_init_vmcb(struct vcpu_svm *svm)
{
	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
	clr_exception_intercept(svm, UD_VECTOR);

	if (sev_es_guest(svm->vcpu.kvm))
		sev_es_init_vmcb(svm);
}

void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
	/*
	 * Set the GHCB MSR value as per the GHCB specification when emulating
	 * vCPU RESET for an SEV-ES guest.
	 */
	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
					    GHCB_VERSION_MIN,
					    sev_enc_bit));

	mutex_init(&svm->sev_es.snp_vmsa_mutex);
}

void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
	/*
	 * For an SEV-ES guest, hardware will restore the host state on VMEXIT,
	 * of which one step is to perform a VMLOAD. KVM performs the
	 * corresponding VMSAVE in svm_prepare_guest_switch for both
	 * traditional and SEV-ES guests.
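	 *
	 * State that VMSAVE does not cover but that hardware restores from
	 * the host save area on VMEXIT (XCR0, PKRU, MSR_IA32_XSS) is stashed
	 * below so the host values survive the world switch.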
	 */

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* First SIPI: Use the values as initially set by the VMM */
	if (!svm->sev_es.received_first_sipi) {
		svm->sev_es.received_first_sipi = true;
		return;
	}

	/* Subsequent SIPI */
	switch (svm->sev_es.ap_reset_hold_type) {
	case AP_RESET_HOLD_NAE_EVENT:
		/*
		 * Return from an AP Reset Hold VMGEXIT, where the guest will
		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
		 */
		svm_set_ghcb_sw_exit_info_2(vcpu, 1);
		break;
	case AP_RESET_HOLD_MSR_PROTO:
		/*
		 * Return from an AP Reset Hold VMGEXIT, where the guest will
		 * set the CS and RIP. Set GHCB data field to a non-zero value.
		 */
		set_ghcb_msr_bits(svm, 1,
				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	default:
		break;
	}
}

struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
{
	unsigned long pfn;
	struct page *p;

	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
		return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);

	/*
	 * Allocate an SNP-safe page to work around the SNP erratum where
	 * the CPU will incorrectly signal an RMP violation #PF if a
	 * hugepage (2MB or 1GB) collides with the RMP entry of the VMCB,
	 * VMSA or AVIC backing page. The recommended workaround is to not
	 * use the hugepage.
	 *
	 * Allocate one extra page, use a page which is not 2MB-aligned
	 * and free the other.
	 */
	p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
	if (!p)
		return NULL;

	split_page(p, 1);

	pfn = page_to_pfn(p);
	if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) {
		pfn++;
		__free_page(p);
	} else {
		__free_page(pfn_to_page(pfn + 1));
	}

	return pfn_to_page(pfn);
}

static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)
{
	int level;

	while (end > start) {
		if (snp_lookup_rmpentry(start, &level) != 0)
			return false;
		start++;
	}

	return true;
}

void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level)
{
	int rmp_level, assigned;

	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
		return;

	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
	if (unlikely(assigned < 0))
		return;

	if (!assigned) {
		/*
		 * If all the pages are shared then no need to keep the RMP
		 * and NPT in sync.
		 */
		pfn = pfn & ~(PTRS_PER_PMD - 1);
		if (is_pfn_range_shared(pfn, pfn + PTRS_PER_PMD))
			return;
	}

	/*
	 * The hardware installs 2MB TLB entries to access 1GB pages,
	 * therefore allow NPT to use 1GB pages when the pfn was added as 2MB
	 * in the RMP table.
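	 * Any other combination is handled below by clamping the requested
	 * NPT level to the RMP level.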
	 */
	if (rmp_level == PG_LEVEL_2M && (*level == PG_LEVEL_1G))
		return;

	/* Adjust the level to keep the NPT and RMP in sync */
	*level = min_t(size_t, *level, rmp_level);
}

int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	int level;

	if (!sev_snp_guest(kvm))
		return 0;

	spin_lock(&sev->psc_lock);

	/* If pfn is not added as private then fail */
	if (snp_lookup_rmpentry(pfn, &level) == 1) {
		spin_unlock(&sev->psc_lock);
		pr_err_ratelimited("failed to map private gfn 0x%llx pfn 0x%llx\n", gfn, pfn);
		return -EBUSY;
	}

	return 0;
}

void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	if (!sev_snp_guest(kvm))
		return;

	spin_unlock(&sev->psc_lock);
}

void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
{
	int rmp_level, npt_level, rc, assigned;
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa_to_gfn(gpa);
	bool need_psc = false;
	enum psc_op psc_op;
	kvm_pfn_t pfn;
	bool private;

	write_lock(&kvm->mmu_lock);

	if (unlikely(!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level)))
		goto unlock;

	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
	if (unlikely(assigned < 0))
		goto unlock;

	private = !!(error_code & PFERR_GUEST_ENC_MASK);

	/*
	 * If the fault was due to a size mismatch, or the NPT and RMP page
	 * levels are not in sync, then use PSMASH to split the RMP entry
	 * into 4K.
	 */
	if ((error_code & PFERR_GUEST_SIZEM_MASK) ||
	    (npt_level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M && private)) {
		rc = snp_rmptable_psmash(kvm, pfn);
		if (rc)
			pr_err_ratelimited("psmash failed, gpa 0x%llx pfn 0x%llx rc %d\n",
					   gpa, pfn, rc);
		goto out;
	}

	/*
	 * If it's a private access, and the page is not assigned in the
	 * RMP table, create a new private RMP entry. This can happen if the
	 * guest did not use the PSC VMGEXIT to transition the page state
	 * before the access.
	 */
	if (!assigned && private) {
		need_psc = true;
		psc_op = SNP_PAGE_STATE_PRIVATE;
		goto out;
	}

	/*
	 * If it's a shared access, but the page is private in the RMP table,
	 * then make the page shared in the RMP table. This can happen if
	 * the guest did not use the PSC VMGEXIT to transition the page
	 * state before the access.
	 */
	if (assigned && !private) {
		need_psc = true;
		psc_op = SNP_PAGE_STATE_SHARED;
	}

out:
	write_unlock(&kvm->mmu_lock);

	if (need_psc)
		rc = __snp_handle_page_state_change(vcpu, psc_op, gpa, PG_LEVEL_4K);

	/*
	 * The fault handler has updated the RMP pagesize, zap the existing
	 * rmaps for large entry ranges so that the nested page table gets
	 * rebuilt with the updated RMP pagesize.
	 */
	gfn = gpa_to_gfn(gpa) & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
	return;

unlock:
	write_unlock(&kvm->mmu_lock);
}
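
/*
 * Summary of the RMP #NPF handling strategy above:
 *
 *  - A size mismatch, or a private 4K NPT mapping backed by a 2M RMP entry,
 *    is resolved by PSMASHing the RMP entry down to 4K.
 *  - A private access to an unassigned page triggers an implicit page state
 *    change to private; a shared access to an assigned page triggers an
 *    implicit change to shared.
 *  - Afterwards the surrounding 2M-aligned gfn range is zapped so the nested
 *    page tables are rebuilt against the updated RMP page size.
 */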