diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b804444e16d4..17167ccfca22 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ccflags-y += -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm -O2
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
@@ -10,7 +10,9 @@ endif
 KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+				svm/cachepc/cachepc.o svm/cachepc/util.o
+
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
@@ -20,7 +22,8 @@ kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
 
-kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
+kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o \
+			   svm/cachepc/cachepc.o svm/cachepc/util.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7b3cfbe8f7e3..cb60859f7d17 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2,6 +2,8 @@
 
 #include <linux/kvm_host.h>
 
+#include "cachepc/cachepc.h"
+
 #include "irq.h"
 #include "mmu.h"
 #include "kvm_cache_regs.h"
@@ -3751,7 +3753,14 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	if (sev_es_guest(svm->vcpu.kvm)) {
 		__svm_sev_es_vcpu_run(svm->vmcb_pa);
 	} else {
+		int cpu = get_cpu();
+
+		memset(cachepc_msrmts, 0, 64 * sizeof(uint16_t));
+		local_irq_disable();
+		WARN_ON(cpu != 2);
 		__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+		cachepc_save_msrmts(cachepc_ds);
+		local_irq_enable();
+		put_cpu();
 
 #ifdef CONFIG_X86_64
 		native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
@@ -3785,8 +3794,12 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vcpu_svm *svm;
 
+	printk(KERN_WARNING "CachePC: svm_vcpu_run()\n");
+	WARN_ON(smp_processor_id() != 2);
+
+	svm = to_svm(vcpu);
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
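The svm.c hunks bracket the guest entry with the measurement window: the per-set result buffer is cleared, the vCPU thread is checked to be running on core 2 (the core all CachePC work is pinned to), interrupts stay off across __svm_vcpu_run() so no host activity pollutes the primed L1 sets, and cachepc_save_msrmts() extracts the counts right after VM-exit. The patch does not include svm/cachepc/cachepc.c, so as a reading aid, here is a minimal sketch of what the save step plausibly does, using only the cacheline fields (next, count, cache_set, IS_FIRST(flags)) that the self tests further below rely on; a hypothetical illustration, not the actual implementation:

    /* Hypothetical sketch, not the actual svm/cachepc/cachepc.c code:
     * walk the circular eviction-set list once after VM-exit and store
     * each cache set's access count into the measurement buffer that
     * is exposed through /proc/cachepc. */
    void cachepc_save_msrmts(cacheline *head)
    {
            cacheline *cl;

            cl = head;
            do {
                    if (IS_FIRST(cl->flags))
                            cachepc_msrmts[cl->cache_set] = cl->count;
                    cl = cl->next;
            } while (cl != head);
    }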
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 6feb8c08f45a..2f259db92037 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -27,14 +27,53 @@
 #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
 #endif
 
+.extern cachepc_msrmts
+
 .section .noinstr.text, "ax"
 
+.macro push_xmm gpr xmm
+	vmovq \gpr, \xmm
+.endm
+
+.macro pop_xmm gpr xmm
+	vmovq \xmm, \gpr
+.endm
+
+/* exchange \gpr and \xmm, clobbering %xmm1 as scratch */
+.macro swap_xmm gpr xmm
+	vmovq \gpr, %xmm1
+	vmovq \xmm, \gpr
+	vmovq %xmm1, \xmm
+.endm
+
+.macro barrier
+	mfence
+	mov $0x80000005, %eax
+	cpuid
+.endm
+
 /**
  * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
  * @vmcb_pa:	unsigned long
  * @regs:	unsigned long * (to guest registers)
  */
 SYM_FUNC_START(__svm_vcpu_run)
+	/* store regs in xmm, leaving %xmm1 free as swap_xmm scratch */
+	push_xmm %rax, %xmm0
+	push_xmm %rcx, %xmm2
+	push_xmm %rdx, %xmm3
+	push_xmm %rbp, %xmm4
+	push_xmm %rsp, %xmm5
+	push_xmm %rdi, %xmm6
+	push_xmm %rsi, %xmm7
+	push_xmm %r8, %xmm8
+	push_xmm %r9, %xmm9
+	push_xmm %r10, %xmm10
+	push_xmm %r11, %xmm11
+	push_xmm %r12, %xmm12
+	push_xmm %r13, %xmm13
+	push_xmm %r14, %xmm14
+	push_xmm %r15, %xmm15
+
 	push %_ASM_BP
 #ifdef CONFIG_X86_64
 	push %r15
@@ -45,6 +84,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 	push %edi
 	push %esi
 #endif
+
 	push %_ASM_BX
 
 	/* Save @regs. */
@@ -85,8 +125,87 @@ SYM_FUNC_START(__svm_vcpu_run)
 	jne 3f
 	ud2
 	_ASM_EXTABLE(1b, 2b)
+
+3:
+	swap_xmm %rax, %xmm0
+	swap_xmm %rcx, %xmm2
+	swap_xmm %rdx, %xmm3
+	swap_xmm %rbp, %xmm4
+	swap_xmm %rsp, %xmm5
+	swap_xmm %rdi, %xmm6
+	swap_xmm %rsi, %xmm7
+	swap_xmm %r8, %xmm8
+	swap_xmm %r9, %xmm9
+	swap_xmm %r10, %xmm10
+	swap_xmm %r11, %xmm11
+	swap_xmm %r12, %xmm12
+	swap_xmm %r13, %xmm13
+	swap_xmm %r14, %xmm14
+	swap_xmm %r15, %xmm15
+
+	mov cachepc_ds, %rsi
+	mov 0x8(%rsi), %r15
+
+	lea prime_ret(%rip), %rdi
+	jmp cachepc_prime_vcall+1	// skip stack pushes
+prime_ret:
+
+	swap_xmm %rax, %xmm0
+	swap_xmm %rcx, %xmm2
+	swap_xmm %rdx, %xmm3
+	swap_xmm %rbp, %xmm4
+	swap_xmm %rsp, %xmm5
+	swap_xmm %rdi, %xmm6
+	swap_xmm %rsi, %xmm7
+	swap_xmm %r8, %xmm8
+	swap_xmm %r9, %xmm9
+	swap_xmm %r10, %xmm10
+	swap_xmm %r11, %xmm11
+	swap_xmm %r12, %xmm12
+	swap_xmm %r13, %xmm13
+	swap_xmm %r14, %xmm14
+	swap_xmm %r15, %xmm15
+
+	vmrun %_ASM_AX
+
+	swap_xmm %rax, %xmm0
+	swap_xmm %rcx, %xmm2
+	swap_xmm %rdx, %xmm3
+	swap_xmm %rbp, %xmm4
+	swap_xmm %rsp, %xmm5
+	swap_xmm %rdi, %xmm6
+	swap_xmm %rsi, %xmm7
+	swap_xmm %r8, %xmm8
+	swap_xmm %r9, %xmm9
+	swap_xmm %r10, %xmm10
+	swap_xmm %r11, %xmm11
+	swap_xmm %r12, %xmm12
+	swap_xmm %r13, %xmm13
+	swap_xmm %r14, %xmm14
+	swap_xmm %r15, %xmm15
+
+	mov %r15, %rsi
+	lea probe_ret(%rip), %rdi
+	jmp cachepc_probe_vcall+6	// skip stack pushes
+probe_ret:
+
+	swap_xmm %rax, %xmm0
+	swap_xmm %rcx, %xmm2
+	swap_xmm %rdx, %xmm3
+	swap_xmm %rbp, %xmm4
+	swap_xmm %rsp, %xmm5
+	swap_xmm %rdi, %xmm6
+	swap_xmm %rsi, %xmm7
+	swap_xmm %r8, %xmm8
+	swap_xmm %r9, %xmm9
+	swap_xmm %r10, %xmm10
+	swap_xmm %r11, %xmm11
+	swap_xmm %r12, %xmm12
+	swap_xmm %r13, %xmm13
+	swap_xmm %r14, %xmm14
+	swap_xmm %r15, %xmm15
+
-3:	vmrun %_ASM_AX
 	jmp 5f
 4:	cmpb $0, kvm_rebooting
 	jne 5f
@@ -100,7 +219,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 	ud2
 	_ASM_EXTABLE(5b, 6b)
 7:
-	cli
+	cli
 
 #ifdef CONFIG_RETPOLINE
 	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
@@ -166,6 +285,11 @@ SYM_FUNC_START(__svm_vcpu_run)
 	pop %edi
 #endif
 	pop %_ASM_BP
+
+	# mov cachepc_msrmts(%rip), %rax
+	# mov $0x1, %edx
+	# mov %dx, (%rax)
+
 	ret
 SYM_FUNC_END(__svm_vcpu_run)
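The vmenter.S changes build the measured entry path. On function entry the current GPRs are stashed in xmm registers (xmm1 stays free as swap_xmm scratch, which is why the mapping skips it); around vmrun, swap_xmm exchanges the GPR and xmm contents, so the prime/probe helpers run with their own register state while the guest's registers wait in xmm, all without spilling to the stack and thereby touching the L1 sets under observation. The jmp to cachepc_prime_vcall+1 and cachepc_probe_vcall+6 skips the push instructions at the head of those routines for the same reason; the return address is handed over in %rdi, and the pointer loaded from offset 8 of cachepc_ds (plausibly the next pointer) survives vmrun in %r15 for the probe call. In C terms, the two helpers form the usual Prime+Probe pair; a minimal sketch of their semantics, assuming a circular eviction set and the counter programmed by cachepc_init_pmc() (hypothetical bodies, the real ones live in svm/cachepc/cachepc.c):

    /* Hypothetical sketch of the Prime+Probe primitives, assuming a
     * circular, pointer-chased eviction set; not the real versions. */
    cacheline *cachepc_prime(cacheline *head)
    {
            cacheline *cl = head;

            /* Prime: touch every line so each L1 set is filled with
             * attacker-controlled data before the victim runs. */
            do {
                    cl = cl->next;
            } while (cl != head);

            return head;
    }

    void cachepc_probe(cacheline *head)
    {
            uint64_t pre, post;
            cacheline *cl = head;

            /* Probe: re-access each line and charge any extra cache
             * events (evictions caused by the victim) to its set. */
            do {
                    pre = cachepc_read_pmc(0);
                    asm volatile ("mov (%0), %%rbx" : : "r"(cl) : "rbx");
                    post = cachepc_read_pmc(0);
                    cl->count = post - pre;
                    cl = cl->next;
            } while (cl != head);
    }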
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4097d028c3ab..81685bd567a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5414,6 +5414,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	int r = -ENOTTY;
+
+	printk(KERN_WARNING "Vincent kvm arch ioctl\n");
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2541a17ff1c4..7efbdfd0e3e2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,9 @@
 #include <linux/io.h>
 #include <linux/lockdep.h>
 #include <linux/kthread.h>
+#include <linux/proc_fs.h>
+#include <linux/smp.h>
+#include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/ioctl.h>
@@ -66,6 +69,8 @@
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
+#include "../../arch/x86/kvm/svm/cachepc/cachepc.h"
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -143,6 +148,18 @@ static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
+struct proc_ops cachepc_proc_ops;
+
+uint16_t *cachepc_msrmts;
+size_t cachepc_msrmts_count;
+EXPORT_SYMBOL(cachepc_msrmts);
+EXPORT_SYMBOL(cachepc_msrmts_count);
+
+cache_ctx *cachepc_ctx;
+cacheline *cachepc_ds;
+EXPORT_SYMBOL(cachepc_ctx);
+EXPORT_SYMBOL(cachepc_ds);
+
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
@@ -4765,12 +4782,327 @@ static void check_processor_compat(void *data)
 	*c->ret = kvm_arch_check_processor_compat(c->opaque);
 }
 
+int
+kvm_cachepc_open(struct inode *inode, struct file *file)
+{
+	try_module_get(THIS_MODULE);
+
+	return 0;
+}
+
+int
+kvm_cachepc_close(struct inode *inode, struct file *file)
+{
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+ssize_t
+kvm_cachepc_read(struct file *file, char __user *buf, size_t buflen, loff_t *off)
+{
+	size_t len, left;
+	size_t size;
+
+	printk(KERN_WARNING "CachePC: Reading entries (%zu:%lli)\n",
+		buflen, off ? *off : 0);
+
+	size = cachepc_msrmts_count * sizeof(uint16_t);
+	if (!off || *off < 0 || *off >= size)
+		return 0;
+
+	len = size - *off;
+	if (len > buflen) len = buflen;
+
+	left = copy_to_user(buf, (uint8_t *) cachepc_msrmts + *off, len);
+
+	len -= left;
+	*off += len;
+
+	return len;
+}
+
+ssize_t
+kvm_cachepc_write(struct file *file, const char __user *buf, size_t buflen, loff_t *off)
+{
+	return 0;
+}
+
+void
+kvm_cachepc_prime_probe_test(void *p)
+{
+	cacheline *lines;
+	cacheline *cl, *head;
+	uint32_t count;
+	uint32_t *arg;
+	int i, max;
+
+	arg = p;
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
+	BUG_ON(lines == NULL);
+
+	max = cachepc_ctx->nr_of_cachelines;
+
+	cachepc_cpuid();
+	cachepc_mfence();
+
+	for (i = 0; i < max; i++)
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+
+	head = cachepc_prime(cachepc_ds);
+	cachepc_probe(head);
+
+	count = 0;
+	cl = head = cachepc_ds;
+	do {
+		count += cl->count;
+		cl = cl->next;
+	} while (cl != head);
+
+	printk(KERN_WARNING "CachePC: Prime-probe test done (%u vs. %u => %s)\n",
+		count, 0, (count == 0) ? "passed" : "failed");
+
+	if (arg) *arg = (count == 0);
+
+	kfree(lines);
+}
+
+void
+kvm_cachepc_stream_hwpf_test(void *p)
+{
+	cacheline *lines;
+	uint32_t count;
+	uint32_t *arg;
+	uint32_t i, max;
+
+	arg = p;
+
+	/* TODO: accurately detect hwpf */
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
+	BUG_ON(lines == NULL);
+
+	max = cachepc_ctx->nr_of_cachelines;
+
+	cachepc_prime(cachepc_ds);
+
+	count = 0;
+	count -= cachepc_read_pmc(0);
+	for (i = 0; i < max; i++)
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+	count += cachepc_read_pmc(0);
+
+	printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n",
+		count, max, (count == max) ? "passed" : "failed");
+
+	if (arg) *arg = (count == max);
+
+	kfree(lines);
+}
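All of the self tests program PMC 0 with raw event 0x64 and unit mask 0xD8, which the comments summarize as "l2 data cache, hit or miss". cachepc_init_pmc() itself is not part of this patch; below is a plausible sketch of what it does, assuming the Zen core performance counter layout (PERF_CTL MSRs at 0xC0010200 + 2*index, EventSelect in bits 7:0, UnitMask in bits 15:8, OS-mode counting via bit 17, enable via bit 22). Counting the guest's execution across vmrun would additionally require the Zen HostGuestOnly field (bits 41:40), which this sketch omits; treat every detail here as an assumption, not as the patch's actual implementation:

    /* Hypothetical sketch: program host-mode counting of a raw event on
     * a Zen core performance counter. The real cachepc_init_pmc() lives
     * in svm/cachepc/cachepc.c and is not shown in this patch. */
    void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask)
    {
            uint64_t ctl;

            ctl = ((uint64_t) event_no & 0xFF)  /* EventSelect[7:0] */
                | ((uint64_t) event_mask << 8)  /* UnitMask[15:8] */
                | (1ULL << 17)                  /* Os: count in kernel mode */
                | (1ULL << 22);                 /* En: enable the counter */

            asm volatile ("wrmsr" : : "c"(0xC0010200 + index * 2),
                    "a"((uint32_t) ctl), "d"((uint32_t)(ctl >> 32)));
    }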
+
+void
+kvm_cachepc_single_access_test(void *p)
+{
+	cacheline *ptr;
+	uint64_t pre, post;
+	uint32_t *arg;
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	arg = p;
+
+	WARN_ON(arg && *arg >= L1_SETS);
+	if (arg && *arg >= L1_SETS) return;
+	ptr = cachepc_prepare_victim(cachepc_ctx, arg ? *arg : 48);
+
+	cachepc_prime(cachepc_ds);
+
+	pre = cachepc_read_pmc(0);
+	cachepc_victim(ptr);
+	post = cachepc_read_pmc(0);
+
+	printk(KERN_WARNING "CachePC: Single access test done (%llu vs %u => %s)\n",
+		post - pre, 1, (post - pre == 1) ? "passed" : "failed");
+
+	if (arg) *arg = post - pre;
+
+	cachepc_release_victim(cachepc_ctx, ptr);
+}
+
+void
+kvm_cachepc_single_eviction_test(void *p)
+{
+	cacheline *head, *cl, *evicted;
+	cacheline *ptr;
+	uint32_t target;
+	uint32_t *arg;
+	int count;
+
+	arg = p;
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	WARN_ON(arg && *arg >= L1_SETS);
+	if (arg && *arg >= L1_SETS) return;
+	target = arg ? *arg : 48;
+
+	ptr = cachepc_prepare_victim(cachepc_ctx, target);
+
+	head = cachepc_prime(cachepc_ds);
+	cachepc_victim(ptr);
+	cachepc_probe(head);
+
+	count = 0;
+	evicted = NULL;
+	cl = head = cachepc_ds;
+	do {
+		if (IS_FIRST(cl->flags) && cl->count > 0) {
+			evicted = cl;
+			count += cl->count;
+		}
+		cl = cl->next;
+	} while (cl != head);
+
+	printk(KERN_WARNING "CachePC: Single eviction test done (%u vs %u => %s)\n",
+		count, 1, (count == 1 && evicted->cache_set == target) ? "passed" : "failed");
+	cachepc_save_msrmts(head);
+
+	if (arg) *arg = count;
+
+	cachepc_release_victim(cachepc_ctx, ptr);
+}
+
+void
+kvm_cachepc_system_setup(void)
+{
+	uint64_t reg_addr, val;
+	uint32_t lo, hi;
+
+	/* disable streaming store */
+	reg_addr = 0xc0011020;
+	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
+	val = (uint64_t) lo | ((uint64_t) hi << 32);
+	val |= 1 << 13;
+	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
+	printk(KERN_WARNING "CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
+
+	/* disable speculative data cache tlb reloads */
+	reg_addr = 0xc0011022;
+	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
+	val = (uint64_t) lo | ((uint64_t) hi << 32);
+	val |= 1 << 4;
+	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
+	printk(KERN_WARNING "CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
+
+	/* disable data cache hardware prefetcher */
+	reg_addr = 0xc0011022;
+	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
+	val = (uint64_t) lo | ((uint64_t) hi << 32);
+	val |= 1 << 13;
+	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
+	printk(KERN_WARNING "CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
+}
+
+void
+kvm_cachepc_init(void *p)
+{
+	int cpu;
+
+	cpu = get_cpu();
+
+	printk(KERN_WARNING "CachePC: Running on core %i\n", cpu);
+
+	cachepc_ctx = cachepc_get_ctx(L1);
+	cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
+
+	kvm_cachepc_system_setup();
+
+	kvm_cachepc_prime_probe_test(NULL);
+	kvm_cachepc_single_access_test(NULL);
+	kvm_cachepc_single_eviction_test(NULL);
+	kvm_cachepc_stream_hwpf_test(NULL);
+
+	put_cpu();
+}
+
+void
+kvm_cachepc_init_pmc_ioctl(void *p)
+{
+	uint32_t event;
+	uint8_t index, event_no, event_mask;
+
+	WARN_ON(p == NULL);
+	if (!p) return;
+
+	event = *(uint32_t *)p;
+
+	index      = (event & 0xFF000000) >> 24;
+	event_no   = (event & 0x0000FF00) >> 8;
+	event_mask = (event & 0x000000FF) >> 0;
+
+	cachepc_init_pmc(index, event_no, event_mask);
+}
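kvm_cachepc_init_pmc_ioctl() treats its u32 argument as a packed counter description: bits 31:24 select the counter index, bits 15:8 the event number, bits 7:0 the unit mask, and bits 23:16 are unused. A caller would therefore build the argument as in this illustrative userspace helper (not part of the patch):

    #include <stdint.h>

    /* Pack a counter description the way kvm_cachepc_init_pmc_ioctl()
     * unpacks it: index in bits 31:24, event number in bits 15:8,
     * unit mask in bits 7:0. */
    static inline uint32_t cachepc_event(uint8_t index, uint8_t event_no,
                                         uint8_t event_mask)
    {
            return ((uint32_t) index << 24)
                 | ((uint32_t) event_no << 8)
                 | ((uint32_t) event_mask << 0);
    }

    /* counter 0, event 0x64, mask 0xD8: "l2 data cache, hit or miss" */
    /* uint32_t cfg = cachepc_event(0, 0x64, 0xD8); */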
+
+long
+kvm_cachepc_ioctl(struct file *file, unsigned int cmd, unsigned long argp)
+{
+	void __user *arg_user;
+	uint32_t u32;
+	int r;
+
+	arg_user = (void __user *)argp;
+	switch (cmd) {
+	case CACHEPC_IOCTL_TEST_ACCESS:
+		printk(KERN_WARNING "CachePC: Called ioctl access test\n");
+		if (!arg_user) return -EINVAL;
+		if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
+			return -EFAULT;
+		r = smp_call_function_single(2,
+			kvm_cachepc_single_access_test, &u32, true);
+		WARN_ON(r != 0);
+		if (copy_to_user(arg_user, &u32, sizeof(uint32_t)))
+			return -EFAULT;
+		break;
+	case CACHEPC_IOCTL_TEST_EVICTION:
+		printk(KERN_WARNING "CachePC: Called ioctl eviction test\n");
+		if (!arg_user) return -EINVAL;
+		if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
+			return -EFAULT;
+		r = smp_call_function_single(2,
+			kvm_cachepc_single_eviction_test, &u32, true);
+		WARN_ON(r != 0);
+		if (copy_to_user(arg_user, &u32, sizeof(uint32_t)))
+			return -EFAULT;
+		break;
+	case CACHEPC_IOCTL_INIT_PMC:
+		printk(KERN_WARNING "CachePC: Called ioctl init counter\n");
+		if (!arg_user) return -EINVAL;
+		if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
+			return -EFAULT;
+		r = smp_call_function_single(2,
+			kvm_cachepc_init_pmc_ioctl, &u32, true);
+		WARN_ON(r != 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		  struct module *module)
 {
 	struct kvm_cpu_compat_check c;
-	int r;
-	int cpu;
+	int r, cpu;
 
 	r = kvm_arch_init(opaque);
 	if (r)
@@ -4848,6 +5180,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	r = kvm_vfio_ops_init();
 	WARN_ON(r);
 
+	cachepc_msrmts_count = L1_SETS;
+	cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL);
+	BUG_ON(cachepc_msrmts == NULL);
+
+	r = smp_call_function_single(2, kvm_cachepc_init, NULL, true);
+	WARN_ON(r != 0);
+
+	memset(&cachepc_proc_ops, 0, sizeof(cachepc_proc_ops));
+	cachepc_proc_ops.proc_open = kvm_cachepc_open;
+	cachepc_proc_ops.proc_read = kvm_cachepc_read;
+	cachepc_proc_ops.proc_write = kvm_cachepc_write;
+	cachepc_proc_ops.proc_release = kvm_cachepc_close;
+	cachepc_proc_ops.proc_ioctl = kvm_cachepc_ioctl;
+	proc_create("cachepc", 0644, NULL, &cachepc_proc_ops);
+
 	return 0;
 
 out_unreg:
@@ -4872,6 +5219,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+	remove_proc_entry("cachepc", NULL);
+	kfree(cachepc_msrmts);
+
+	cachepc_release_ds(cachepc_ctx, cachepc_ds);
+	cachepc_release_ctx(cachepc_ctx);
+
 	debugfs_remove_recursive(kvm_debugfs_dir);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
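With the proc file registered, userspace retrieves the per-set counts that cachepc_save_msrmts() published after each guest entry by reading /proc/cachepc: the buffer holds cachepc_msrmts_count (= L1_SETS, 64 here) uint16_t values, one access count per L1 cache set. The CACHEPC_IOCTL_* requests would be issued on the same file descriptor, but their request numbers come from svm/cachepc/cachepc.h, which this patch does not show, so this sketch sticks to the self-contained read path:

    /* Userspace sketch (illustration): read the latest per-set
     * measurements, as served by kvm_cachepc_read(). */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint16_t msrmts[64];
            ssize_t n;
            int fd, set;

            fd = open("/proc/cachepc", O_RDONLY);
            if (fd < 0) { perror("open"); return 1; }

            n = read(fd, msrmts, sizeof(msrmts));
            if (n < 0) { perror("read"); close(fd); return 1; }

            for (set = 0; set < (int)(n / sizeof(uint16_t)); set++)
                    printf("set %2d: %u\n", set, msrmts[set]);

            close(fd);
            return 0;
    }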