commit d4c8266836e9a4e6fa073667e4edfbbbb61e8666
parent da76c11f2059a8696a3df41844d49f82e6988843
Author: Louis Burda <quent.burda@gmail.com>
Date: Wed, 5 Oct 2022 16:49:09 +0200
Sevstep and cachepc refactoring
Diffstat:
15 files changed, 317 insertions(+), 333 deletions(-)
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
@@ -16,22 +16,13 @@ static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cach
static cacheline **allocate_cache_ds(cache_ctx *ctx);
static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
-void __attribute__((optimize(1))) // prevent instruction reordering
-cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
-{
- cachepc_prime(cl);
- asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
-}
-
-void __attribute__((optimize(1))) // prevent instruction reordering
-cachepc_probe_vcall(uintptr_t ret, cacheline *cl)
-{
- cachepc_probe(cl);
- asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
-}
+static void random_perm(uint32_t *arr, uint32_t arr_len);
+static void gen_random_indices(uint32_t *arr, uint32_t arr_len);
+static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
void
-cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask)
+cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
+ bool host, bool guest, bool kernel, bool user)
{
uint64_t event;
uint64_t reg_addr;
@@ -47,9 +38,9 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask)
reg_addr = 0xc0010200 + index * 2;
event = event_no | (event_mask << 8);
- event |= (1ULL << 17); /* OS (kernel) events only */
event |= (1ULL << 22); /* enable performance counter */
- event |= (1ULL << 40); /* Host events only */
+ event |= ((kernel * 2ULL + user * 1ULL) << 16);
+ event |= ((host * 2ULL + guest * 1ULL) << 40);
printk(KERN_WARNING "CachePC: Initialized %i. PMC %02X:%02X\n",
index, event_no, event_mask);
asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
@@ -149,6 +140,19 @@ cachepc_release_victim(cache_ctx *ctx, cacheline *victim)
kfree(remove_cache_set(ctx, victim));
}
+void *
+cachepc_aligned_alloc(size_t alignment, size_t size)
+{
+ void *p;
+
+ if (size % alignment != 0)
+ size = size - (size % alignment) + alignment;
+ p = kzalloc(size, GFP_KERNEL);
+ BUG_ON(((uintptr_t) p) % alignment != 0);
+
+ return p;
+}
+
void
cachepc_save_msrmts(cacheline *head)
{
@@ -183,6 +187,19 @@ cachepc_print_msrmts(cacheline *head)
} while (curr_cl != head);
}
+void __attribute__((optimize(1))) /* prevent instruction reordering */
+cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
+{
+ cachepc_prime(cl);
+ asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
+}
+
+void __attribute__((optimize(1))) /* prevent instruction reordering */
+cachepc_probe_vcall(uintptr_t ret, cacheline *cl)
+{
+ cachepc_probe(cl);
+ asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
+}
cacheline *
prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len)
@@ -430,16 +447,39 @@ get_virt_cache_set(cache_ctx *ctx, void *ptr)
return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE);
}
-void *
-cachepc_aligned_alloc(size_t alignment, size_t size)
+void
+random_perm(uint32_t *arr, uint32_t arr_len)
{
- void *p;
+ uint32_t i;
- if (size % alignment != 0)
- size = size - (size % alignment) + alignment;
- p = kzalloc(size, GFP_KERNEL);
- BUG_ON(((uintptr_t) p) % alignment != 0);
+ /* no special ordering needed when prefetcher is disabled */
+ for (i = 0; i < arr_len; i++)
+ arr[i] = i;
- return p;
+ // /* prevent stream prefetching by alternating access direction */
+ // mid = arr_len / 2;
+ // for (i = 0; i < arr_len; i++)
+ // arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
}
+void
+gen_random_indices(uint32_t *arr, uint32_t arr_len)
+{
+ uint32_t i;
+
+ for (i = 0; i < arr_len; ++i)
+ arr[i] = i;
+ random_perm(arr, arr_len);
+}
+
+bool
+is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len)
+{
+ uint32_t i;
+
+ for (i = 0; i < arr_len; ++i) {
+ if (arr[i] == elem)
+ return true;
+ }
+
+ return false;
+}
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -2,10 +2,10 @@
#include "asm.h"
#include "cache_types.h"
-#include "util.h"
#include "uapi.h"
-void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask);
+void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
+ bool host, bool guest, bool kernel, bool user);
cache_ctx *cachepc_get_ctx(cache_level cl);
void cachepc_release_ctx(cache_ctx *ctx);
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -101,7 +101,7 @@ cachepc_kvm_prime_probe_test(void *p)
arg = p;
/* l2 data cache, hit or miss */
- cachepc_init_pmc(0, 0x64, 0xD8);
+ cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false);
lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
BUG_ON(lines == NULL);
@@ -142,10 +142,10 @@ cachepc_kvm_stream_hwpf_test(void *p)
arg = p;
- /* TODO: accurately detect hwpf */
+ /* TODO: improve detection */
/* l2 data cache, hit or miss */
- cachepc_init_pmc(0, 0x64, 0xD8);
+ cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false);
lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
BUG_ON(lines == NULL);
@@ -175,7 +175,7 @@ cachepc_kvm_single_access_test(void *p)
uint32_t *arg;
/* l2 data cache, hit or miss */
- cachepc_init_pmc(0, 0x64, 0xD8);
+ cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false);
arg = p;
@@ -209,7 +209,7 @@ cachepc_kvm_single_eviction_test(void *p)
arg = p;
/* l2 data cache, hit or miss */
- cachepc_init_pmc(0, 0x64, 0xD8);
+ cachepc_init_pmc(0, 0x64, 0xD8, true, false, true, false);
WARN_ON(arg && *arg >= L1_SETS);
if (arg && *arg >= L1_SETS) return;
@@ -277,17 +277,22 @@ cachepc_kvm_init_pmc_ioctl(void *p)
{
uint32_t event;
uint8_t index, event_no, event_mask;
+ uint8_t host_guest, kernel_user;
WARN_ON(p == NULL);
if (!p) return;
event = *(uint32_t *)p;
- index = (event & 0xFF000000) >> 24;
- event_no = (event & 0x0000FF00) >> 8;
- event_mask = (event & 0x000000FF) >> 0;
+ index = (event & 0xFF000000) >> 24;
+ host_guest = (event & 0x00F00000) >> 20;
+ kernel_user = (event & 0x000F0000) >> 16;
+ event_no = (event & 0x0000FF00) >> 8;
+ event_mask = (event & 0x000000FF) >> 0;
- cachepc_init_pmc(index, event_no, event_mask);
+ cachepc_init_pmc(index, event_no, event_mask,
+ host_guest >> 1, host_guest & 1,
+ kernel_user >> 1, kernel_user & 1);
}
long
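The widened ioctl word packs the two filter nibbles into the previously unused upper bytes: PMC index in bits 31:24, host/guest in 23:20, kernel/user in 19:16, event number in 15:8 and unit mask in 7:0. A standalone sketch of the decode, applied to the 0x002264D8 constant the tests further down now pass:

```c
#include <assert.h>
#include <stdint.h>

/* Standalone sketch of the event-word decode, mirroring
 * cachepc_kvm_init_pmc_ioctl() above. */
int
main(void)
{
	uint32_t event = 0x002264D8;
	uint8_t host_guest = (event & 0x00F00000) >> 20;  /* 0b10 */
	uint8_t kernel_user = (event & 0x000F0000) >> 16; /* 0b10 */

	assert(((event & 0xFF000000) >> 24) == 0);               /* PMC index 0 */
	assert((host_guest >> 1) == 1 && (host_guest & 1) == 0); /* host only */
	assert((kernel_user >> 1) == 1 && (kernel_user & 1) == 0); /* kernel only */
	assert(((event & 0x0000FF00) >> 8) == 0x64);             /* event number */
	assert((event & 0x000000FF) == 0xD8);                    /* unit mask */
	return 0;
}
```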
diff --git a/cachepc/util.c b/cachepc/util.c
@@ -1,38 +0,0 @@
-#include "util.h"
-
-void
-random_perm(uint32_t *arr, uint32_t arr_len)
-{
- uint32_t i;
-
- /* no special ordering needed when prefetcher is disabled */
- for (i = 0; i < arr_len; i++)
- arr[i] = i;
-
- // /* prevent stream prefetching by alternating access direction */
- // mid = arr_len / 2;
- // for (i = 0; i < arr_len; i++)
- // arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
-}
-
-void
-gen_random_indices(uint32_t *arr, uint32_t arr_len)
-{
- uint32_t i;
-
- for (i = 0; i < arr_len; ++i)
- arr[i] = i;
- random_perm(arr, arr_len);
-}
-
-
-bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) {
- uint32_t i;
-
- for (i = 0; i < arr_len; ++i) {
- if (arr[i] == elem)
- return true;
- }
-
- return false;
-}
diff --git a/cachepc/util.h b/cachepc/util.h
@@ -1,8 +0,0 @@
-#pragma once
-
-#include <linux/kernel.h>
-
-void random_perm(uint32_t *arr, uint32_t arr_len);
-void gen_random_indices(uint32_t *arr, uint32_t arr_len);
-
-bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
diff --git a/patch.diff b/patch.diff
@@ -17,7 +17,7 @@ index eb186bc57f6a..3f767a27045e 100644
/*
* The notifier represented by @kvm_page_track_notifier_node is linked into
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
-index 30f244b64523..ddfd48fbd8ca 100644
+index 30f244b64523..3c5f65040878 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,6 @@
@@ -35,8 +35,8 @@ index 30f244b64523..ddfd48fbd8ca 100644
- hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
- mmu/spte.o
+ hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o mmu/spte.o \
-+ svm/cachepc/cachepc.o svm/cachepc/util.o svm/cachepc/kvm.o \
-+ sevstep/sevstep.o sevstep/uspt.o sevstep/kvm.o
++ svm/cachepc/cachepc.o svm/cachepc/kvm.o \
++ sevstep/sevstep.o sevstep/uspt.o
ifdef CONFIG_HYPERV
kvm-y += kvm_onhyperv.o
@@ -46,7 +46,7 @@ index 30f244b64523..ddfd48fbd8ca 100644
-kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
+kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o \
-+ svm/cachepc/cachepc.o svm/cachepc/util.o
++ svm/cachepc/cachepc.o
ifdef CONFIG_HYPERV
kvm-amd-y += svm/svm_onhyperv.o
@@ -382,14 +382,14 @@ index dfaeb47fcf2a..0626f3fdddfd 100644
2: cli
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index d9adf79124f9..1809b79cb6cd 100644
+index d9adf79124f9..082dc8553566 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -82,6 +82,8 @@
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>
-+#include "sevstep/kvm.h"
++#include "sevstep/sevstep.h"
+
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/sevstep/kvm.c b/sevstep/kvm.c
@@ -1,205 +0,0 @@
-#include "kvm.h"
-
-#include <linux/types.h>
-
-bool
-__untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
- enum kvm_page_track_mode mode)
-{
- int idx;
- bool ret;
- struct kvm_memory_slot *slot;
-
- ret = false;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (mode == KVM_PAGE_TRACK_ACCESS) {
- //printk("Removing gfn: %016llx from acess page track pool\n", gfn);
- }
- if (mode == KVM_PAGE_TRACK_WRITE) {
- //printk("Removing gfn: %016llx from write page track pool\n", gfn);
- }
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-
- if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
- write_lock(&vcpu->kvm->mmu_lock);
- kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode);
- write_unlock(&vcpu->kvm->mmu_lock);
- ret = true;
- } else {
- printk("Failed to untrack %016llx because ", gfn);
- if (slot == NULL) {
- printk(KERN_CONT "slot was null");
- } else if (!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
- printk(KERN_CONT "page track was not active");
- }
- printk(KERN_CONT "\n");
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- return ret;
-}
-EXPORT_SYMBOL(__untrack_single_page);
-
-bool
-__reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- int idx;
- bool ret;
- struct kvm_memory_slot *slot;
-
- ret = false;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- if( slot != NULL ) {
- write_lock(&vcpu->kvm->mmu_lock);
- //Vincent: The kvm mmu function now requires min_level
- //We want all pages to protected so we do PG_LEVEL_4K
- //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/
- sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_ACCESSED);
- write_unlock(&vcpu->kvm->mmu_lock);
- ret = true;
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- return ret;
-}
-EXPORT_SYMBOL(__reset_accessed_on_page);
-
-bool
-__clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- int idx;
- bool ret;
- struct kvm_memory_slot *slot;
-
- ret = false;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- if( slot != NULL ) {
- write_lock(&vcpu->kvm->mmu_lock);
- //Vincent: The kvm mmu function now requires min_level
- //We want all pages to protected so we do PG_LEVEL_4K
- //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/
- sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn,
- PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_EXEC);
- write_unlock(&vcpu->kvm->mmu_lock);
- ret = true;
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- return ret;
-}
-EXPORT_SYMBOL(__clear_nx_on_page);
-
-bool
-__track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
- enum kvm_page_track_mode mode)
-{
- int idx;
- bool ret;
- struct kvm_memory_slot *slot;
-
- ret = false;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (mode == KVM_PAGE_TRACK_ACCESS) {
- //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn);
- //printk("Adding gfn: %016llx to acess page track pool\n", gfn);
- }
- if (mode == KVM_PAGE_TRACK_WRITE) {
- //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn);
- }
- slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm,slot, gfn, mode)) {
-
- write_lock(&vcpu->kvm->mmu_lock);
- kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode);
- write_unlock(&vcpu->kvm->mmu_lock);
- ret = true;
-
- } else {
-
- printk("Failed to track %016llx because ", gfn);
- if (slot == NULL) {
- printk(KERN_CONT "slot was null");
- }
- if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
- printk(KERN_CONT "page is already tracked");
- }
- printk(KERN_CONT "\n");
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- return ret;
-}
-EXPORT_SYMBOL(__track_single_page);
-
-long
-kvm_start_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode )
-{
- long count = 0;
- u64 iterator, iterat_max;
- struct kvm_memory_slot *slot;
- int idx;
-
- //Vincent: Memslots interface changed into a rb tree, see
- //here: https://lwn.net/Articles/856392/
- //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u
- //Thus we use instead of
- //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn
- // + vcpu->kvm->memslots[0]->memslots[0].npages;
- struct rb_node *node;
- struct kvm_memory_slot *first_memslot;
- node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree));
- first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]);
- iterat_max = first_memslot->base_gfn + first_memslot->npages;
- for (iterator=0; iterator < iterat_max; iterator++)
- {
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator);
- if ( slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) {
- write_lock(&vcpu->kvm->mmu_lock);
- kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode);
- write_unlock(&vcpu->kvm->mmu_lock);
- count++;
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- }
-
- return count;
-}
-EXPORT_SYMBOL(kvm_start_tracking);
-
-long
-kvm_stop_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode)
-{
- long count = 0;
- u64 iterator, iterat_max;
- struct kvm_memory_slot *slot;
- int idx;
-
-
- //Vincent: Memslots interface changed into a rb tree, see
- //here: https://lwn.net/Articles/856392/
- //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u
- //Thus we use instead of
- //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn
- // + vcpu->kvm->memslots[0]->memslots[0].npages;
- struct rb_node *node;
- struct kvm_memory_slot *first_memslot;
- node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree));
- first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]);
- iterat_max = first_memslot->base_gfn + first_memslot->npages;
- for (iterator=0; iterator < iterat_max; iterator++)
- {
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator);
- //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/
- if ( slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) {
- write_lock(&vcpu->kvm->mmu_lock);
- kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode);
- write_unlock(&vcpu->kvm->mmu_lock);
- count++;
- }
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- }
-
- return count;
-}
-EXPORT_SYMBOL(kvm_stop_tracking);
-
diff --git a/sevstep/kvm.h b/sevstep/kvm.h
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "sevstep.h"
-#include "uapi.h"
diff --git a/sevstep/mmu.c b/sevstep/mmu.c
@@ -1,7 +1,7 @@
#include "../sevstep/sevstep.h"
#include "../sevstep/uspt.h"
-void
+static void
sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
@@ -19,7 +19,7 @@ sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu,
for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
if (kvm_slot_page_track_is_active(vcpu->kvm,
fault->slot, fault->gfn, modes[i])) {
- __untrack_single_page(vcpu, fault->gfn, modes[i]);
+ sevstep_untrack_single_page(vcpu, fault->gfn, modes[i]);
was_tracked = true;
}
}
@@ -27,7 +27,7 @@ sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu,
if (was_tracked) {
have_rip = false;
if (uspt_should_get_rip())
- have_rip = sev_step_get_rip_kvm_vcpu(vcpu,&current_rip) == 0;
+ have_rip = sevstep_get_rip_kvm_vcpu(vcpu, &current_rip) == 0;
if (uspt_batch_tracking_in_progress()) {
send_err = uspt_batch_tracking_save(fault->gfn << PAGE_SHIFT,
fault->error_code, have_rip, current_rip);
diff --git a/sevstep/sevstep.c b/sevstep/sevstep.c
@@ -13,7 +13,6 @@
#include "cpuid.h"
#include "mmu/spte.h"
-
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -44,12 +43,7 @@
struct kvm* main_vm;
EXPORT_SYMBOL(main_vm);
-// used to store performance counter values; 6 counters, 2 readings per counter
-// TODO: static!
-uint64_t perf_reads[6][2];
-perf_ctl_config_t perf_configs[6];
-int perf_cpu;
-
+static perf_ctl_config_t perf_configs[6];
uint64_t
perf_ctl_to_u64(perf_ctl_config_t * config)
@@ -60,10 +54,10 @@ perf_ctl_to_u64(perf_ctl_config_t * config)
result |= config->EventSelect & 0xffULL;
result |= (config->UintMask & 0xffULL) << 8;
result |= (config->OsUserMode & 0x3ULL) << 16;
- result |= (config->Edge & 0x1ULL ) << 18;
- result |= (config->Int & 0x1ULL ) << 20;
- result |= (config->En & 0x1ULL ) << 22;
- result |= (config->Inv & 0x1ULL ) << 23;
+ result |= (config->Edge & 0x1ULL) << 18;
+ result |= (config->Int & 0x1ULL) << 20;
+ result |= (config->En & 0x1ULL) << 22;
+ result |= (config->Inv & 0x1ULL) << 23;
result |= (config->CntMask & 0xffULL) << 24;
result |= ((config->EventSelect & 0xf00ULL) >> 8) << 32;
result |= (config->HostGuestOnly & 0x3ULL) << 40;
@@ -88,8 +82,9 @@ read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result)
}
void
-setup_perfs()
+sevstep_setup_pmcs(void)
{
+ int perf_cpu;
int i;
perf_cpu = smp_processor_id();
@@ -107,7 +102,7 @@ setup_perfs()
perf_configs[0].EventSelect = 0x0c0;
perf_configs[0].UintMask = 0x0;
perf_configs[0].En = 0x1;
- write_ctl(&perf_configs[0],perf_cpu, CTL_MSR_0);
+ write_ctl(&perf_configs[0], perf_cpu, CTL_MSR_0);
/*
* programm l2d hit from data cache miss perf for
@@ -118,12 +113,213 @@ setup_perfs()
perf_configs[1].UintMask = 0x70;
perf_configs[1].En = 0x1;
perf_configs[1].HostGuestOnly = 0x2; /* count only host events */
- write_ctl(&perf_configs[1],perf_cpu,CTL_MSR_1);
+ write_ctl(&perf_configs[1], perf_cpu, CTL_MSR_1);
+}
+EXPORT_SYMBOL(sevstep_setup_pmcs);
+
+bool
+sevstep_untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ enum kvm_page_track_mode mode)
+{
+ int idx;
+ bool ret;
+ struct kvm_memory_slot *slot;
+
+ ret = false;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (mode == KVM_PAGE_TRACK_ACCESS) {
+ //printk("Removing gfn: %016llx from acess page track pool\n", gfn);
+ }
+ if (mode == KVM_PAGE_TRACK_WRITE) {
+ //printk("Removing gfn: %016llx from write page track pool\n", gfn);
+ }
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ ret = true;
+ } else {
+ printk("Failed to untrack %016llx because ", gfn);
+ if (slot == NULL) {
+ printk(KERN_CONT "slot was null");
+ } else if (!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
+ printk(KERN_CONT "page track was not active");
+ }
+ printk(KERN_CONT "\n");
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ return ret;
+}
+EXPORT_SYMBOL(sevstep_untrack_single_page);
+
+bool
+sevstep_reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ int idx;
+ bool ret;
+ struct kvm_memory_slot *slot;
+
+ ret = false;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (slot != NULL) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ //Vincent: The kvm mmu function now requires min_level
+ //We want all pages to be protected so we use PG_LEVEL_4K
+ //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/
+ sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn,
+ PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_ACCESSED);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ ret = true;
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ return ret;
+}
+EXPORT_SYMBOL(sevstep_reset_accessed_on_page);
+
+bool
+sevstep_clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ int idx;
+ bool ret;
+ struct kvm_memory_slot *slot;
+
+ ret = false;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (slot != NULL) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ //Vincent: The kvm mmu function now requires min_level
+ //We want all pages to be protected so we use PG_LEVEL_4K
+ //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/
+ sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn,
+ PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_EXEC);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ ret = true;
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ return ret;
+}
+EXPORT_SYMBOL(sevstep_clear_nx_on_page);
+
+bool
+sevstep_track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ enum kvm_page_track_mode mode)
+{
+ int idx;
+ bool ret;
+ struct kvm_memory_slot *slot;
+
+ ret = false;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (mode == KVM_PAGE_TRACK_ACCESS) {
+ //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn);
+ //printk("Adding gfn: %016llx to acess page track pool\n", gfn);
+ }
+ if (mode == KVM_PAGE_TRACK_WRITE) {
+ //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn);
+ }
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ ret = true;
+
+ } else {
+
+ printk("Failed to track %016llx because ", gfn);
+ if (slot == NULL) {
+ printk(KERN_CONT "slot was null");
+ }
+ if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) {
+ printk(KERN_CONT "page is already tracked");
+ }
+ printk(KERN_CONT "\n");
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ return ret;
+}
+EXPORT_SYMBOL(sevstep_track_single_page);
+
+long
+sevstep_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode)
+{
+ long count = 0;
+ u64 iterator, iterat_max;
+ struct kvm_memory_slot *slot;
+ int idx;
+
+ //Vincent: Memslots interface changed into a rb tree, see
+ //here: https://lwn.net/Articles/856392/
+ //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u
+ //Thus we use instead of
+ //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn
+ // + vcpu->kvm->memslots[0]->memslots[0].npages;
+ struct rb_node *node;
+ struct kvm_memory_slot *first_memslot;
+ node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree));
+ first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]);
+ iterat_max = first_memslot->base_gfn + first_memslot->npages;
+ for (iterator = 0; iterator < iterat_max; iterator++)
+ {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator);
+ if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ count++;
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
+
+ return count;
+}
+EXPORT_SYMBOL(sevstep_start_tracking);
+
+long
+sevstep_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode)
+{
+ long count = 0;
+ u64 iterator, iterat_max;
+ struct kvm_memory_slot *slot;
+ int idx;
+
+ //Vincent: Memslots interface changed into a rb tree, see
+ //here: https://lwn.net/Articles/856392/
+ //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u
+ //Thus we use instead of
+ //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn
+ // + vcpu->kvm->memslots[0]->memslots[0].npages;
+ struct rb_node *node;
+ struct kvm_memory_slot *first_memslot;
+ node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree));
+ first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]);
+ iterat_max = first_memslot->base_gfn + first_memslot->npages;
+ for (iterator = 0; iterator < iterat_max; iterator++)
+ {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator);
+ //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/
+ if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) {
+ write_lock(&vcpu->kvm->mmu_lock);
+ kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ count++;
+ }
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
+
+ return count;
}
-EXPORT_SYMBOL(setup_perfs);
+EXPORT_SYMBOL(sevstep_stop_tracking);
int
-sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip)
+sevstep_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip)
{
return 0;
}
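Taken together, the renamed helpers form the tracking lifecycle sevstep exposes: protect all mapped gfns, let the page-fault path untrack and report the faulting gfn, re-arm individual pages, and finally tear everything down. A hedged kernel-context sketch of that flow (illustrative only, error handling elided):

```c
#include <linux/kvm_host.h>
#include "sevstep/sevstep.h"

/* Illustrative only: typical tracking flow built from the
 * helpers this file now exports; not part of the patch. */
static void
sevstep_tracking_example(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	long count;

	/* protect every mapped gfn in the given tracking mode */
	count = sevstep_start_tracking(vcpu, KVM_PAGE_TRACK_ACCESS);

	/* the page-fault path (sevstep_uspt_page_fault_handle) untracks
	 * the faulting gfn and reports it; re-arm it once handled */
	sevstep_untrack_single_page(vcpu, gfn, KVM_PAGE_TRACK_ACCESS);
	sevstep_track_single_page(vcpu, gfn, KVM_PAGE_TRACK_ACCESS);

	/* remove the protection from all remaining tracked gfns */
	sevstep_stop_tracking(vcpu, KVM_PAGE_TRACK_ACCESS);
	(void) count;
}
```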
diff --git a/sevstep/sevstep.h b/sevstep/sevstep.h
@@ -10,7 +10,6 @@
#include <linux/pid.h>
#include <linux/psp-sev.h>
-
#define CTL_MSR_0 0xc0010200ULL
#define CTL_MSR_1 0xc0010202ULL
#define CTL_MSR_2 0xc0010204ULL
@@ -47,21 +46,20 @@ bool sevstep_rmap_protect(struct kvm_rmap_head *rmap_head,
bool sevstep_kvm_mmu_slot_gfn_protect(struct kvm *kvm, struct kvm_memory_slot *slot,
uint64_t gfn, int min_level, enum kvm_page_track_mode mode);
-bool __untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+bool sevstep_untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
enum kvm_page_track_mode mode);
-bool __track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+bool sevstep_track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn,
enum kvm_page_track_mode mode);
-bool __reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn);
-bool __clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+bool sevstep_reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+bool sevstep_clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn);
-long kvm_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode);
-long kvm_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode);
-void sev_step_handle_callback(void);
+long sevstep_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode);
+long sevstep_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode);
uint64_t perf_ctl_to_u64(perf_ctl_config_t *config);
void write_ctl(perf_ctl_config_t *config, int cpu, uint64_t ctl_msr);
void read_ctr(uint64_t ctr_msr, int cpu, uint64_t *result);
-void setup_perfs(void);
+void sevstep_setup_pmcs(void);
-int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip);
+int sevstep_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip);
diff --git a/sevstep/uspt.c b/sevstep/uspt.c
@@ -225,7 +225,7 @@ _perf_state_setup_retired_instructions(void)
retired_instructions_perf_config.EventSelect = 0x0c0;
retired_instructions_perf_config.UintMask = 0x0;
retired_instructions_perf_config.En = 0x1;
- write_ctl(&retired_instructions_perf_config,batch_track_state.perf_cpu, CTL_MSR_0);
+ write_ctl(&retired_instructions_perf_config, batch_track_state.perf_cpu, CTL_MSR_0);
}
@@ -236,17 +236,17 @@ _perf_state_update_and_get_delta(uint64_t current_event_idx)
{
uint64_t current_value;
- // check if value is "cached"
+ /* check if value is "cached" */
if (perf_state.delta_valid_idx == current_event_idx) {
if (current_event_idx == 0) {
read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, &current_value);
perf_state.idx_for_last_perf_reading = current_event_idx;
- perf_state.last_perf_reading = current_event_idx;
+ perf_state.last_perf_reading = current_value;
}
return perf_state.delta;
}
- // otherwise update, but logic is only valid for two consecutive events
+ /* otherwise update, but logic is only valid for two consecutive events */
if (current_event_idx != perf_state.idx_for_last_perf_reading+1) {
printk_ratelimited(KERN_CRIT "_perf_state_update_and_get_delta: "
"last reading was for idx %llu but was queried for %llu\n",
@@ -377,7 +377,7 @@ uspt_batch_tracking_handle_retrack(struct kvm_vcpu* vcpu,
/* made progress, retrack everything in backlog and reset idx */
for (i = 0; i < batch_track_state.gfn_retrack_backlog_next_idx; i++) {
- __track_single_page(vcpu,
+ sevstep_track_single_page(vcpu,
batch_track_state.gfn_retrack_backlog[i],
batch_track_state.tracking_type);
}
diff --git a/test/kvm.c b/test/kvm.c
@@ -276,7 +276,7 @@ main(int argc, const char **argv)
if (cachepc_fd < 0) err(1, "open");
/* init L1 miss counter */
- arg = 0x000064D8;
+ arg = 0x002264D8;
ret = ioctl(cachepc_fd, CACHEPC_IOCTL_INIT_PMC, &arg);
if (ret == -1) err(1, "ioctl fail");
diff --git a/test/sev-es.c b/test/sev-es.c
@@ -485,7 +485,7 @@ main(int argc, const char **argv)
if (ret != 12) errx(1, "KVM_GET_API_VERSION %d, expected 12", ret);
// Init L1 miss counter
- arg = 0x000064D8;
+ arg = 0x002264D8;
ret = ioctl(cachepc_dev, CACHEPC_IOCTL_INIT_PMC, &arg);
if (ret < 0) err(1, "ioctl fail");
diff --git a/test/sev.c b/test/sev.c
@@ -493,7 +493,7 @@ main(int argc, const char **argv)
if (ret != 12) errx(1, "KVM_GET_API_VERSION %d, expected 12", ret);
// Init L1 miss counter
- arg = 0x000064D8;
+ arg = 0x002264D8;
ret = ioctl(cachepc_dev, CACHEPC_IOCTL_INIT_PMC, &arg);
if (ret < 0) err(1, "ioctl fail");
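The updated test constant decomposes as index 0, host_guest 0b10 (host only), kernel_user 0b10 (kernel only), event 0x64, unit mask 0xD8. A minimal pack helper, hypothetical and mirroring the kernel-side decode, reproduces it:

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical encoder for the INIT_PMC ioctl argument,
 * inverse of the decode in cachepc_kvm_init_pmc_ioctl(). */
static uint32_t
pmc_ioctl_arg(uint8_t index, uint8_t host_guest, uint8_t kernel_user,
    uint8_t event_no, uint8_t event_mask)
{
	return ((uint32_t) index << 24) | ((uint32_t) host_guest << 20)
		| ((uint32_t) kernel_user << 16)
		| ((uint32_t) event_no << 8) | event_mask;
}

int
main(void)
{
	/* host-only, kernel-only L1 miss counter from the tests above */
	assert(pmc_ioctl_arg(0, 2, 2, 0x64, 0xD8) == 0x002264D8);
	return 0;
}
```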