commit 7e2719c913c833bdd93b463f5a7dc878d5a22273
parent b98fabdf4b91a6093b3d91f15cb55b21ab89c0f3
Author: Louis Burda <quent.burda@gmail.com>
Date: Mon, 25 Jul 2022 13:58:41 +0200
Read program counters directly, add print and remove procfs approach for testing
Diffstat:
5 files changed, 86 insertions(+), 31 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,24 +1,28 @@
KERNEL_SOURCE ?= /usr/src/linux
PWD := $(shell pwd)
-.PHONY: all reset prepare build
+.PHONY: all reset clean prepare build
-all: clean reset prepare build
+all: reset prepare build
clean:
$(MAKE) -C $(KERNEL_SOURCE) SUBDIRS=arch/x86/kvm clean
reset:
git -C $(KERNEL_SOURCE) reset --hard
-
-prepare:
- git -C $(KERNEL_SOURCE) apply $(PWD)/patch.diff
+ #git -C $(KERNEL_SOURCE) clean -dfx
+ #cp .config $(KERNEL_SOURCE)/.config
$(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc:
ln -s $(PWD)/src $@
-build: $(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc
- $(MAKE) -C $(KERNEL_SOURCE) arch/x86/kvm/kvm.ko arch/x86/kvm/kvm-amd.ko
+prepare: $(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc
+ git -C $(KERNEL_SOURCE) apply $(PWD)/patch.diff
+
+build:
+ # $(MAKE) -C $(KERNEL_SOURCE) arch/x86/kvm/kvm.ko arch/x86/kvm/kvm-amd.ko
+ $(MAKE) -C $(KERNEL_SOURCE) -v modules -j6 SUBDIRS=arch/x86/kvm
+ $(MAKE) -C $(KERNEL_SOURCE) -j6 M=arch/x86/kvm
load:
sudo rmmod kvm_amd || true
diff --git a/patch.diff b/patch.diff
@@ -13,7 +13,7 @@ index b804444e16d4..1f7d3b15cf4a 100644
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index 7b3cfbe8f7e3..12c5ff2447a0 100644
+index 7b3cfbe8f7e3..4c6ebe040c30 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2,6 +2,8 @@
@@ -72,7 +72,7 @@ index 7f2e2a09ebbd..762eb35f19e5 100644
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 2541a17ff1c4..5f001419362b 100644
+index 2541a17ff1c4..f900cf449fb8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@
@@ -83,7 +83,16 @@ index 2541a17ff1c4..5f001419362b 100644
#include <asm/processor.h>
#include <asm/ioctl.h>
-@@ -143,6 +144,10 @@ static void hardware_disable_all(void);
+@@ -66,6 +67,8 @@
+ /* Worst case buffer size needed for holding an integer. */
+ #define ITOA_MAX_LEN 12
+
++#include "../../arch/x86/kvm/svm/cachepc/cachepc.h"
++
+ MODULE_AUTHOR("Qumranet");
+ MODULE_LICENSE("GPL");
+
+@@ -143,6 +146,10 @@ static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
@@ -94,7 +103,7 @@ index 2541a17ff1c4..5f001419362b 100644
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-@@ -4765,10 +4770,29 @@ static void check_processor_compat(void *data)
+@@ -4765,10 +4772,29 @@ static void check_processor_compat(void *data)
*c->ret = kvm_arch_check_processor_compat(c->opaque);
}
@@ -124,27 +133,28 @@ index 2541a17ff1c4..5f001419362b 100644
int r;
int cpu;
-@@ -4848,6 +4872,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+@@ -4848,6 +4874,16 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_vfio_ops_init();
WARN_ON(r);
++ cachepc_init_counters();
+ cachepc_msrmts_count = 0;
+
-+ memset(&proc_ops, 0, sizeof(proc_ops));
-+ proc_ops.proc_read = kvm_cachepc_read;
++ //memset(&proc_ops, 0, sizeof(proc_ops));
++ //proc_ops.proc_read = kvm_cachepc_read;
+
-+ cachepc_msrmts_file = proc_create("cachepc", 0644, NULL, &proc_ops);
-+ BUG_ON(cachepc_msrmts_file == NULL);
++ //cachepc_msrmts_file = proc_create("cachepc", 0644, NULL, &proc_ops);
++ //BUG_ON(cachepc_msrmts_file == NULL);
+
+
return 0;
out_unreg:
-@@ -4872,6 +4905,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
+@@ -4872,6 +4908,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
-+ remove_proc_entry("cachepc", cachepc_msrmts_file);
++ //remove_proc_entry("cachepc", cachepc_msrmts_file);
debugfs_remove_recursive(kvm_debugfs_dir);
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);
diff --git a/src/asm.h b/src/asm.h
@@ -25,10 +25,9 @@ cachepc_readpmc(uint64_t event)
uint32_t lo, hi;
asm volatile (
- "mov %[event], %%rcx\t\n"
- "rdpmc\t\n"
+ "rdmsr"
: "=a" (lo), "=d" (hi)
- : [event] "r" (event)
+ : "c"(event)
);
return ((uint64_t) hi << 32) | lo;
diff --git a/src/cachepc.c b/src/cachepc.c
@@ -6,10 +6,45 @@ static cacheline **allocate_cache_ds(cache_ctx *ctx);
static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
static void *aligned_alloc(size_t alignment, size_t size);
+/*
+ * Program one performance event-select MSR and log the value written.
+ *
+ * reg_addr:   MSR address of the event-select register
+ * event_no:   event number, placed in bits 7:0 of the selector
+ * event_mask: unit mask, placed in bits 15:8 of the selector
+ */
+static void
+cachepc_write_evtsel(uint64_t reg_addr, uint32_t event_no, uint32_t event_mask)
+{
+	uint32_t event;
+
+	event = event_no | (event_mask << 8);
+	/* OsUserMode is the two-bit field [17:16]; setting only bit 17
+	 * selects OS-mode (CPL 0) events, user-mode events are not counted */
+	event |= (1U << 17);
+	event |= (1U << 22); /* enable performance counter */
+	printk(KERN_INFO "Writing to msr event %u\n", event);
+	/* wrmsr takes the MSR index in ecx and the value in edx:eax;
+	 * the upper 32 bits of the selector are written as zero */
+	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
+}
+
+/*
+ * Configure the first two performance counter slots.
+ *
+ * Both slots select event 0x64 and differ only in their unit mask
+ * (0x08 for slot 0, 0xC8 for slot 1). cachepc.h reads the matching
+ * counter registers as L2_HIT_CNTR (0xC0010201) and L2_MISS_CNTR
+ * (0xC0010203).
+ * NOTE(review): confirm the unit-mask meanings against the PPR.
+ */
+void
+cachepc_init_counters(void)
+{
+	/* SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
+	 *
+	 * performance event selection is done via 0xC001_020X with X = (0..A)[::2]
+	 * performance event reading is done via 0xC001_020X with X = (1..B)[::2]
+	 *
+	 * 6 slots total
+	 */
+
+	cachepc_write_evtsel(0xc0010200, 0x64, 0x08); /* first slot */
+	cachepc_write_evtsel(0xc0010202, 0x64, 0xC8); /* second slot */
+}
+
cache_ctx *
cachepc_get_ctx(cache_level cache_level)
{
cache_ctx *ctx;
+
+ printk(KERN_INFO "CACHEPC_GET_CTX");
ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
BUG_ON(ctx == NULL);
@@ -44,6 +79,8 @@ cachepc_prepare_ds(cache_ctx *ctx)
{
cacheline **cacheline_ptr_arr;
cacheline *cache_ds;
+
+ printk(KERN_INFO "CACHEPC_BUILD_CACHE_DS");
cacheline_ptr_arr = allocate_cache_ds(ctx);
cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
diff --git a/src/cachepc.h b/src/cachepc.h
@@ -8,6 +8,11 @@
#include "cache_types.h"
#include "util.h"
+#define L2_HIT_CNTR 0xC0010201
+#define L2_MISS_CNTR 0xC0010203
+
+void cachepc_init_counters(void);
+
cache_ctx *cachepc_get_ctx(cache_level cl);
cacheline *cachepc_prepare_ds(cache_ctx *ctx);
void cachepc_save_msrmts(cacheline *head, const char *prefix, int index);
@@ -34,6 +39,8 @@ cachepc_prime(cacheline *head)
{
cacheline *curr_cl;
+ printk(KERN_WARNING "PROBE");
+
cachepc_cpuid();
curr_cl = head;
do {
@@ -78,13 +85,12 @@ cachepc_prime_rev(cacheline *head)
static inline cacheline *
cachepc_probe_set(cacheline *curr_cl)
{
- uint64_t pre1, pre2, pre3;
- uint64_t post1, post2, post3;
+ uint64_t pre1, pre2;
+ uint64_t post1, post2;
cacheline *next_cl;
- pre1 = cachepc_readpmc(0);
- pre2 = cachepc_readpmc(1);
- pre3 = cachepc_readpmc(2);
+ pre1 = cachepc_readpmc(L2_HIT_CNTR);
+ pre2 = cachepc_readpmc(L2_MISS_CNTR);
cachepc_mfence();
asm volatile(
@@ -104,18 +110,15 @@ cachepc_probe_set(cacheline *curr_cl)
cachepc_mfence();
cachepc_cpuid();
- post1 = cachepc_readpmc(0);
+ post1 = cachepc_readpmc(L2_HIT_CNTR);
cachepc_cpuid();
- post2 = cachepc_readpmc(1);
- cachepc_cpuid();
- post3 = cachepc_readpmc(2);
+ post2 = cachepc_readpmc(L2_MISS_CNTR);
cachepc_cpuid();
/* works across size boundary */
curr_cl->count = 0;
curr_cl->count += post1 - pre1;
curr_cl->count += post2 - pre2;
- curr_cl->count += post3 - pre3;
return next_cl;
}
@@ -124,6 +127,8 @@ static inline cacheline *
cachepc_probe(cacheline *head)
{
cacheline *curr_cs;
+
+ printk(KERN_WARNING "PROBE");
curr_cs = head;
do {