commit 476f6c892d90e66fbd17ba616b82b000a990f63e
parent 0f3b9caf389b486541614836bf180b64544615cb
Author: Louis Burda <quent.burda@gmail.com>
Date: Sat, 13 Aug 2022 20:05:27 +0200
Add cache line ordering that prevents hardware prefetching, fix CachePC count reads
Diffstat:
5 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/kmod/cache_types.h b/kmod/cache_types.h
@@ -22,9 +22,8 @@
#define IS_LAST(flags) GET_BIT(flags, 1)
#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
-// Offset of the next and prev field in the cacheline struct
-#define CL_NEXT_OFFSET 0
-#define CL_PREV_OFFSET 8
+#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
+#define CL_PREV_OFFSET offsetof(struct cacheline, prev)
typedef enum cache_level cache_level;
typedef enum addressing_type addressing_type;
@@ -53,12 +52,15 @@ struct cacheline {
cacheline *next;
cacheline *prev;
- uint16_t cache_set;
- uint16_t flags;
+ uint32_t cache_set;
+ uint32_t cache_line;
+ uint32_t flags;
// Unused padding to fill cache line
uint64_t count;
- char padding[32];
+
+ char padding[24];
};
static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
+static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
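
The offsetof-based constants plus the new static_assert pin down an invariant the probe code in kmod/cachepc.h depends on: next at offset 0 and prev at offset 8, since the walk dereferences them through fixed displacements in inline asm. A standalone sketch of the layout and its checks (the 64-byte CACHELINE_SIZE and the implicit 4-byte hole before count are assumptions about the target machine and ABI):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CACHELINE_SIZE 64	/* assumed L1 line size */

struct cacheline {
	struct cacheline *next;	/* offset 0: walked via 0(%reg) */
	struct cacheline *prev;	/* offset 8: walked via 8(%reg) */
	uint32_t cache_set;
	uint32_t cache_line;
	uint32_t flags;
	/* 4 bytes of implicit padding align count to 8 */
	uint64_t count;
	char padding[24];	/* 8+8+12+4+8+24 = 64 */
};

static_assert(offsetof(struct cacheline, next) == 0, "probe asm relies on this");
static_assert(offsetof(struct cacheline, prev) == 8, "probe asm relies on this");
static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "one struct per line");
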
diff --git a/kmod/cachepc.c b/kmod/cachepc.c
@@ -46,8 +46,6 @@ cache_ctx *
cachepc_get_ctx(cache_level cache_level)
{
cache_ctx *ctx;
-
- // printk(KERN_WARNING "CachePC: Getting ctx..\n");
ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
BUG_ON(ctx == NULL);
@@ -72,8 +70,6 @@ cachepc_get_ctx(cache_level cache_level)
ctx->set_size = CACHELINE_SIZE * ctx->associativity;
ctx->cache_size = ctx->sets * ctx->set_size;
- // printk(KERN_WARNING "CachePC: Getting ctx done\n");
-
return ctx;
}
@@ -395,6 +391,7 @@ allocate_cache_ds(cache_ctx *ctx)
for (i = 0; i < ctx->nr_of_cachelines; ++i) {
cl_ptr_arr[i] = cl_arr + i;
cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
+ cl_ptr_arr[i]->cache_line = i / ctx->sets;
}
return cl_ptr_arr;
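
The new cache_line field tags each eviction-set line with its way group. Because the backing array is filled set-major, the first ctx->sets pointers touch every set once (way group 0), the next ctx->sets touch them a second time, and so on. A small illustration of the indexing, assuming a 64-set, 8-way L1 and a set-aligned allocation (so the virtual set index reduces to i % sets):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t sets = 64, ways = 8;	/* assumed L1 geometry */
	uint32_t i;

	for (i = 0; i < sets * ways; i++) {
		/* set-major fill: every set appears once per way group */
		if (i % sets < 2)	/* print the first two sets of each pass */
			printf("i=%3u -> set %u, way group %u\n",
			       i, i % sets, i / sets);
	}
	return 0;
}
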
diff --git a/kmod/cachepc.h b/kmod/cachepc.h
@@ -49,13 +49,12 @@ cachepc_prime(cacheline *head)
{
cacheline *curr_cl;
- cachepc_cpuid();
+ //cachepc_cpuid();
curr_cl = head;
do {
curr_cl = curr_cl->next;
- cachepc_mfence();
} while(curr_cl != head);
- cachepc_cpuid();
+ //cachepc_cpuid();
return curr_cl->prev;
}
@@ -79,13 +78,12 @@ cachepc_prime_rev(cacheline *head)
{
cacheline *curr_cl;
- cachepc_cpuid();
+ //cachepc_cpuid();
curr_cl = head;
do {
curr_cl = curr_cl->prev;
- cachepc_mfence();
} while(curr_cl != head);
- cachepc_cpuid();
+ //cachepc_cpuid();
return curr_cl->prev;
}
@@ -96,16 +94,14 @@ cachepc_probe(cacheline *start_cl)
uint64_t pre, post;
cacheline *next_cl;
cacheline *curr_cl;
- volatile register uint64_t i asm("r12");
curr_cl = start_cl;
do {
pre = cachepc_read_pmc(0);
- pre += cachepc_read_pmc(1);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
asm volatile(
"mov 8(%[curr_cl]), %%rax \n\t" // +8
@@ -123,13 +119,12 @@ cachepc_probe(cacheline *start_cl)
);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
post = cachepc_read_pmc(0);
- post += cachepc_read_pmc(1);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
/* works across size boundary */
curr_cl->count = post - pre;
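
Each probe step now brackets a single dependent load with one read of PMC 0 on either side; the second counter and the cpuid serialization were dropped, leaving only mfence for ordering (cpuid's heavier serialization evidently distorted the per-line counts). A self-contained sketch of one such step; read_pmc and probe_one are stand-in names, and counter 0 is assumed to be programmed for the L2 data cache hit/miss event used elsewhere in this commit:

#include <stdint.h>

/* minimal stand-in for the module's struct (see kmod/cache_types.h) */
struct cacheline { struct cacheline *next, *prev;
                   uint32_t cache_set, cache_line, flags;
                   uint64_t count; char padding[24]; };

static inline uint64_t read_pmc(uint32_t n)
{
	uint32_t lo, hi;
	asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(n));
	return ((uint64_t)hi << 32) | lo;
}

/* one probe step: bracket a single dependent load with PMC reads */
static void probe_one(struct cacheline *curr_cl)
{
	uint64_t pre, post;

	pre = read_pmc(0);	/* counter 0: L2 data cache hit/miss */
	asm volatile("mfence" ::: "memory");

	/* touch the line through its prev field (offset 8), as the asm does */
	asm volatile("mov 8(%0), %%rax" :: "r"(curr_cl) : "rax", "memory");

	asm volatile("mfence" ::: "memory");
	post = read_pmc(0);
	asm volatile("mfence" ::: "memory");

	curr_cl->count = post - pre;	/* per-line miss count, read out later */
}
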
diff --git a/kmod/util.c b/kmod/util.c
@@ -20,16 +20,22 @@ prng_bytes(uint8_t *dst, size_t size)
void
random_perm(uint32_t *arr, uint32_t arr_len)
{
- uint32_t i, idx, tmp;
+ uint32_t i, mid; // idx, tmp;
- for (i = arr_len - 1; i > 0; --i) {
- prng_bytes((void*)&idx, 4);
- idx = idx % i;
-
- tmp = arr[idx];
- arr[idx] = arr[i];
- arr[i] = tmp;
+ /* defeat the stream prefetcher by avoiding a consistent access direction */
+ mid = arr_len / 2;
+ for (i = 0; i < arr_len; i++) {
+ arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
}
+
+ // for (i = arr_len - 1; i > 0; --i) {
+ // prng_bytes((void*)&idx, 4);
+ // idx = idx % i;
+
+ // tmp = arr[idx];
+ // arr[idx] = arr[i];
+ // arr[i] = tmp;
+ // }
}
void
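
Despite its name, random_perm no longer randomizes: it emits a fixed zig-zag around the array midpoint, so consecutive values alternate between the lower and upper half and never form the monotonic stride a stream prefetcher locks onto. For arr_len = 8 (mid = 4) the order is 4, 3, 5, 2, 6, 1, 7, 0, which is still a permutation of 0..7. A hypothetical userspace check of that property:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t arr[8], i, mid, seen = 0;
	const uint32_t arr_len = 8;

	mid = arr_len / 2;
	for (i = 0; i < arr_len; i++)
		arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);

	for (i = 0; i < arr_len; i++) {
		printf("%u ", arr[i]);
		seen |= 1u << arr[i];
	}
	printf("\n%s\n", seen == 0xff ? "permutation ok" : "BROKEN");
	return 0;
}
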
diff --git a/patch.diff b/patch.diff
@@ -32,7 +32,7 @@ index b804444e16d4..17167ccfca22 100644
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index 7b3cfbe8f7e3..71697d08e9e4 100644
+index 7b3cfbe8f7e3..16dfd9b2938e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2,6 +2,8 @@
@@ -44,31 +44,29 @@ index 7b3cfbe8f7e3..71697d08e9e4 100644
#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
-@@ -3785,8 +3787,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+@@ -3785,8 +3787,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
-+ static struct cache_ctx *ctx = NULL;
-+ static struct cacheline *ds = NULL;
-+ static struct cacheline *head = NULL;
-+ static int run_index = 0;
++ struct cacheline *head;
+ struct vcpu_svm *svm;
++ int cpu;
+
+ printk(KERN_WARNING "CachePC: svm_vcpu_enter_exit()\n");
-+
-+ if (!ctx) ctx = cachepc_get_ctx(L1);
-+ if (!ds) ds = cachepc_prepare_ds(ctx);
+ svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
-@@ -3835,8 +3847,14 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+@@ -3835,8 +3842,19 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
-+ head = cachepc_prime(ds);
++ cpu = get_cpu();
++ WARN_ON(cpu != 2);
++
++ head = cachepc_prime(cachepc_ds);
+
svm_vcpu_enter_exit(vcpu, svm);
@@ -76,20 +74,13 @@ index 7b3cfbe8f7e3..71697d08e9e4 100644
+ //cachepc_print_msrmts(head);
+ cachepc_save_msrmts(head);
+
++ put_cpu();
++
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
-@@ -3912,6 +3930,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
- if (is_guest_mode(vcpu))
- return EXIT_FASTPATH_NONE;
-
-+ run_index += 1;
-+
- return svm_exit_handlers_fastpath(vcpu);
- }
-
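
Net effect of the svm.c changes: the lazy per-call setup (static ctx/ds) and the run_index counter are gone in favor of the globals initialized at module load, and the measurement is pinned to the CPU whose counters were programmed (the WARN_ON documents that only core 2 is set up). Paraphrasing the patched svm_vcpu_run flow (a sketch, not verbatim kernel code; the cachepc_probe call presumably sits in the unchanged context of this hunk):

/* sketch of the measurement bracket around guest entry */
cpu = get_cpu();			/* pin task, disable preemption */
WARN_ON(cpu != 2);			/* PMCs were programmed on core 2 only */

head = cachepc_prime(cachepc_ds);	/* fill L1 with the eviction set */
svm_vcpu_enter_exit(vcpu, svm);		/* run guest; it evicts our lines */
cachepc_probe(head);			/* per-line PMC deltas -> cl->count */
cachepc_save_msrmts(head);		/* export counts for userspace */

put_cpu();
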
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 2541a17ff1c4..a84a99f4b182 100644
+index 2541a17ff1c4..8c46d509bd13 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,9 @@
@@ -130,7 +121,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-@@ -4765,12 +4782,245 @@ static void check_processor_compat(void *data)
+@@ -4765,12 +4782,240 @@ static void check_processor_compat(void *data)
*c->ret = kvm_arch_check_processor_compat(c->opaque);
}
@@ -189,10 +180,15 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ uint32_t *user;
+
+ /* l2 prefetches, hit or miss */
-+ cachepc_init_pmc(0, 0x60, 0x01);
++ // cachepc_init_pmc(0, 0x60, 0x01);
++
++ /* l2 data cache, hit or miss */
++ cachepc_init_pmc(0, 0x64, 0xD8);
+
+ user = p;
+
++ WARN_ON(user && *user >= L1_SETS);
++ if (user && *user >= L1_SETS) return;
+ ptr = cachepc_prepare_victim(cachepc_ctx, user ? *user : 48);
+
+ cachepc_mfence();
@@ -210,16 +206,6 @@ index 2541a17ff1c4..a84a99f4b182 100644
+
+ pre = cachepc_read_pmc(0);
+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //pre += cachepc_read_pmc(1);
-+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //pre += cachepc_read_pmc(2);
-+
+ cachepc_mfence();
+ cachepc_cpuid();
+
@@ -235,16 +221,6 @@ index 2541a17ff1c4..a84a99f4b182 100644
+
+ post = cachepc_read_pmc(0);
+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //post += cachepc_read_pmc(1);
-+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //post += cachepc_read_pmc(2);
-+
+ cachepc_mfence();
+ cachepc_cpuid();
+
@@ -267,7 +243,10 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ /* l2 data cache, hit or miss */
+ cachepc_init_pmc(0, 0x64, 0xD8);
+
++ WARN_ON(user && *user >= L1_SETS);
++ if (user && *user >= L1_SETS) return;
+ ptr = cachepc_prepare_victim(cachepc_ctx, user ? *user : 48);
++
+ head = cachepc_prime(cachepc_ds);
+ cachepc_victim(ptr);
+ cachepc_probe(head);
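
Both tests now validate the user-supplied set index against L1_SETS before building a victim for it (defaulting to set 48). After prime, victim access, and probe, only the victim's set should show a miss. A hypothetical check that could follow the probe above, reusing head and user from this function:

cacheline *cl = head;
int evicted = 0;
do {
	if (cl->count > 0) {
		evicted += 1;
		WARN_ON(cl->cache_set != (user ? *user : 48));
	}
	cl = cl->next;
} while (cl != head);
WARN_ON(evicted == 0);	/* the victim access must evict something */
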
@@ -281,6 +260,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
+void
+kvm_cachepc_init(void *p)
+{
++ cacheline *cl, *head;
+ int cpu;
+
+ local_irq_disable();
@@ -292,6 +272,12 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ cachepc_ctx = cachepc_get_ctx(L1);
+ cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
+
++ head = cl = cachepc_ds;
++ do {
++ cl = cl->next;
++ printk(KERN_WARNING "%i:%i\n", cl->cache_set, cl->cache_line);
++ } while (cl != head);
++
+ kvm_cachepc_single_access_test(p);
+ kvm_cachepc_single_eviction_test(p);
+
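
The debug walk above prints every line of the eviction set as set:way-group, which lets the new ordering be eyeballed against the prefetcher-defeating zig-zag. A stricter, hypothetical variant would assert that each (cache_set, cache_line) pair occurs exactly once (64 sets x 8 ways assumed, reusing cl and head):

/* hypothetical stricter check of the chain built by cachepc_prepare_ds */
static uint8_t seen[64][8];
head = cl = cachepc_ds;
do {
	cl = cl->next;
	WARN_ON(seen[cl->cache_set][cl->cache_line]++ != 0);
} while (cl != head);
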
@@ -378,7 +364,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
r = kvm_arch_init(opaque);
if (r)
-@@ -4848,6 +5098,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+@@ -4848,6 +5093,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_vfio_ops_init();
WARN_ON(r);
@@ -400,7 +386,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
return 0;
out_unreg:
-@@ -4872,6 +5137,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
+@@ -4872,6 +5132,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{