Validate cache topology at module init - cachepc - Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines

	cachepc Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines
	git clone https://git.sinitax.com/sinitax/cachepc
	Log \| Files \| Refs \| Submodules \| README \| sfeed.txt

commit 4804320a2e3c89664afc93214d3ad81b3a36c670
parent 1c198ee120f69d435289dbb4510058b536d091d9
Author: Louis Burda <quent.burda@gmail.com>
Date:   Fri, 21 Oct 2022 18:48:02 +0200

Validate cache topology at module init

Diffstat:
M cachepc/cachepc.c  | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M cachepc/cachepc.h  | 24 +++++++++++-------------
M cachepc/kvm.c  | 19 +++++++++++++++----
A compile_commands.json  | 1 +

4 files changed, 124 insertions(+), 29 deletions(-)
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
@@ -24,6 +24,92 @@ static void random_perm(uint32_t *arr, uint32_t arr_len);
 static void gen_random_indices(uint32_t *arr, uint32_t arr_len);
 static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
 
+/*
+ * Execute CPUID for the given leaf and return the resulting ECX value.
+ *
+ * CPUID overwrites EAX, EBX, ECX and EDX. EAX is therefore passed as an
+ * in/out operand and EBX/EDX are listed as clobbers, so the compiler does
+ * not keep live values in registers that the instruction destroys.
+ */
+static uint32_t
+cachepc_cpuid_ecx(uint32_t leaf)
+{
+	uint32_t ecx;
+
+	asm volatile ("cpuid"
+		: "+a"(leaf), "=c"(ecx)
+		:
+		: "ebx", "edx");
+
+	return ecx;
+}
+
+/*
+ * Compare the L1 and L2 cache geometry reported by CPUID against the
+ * compile-time L1_* and L2_* constants.
+ *
+ * Returns true if the topology does NOT match the expected values (or the
+ * L2 associativity encoding could not be decoded) and false if both cache
+ * levels match. Callers are expected to bail out on a true result.
+ * Each mismatch is reported via pr_warn together with the expected and
+ * the detected value of every parameter.
+ */
+bool
+cachepc_verify_topology(void)
+{
+	uint32_t val;
+	uint32_t assoc;
+	uint32_t linesize;
+	uint32_t size;
+	uint32_t sets;
+
+	/* REF: https://developer.amd.com/resources/developer-guides-manuals
+	 * (PPR 17H 31H, P.81) */
+
+	/* L1: ECX = size in KB [31:24], associativity [23:16],
+	 * line size in bytes [7:0] */
+	val = cachepc_cpuid_ecx(0x80000005);
+	size = ((val >> 24) & 0xFF) * 1024;
+	assoc = (val >> 16) & 0xFF;
+	linesize = val & 0xFF;
+	/* a zero field would divide by zero; report it as a mismatch instead */
+	sets = (linesize && assoc) ? size / (linesize * assoc) : 0;
+	if (size != L1_SIZE || assoc != L1_ASSOC
+			|| linesize != L1_LINESIZE || sets != L1_SETS) {
+		pr_warn("CachePC: L1 topology is invalid!\n");
+		pr_warn("CachePC: L1_SIZE (expected) %u vs. (real) %u\n",
+			L1_SIZE, size);
+		pr_warn("CachePC: L1_ASSOC (expected) %u vs. (real) %u\n",
+			L1_ASSOC, assoc);
+		pr_warn("CachePC: L1_LINESIZE (expected) %u vs. (real) %u\n",
+			L1_LINESIZE, linesize);
+		pr_warn("CachePC: L1_SETS (expected) %u vs. (real) %u\n",
+			L1_SETS, sets);
+		return true;
+	}
+
+	/* L2: ECX = size in KB [31:16], encoded associativity [15:12],
+	 * line size in bytes [7:0] */
+	val = cachepc_cpuid_ecx(0x80000006);
+	size = ((val >> 16) & 0xFFFF) * 1024;
+	assoc = (val >> 12) & 0xF;
+	linesize = val & 0xFF;
+	/* translate the 4-bit associativity encoding into a number of ways */
+	switch (assoc) {
+	case 0x1:
+	case 0x2:
+	case 0x4:
+		/* encoding equals the number of ways */
+		break;
+	case 0x6:
+		assoc = 8;
+		break;
+	case 0x8:
+		assoc = 16;
+		break;
+	case 0xA:
+		assoc = 32;
+		break;
+	case 0xB:
+		assoc = 48;
+		break;
+	case 0xC:
+		assoc = 64;
+		break;
+	case 0xD:
+		assoc = 96;
+		break;
+	case 0xE:
+		assoc = 128;
+		break;
+	case 0xF:
+		/* fully associative: every line of the cache is one way */
+		assoc = linesize ? size / linesize : 0;
+		break;
+	default:
+		pr_warn("CachePC: Read invalid L2 associativity: %u\n", assoc);
+		return true;
+	}
+	/* a zero field would divide by zero; report it as a mismatch instead */
+	sets = (linesize && assoc) ? size / (linesize * assoc) : 0;
+	if (size != L2_SIZE || assoc != L2_ASSOC
+			|| linesize != L2_LINESIZE || sets != L2_SETS) {
+		pr_warn("CachePC: L2 topology is invalid!\n");
+		pr_warn("CachePC: L2_SIZE (expected) %u vs. (real) %u\n",
+			L2_SIZE, size);
+		pr_warn("CachePC: L2_ASSOC (expected) %u vs. (real) %u\n",
+			L2_ASSOC, assoc);
+		pr_warn("CachePC: L2_LINESIZE (expected) %u vs. (real) %u\n",
+			L2_LINESIZE, linesize);
+		pr_warn("CachePC: L2_SETS (expected) %u vs. (real) %u\n",
+			L2_SETS, sets);
+		return true;
+	}
+
+	return false;
+}
+
 void
 cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	uint8_t host_guest, uint8_t kernel_user)
@@ -31,7 +117,8 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	uint64_t event;
 	uint64_t reg_addr;
 
-	/* REF: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
+	/* REF: https://developer.amd.com/resources/developer-guides-manuals
+	 * (PPR 17H 31H, P.166)
 	 *
 	 * performance event selection via 0xC001_020X with X = (0..A)[::2]
 	 * performance event reading via 0XC001_020X with X = (1..B)[::2]
@@ -62,13 +149,11 @@ cachepc_get_ctx(int cache_level)
 	if (cache_level == L1_CACHE) {
 		ctx->addressing = L1_ADDRESSING;
 		ctx->sets = L1_SETS;
-		ctx->associativity = L1_ASSOCIATIVITY;
-		ctx->access_time = L1_ACCESS_TIME;
+		ctx->associativity = L1_ASSOC;
 	} else if (cache_level == L2_CACHE) {
 		ctx->addressing = L2_ADDRESSING;
 		ctx->sets = L2_SETS;
-		ctx->associativity = L2_ASSOCIATIVITY;
-		ctx->access_time = L2_ACCESS_TIME;
+		ctx->associativity = L2_ASSOC;
 	} else {
 		return NULL;
 	}
@@ -97,14 +182,10 @@ cachepc_prepare_ds(cache_ctx *ctx)
 	cacheline **cacheline_ptr_arr;
 	cacheline *cache_ds;
 
-	//printk(KERN_WARNING "CachePC: Preparing ds..\n");
-	
        	cacheline_ptr_arr = allocate_cache_ds(ctx);
 	cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
 	kfree(cacheline_ptr_arr);
 
-	// printk(KERN_WARNING "CachePC: Preparing ds done\n");
-
 	return cache_ds;
 }
 
@@ -317,7 +398,8 @@ remove_cache_group_set(void *ptr)
  * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D
  * and D = Associativity = | cache set |
  */
-cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
+cacheline *
+build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
 	cacheline **first_cl_in_sets, **last_cl_in_sets;
 	cacheline **cl_ptr_arr_sorted;
 	cacheline *curr_cl;
@@ -387,7 +469,8 @@ cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
 /*
  * Helper function to build a randomised list of cacheline structs for a set
  */
-void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
+void
+build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
 {
 	cacheline *curr_cl;
 	uint32_t len, *idx_map;
@@ -476,7 +559,9 @@ gen_random_indices(uint32_t *arr, uint32_t arr_len)
 }
 
 
-bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) {
+bool
+is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len)
+{
 	uint32_t i;
 
 	for (i = 0; i < arr_len; ++i) {
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -3,9 +3,6 @@
 #include "asm.h"
 #include "uapi.h"
 
-#define CACHELINE_SIZE 64
-#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
-
 #define L1_CACHE 0
 #define L2_CACHE 1
 
@@ -13,19 +10,19 @@
 #define PHYSICAL_ADDRESSING 1
 
 #define L1_ADDRESSING VIRTUAL_ADDRESSING
+#define L1_ASSOC 8
+#define L1_LINESIZE 64
 #define L1_SETS 64
-#define L1_ASSOCIATIVITY 8
-#define L1_ACCESS_TIME 4
+#define L1_SIZE (L1_SETS * L1_ASSOC * L1_LINESIZE)
 
 #define L2_ADDRESSING PHYSICAL_ADDRESSING
-#define L2_SETS 512
-#define L2_ASSOCIATIVITY 8
-#define L2_ACCESS_TIME 12
+#define L2_ASSOC 8
+#define L2_LINESIZE 64
+#define L2_SETS 1024
+#define L2_SIZE (L2_SETS * L2_ASSOC * L2_LINESIZE)
 
-#define L3_ADDRESSING PHYSICAL_ADDRESSING
-#define L3_SETS 4096
-#define L3_ASSOCIATIVITY 16
-#define L3_ACCESS_TIME 30
+#define CACHELINE_SIZE L1_LINESIZE
+#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
 
 #define CACHEPC_GET_BIT(b, i) (((b) >> (i)) & 1)
 #define CACHEPC_SET_BIT(b, i) ((b) | (1 << (i)))
@@ -61,7 +58,6 @@ struct cache_ctx {
 
     uint32_t sets;
     uint32_t associativity;
-    uint32_t access_time;
     uint32_t nr_of_cachelines;
     uint32_t set_size;
     uint32_t cache_size;
@@ -85,6 +81,8 @@ struct cacheline {
 static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
 static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
 
+bool cachepc_verify_topology(void);
+
 void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
 	uint8_t host_guest, uint8_t kernel_user);
 
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -391,8 +391,9 @@ cachepc_kvm_stream_hwpf_test(void *p)
 		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
 	count += cachepc_read_pmc(CPC_L1MISS_PMC);
 
+	pass = (count == max) || (count == max + 1); /* +1 for pot. counter miss */
 	printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n",
-		count, max, (count == max) ? "passed" : "failed");
+		count, max, pass ? "passed" : "failed");
 
 	if (arg) *arg = (count == max);
 
@@ -968,7 +969,10 @@ cachepc_kvm_setup_test(void *p)
 
 	cpu = get_cpu();
 
-	printk(KERN_WARNING "CachePC: Running on core %i\n", cpu);
+	pr_warn("CachePC: Running on core %i\n", cpu);
+
+	if (cachepc_verify_topology())
+		goto exit;
 
 	cachepc_ctx = cachepc_get_ctx(L1_CACHE);
 	cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
@@ -980,6 +984,7 @@ cachepc_kvm_setup_test(void *p)
 	cachepc_kvm_single_eviction_test(NULL);
 	cachepc_kvm_stream_hwpf_test(NULL);
 
+exit:
 	put_cpu();
 }
 
@@ -988,6 +993,9 @@ cachepc_kvm_init(void)
 {
 	int ret;
 
+	cachepc_ctx = NULL;
+	cachepc_ds = NULL;
+
 	cachepc_msrmts_count = L1_SETS;
 	cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL);
 	BUG_ON(cachepc_msrmts == NULL);
@@ -1001,6 +1009,9 @@ cachepc_kvm_exit(void)
 {
 	kfree(cachepc_msrmts);
 
-	cachepc_release_ds(cachepc_ctx, cachepc_ds);
-	cachepc_release_ctx(cachepc_ctx);
+	if (cachepc_ds)
+		cachepc_release_ds(cachepc_ctx, cachepc_ds);
+
+	if (cachepc_ctx)
+		cachepc_release_ctx(cachepc_ctx);
 }
diff --git a/compile_commands.json b/compile_commands.json
@@ -0,0 +1 @@
+[]

M	cachepc/cachepc.c	\|	109	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M	cachepc/cachepc.h	\|	24	+++++++++++-------------
M	cachepc/kvm.c	\|	19	+++++++++++++++----
A	compile_commands.json	\|	1	+