cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP-protected virtual machines
git clone https://git.sinitax.com/sinitax/cachepc

commit 8f02a7bcb39234442039ae92dd5d3500c5b3d46d
parent 65daf1cb353f4ba5e2f08ccbce6b0d5220b0099a
Author: Louis Burda <quent.burda@gmail.com>
Date:   Wed, 25 Jan 2023 16:34:49 +0100

Replace bloated cache_ds alloc method

Diffstat:
M Makefile          |   2 +-
M cachepc/asm.S     |   4 ----
M cachepc/cachepc.c | 430 +++++++++++----------------------------------------------------------------------
M cachepc/cachepc.h |  59 ++++++++++-------------------------------------------
M cachepc/const.h   |   6 +-----
M cachepc/event.c   |  10 ++++++++--
M cachepc/kvm.c     |  44 ++++++++++++++++++--------------------------
M test/kvm-step.c   |   3 +++
8 files changed, 87 insertions(+), 471 deletions(-)
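
The centerpiece of the commit is the new cachepc_ds_alloc(), which replaces the cache_ctx-based builder (cachepc_get_ctx, cachepc_prepare_ds and the randomized-list helpers) with a single pass over one page-aligned buffer: one struct cacheline per L1 line, indexed by set and then by way, and chained into a circular doubly linked list. The user-space sketch below illustrates that indexing; it assumes L1_ASSOC is 8 (the value is defined outside this diff), trims struct cacheline to the fields the new code touches, and stands in for the kernel allocator rather than reproducing it.

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define L1_SETS  64
#define L1_ASSOC 8   /* assumed associativity; defined outside this diff */
#define L1_LINES (L1_SETS * L1_ASSOC)

struct cacheline {
	struct cacheline *next;
	struct cacheline *prev;
	uint64_t count;
	uint32_t cache_set;
	uint32_t cache_line;
	bool first;
	char padding[31];   /* pad each node to one 64-byte cache line */
};

/* Sketch of the new allocator: returns the list head and hands the
 * page-aligned backing array back through cl_arr_out for freeing. */
static struct cacheline *
ds_alloc_sketch(struct cacheline **cl_arr_out)
{
	struct cacheline **ptr, *cl_arr, *head;
	size_t i, idx;

	/* backing storage, page-aligned like cachepc_aligned_alloc() */
	cl_arr = aligned_alloc(4096, L1_LINES * sizeof(*cl_arr));
	if (!cl_arr)
		return NULL;

	ptr = calloc(L1_LINES, sizeof(*ptr));
	if (!ptr) {
		free(cl_arr);
		return NULL;
	}

	/* order cachelines by set, then by line (way) within each set */
	for (i = 0; i < L1_LINES; i++) {
		idx = (i % L1_SETS) * L1_ASSOC + i / L1_SETS;
		ptr[idx] = &cl_arr[i];
		ptr[idx]->cache_set = i % L1_SETS;
		ptr[idx]->cache_line = i / L1_SETS;
		ptr[idx]->first = (i / L1_SETS) == 0;
		ptr[idx]->count = 0;
	}

	/* close the circular doubly linked list over all lines */
	for (i = 0; i < L1_LINES; i++) {
		ptr[i]->next = ptr[(i + 1) % L1_LINES];
		ptr[i]->prev = ptr[(i + L1_LINES - 1) % L1_LINES];
	}

	*cl_arr_out = cl_arr;
	head = ptr[0];
	free(ptr);

	return head;
}

With this layout, cachepc_save_msrmts() simply walks the list via prev and, for every line flagged first, stores its count as the measurement for that line's cache set, so the per-set results still land in cachepc_msrmts[] as before.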

diff --git a/Makefile b/Makefile @@ -9,7 +9,7 @@ BINS = test/eviction test/kvm-eviction BINS += test/kvm-eviction-with_guest test/kvm-eviction-without_guest BINS += test/kvm-step test/kvm-step_guest BINS += test/kvm-pagestep test/kvm-pagestep_guest -#BINS += test/qemu-eviction_guest test/qemu-eviction +# BINS += test/qemu-eviction_guest test/qemu-eviction # BINS += test/qemu-aes_guest test/qemu-aes BINS += util/debug util/reset diff --git a/cachepc/asm.S b/cachepc/asm.S @@ -35,9 +35,7 @@ cachepc_prime_probe_test_asm: mov cachepc_ds, %r9 prime prime_probe_test %r9 %r10 %r8 - mov cachepc_ds, %r9 prime prime_probe_test1 %r9 %r10 %r8 - mov cachepc_ds, %r9 prime prime_probe_test2 %r9 %r10 %r8 probe prime_probe_test %r8 %r9 %r10 %r11 %r12 @@ -101,9 +99,7 @@ cachepc_single_eviction_test_asm: mov cachepc_ds, %r9 prime single_eviction_test %r9 %r10 %r8 - mov cachepc_ds, %r9 prime single_eviction_test2 %r9 %r10 %r8 - mov cachepc_ds, %r9 prime single_eviction_test3 %r9 %r10 %r8 mov (%rdi), %rax probe single_eviction_test %r8 %r9 %r10 %r11 %r12 diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c @@ -9,24 +9,8 @@ #include <linux/delay.h> #include <linux/ioctl.h> -#define SET_MASK(SETS) (((((uintptr_t) SETS) * L1_LINESIZE) - 1) ^ (L1_LINESIZE - 1)) - -#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK)) - #define MIN(a, b) ((a) < (b) ? (a) : (b)) -static void *remove_cache_set(cache_ctx *ctx, void *ptr); - -static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len); -static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static cacheline **allocate_cache_ds(cache_ctx *ctx); -static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); - -static void random_perm(uint32_t *arr, uint32_t arr_len); -static void gen_random_indices(uint32_t *arr, uint32_t arr_len); -static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); - bool cachepc_verify_topology(void) { @@ -59,59 +43,6 @@ cachepc_verify_topology(void) return true; } - /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.94 */ - val = native_cpuid_ecx(0x80000006); - size = ((val >> 16) & 0xFFFF) * 1024; - assoc = (val >> 12) & 0xF; - linesize = val & 0xFF; - switch (assoc) { - case 0x1: - case 0x2: - case 0x4: - break; - case 0x6: - assoc = 8; - break; - case 0x8: - assoc = 16; - break; - case 0xA: - assoc = 32; - break; - case 0xB: - assoc = 48; - break; - case 0xC: - assoc = 64; - break; - case 0xD: - assoc = 96; - break; - case 0xE: - assoc = 128; - break; - case 0xF: - assoc = size / linesize; - break; - default: - CPC_ERR("Read invalid L2 associativity: %i\n", assoc); - return true; - } - sets = size / (linesize * assoc); - if (size != L2_SIZE || assoc != L2_ASSOC - || linesize != L2_LINESIZE || sets != L2_SETS) { - CPC_ERR("L2 topology is invalid!\n"); - CPC_ERR("L2_SIZE (expected) %u vs. (real) %u\n", - L2_SIZE, size); - CPC_ERR("L2_ASSOC (expected) %u vs. (real) %u\n", - L2_ASSOC, assoc); - CPC_ERR("L2_LINESIZE (expected) %u vs. (real) %u\n", - L2_LINESIZE, linesize); - CPC_ERR("L2_SETS (expected) %u vs. (real) %u\n", - L2_SETS, sets); - return true; - } - return false; } @@ -148,8 +79,8 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask, event |= ((kernel_user & 0b11) * 1ULL) << 16; event |= ((host_guest & 0b11) * 1ULL) << 40; - // CPC_INFO("Initializing %i. 
PMC %02X:%02X (%016llx)\n", - // index, event_no, event_mask, event); + CPC_DBG("Initializing %i. PMC %02X:%02X (%016llx)\n", + index, event_no, event_mask, event); cachepc_write_msr(0xc0010200 + index * 2, ~0ULL, event); } @@ -162,64 +93,41 @@ cachepc_reset_pmc(uint8_t index) cachepc_write_msr(0xc0010201 + index * 2, ~0ULL, 0); } -cache_ctx * -cachepc_get_ctx(void) -{ - cache_ctx *ctx; - - ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); - BUG_ON(ctx == NULL); - - ctx->sets = L1_SETS; - ctx->associativity = L1_ASSOC; - ctx->nr_of_cachelines = ctx->sets * ctx->associativity; - ctx->set_size = L1_LINESIZE * ctx->associativity; - ctx->cache_size = ctx->sets * ctx->set_size; - - return ctx; -} - -void -cachepc_release_ctx(cache_ctx *ctx) +struct cacheline * +cachepc_ds_alloc(struct cacheline **cl_arr_out) { - kfree(ctx); -} + struct cacheline **cl_ptr_arr; + struct cacheline *cl_arr, *ds; + size_t i, idx; -/* initialises the complete cache data structure for the given context */ -cacheline * -cachepc_prepare_ds(cache_ctx *ctx) -{ - cacheline **cacheline_ptr_arr; - cacheline *cache_ds; + cl_arr = cachepc_aligned_alloc(PAGE_SIZE, L1_SIZE); - cacheline_ptr_arr = allocate_cache_ds(ctx); - cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); - kfree(cacheline_ptr_arr); + cl_ptr_arr = kzalloc(L1_LINES * sizeof(struct cacheline *), GFP_KERNEL); + BUG_ON(cl_ptr_arr == NULL); - return cache_ds; -} + /* order cachelines by set then line number */ + for (i = 0; i < L1_LINES; i++) { + idx = (i % L1_SETS) * L1_ASSOC + i / L1_SETS; + cl_ptr_arr[idx] = cl_arr + i; + cl_ptr_arr[idx]->cache_set = i % L1_SETS; + cl_ptr_arr[idx]->cache_line = i / L1_SETS; + cl_ptr_arr[idx]->first = (i / L1_SETS) == 0; + cl_ptr_arr[idx]->count = 0; + } -void -cachepc_release_ds(cache_ctx *ctx, cacheline *ds) -{ - kfree(remove_cache_set(ctx, ds)); -} + /* create doubly linked-list */ + for (i = 0; i < L1_LINES; i++) { + cl_ptr_arr[i]->next = cl_ptr_arr[(i + L1_LINES + 1) % L1_LINES]; + cl_ptr_arr[i]->prev = cl_ptr_arr[(i + L1_LINES - 1) % L1_LINES]; + } -cacheline * -cachepc_prepare_victim(cache_ctx *ctx, uint32_t set) -{ - cacheline *victim_set, *victim_cl; + *cl_arr_out = cl_arr; - victim_set = prepare_cache_set_ds(ctx, &set, 1); - victim_cl = victim_set; + ds = cl_ptr_arr[0]; - return victim_cl; -} + kfree(cl_ptr_arr); -void -cachepc_release_victim(cache_ctx *ctx, cacheline *victim) -{ - kfree(remove_cache_set(ctx, victim)); + return ds; } void * @@ -236,23 +144,24 @@ cachepc_aligned_alloc(size_t alignment, size_t size) } void -cachepc_save_msrmts(cacheline *head) +cachepc_save_msrmts(struct cacheline *head) { - cacheline *curr_cl; + struct cacheline *cl; size_t i; - curr_cl = head; + cl = head; do { - if (CL_IS_FIRST(curr_cl->flags)) { - BUG_ON(curr_cl->cache_set >= L1_SETS); - WARN_ON(curr_cl->count > L1_ASSOC); - cachepc_msrmts[curr_cl->cache_set] = curr_cl->count; + if (cl->first) { + BUG_ON(cl->cache_set >= L1_SETS); + WARN_ON(cl->count > L1_ASSOC); + cachepc_msrmts[cl->cache_set] = cl->count; } else { - BUG_ON(curr_cl->count != 0); + WARN_ON(cl->count != 0); } + cl->count = 0; - curr_cl = curr_cl->prev; - } while (curr_cl != head); + cl = cl->prev; + } while (cl != head); if (cachepc_baseline_measure) { for (i = 0; i < L1_SETS; i++) { @@ -271,266 +180,19 @@ cachepc_save_msrmts(cacheline *head) } void -cachepc_print_msrmts(cacheline *head) +cachepc_print_msrmts(struct cacheline *head) { - cacheline *curr_cl; + struct cacheline *cl; - curr_cl = head; + cl = head; do { - if (CL_IS_FIRST(curr_cl->flags)) { + if (cl->first) { 
CPC_INFO("Count for cache set %i: %llu\n", - curr_cl->cache_set, curr_cl->count); - } - - curr_cl = curr_cl->prev; - } while (curr_cl != head); -} - -cacheline * -prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len) -{ - cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets; - cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds; - uint32_t i, cache_groups_len, cache_groups_max_len; - uint32_t *cache_groups; - - cache_ds = cachepc_prepare_ds(ctx); - - first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(first_cl_in_sets == NULL); - - last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(last_cl_in_sets == NULL); - - /* find the cache groups that are used, so that we can delete the - * other ones later (to avoid memory leaks) */ - cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE; - cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(cache_groups == NULL); - - cache_groups_len = 0; - for (i = 0; i < sets_len; ++i) { - if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) { - cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE; - ++cache_groups_len; - } - } - - to_del_cls = NULL; - curr_cl = cache_ds; - - /* extract the partial data structure for the cache sets and - * ensure correct freeing */ - do { - next_cl = curr_cl->next; - - if (CL_IS_FIRST(curr_cl->flags)) { - first_cl_in_sets[curr_cl->cache_set] = curr_cl; + cl->cache_set, cl->count); } - if (CL_IS_LAST(curr_cl->flags)) { - last_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - - curr_cl = next_cl; - - } while (curr_cl != cache_ds); - - /* fix partial cache set ds */ - for (i = 0; i < sets_len; ++i) { - last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]]; - first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]]; - } - cache_set_ds = first_cl_in_sets[sets[0]]; - - kfree(first_cl_in_sets); - kfree(last_cl_in_sets); - kfree(cache_groups); - - return cache_set_ds; -} - -void * -remove_cache_set(cache_ctx *ctx, void *ptr) -{ - return (void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets)); -} -/* - * Create a randomized doubly linked list with the following structure: - * set A <--> set B <--> ... <--> set X <--> set A - * where each set is one of the cache sets, in a random order. - * The sets are a doubly linked list of cachelines themselves: - * set A: - * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ... 
- * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D - * and D = Associativity = | cache set | - */ -cacheline * -build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) { - cacheline **first_cl_in_sets, **last_cl_in_sets; - cacheline **cl_ptr_arr_sorted; - cacheline *curr_cl; - cacheline *cache_ds; - uint32_t *idx_per_set; - uint32_t idx_curr_set, set_offset; - uint32_t i, j, set, set_len; - uint32_t *idx_map; - - idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_per_set == NULL); - - cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr_sorted == NULL); - - set_len = ctx->associativity; - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - set_offset = cl_ptr_arr[i]->cache_set * set_len; - idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set]; - - cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i]; - idx_per_set[cl_ptr_arr[i]->cache_set] += 1; - } - - /* build doubly linked list for every set */ - for (set = 0; set < ctx->sets; ++set) { - set_offset = set * set_len; - build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset); - } - - /* relink the sets among each other */ - idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, ctx->sets); - - first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(first_cl_in_sets == NULL); - - last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(last_cl_in_sets == NULL); - - for (j = 0; j < ctx->nr_of_cachelines; ++j) { - curr_cl = cl_ptr_arr_sorted[j]; - if (CL_IS_FIRST(curr_cl->flags)) - first_cl_in_sets[curr_cl->cache_set] = curr_cl; - if (CL_IS_LAST(curr_cl->flags)) - last_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - - /* connect up sets */ - for (i = 0; i < ctx->sets; ++i) { - last_cl_in_sets[idx_map[i]]->next = first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]; - first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]->prev = last_cl_in_sets[idx_map[i]]; - } - cache_ds = first_cl_in_sets[idx_map[0]]; - - kfree(cl_ptr_arr_sorted); - kfree(first_cl_in_sets); - kfree(last_cl_in_sets); - kfree(idx_per_set); - kfree(idx_map); - - return cache_ds; -} - -/* - * Helper function to build a randomised list of cacheline structs for a set - */ -void -build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr) -{ - cacheline *curr_cl; - uint32_t len, *idx_map; - uint16_t i; - - len = ctx->associativity; - idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, len); - - for (i = 0; i < len; ++i) { - curr_cl = cacheline_ptr_arr[idx_map[i]]; - curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]]; - curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]]; - - if (idx_map[i] == 0) { - curr_cl->flags = CL_SET_FIRST(CL_DEFAULT_FLAGS); - curr_cl->prev->flags = CL_SET_LAST(CL_DEFAULT_FLAGS); - } else { - curr_cl->flags |= CL_DEFAULT_FLAGS; - } - } - - kfree(idx_map); -} - -/* - * Allocate a data structure that fills the complete cache, i.e. consisting - * of `associativity` many cache lines for each cache set. 
- */ -cacheline ** -allocate_cache_ds(cache_ctx *ctx) -{ - cacheline **cl_ptr_arr, *cl_arr; - uint32_t i; - - cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr == NULL); - - cl_arr = cachepc_aligned_alloc(PAGE_SIZE, ctx->cache_size); - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - cl_ptr_arr[i] = cl_arr + i; - cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]); - cl_ptr_arr[i]->cache_line = i / ctx->sets; - cl_ptr_arr[i]->count = 0; - } - - return cl_ptr_arr; -} - -uint16_t -get_virt_cache_set(cache_ctx *ctx, void *ptr) -{ - return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / L1_LINESIZE); -} - -void -random_perm(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - /* no special ordering needed when prefetcher is disabled */ - for (i = 0; i < arr_len; i++) - arr[i] = i; - - // /* prevent stream prefetching by alternating access direction */ - // mid = arr_len / 2; - // for (i = 0; i < arr_len; i++) - // arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2); -} - -void -gen_random_indices(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - for (i = 0; i < arr_len; ++i) - arr[i] = i; - random_perm(arr, arr_len); -} - -bool -is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - for (i = 0; i < arr_len; ++i) { - if (arr[i] == elem) - return true; - } - - return false; + cl = cl->prev; + } while (cl != head); } void diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h @@ -5,24 +5,6 @@ #include "../../include/asm/apic.h" #include "../../include/asm/irq_vectors.h" -#define CACHE_GROUP_SIZE (PAGE_SIZE / L1_LINESIZE) - -#define CACHEPC_GET_BIT(b, i) (((b) >> (i)) & 1) -#define CACHEPC_SET_BIT(b, i) ((b) | (1 << (i))) - -/* Operate cacheline flags - * Used flags: - * 32 2 1 0 - * | | ... | cache group initialized | last | first | - */ -#define CL_DEFAULT_FLAGS 0 -#define CL_SET_FIRST(flags) CACHEPC_SET_BIT(flags, 0) -#define CL_SET_LAST(flags) CACHEPC_SET_BIT(flags, 1) -#define CL_SET_GROUP_INIT(flags) CACHEPC_SET_BIT(flags, 2) -#define CL_IS_FIRST(flags) CACHEPC_GET_BIT(flags, 0) -#define CL_IS_LAST(flags) CACHEPC_GET_BIT(flags, 1) -#define CL_IS_GROUP_INIT(flags) CACHEPC_GET_BIT(flags, 2) - #define PMC_KERNEL (1 << 1) #define PMC_USER (1 << 0) @@ -35,27 +17,16 @@ #define CPC_WARN(...) do { pr_warn("CachePC: " __VA_ARGS__); } while (0) #define CPC_ERR(...) 
do { pr_err("CachePC: " __VA_ARGS__); } while (0) -typedef struct cacheline cacheline; -typedef struct cache_ctx cache_ctx; - -struct cache_ctx { - uint32_t sets; - uint32_t associativity; - uint32_t nr_of_cachelines; - uint32_t set_size; - uint32_t cache_size; -}; - struct cacheline { - cacheline *next; - cacheline *prev; + struct cacheline *next; + struct cacheline *prev; uint64_t count; uint32_t cache_set; uint32_t cache_line; - uint32_t flags; + bool first; - char padding[28]; + char padding[31]; }; struct cpc_fault { @@ -84,22 +55,15 @@ void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask, uint8_t host_guest, uint8_t kernel_user); void cachepc_reset_pmc(uint8_t index); -cache_ctx *cachepc_get_ctx(void); -void cachepc_release_ctx(cache_ctx *ctx); - -cacheline *cachepc_prepare_ds(cache_ctx *ctx); -void cachepc_release_ds(cache_ctx *ctx, cacheline *ds); - -cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set); -void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr); +struct cacheline *cachepc_ds_alloc(struct cacheline **ds_ul); void *cachepc_aligned_alloc(size_t alignment, size_t size); -void cachepc_save_msrmts(cacheline *head); -void cachepc_print_msrmts(cacheline *head); +void cachepc_save_msrmts(struct cacheline *head); +void cachepc_print_msrmts(struct cacheline *head); -cacheline *cachepc_prime(cacheline *head); -void cachepc_probe(cacheline *head); +struct cacheline *cachepc_prime(struct cacheline *head); +void cachepc_probe(struct cacheline *head); uint64_t cachepc_read_pmc(uint64_t event); @@ -107,8 +71,6 @@ void cachepc_apic_oneshot_run(uint32_t interval); extern bool cachepc_debug; -extern struct cacheline *cachepc_victim; - extern uint8_t *cachepc_msrmts; extern uint8_t *cachepc_baseline; extern bool cachepc_baseline_measure; @@ -138,8 +100,7 @@ extern struct cpc_track_exec cachepc_track_exec; extern struct list_head cachepc_faults; -extern cache_ctx *cachepc_ctx; -extern cacheline *cachepc_ds; +extern struct cacheline *cachepc_ds; extern uint64_t cachepc_regs_tmp[16]; extern uint64_t cachepc_regs_vm[16]; diff --git a/cachepc/const.h b/cachepc/const.h @@ -4,11 +4,7 @@ #define L1_LINESIZE 64 #define L1_SETS 64 #define L1_SIZE (L1_SETS * L1_ASSOC * L1_LINESIZE) - -#define L2_ASSOC 8 -#define L2_LINESIZE 64 -#define L2_SETS 1024 -#define L2_SIZE (L2_SETS * L2_ASSOC * L2_LINESIZE) +#define L1_LINES (L1_SETS * L1_ASSOC) #define CPC_ISOLCPU 2 diff --git a/cachepc/event.c b/cachepc/event.c @@ -49,6 +49,8 @@ cachepc_send_event(struct cpc_event event) event.id = cachepc_last_event_sent; cachepc_event_avail = true; cachepc_event = event; + + CPC_DBG("Sent Event: id %llu\n", event.id); write_unlock(&cachepc_event_lock); /* wait for ack with timeout */ @@ -144,7 +146,9 @@ cachepc_event_is_done(uint64_t id) bool done; read_lock(&cachepc_event_lock); - done = cachepc_last_event_acked >= id; + CPC_DBG("Event Send: Event not done %llu %llu\n", + cachepc_last_event_acked, id); + done = cachepc_last_event_acked == id; read_unlock(&cachepc_event_lock); return done; @@ -157,6 +161,8 @@ cachepc_handle_poll_event_ioctl(struct cpc_event __user *event) read_lock(&cachepc_event_lock); if (!cachepc_event_avail) { + CPC_DBG("Event Poll: No event avail %llu %llu\n", + cachepc_last_event_sent, cachepc_last_event_acked); read_unlock(&cachepc_event_lock); return -EAGAIN; } @@ -165,10 +171,10 @@ cachepc_handle_poll_event_ioctl(struct cpc_event __user *event) write_lock(&cachepc_event_lock); if (cachepc_event_avail) { err = copy_to_user(event, &cachepc_event, sizeof(struct 
cpc_event)); - cachepc_event_avail = false; } else { err = -EAGAIN; } + if (!err) cachepc_event_avail = false; write_unlock(&cachepc_event_lock); return err; diff --git a/cachepc/kvm.c b/cachepc/kvm.c @@ -21,9 +21,6 @@ bool cachepc_debug = false; EXPORT_SYMBOL(cachepc_debug); -struct cacheline *cachepc_victim; -EXPORT_SYMBOL(cachepc_victim); - uint8_t *cachepc_msrmts = NULL; EXPORT_SYMBOL(cachepc_msrmts); @@ -74,9 +71,8 @@ EXPORT_SYMBOL(cachepc_faults); struct cpc_track_exec cachepc_track_exec; EXPORT_SYMBOL(cachepc_track_exec); -cache_ctx *cachepc_ctx = NULL; -cacheline *cachepc_ds = NULL; -EXPORT_SYMBOL(cachepc_ctx); +struct cacheline *cachepc_ds_ul = NULL; +struct cacheline *cachepc_ds = NULL; EXPORT_SYMBOL(cachepc_ds); uint64_t cachepc_regs_tmp[16]; @@ -166,7 +162,7 @@ void cachepc_stream_hwpf_test(void) { const uint32_t max = 10; - cacheline *lines; + struct cacheline *lines; uint32_t count; int n; @@ -194,9 +190,9 @@ cachepc_stream_hwpf_test(void) void cachepc_single_eviction_test(void *p) { - cacheline *victim; - uint32_t target; - uint32_t *arg; + struct cacheline *victim_ul; + struct cacheline *victim; + uint32_t target, *arg; int n, i, count; arg = p; @@ -208,7 +204,8 @@ cachepc_single_eviction_test(void *p) if (arg && *arg >= L1_SETS) return; target = arg ? *arg : 48; - victim = cachepc_prepare_victim(cachepc_ctx, target); + victim_ul = cachepc_aligned_alloc(PAGE_SIZE, L1_SIZE); + victim = &victim_ul[target]; for (n = 0; n < TEST_REPEAT_MAX; n++) { memset(cachepc_msrmts, 0, L1_SETS); @@ -232,7 +229,7 @@ cachepc_single_eviction_test(void *p) if (arg) *arg = count; } - cachepc_release_victim(cachepc_ctx, victim); + kfree(victim_ul); } void @@ -291,6 +288,8 @@ cachepc_kvm_reset_ioctl(void __user *arg_user) cachepc_kvm_reset_tracking_ioctl(NULL); cachepc_kvm_reset_baseline_ioctl(NULL); + cachepc_pause_vm = false; + cachepc_singlestep = false; cachepc_singlestep_reset = false; @@ -699,10 +698,7 @@ cachepc_kvm_setup_test(void *p) if (cachepc_verify_topology()) goto exit; - cachepc_ctx = cachepc_get_ctx(); - cachepc_ds = cachepc_prepare_ds(cachepc_ctx); - - cachepc_victim = cachepc_prepare_victim(cachepc_ctx, 15); + cachepc_ds = cachepc_ds_alloc(&cachepc_ds_ul); cachepc_kvm_system_setup(); @@ -721,11 +717,10 @@ cachepc_kvm_init(void) { int ret; - cachepc_ctx = NULL; - cachepc_ds = NULL; - cachepc_debug = false; - cachepc_victim = NULL; + + cachepc_ds = NULL; + cachepc_ds_ul = NULL; cachepc_retinst = 0; cachepc_long_step = false; @@ -759,12 +754,9 @@ void cachepc_kvm_exit(void) { kfree(cachepc_msrmts); - kfree(cachepc_baseline); - kfree(cachepc_victim); - if (cachepc_ds) - cachepc_release_ds(cachepc_ctx, cachepc_ds); + kfree(cachepc_baseline); - if (cachepc_ctx) - cachepc_release_ctx(cachepc_ctx); + if (cachepc_ds_ul) + kfree(cachepc_ds_ul); } diff --git a/test/kvm-step.c b/test/kvm-step.c @@ -139,13 +139,16 @@ main(int argc, const char **argv) eventcnt += monitor(&kvm, true); } + printf("Req pause\n"); ret = ioctl(kvm_dev, KVM_CPC_VM_REQ_PAUSE); if (ret) err(1, "ioctl KVM_CPC_VM_REQ_PAUSE"); while (1) { + printf("Monitor Polling\n"); ret = ioctl(kvm_dev, KVM_CPC_POLL_EVENT, &event); if (ret && errno == EAGAIN) continue; if (ret) err(1, "ioctl KVM_CPC_POLL_EVENT"); + printf("Monitor Event\n"); if (event.type == CPC_EVENT_PAUSE) break;
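
The commit also tightens the event handshake: cachepc_event_is_done() now requires an exact acknowledgement (cachepc_last_event_acked == id rather than >=), and the poll ioctl clears cachepc_event_avail only once copy_to_user() has succeeded, so a failed copy no longer discards the pending event. A condensed sketch of the fixed poll path, with the new debug prints omitted and the locks and globals taken from the module as shown in the diff:

static int
cachepc_poll_event_sketch(struct cpc_event __user *event)
{
	int err;

	/* fast path: bail out early if nothing is pending */
	read_lock(&cachepc_event_lock);
	if (!cachepc_event_avail) {
		read_unlock(&cachepc_event_lock);
		return -EAGAIN;
	}
	read_unlock(&cachepc_event_lock);

	/* re-check under the write lock before handing the event out */
	write_lock(&cachepc_event_lock);
	if (cachepc_event_avail)
		err = copy_to_user(event, &cachepc_event,
			sizeof(struct cpc_event));
	else
		err = -EAGAIN;
	if (!err)
		cachepc_event_avail = false; /* consume only on success */
	write_unlock(&cachepc_event_lock);

	return err;
}

On the user side, test/kvm-step.c keeps calling KVM_CPC_POLL_EVENT and treats EAGAIN as "no event yet", which is the loop the added printfs in this diff instrument.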