From 58d8565f015f9e06e1e51a0fe4654b966b2c27c0 Mon Sep 17 00:00:00 2001 From: Louis Burda Date: Wed, 5 Oct 2022 15:05:19 +0200 Subject: Refactor sevstep kernel patch into repository --- Makefile | 31 +- cachepc/asm.h | 66 ++ cachepc/cache_types.h | 66 ++ cachepc/cachepc.c | 445 ++++++++++++ cachepc/cachepc.h | 188 +++++ cachepc/device_conf.h | 29 + cachepc/kvm.c | 392 +++++++++++ cachepc/kvm.h | 6 + cachepc/uapi.h | 8 + cachepc/util.c | 38 + cachepc/util.h | 8 + kmod/asm.h | 66 -- kmod/cache_types.h | 66 -- kmod/cachepc.c | 445 ------------ kmod/cachepc.h | 188 ----- kmod/cachepc_user.h | 8 - kmod/device_conf.h | 29 - kmod/kvm.c | 392 ----------- kmod/kvm.h | 6 - kmod/util.c | 38 - kmod/util.h | 8 - patch.diff | 1864 +++---------------------------------------------- sevstep/kvm.c | 205 ++++++ sevstep/kvm.h | 4 + sevstep/mmu.c | 132 ++++ sevstep/sevstep.c | 129 ++++ sevstep/sevstep.h | 67 ++ sevstep/uapi.h | 86 +++ sevstep/uspt.c | 503 +++++++++++++ sevstep/uspt.h | 49 ++ test/access.c | 2 +- test/eviction.c | 2 +- test/kvm.c | 3 +- test/sev-es.c | 3 +- test/sev.c | 3 +- test/sevstep.c | 32 + 36 files changed, 2574 insertions(+), 3033 deletions(-) create mode 100644 cachepc/asm.h create mode 100644 cachepc/cache_types.h create mode 100644 cachepc/cachepc.c create mode 100644 cachepc/cachepc.h create mode 100644 cachepc/device_conf.h create mode 100644 cachepc/kvm.c create mode 100644 cachepc/kvm.h create mode 100644 cachepc/uapi.h create mode 100644 cachepc/util.c create mode 100644 cachepc/util.h delete mode 100644 kmod/asm.h delete mode 100644 kmod/cache_types.h delete mode 100644 kmod/cachepc.c delete mode 100644 kmod/cachepc.h delete mode 100644 kmod/cachepc_user.h delete mode 100644 kmod/device_conf.h delete mode 100644 kmod/kvm.c delete mode 100644 kmod/kvm.h delete mode 100644 kmod/util.c delete mode 100644 kmod/util.h create mode 100644 sevstep/kvm.c create mode 100644 sevstep/kvm.h create mode 100644 sevstep/mmu.c create mode 100644 sevstep/sevstep.c create mode 100644 sevstep/sevstep.h create mode 100644 sevstep/uapi.h create mode 100644 sevstep/uspt.c create mode 100644 sevstep/uspt.h mode change 100755 => 100644 test/access.c mode change 100755 => 100644 test/eviction.c mode change 100755 => 100644 test/kvm.c mode change 100755 => 100644 test/sev-es.c mode change 100755 => 100644 test/sev.c create mode 100644 test/sevstep.c diff --git a/Makefile b/Makefile index 5f394db..3388608 100755 --- a/Makefile +++ b/Makefile @@ -1,28 +1,31 @@ -KERNEL_SOURCE ?= /usr/src/linux +LINUX ?= /usr/src/linux PWD := $(shell pwd) -all: build test/eviction test/access test/kvm test/sev test/sev-es +all: build test/eviction test/access test/kvm test/sev test/sev-es test/sevstep clean: - $(MAKE) -C $(KERNEL_SOURCE) SUBDIRS=arch/x86/kvm clean + $(MAKE) -C $(LINUX) SUBDIRS=arch/x86/kvm clean -$(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc: - ln -sf $(PWD)/kmod $@ +$(LINUX)/arch/x86/kvm/svm/cachepc: + ln -sf $(PWD)/cachepc $@ -build: - $(MAKE) -C $(KERNEL_SOURCE) -j6 M=arch/x86/kvm +$(LINUX)/arch/x86/kvm/sevstep: + ln -sf $(PWD)/sevstep $@ + +build: $(LINUX)/arch/x86/kvm/svm/cachepc $(LINUX)/arch/x86/kvm/sevstep + $(MAKE) -C $(LINUX) -j6 M=arch/x86/kvm load: sudo rmmod kvm_amd || true sudo rmmod kvm || true - sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm.ko - sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm-amd.ko + sudo insmod $(LINUX)/arch/x86/kvm/kvm.ko + sudo insmod $(LINUX)/arch/x86/kvm/kvm-amd.ko + +test/%: test/%.c cachepc/cachepc_user.h + clang -o $@ $< -fsanitize=address -I . 
-Wunused-variable -test/%: test/%.c kmod/cachepc_user.h -# $(CC) -o $@ $< -I kmod - clang -fsanitize=address -o $@ $< -I kmod -Wunused-variable -update: - git -C $(KERNEL_SOURCE) diff 0aaa1e599bee256b3b15643bbb95e80ce7aa9be5 -G. > patch.diff +update: + git -C $(LINUX) diff 0aaa1e599bee256b3b15643bbb95e80ce7aa9be5 -G. > patch.diff .PHONY: all clean build load update diff --git a/cachepc/asm.h b/cachepc/asm.h new file mode 100644 index 0000000..9e9385a --- /dev/null +++ b/cachepc/asm.h @@ -0,0 +1,66 @@ +#pragma once + +#include + +#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx" + +__attribute__((always_inline)) +static inline void cachepc_cpuid(void); + +__attribute__((always_inline)) +static inline void cachepc_lfence(void); + +__attribute__((always_inline)) +static inline void cachepc_sfence(void); + +__attribute__((always_inline)) +static inline void cachepc_mfence(void); + +__attribute__((always_inline)) +static inline void cachepc_readq(void *p); + +void +cachepc_cpuid(void) +{ + asm volatile( + "mov $0x80000005, %%eax\n\t" + "cpuid\n\t" + ::: CPUID_AFFECTED_REGS + ); +} + +void +cachepc_lfence(void) +{ + asm volatile( + "lfence\n\t" + ::: "memory" + ); +} + +void +cachepc_sfence(void) +{ + asm volatile( + "sfence\n\t" + ::: "memory" + ); +} + +void +cachepc_mfence(void) +{ + asm volatile( + "mfence\n\t" + ::: "memory" + ); +} + +void +cachepc_readq(void *p) +{ + asm volatile ( + "movq (%0), %%r10\n\t" + : : "r" (p) : "r10" + ); +} diff --git a/cachepc/cache_types.h b/cachepc/cache_types.h new file mode 100644 index 0000000..b337d55 --- /dev/null +++ b/cachepc/cache_types.h @@ -0,0 +1,66 @@ +#pragma once + +#include "device_conf.h" + +#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1)) + +#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK)) + +#define GET_BIT(b, i) (((b) >> (i)) & 1) +#define SET_BIT(b, i) ((b) | (1 << (i))) + +/* Operate cacheline flags + * Used flags: + * 32 2 1 0 + * | | ... 
| cache group initialized | last | first |
+ */
+#define DEFAULT_FLAGS 0
+#define SET_FIRST(flags) SET_BIT(flags, 0)
+#define SET_LAST(flags) SET_BIT(flags, 1)
+#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2)
+#define IS_FIRST(flags) GET_BIT(flags, 0)
+#define IS_LAST(flags) GET_BIT(flags, 1)
+#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
+
+#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
+#define CL_PREV_OFFSET offsetof(struct cacheline, prev)
+
+typedef enum cache_level cache_level;
+typedef enum addressing_type addressing_type;
+typedef struct cacheline cacheline;
+typedef struct cache_ctx cache_ctx;
+
+enum cache_level {L1, L2};
+enum addressing_type {VIRTUAL, PHYSICAL};
+
+struct cache_ctx {
+	cache_level cache_level;
+	addressing_type addressing;
+
+	uint32_t sets;
+	uint32_t associativity;
+	uint32_t access_time;
+	uint32_t nr_of_cachelines;
+	uint32_t set_size;
+	uint32_t cache_size;
+};
+
+struct cacheline {
+	// Doubly linked list inside same set
+	// Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET
+	// must be kept up to date
+	cacheline *next;
+	cacheline *prev;
+
+	uint32_t cache_set;
+	uint32_t cache_line;
+	uint32_t flags;
+
+	// Unused padding to fill cache line
+	uint64_t count;
+
+	char padding[24];
+};
+
+static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
+static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
new file mode 100644
index 0000000..09ed705
--- /dev/null
+++ b/cachepc/cachepc.c
@@ -0,0 +1,445 @@
+#include "cachepc.h"
+
+#include
+#include
+#include
+#include
+#include
+
+static void cl_insert(cacheline *last_cl, cacheline *new_cl);
+static void *remove_cache_set(cache_ctx *ctx, void *ptr);
+static void *remove_cache_group_set(void *ptr);
+
+static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len);
+static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static cacheline **allocate_cache_ds(cache_ctx *ctx);
+static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
+
+void __attribute__((optimize(1))) // prevent instruction reordering
+cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
+{
+	cachepc_prime(cl);
+	asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
+}
+
+void __attribute__((optimize(1))) // prevent instruction reordering
+cachepc_probe_vcall(uintptr_t ret, cacheline *cl)
+{
+	cachepc_probe(cl);
+	asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
+}
+
+void
+cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask)
+{
+	uint64_t event;
+	uint64_t reg_addr;
+
+	/* REF: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
+	 *
+	 * performance event selection via 0xC001_020X with X = (0..A)[::2]
+	 * performance event reading via 0xC001_020X with X = (1..B)[::2]
+	 */
+
+	WARN_ON(index >= 6);
+	if (index >= 6) return;
+
+	reg_addr = 0xc0010200 + index * 2;
+	event = event_no | (event_mask << 8);
+	event |= (1ULL << 17); /* OS (kernel) events only */
+	event |= (1ULL << 22); /* enable performance counter */
+	event |= (1ULL << 40); /* Host events only */
+	printk(KERN_WARNING "CachePC: Initialized %i. 
PMC %02X:%02X\n", + index, event_no, event_mask); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); +} + +cache_ctx * +cachepc_get_ctx(cache_level cache_level) +{ + cache_ctx *ctx; + + ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); + BUG_ON(ctx == NULL); + + BUG_ON(cache_level != L1); + if (cache_level == L1) { + ctx->addressing = L1_ADDRESSING; + ctx->sets = L1_SETS; + ctx->associativity = L1_ASSOCIATIVITY; + ctx->access_time = L1_ACCESS_TIME; + } else if (cache_level == L2) { + ctx->addressing = L2_ADDRESSING; + ctx->sets = L2_SETS; + ctx->associativity = L2_ASSOCIATIVITY; + ctx->access_time = L2_ACCESS_TIME; + } else { + return NULL; + } + + ctx->cache_level = cache_level; + ctx->nr_of_cachelines = ctx->sets * ctx->associativity; + ctx->set_size = CACHELINE_SIZE * ctx->associativity; + ctx->cache_size = ctx->sets * ctx->set_size; + + return ctx; +} + +void +cachepc_release_ctx(cache_ctx *ctx) +{ + kfree(ctx); +} + + +/* + * Initialises the complete cache data structure for the given context + */ +cacheline * +cachepc_prepare_ds(cache_ctx *ctx) +{ + cacheline **cacheline_ptr_arr; + cacheline *cache_ds; + + //printk(KERN_WARNING "CachePC: Preparing ds..\n"); + + cacheline_ptr_arr = allocate_cache_ds(ctx); + cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); + kfree(cacheline_ptr_arr); + + // printk(KERN_WARNING "CachePC: Preparing ds done\n"); + + return cache_ds; +} + +void +cachepc_release_ds(cache_ctx *ctx, cacheline *ds) +{ + kfree(remove_cache_set(ctx, ds)); +} + +cacheline * +cachepc_prepare_victim(cache_ctx *ctx, uint32_t set) +{ + cacheline *victim_set, *victim_cl; + cacheline *curr_cl, *next_cl; + + victim_set = prepare_cache_set_ds(ctx, &set, 1); + victim_cl = victim_set; + + // Free the other lines in the same set that are not used. + if (ctx->addressing == PHYSICAL) { + curr_cl = victim_cl->next; + do { + next_cl = curr_cl->next; + // Here, it is ok to free them directly, as every line in the same + // set is from a different page anyway. 
+			kfree(remove_cache_group_set(curr_cl));
+			curr_cl = next_cl;
+		} while(curr_cl != victim_cl);
+	}
+
+	return victim_cl;
+}
+
+void
+cachepc_release_victim(cache_ctx *ctx, cacheline *victim)
+{
+	kfree(remove_cache_set(ctx, victim));
+}
+
+void
+cachepc_save_msrmts(cacheline *head)
+{
+	cacheline *curr_cl;
+
+	// printk(KERN_WARNING "CachePC: Updating /proc/cachepc\n");
+
+	curr_cl = head;
+	do {
+		if (IS_FIRST(curr_cl->flags)) {
+			BUG_ON(curr_cl->cache_set >= cachepc_msrmts_count);
+			cachepc_msrmts[curr_cl->cache_set] = curr_cl->count;
+		}
+
+		curr_cl = curr_cl->prev;
+	} while (curr_cl != head);
+}
+
+void
+cachepc_print_msrmts(cacheline *head)
+{
+	cacheline *curr_cl;
+
+	curr_cl = head;
+	do {
+		if (IS_FIRST(curr_cl->flags)) {
+			printk(KERN_WARNING "CachePC: Count for cache set %i: %llu\n",
+				curr_cl->cache_set, curr_cl->count);
+		}
+
+		curr_cl = curr_cl->prev;
+	} while (curr_cl != head);
+}
+
+
+cacheline *
+prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len)
+{
+	cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets;
+	cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds;
+	uint32_t i, cache_groups_len, cache_groups_max_len;
+	uint32_t *cache_groups;
+
+	cache_ds = cachepc_prepare_ds(ctx);
+
+	first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(first_cl_in_sets == NULL);
+
+	last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(last_cl_in_sets == NULL);
+
+	// Find the cache groups that are used, so that we can delete the other ones
+	// later (to avoid memory leaks)
+	cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE;
+	cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL);
+	BUG_ON(cache_groups == NULL);
+
+	cache_groups_len = 0;
+	for (i = 0; i < sets_len; ++i) {
+		if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) {
+			cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE;
+			++cache_groups_len;
+		}
+	}
+
+	to_del_cls = NULL;
+	curr_cl = cache_ds;
+
+	// Extract the partial data structure for the cache sets and ensure correct freeing
+	do {
+		next_cl = curr_cl->next;
+
+		if (IS_FIRST(curr_cl->flags)) {
+			first_cl_in_sets[curr_cl->cache_set] = curr_cl;
+		}
+		if (IS_LAST(curr_cl->flags)) {
+			last_cl_in_sets[curr_cl->cache_set] = curr_cl;
+		}
+
+		if (ctx->addressing == PHYSICAL && !is_in_arr(
+			curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len))
+		{
+			// Already free all unused blocks of the cache ds for physical
+			// addressing, because we lose their refs
+			cl_insert(to_del_cls, curr_cl);
+			to_del_cls = curr_cl;
+		}
+		curr_cl = next_cl;
+
+	} while(curr_cl != cache_ds);
+
+	// Fix partial cache set ds
+	for (i = 0; i < sets_len; ++i) {
+		last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]];
+		first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]];
+	}
+	cache_set_ds = first_cl_in_sets[sets[0]];
+
+	// Free unused cache lines
+	if (ctx->addressing == PHYSICAL) {
+		cachepc_release_ds(ctx, to_del_cls);
+	}
+
+	kfree(first_cl_in_sets);
+	kfree(last_cl_in_sets);
+	kfree(cache_groups);
+
+	return cache_set_ds;
+}
+
+void
+cl_insert(cacheline *last_cl, cacheline *new_cl)
+{
+	if (last_cl == NULL) {
+		// Adding the first entry is a special case
+		new_cl->next = new_cl;
+		new_cl->prev = new_cl;
+	} else {
+		new_cl->next = last_cl->next;
+		new_cl->prev = last_cl;
+		last_cl->next->prev = new_cl;
+		last_cl->next = new_cl;
+	}
+}
+
+void *
+remove_cache_set(cache_ctx *ctx, void *ptr)
+{
+	
return (void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets)); +} + +void * +remove_cache_group_set(void *ptr) +{ + return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE)); +} + + +/* + * Create a randomized doubly linked list with the following structure: + * set A <--> set B <--> ... <--> set X <--> set A + * where each set is one of the cache sets, in a random order. + * The sets are a doubly linked list of cachelines themselves: + * set A: + * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ... + * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D + * and D = Associativity = | cache set | + */ +cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) { + cacheline **first_cl_in_sets, **last_cl_in_sets; + cacheline **cl_ptr_arr_sorted; + cacheline *curr_cl; + cacheline *cache_ds; + uint32_t *idx_per_set; + uint32_t idx_curr_set, set_offset; + uint32_t i, j, set, set_len; + uint32_t *idx_map; + + idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_per_set == NULL); + + cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(cl_ptr_arr_sorted == NULL); + + set_len = ctx->associativity; + for (i = 0; i < ctx->nr_of_cachelines; ++i) { + set_offset = cl_ptr_arr[i]->cache_set * set_len; + idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set]; + + cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i]; + idx_per_set[cl_ptr_arr[i]->cache_set] += 1; + } + + // Build doubly linked list for every set + for (set = 0; set < ctx->sets; ++set) { + set_offset = set * set_len; + build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset); + } + + // Relink the sets among each other + idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_map == NULL); + + gen_random_indices(idx_map, ctx->sets); + + first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(first_cl_in_sets == NULL); + + last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(last_cl_in_sets == NULL); + + for (j = 0; j < ctx->nr_of_cachelines; ++j) { + curr_cl = cl_ptr_arr_sorted[j]; + if (IS_FIRST(curr_cl->flags)) + first_cl_in_sets[curr_cl->cache_set] = curr_cl; + if (IS_LAST(curr_cl->flags)) + last_cl_in_sets[curr_cl->cache_set] = curr_cl; + } + + /* connect up sets */ + for (i = 0; i < ctx->sets; ++i) { + last_cl_in_sets[idx_map[i]]->next = first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]; + first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]->prev = last_cl_in_sets[idx_map[i]]; + } + cache_ds = first_cl_in_sets[idx_map[0]]; + + kfree(cl_ptr_arr_sorted); + kfree(first_cl_in_sets); + kfree(last_cl_in_sets); + kfree(idx_per_set); + kfree(idx_map); + + return cache_ds; +} + +/* + * Helper function to build a randomised list of cacheline structs for a set + */ +void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr) +{ + cacheline *curr_cl; + uint32_t len, *idx_map; + uint16_t i; + + len = ctx->associativity; + idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_map == NULL); + + gen_random_indices(idx_map, len); + + for (i = 0; i < len; ++i) { + curr_cl = cacheline_ptr_arr[idx_map[i]]; + curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]]; + curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]]; + + if (idx_map[i] == 0) { + curr_cl->flags = SET_FIRST(DEFAULT_FLAGS); + curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS); + } else { + curr_cl->flags |= DEFAULT_FLAGS; + } + } + + kfree(idx_map); +} + +/* + * 
Allocate a data structure that fills the complete cache, i.e. consisting + * of `associativity` many cache lines for each cache set. + */ +cacheline ** +allocate_cache_ds(cache_ctx *ctx) +{ + cacheline **cl_ptr_arr, *cl_arr; + uint32_t i; + + cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(cl_ptr_arr == NULL); + + BUG_ON(ctx->addressing != VIRTUAL); + + // For virtual addressing, allocating a consecutive chunk of memory is enough + cl_arr = cachepc_aligned_alloc(PAGE_SIZE, ctx->cache_size); + BUG_ON(cl_arr == NULL); + + for (i = 0; i < ctx->nr_of_cachelines; ++i) { + cl_ptr_arr[i] = cl_arr + i; + cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]); + cl_ptr_arr[i]->cache_line = i / ctx->sets; + cl_ptr_arr[i]->count = 0; + } + + return cl_ptr_arr; +} + +uint16_t +get_virt_cache_set(cache_ctx *ctx, void *ptr) +{ + return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE); +} + +void * +cachepc_aligned_alloc(size_t alignment, size_t size) +{ + void *p; + + if (size % alignment != 0) + size = size - (size % alignment) + alignment; + p = kzalloc(size, GFP_KERNEL); + BUG_ON(((uintptr_t) p) % alignment != 0); + + return p; +} + diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h new file mode 100644 index 0000000..ad2dff1 --- /dev/null +++ b/cachepc/cachepc.h @@ -0,0 +1,188 @@ +#pragma once + +#include "asm.h" +#include "cache_types.h" +#include "util.h" +#include "uapi.h" + +void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask); + +cache_ctx *cachepc_get_ctx(cache_level cl); +void cachepc_release_ctx(cache_ctx *ctx); + +cacheline *cachepc_prepare_ds(cache_ctx *ctx); +void cachepc_release_ds(cache_ctx *ctx, cacheline *ds); + +cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set); +void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr); + +void *cachepc_aligned_alloc(size_t alignment, size_t size); + +void cachepc_save_msrmts(cacheline *head); +void cachepc_print_msrmts(cacheline *head); + +void cachepc_prime_vcall(uintptr_t ret, cacheline *cl); +void cachepc_probe_vcall(uintptr_t ret, cacheline *cl); + +__attribute__((always_inline)) +static inline cacheline *cachepc_prime(cacheline *head); + +__attribute__((always_inline)) +static inline cacheline *cachepc_prime_rev(cacheline *head); + +__attribute__((always_inline)) +static inline cacheline *cachepc_probe(cacheline *head); + +__attribute__((always_inline)) +static inline void cachepc_victim(void *p); + +__attribute__((always_inline)) +static inline uint64_t cachepc_read_pmc(uint64_t event); + +extern uint16_t *cachepc_msrmts; +extern size_t cachepc_msrmts_count; + +extern cache_ctx *cachepc_ctx; +extern cacheline *cachepc_ds; + +extern uint64_t cachepc_regs_tmp[16]; +extern uint64_t cachepc_regs_vm[16]; + +/* + * Prime phase: fill the target cache (encoded in the size of the data structure) + * with the prepared data structure, i.e. with attacker data. + */ +cacheline * +cachepc_prime(cacheline *head) +{ + cacheline *curr_cl, *prev_cl; + + cachepc_mfence(); + cachepc_cpuid(); + + curr_cl = head; + do { + prev_cl = curr_cl; + curr_cl = curr_cl->next; + } while (curr_cl != head); + + cachepc_mfence(); + cachepc_cpuid(); + + return prev_cl; +} + +/* + * Same as prime, but in the reverse direction, i.e. the same direction that probe + * uses. This is beneficial for the following scenarios: + * - L1: + * - Trigger collision chain-reaction to amplify an evicted set (but this has + * the downside of more noisy measurements). 
+ * - L2: + * - Always use this for L2, otherwise the first cache sets will still reside + * in L1 unless the victim filled L1 completely. In this case, an eviction + * has randomly (depending on where the cache set is placed in the randomised + * data structure) the following effect: + * A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower + * B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower + */ +cacheline * +cachepc_prime_rev(cacheline *head) +{ + cacheline *curr_cl; + + cachepc_mfence(); + cachepc_cpuid(); + + curr_cl = head; + do { + curr_cl = curr_cl->prev; + } while(curr_cl != head); + + cachepc_mfence(); + cachepc_cpuid(); + + return curr_cl->prev; +} + +cacheline * +cachepc_probe(cacheline *start_cl) +{ + uint64_t pre, post; + cacheline *next_cl; + cacheline *curr_cl; + + cachepc_mfence(); + cachepc_cpuid(); + + curr_cl = start_cl; + + do { + pre = cachepc_read_pmc(0); + + asm volatile( + "mov 8(%[curr_cl]), %%rax \n\t" // +8 + "mov 8(%%rax), %%rcx \n\t" // +16 + "mov 8(%%rcx), %%rax \n\t" // +24 + "mov 8(%%rax), %%rcx \n\t" // +32 + "mov 8(%%rcx), %%rax \n\t" // +40 + "mov 8(%%rax), %%rcx \n\t" // +48 + "mov 8(%%rcx), %[curr_cl_out] \n\t" // +56 + "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64 + : [next_cl_out] "=r" (next_cl), + [curr_cl_out] "=r" (curr_cl) + : [curr_cl] "r" (curr_cl) + : "rax", "rcx" + ); + + post = cachepc_read_pmc(0); + + /* works across size boundary */ + curr_cl->count = post - pre; + + curr_cl = next_cl; + } while (__builtin_expect(curr_cl != start_cl, 1)); + + next_cl = curr_cl->next; + + cachepc_mfence(); + cachepc_cpuid(); + + return next_cl; +} + +void +cachepc_victim(void *p) +{ + cachepc_mfence(); + cachepc_cpuid(); + + cachepc_readq(p); + + cachepc_mfence(); + cachepc_cpuid(); +} + +uint64_t +cachepc_read_pmc(uint64_t event) +{ + uint32_t lo, hi; + uint64_t res; + + cachepc_mfence(); + cachepc_cpuid(); + + event = 0xC0010201 + 2 * event; + + asm volatile ( + "rdmsr" + : "=a" (lo), "=d" (hi) + : "c"(event) + ); + res = ((uint64_t) hi << 32) | (uint64_t) lo; + + cachepc_mfence(); + cachepc_cpuid(); + + return res; +} diff --git a/cachepc/device_conf.h b/cachepc/device_conf.h new file mode 100644 index 0000000..e24d681 --- /dev/null +++ b/cachepc/device_conf.h @@ -0,0 +1,29 @@ +#pragma once + +// TODO: Read from kernel headers + +// General settings +// #define PAGE_SIZE 4096 +#define PROCESSOR_FREQ 2900000000 + +// Cache related settings +#define CACHELINE_SIZE 64 +#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE) + +// Addressing: +// - virtual: 0 +// - physical: 1 +#define L1_ADDRESSING 0 +#define L1_SETS 64 +#define L1_ASSOCIATIVITY 8 +#define L1_ACCESS_TIME 4 + +#define L2_ADDRESSING 1 +#define L2_SETS 512 +#define L2_ASSOCIATIVITY 8 +#define L2_ACCESS_TIME 12 + +#define L3_ADDRESSING 1 +#define L3_SETS 4096 +#define L3_ASSOCIATIVITY 16 +#define L3_ACCESS_TIME 30 diff --git a/cachepc/kvm.c b/cachepc/kvm.c new file mode 100644 index 0000000..4deb4fa --- /dev/null +++ b/cachepc/kvm.c @@ -0,0 +1,392 @@ +#include "kvm.h" + +#include +#include +#include +#include +#include + +struct proc_ops cachepc_proc_ops; + +uint16_t *cachepc_msrmts; +size_t cachepc_msrmts_count; +EXPORT_SYMBOL(cachepc_msrmts); +EXPORT_SYMBOL(cachepc_msrmts_count); + +cache_ctx *cachepc_ctx; +cacheline *cachepc_ds; +EXPORT_SYMBOL(cachepc_ctx); +EXPORT_SYMBOL(cachepc_ds); + +uint64_t cachepc_regs_tmp[16]; +uint64_t cachepc_regs_vm[16]; +EXPORT_SYMBOL(cachepc_regs_tmp); +EXPORT_SYMBOL(cachepc_regs_vm); + +int +cachepc_kvm_proc_open(struct inode *inode, 
struct file *file)
+{
+	try_module_get(THIS_MODULE);
+
+	return 0;
+}
+
+int
+cachepc_kvm_proc_close(struct inode *inode, struct file *file)
+{
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+ssize_t
+cachepc_kvm_proc_read(struct file *file, char *buf, size_t buflen, loff_t *off)
+{
+	size_t len, left;
+	size_t size;
+
+	printk(KERN_WARNING "CachePC: Reading entries (%lu:%lli)\n",
+		buflen, off ? *off : 0);
+
+	size = cachepc_msrmts_count * sizeof(uint16_t);
+	if (!off || *off >= size || *off < 0)
+		return 0;
+
+	len = size - *off;
+	if (len > buflen) len = buflen;
+
+	left = copy_to_user(buf, (uint8_t *) cachepc_msrmts + *off, len);
+
+	len -= left;
+	*off += len;
+
+	return len;
+}
+
+ssize_t
+cachepc_kvm_proc_write(struct file *file, const char *buf, size_t buflen, loff_t *off)
+{
+	return 0;
+}
+
+loff_t
+cachepc_kvm_proc_lseek(struct file *file, loff_t off, int mode)
+{
+	switch (mode) {
+	case SEEK_SET:
+		file->f_pos = off;
+		break;
+	case SEEK_CUR:
+		file->f_pos += off;
+		break;
+	case SEEK_END:
+		file->f_pos = cachepc_msrmts_count * sizeof(uint16_t) + off;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return file->f_pos;
+}
+
+void
+cachepc_kvm_prime_probe_test(void *p)
+{
+	cacheline *lines;
+	cacheline *cl, *head;
+	uint32_t count;
+	uint32_t *arg;
+	int i, max;
+
+	arg = p;
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
+	BUG_ON(lines == NULL);
+
+	max = cachepc_ctx->nr_of_cachelines;
+
+	cachepc_cpuid();
+	cachepc_mfence();
+
+	for (i = 0; i < max; i++)
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+
+	head = cachepc_prime(cachepc_ds);
+	cachepc_probe(head);
+
+	count = 0;
+	cl = head = cachepc_ds;
+	do {
+		count += cl->count;
+		cl = cl->next;
+	} while (cl != head);
+
+	printk(KERN_WARNING "CachePC: Prime-probe test done (%u vs. %u => %s)\n",
+		count, 0, (count == 0) ? "passed" : "failed");
+
+	if (arg) *arg = (count == 0);
+
+	kfree(lines);
+}
+
+void
+cachepc_kvm_stream_hwpf_test(void *p)
+{
+	cacheline *lines;
+	uint32_t count = 0;
+	uint32_t *arg;
+	uint32_t i, max;
+
+	arg = p;
+
+	/* TODO: accurately detect hwpf */
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
+	BUG_ON(lines == NULL);
+
+	max = cachepc_ctx->nr_of_cachelines;
+
+	cachepc_prime(cachepc_ds);
+
+	count -= cachepc_read_pmc(0);
+	for (i = 0; i < max; i++)
+		asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
+	count += cachepc_read_pmc(0);
+
+	printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n",
+		count, max, (count == max) ? "passed" : "failed");
+
+	if (arg) *arg = (count == max);
+
+	kfree(lines);
+}
+
+void
+cachepc_kvm_single_access_test(void *p)
+{
+	cacheline *ptr;
+	uint64_t pre, post;
+	uint32_t *arg;
+
+	/* l2 data cache, hit or miss */
+	cachepc_init_pmc(0, 0x64, 0xD8);
+
+	arg = p;
+
+	WARN_ON(arg && *arg >= L1_SETS);
+	if (arg && *arg >= L1_SETS) return;
+	ptr = cachepc_prepare_victim(cachepc_ctx, arg ? *arg : 48);
+
+	cachepc_prime(cachepc_ds);
+
+	pre = cachepc_read_pmc(0);
+	cachepc_victim(ptr);
+	post = cachepc_read_pmc(0);
+
+	printk(KERN_WARNING "CachePC: Single access test done (%llu vs %u => %s)\n",
+		post - pre, 1, (post - pre == 1) ? 
"passed" : "failed"); + + if (arg) *arg = post - pre; + + cachepc_release_victim(cachepc_ctx, ptr); +} + +void +cachepc_kvm_single_eviction_test(void *p) +{ + cacheline *head, *cl, *evicted; + cacheline *ptr; + uint32_t target; + uint32_t *arg; + int count; + + arg = p; + + /* l2 data cache, hit or miss */ + cachepc_init_pmc(0, 0x64, 0xD8); + + WARN_ON(arg && *arg >= L1_SETS); + if (arg && *arg >= L1_SETS) return; + target = arg ? *arg : 48; + + ptr = cachepc_prepare_victim(cachepc_ctx, target); + + head = cachepc_prime(cachepc_ds); + cachepc_victim(ptr); + cachepc_probe(head); + + count = 0; + evicted = NULL; + cl = head = cachepc_ds; + do { + if (IS_FIRST(cl->flags) && cl->count > 0) { + evicted = cl; + count += cl->count; + } + cl = cl->next; + } while (cl != head); + + printk(KERN_WARNING "CachePC: Single eviction test done (%u vs %u => %s)\n", + count, 1, (count == 1 && evicted->cache_set == target) ? "passed" : "failed"); + cachepc_save_msrmts(head); + + if (arg) *arg = count; + + cachepc_release_victim(cachepc_ctx, ptr); +} + +void +cachepc_kvm_system_setup(void) +{ + uint64_t reg_addr, val; + uint32_t lo, hi; + + /* disable streaming store */ + reg_addr = 0xc0011020; + asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); + val = (uint64_t) lo | ((uint64_t) hi << 32); + val |= 1 << 13; + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); + printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); + + /* disable speculative data cache tlb reloads */ + reg_addr = 0xc0011022; + asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); + val = (uint64_t) lo | ((uint64_t) hi << 32); + val |= 1 << 4; + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); + printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); + + /* disable data cache hardware prefetcher */ + reg_addr = 0xc0011022; + asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); + val = (uint64_t) lo | ((uint64_t) hi << 32); + val |= 1 << 13; + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); + printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); +} + +void +cachepc_kvm_init_pmc_ioctl(void *p) +{ + uint32_t event; + uint8_t index, event_no, event_mask; + + WARN_ON(p == NULL); + if (!p) return; + + event = *(uint32_t *)p; + + index = (event & 0xFF000000) >> 24; + event_no = (event & 0x0000FF00) >> 8; + event_mask = (event & 0x000000FF) >> 0; + + cachepc_init_pmc(index, event_no, event_mask); +} + +long +cachepc_kvm_ioctl(struct file *file, unsigned int cmd, unsigned long argp) +{ + void __user *arg_user; + uint32_t u32; + int ret; + + arg_user = (void __user *)argp; + switch (cmd) { + case CACHEPC_IOCTL_TEST_ACCESS: + printk(KERN_WARNING "CachePC: Called ioctl access test\n"); + if (!arg_user) return -EINVAL; + if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) + return -EFAULT; + ret = smp_call_function_single(2, + cachepc_kvm_single_access_test, &u32, true); + WARN_ON(ret != 0); + if (copy_to_user(arg_user, &u32, sizeof(uint32_t))) + return -EFAULT; + break; + case CACHEPC_IOCTL_TEST_EVICTION: + printk(KERN_WARNING "CachePC: Called ioctl eviction test\n"); + if (!arg_user) return -EINVAL; + if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) + return -EFAULT; + ret = smp_call_function_single(2, + cachepc_kvm_single_eviction_test, &u32, true); + WARN_ON(ret != 0); + if (copy_to_user(arg_user, &u32, sizeof(uint32_t))) + return -EFAULT; + break; + case CACHEPC_IOCTL_INIT_PMC: + printk(KERN_WARNING "CachePC: Called ioctl init counter\n"); + if 
(!arg_user) return -EINVAL; + if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) + return -EFAULT; + ret = smp_call_function_single(2, + cachepc_kvm_init_pmc_ioctl, &u32, true); + WARN_ON(ret != 0); + break; + default: + return -EINVAL; + } + + return 0; +} + +void +cachepc_kvm_setup_test(void *p) +{ + int cpu; + + cpu = get_cpu(); + + printk(KERN_WARNING "CachePC: Running on core %i\n", cpu); + + cachepc_ctx = cachepc_get_ctx(L1); + cachepc_ds = cachepc_prepare_ds(cachepc_ctx); + + cachepc_kvm_system_setup(); + + cachepc_kvm_prime_probe_test(NULL); + cachepc_kvm_single_access_test(NULL); + cachepc_kvm_single_eviction_test(NULL); + cachepc_kvm_stream_hwpf_test(NULL); + + put_cpu(); +} + +void +cachepc_kvm_init(void) +{ + int ret; + + cachepc_msrmts_count = L1_SETS; + cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL); + BUG_ON(cachepc_msrmts == NULL); + + ret = smp_call_function_single(2, cachepc_kvm_setup_test, NULL, true); + WARN_ON(ret != 0); + + memset(&cachepc_proc_ops, 0, sizeof(cachepc_proc_ops)); + cachepc_proc_ops.proc_open = cachepc_kvm_proc_open; + cachepc_proc_ops.proc_read = cachepc_kvm_proc_read; + cachepc_proc_ops.proc_write = cachepc_kvm_proc_write; + cachepc_proc_ops.proc_lseek = cachepc_kvm_proc_lseek; + cachepc_proc_ops.proc_release = cachepc_kvm_proc_close; + cachepc_proc_ops.proc_ioctl = cachepc_kvm_ioctl; + proc_create("cachepc", 0644, NULL, &cachepc_proc_ops); +} + +void +cachepc_kvm_exit(void) +{ + remove_proc_entry("cachepc", NULL); + kfree(cachepc_msrmts); + + cachepc_release_ds(cachepc_ctx, cachepc_ds); + cachepc_release_ctx(cachepc_ctx); +} diff --git a/cachepc/kvm.h b/cachepc/kvm.h new file mode 100644 index 0000000..a44491e --- /dev/null +++ b/cachepc/kvm.h @@ -0,0 +1,6 @@ +#pragma once + +#include "cachepc.h" + +void cachepc_kvm_init(void); +void cachepc_kvm_exit(void); diff --git a/cachepc/uapi.h b/cachepc/uapi.h new file mode 100644 index 0000000..f815839 --- /dev/null +++ b/cachepc/uapi.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +#define CACHEPC_IOCTL_MAGIC 0xBF +#define CACHEPC_IOCTL_TEST_ACCESS _IOWR(CACHEPC_IOCTL_MAGIC, 0, uint32_t) +#define CACHEPC_IOCTL_TEST_EVICTION _IOWR(CACHEPC_IOCTL_MAGIC, 1, uint32_t) +#define CACHEPC_IOCTL_INIT_PMC _IOW(CACHEPC_IOCTL_MAGIC, 2, uint32_t) diff --git a/cachepc/util.c b/cachepc/util.c new file mode 100644 index 0000000..abf2b71 --- /dev/null +++ b/cachepc/util.c @@ -0,0 +1,38 @@ +#include "util.h" + +void +random_perm(uint32_t *arr, uint32_t arr_len) +{ + uint32_t i; + + /* no special ordering needed when prefetcher is disabled */ + for (i = 0; i < arr_len; i++) + arr[i] = i; + + // /* prevent stream prefetching by alternating access direction */ + // mid = arr_len / 2; + // for (i = 0; i < arr_len; i++) + // arr[i] = mid + (i % 2 ? 
-1 : 1) * ((i + 1) / 2); +} + +void +gen_random_indices(uint32_t *arr, uint32_t arr_len) +{ + uint32_t i; + + for (i = 0; i < arr_len; ++i) + arr[i] = i; + random_perm(arr, arr_len); +} + + +bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { + uint32_t i; + + for (i = 0; i < arr_len; ++i) { + if (arr[i] == elem) + return true; + } + + return false; +} diff --git a/cachepc/util.h b/cachepc/util.h new file mode 100644 index 0000000..a0ff8be --- /dev/null +++ b/cachepc/util.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +void random_perm(uint32_t *arr, uint32_t arr_len); +void gen_random_indices(uint32_t *arr, uint32_t arr_len); + +bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); diff --git a/kmod/asm.h b/kmod/asm.h deleted file mode 100644 index 9e9385a..0000000 --- a/kmod/asm.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include - -#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx" - -__attribute__((always_inline)) -static inline void cachepc_cpuid(void); - -__attribute__((always_inline)) -static inline void cachepc_lfence(void); - -__attribute__((always_inline)) -static inline void cachepc_sfence(void); - -__attribute__((always_inline)) -static inline void cachepc_mfence(void); - -__attribute__((always_inline)) -static inline void cachepc_readq(void *p); - -void -cachepc_cpuid(void) -{ - asm volatile( - "mov $0x80000005, %%eax\n\t" - "cpuid\n\t" - ::: CPUID_AFFECTED_REGS - ); -} - -void -cachepc_lfence(void) -{ - asm volatile( - "lfence\n\t" - ::: "memory" - ); -} - -void -cachepc_sfence(void) -{ - asm volatile( - "sfence\n\t" - ::: "memory" - ); -} - -void -cachepc_mfence(void) -{ - asm volatile( - "mfence\n\t" - ::: "memory" - ); -} - -void -cachepc_readq(void *p) -{ - asm volatile ( - "movq (%0), %%r10\n\t" - : : "r" (p) : "r10" - ); -} diff --git a/kmod/cache_types.h b/kmod/cache_types.h deleted file mode 100644 index b337d55..0000000 --- a/kmod/cache_types.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include "device_conf.h" - -#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1)) - -#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK)) - -#define GET_BIT(b, i) (((b) >> (i)) & 1) -#define SET_BIT(b, i) ((b) | (1 << (i))) - -/* Operate cacheline flags - * Used flags: - * 32 2 1 0 - * | | ... 
| cache group initialized | last | first | - */ -#define DEFAULT_FLAGS 0 -#define SET_FIRST(flags) SET_BIT(flags, 0) -#define SET_LAST(flags) SET_BIT(flags, 1) -#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2) -#define IS_FIRST(flags) GET_BIT(flags, 0) -#define IS_LAST(flags) GET_BIT(flags, 1) -#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2) - -#define CL_NEXT_OFFSET offsetof(struct cacheline, next) -#define CL_PREV_OFFSET offsetof(struct cacheline, prev) - -typedef enum cache_level cache_level; -typedef enum addressing_type addressing_type; -typedef struct cacheline cacheline; -typedef struct cache_ctx cache_ctx; - -enum cache_level {L1, L2}; -enum addressing_type {VIRTUAL, PHYSICAL}; - -struct cache_ctx { - cache_level cache_level; - addressing_type addressing; - - uint32_t sets; - uint32_t associativity; - uint32_t access_time; - uint32_t nr_of_cachelines; - uint32_t set_size; - uint32_t cache_size; -}; - -struct cacheline { - // Doubly linked list inside same set - // Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET - // must be kept up to date - cacheline *next; - cacheline *prev; - - uint32_t cache_set; - uint32_t cache_line; - uint32_t flags; - - // Unused padding to fill cache line - uint64_t count; - - char padding[24]; -}; - -static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size"); -static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8); diff --git a/kmod/cachepc.c b/kmod/cachepc.c deleted file mode 100644 index 09ed705..0000000 --- a/kmod/cachepc.c +++ /dev/null @@ -1,445 +0,0 @@ -#include "cachepc.h" - -#include -#include -#include -#include -#include - -static void cl_insert(cacheline *last_cl, cacheline *new_cl); -static void *remove_cache_set(cache_ctx *ctx, void *ptr); -static void *remove_cache_group_set(void *ptr); - -static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len); -static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static cacheline **allocate_cache_ds(cache_ctx *ctx); -static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); - -void __attribute__((optimize(1))) // prevent instruction reordering -cachepc_prime_vcall(uintptr_t ret, cacheline *cl) -{ - cachepc_prime(cl); - asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); -} - -void __attribute__((optimize(1))) // prevent instruction reordering -cachepc_probe_vcall(uintptr_t ret, cacheline *cl) -{ - cachepc_probe(cl); - asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax"); -} - -void -cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask) -{ - uint64_t event; - uint64_t reg_addr; - - /* REF: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166) - * - * performance event selection via 0xC001_020X with X = (0..A)[::2] - * performance event reading viea 0XC001_020X with X = (1..B)[::2] - */ - - WARN_ON(index >= 6); - if (index >= 6) return; - - reg_addr = 0xc0010200 + index * 2; - event = event_no | (event_mask << 8); - event |= (1ULL << 17); /* OS (kernel) events only */ - event |= (1ULL << 22); /* enable performance counter */ - event |= (1ULL << 40); /* Host events only */ - printk(KERN_WARNING "CachePC: Initialized %i. 
PMC %02X:%02X\n", - index, event_no, event_mask); - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); -} - -cache_ctx * -cachepc_get_ctx(cache_level cache_level) -{ - cache_ctx *ctx; - - ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); - BUG_ON(ctx == NULL); - - BUG_ON(cache_level != L1); - if (cache_level == L1) { - ctx->addressing = L1_ADDRESSING; - ctx->sets = L1_SETS; - ctx->associativity = L1_ASSOCIATIVITY; - ctx->access_time = L1_ACCESS_TIME; - } else if (cache_level == L2) { - ctx->addressing = L2_ADDRESSING; - ctx->sets = L2_SETS; - ctx->associativity = L2_ASSOCIATIVITY; - ctx->access_time = L2_ACCESS_TIME; - } else { - return NULL; - } - - ctx->cache_level = cache_level; - ctx->nr_of_cachelines = ctx->sets * ctx->associativity; - ctx->set_size = CACHELINE_SIZE * ctx->associativity; - ctx->cache_size = ctx->sets * ctx->set_size; - - return ctx; -} - -void -cachepc_release_ctx(cache_ctx *ctx) -{ - kfree(ctx); -} - - -/* - * Initialises the complete cache data structure for the given context - */ -cacheline * -cachepc_prepare_ds(cache_ctx *ctx) -{ - cacheline **cacheline_ptr_arr; - cacheline *cache_ds; - - //printk(KERN_WARNING "CachePC: Preparing ds..\n"); - - cacheline_ptr_arr = allocate_cache_ds(ctx); - cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); - kfree(cacheline_ptr_arr); - - // printk(KERN_WARNING "CachePC: Preparing ds done\n"); - - return cache_ds; -} - -void -cachepc_release_ds(cache_ctx *ctx, cacheline *ds) -{ - kfree(remove_cache_set(ctx, ds)); -} - -cacheline * -cachepc_prepare_victim(cache_ctx *ctx, uint32_t set) -{ - cacheline *victim_set, *victim_cl; - cacheline *curr_cl, *next_cl; - - victim_set = prepare_cache_set_ds(ctx, &set, 1); - victim_cl = victim_set; - - // Free the other lines in the same set that are not used. - if (ctx->addressing == PHYSICAL) { - curr_cl = victim_cl->next; - do { - next_cl = curr_cl->next; - // Here, it is ok to free them directly, as every line in the same - // set is from a different page anyway. 
- kfree(remove_cache_group_set(curr_cl)); - curr_cl = next_cl; - } while(curr_cl != victim_cl); - } - - return victim_cl; -} - -void -cachepc_release_victim(cache_ctx *ctx, cacheline *victim) -{ - kfree(remove_cache_set(ctx, victim)); -} - -void -cachepc_save_msrmts(cacheline *head) -{ - cacheline *curr_cl; - - // printk(KERN_WARNING "CachePC: Updating /proc/cachepc\n"); - - curr_cl = head; - do { - if (IS_FIRST(curr_cl->flags)) { - BUG_ON(curr_cl->cache_set >= cachepc_msrmts_count); - cachepc_msrmts[curr_cl->cache_set] = curr_cl->count; - } - - curr_cl = curr_cl->prev; - } while (curr_cl != head); -} - -void -cachepc_print_msrmts(cacheline *head) -{ - cacheline *curr_cl; - - curr_cl = head; - do { - if (IS_FIRST(curr_cl->flags)) { - printk(KERN_WARNING "CachePC: Count for cache set %i: %llu\n", - curr_cl->cache_set, curr_cl->count); - } - - curr_cl = curr_cl->prev; - } while (curr_cl != head); -} - - -cacheline * -prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len) -{ - cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets; - cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds; - uint32_t i, cache_groups_len, cache_groups_max_len; - uint32_t *cache_groups; - - cache_ds = cachepc_prepare_ds(ctx); - - first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(first_cl_in_sets == NULL); - - last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(last_cl_in_sets == NULL); - - // Find the cache groups that are used, so that we can delete the other ones - // later (to avoid memory leaks) - cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE; - cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(cache_groups == NULL); - - cache_groups_len = 0; - for (i = 0; i < sets_len; ++i) { - if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) { - cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE; - ++cache_groups_len; - } - } - - to_del_cls = NULL; - curr_cl = cache_ds; - - // Extract the partial data structure for the cache sets and ensure correct freeing - do { - next_cl = curr_cl->next; - - if (IS_FIRST(curr_cl->flags)) { - first_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - if (IS_LAST(curr_cl->flags)) { - last_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - - if (ctx->addressing == PHYSICAL && !is_in_arr( - curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) - { - // Already free all unused blocks of the cache ds for physical - // addressing, because we loose their refs - cl_insert(to_del_cls, curr_cl); - to_del_cls = curr_cl; - } - curr_cl = next_cl; - - } while(curr_cl != cache_ds); - - // Fix partial cache set ds - for (i = 0; i < sets_len; ++i) { - last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]]; - first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]]; - } - cache_set_ds = first_cl_in_sets[sets[0]]; - - // Free unused cache lines - if (ctx->addressing == PHYSICAL) { - cachepc_release_ds(ctx, to_del_cls); - } - - kfree(first_cl_in_sets); - kfree(last_cl_in_sets); - kfree(cache_groups); - - return cache_set_ds; -} - -void -cl_insert(cacheline *last_cl, cacheline *new_cl) -{ - if (last_cl == NULL) { - // Adding the first entry is a special case - new_cl->next = new_cl; - new_cl->prev = new_cl; - } else { - new_cl->next = last_cl->next; - new_cl->prev = last_cl; - last_cl->next->prev = new_cl; - last_cl->next = new_cl; - } -} - -void * -remove_cache_set(cache_ctx *ctx, void *ptr) -{ - 
return (void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets)); -} - -void * -remove_cache_group_set(void *ptr) -{ - return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE)); -} - - -/* - * Create a randomized doubly linked list with the following structure: - * set A <--> set B <--> ... <--> set X <--> set A - * where each set is one of the cache sets, in a random order. - * The sets are a doubly linked list of cachelines themselves: - * set A: - * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ... - * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D - * and D = Associativity = | cache set | - */ -cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) { - cacheline **first_cl_in_sets, **last_cl_in_sets; - cacheline **cl_ptr_arr_sorted; - cacheline *curr_cl; - cacheline *cache_ds; - uint32_t *idx_per_set; - uint32_t idx_curr_set, set_offset; - uint32_t i, j, set, set_len; - uint32_t *idx_map; - - idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_per_set == NULL); - - cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr_sorted == NULL); - - set_len = ctx->associativity; - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - set_offset = cl_ptr_arr[i]->cache_set * set_len; - idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set]; - - cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i]; - idx_per_set[cl_ptr_arr[i]->cache_set] += 1; - } - - // Build doubly linked list for every set - for (set = 0; set < ctx->sets; ++set) { - set_offset = set * set_len; - build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset); - } - - // Relink the sets among each other - idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, ctx->sets); - - first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(first_cl_in_sets == NULL); - - last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(last_cl_in_sets == NULL); - - for (j = 0; j < ctx->nr_of_cachelines; ++j) { - curr_cl = cl_ptr_arr_sorted[j]; - if (IS_FIRST(curr_cl->flags)) - first_cl_in_sets[curr_cl->cache_set] = curr_cl; - if (IS_LAST(curr_cl->flags)) - last_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - - /* connect up sets */ - for (i = 0; i < ctx->sets; ++i) { - last_cl_in_sets[idx_map[i]]->next = first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]; - first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]->prev = last_cl_in_sets[idx_map[i]]; - } - cache_ds = first_cl_in_sets[idx_map[0]]; - - kfree(cl_ptr_arr_sorted); - kfree(first_cl_in_sets); - kfree(last_cl_in_sets); - kfree(idx_per_set); - kfree(idx_map); - - return cache_ds; -} - -/* - * Helper function to build a randomised list of cacheline structs for a set - */ -void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr) -{ - cacheline *curr_cl; - uint32_t len, *idx_map; - uint16_t i; - - len = ctx->associativity; - idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, len); - - for (i = 0; i < len; ++i) { - curr_cl = cacheline_ptr_arr[idx_map[i]]; - curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]]; - curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]]; - - if (idx_map[i] == 0) { - curr_cl->flags = SET_FIRST(DEFAULT_FLAGS); - curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS); - } else { - curr_cl->flags |= DEFAULT_FLAGS; - } - } - - kfree(idx_map); -} - -/* - * 
Allocate a data structure that fills the complete cache, i.e. consisting - * of `associativity` many cache lines for each cache set. - */ -cacheline ** -allocate_cache_ds(cache_ctx *ctx) -{ - cacheline **cl_ptr_arr, *cl_arr; - uint32_t i; - - cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr == NULL); - - BUG_ON(ctx->addressing != VIRTUAL); - - // For virtual addressing, allocating a consecutive chunk of memory is enough - cl_arr = cachepc_aligned_alloc(PAGE_SIZE, ctx->cache_size); - BUG_ON(cl_arr == NULL); - - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - cl_ptr_arr[i] = cl_arr + i; - cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]); - cl_ptr_arr[i]->cache_line = i / ctx->sets; - cl_ptr_arr[i]->count = 0; - } - - return cl_ptr_arr; -} - -uint16_t -get_virt_cache_set(cache_ctx *ctx, void *ptr) -{ - return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE); -} - -void * -cachepc_aligned_alloc(size_t alignment, size_t size) -{ - void *p; - - if (size % alignment != 0) - size = size - (size % alignment) + alignment; - p = kzalloc(size, GFP_KERNEL); - BUG_ON(((uintptr_t) p) % alignment != 0); - - return p; -} - diff --git a/kmod/cachepc.h b/kmod/cachepc.h deleted file mode 100644 index 6237eba..0000000 --- a/kmod/cachepc.h +++ /dev/null @@ -1,188 +0,0 @@ -#pragma once - -#include "asm.h" -#include "cache_types.h" -#include "util.h" -#include "cachepc_user.h" - -void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask); - -cache_ctx *cachepc_get_ctx(cache_level cl); -void cachepc_release_ctx(cache_ctx *ctx); - -cacheline *cachepc_prepare_ds(cache_ctx *ctx); -void cachepc_release_ds(cache_ctx *ctx, cacheline *ds); - -cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set); -void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr); - -void *cachepc_aligned_alloc(size_t alignment, size_t size); - -void cachepc_save_msrmts(cacheline *head); -void cachepc_print_msrmts(cacheline *head); - -void cachepc_prime_vcall(uintptr_t ret, cacheline *cl); -void cachepc_probe_vcall(uintptr_t ret, cacheline *cl); - -__attribute__((always_inline)) -static inline cacheline *cachepc_prime(cacheline *head); - -__attribute__((always_inline)) -static inline cacheline *cachepc_prime_rev(cacheline *head); - -__attribute__((always_inline)) -static inline cacheline *cachepc_probe(cacheline *head); - -__attribute__((always_inline)) -static inline void cachepc_victim(void *p); - -__attribute__((always_inline)) -static inline uint64_t cachepc_read_pmc(uint64_t event); - -extern uint16_t *cachepc_msrmts; -extern size_t cachepc_msrmts_count; - -extern cache_ctx *cachepc_ctx; -extern cacheline *cachepc_ds; - -extern uint64_t cachepc_regs_tmp[16]; -extern uint64_t cachepc_regs_vm[16]; - -/* - * Prime phase: fill the target cache (encoded in the size of the data structure) - * with the prepared data structure, i.e. with attacker data. - */ -cacheline * -cachepc_prime(cacheline *head) -{ - cacheline *curr_cl, *prev_cl; - - cachepc_mfence(); - cachepc_cpuid(); - - curr_cl = head; - do { - prev_cl = curr_cl; - curr_cl = curr_cl->next; - } while (curr_cl != head); - - cachepc_mfence(); - cachepc_cpuid(); - - return prev_cl; -} - -/* - * Same as prime, but in the reverse direction, i.e. the same direction that probe - * uses. This is beneficial for the following scenarios: - * - L1: - * - Trigger collision chain-reaction to amplify an evicted set (but this has - * the downside of more noisy measurements). 
- * - L2: - * - Always use this for L2, otherwise the first cache sets will still reside - * in L1 unless the victim filled L1 completely. In this case, an eviction - * has randomly (depending on where the cache set is placed in the randomised - * data structure) the following effect: - * A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower - * B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower - */ -cacheline * -cachepc_prime_rev(cacheline *head) -{ - cacheline *curr_cl; - - cachepc_mfence(); - cachepc_cpuid(); - - curr_cl = head; - do { - curr_cl = curr_cl->prev; - } while(curr_cl != head); - - cachepc_mfence(); - cachepc_cpuid(); - - return curr_cl->prev; -} - -cacheline * -cachepc_probe(cacheline *start_cl) -{ - uint64_t pre, post; - cacheline *next_cl; - cacheline *curr_cl; - - cachepc_mfence(); - cachepc_cpuid(); - - curr_cl = start_cl; - - do { - pre = cachepc_read_pmc(0); - - asm volatile( - "mov 8(%[curr_cl]), %%rax \n\t" // +8 - "mov 8(%%rax), %%rcx \n\t" // +16 - "mov 8(%%rcx), %%rax \n\t" // +24 - "mov 8(%%rax), %%rcx \n\t" // +32 - "mov 8(%%rcx), %%rax \n\t" // +40 - "mov 8(%%rax), %%rcx \n\t" // +48 - "mov 8(%%rcx), %[curr_cl_out] \n\t" // +56 - "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64 - : [next_cl_out] "=r" (next_cl), - [curr_cl_out] "=r" (curr_cl) - : [curr_cl] "r" (curr_cl) - : "rax", "rcx" - ); - - post = cachepc_read_pmc(0); - - /* works across size boundary */ - curr_cl->count = post - pre; - - curr_cl = next_cl; - } while (__builtin_expect(curr_cl != start_cl, 1)); - - next_cl = curr_cl->next; - - cachepc_mfence(); - cachepc_cpuid(); - - return next_cl; -} - -void -cachepc_victim(void *p) -{ - cachepc_mfence(); - cachepc_cpuid(); - - cachepc_readq(p); - - cachepc_mfence(); - cachepc_cpuid(); -} - -uint64_t -cachepc_read_pmc(uint64_t event) -{ - uint32_t lo, hi; - uint64_t res; - - cachepc_mfence(); - cachepc_cpuid(); - - event = 0xC0010201 + 2 * event; - - asm volatile ( - "rdmsr" - : "=a" (lo), "=d" (hi) - : "c"(event) - ); - res = ((uint64_t) hi << 32) | (uint64_t) lo; - - cachepc_mfence(); - cachepc_cpuid(); - - return res; -} diff --git a/kmod/cachepc_user.h b/kmod/cachepc_user.h deleted file mode 100644 index f815839..0000000 --- a/kmod/cachepc_user.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include - -#define CACHEPC_IOCTL_MAGIC 0xBF -#define CACHEPC_IOCTL_TEST_ACCESS _IOWR(CACHEPC_IOCTL_MAGIC, 0, uint32_t) -#define CACHEPC_IOCTL_TEST_EVICTION _IOWR(CACHEPC_IOCTL_MAGIC, 1, uint32_t) -#define CACHEPC_IOCTL_INIT_PMC _IOW(CACHEPC_IOCTL_MAGIC, 2, uint32_t) diff --git a/kmod/device_conf.h b/kmod/device_conf.h deleted file mode 100644 index e24d681..0000000 --- a/kmod/device_conf.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -// TODO: Read from kernel headers - -// General settings -// #define PAGE_SIZE 4096 -#define PROCESSOR_FREQ 2900000000 - -// Cache related settings -#define CACHELINE_SIZE 64 -#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE) - -// Addressing: -// - virtual: 0 -// - physical: 1 -#define L1_ADDRESSING 0 -#define L1_SETS 64 -#define L1_ASSOCIATIVITY 8 -#define L1_ACCESS_TIME 4 - -#define L2_ADDRESSING 1 -#define L2_SETS 512 -#define L2_ASSOCIATIVITY 8 -#define L2_ACCESS_TIME 12 - -#define L3_ADDRESSING 1 -#define L3_SETS 4096 -#define L3_ASSOCIATIVITY 16 -#define L3_ACCESS_TIME 30 diff --git a/kmod/kvm.c b/kmod/kvm.c deleted file mode 100644 index 4deb4fa..0000000 --- a/kmod/kvm.c +++ /dev/null @@ -1,392 +0,0 @@ -#include "kvm.h" - -#include -#include -#include -#include -#include - -struct proc_ops 
cachepc_proc_ops; - -uint16_t *cachepc_msrmts; -size_t cachepc_msrmts_count; -EXPORT_SYMBOL(cachepc_msrmts); -EXPORT_SYMBOL(cachepc_msrmts_count); - -cache_ctx *cachepc_ctx; -cacheline *cachepc_ds; -EXPORT_SYMBOL(cachepc_ctx); -EXPORT_SYMBOL(cachepc_ds); - -uint64_t cachepc_regs_tmp[16]; -uint64_t cachepc_regs_vm[16]; -EXPORT_SYMBOL(cachepc_regs_tmp); -EXPORT_SYMBOL(cachepc_regs_vm); - -int -cachepc_kvm_proc_open(struct inode *inode, struct file *file) -{ - try_module_get(THIS_MODULE); - - return 0; -} - -int -cachepc_kvm_proc_close(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - - return 0; -} - -ssize_t -cachepc_kvm_proc_read(struct file *file, char *buf, size_t buflen, loff_t *off) -{ - size_t len, left; - size_t size; - - printk(KERN_WARNING "CachePC: Reading entries (%lu:%lli)\n", - buflen, off ? *off : 0); - - size = cachepc_msrmts_count * sizeof(uint16_t); - if (!off || *off >= size || *off < 0) - return 0; - - len = size - *off; - if (len > buflen) len = buflen; - - left = copy_to_user(buf, (uint8_t *) cachepc_msrmts + *off, len); - - len -= left; - *off += len; - - return len; -} - -ssize_t -cachepc_kvm_proc_write(struct file *file, const char *buf, size_t buflen, loff_t *off) -{ - return 0; -} - -loff_t -cachepc_kvm_proc_lseek(struct file *file, loff_t off, int mode) -{ - switch (mode) { - case SEEK_SET: - file->f_pos = off; - break; - case SEEK_CUR: - file->f_pos += off; - break; - case SEEK_END: - file->f_pos = cachepc_msrmts_count * sizeof(uint16_t) + off; - break; - default: - return -EINVAL; - } - - return file->f_pos; -} - -void -cachepc_kvm_prime_probe_test(void *p) -{ - cacheline *lines; - cacheline *cl, *head; - uint32_t count; - uint32_t *arg; - int i, max; - - arg = p; - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size); - BUG_ON(lines == NULL); - - max = cachepc_ctx->nr_of_cachelines; - - cachepc_cpuid(); - cachepc_mfence(); - - for (i = 0; i < max; i++) - asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx"); - - head = cachepc_prime(cachepc_ds); - cachepc_probe(head); - - count = 0; - cl = head = cachepc_ds; - do { - count += cl->count; - cl = cl->next; - } while (cl != head); - - printk(KERN_WARNING "CachePC: Prime-probe test done (%u vs. %u => %s)\n", - count, 0, (count == 0) ? "passed" : "failed"); - - if (arg) *arg = (count == 0); - - kfree(lines); -} - -void -cachepc_kvm_stream_hwpf_test(void *p) -{ - cacheline *lines; - uint32_t count; - uint32_t *arg; - uint32_t i, max; - - arg = p; - - /* TODO: accurately detect hwpf */ - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size); - BUG_ON(lines == NULL); - - max = cachepc_ctx->nr_of_cachelines; - - cachepc_prime(cachepc_ds); - - count -= cachepc_read_pmc(0); - for (i = 0; i < max; i++) - asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx"); - count += cachepc_read_pmc(0); - - printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n", - count, max, (count == max) ? "passed" : "failed"); - - if (arg) *arg = (count == max); - - kfree(lines); -} - -void -cachepc_kvm_single_access_test(void *p) -{ - cacheline *ptr; - uint64_t pre, post; - uint32_t *arg; - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - arg = p; - - WARN_ON(arg && *arg >= L1_SETS); - if (arg && *arg >= L1_SETS) return; - ptr = cachepc_prepare_victim(cachepc_ctx, arg ? 
*arg : 48); - - cachepc_prime(cachepc_ds); - - pre = cachepc_read_pmc(0); - cachepc_victim(ptr); - post = cachepc_read_pmc(0); - - printk(KERN_WARNING "CachePC: Single access test done (%llu vs %u => %s)", - post - pre, 1, (post - pre == 1) ? "passed" : "failed"); - - if (arg) *arg = post - pre; - - cachepc_release_victim(cachepc_ctx, ptr); -} - -void -cachepc_kvm_single_eviction_test(void *p) -{ - cacheline *head, *cl, *evicted; - cacheline *ptr; - uint32_t target; - uint32_t *arg; - int count; - - arg = p; - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - WARN_ON(arg && *arg >= L1_SETS); - if (arg && *arg >= L1_SETS) return; - target = arg ? *arg : 48; - - ptr = cachepc_prepare_victim(cachepc_ctx, target); - - head = cachepc_prime(cachepc_ds); - cachepc_victim(ptr); - cachepc_probe(head); - - count = 0; - evicted = NULL; - cl = head = cachepc_ds; - do { - if (IS_FIRST(cl->flags) && cl->count > 0) { - evicted = cl; - count += cl->count; - } - cl = cl->next; - } while (cl != head); - - printk(KERN_WARNING "CachePC: Single eviction test done (%u vs %u => %s)\n", - count, 1, (count == 1 && evicted->cache_set == target) ? "passed" : "failed"); - cachepc_save_msrmts(head); - - if (arg) *arg = count; - - cachepc_release_victim(cachepc_ctx, ptr); -} - -void -cachepc_kvm_system_setup(void) -{ - uint64_t reg_addr, val; - uint32_t lo, hi; - - /* disable streaming store */ - reg_addr = 0xc0011020; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 13; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); - - /* disable speculative data cache tlb reloads */ - reg_addr = 0xc0011022; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 4; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); - - /* disable data cache hardware prefetcher */ - reg_addr = 0xc0011022; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 13; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); -} - -void -cachepc_kvm_init_pmc_ioctl(void *p) -{ - uint32_t event; - uint8_t index, event_no, event_mask; - - WARN_ON(p == NULL); - if (!p) return; - - event = *(uint32_t *)p; - - index = (event & 0xFF000000) >> 24; - event_no = (event & 0x0000FF00) >> 8; - event_mask = (event & 0x000000FF) >> 0; - - cachepc_init_pmc(index, event_no, event_mask); -} - -long -cachepc_kvm_ioctl(struct file *file, unsigned int cmd, unsigned long argp) -{ - void __user *arg_user; - uint32_t u32; - int ret; - - arg_user = (void __user *)argp; - switch (cmd) { - case CACHEPC_IOCTL_TEST_ACCESS: - printk(KERN_WARNING "CachePC: Called ioctl access test\n"); - if (!arg_user) return -EINVAL; - if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) - return -EFAULT; - ret = smp_call_function_single(2, - cachepc_kvm_single_access_test, &u32, true); - WARN_ON(ret != 0); - if (copy_to_user(arg_user, &u32, sizeof(uint32_t))) - return -EFAULT; - break; - case CACHEPC_IOCTL_TEST_EVICTION: - printk(KERN_WARNING "CachePC: Called ioctl eviction test\n"); - if (!arg_user) return -EINVAL; - if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) - return -EFAULT; - ret = smp_call_function_single(2, - 
cachepc_kvm_single_eviction_test, &u32, true); - WARN_ON(ret != 0); - if (copy_to_user(arg_user, &u32, sizeof(uint32_t))) - return -EFAULT; - break; - case CACHEPC_IOCTL_INIT_PMC: - printk(KERN_WARNING "CachePC: Called ioctl init counter\n"); - if (!arg_user) return -EINVAL; - if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) - return -EFAULT; - ret = smp_call_function_single(2, - cachepc_kvm_init_pmc_ioctl, &u32, true); - WARN_ON(ret != 0); - break; - default: - return -EINVAL; - } - - return 0; -} - -void -cachepc_kvm_setup_test(void *p) -{ - int cpu; - - cpu = get_cpu(); - - printk(KERN_WARNING "CachePC: Running on core %i\n", cpu); - - cachepc_ctx = cachepc_get_ctx(L1); - cachepc_ds = cachepc_prepare_ds(cachepc_ctx); - - cachepc_kvm_system_setup(); - - cachepc_kvm_prime_probe_test(NULL); - cachepc_kvm_single_access_test(NULL); - cachepc_kvm_single_eviction_test(NULL); - cachepc_kvm_stream_hwpf_test(NULL); - - put_cpu(); -} - -void -cachepc_kvm_init(void) -{ - int ret; - - cachepc_msrmts_count = L1_SETS; - cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL); - BUG_ON(cachepc_msrmts == NULL); - - ret = smp_call_function_single(2, cachepc_kvm_setup_test, NULL, true); - WARN_ON(ret != 0); - - memset(&cachepc_proc_ops, 0, sizeof(cachepc_proc_ops)); - cachepc_proc_ops.proc_open = cachepc_kvm_proc_open; - cachepc_proc_ops.proc_read = cachepc_kvm_proc_read; - cachepc_proc_ops.proc_write = cachepc_kvm_proc_write; - cachepc_proc_ops.proc_lseek = cachepc_kvm_proc_lseek; - cachepc_proc_ops.proc_release = cachepc_kvm_proc_close; - cachepc_proc_ops.proc_ioctl = cachepc_kvm_ioctl; - proc_create("cachepc", 0644, NULL, &cachepc_proc_ops); -} - -void -cachepc_kvm_exit(void) -{ - remove_proc_entry("cachepc", NULL); - kfree(cachepc_msrmts); - - cachepc_release_ds(cachepc_ctx, cachepc_ds); - cachepc_release_ctx(cachepc_ctx); -} diff --git a/kmod/kvm.h b/kmod/kvm.h deleted file mode 100644 index a44491e..0000000 --- a/kmod/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include "cachepc.h" - -void cachepc_kvm_init(void); -void cachepc_kvm_exit(void); diff --git a/kmod/util.c b/kmod/util.c deleted file mode 100644 index abf2b71..0000000 --- a/kmod/util.c +++ /dev/null @@ -1,38 +0,0 @@ -#include "util.h" - -void -random_perm(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - /* no special ordering needed when prefetcher is disabled */ - for (i = 0; i < arr_len; i++) - arr[i] = i; - - // /* prevent stream prefetching by alternating access direction */ - // mid = arr_len / 2; - // for (i = 0; i < arr_len; i++) - // arr[i] = mid + (i % 2 ? 
-1 : 1) * ((i + 1) / 2); -} - -void -gen_random_indices(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - for (i = 0; i < arr_len; ++i) - arr[i] = i; - random_perm(arr, arr_len); -} - - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { - uint32_t i; - - for (i = 0; i < arr_len; ++i) { - if (arr[i] == elem) - return true; - } - - return false; -} diff --git a/kmod/util.h b/kmod/util.h deleted file mode 100644 index a0ff8be..0000000 --- a/kmod/util.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include - -void random_perm(uint32_t *arr, uint32_t arr_len); -void gen_random_indices(uint32_t *arr, uint32_t arr_len); - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); diff --git a/patch.diff b/patch.diff index b6d69ce..fa112c0 100755 --- a/patch.diff +++ b/patch.diff @@ -1,7 +1,5 @@ diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h -old mode 100644 -new mode 100755 -index eb186bc57f6a..cefc1589e398 +index eb186bc57f6a..cefc1589e398 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -2,8 +2,14 @@ @@ -19,267 +17,8 @@ index eb186bc57f6a..cefc1589e398 KVM_PAGE_TRACK_MAX, }; -diff --git a/arch/x86/include/asm/sev-step.c b/arch/x86/include/asm/sev-step.c -new file mode 100755 -index 000000000000..489583f33342 ---- /dev/null -+++ b/arch/x86/include/asm/sev-step.c -@@ -0,0 +1,250 @@ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "kvm_cache_regs.h" -+#include "svm/svm.h" -+ -+ -+ -+struct kvm* main_vm; -+EXPORT_SYMBOL(main_vm); -+ -+//used to store performance counter values; 6 counters, 2 readings per counter -+uint64_t perf_reads[6][2]; -+perf_ctl_config_t perf_configs[6]; -+int perf_cpu; -+ -+ -+uint64_t perf_ctl_to_u64(perf_ctl_config_t * config) { -+ -+ uint64_t result = 0; -+ result |= ( config->EventSelect & 0xffULL); //[7:0] in result and [7:0] in EventSelect -+ result |= ( (config->UintMask & 0xffULL) << 8 ); //[15:8] -+ result |= ( (config->OsUserMode & 0x3ULL) << 16); //[17:16] -+ result |= ( (config->Edge & 0x1ULL ) << 18 ); // 18 -+ result |= ( (config->Int & 0x1ULL ) << 20 ); // 20 -+ result |= ( (config->En & 0x1ULL ) << 22 ); //22 -+ result |= ( (config->Inv & 0x1ULL ) << 23); //23 -+ result |= ( (config->CntMask & 0xffULL) << 24); //[31:24] -+ result |= ( ( (config->EventSelect & 0xf00ULL) >> 8 ) << 32); //[35:32] in result and [11:8] in EventSelect -+ result |= ( (config->HostGuestOnly & 0x3ULL) << 40); // [41:40] -+ -+ return result; -+ -+} -+ -+void write_ctl(perf_ctl_config_t * config, int cpu, uint64_t ctl_msr){ -+ wrmsrl_on_cpu(cpu, ctl_msr, perf_ctl_to_u64(config)); //always returns zero -+} -+ -+void read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result) { -+ uint64_t tmp; -+ rdmsrl_on_cpu(cpu, ctr_msr, &tmp); //always returns zero -+ *result = tmp & ( (0x1ULL << 48) - 1); -+} -+ -+void setup_perfs() { -+ int i; -+ -+ perf_cpu = smp_processor_id(); -+ -+ for( i = 0; i < 6; i++) { -+ perf_configs[i].HostGuestOnly = 0x1; //0x1 means: count only guest -+ perf_configs[i].CntMask = 0x0; -+ perf_configs[i].Inv = 0x0; -+ perf_configs[i].En = 0x0; -+ perf_configs[i].Int = 0x0; -+ perf_configs[i].Edge = 0x0; -+ perf_configs[i].OsUserMode = 0x3; //0x3 means: count userland and kernel events -+ } -+ -+ //remember to set .En to enable the individual counter -+ -+ perf_configs[0].EventSelect = 0x0c0; -+ perf_configs[0].UintMask = 0x0; -+ perf_configs[0].En = 0x1; -+ write_ctl(&perf_configs[0],perf_cpu, CTL_MSR_0); -+ -+ /*programm l2d hit from 
data cache miss perf for -+ cpu_probe_pointer_chasing_inplace without counting thread. -+ N.B. that this time we count host events -+ */ -+ perf_configs[1].EventSelect = 0x064; -+ perf_configs[1].UintMask = 0x70; -+ perf_configs[1].En = 0x1; -+ perf_configs[1].HostGuestOnly = 0x2; //0x2 means: count only host events, as we do the chase here -+ write_ctl(&perf_configs[1],perf_cpu,CTL_MSR_1); -+} -+EXPORT_SYMBOL(setup_perfs); -+ -+ -+/* -+static int __my_sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, -+ unsigned long dst, int size, -+ int *error); -+ -+int my_sev_decrypt(struct kvm* kvm, void* dst_vaddr, void* src_vaddr, uint64_t dst_paddr, uint64_t src_paddr, uint64_t len, int* api_res) { -+ -+ int call_res; -+ call_res = 0x1337; -+ *api_res = 0x1337; -+ -+ -+ if( dst_paddr % PAGE_SIZE != 0 || src_paddr % PAGE_SIZE != 0) { -+ printk("decrypt: for now, src_paddr, and dst_paddr must be page aligned"); -+ return -1; -+ } -+ -+ if( len > PAGE_SIZE ) { -+ printk("decrypt: for now, can be at most 4096 byte"); -+ return -1; -+ } -+ -+ memset(dst_vaddr,0,PAGE_SIZE); -+ -+ //clflush_cache_range(src_vaddr, PAGE_SIZE); -+ //clflush_cache_range(dst_vaddr, PAGE_SIZE); -+ wbinvd_on_all_cpus(); -+ -+ call_res = __my_sev_issue_dbg_cmd(kvm, __sme_set(src_paddr), -+ __sme_set(dst_paddr), len, api_res); -+ -+ return call_res; -+ -+} -+EXPORT_SYMBOL(my_sev_decrypt); -+ -+static int __my_sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, -+ unsigned long dst, int size, -+ int *error) -+{ -+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; -+ struct sev_data_dbg *data; -+ int ret; -+ -+ data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); -+ if (!data) -+ return -ENOMEM; -+ -+ data->handle = sev->handle; -+ data->dst_addr = dst; -+ data->src_addr = src; -+ data->len = size; -+ -+ //ret = sev_issue_cmd(kvm, -+ // SEV_CMD_DBG_DECRYPT, -+ // data, error); -+ ret = sev_do_cmd(SEV_CMD_DBG_DECRYPT, data, error); -+ kfree(data); -+ return ret; -+} -+ -+int decrypt_vmsa(struct vcpu_svm* svm, struct vmcb_save_area* save_area) { -+ -+ uint64_t src_paddr, dst_paddr; -+ void * dst_vaddr; -+ void * src_vaddr; -+ struct page * dst_page; -+ int call_res,api_res; -+ call_res = 1337; -+ api_res = 1337; -+ -+ src_vaddr = svm->vmsa; -+ src_paddr = svm->vmcb->control.vmsa_pa; -+ -+ if( src_paddr % 16 != 0) { -+ printk("decrypt_vmsa: src_paddr was not 16b aligned"); -+ } -+ -+ if( sizeof( struct vmcb_save_area) % 16 != 0 ) { -+ printk("decrypt_vmsa: size of vmcb_save_area is not 16 b aligned\n"); -+ } -+ -+ dst_page = alloc_page(GFP_KERNEL); -+ dst_vaddr = vmap(&dst_page, 1, 0, PAGE_KERNEL); -+ dst_paddr = page_to_pfn(dst_page) << PAGE_SHIFT; -+ memset(dst_vaddr,0,PAGE_SIZE); -+ -+ -+ -+ if( dst_paddr % 16 != 0 ) { -+ printk("decrypt_vmsa: dst_paddr was not 16 byte aligned"); -+ } -+ -+ //printk("src_paddr = 0x%llx dst_paddr = 0x%llx\n", __sme_clr(src_paddr), __sme_clr(dst_paddr)); -+ //printk("Sizeof vmcb_save_area is: 0x%lx\n", sizeof( struct vmcb_save_area) ); -+ -+ -+ call_res = __my_sev_issue_dbg_cmd(svm->vcpu.kvm, __sme_set(src_paddr), __sme_set(dst_paddr), sizeof(struct vmcb_save_area), &api_res); -+ -+ -+ //printk("decrypt_vmsa: result of call was %d, result of api command was %d\n",call_res, api_res); -+ -+ //todo error handling -+ if( api_res != 0 ) { -+ __free_page(dst_page); -+ return -1; -+ } -+ -+ memcpy(save_area, dst_vaddr, sizeof( struct vmcb_save_area) ); -+ -+ -+ __free_page(dst_page); -+ -+ return 0; -+ -+ -+} -+ -+ -+// -+// Contains a switch to work SEV and SEV-ES -+ // -+uint64_t 
sev_step_get_rip(struct vcpu_svm* svm) { -+ struct vmcb_save_area* save_area; -+ struct kvm * kvm; -+ struct kvm_sev_info *sev; -+ uint64_t rip; -+ -+ -+ kvm = svm->vcpu.kvm; -+ sev = &to_kvm_svm(kvm)->sev_info; -+ -+ //for sev-es we need to use the debug api, to decrypt the vmsa -+ if( sev->active && sev->es_active) { -+ int res; -+ save_area = vmalloc(sizeof(struct vmcb_save_area) ); -+ memset(save_area,0, sizeof(struct vmcb_save_area)); -+ -+ res = decrypt_vmsa(svm, save_area); -+ if( res != 0) { -+ printk("sev_step_get_rip failed to decrypt\n"); -+ return 0; -+ } -+ -+ rip = save_area->rip; -+ -+ vfree(save_area); -+ } else { //otherwise we can just access as plaintexts -+ rip = svm->vmcb->save.rip; -+ } -+ return rip; -+ -+} -+EXPORT_SYMBOL(sev_step_get_rip); -+*/ -+ -+int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip) { -+ /* -+ struct vcpu_svm *svm = container_of(vcpu, struct vcpu_svm, vcpu); -+ if( svm == NULL ) { -+ return 1; -+ } -+ (*rip) = sev_step_get_rip(svm); -+ */ -+ return 0; -+} -\ No newline at end of file diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile -old mode 100644 -new mode 100755 -index 30f244b64523..6d4a2a6530b6 +index 30f244b64523..7992f8cce838 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,8 +1,10 @@ @@ -294,18 +33,19 @@ index 30f244b64523..6d4a2a6530b6 ifeq ($(CONFIG_FRAME_POINTER),y) OBJECT_FILES_NON_STANDARD_vmenter.o := y endif -@@ -11,8 +13,8 @@ include $(srctree)/virt/kvm/Makefile.kvm +@@ -11,8 +13,9 @@ include $(srctree)/virt/kvm/Makefile.kvm kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ - hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \ - mmu/spte.o + hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o mmu/spte.o \ -+ sev-step.o userspace_page_track_signals.o svm/cachepc/cachepc.o svm/cachepc/util.o svm/cachepc/kvm.o ++ svm/cachepc/cachepc.o svm/cachepc/util.o svm/cachepc/kvm.o \ ++ sevstep/sevstep.o sevstep/uspt.o sevstep/kvm.o ifdef CONFIG_HYPERV kvm-y += kvm_onhyperv.o -@@ -25,7 +27,8 @@ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ +@@ -25,7 +28,8 @@ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o @@ -316,529 +56,150 @@ index 30f244b64523..6d4a2a6530b6 ifdef CONFIG_HYPERV kvm-amd-y += svm/svm_onhyperv.o diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -old mode 100644 -new mode 100755 -index d871b8dee7b3..b6e1dc265cac +index d871b8dee7b3..32900ef5ee0b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c -@@ -56,6 +56,9 @@ - - #include "paging.h" - -+#include -+#include -+ - extern bool itlb_multihit_kvm_mitigation; - - int __read_mostly nx_huge_pages = -1; -@@ -1152,8 +1155,8 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) +@@ -1152,6 +1152,8 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) } } --/* -- * Write-protect on the specified @sptep, @pt_protect indicates whether -+/* Apply the protection mode specified in @mode to the specified @sptep, -+ * @pt_protect indicates whether ++#include "../sevstep/mmu.c" ++ + /* + * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. 
- * - * Note: write protection is difference between dirty logging and spte -@@ -1165,9 +1168,10 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) +@@ -1165,34 +1167,15 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * * Return true if tlb need be flushed. */ -static bool spte_write_protect(u64 *sptep, bool pt_protect) -+static bool spte_protect(u64 *sptep, bool pt_protect, enum kvm_page_track_mode mode) - { - u64 spte = *sptep; -+ bool shouldFlush = false; - - if (!is_writable_pte(spte) && - !(pt_protect && is_mmu_writable_spte(spte))) -@@ -1175,22 +1179,45 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) - - rmap_printk("spte %p %llx\n", sptep, *sptep); - +-{ +- u64 spte = *sptep; +- +- if (!is_writable_pte(spte) && +- !(pt_protect && is_mmu_writable_spte(spte))) +- return false; +- +- rmap_printk("spte %p %llx\n", sptep, *sptep); +- - if (pt_protect) - spte &= ~shadow_mmu_writable_mask; - spte = spte & ~PT_WRITABLE_MASK; - - return mmu_spte_update(sptep, spte); -+ if (pt_protect){ -+ //spte &= ~shadow_mmu_writable_mask; -+ spte &= ~EPT_SPTE_MMU_WRITABLE; -+ } -+ //spte = spte & ~PT_WRITABLE_MASK; -+ if(mode == KVM_PAGE_TRACK_WRITE) { -+ spte = spte & ~PT_WRITABLE_MASK; -+ shouldFlush = true; -+ } else if( mode == KVM_PAGE_TRACK_RESET_ACCESSED) { -+ spte = spte & ~PT_ACCESSED_MASK; -+ } else if(mode == KVM_PAGE_TRACK_ACCESS) { -+ spte = spte & ~PT_PRESENT_MASK; -+ spte = spte & ~PT_WRITABLE_MASK; -+ spte = spte & ~PT_USER_MASK; -+ spte = spte | (0x1ULL << PT64_NX_SHIFT); -+ shouldFlush = true; -+ } else if( mode == KVM_PAGE_TRACK_EXEC) { -+ spte = spte | (0x1ULL << PT64_NX_SHIFT); //nx bit is set, to prevent execution, not removed -+ shouldFlush = true; -+ } else if (mode == KVM_PAGE_TRACK_RESET_EXEC) { -+ spte = spte & (~(0x1ULL << PT64_NX_SHIFT)); -+ shouldFlush = true; -+ } else { -+ printk(KERN_WARNING "spte_protect was called with invalid mode" -+ "parameter %d\n",mode); -+ } -+ shouldFlush |= mmu_spte_update(sptep, spte); -+ return shouldFlush; - } - --static bool rmap_write_protect(struct kvm_rmap_head *rmap_head, -- bool pt_protect) -+static bool rmap_protect(struct kvm_rmap_head *rmap_head, bool pt_protect, enum kvm_page_track_mode mode) +-} ++// static bool spte_write_protect(u64 *sptep, bool pt_protect) ++// { ++// return sevstep_spte_protect(sptep, pt_protect, KVM_PAGE_TRACK_WRITE); ++// } + + static bool rmap_write_protect(struct kvm_rmap_head *rmap_head, + bool pt_protect) { - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - +- u64 *sptep; +- struct rmap_iterator iter; +- bool flush = false; +- - for_each_rmap_spte(rmap_head, &iter, sptep) - flush |= spte_write_protect(sptep, pt_protect); -+ for_each_rmap_spte(rmap_head, &iter, sptep) { -+ flush |= spte_protect(sptep, pt_protect, mode); -+ } - - return flush; - } -@@ -1263,7 +1290,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, - while (mask) { - rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PG_LEVEL_4K, slot); -- rmap_write_protect(rmap_head, false); -+ rmap_protect(rmap_head, false, KVM_PAGE_TRACK_WRITE); - - /* clear the first set bit */ - mask &= mask - 1; -@@ -1333,13 +1360,13 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, - if (READ_ONCE(eager_page_split)) - kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K); - -- kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M); -+ kvm_mmu_slot_gfn_protect(kvm, slot, start, PG_LEVEL_2M, KVM_PAGE_TRACK_WRITE); - - /* Cross two large 
pages? */ - if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) != - ALIGN(end << PAGE_SHIFT, PMD_SIZE)) -- kvm_mmu_slot_gfn_write_protect(kvm, slot, end, -- PG_LEVEL_2M); -+ kvm_mmu_slot_gfn_protect(kvm, slot, end, -+ PG_LEVEL_2M, KVM_PAGE_TRACK_WRITE); - } - - /* Now handle 4K PTEs. */ -@@ -1354,26 +1381,29 @@ int kvm_cpu_dirty_log_size(void) - return kvm_x86_ops.cpu_dirty_log_size; +- +- return flush; ++ return sevstep_rmap_protect(rmap_head, pt_protect, KVM_PAGE_TRACK_WRITE); } --bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, -+bool kvm_mmu_slot_gfn_protect(struct kvm *kvm, + static bool spte_clear_dirty(u64 *sptep) +@@ -1358,22 +1341,8 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, -- int min_level) -+ int min_level, enum kvm_page_track_mode mode) + int min_level) { - struct kvm_rmap_head *rmap_head; - int i; +- struct kvm_rmap_head *rmap_head; +- int i; - bool write_protected = false; -+ //bool write_protected = false; -+ bool protected = false; - - if (kvm_memslots_have_rmaps(kvm)) { - for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { - rmap_head = gfn_to_rmap(gfn, i, slot); +- +- if (kvm_memslots_have_rmaps(kvm)) { +- for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { +- rmap_head = gfn_to_rmap(gfn, i, slot); - write_protected |= rmap_write_protect(rmap_head, true); -+ //write_protected |= rmap_write_protect(rmap_head, true); -+ protected |= rmap_protect(rmap_head, true, mode); - } - } - - if (is_tdp_mmu_enabled(kvm)) +- } +- } +- +- if (is_tdp_mmu_enabled(kvm)) - write_protected |= -+ //write_protected |= -+ protected |= - kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); - +- kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); +- - return write_protected; -+ return protected; ++ return sevstep_kvm_mmu_slot_gfn_protect(kvm, slot, ++ gfn, min_level, KVM_PAGE_TRACK_WRITE); } static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) -@@ -1381,7 +1411,7 @@ static bool kvm_vcpu_write_protect_gfn(struct kvm_vcpu *vcpu, u64 gfn) - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); -- return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K); -+ return kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K, KVM_PAGE_TRACK_WRITE); - } - - static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, -@@ -3901,6 +3931,38 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) +@@ -3901,6 +3870,10 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { -+ int send_err; -+ uint64_t current_rip; -+ int have_rip; -+ int i; -+ bool was_tracked; -+ int modes[] = {KVM_PAGE_TRACK_WRITE,KVM_PAGE_TRACK_ACCESS,KVM_PAGE_TRACK_EXEC}; -+ was_tracked = false; -+ for( i = 0; i < sizeof(modes) / sizeof(modes[0]); i++ ) { -+ if(kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn,modes[i])) { -+ __untrack_single_page(vcpu, fault->gfn, modes[i]); -+ was_tracked = true; -+ } -+ } -+ if( was_tracked ) { -+ have_rip = false; -+ if( uspt_should_get_rip() ) { -+ //! 
because 0 indicates "no error" but have_rip should be one if successfull -+ have_rip = (!sev_step_get_rip_kvm_vcpu(vcpu,¤t_rip)); -+ } -+ if( uspt_batch_tracking_in_progress() ) { -+ if( (send_err = uspt_batch_tracking_save(fault->gfn << PAGE_SHIFT,fault->error_code,have_rip,current_rip)) ) { -+ printk_ratelimited("uspt_batch_tracking_save failed with %d\n##########################\n",send_err); -+ } -+ uspt_batch_tracking_handle_retrack(vcpu,fault->gfn); -+ uspt_batch_tracking_inc_event_idx(); -+ } else { -+ if( (send_err = uspt_send_and_block(fault->gfn << PAGE_SHIFT,fault->error_code,have_rip,current_rip)) ) { -+ printk("uspt_send_and_block failed with %d\n##########################\n",send_err); -+ } -+ } -+ } ++ int active; ++ ++ sevstep_uspt_page_fault_handle(vcpu, fault); + if (unlikely(fault->rsvd)) return false; -@@ -3911,7 +3973,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, +@@ -3911,8 +3884,11 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, * guest is writing the page which is write tracked which can * not be fixed by page fault handler. */ - if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE)) -+ if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE) || kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_ACCESS)) - return true; +- return true; ++ active = kvm_slot_page_track_is_active(vcpu->kvm, ++ fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE); ++ active |= kvm_slot_page_track_is_active(vcpu->kvm, ++ fault->slot, fault->gfn, KVM_PAGE_TRACK_ACCESS); ++ if (active) return true; return false; -@@ -5991,7 +6053,7 @@ static bool slot_rmap_write_protect(struct kvm *kvm, - struct kvm_rmap_head *rmap_head, - const struct kvm_memory_slot *slot) - { -- return rmap_write_protect(rmap_head, false); -+ return rmap_protect(rmap_head, false, KVM_PAGE_TRACK_WRITE); } - - void kvm_mmu_slot_remove_write_access(struct kvm *kvm, -diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h -old mode 100644 -new mode 100755 -index bd2a26897b97..aa57ab1b4c89 ---- a/arch/x86/kvm/mmu/mmu_internal.h -+++ b/arch/x86/kvm/mmu/mmu_internal.h -@@ -133,9 +133,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, - - void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); - void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); --bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, -+bool kvm_mmu_slot_gfn_protect(struct kvm *kvm, - struct kvm_memory_slot *slot, u64 gfn, -- int min_level); -+ int min_level, enum kvm_page_track_mode mode); - void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, - u64 start_gfn, u64 pages); - unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c -old mode 100644 -new mode 100755 -index 2e09d1b6249f..22b631351673 +index 2e09d1b6249f..17b69a1f2b40 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c -@@ -131,9 +131,11 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, +@@ -19,6 +19,8 @@ + #include "mmu.h" + #include "mmu_internal.h" + ++#include "../sevstep/sevstep.h" ++ + bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) + { + return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) || +@@ -131,9 +133,10 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, */ kvm_mmu_gfn_disallow_lpage(slot, gfn); - if (mode == 
KVM_PAGE_TRACK_WRITE) - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) -+ //if (mode == KVM_PAGE_TRACK_WRITE) -+ // if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) -+ if (kvm_mmu_slot_gfn_protect(kvm, slot, gfn, PG_LEVEL_4K, mode)) { - kvm_flush_remote_tlbs(kvm); +- kvm_flush_remote_tlbs(kvm); ++ if (sevstep_kvm_mmu_slot_gfn_protect(kvm, ++ slot, gfn, PG_LEVEL_4K, mode)) { ++ kvm_flush_remote_tlbs(kvm); + } } EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); -diff --git a/arch/x86/kvm/sev-step.c b/arch/x86/kvm/sev-step.c -new file mode 100755 -index 000000000000..489583f33342 +diff --git a/arch/x86/kvm/sevstep b/arch/x86/kvm/sevstep +new file mode 120000 +index 000000000000..642ea24bf098 --- /dev/null -+++ b/arch/x86/kvm/sev-step.c -@@ -0,0 +1,250 @@ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "kvm_cache_regs.h" -+#include "svm/svm.h" -+ -+ -+ -+struct kvm* main_vm; -+EXPORT_SYMBOL(main_vm); -+ -+//used to store performance counter values; 6 counters, 2 readings per counter -+uint64_t perf_reads[6][2]; -+perf_ctl_config_t perf_configs[6]; -+int perf_cpu; -+ -+ -+uint64_t perf_ctl_to_u64(perf_ctl_config_t * config) { -+ -+ uint64_t result = 0; -+ result |= ( config->EventSelect & 0xffULL); //[7:0] in result and [7:0] in EventSelect -+ result |= ( (config->UintMask & 0xffULL) << 8 ); //[15:8] -+ result |= ( (config->OsUserMode & 0x3ULL) << 16); //[17:16] -+ result |= ( (config->Edge & 0x1ULL ) << 18 ); // 18 -+ result |= ( (config->Int & 0x1ULL ) << 20 ); // 20 -+ result |= ( (config->En & 0x1ULL ) << 22 ); //22 -+ result |= ( (config->Inv & 0x1ULL ) << 23); //23 -+ result |= ( (config->CntMask & 0xffULL) << 24); //[31:24] -+ result |= ( ( (config->EventSelect & 0xf00ULL) >> 8 ) << 32); //[35:32] in result and [11:8] in EventSelect -+ result |= ( (config->HostGuestOnly & 0x3ULL) << 40); // [41:40] -+ -+ return result; -+ -+} -+ -+void write_ctl(perf_ctl_config_t * config, int cpu, uint64_t ctl_msr){ -+ wrmsrl_on_cpu(cpu, ctl_msr, perf_ctl_to_u64(config)); //always returns zero -+} -+ -+void read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result) { -+ uint64_t tmp; -+ rdmsrl_on_cpu(cpu, ctr_msr, &tmp); //always returns zero -+ *result = tmp & ( (0x1ULL << 48) - 1); -+} -+ -+void setup_perfs() { -+ int i; -+ -+ perf_cpu = smp_processor_id(); -+ -+ for( i = 0; i < 6; i++) { -+ perf_configs[i].HostGuestOnly = 0x1; //0x1 means: count only guest -+ perf_configs[i].CntMask = 0x0; -+ perf_configs[i].Inv = 0x0; -+ perf_configs[i].En = 0x0; -+ perf_configs[i].Int = 0x0; -+ perf_configs[i].Edge = 0x0; -+ perf_configs[i].OsUserMode = 0x3; //0x3 means: count userland and kernel events -+ } -+ -+ //remember to set .En to enable the individual counter -+ -+ perf_configs[0].EventSelect = 0x0c0; -+ perf_configs[0].UintMask = 0x0; -+ perf_configs[0].En = 0x1; -+ write_ctl(&perf_configs[0],perf_cpu, CTL_MSR_0); -+ -+ /*programm l2d hit from data cache miss perf for -+ cpu_probe_pointer_chasing_inplace without counting thread. -+ N.B. 
that this time we count host events -+ */ -+ perf_configs[1].EventSelect = 0x064; -+ perf_configs[1].UintMask = 0x70; -+ perf_configs[1].En = 0x1; -+ perf_configs[1].HostGuestOnly = 0x2; //0x2 means: count only host events, as we do the chase here -+ write_ctl(&perf_configs[1],perf_cpu,CTL_MSR_1); -+} -+EXPORT_SYMBOL(setup_perfs); -+ -+ -+/* -+static int __my_sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, -+ unsigned long dst, int size, -+ int *error); -+ -+int my_sev_decrypt(struct kvm* kvm, void* dst_vaddr, void* src_vaddr, uint64_t dst_paddr, uint64_t src_paddr, uint64_t len, int* api_res) { -+ -+ int call_res; -+ call_res = 0x1337; -+ *api_res = 0x1337; -+ -+ -+ if( dst_paddr % PAGE_SIZE != 0 || src_paddr % PAGE_SIZE != 0) { -+ printk("decrypt: for now, src_paddr, and dst_paddr must be page aligned"); -+ return -1; -+ } -+ -+ if( len > PAGE_SIZE ) { -+ printk("decrypt: for now, can be at most 4096 byte"); -+ return -1; -+ } -+ -+ memset(dst_vaddr,0,PAGE_SIZE); -+ -+ //clflush_cache_range(src_vaddr, PAGE_SIZE); -+ //clflush_cache_range(dst_vaddr, PAGE_SIZE); -+ wbinvd_on_all_cpus(); -+ -+ call_res = __my_sev_issue_dbg_cmd(kvm, __sme_set(src_paddr), -+ __sme_set(dst_paddr), len, api_res); -+ -+ return call_res; -+ -+} -+EXPORT_SYMBOL(my_sev_decrypt); -+ -+static int __my_sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, -+ unsigned long dst, int size, -+ int *error) -+{ -+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; -+ struct sev_data_dbg *data; -+ int ret; -+ -+ data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); -+ if (!data) -+ return -ENOMEM; -+ -+ data->handle = sev->handle; -+ data->dst_addr = dst; -+ data->src_addr = src; -+ data->len = size; -+ -+ //ret = sev_issue_cmd(kvm, -+ // SEV_CMD_DBG_DECRYPT, -+ // data, error); -+ ret = sev_do_cmd(SEV_CMD_DBG_DECRYPT, data, error); -+ kfree(data); -+ return ret; -+} -+ -+int decrypt_vmsa(struct vcpu_svm* svm, struct vmcb_save_area* save_area) { -+ -+ uint64_t src_paddr, dst_paddr; -+ void * dst_vaddr; -+ void * src_vaddr; -+ struct page * dst_page; -+ int call_res,api_res; -+ call_res = 1337; -+ api_res = 1337; -+ -+ src_vaddr = svm->vmsa; -+ src_paddr = svm->vmcb->control.vmsa_pa; -+ -+ if( src_paddr % 16 != 0) { -+ printk("decrypt_vmsa: src_paddr was not 16b aligned"); -+ } -+ -+ if( sizeof( struct vmcb_save_area) % 16 != 0 ) { -+ printk("decrypt_vmsa: size of vmcb_save_area is not 16 b aligned\n"); -+ } -+ -+ dst_page = alloc_page(GFP_KERNEL); -+ dst_vaddr = vmap(&dst_page, 1, 0, PAGE_KERNEL); -+ dst_paddr = page_to_pfn(dst_page) << PAGE_SHIFT; -+ memset(dst_vaddr,0,PAGE_SIZE); -+ -+ -+ -+ if( dst_paddr % 16 != 0 ) { -+ printk("decrypt_vmsa: dst_paddr was not 16 byte aligned"); -+ } -+ -+ //printk("src_paddr = 0x%llx dst_paddr = 0x%llx\n", __sme_clr(src_paddr), __sme_clr(dst_paddr)); -+ //printk("Sizeof vmcb_save_area is: 0x%lx\n", sizeof( struct vmcb_save_area) ); -+ -+ -+ call_res = __my_sev_issue_dbg_cmd(svm->vcpu.kvm, __sme_set(src_paddr), __sme_set(dst_paddr), sizeof(struct vmcb_save_area), &api_res); -+ -+ -+ //printk("decrypt_vmsa: result of call was %d, result of api command was %d\n",call_res, api_res); -+ -+ //todo error handling -+ if( api_res != 0 ) { -+ __free_page(dst_page); -+ return -1; -+ } -+ -+ memcpy(save_area, dst_vaddr, sizeof( struct vmcb_save_area) ); -+ -+ -+ __free_page(dst_page); -+ -+ return 0; -+ -+ -+} -+ -+ -+// -+// Contains a switch to work SEV and SEV-ES -+ // -+uint64_t sev_step_get_rip(struct vcpu_svm* svm) { -+ struct vmcb_save_area* save_area; -+ struct kvm * kvm; -+ struct 
kvm_sev_info *sev; -+ uint64_t rip; -+ -+ -+ kvm = svm->vcpu.kvm; -+ sev = &to_kvm_svm(kvm)->sev_info; -+ -+ //for sev-es we need to use the debug api, to decrypt the vmsa -+ if( sev->active && sev->es_active) { -+ int res; -+ save_area = vmalloc(sizeof(struct vmcb_save_area) ); -+ memset(save_area,0, sizeof(struct vmcb_save_area)); -+ -+ res = decrypt_vmsa(svm, save_area); -+ if( res != 0) { -+ printk("sev_step_get_rip failed to decrypt\n"); -+ return 0; -+ } -+ -+ rip = save_area->rip; -+ -+ vfree(save_area); -+ } else { //otherwise we can just access as plaintexts -+ rip = svm->vmcb->save.rip; -+ } -+ return rip; -+ -+} -+EXPORT_SYMBOL(sev_step_get_rip); -+*/ -+ -+int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip) { -+ /* -+ struct vcpu_svm *svm = container_of(vcpu, struct vcpu_svm, vcpu); -+ if( svm == NULL ) { -+ return 1; -+ } -+ (*rip) = sev_step_get_rip(svm); -+ */ -+ return 0; -+} ++++ b/arch/x86/kvm/sevstep +@@ -0,0 +1 @@ ++/home/louis/kvm-prime-count/sevstep \ No newline at end of file diff --git a/arch/x86/kvm/svm/cachepc b/arch/x86/kvm/svm/cachepc new file mode 120000 -index 000000000000..7bef8c5db46c +index 000000000000..9119e44af1f0 --- /dev/null +++ b/arch/x86/kvm/svm/cachepc @@ -0,0 +1 @@ -+/home/louis/kvm-prime-count/kmod ++/home/louis/kvm-prime-count/cachepc \ No newline at end of file diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cf0bf456d520..4dbb8041541f 100644 @@ -894,9 +255,7 @@ index cf0bf456d520..4dbb8041541f 100644 guest_state_exit_irqoff(); diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S -old mode 100644 -new mode 100755 -index dfaeb47fcf2a..0626f3fdddfd +index dfaeb47fcf2a..0626f3fdddfd 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -29,12 +29,59 @@ @@ -1027,677 +386,21 @@ index dfaeb47fcf2a..0626f3fdddfd 2: cli -diff --git a/arch/x86/kvm/userspace_page_track_signals.c b/arch/x86/kvm/userspace_page_track_signals.c -new file mode 100755 -index 000000000000..7f37c9c7e4cd ---- /dev/null -+++ b/arch/x86/kvm/userspace_page_track_signals.c -@@ -0,0 +1,445 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+ -+//crude sync mechanism. don't know a good way to act on errors yet. 
-+uint64_t last_sent_event_id = 1; -+uint64_t last_acked_event_id = 1; -+DEFINE_RWLOCK(event_lock); -+ -+page_fault_event_t sent_event; -+static int have_event = 0; -+ -+static bool get_rip = true; -+ -+static int inited = 0; -+ -+ -+ -+ -+ -+void uspt_clear(void) { -+ write_lock(&event_lock); -+ inited = 0; -+ last_sent_event_id = 1; -+ last_acked_event_id = 1; -+ have_event = 0; -+ get_rip = false; -+ write_unlock(&event_lock); -+} -+ -+int uspt_initialize(int pid,bool should_get_rip) { -+ write_lock(&event_lock); -+ -+ inited = 1; -+ last_sent_event_id = 1; -+ last_acked_event_id = 1; -+ have_event = 0; -+ get_rip = should_get_rip; -+ write_unlock(&event_lock); -+ return 0; -+} -+ -+int uspt_is_initialiized() { -+ return inited; -+} -+ -+bool uspt_should_get_rip() { -+ bool tmp; -+ read_lock(&event_lock); -+ tmp = get_rip; -+ read_unlock(&event_lock); -+ return tmp; -+} -+ -+int uspt_send_and_block(uint64_t faulted_gpa, uint32_t error_code,bool have_rip,uint64_t rip) { -+ ktime_t abort_after; -+ page_fault_event_t message_for_user; -+ -+ read_lock(&event_lock); -+ if( !uspt_is_initialiized() ) { -+ printk("userspace_page_track_signals: uspt_send_and_block : ctx not initialized!\n"); -+ read_unlock(&event_lock); -+ return 1; -+ } -+ read_unlock(&event_lock); -+ -+ write_lock(&event_lock); -+ if( last_sent_event_id != last_acked_event_id ) { -+ printk("event id_s out of sync, aborting. Fix this later\n"); -+ write_unlock(&event_lock); -+ return 1; -+ } else { -+ //TODO: handle overflow -+ last_sent_event_id++; -+ } -+ message_for_user.id = last_sent_event_id; -+ message_for_user.faulted_gpa = faulted_gpa; -+ message_for_user.error_code = error_code; -+ message_for_user.have_rip_info = have_rip; -+ message_for_user.rip = rip; -+ message_for_user.ns_timestamp = ktime_get_real_ns(); -+ message_for_user.have_retired_instructions = false; -+ -+ //for poll based system; -+ have_event = 1; -+ sent_event = message_for_user; -+ //printk("uspt_send_and_block sending event %llu\n",sent_event.id); -+ -+ write_unlock(&event_lock); -+ -+ -+ //wait for ack, but with tiemout. 
Otherwise small bugs in userland easily lead -+ //to a kernel hang -+ abort_after = ktime_get() + 1000000000ULL; //1 sec in nanosecond -+ while( !uspt_is_event_done(sent_event.id) ) { -+ if( ktime_get() > abort_after ) { -+ printk("Waiting for ack of event %llu timed out, continuing\n",sent_event.id); -+ return 3; -+ } -+ } -+ return 0; -+} -+ -+int uspt_is_event_done(uint64_t id) { -+ int res; -+ read_lock(&event_lock); -+ res = last_acked_event_id >= id; -+ read_unlock(&event_lock); -+ return res; -+ -+} -+ -+int uspt_handle_poll_event(page_fault_event_t* userpace_mem) { -+ int err; -+ -+ //most of the time we won't have an event -+ read_lock(&event_lock); -+ if( !have_event) { -+ read_unlock(&event_lock); -+ return KVM_USPT_POLL_EVENT_NO_EVENT; -+ } -+ read_unlock(&event_lock); -+ -+ write_lock(&event_lock); -+ if( have_event) { -+ err = copy_to_user(userpace_mem, &sent_event, sizeof(page_fault_event_t)); -+ have_event = 0; -+ } else { -+ err = KVM_USPT_POLL_EVENT_NO_EVENT; -+ } -+ write_unlock(&event_lock); -+ return err; -+ -+} -+ -+static int _uspt_handle_ack_event(uint64_t id) { -+ int err = 0; -+ write_lock(&event_lock); -+ if( id == last_sent_event_id) { -+ last_acked_event_id = last_sent_event_id; -+ //printk("successfull ack\n"); -+ } else { -+ err = 1; -+ printk("last sent event id is %llu but received ack for %llu\n",last_sent_event_id,id); -+ } -+ write_unlock(&event_lock); -+ return err; -+ -+ -+} -+ -+int uspt_handle_ack_event_ioctl(ack_event_t event) { -+ return _uspt_handle_ack_event(event.id); -+} -+ -+ -+ -+typedef struct { -+ bool is_active; -+ int tracking_type; -+ bool retrack; -+ -+ int perf_cpu; -+ -+ uint64_t gfn_retrack_backlog[10]; -+ int gfn_retrack_backlog_next_idx; -+ -+ page_fault_event_t * events; -+ uint64_t event_next_idx; -+ uint64_t events_size; -+ -+ bool error_occured; -+ -+ -+} batch_track_state_t; -+ -+DEFINE_SPINLOCK(batch_track_state_lock); -+static batch_track_state_t batch_track_state; -+ -+typedef struct { -+ uint64_t idx_for_last_perf_reading; -+ uint64_t last_perf_reading; -+ uint64_t delta_valid_idx; -+ uint64_t delta; -+} perf_state_t; -+ -+perf_state_t perf_state; -+ -+//setup perf_state and program retired instruction performance counter -+void _perf_state_setup_retired_instructions(void) { -+ perf_ctl_config_t retired_instructions_perf_config; -+ retired_instructions_perf_config.HostGuestOnly = 0x1; //0x1 means: count only guest -+ retired_instructions_perf_config.CntMask = 0x0; -+ retired_instructions_perf_config.Inv = 0x0; -+ retired_instructions_perf_config.Int = 0x0; -+ retired_instructions_perf_config.Edge = 0x0; -+ retired_instructions_perf_config.OsUserMode = 0x3; //0x3 means: count kern and user events -+ retired_instructions_perf_config.EventSelect = 0x0c0; -+ retired_instructions_perf_config.UintMask = 0x0; -+ retired_instructions_perf_config.En = 0x1; -+ write_ctl(&retired_instructions_perf_config,batch_track_state.perf_cpu, CTL_MSR_0); -+} -+ -+ -+//get retired instructions between current_event_idx-1 and current_event_idx -+//value is cached for multiple calls to the same current_event_idx -+uint64_t _perf_state_update_and_get_delta(uint64_t current_event_idx) { -+ uint64_t current_value; -+ -+ //check if value is "cached" -+ if( perf_state.delta_valid_idx == current_event_idx) { -+ if( current_event_idx == 0) { -+ read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, ¤t_value); -+ perf_state.idx_for_last_perf_reading = current_event_idx; -+ perf_state.last_perf_reading = current_event_idx; -+ } -+ return perf_state.delta; -+ } -+ 
-+ //otherwise update, but logic is only valid for two consecutive events -+ if (current_event_idx != perf_state.idx_for_last_perf_reading+1) { -+ printk_ratelimited(KERN_CRIT "_perf_state_update_and_get_delta: last reading was for idx %llu but was queried for %llu\n",perf_state.idx_for_last_perf_reading,current_event_idx); -+ } -+ -+ read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, ¤t_value); -+ perf_state.delta = (current_value - perf_state.last_perf_reading); -+ perf_state.delta_valid_idx = current_event_idx; -+ -+ perf_state.idx_for_last_perf_reading = current_event_idx; -+ perf_state.last_perf_reading = current_value; -+ -+ return perf_state.delta; -+} -+ -+void uspt_batch_tracking_inc_event_idx(void) { -+ spin_lock(&batch_track_state_lock); -+ batch_track_state.event_next_idx++; -+ spin_unlock(&batch_track_state_lock); -+} -+ -+int uspt_batch_tracking_start(int tracking_type,uint64_t expected_events, int perf_cpu,bool retrack) { -+ page_fault_event_t* events; -+ uint64_t buffer_size; -+ uint64_t idx = 0; -+ spin_lock(&batch_track_state_lock); -+ if( batch_track_state.is_active ) { -+ printk("userspace_page_track_signals: overwriting active batch track config!\n"); -+ if( batch_track_state.events != NULL ) { -+ vfree(batch_track_state.events); -+ } -+ } -+ batch_track_state.is_active = false; -+ spin_unlock(&batch_track_state_lock); -+ -+ buffer_size = expected_events*sizeof(page_fault_event_t); -+ printk("uspt_batch_tracking_start trying to alloc %llu bytes buffer for events\n",buffer_size); -+ events = vmalloc(buffer_size); -+ if( events == NULL) { -+ printk("userspace_page_track_signals: faperf_cpuiled to alloc %llu bytes for event buffer\n",buffer_size); -+ return 1; //note: lock not held here -+ } -+ -+ //access each element once to force them into memory, improving performance -+ //during tracking -+ for( idx = 0; idx < expected_events*sizeof(page_fault_event_t);idx++) { -+ ((volatile uint8_t*)events)[idx] = 0; -+ } -+ -+ perf_state.idx_for_last_perf_reading = 0; -+ perf_state.last_perf_reading = 0; -+ perf_state.delta_valid_idx = 0; -+ perf_state.delta = 0; -+ _perf_state_setup_retired_instructions(); -+ -+ -+ spin_lock(&batch_track_state_lock); -+ -+ batch_track_state.perf_cpu = perf_cpu; -+ batch_track_state.retrack = retrack; -+ -+ batch_track_state.events = events; -+ batch_track_state.event_next_idx = 0; -+ batch_track_state.events_size = expected_events; -+ -+ batch_track_state.gfn_retrack_backlog_next_idx = 0; -+ batch_track_state.tracking_type = tracking_type; -+ batch_track_state.error_occured = false; -+ -+ batch_track_state.is_active = true; -+ -+ spin_unlock(&batch_track_state_lock); -+ -+ return 0; -+ -+ -+} -+ -+void uspt_batch_tracking_handle_retrack(struct kvm_vcpu* vcpu, uint64_t current_fault_gfn) { -+ int i; -+ uint64_t ret_instr_delta; -+ -+ spin_lock(&batch_track_state_lock); -+ -+ if( !batch_track_state.retrack ) { -+ spin_unlock(&batch_track_state_lock); -+ return; -+ } -+ -+ if( smp_processor_id() != batch_track_state.perf_cpu) { -+ printk("uspt_batch_tracking_handle_retrack: perf was programmed on logical cpu %d but handler was called on %d. Did you forget to pin the vcpu thread?\n",batch_track_state.perf_cpu,smp_processor_id()); -+ } -+ ret_instr_delta = _perf_state_update_and_get_delta(batch_track_state.event_next_idx); -+ -+ -+ //faulting instructions is probably the same as on last fault -+ //try to add current fault to retrack log and return -+ //for first event idx we do not have a valid ret_instr_delta. 
Retracking for the frist time is fine, if we loop, we end up here again but with a valid delta on one of the next event -+ if( (ret_instr_delta < 2) && ( batch_track_state.event_next_idx != 0) ) { -+ int next_idx = batch_track_state.gfn_retrack_backlog_next_idx; -+ if( next_idx >= sizeof(batch_track_state.gfn_retrack_backlog)/sizeof(batch_track_state.gfn_retrack_backlog[0])) { -+ printk("uspt_batch_tracking_handle_retrack: retrack backlog full, dropping retrack for fault at 0x%llx\n",current_fault_gfn); -+ } else { -+ batch_track_state.gfn_retrack_backlog[next_idx] = current_fault_gfn; -+ batch_track_state.gfn_retrack_backlog_next_idx++; -+ } -+ -+ spin_unlock(&batch_track_state_lock); -+ return; -+ } -+ -+ //made progress, retrack everything in backlog and reset idx -+ for( i = 0; i < batch_track_state.gfn_retrack_backlog_next_idx;i++) { -+ __track_single_page(vcpu,batch_track_state.gfn_retrack_backlog[i],batch_track_state.tracking_type); -+ } -+ -+ //add current fault to list -+ batch_track_state.gfn_retrack_backlog[0] = current_fault_gfn; -+ batch_track_state.gfn_retrack_backlog_next_idx = 1; -+ -+ spin_unlock(&batch_track_state_lock); -+ -+} -+ -+int uspt_batch_tracking_save(uint64_t faulted_gpa, uint32_t error_code, bool have_rip,uint64_t rip) { -+ uint64_t ret_instr_delta; -+ page_fault_event_t* event; -+ -+ spin_lock(&batch_track_state_lock); -+ -+ if( !batch_track_state.is_active ) { -+ printk_ratelimited("userspace_page_track_signals: got save but batch tracking is not active!\n"); -+ batch_track_state.error_occured = true; -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ } -+ -+ -+ if( batch_track_state.event_next_idx >= batch_track_state.events_size) { -+ printk_ratelimited("userspace_page_track_signals: events buffer is full!\n"); -+ batch_track_state.error_occured = true; -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ } -+ -+ if( smp_processor_id() != batch_track_state.perf_cpu) { -+ printk("uspt_batch_tracking_handle_retrack: perf was programmed on logical cpu %d but handler was called on %d. Did you forget to pin the vcpu thread?\n",batch_track_state.perf_cpu,smp_processor_id()); -+ } -+ ret_instr_delta = _perf_state_update_and_get_delta(batch_track_state.event_next_idx); -+ -+ -+ if( batch_track_state.events == NULL ) { -+ printk(KERN_CRIT "userspace_page_track_signals: events buf was NULL but \"is_active\" was set! 
This should never happen!!!\n"); -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ } -+ -+ event = &batch_track_state.events[batch_track_state.event_next_idx]; -+ event->id = batch_track_state.event_next_idx; -+ event->faulted_gpa = faulted_gpa; -+ event->error_code = error_code; -+ event->have_rip_info = have_rip; -+ event->rip = rip; -+ event->ns_timestamp = ktime_get_real_ns(); -+ event->have_retired_instructions = true; -+ event->retired_instructions = ret_instr_delta; -+ -+//old inc was here -+ -+ if(batch_track_state.gfn_retrack_backlog_next_idx > (sizeof(batch_track_state.gfn_retrack_backlog)/sizeof(batch_track_state.gfn_retrack_backlog[0])) ) { -+ printk_ratelimited("userspace_page_track_signals: gfn retrack backlog overflow!\n"); -+ batch_track_state.error_occured = true; -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ } -+ -+ spin_unlock(&batch_track_state_lock); -+ return 0; -+} -+ -+int uspt_batch_tracking_stop(page_fault_event_t* results, uint64_t len, bool* error_occured) { -+ spin_lock(&batch_track_state_lock); -+ if( !batch_track_state.is_active ) { -+ printk("userspace_page_track_signals: batch tracking not active\n"); -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ -+ } -+ batch_track_state.is_active = false; -+ -+ if( len > batch_track_state.event_next_idx) { -+ printk("userspace_page_track_signals: requested %llu events but got only %llu\n",len,batch_track_state.event_next_idx ); -+ spin_unlock(&batch_track_state_lock); -+ return 1; -+ } -+ -+ memcpy(results,batch_track_state.events, len*sizeof(page_fault_event_t)); -+ vfree(batch_track_state.events); -+ -+ (*error_occured) = batch_track_state.error_occured; -+ -+ spin_unlock(&batch_track_state_lock); -+ -+ return 0; -+} -+ -+uint64_t uspt_batch_tracking_get_events_count() { -+ uint64_t buf; -+ spin_lock(&batch_track_state_lock); -+ buf = batch_track_state.event_next_idx; -+ spin_unlock(&batch_track_state_lock); -+ -+ return buf; -+} -+ -+bool uspt_batch_tracking_in_progress() { -+ return batch_track_state.is_active; -+} -\ No newline at end of file diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -old mode 100644 -new mode 100755 -index d9adf79124f9..0003b96f8565 +index d9adf79124f9..1809b79cb6cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -82,6 +82,9 @@ +@@ -82,6 +82,8 @@ #include #include -+#include -+#include "mmu/mmu_internal.h" ++#include "sevstep/kvm.h" + #define CREATE_TRACE_POINTS #include "trace.h" -@@ -13083,6 +13086,198 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, - : kvm_sev_es_outs(vcpu, size, port); - } - EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); -+bool __untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, -+ enum kvm_page_track_mode mode) { -+ int idx; -+ bool ret; -+ struct kvm_memory_slot *slot; -+ -+ ret = false; -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ if (mode == KVM_PAGE_TRACK_ACCESS) { -+ //printk("Removing gfn: %016llx from acess page track pool\n", gfn); -+ } -+ if (mode == KVM_PAGE_TRACK_WRITE) { -+ //printk("Removing gfn: %016llx from write page track pool\n", gfn); -+ } -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); -+ -+ if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { -+ -+ write_lock(&vcpu->kvm->mmu_lock); -+ kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ ret = true; -+ -+ } else { -+ -+ printk("Failed to untrack %016llx because ", gfn); -+ if (slot == NULL) { -+ printk(KERN_CONT "slot was null"); -+ } else if 
(!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { -+ printk(KERN_CONT "page track was not active"); -+ } -+ printk(KERN_CONT "\n"); -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ return ret; -+} -+EXPORT_SYMBOL(__untrack_single_page); -+ -+bool __reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) { -+ int idx; -+ bool ret; -+ struct kvm_memory_slot *slot; -+ -+ ret = false; -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); -+ if( slot != NULL ) { -+ write_lock(&vcpu->kvm->mmu_lock); -+ //Vincent: The kvm mmu function now requires min_level -+ //We want all pages to protected so we do PG_LEVEL_4K -+ //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ -+ kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_ACCESSED); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ ret = true; -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ return ret; -+} -+EXPORT_SYMBOL(__reset_accessed_on_page); -+ -+bool __clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) { -+ int idx; -+ bool ret; -+ struct kvm_memory_slot *slot; -+ -+ ret = false; -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); -+ if( slot != NULL ) { -+ write_lock(&vcpu->kvm->mmu_lock); -+ //Vincent: The kvm mmu function now requires min_level -+ //We want all pages to protected so we do PG_LEVEL_4K -+ //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ -+ kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_EXEC); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ ret = true; -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ return ret; -+} -+EXPORT_SYMBOL(__clear_nx_on_page); -+ -+bool __track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, -+ enum kvm_page_track_mode mode) { -+ int idx; -+ bool ret; -+ struct kvm_memory_slot *slot; -+ -+ ret = false; -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ if (mode == KVM_PAGE_TRACK_ACCESS) { -+ //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn); -+ //printk("Adding gfn: %016llx to acess page track pool\n", gfn); -+ } -+ if (mode == KVM_PAGE_TRACK_WRITE) { -+ //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn); -+ } -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); -+ if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm,slot, gfn, mode)) { -+ -+ write_lock(&vcpu->kvm->mmu_lock); -+ kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ ret = true; -+ -+ } else { -+ -+ printk("Failed to track %016llx because ", gfn); -+ if (slot == NULL) { -+ printk(KERN_CONT "slot was null"); -+ } -+ if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { -+ printk(KERN_CONT "page is already tracked"); -+ } -+ printk(KERN_CONT "\n"); -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ return ret; -+} -+EXPORT_SYMBOL(__track_single_page); -+ -+//track all pages; taken from severed repo -+long kvm_start_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ) { -+ long count = 0; -+ u64 iterator, iterat_max; -+ struct kvm_memory_slot *slot; -+ int idx; -+ -+ //Vincent: Memslots interface changed into a rb tree, see -+ //here: https://lwn.net/Articles/856392/ -+ //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u -+ //Thus we use instead of -+ //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn -+ // + vcpu->kvm->memslots[0]->memslots[0].npages; -+ 
struct rb_node *node; -+ struct kvm_memory_slot *first_memslot; -+ node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); -+ first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); -+ iterat_max = first_memslot->base_gfn + first_memslot->npages; -+ for (iterator=0; iterator < iterat_max; iterator++) -+ { -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); -+ if ( slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { -+ write_lock(&vcpu->kvm->mmu_lock); -+ kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ count++; -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ } -+ -+ return count; -+} -+EXPORT_SYMBOL(kvm_start_tracking); -+ -+//track all pages; taken from severed repo -+long kvm_stop_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ) { -+ long count = 0; -+ u64 iterator, iterat_max; -+ struct kvm_memory_slot *slot; -+ int idx; -+ -+ -+ //Vincent: Memslots interface changed into a rb tree, see -+ //here: https://lwn.net/Articles/856392/ -+ //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u -+ //Thus we use instead of -+ //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn -+ // + vcpu->kvm->memslots[0]->memslots[0].npages; -+ struct rb_node *node; -+ struct kvm_memory_slot *first_memslot; -+ node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); -+ first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); -+ iterat_max = first_memslot->base_gfn + first_memslot->npages; -+ for (iterator=0; iterator < iterat_max; iterator++) -+ { -+ idx = srcu_read_lock(&vcpu->kvm->srcu); -+ slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); -+ //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/ -+ if ( slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { -+ write_lock(&vcpu->kvm->mmu_lock); -+ kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode); -+ write_unlock(&vcpu->kvm->mmu_lock); -+ count++; -+ } -+ srcu_read_unlock(&vcpu->kvm->srcu, idx); -+ } -+ -+ return count; -+} -+EXPORT_SYMBOL(kvm_stop_tracking); - - EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); - EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c -old mode 100644 -new mode 100755 -index e089fbf9017f..7899e1efe852 +index e089fbf9017f..7899e1efe852 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -87,7 +87,7 @@ static void *sev_init_ex_buffer; @@ -1726,391 +429,22 @@ index e089fbf9017f..7899e1efe852 static int __sev_init_locked(int *error) { -diff --git a/include/linux/sev-step.h b/include/linux/sev-step.h -new file mode 100755 -index 000000000000..ec49e5526edd ---- /dev/null -+++ b/include/linux/sev-step.h -@@ -0,0 +1,68 @@ -+#ifndef SEV_STEP_H -+#define SEV_STEP_H -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include //struct kvm -+#include -+#include -+ -+ -+ -+ -+ -+#define CTL_MSR_0 0xc0010200ULL -+#define CTL_MSR_1 0xc0010202ULL -+#define CTL_MSR_2 0xc0010204ULL -+#define CTL_MSR_3 0xc0010206ULL -+#define CTL_MSR_4 0xc0010208ULL -+#define CTL_MSR_5 0xc001020aULL -+ -+#define CTR_MSR_0 0xc0010201ULL -+#define CTR_MSR_1 0xc0010203ULL -+#define CTR_MSR_2 0xc0010205ULL -+#define CTR_MSR_3 0xc0010207ULL -+#define CTR_MSR_4 0xc0010209ULL -+#define CTR_MSR_5 0xc001020bULL -+ -+typedef struct { -+ uint64_t HostGuestOnly; -+ 
uint64_t CntMask; -+ uint64_t Inv; -+ uint64_t En; -+ uint64_t Int; -+ uint64_t Edge; -+ uint64_t OsUserMode; -+ uint64_t UintMask; -+ uint64_t EventSelect; //12 bits in total split in [11:8] and [7:0] -+ -+} perf_ctl_config_t; -+ -+ -+extern struct kvm* main_vm; -+ -+ -+bool __untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, -+ enum kvm_page_track_mode mode);//defined in x86.c -+ -+bool __track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, -+ enum kvm_page_track_mode mode); //defined in x86.c -+bool __reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); //defined in x86.c -+bool __clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); //defined in x86.c -+long kvm_start_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ); -+long kvm_stop_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ); -+void sev_step_handle_callback(void); -+ -+uint64_t perf_ctl_to_u64(perf_ctl_config_t * config); -+void write_ctl(perf_ctl_config_t * config, int cpu, uint64_t ctl_msr); -+void read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result); -+void setup_perfs(void); -+ -+ -+int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip); -+ -+#endif -diff --git a/include/linux/userspace_page_track_signals.h b/include/linux/userspace_page_track_signals.h -new file mode 100755 -index 000000000000..dc3fea4a9af7 ---- /dev/null -+++ b/include/linux/userspace_page_track_signals.h -@@ -0,0 +1,59 @@ -+#ifndef USERSPACE_PAGE_TRACK_SIGNALS -+#define USERSPACE_PAGE_TRACK_SIGNALS -+ -+#include -+#include -+#include -+ -+ -+// -+// User space signaling -+// -+ -+int uspt_initialize(int pid,bool should_get_rip); -+int uspt_is_initialiized(void); -+void uspt_clear(void); -+ -+bool uspt_should_get_rip(void); -+ -+ -+int uspt_send_and_block(uint64_t faulted_gpa, uint32_t error_code, bool have_rip,uint64_t rip); -+ -+int uspt_is_event_done(uint64_t id); -+ -+//prepare next event based on faulted_gpa and error_code. Notify process behind pid_number. Event must be polled -+//id is result param with the id used for the event. Can be used to call uspt_is_event_done -+int uspt_send_notification(int pid_number, uint64_t faulted_gpa, uint32_t error_code,uint64_t* id); -+ -+//copy next event to userpace_mem -+int uspt_handle_poll_event(page_fault_event_t* userpace_mem); -+ -+//acknowledge receival of event to event handling logic -+int uspt_handle_ack_event_ioctl(ack_event_t event); -+ -+// -+// Batch Tracking -+// -+ -+//should be called after "uspt_batch_tracking_save", "uspt_batch_tracking_handle_retrack" and any future custom logic -+//for an event is processed -+void uspt_batch_tracking_inc_event_idx(void); -+ -+int uspt_batch_tracking_start(int tracking_type,uint64_t expected_events, int perf_cpu,bool retrack); -+ -+int uspt_batch_tracking_save(uint64_t faulted_gpa, uint32_t error_code, bool have_rip,uint64_t rip); -+ -+uint64_t uspt_batch_tracking_get_events_count(void); -+ -+//Stops batch tracking on copies the first @len events into @result. 
If an error occured at some point -+//during the batch tracking, error_occured is set(there should also be a dmesg, but this allows programatic access); -+//Caller can use uspt_batch_tracking_get_events_count() to determine the amount of memory they should allocate for -+//@results -+int uspt_batch_tracking_stop(page_fault_event_t* results, uint64_t len,bool* error_occured); -+ -+void uspt_batch_tracking_handle_retrack(struct kvm_vcpu* vcpu,uint64_t current_fault_gfn); -+ -+void uspt_batch_tracking_get_retrack_gfns(uint64_t** gfns, uint64_t* len,int * tracking_type); -+ -+bool uspt_batch_tracking_in_progress(void); -+#endif -diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -old mode 100644 -new mode 100755 -index f288b421b603..81b232132f66 ---- a/include/uapi/linux/kvm.h -+++ b/include/uapi/linux/kvm.h -@@ -16,6 +16,78 @@ - - #define KVM_API_VERSION 12 - -+#define KVM_USPT_POLL_EVENT_NO_EVENT 1000 -+#define KVM_USPT_POLL_EVENT_GOT_EVENT 0 -+ -+ -+typedef struct { -+ uint64_t id; //filled automatically -+ uint64_t faulted_gpa; -+ uint32_t error_code; -+ bool have_rip_info; -+ uint64_t rip; -+ uint64_t ns_timestamp; -+ bool have_retired_instructions; -+ uint64_t retired_instructions; -+} page_fault_event_t; -+ -+typedef struct { -+ int tracking_type; -+ uint64_t expected_events; -+ int perf_cpu; -+ bool retrack; -+} batch_track_config_t; -+ -+typedef struct { -+ uint64_t event_count; -+} batch_track_event_count_t; -+ -+typedef struct { -+ page_fault_event_t* out_buf; -+ uint64_t len; -+ bool error_during_batch; -+} batch_track_stop_and_get_t; -+ -+typedef struct { -+ int cpu; //cpu on which we want to read the counter -+ uint64_t retired_instruction_count; //result param -+} retired_instr_perf_t; -+ -+typedef struct { -+ int cpu; //cpu on which counter should be programmed -+} retired_instr_perf_config_t; -+ -+typedef struct { -+ uint64_t gpa; -+ uint64_t len; -+ bool decrypt_with_host_key; -+ int wbinvd_cpu; //-1: do not flush; else logical cpu on which we flush -+ void* output_buffer; -+}read_guest_memory_t; -+ -+typedef struct { -+ int pid; -+ bool get_rip; -+} userspace_ctx_t; -+ -+ -+typedef struct { -+ uint64_t id; -+} ack_event_t; -+ -+ -+typedef struct { -+ uint64_t gpa; -+ int track_mode; -+} track_page_param_t; -+ -+ -+typedef struct { -+ int track_mode; -+} track_all_pages_t; -+ -+ -+ - /* *** Deprecated interfaces *** */ - - #define KVM_TRC_SHIFT 16 -@@ -921,6 +993,29 @@ struct kvm_ppc_resize_hpt { - #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) - #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) - -+ -+// -+// SNP ATTACK IOCTLS -+// -+ -+#define KVM_TRACK_PAGE _IOWR(KVMIO, 0x20, track_page_param_t) -+#define KVM_USPT_REGISTER_PID _IOWR(KVMIO, 0x21, userspace_ctx_t) -+#define KVM_USPT_WAIT_AND_SEND _IO(KVMIO, 0x22) -+#define KVM_USPT_POLL_EVENT _IOWR(KVMIO, 0x23, page_fault_event_t) -+#define KVM_USPT_ACK_EVENT _IOWR(KVMIO, 0x24, ack_event_t) -+#define KVM_READ_GUEST_MEMORY _IOWR(KVMIO, 0x25, read_guest_memory_t) -+#define KVM_USPT_RESET _IO(KVMIO, 0x26) -+#define KVM_USPT_TRACK_ALL _IOWR(KVMIO, 0x27, track_all_pages_t) -+#define KVM_USPT_UNTRACK_ALL _IOWR(KVMIO, 0x28, track_all_pages_t) -+#define KVM_USPT_SETUP_RETINSTR_PERF _IOWR(KVMIO, 0x30,retired_instr_perf_config_t) -+#define KVM_USPT_READ_RETINSTR_PERF _IOWR(KVMIO,0x31, retired_instr_perf_t) -+#define KVM_USPT_BATCH_TRACK_START _IOWR(KVMIO,0x32,batch_track_config_t) -+#define KVM_USPT_BATCH_TRACK_STOP _IOWR(KVMIO,0x33,batch_track_stop_and_get_t) -+#define 
KVM_USPT_BATCH_TRACK_EVENT_COUNT _IOWR(KVMIO,0x34,batch_track_event_count_t) -+ -+ -+ -+ - /* - * Extension capability list. - */ -diff --git a/my-make-ccp-modules.sh b/my-make-ccp-modules.sh -new file mode 100755 -index 000000000000..b5068c264ed0 ---- /dev/null -+++ b/my-make-ccp-modules.sh -@@ -0,0 +1,24 @@ -+#/bin/sh -+cores=$(nproc --all) -+#sudo -u luca make distclean && -+#./my-configure-sev.sh && -+EXTRAVERSION="" -+MODPATH="drivers/crypto/ccp" -+make clean M="$MODPATH" && -+make -j $cores scripts && -+make -j $cores prepare && -+make -j $cores modules_prepare && -+cp /usr/src/linux-headers-`uname -r`/Module.symvers "$MODPATH"/Module.symvers && -+cp /usr/src/linux-headers-`uname -r`/Module.symvers Module.symvers && -+chown luca:luca "$MODPATH"/Module.symvers -+cp "/boot/System.map-$(uname -r)" . -+cp "/boot/System.map-$(uname -r)" "$MODPATH" -+touch .scmversion && -+make -j $cores modules M="$MODPATH" LOCALVERSION= && -+make modules_install M="$MODPATH" LOCALVERSION= -+ -+exit -+ -+echo "Installing module file" -+cp ./drivers/crypto/ccp/ccp.ko "/lib/modules/$(uname -r)/kernel/drivers/crypto/ccp/ccp.ko" -+cp ./drivers/crypto/ccp/ccp-crypto.ko "/lib/modules/$(uname -r)/kernel/drivers/crypto/ccp/ccp-crypto.ko" -diff --git a/my-make-kernel.sh b/my-make-kernel.sh -new file mode 100755 -index 000000000000..0418f607cb43 ---- /dev/null -+++ b/my-make-kernel.sh -@@ -0,0 +1,38 @@ -+#!/bin/bash -+ -+run_cmd() -+{ -+ echo "$*" -+ -+ eval "$*" || { -+ echo "ERROR: $*" -+ exit 1 -+ } -+} -+ -+ -+[ -d linux-patches ] && { -+ -+ for P in linux-patches/*.patch; do -+ run_cmd patch -p1 -d linux < $P -+ done -+} -+ -+MAKE="make -j $(getconf _NPROCESSORS_ONLN) LOCALVERSION=" -+ -+run_cmd $MAKE distclean -+ -+ run_cmd cp /boot/config-$(uname -r) .config -+ run_cmd ./scripts/config --set-str LOCALVERSION "-sev-step-snp" -+ run_cmd ./scripts/config --disable LOCALVERSION_AUTO -+ run_cmd ./scripts/config --disable CONFIG_DEBUG_INFO -+# run_cmd ./scripts/config --undefine CONFIG_SYSTEM_TRUSTED_KEYS -+# run_cmd ./scripts/config --undefine CONFIG_MODULE_SIG_KEY -+ -+run_cmd $MAKE olddefconfig -+ -+# Build -+run_cmd $MAKE >/dev/null -+ -+run_cmd $MAKE bindeb-pkg -+ -diff --git a/my-make-kvm-modules.sh b/my-make-kvm-modules.sh -new file mode 100755 -index 000000000000..22f1f95b063f ---- /dev/null -+++ b/my-make-kvm-modules.sh -@@ -0,0 +1,29 @@ -+#/bin/sh -+cores=$(nproc --all) -+#sudo -u luca make distclean && -+#./my-configure-sev.sh && -+EXTRAVERSION="" -+make clean M=arch/x86/kvm/ && -+make -j $cores scripts && -+make -j $cores prepare && -+make -j $cores modules_prepare && -+cp /usr/src/linux-headers-`uname -r`/Module.symvers arch/x86/kvm/Module.symvers && -+cp /usr/src/linux-headers-`uname -r`/Module.symvers Module.symvers && -+chown luca:luca arch/x86/kvm/Module.symvers -+cp "/boot/System.map-$(uname -r)" . 
-+cp "/boot/System.map-$(uname -r)" arch/x86/kvm/ -+touch .scmversion && -+make -j $cores modules M=arch/x86/kvm/ LOCALVERSION= && -+make modules_install M=arch/x86/kvm/ LOCALVERSION= && -+ -+echo "Unload old modules" -+modprobe -r kvm_amd kvm -+cp ./arch/x86/kvm/kvm.ko "/lib/modules/$(uname -r)/kernel/arch/x86/kvm/" -+cp ./arch/x86/kvm/kvm-amd.ko "/lib/modules/$(uname -r)/kernel/arch/x86/kvm/" -+echo "Load new modules" -+modprobe kvm -+modprobe kvm-amd sev-snp=1 sev=1 sev-es=1 -+#insmod "/lib/modules/$(uname -r)/kernel/virt/lib/irqbypass.ko" -+#insmod ./arch/x86/kvm/kvm.ko -+#insmod "/lib/modules/$(uname -r)/kernel/drivers/crypto/ccp/ccp.ko" -+#insmod ./arch/x86/kvm/kvm-amd.ko sev=1 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -old mode 100644 -new mode 100755 -index f2a63cb2658b..ac5fc6c64b7e +index f2a63cb2658b..bfe4a57bcc10 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -67,9 +67,14 @@ - - #include - -+#include -+#include -+ +@@ -70,6 +70,10 @@ /* Worst case buffer size needed for holding an integer. */ #define ITOA_MAX_LEN 12 +#include "../../arch/x86/kvm/svm/cachepc/kvm.h" ++#include "../../arch/x86/kvm/sevstep/sevstep.h" ++#include "../../arch/x86/kvm/sevstep/uspt.h" + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -@@ -5792,6 +5797,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, +@@ -5792,6 +5796,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = kvm_vfio_ops_init(); WARN_ON(r); @@ -2119,7 +453,7 @@ index f2a63cb2658b..ac5fc6c64b7e return 0; out_unreg: -@@ -5821,6 +5828,8 @@ void kvm_exit(void) +@@ -5821,6 +5827,8 @@ void kvm_exit(void) { int cpu; diff --git a/sevstep/kvm.c b/sevstep/kvm.c new file mode 100644 index 0000000..b6b0d49 --- /dev/null +++ b/sevstep/kvm.c @@ -0,0 +1,205 @@ +#include "kvm.h" + +#include + +bool +__untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (mode == KVM_PAGE_TRACK_ACCESS) { + //printk("Removing gfn: %016llx from acess page track pool\n", gfn); + } + if (mode == KVM_PAGE_TRACK_WRITE) { + //printk("Removing gfn: %016llx from write page track pool\n", gfn); + } + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + + if (slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_remove_page(vcpu->kvm, slot, gfn, mode); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } else { + printk("Failed to untrack %016llx because ", gfn); + if (slot == NULL) { + printk(KERN_CONT "slot was null"); + } else if (!kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + printk(KERN_CONT "page track was not active"); + } + printk(KERN_CONT "\n"); + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(__untrack_single_page); + +bool +__reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if( slot != NULL ) { + write_lock(&vcpu->kvm->mmu_lock); + //Vincent: The kvm mmu function now requires min_level + //We want all pages to protected so we do PG_LEVEL_4K + //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ + sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm,slot,gfn,PG_LEVEL_4K,KVM_PAGE_TRACK_RESET_ACCESSED); + 
write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(__reset_accessed_on_page); + +bool +__clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if( slot != NULL ) { + write_lock(&vcpu->kvm->mmu_lock); + //Vincent: The kvm mmu function now requires min_level + //We want all pages to protected so we do PG_LEVEL_4K + //https://patchwork.kernel.org/project/kvm/patch/20210416082511.2856-2-zhukeqian1@huawei.com/ + sevstep_kvm_mmu_slot_gfn_protect(vcpu->kvm, slot, gfn, + PG_LEVEL_4K, KVM_PAGE_TRACK_RESET_EXEC); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(__clear_nx_on_page); + +bool +__track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode) +{ + int idx; + bool ret; + struct kvm_memory_slot *slot; + + ret = false; + idx = srcu_read_lock(&vcpu->kvm->srcu); + if (mode == KVM_PAGE_TRACK_ACCESS) { + //printk_ratelimited("Adding gfn: %016llx to acess page track pool\n", gfn); + //printk("Adding gfn: %016llx to acess page track pool\n", gfn); + } + if (mode == KVM_PAGE_TRACK_WRITE) { + //printk_ratelimited("Adding gfn: %016llx to write page track pool\n", gfn); + } + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm,slot, gfn, mode)) { + + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_add_page(vcpu->kvm, slot, gfn, mode); + write_unlock(&vcpu->kvm->mmu_lock); + ret = true; + + } else { + + printk("Failed to track %016llx because ", gfn); + if (slot == NULL) { + printk(KERN_CONT "slot was null"); + } + if (kvm_slot_page_track_is_active(vcpu->kvm, slot, gfn, mode)) { + printk(KERN_CONT "page is already tracked"); + } + printk(KERN_CONT "\n"); + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} +EXPORT_SYMBOL(__track_single_page); + +long +kvm_start_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode ) +{ + long count = 0; + u64 iterator, iterat_max; + struct kvm_memory_slot *slot; + int idx; + + //Vincent: Memslots interface changed into a rb tree, see + //here: https://lwn.net/Articles/856392/ + //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u + //Thus we use instead of + //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn + // + vcpu->kvm->memslots[0]->memslots[0].npages; + struct rb_node *node; + struct kvm_memory_slot *first_memslot; + node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); + first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); + iterat_max = first_memslot->base_gfn + first_memslot->npages; + for (iterator=0; iterator < iterat_max; iterator++) + { + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); + if ( slot != NULL && !kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_add_page(vcpu->kvm, slot, iterator, mode); + write_unlock(&vcpu->kvm->mmu_lock); + count++; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + return count; +} +EXPORT_SYMBOL(kvm_start_tracking); + +long +kvm_stop_tracking(struct kvm_vcpu *vcpu,enum kvm_page_track_mode mode) +{ + long count = 0; + u64 iterator, iterat_max; + struct kvm_memory_slot *slot; + int idx; + + + //Vincent: Memslots interface 
changed into a rb tree, see + //here: https://lwn.net/Articles/856392/ + //and here: https://lore.kernel.org/all/cover.1632171478.git.maciej.szmigiero@oracle.com/T/#u + //Thus we use instead of + //iterat_max = vcpu->kvm->memslots[0]->memslots[0].base_gfn + // + vcpu->kvm->memslots[0]->memslots[0].npages; + struct rb_node *node; + struct kvm_memory_slot *first_memslot; + node = rb_last(&(vcpu->kvm->memslots[0]->gfn_tree)); + first_memslot = container_of(node, struct kvm_memory_slot, gfn_node[0]); + iterat_max = first_memslot->base_gfn + first_memslot->npages; + for (iterator=0; iterator < iterat_max; iterator++) + { + idx = srcu_read_lock(&vcpu->kvm->srcu); + slot = kvm_vcpu_gfn_to_memslot(vcpu, iterator); + //Vincent: I think see here https://patchwork.kernel.org/project/kvm/patch/20210924163152.289027-22-pbonzini@redhat.com/ + if ( slot != NULL && kvm_slot_page_track_is_active(vcpu->kvm, slot, iterator, mode)) { + write_lock(&vcpu->kvm->mmu_lock); + kvm_slot_page_track_remove_page(vcpu->kvm, slot, iterator, mode); + write_unlock(&vcpu->kvm->mmu_lock); + count++; + } + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + return count; +} +EXPORT_SYMBOL(kvm_stop_tracking); + diff --git a/sevstep/kvm.h b/sevstep/kvm.h new file mode 100644 index 0000000..35cb4d5 --- /dev/null +++ b/sevstep/kvm.h @@ -0,0 +1,4 @@ +#pragma once + +#include "sev-step.h" +#include "uapi.h" diff --git a/sevstep/mmu.c b/sevstep/mmu.c new file mode 100644 index 0000000..4eefea2 --- /dev/null +++ b/sevstep/mmu.c @@ -0,0 +1,132 @@ +#include "../sevstep/sevstep.h" +#include "../sevstep/uspt.h" + +void +sevstep_uspt_page_fault_handle(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + const int modes[] = { + KVM_PAGE_TRACK_WRITE, + KVM_PAGE_TRACK_ACCESS, + KVM_PAGE_TRACK_EXEC + }; + uint64_t current_rip; + bool was_tracked; + int have_rip, i; + int send_err; + + was_tracked = false; + for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) { + if (kvm_slot_page_track_is_active(vcpu->kvm, + fault->slot, fault->gfn, modes[i])) { + __untrack_single_page(vcpu, fault->gfn, modes[i]); + was_tracked = true; + } + } + + if (was_tracked) { + have_rip = false; + if (uspt_should_get_rip()) + have_rip = sev_step_get_rip_kvm_vcpu(vcpu,¤t_rip) == 0; + if (uspt_batch_tracking_in_progress()) { + send_err = uspt_batch_tracking_save(fault->gfn << PAGE_SHIFT, + fault->error_code, have_rip, current_rip); + if (send_err) { + printk_ratelimited( + "uspt_batch_tracking_save failed with %d\n" + "##########################\n", send_err); + } + uspt_batch_tracking_handle_retrack(vcpu, fault->gfn); + uspt_batch_tracking_inc_event_idx(); + } else { + send_err = uspt_send_and_block(fault->gfn << PAGE_SHIFT, + fault->error_code, have_rip, current_rip); + if (send_err) { + printk("uspt_send_and_block failed with %d\n" + "##########################\n", send_err); + } + } + } +} + +bool +sevstep_spte_protect(u64 *sptep, bool pt_protect, enum kvm_page_track_mode mode) +{ + u64 spte = *sptep; + bool shouldFlush = false; + + if (!is_writable_pte(spte) && !(pt_protect && is_mmu_writable_spte(spte))) + return false; + + rmap_printk("spte %p %llx\n", sptep, *sptep); + + if (pt_protect) + spte &= ~EPT_SPTE_MMU_WRITABLE; + + if (mode == KVM_PAGE_TRACK_WRITE) { + spte = spte & ~PT_WRITABLE_MASK; + shouldFlush = true; + } else if (mode == KVM_PAGE_TRACK_RESET_ACCESSED) { + spte = spte & ~PT_ACCESSED_MASK; + } else if (mode == KVM_PAGE_TRACK_ACCESS) { + spte = spte & ~PT_PRESENT_MASK; + spte = spte & ~PT_WRITABLE_MASK; + spte = spte & ~PT_USER_MASK; + spte = 
spte | (0x1ULL << PT64_NX_SHIFT); + shouldFlush = true; + } else if (mode == KVM_PAGE_TRACK_EXEC) { + spte = spte | (0x1ULL << PT64_NX_SHIFT); + shouldFlush = true; + } else if (mode == KVM_PAGE_TRACK_RESET_EXEC) { + spte = spte & ~(0x1ULL << PT64_NX_SHIFT); + shouldFlush = true; + } else { + printk(KERN_WARNING "spte_protect was called with invalid mode" + "parameter %d\n",mode); + } + shouldFlush |= mmu_spte_update(sptep, spte); + return shouldFlush; +} +EXPORT_SYMBOL(sevstep_spte_protect); + +bool sevstep_rmap_protect(struct kvm_rmap_head *rmap_head, + bool pt_protect, enum kvm_page_track_mode mode) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + for_each_rmap_spte(rmap_head, &iter, sptep) { + flush |= sevstep_spte_protect(sptep, pt_protect, mode); + } + + return flush; +} +EXPORT_SYMBOL(sevstep_rmap_protect); + +bool +sevstep_kvm_mmu_slot_gfn_protect(struct kvm *kvm, struct kvm_memory_slot *slot, + uint64_t gfn, int min_level, enum kvm_page_track_mode mode) +{ + struct kvm_rmap_head *rmap_head; + bool protected; + int i; + + protected = false; + + if (kvm_memslots_have_rmaps(kvm)) { + for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { + rmap_head = gfn_to_rmap(gfn, i, slot); + protected |= sevstep_rmap_protect(rmap_head, true, mode); + } + } + + if (is_tdp_mmu_enabled(kvm)) { + protected |= kvm_tdp_mmu_write_protect_gfn(kvm, + slot, gfn, min_level); + } + + return protected; +} +EXPORT_SYMBOL(sevstep_kvm_mmu_slot_gfn_protect); + diff --git a/sevstep/sevstep.c b/sevstep/sevstep.c new file mode 100644 index 0000000..3345e04 --- /dev/null +++ b/sevstep/sevstep.c @@ -0,0 +1,129 @@ +#include "sevstep.h" + +#include "mmu/mmu_internal.h" +#include "mmu.h" + +#include "irq.h" +#include "ioapic.h" +#include "mmu.h" +#include "mmu/tdp_mmu.h" +#include "x86.h" +#include "kvm_cache_regs.h" +#include "kvm_emulate.h" +#include "cpuid.h" +#include "mmu/spte.h" + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm_cache_regs.h" +#include "svm/svm.h" + +struct kvm* main_vm; +EXPORT_SYMBOL(main_vm); + +// used to store performance counter values; 6 counters, 2 readings per counter +// TODO: static! 
+uint64_t perf_reads[6][2]; +perf_ctl_config_t perf_configs[6]; +int perf_cpu; + + +uint64_t +perf_ctl_to_u64(perf_ctl_config_t * config) +{ + uint64_t result; + + result = 0; + result |= config->EventSelect & 0xffULL; + result |= (config->UintMask & 0xffULL) << 8; + result |= (config->OsUserMode & 0x3ULL) << 16; + result |= (config->Edge & 0x1ULL ) << 18; + result |= (config->Int & 0x1ULL ) << 20; + result |= (config->En & 0x1ULL ) << 22; + result |= (config->Inv & 0x1ULL ) << 23; + result |= (config->CntMask & 0xffULL) << 24; + result |= ((config->EventSelect & 0xf00ULL) >> 8) << 32; + result |= (config->HostGuestOnly & 0x3ULL) << 40; + + return result; + +} + +void +write_ctl(perf_ctl_config_t * config, int cpu, uint64_t ctl_msr) +{ + wrmsrl_on_cpu(cpu, ctl_msr, perf_ctl_to_u64(config)); +} + +void +read_ctr(uint64_t ctr_msr, int cpu, uint64_t* result) +{ + uint64_t tmp; + + rdmsrl_on_cpu(cpu, ctr_msr, &tmp); + *result = tmp & ( (0x1ULL << 48) - 1); +} + +void +setup_perfs() +{ + int i; + + perf_cpu = smp_processor_id(); + + for (i = 0; i < 6; i++) { + perf_configs[i].HostGuestOnly = 0x1; /* count only guest */ + perf_configs[i].CntMask = 0x0; + perf_configs[i].Inv = 0x0; + perf_configs[i].En = 0x0; + perf_configs[i].Int = 0x0; + perf_configs[i].Edge = 0x0; + perf_configs[i].OsUserMode = 0x3; /* count userland and kernel events */ + } + + perf_configs[0].EventSelect = 0x0c0; + perf_configs[0].UintMask = 0x0; + perf_configs[0].En = 0x1; + write_ctl(&perf_configs[0],perf_cpu, CTL_MSR_0); + + /* + * programm l2d hit from data cache miss perf for + * cpu_probe_pointer_chasing_inplace without counting thread. + * N.B. that this time we count host events + */ + perf_configs[1].EventSelect = 0x064; + perf_configs[1].UintMask = 0x70; + perf_configs[1].En = 0x1; + perf_configs[1].HostGuestOnly = 0x2; /* count only host events */ + write_ctl(&perf_configs[1],perf_cpu,CTL_MSR_1); +} +EXPORT_SYMBOL(setup_perfs); + +int +sev_step_get_rip_kvm_vcpu(struct kvm_vcpu* vcpu,uint64_t *rip) +{ + return 0; +} diff --git a/sevstep/sevstep.h b/sevstep/sevstep.h new file mode 100644 index 0000000..86d25f7 --- /dev/null +++ b/sevstep/sevstep.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + + +#define CTL_MSR_0 0xc0010200ULL +#define CTL_MSR_1 0xc0010202ULL +#define CTL_MSR_2 0xc0010204ULL +#define CTL_MSR_3 0xc0010206ULL +#define CTL_MSR_4 0xc0010208ULL +#define CTL_MSR_5 0xc001020aULL + +#define CTR_MSR_0 0xc0010201ULL +#define CTR_MSR_1 0xc0010203ULL +#define CTR_MSR_2 0xc0010205ULL +#define CTR_MSR_3 0xc0010207ULL +#define CTR_MSR_4 0xc0010209ULL +#define CTR_MSR_5 0xc001020bULL + +typedef struct { + uint64_t HostGuestOnly; + uint64_t CntMask; + uint64_t Inv; + uint64_t En; + uint64_t Int; + uint64_t Edge; + uint64_t OsUserMode; + uint64_t UintMask; + uint64_t EventSelect; //12 bits in total split in [11:8] and [7:0] + +} perf_ctl_config_t; + +extern struct kvm* main_vm; + +bool sevstep_spte_protect(u64 *sptep, + bool pt_protect, enum kvm_page_track_mode mode); +bool sevstep_rmap_protect(struct kvm_rmap_head *rmap_head, + bool pt_protect, enum kvm_page_track_mode mode); +bool sevstep_kvm_mmu_slot_gfn_protect(struct kvm *kvm, struct kvm_memory_slot *slot, + uint64_t gfn, int min_level, enum kvm_page_track_mode mode); + +bool __untrack_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode); +bool __track_single_page(struct kvm_vcpu *vcpu, gfn_t gfn, + enum kvm_page_track_mode mode); +bool 
__reset_accessed_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); +bool __clear_nx_on_page(struct kvm_vcpu *vcpu, gfn_t gfn); + +long kvm_start_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); +long kvm_stop_tracking(struct kvm_vcpu *vcpu, enum kvm_page_track_mode mode); +void sev_step_handle_callback(void); + +uint64_t perf_ctl_to_u64(perf_ctl_config_t *config); +void write_ctl(perf_ctl_config_t *config, int cpu, uint64_t ctl_msr); +void read_ctr(uint64_t ctr_msr, int cpu, uint64_t *result); + +void setup_perfs(void); + +int sev_step_get_rip_kvm_vcpu(struct kvm_vcpu *vcpu, uint64_t *rip); diff --git a/sevstep/uapi.h b/sevstep/uapi.h new file mode 100644 index 0000000..e41a036 --- /dev/null +++ b/sevstep/uapi.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include + +#define KVM_TRACK_PAGE _IOWR(KVMIO, 0x20, track_page_param_t) +#define KVM_USPT_REGISTER_PID _IOWR(KVMIO, 0x21, userspace_ctx_t) +#define KVM_USPT_WAIT_AND_SEND _IO(KVMIO, 0x22) +#define KVM_USPT_POLL_EVENT _IOWR(KVMIO, 0x23, page_fault_event_t) +#define KVM_USPT_ACK_EVENT _IOWR(KVMIO, 0x24, ack_event_t) +#define KVM_READ_GUEST_MEMORY _IOWR(KVMIO, 0x25, read_guest_memory_t) +#define KVM_USPT_RESET _IO(KVMIO, 0x26) +#define KVM_USPT_TRACK_ALL _IOWR(KVMIO, 0x27, track_all_pages_t) +#define KVM_USPT_UNTRACK_ALL _IOWR(KVMIO, 0x28, track_all_pages_t) +#define KVM_USPT_SETUP_RETINSTR_PERF _IOWR(KVMIO, 0x30,retired_instr_perf_config_t) +#define KVM_USPT_READ_RETINSTR_PERF _IOWR(KVMIO,0x31, retired_instr_perf_t) +#define KVM_USPT_BATCH_TRACK_START _IOWR(KVMIO,0x32,batch_track_config_t) +#define KVM_USPT_BATCH_TRACK_STOP _IOWR(KVMIO,0x33,batch_track_stop_and_get_t) +#define KVM_USPT_BATCH_TRACK_EVENT_COUNT _IOWR(KVMIO,0x34,batch_track_event_count_t) + +#define KVM_USPT_POLL_EVENT_NO_EVENT 1000 +#define KVM_USPT_POLL_EVENT_GOT_EVENT 0 + +typedef struct { + uint64_t id; // filled automatically + uint64_t faulted_gpa; + uint32_t error_code; + bool have_rip_info; + uint64_t rip; + uint64_t ns_timestamp; + bool have_retired_instructions; + uint64_t retired_instructions; +} page_fault_event_t; + +typedef struct { + int tracking_type; + uint64_t expected_events; + int perf_cpu; + bool retrack; +} batch_track_config_t; + +typedef struct { + uint64_t event_count; +} batch_track_event_count_t; + +typedef struct { + page_fault_event_t* out_buf; + uint64_t len; + bool error_during_batch; +} batch_track_stop_and_get_t; + +typedef struct { + int cpu; // cpu on which we want to read the counter + uint64_t retired_instruction_count; // result param +} retired_instr_perf_t; + +typedef struct { + int cpu; // cpu on which counter should be programmed +} retired_instr_perf_config_t; + +typedef struct { + uint64_t gpa; + uint64_t len; + bool decrypt_with_host_key; + int wbinvd_cpu; // -1: do not flush; else logical cpu on which we flush + void* output_buffer; +} read_guest_memory_t; + +typedef struct { + int pid; + bool get_rip; +} userspace_ctx_t; + +typedef struct { + uint64_t id; +} ack_event_t; + +typedef struct { + uint64_t gpa; + int track_mode; +} track_page_param_t; + +typedef struct { + int track_mode; +} track_all_pages_t; + diff --git a/sevstep/uspt.c b/sevstep/uspt.c new file mode 100644 index 0000000..f7b329d --- /dev/null +++ b/sevstep/uspt.c @@ -0,0 +1,503 @@ +#include "uspt.h" +#include "sevstep.h" + +#include +#include +#include +#include +#include +#include +#include + +#define ARRLEN(x) (sizeof(x)/sizeof((x)[0])) + +typedef struct { + bool is_active; + int tracking_type; + bool retrack; + + int perf_cpu; + + uint64_t 
gfn_retrack_backlog[10]; + int gfn_retrack_backlog_next_idx; + + page_fault_event_t * events; + uint64_t event_next_idx; + uint64_t events_size; + + bool error_occured; +} batch_track_state_t; + +// crude sync mechanism. don't know a good way to act on errors yet. +uint64_t last_sent_event_id = 1; +uint64_t last_acked_event_id = 1; +DEFINE_RWLOCK(event_lock); + +page_fault_event_t sent_event; +static int have_event = 0; + +static bool get_rip = true; + +static int inited = 0; + +DEFINE_SPINLOCK(batch_track_state_lock); +static batch_track_state_t batch_track_state; + +typedef struct { + uint64_t idx_for_last_perf_reading; + uint64_t last_perf_reading; + uint64_t delta_valid_idx; + uint64_t delta; +} perf_state_t; + +perf_state_t perf_state; + + +void +uspt_clear(void) +{ + write_lock(&event_lock); + inited = 0; + last_sent_event_id = 1; + last_acked_event_id = 1; + have_event = 0; + get_rip = false; + write_unlock(&event_lock); +} + +int +uspt_initialize(int pid,bool should_get_rip) +{ + write_lock(&event_lock); + inited = 1; + last_sent_event_id = 1; + last_acked_event_id = 1; + have_event = 0; + get_rip = should_get_rip; + write_unlock(&event_lock); + + return 0; +} + +int +uspt_is_initialiized() +{ + return inited; +} + +bool +uspt_should_get_rip() +{ + bool tmp; + + read_lock(&event_lock); + tmp = get_rip; + read_unlock(&event_lock); + + return tmp; +} + +int +uspt_send_and_block(uint64_t faulted_gpa, uint32_t error_code, + bool have_rip, uint64_t rip) +{ + ktime_t abort_after; + page_fault_event_t message_for_user; + + read_lock(&event_lock); + if (!uspt_is_initialiized()) { + printk("userspace_page_track_signals: " + "uspt_send_and_block : ctx not initialized!\n"); + read_unlock(&event_lock); + return 1; + } + read_unlock(&event_lock); + + write_lock(&event_lock); + if (last_sent_event_id != last_acked_event_id) { + printk("event id_s out of sync, aborting. Fix this later\n"); + write_unlock(&event_lock); + return 1; + } else { + // TODO: handle overflow + last_sent_event_id++; + } + message_for_user.id = last_sent_event_id; + message_for_user.faulted_gpa = faulted_gpa; + message_for_user.error_code = error_code; + message_for_user.have_rip_info = have_rip; + message_for_user.rip = rip; + message_for_user.ns_timestamp = ktime_get_real_ns(); + message_for_user.have_retired_instructions = false; + + // for poll based system; + have_event = 1; + sent_event = message_for_user; + // printk("uspt_send_and_block sending event %llu\n",sent_event.id); + + write_unlock(&event_lock); + + // wait for ack, but with timeout. 
Otherwise small bugs in userland + // easily lead to a kernel hang + abort_after = ktime_get() + 1000000000ULL; // 1 sec in nanosecond + while (!uspt_is_event_done(sent_event.id)) { + if (ktime_get() > abort_after) { + printk("Waiting for ack of event %llu timed out, continuing\n",sent_event.id); + return 3; + } + } + + return 0; +} + +int +uspt_is_event_done(uint64_t id) +{ + int res; + + read_lock(&event_lock); + res = last_acked_event_id >= id; + read_unlock(&event_lock); + + return res; +} + +int +uspt_handle_poll_event(page_fault_event_t* userpace_mem) +{ + int err; + + // most of the time we won't have an event + read_lock(&event_lock); + if (!have_event) { + read_unlock(&event_lock); + return KVM_USPT_POLL_EVENT_NO_EVENT; + } + read_unlock(&event_lock); + + write_lock(&event_lock); + if (have_event) { + err = copy_to_user(userpace_mem, + &sent_event, sizeof(page_fault_event_t)); + have_event = 0; + } else { + err = KVM_USPT_POLL_EVENT_NO_EVENT; + } + write_unlock(&event_lock); + + return err; +} + +static int +_uspt_handle_ack_event(uint64_t id) +{ + int err = 0; + + write_lock(&event_lock); + if (id == last_sent_event_id) { + last_acked_event_id = last_sent_event_id; + } else { + err = 1; + printk("last sent event id is %llu but received ack for %llu\n",last_sent_event_id,id); + } + write_unlock(&event_lock); + + return err; +} + +int +uspt_handle_ack_event_ioctl(ack_event_t event) +{ + return _uspt_handle_ack_event(event.id); +} + +// setup perf_state and program retired instruction performance counter +void +_perf_state_setup_retired_instructions(void) +{ + perf_ctl_config_t retired_instructions_perf_config; + retired_instructions_perf_config.HostGuestOnly = 0x1; // 0x1 means: count only guest + retired_instructions_perf_config.CntMask = 0x0; + retired_instructions_perf_config.Inv = 0x0; + retired_instructions_perf_config.Int = 0x0; + retired_instructions_perf_config.Edge = 0x0; + retired_instructions_perf_config.OsUserMode = 0x3; // 0x3 means: count kern and user events + retired_instructions_perf_config.EventSelect = 0x0c0; + retired_instructions_perf_config.UintMask = 0x0; + retired_instructions_perf_config.En = 0x1; + write_ctl(&retired_instructions_perf_config,batch_track_state.perf_cpu, CTL_MSR_0); +} + + +// get retired instructions between current_event_idx-1 and current_event_idx +// value is cached for multiple calls to the same current_event_idx +uint64_t +_perf_state_update_and_get_delta(uint64_t current_event_idx) +{ + uint64_t current_value; + + // check if value is "cached" + if (perf_state.delta_valid_idx == current_event_idx) { + if (current_event_idx == 0) { + read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, ¤t_value); + perf_state.idx_for_last_perf_reading = current_event_idx; + perf_state.last_perf_reading = current_event_idx; + } + return perf_state.delta; + } + + // otherwise update, but logic is only valid for two consecutive events + if (current_event_idx != perf_state.idx_for_last_perf_reading+1) { + printk_ratelimited(KERN_CRIT "_perf_state_update_and_get_delta: " + "last reading was for idx %llu but was queried for %llu\n", + perf_state.idx_for_last_perf_reading, current_event_idx); + } + + read_ctr(CTR_MSR_0, batch_track_state.perf_cpu, ¤t_value); + perf_state.delta = (current_value - perf_state.last_perf_reading); + perf_state.delta_valid_idx = current_event_idx; + + perf_state.idx_for_last_perf_reading = current_event_idx; + perf_state.last_perf_reading = current_value; + + return perf_state.delta; +} + +void +uspt_batch_tracking_inc_event_idx(void) 
+{ + spin_lock(&batch_track_state_lock); + batch_track_state.event_next_idx++; + spin_unlock(&batch_track_state_lock); +} + +int +uspt_batch_tracking_start(int tracking_type,uint64_t expected_events, + int perf_cpu, bool retrack) +{ + page_fault_event_t* events; + uint64_t buffer_size, i; + + spin_lock(&batch_track_state_lock); + if (batch_track_state.is_active) { + printk("userspace_page_track_signals: overwriting " + "active batch track config!\n"); + if (batch_track_state.events != NULL ) { + vfree(batch_track_state.events); + } + } + batch_track_state.is_active = false; + spin_unlock(&batch_track_state_lock); + + buffer_size = expected_events * sizeof(page_fault_event_t); + printk("uspt_batch_tracking_start trying to alloc %llu " + "bytes buffer for events\n", buffer_size); + events = vmalloc(buffer_size); + if (events == NULL) { + printk("userspace_page_track_signals: " + "faperf_cpuiled to alloc %llu bytes for event buffer\n", + buffer_size); + return 1; // note: lock not held here + } + + // access each element once to force them into memory, improving performance + // during tracking + for (i = 0; i < expected_events * sizeof(page_fault_event_t); i++) { + ((volatile uint8_t*)events)[i] = 0; + } + + perf_state.idx_for_last_perf_reading = 0; + perf_state.last_perf_reading = 0; + perf_state.delta_valid_idx = 0; + perf_state.delta = 0; + _perf_state_setup_retired_instructions(); + + spin_lock(&batch_track_state_lock); + + batch_track_state.perf_cpu = perf_cpu; + batch_track_state.retrack = retrack; + + batch_track_state.events = events; + batch_track_state.event_next_idx = 0; + batch_track_state.events_size = expected_events; + + batch_track_state.gfn_retrack_backlog_next_idx = 0; + batch_track_state.tracking_type = tracking_type; + batch_track_state.error_occured = false; + + batch_track_state.is_active = true; + + spin_unlock(&batch_track_state_lock); + + return 0; +} + +void +uspt_batch_tracking_handle_retrack(struct kvm_vcpu* vcpu, + uint64_t current_fault_gfn) +{ + uint64_t ret_instr_delta; + int i, next_idx; + + spin_lock(&batch_track_state_lock); + + if (!batch_track_state.retrack) { + spin_unlock(&batch_track_state_lock); + return; + } + + if (smp_processor_id() != batch_track_state.perf_cpu) { + printk("uspt_batch_tracking_handle_retrack: perf was " + "programmed on logical cpu %d but handler was called " + "on %d. Did you forget to pin the vcpu thread?\n", + batch_track_state.perf_cpu, smp_processor_id()); + } + ret_instr_delta = _perf_state_update_and_get_delta(batch_track_state.event_next_idx); + + + // faulting instructions is probably the same as on last fault + // try to add current fault to retrack log and return + // for first event idx we do not have a valid ret_instr_delta. 
+ // Retracking for the frist time is fine, if we loop, we end up here + // again but with a valid delta on one of the next event + if( (ret_instr_delta < 2) && ( batch_track_state.event_next_idx != 0) ) { + next_idx = batch_track_state.gfn_retrack_backlog_next_idx; + if (next_idx >= ARRLEN(batch_track_state.gfn_retrack_backlog)) { + printk("uspt_batch_tracking_handle_retrack: retrack " + "backlog full, dropping retrack for fault " + "at 0x%llx\n", current_fault_gfn); + } else { + batch_track_state.gfn_retrack_backlog[next_idx] = current_fault_gfn; + batch_track_state.gfn_retrack_backlog_next_idx++; + } + + spin_unlock(&batch_track_state_lock); + return; + } + + /* made progress, retrack everything in backlog and reset idx */ + for (i = 0; i < batch_track_state.gfn_retrack_backlog_next_idx; i++) { + __track_single_page(vcpu, + batch_track_state.gfn_retrack_backlog[i], + batch_track_state.tracking_type); + } + + /* add current fault to list */ + batch_track_state.gfn_retrack_backlog[0] = current_fault_gfn; + batch_track_state.gfn_retrack_backlog_next_idx = 1; + + spin_unlock(&batch_track_state_lock); + +} + +int +uspt_batch_tracking_save(uint64_t faulted_gpa, uint32_t error_code, + bool have_rip, uint64_t rip) +{ + uint64_t ret_instr_delta; + page_fault_event_t* event; + + spin_lock(&batch_track_state_lock); + + if (!batch_track_state.is_active) { + printk_ratelimited("userspace_page_track_signals: got save but batch tracking is not active!\n"); + batch_track_state.error_occured = true; + spin_unlock(&batch_track_state_lock); + return 1; + } + + + if (batch_track_state.event_next_idx >= batch_track_state.events_size) { + printk_ratelimited("userspace_page_track_signals: events buffer is full!\n"); + batch_track_state.error_occured = true; + spin_unlock(&batch_track_state_lock); + return 1; + } + + if (smp_processor_id() != batch_track_state.perf_cpu) { + printk("uspt_batch_tracking_handle_retrack: perf was " + "programmed on logical cpu %d but handler was called " + "on %d. Did you forget to pin the vcpu thread?\n", + batch_track_state.perf_cpu, smp_processor_id()); + } + ret_instr_delta = _perf_state_update_and_get_delta(batch_track_state.event_next_idx); + + + if (batch_track_state.events == NULL) { + printk(KERN_CRIT "userspace_page_track_signals: events buf was " + "NULL but \"is_active\" was set! 
This should never happen!!!\n"); + spin_unlock(&batch_track_state_lock); + return 1; + } + + event = &batch_track_state.events[batch_track_state.event_next_idx]; + event->id = batch_track_state.event_next_idx; + event->faulted_gpa = faulted_gpa; + event->error_code = error_code; + event->have_rip_info = have_rip; + event->rip = rip; + event->ns_timestamp = ktime_get_real_ns(); + event->have_retired_instructions = true; + event->retired_instructions = ret_instr_delta; + + // old inc was here + + if (batch_track_state.gfn_retrack_backlog_next_idx + > ARRLEN(batch_track_state.gfn_retrack_backlog)) { + printk_ratelimited("userspace_page_track_signals: " + "gfn retrack backlog overflow!\n"); + batch_track_state.error_occured = true; + spin_unlock(&batch_track_state_lock); + return 1; + } + + spin_unlock(&batch_track_state_lock); + return 0; +} + +int +uspt_batch_tracking_stop(page_fault_event_t* results, uint64_t len, bool* error_occured) +{ + spin_lock(&batch_track_state_lock); + if (!batch_track_state.is_active) { + printk("userspace_page_track_signals: batch tracking not active\n"); + spin_unlock(&batch_track_state_lock); + return 1; + + } + batch_track_state.is_active = false; + + if (len > batch_track_state.event_next_idx) { + printk("userspace_page_track_signals: requested %llu " + "events but got only %llu\n", + len, batch_track_state.event_next_idx); + spin_unlock(&batch_track_state_lock); + return 1; + } + + memcpy(results,batch_track_state.events, len*sizeof(page_fault_event_t)); + vfree(batch_track_state.events); + + *error_occured = batch_track_state.error_occured; + + spin_unlock(&batch_track_state_lock); + + return 0; +} + +uint64_t +uspt_batch_tracking_get_events_count() +{ + uint64_t buf; + spin_lock(&batch_track_state_lock); + buf = batch_track_state.event_next_idx; + spin_unlock(&batch_track_state_lock); + + return buf; +} + +bool +uspt_batch_tracking_in_progress() +{ + return batch_track_state.is_active; +} diff --git a/sevstep/uspt.h b/sevstep/uspt.h new file mode 100644 index 0000000..7c34996 --- /dev/null +++ b/sevstep/uspt.h @@ -0,0 +1,49 @@ +#pragma once + +#include "uapi.h" + +#include +#include +#include + + +int uspt_initialize(int pid,bool should_get_rip); +int uspt_is_initialiized(void); +void uspt_clear(void); + +bool uspt_should_get_rip(void); + +int uspt_send_and_block(uint64_t faulted_gpa, uint32_t error_code, + bool have_rip, uint64_t rip); + +int uspt_is_event_done(uint64_t id); + +/* prepare next event based on faulted_gpa and error_code. Notify process + * behind pid_number. Event must be polled id is result param with the id + * used for the event. Can be used to call uspt_is_event_done */ +int uspt_send_notification(int pid_number, uint64_t faulted_gpa, + uint32_t error_code, uint64_t *id); + +/* copy next event to userpace_mem */ +int uspt_handle_poll_event(page_fault_event_t* userpace_mem); + +/* acknowledge receival of event to event handling logic */ +int uspt_handle_ack_event_ioctl(ack_event_t event); + +/* should be called after "uspt_batch_tracking_save", + * "uspt_batch_tracking_handle_retrack" and any future custom logic + * for an event is processed */ +void uspt_batch_tracking_inc_event_idx(void); +int uspt_batch_tracking_start(int tracking_type, uint64_t expected_events, int perf_cpu, bool retrack); +int uspt_batch_tracking_save(uint64_t faulted_gpa, uint32_t error_code, bool have_rip, uint64_t rip); +uint64_t uspt_batch_tracking_get_events_count(void); + +/* Stops batch tracking on copies the first @len events into @result. 
+ * If an error occurred at some point during the batch tracking,
+ * error_occured is set (there should also be a dmesg, but this allows programmatic access);
+ * Caller can use uspt_batch_tracking_get_events_count() to determine the amount
+ * of memory they should allocate for @results */
+int uspt_batch_tracking_stop(page_fault_event_t *results, uint64_t len, bool *error_occured);
+void uspt_batch_tracking_handle_retrack(struct kvm_vcpu *vcpu, uint64_t current_fault_gfn);
+void uspt_batch_tracking_get_retrack_gfns(uint64_t **gfns, uint64_t *len, int *tracking_type);
+bool uspt_batch_tracking_in_progress(void);
diff --git a/test/access.c b/test/access.c
old mode 100755
new mode 100644
index 22e2fb8..1e38e1e
--- a/test/access.c
+++ b/test/access.c
@@ -1,4 +1,4 @@
-#include "cachepc_user.h"
+#include "cachepc/uapi.h"
 
 #include 
 #include 
diff --git a/test/eviction.c b/test/eviction.c
old mode 100755
new mode 100644
index e68132b..9fb57b5
--- a/test/eviction.c
+++ b/test/eviction.c
@@ -1,4 +1,4 @@
-#include "cachepc_user.h"
+#include "cachepc/uapi.h"
 
 #include 
 #include 
diff --git a/test/kvm.c b/test/kvm.c
old mode 100755
new mode 100644
index 42d7f5a..cd0dd4d
--- a/test/kvm.c
+++ b/test/kvm.c
@@ -1,7 +1,6 @@
-/* for CPU_ZERO macros.. */
 #define _GNU_SOURCE
 
-#include "cachepc_user.h"
+#include "cachepc/uapi.h"
 
 #include 
 #include 
diff --git a/test/sev-es.c b/test/sev-es.c
old mode 100755
new mode 100644
index 17cb72c..f2a6f5c
--- a/test/sev-es.c
+++ b/test/sev-es.c
@@ -1,7 +1,6 @@
-/* for CPU_ZERO macros.. */
 #define _GNU_SOURCE
 
-#include "cachepc_user.h"
+#include "cachepc/uapi.h"
 
 #include 
 #include 
diff --git a/test/sev.c b/test/sev.c
old mode 100755
new mode 100644
index e6da94c..73bb91f
--- a/test/sev.c
+++ b/test/sev.c
@@ -1,7 +1,6 @@
-/* for CPU_ZERO macros.. */
 #define _GNU_SOURCE
 
-#include "cachepc_user.h"
+#include "cachepc/uapi.h"
 
 #include 
 #include 
diff --git a/test/sevstep.c b/test/sevstep.c
new file mode 100644
index 0000000..3ca7f03
--- /dev/null
+++ b/test/sevstep.c
@@ -0,0 +1,32 @@
+#include "sevstep/uapi.h"
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+int
+main(int argc, const char **argv)
+{
+	track_all_pages_t tracking;
+	int ret, fd;
+
+	fd = open("/proc/cachepc", O_RDONLY);
+	if (fd < 0) err(1, "open");
+
+	tracking.track_mode = KVM_PAGE_TRACK_ACCESS;
+	ret = ioctl(fd, KVM_USPT_TRACK_ALL, &tracking);
+	if (ret == -1) err(1, "ioctl TRACK_ALL ACCESS");
+
+
+	tracking.track_mode = KVM_PAGE_TRACK_RESET_ACCESSED;
+	ret = ioctl(fd, KVM_USPT_TRACK_ALL, &tracking);
+	if (ret == -1) err(1, "ioctl TRACK_ALL RESET_ACCESSED");
+
+	ret = ioctl(fd, KVM_USPT_UNTRACK_ALL, &tracking);
+	if (ret == -1) err(1, "ioctl UNTRACK_ALL");
+
+	close(fd);
+}
-- 
cgit v1.2.3-71-gd317
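
A rough user-space sketch of the single-event path (an illustration, not a file added by this commit): assuming the /proc/cachepc node opened in test/sevstep.c also dispatches the uspt event ioctls from sevstep/uapi.h, and that KVM_USPT_POLL_EVENT hands back KVM_USPT_POLL_EVENT_NO_EVENT while no tracked fault is pending (both assumptions, since the ioctl dispatcher lives in cachepc/kvm.c and is not shown here), a poll/ack loop pairing with uspt_send_and_block() could look roughly as follows. The explicit <stdint.h>, <stdbool.h>, <linux/kvm.h> and <fcntl.h> includes are also assumptions, since the include targets in the headers above are elided.

#include <stdint.h>
#include <stdbool.h>
#include <linux/kvm.h>   /* assumed: provides KVMIO for the _IOWR macros in uapi.h */
#include "sevstep/uapi.h"

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>

int
main(void)
{
	page_fault_event_t event;
	ack_event_t ack;
	int ret, fd;

	/* same proc node as test/sevstep.c (assumed to accept uspt ioctls) */
	fd = open("/proc/cachepc", O_RDONLY);
	if (fd < 0) err(1, "open");

	for (;;) {
		/* poll until the kernel reports a tracked page fault */
		ret = ioctl(fd, KVM_USPT_POLL_EVENT, &event);
		if (ret == KVM_USPT_POLL_EVENT_NO_EVENT)
			continue;
		if (ret) err(1, "ioctl POLL_EVENT");

		printf("fault: gpa %llx rip %llx\n",
			(unsigned long long) event.faulted_gpa,
			(unsigned long long) event.rip);

		/* ack the event so uspt_send_and_block() stops waiting */
		ack.id = event.id;
		ret = ioctl(fd, KVM_USPT_ACK_EVENT, &ack);
		if (ret) err(1, "ioctl ACK_EVENT");
		break;
	}

	close(fd);
	return 0;
}

The busy poll is deliberate: uspt_send_and_block() in sevstep/uspt.c gives userspace roughly one second to acknowledge an event before it times out, so the consumer has to ack promptly after each poll.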