Diffstat (limited to 'src')
-rwxr-xr-xsrc/asm.h72
-rwxr-xr-xsrc/cache_types.h66
-rwxr-xr-xsrc/cachepc.c221
-rwxr-xr-xsrc/cachepc.h135
-rwxr-xr-xsrc/device_conf.h29
-rw-r--r--src/util.c40
-rw-r--r--src/util.h9
7 files changed, 572 insertions, 0 deletions
diff --git a/src/asm.h b/src/asm.h
new file mode 100755
index 0000000..9509952
--- /dev/null
+++ b/src/asm.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <linux/kernel.h>
+
+#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx"
+
+__attribute__((always_inline))
+static inline uint64_t cachepc_readpmc(uint64_t event);
+
+__attribute__((always_inline))
+static inline void cachepc_cpuid(void);
+
+__attribute__((always_inline))
+static inline void cachepc_lfence(void);
+
+__attribute__((always_inline))
+static inline void cachepc_sfence(void);
+
+__attribute__((always_inline))
+static inline void cachepc_mfence(void);
+
+/*
+ * Read a performance monitoring counter: rdpmc takes the counter index
+ * in ecx and returns the counter value in edx:eax.
+ */
+uint64_t
+cachepc_readpmc(uint64_t event)
+{
+ uint32_t lo, hi;
+
+ asm volatile (
+ "rdpmc"
+ : "=a" (lo), "=d" (hi)
+ : "c" (event)
+ );
+
+ return ((uint64_t) hi << 32) | lo;
+}
+
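+/*
+ * cpuid is used purely as a serialising instruction: it drains the pipeline
+ * so that the surrounding loads and counter reads cannot be reordered across
+ * it. The leaf requested in eax is irrelevant for that purpose.
+ */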
+void
+cachepc_cpuid(void)
+{
+ asm volatile(
+ "mov $0x80000005, %%eax\n\t"
+ "cpuid\n\t"
+ ::: CPUID_AFFECTED_REGS
+ );
+}
+
+void
+cachepc_lfence(void)
+{
+ asm volatile(
+ "lfence\n\t"
+ ::: "memory"
+ );
+}
+
+void
+cachepc_sfence(void)
+{
+ asm volatile(
+ "sfence\n\t"
+ ::: "memory"
+ );
+}
+
+void
+cachepc_mfence(void)
+{
+ asm volatile(
+ "mfence\n\t"
+ ::: "memory"
+ );
+}
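Taken together, these helpers are meant to bracket a measured memory access. A minimal sketch of the intended pattern (the function name is hypothetical and counter index 0 is a placeholder; programming the PMCs is not part of this patch):

	static uint64_t
	measure_one_access(void *ptr)
	{
		uint64_t before, after;

		cachepc_cpuid();                 /* serialise before reading the counter */
		before = cachepc_readpmc(0);     /* counter index 0 is a placeholder */
		(void) *(volatile char *) ptr;   /* the memory access under test */
		cachepc_mfence();                /* ensure the access has completed */
		cachepc_cpuid();
		after = cachepc_readpmc(0);

		return after - before;           /* events counted during the access */
	}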
diff --git a/src/cache_types.h b/src/cache_types.h
new file mode 100755
index 0000000..4bc112c
--- /dev/null
+++ b/src/cache_types.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "device_conf.h"
+
+#include <linux/build_bug.h>
+
+#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1))
+
+#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK))
+
+#define GET_BIT(b, i) (((b) >> (i)) & 1)
+#define SET_BIT(b, i) ((b) | (1 << (i)))
+
+/* Cacheline flags (bits of the 16-bit flags field):
+ * 15 2 1 0
+ * | | ... | cache group initialized | last | first |
+ */
+#define DEFAULT_FLAGS 0
+#define SET_FIRST(flags) SET_BIT(flags, 0)
+#define SET_LAST(flags) SET_BIT(flags, 1)
+#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2)
+#define IS_FIRST(flags) GET_BIT(flags, 0)
+#define IS_LAST(flags) GET_BIT(flags, 1)
+#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
+
+// Offset of the next and prev field in the cacheline struct
+#define CL_NEXT_OFFSET 0
+#define CL_PREV_OFFSET 8
+
+typedef enum cache_level cache_level;
+typedef enum addressing_type addressing_type;
+typedef struct cacheline cacheline;
+typedef struct cache_ctx cache_ctx;
+
+enum cache_level {L1, L2};
+enum addressing_type {VIRTUAL, PHYSICAL};
+
+struct cache_ctx {
+ cache_level cache_level;
+ addressing_type addressing;
+
+ uint32_t sets;
+ uint32_t associativity;
+ uint32_t access_time;
+ uint32_t nr_of_cachelines;
+ uint32_t set_size;
+ uint32_t cache_size;
+};
+
+struct cacheline {
+ // Doubly linked list inside same set
+ // Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET
+ // must be kept up to date
+ cacheline *next;
+ cacheline *prev;
+
+ uint16_t cache_set;
+ uint16_t flags;
+
+ // Counter for the measurements attributed to this cacheline
+ uint64_t count;
+
+ // Unused padding to fill the cache line
+ char padding[32];
+};
+
+static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
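The hard-coded CL_NEXT_OFFSET and CL_PREV_OFFSET values are consumed by the pointer-chasing asm in cachepc.h, so they must track the struct layout. A build-time check along these lines (a sketch, not part of the patch) would catch the two drifting apart:

	static_assert(offsetof(struct cacheline, next) == CL_NEXT_OFFSET,
	              "CL_NEXT_OFFSET out of sync with struct cacheline");
	static_assert(offsetof(struct cacheline, prev) == CL_PREV_OFFSET,
	              "CL_PREV_OFFSET out of sync with struct cacheline");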
diff --git a/src/cachepc.c b/src/cachepc.c
new file mode 100755
index 0000000..01b3b95
--- /dev/null
+++ b/src/cachepc.c
@@ -0,0 +1,221 @@
+#include "cachepc.h"
+
+static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
+static cacheline **allocate_cache_ds(cache_ctx *ctx);
+static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
+static void *aligned_alloc(size_t alignment, size_t size);
+
+cache_ctx *
+cachepc_get_ctx(cache_level cache_level)
+{
+ cache_ctx *ctx;
+
+ ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
+ BUG_ON(ctx == NULL);
+
+ if (cache_level == L1) {
+ ctx->addressing = L1_ADDRESSING;
+ ctx->sets = L1_SETS;
+ ctx->associativity = L1_ASSOCIATIVITY;
+ ctx->access_time = L1_ACCESS_TIME;
+ } else if (cache_level == L2) {
+ ctx->addressing = L2_ADDRESSING;
+ ctx->sets = L2_SETS;
+ ctx->associativity = L2_ASSOCIATIVITY;
+ ctx->access_time = L2_ACCESS_TIME;
+ } else {
+ return NULL;
+ }
+
+ ctx->cache_level = cache_level;
+ ctx->nr_of_cachelines = ctx->sets * ctx->associativity;
+ ctx->set_size = CACHELINE_SIZE * ctx->associativity;
+ ctx->cache_size = ctx->sets * ctx->set_size;
+
+ return ctx;
+}
+
+/*
+ * Initialises the complete cache data structure for the given context
+ */
+cacheline *
+cachepc_prepare_ds(cache_ctx *ctx)
+{
+ cacheline **cacheline_ptr_arr;
+ cacheline *cache_ds;
+
+ cacheline_ptr_arr = allocate_cache_ds(ctx);
+ cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
+ kfree(cacheline_ptr_arr);
+
+ return cache_ds;
+}
+
+void
+cachepc_save_msrmt(cacheline *head, const char *prefix, int index)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s.%i", prefix, index);
+
+ /* TODO: writing the measurements out to the file is not implemented yet */
+}
+
+void
+cachepc_print_msrmt(cacheline *head)
+{
+ cacheline *curr_cl;
+
+ curr_cl = head;
+ do {
+ if (IS_FIRST(curr_cl->flags)) {
+ printk(KERN_WARNING "Count for cache set %i: %llu\n",
+ curr_cl->cache_set, curr_cl->count);
+ }
+
+ curr_cl = curr_cl->prev;
+ } while (curr_cl != head);
+}
+
+/*
+ * Create a randomized doubly linked list with the following structure:
+ * set A <--> set B <--> ... <--> set X <--> set A
+ * where each set is one of the cache sets, in a random order.
+ * The sets are a doubly linked list of cachelines themselves:
+ * set A:
+ * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ...
+ * where x0, x1, ..., x(D-1) is a random permutation of 0, 1, ..., D-1
+ * and D = associativity = |cache set|
+ */
+cacheline *
+build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr)
+{
+ cacheline **cl_ptr_arr_sorted;
+ cacheline *curr_cl, *next_cl;
+ cacheline *cache_ds;
+ uint32_t *idx_per_set;
+ uint32_t idx_curr_set, set_offset;
+ uint32_t i, j, set, set_len;
+ uint32_t *idx_map;
+
+ idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
+ BUG_ON(idx_per_set == NULL);
+
+ cl_ptr_arr_sorted = kmalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
+ BUG_ON(cl_ptr_arr_sorted == NULL);
+
+ set_len = ctx->associativity;
+ for (i = 0; i < ctx->nr_of_cachelines; ++i) {
+ set_offset = cl_ptr_arr[i]->cache_set * set_len;
+ idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set];
+
+ cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i];
+ idx_per_set[cl_ptr_arr[i]->cache_set] += 1;
+ }
+
+ // Build doubly linked list for every set
+ for (set = 0; set < ctx->sets; ++set) {
+ set_offset = set * set_len;
+ build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset);
+ }
+
+ // Relink the sets among each other
+ idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
+ BUG_ON(idx_map == NULL);
+
+ gen_random_indices(idx_map, ctx->sets);
+
+ curr_cl = cl_ptr_arr_sorted[idx_map[0] * set_len]->prev;
+ for (j = 0; j < ctx->sets; ++j) {
+ curr_cl->next = cl_ptr_arr_sorted[idx_map[(j + 1) % ctx->sets] * set_len];
+ next_cl = curr_cl->next->prev;
+ curr_cl->next->prev = curr_cl;
+ curr_cl = next_cl;
+ }
+
+ cache_ds = cl_ptr_arr_sorted[idx_map[0] * set_len];
+
+ kfree(cl_ptr_arr_sorted);
+ kfree(idx_per_set);
+ kfree(idx_map);
+
+ return cache_ds;
+}
+
+/*
+ * Helper function to build a randomised list of cacheline structs for a set
+ */
+void
+build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
+{
+ cacheline *curr_cl;
+ uint32_t len, *idx_map;
+ uint16_t i;
+
+ len = ctx->associativity;
+ idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL);
+ BUG_ON(idx_map == NULL);
+
+ gen_random_indices(idx_map, len);
+
+ for (i = 0; i < len; ++i) {
+ curr_cl = cacheline_ptr_arr[idx_map[i]];
+ curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]];
+ curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]];
+ curr_cl->count = 0;
+
+ if (curr_cl == cacheline_ptr_arr[0]) {
+ curr_cl->flags = SET_FIRST(DEFAULT_FLAGS);
+ curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS);
+ } else {
+ curr_cl->flags = curr_cl->flags | DEFAULT_FLAGS;
+ }
+ }
+
+ kfree(idx_map);
+}
+
+/*
+ * Allocate a data structure that fills the complete cache, i.e. consisting
+ * of `associativity` many cache lines for each cache set.
+ */
+cacheline **
+allocate_cache_ds(cache_ctx *ctx)
+{
+ cacheline **cl_ptr_arr, *cl_arr;
+ uint32_t i;
+
+ cl_ptr_arr = (cacheline **) kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
+ BUG_ON(cl_ptr_arr == NULL);
+
+ BUG_ON(ctx->addressing != VIRTUAL);
+
+ // For virtual addressing, allocating a consecutive chunk of memory is enough
+ cl_arr = (cacheline *) aligned_alloc(PAGE_SIZE, ctx->cache_size);
+ BUG_ON(cl_arr == NULL);
+
+ for (i = 0; i < ctx->nr_of_cachelines; ++i) {
+ cl_ptr_arr[i] = cl_arr + i;
+ cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
+ // kmalloc memory is not zeroed, so give every line defined flags
+ cl_ptr_arr[i]->flags = DEFAULT_FLAGS;
+ }
+
+ return cl_ptr_arr;
+}
+
+uint16_t
+get_virt_cache_set(cache_ctx *ctx, void *ptr)
+{
+ return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE);
+}
+
+/*
+ * Round the size up to a multiple of the alignment and rely on kmalloc
+ * returning naturally aligned memory for power-of-two sizes; the BUG_ON
+ * catches the case where this assumption does not hold.
+ */
+void *
+aligned_alloc(size_t alignment, size_t size)
+{
+ void *p;
+
+ if (size % alignment != 0)
+ size = size - (size % alignment) + alignment;
+ p = kmalloc(size, GFP_KERNEL);
+ BUG_ON(p == NULL);
+ BUG_ON(((uintptr_t) p) % alignment != 0);
+
+ return p;
+}
+
+
diff --git a/src/cachepc.h b/src/cachepc.h
new file mode 100755
index 0000000..ee80338
--- /dev/null
+++ b/src/cachepc.h
@@ -0,0 +1,135 @@
+#pragma once
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "asm.h"
+#include "cache_types.h"
+#include "util.h"
+
+cache_ctx *cachepc_get_ctx(cache_level cl);
+cacheline *cachepc_prepare_ds(cache_ctx *ctx);
+void cachepc_save_msrmt(cacheline *head, const char *prefix, int index);
+void cachepc_print_msrmt(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime_rev(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_probe_set(cacheline *curr_cl);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_probe(cacheline *head);
+
+/*
+ * Prime phase: fill the target cache (encoded in the size of the data structure)
+ * with the prepared data structure, i.e. with attacker data.
+ */
+static inline cacheline *
+cachepc_prime(cacheline *head)
+{
+ cacheline *curr_cl;
+
+ cachepc_cpuid();
+ curr_cl = head;
+ do {
+ curr_cl = curr_cl->next;
+ cachepc_mfence();
+ } while(curr_cl != head);
+ cachepc_cpuid();
+
+ return curr_cl->prev;
+}
+
+/*
+ * Same as prime, but in the reverse direction, i.e. the same direction that
+ * probe uses. This is beneficial in the following scenarios:
+ * - L1:
+ * - Trigger a collision chain-reaction to amplify an evicted set (at the
+ * cost of noisier measurements).
+ * - L2:
+ * - Always use this for L2, otherwise the first cache sets will still reside
+ * in L1 unless the victim filled L1 completely. In that case an eviction
+ * has one of the following effects, depending on where the evicted set is
+ * placed in the randomised data structure:
+ * A) The evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower
+ * B) The evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower
+ */
+static inline cacheline *
+cachepc_prime_rev(cacheline *head)
+{
+ cacheline *curr_cl;
+
+ cachepc_cpuid();
+ curr_cl = head;
+ do {
+ curr_cl = curr_cl->prev;
+ cachepc_mfence();
+ } while(curr_cl != head);
+ cachepc_cpuid();
+
+ return curr_cl->prev;
+}
+
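+/*
+ * Probe a single cache set: read the performance counters, touch every way
+ * of the set by chasing the prev pointers, read the counters again and
+ * accumulate the difference on the cacheline that was reached.
+ */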
+static inline cacheline *
+cachepc_probe_set(cacheline *curr_cl)
+{
+ uint64_t pre1, pre2, pre3;
+ uint64_t post1, post2, post3;
+ cacheline *next_cl;
+
+ pre1 = cachepc_readpmc(0);
+ pre2 = cachepc_readpmc(1);
+ pre3 = cachepc_readpmc(2);
+
+ cachepc_mfence();
+ asm volatile(
+ /* Chase the prev pointers (offset 8 = CL_PREV_OFFSET), touching one
+ * cacheline per load; eight loads match the associativity of 8
+ * configured in device_conf.h, so one full cache set is accessed. */
+ "mov 8(%[curr_cl]), %%rax \n\t"
+ "mov 8(%%rax), %%rcx \n\t"
+ "mov 8(%%rcx), %%rax \n\t"
+ "mov 8(%%rax), %%rcx \n\t"
+ "mov 8(%%rcx), %%rax \n\t"
+ "mov 8(%%rax), %%rcx \n\t"
+ "mov 8(%%rcx), %[curr_cl_out] \n\t"
+ "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t"
+ : [next_cl_out] "=r" (next_cl),
+ [curr_cl_out] "=r" (curr_cl)
+ : [curr_cl] "r" (curr_cl)
+ : "rax", "rcx"
+ );
+ cachepc_mfence();
+ cachepc_cpuid();
+
+ post1 = cachepc_readpmc(0);
+ cachepc_cpuid();
+ post2 = cachepc_readpmc(1);
+ cachepc_cpuid();
+ post3 = cachepc_readpmc(2);
+ cachepc_cpuid();
+
+ /* works across size boundary */
+ curr_cl->count = 0;
+ curr_cl->count += post1 - pre1;
+ curr_cl->count += post2 - pre2;
+ curr_cl->count += post3 - pre3;
+
+ return next_cl;
+}
+
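+/*
+ * Probe phase: walk the whole data structure one cache set at a time until
+ * the traversal is back at the head.
+ */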
+static inline cacheline *
+cachepc_probe(cacheline *head)
+{
+ cacheline *curr_cs;
+
+ curr_cs = head;
+ do {
+ curr_cs = cachepc_probe_set(curr_cs);
+ } while (__builtin_expect(curr_cs != head, 1));
+
+ return curr_cs->next;
+}
+
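Based only on the interfaces declared above, a full measurement round is expected to look roughly like this (a sketch with a hypothetical wrapper function; how the victim is scheduled between prime and probe is outside the scope of this patch):

	static void
	measurement_round(void)
	{
		cache_ctx *ctx;
		cacheline *ds, *head;

		ctx = cachepc_get_ctx(L1);
		ds = cachepc_prepare_ds(ctx);

		head = cachepc_prime(ds);      /* fill the cache with attacker lines */
		/* ... run the victim ... */
		head = cachepc_probe(head);    /* re-access and count the evictions */

		cachepc_print_msrmt(head);
	}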
diff --git a/src/device_conf.h b/src/device_conf.h
new file mode 100755
index 0000000..e24d681
--- /dev/null
+++ b/src/device_conf.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// TODO: Read from kernel headers
+
+// General settings
+// #define PAGE_SIZE 4096
+#define PROCESSOR_FREQ 2900000000
+
+// Cache related settings
+#define CACHELINE_SIZE 64
+#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
+
+// Addressing:
+// - virtual: 0
+// - physical: 1
+#define L1_ADDRESSING 0
+#define L1_SETS 64
+#define L1_ASSOCIATIVITY 8
+#define L1_ACCESS_TIME 4
+
+#define L2_ADDRESSING 1
+#define L2_SETS 512
+#define L2_ASSOCIATIVITY 8
+#define L2_ACCESS_TIME 12
+
+#define L3_ADDRESSING 1
+#define L3_SETS 4096
+#define L3_ASSOCIATIVITY 16
+#define L3_ACCESS_TIME 30
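For reference, these constants describe a 32 KiB, 8-way L1 and a 256 KiB, 8-way L2 (sets x associativity x 64 B cachelines), and PROCESSOR_FREQ is presumably in Hz. A consistency check along these lines (a sketch, not part of the patch) would document the derived sizes:

	static_assert(L1_SETS * L1_ASSOCIATIVITY * CACHELINE_SIZE == 32 * 1024,
	              "L1 size mismatch");
	static_assert(L2_SETS * L2_ASSOCIATIVITY * CACHELINE_SIZE == 256 * 1024,
	              "L2 size mismatch");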
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..83f265b
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,40 @@
+#include "util.h"
+
+#include <linux/random.h>
+
+void
+random_perm(uint32_t *arr, uint32_t arr_len)
+{
+ uint32_t i, idx, tmp;
+
+ /* Fisher-Yates shuffle */
+ for (i = arr_len - 1; i > 0; --i) {
+ get_random_bytes(&idx, sizeof(idx));
+ idx = idx % (i + 1);
+
+ tmp = arr[i];
+ arr[i] = arr[idx];
+ arr[idx] = tmp;
+ }
+}
+
+void
+gen_random_indices(uint32_t *arr, uint32_t arr_len)
+{
+ uint32_t i;
+
+ for (i = 0; i < arr_len; ++i)
+ arr[i] = i;
+ random_perm(arr, arr_len);
+}
+
+bool
+is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len)
+{
+ uint32_t i;
+
+ for (i = 0; i < arr_len; ++i) {
+ if (arr[i] == elem)
+ return true;
+ }
+
+ return false;
+}
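For illustration, gen_random_indices is what drives the randomised set and line ordering in cachepc.c; a short usage sketch:

	uint32_t order[8];

	gen_random_indices(order, 8);   /* order is now a random permutation of 0..7 */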
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..7b543aa
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <linux/kernel.h>
+
+void gen_rand_bytes(unsigned char *arr, uint32_t arr_len);
+void random_perm(uint32_t *arr, uint32_t arr_len);
+void gen_random_indices(uint32_t *arr, uint32_t arr_len);
+
+bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);