From 98babf91dcf166bc7644a3d70a90dac272f12a75 Mon Sep 17 00:00:00 2001 From: Louis Burda Date: Sat, 13 Aug 2022 16:44:04 +0200 Subject: Reorder repo into module and tests --- .gitignore | 2 - Makefile | 14 +- access.c | 29 ---- access.sh | 8 - kmod/asm.h | 85 ++++++++++ kmod/cache_types.h | 64 ++++++++ kmod/cachepc.c | 442 ++++++++++++++++++++++++++++++++++++++++++++++++++++ kmod/cachepc.h | 150 ++++++++++++++++++ kmod/cachepc_user.h | 7 + kmod/device_conf.h | 29 ++++ kmod/util.c | 55 +++++++ kmod/util.h | 9 ++ read.c | 33 ---- src/asm.h | 85 ---------- src/cache_types.h | 64 -------- src/cachepc.c | 442 ---------------------------------------------------- src/cachepc.h | 150 ------------------ src/cachepc_user.h | 7 - src/device_conf.h | 29 ---- src/util.c | 55 ------- src/util.h | 9 -- test/.gitignore | 3 + test/access.c | 29 ++++ test/eviction.c | 39 +++++ test/kvm.c | 405 +++++++++++++++++++++++++++++++++++++++++++++++ 25 files changed, 1321 insertions(+), 923 deletions(-) delete mode 100644 access.c delete mode 100755 access.sh create mode 100755 kmod/asm.h create mode 100755 kmod/cache_types.h create mode 100755 kmod/cachepc.c create mode 100755 kmod/cachepc.h create mode 100644 kmod/cachepc_user.h create mode 100755 kmod/device_conf.h create mode 100755 kmod/util.c create mode 100755 kmod/util.h delete mode 100755 read.c delete mode 100755 src/asm.h delete mode 100755 src/cache_types.h delete mode 100755 src/cachepc.c delete mode 100755 src/cachepc.h delete mode 100644 src/cachepc_user.h delete mode 100755 src/device_conf.h delete mode 100755 src/util.c delete mode 100755 src/util.h create mode 100644 test/.gitignore create mode 100644 test/access.c create mode 100644 test/eviction.c create mode 100644 test/kvm.c diff --git a/.gitignore b/.gitignore index c6e0f83..aef9d48 100755 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,4 @@ build.sh push.sh *.o.cmd *.o -read .vscode -access diff --git a/Makefile b/Makefile index 19c2879..a4a5caf 100755 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ PWD := $(shell pwd) .PHONY: all reset clean prepare build -all: reset clean prepare build +all: reset clean prepare build test/eviction test/access test/kvm clean: $(MAKE) -C $(KERNEL_SOURCE) SUBDIRS=arch/x86/kvm clean @@ -12,7 +12,7 @@ reset: git -C $(KERNEL_SOURCE) reset --hard $(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc: - ln -s $(PWD)/src $@ + ln -sf $(PWD)/kmod $@ prepare: $(KERNEL_SOURCE)/arch/x86/kvm/svm/cachepc git -C $(KERNEL_SOURCE) apply $(PWD)/patch.diff @@ -27,14 +27,8 @@ load: sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm.ko sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm-amd.ko -read: read.c - $(CC) -o $@ $< - -access: access.c src/cachepc_user.h - $(CC) -o $@ $< -I src - -test: load read - @./read +test/%: test/%.c kmod/cachepc_user.h + $(CC) -o $@ $< -I kmod update: git -C $(KERNEL_SOURCE) diff > patch.diff diff --git a/access.c b/access.c deleted file mode 100644 index 1f16552..0000000 --- a/access.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cachepc_user.h" - -int -main(int argc, const char **argv) -{ - size_t i, len; - int fd, ret; - int count; - - fd = open("/proc/cachepc", O_RDONLY); - if (fd < 0) err(1, "open"); - - for (i = 0; i < 50; i++) { - ret = ioctl(fd, CACHEPC_IOCTL_ACCESS_TEST, &count); - if (ret == -1) err(1, "ioctl fail"); - printf("%i\n", count); - } - - close(fd); -} diff --git a/access.sh b/access.sh deleted file mode 100755 index 5f862ab..0000000 --- a/access.sh +++ /dev/null @@ -1,8 +0,0 
@@ -#!/bin/sh - -for i in $(seq 0 100); do - echo -n "\rRun $i" - bash build.sh load 1>/dev/null -done -echo "" -dmesg -k | grep "CachePC:" | grep "access test" | tail -n100 diff --git a/kmod/asm.h b/kmod/asm.h new file mode 100755 index 0000000..35f803b --- /dev/null +++ b/kmod/asm.h @@ -0,0 +1,85 @@ +#pragma once + +#include + +#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx" + +__attribute__((always_inline)) +static inline uint64_t cachepc_readpmc(uint64_t event); + +__attribute__((always_inline)) +static inline void cachepc_cpuid(void); + +__attribute__((always_inline)) +static inline void cachepc_lfence(void); + +__attribute__((always_inline)) +static inline void cachepc_sfence(void); + +__attribute__((always_inline)) +static inline void cachepc_mfence(void); + +__attribute__((always_inline)) +static inline void cachepc_readq(void *p); + +uint64_t +cachepc_readpmc(uint64_t event) +{ + uint32_t lo, hi; + + event = 0xC0010201 + 2 * event; + + asm volatile ( + "rdmsr" + : "=a" (lo), "=d" (hi) + : "c"(event) + ); + + return ((uint64_t) hi << 32) | (uint64_t) lo; +} + +void +cachepc_cpuid(void) +{ + asm volatile( + "mov $0x80000005, %%eax\n\t" + "cpuid\n\t" + ::: CPUID_AFFECTED_REGS + ); +} + +void +cachepc_lfence(void) +{ + asm volatile( + "lfence\n\t" + ::: "memory" + ); +} + +void +cachepc_sfence(void) +{ + asm volatile( + "sfence\n\t" + ::: "memory" + ); +} + +void +cachepc_mfence(void) +{ + asm volatile( + "mfence\n\t" + ::: "memory" + ); +} + +void +cachepc_readq(void *p) +{ + asm volatile ( + "movq (%0), %%r10\n\t" + : : "r" (p) : "r10" + ); +} diff --git a/kmod/cache_types.h b/kmod/cache_types.h new file mode 100755 index 0000000..33da39b --- /dev/null +++ b/kmod/cache_types.h @@ -0,0 +1,64 @@ +#pragma once + +#include "device_conf.h" + +#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1)) + +#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK)) + +#define GET_BIT(b, i) (((b) >> (i)) & 1) +#define SET_BIT(b, i) ((b) | (1 << (i))) + +/* Operate cacheline flags + * Used flags: + * 32 2 1 0 + * | | ... 
| cache group initialized | last | first | + */ +#define DEFAULT_FLAGS 0 +#define SET_FIRST(flags) SET_BIT(flags, 0) +#define SET_LAST(flags) SET_BIT(flags, 1) +#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2) +#define IS_FIRST(flags) GET_BIT(flags, 0) +#define IS_LAST(flags) GET_BIT(flags, 1) +#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2) + +// Offset of the next and prev field in the cacheline struct +#define CL_NEXT_OFFSET 0 +#define CL_PREV_OFFSET 8 + +typedef enum cache_level cache_level; +typedef enum addressing_type addressing_type; +typedef struct cacheline cacheline; +typedef struct cache_ctx cache_ctx; + +enum cache_level {L1, L2}; +enum addressing_type {VIRTUAL, PHYSICAL}; + +struct cache_ctx { + cache_level cache_level; + addressing_type addressing; + + uint32_t sets; + uint32_t associativity; + uint32_t access_time; + uint32_t nr_of_cachelines; + uint32_t set_size; + uint32_t cache_size; +}; + +struct cacheline { + // Doubly linked list inside same set + // Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET + // must be kept up to date + cacheline *next; + cacheline *prev; + + uint16_t cache_set; + uint16_t flags; + + // Unused padding to fill cache line + uint64_t count; + char padding[32]; +}; + +static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size"); diff --git a/kmod/cachepc.c b/kmod/cachepc.c new file mode 100755 index 0000000..702cfad --- /dev/null +++ b/kmod/cachepc.c @@ -0,0 +1,442 @@ +#include "cachepc.h" + +#include +#include +#include +#include +#include + +static void cl_insert(cacheline *last_cl, cacheline *new_cl); +static void *remove_cache_set(cache_ctx *ctx, void *ptr); +static void *remove_cache_group_set(void *ptr); + +static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len); +static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr); +static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr); +static cacheline **allocate_cache_ds(cache_ctx *ctx); +static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); +static void *aligned_alloc(size_t alignment, size_t size); + +void +cachepc_init_counters(void) +{ + uint64_t event, event_no, event_mask; + uint64_t reg_addr; + + /* SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166) + * + * performance event selection is done via 0xC001_020X with X = (0..A)[::2] + * performance event reading is done viea 0XC001_020X with X = (1..B)[::2] + * + * 6 slots total + */ + + reg_addr = 0xc0010200; + event_no = 0x70; + event_mask = 0xFF; + event = event_no | (event_mask << 8); + event |= (1ULL << 17); /* OS (kernel) events only */ + event |= (1ULL << 22); /* enable performance counter */ + event |= (1ULL << 40); /* Host events only */ + printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); + + reg_addr = 0xc0010202; + event_no = 0x71; + event_mask = 0xFF; + event = event_no | (event_mask << 8); + event |= (1ULL << 17); /* OS (kernel) events only */ + event |= (1ULL << 22); /* enable performance counter */ + event |= (1ULL << 40); /* Host events only */ + printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); + + reg_addr = 0xc0010204; + event_no = 0x72; + event_mask = 0xFF; + event = event_no | (event_mask << 8); + event |= (1ULL << 17); /* OS (kernel) events only */ + event |= (1ULL << 22); /* enable 
performance counter */ + event |= (1ULL << 40); /* Host events only */ + printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); + +} + +cache_ctx * +cachepc_get_ctx(cache_level cache_level) +{ + cache_ctx *ctx; + + // printk(KERN_WARNING "CachePC: Getting ctx..\n"); + + ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); + BUG_ON(ctx == NULL); + + BUG_ON(cache_level != L1); + if (cache_level == L1) { + ctx->addressing = L1_ADDRESSING; + ctx->sets = L1_SETS; + ctx->associativity = L1_ASSOCIATIVITY; + ctx->access_time = L1_ACCESS_TIME; + } else if (cache_level == L2) { + ctx->addressing = L2_ADDRESSING; + ctx->sets = L2_SETS; + ctx->associativity = L2_ASSOCIATIVITY; + ctx->access_time = L2_ACCESS_TIME; + } else { + return NULL; + } + + ctx->cache_level = cache_level; + ctx->nr_of_cachelines = ctx->sets * ctx->associativity; + ctx->set_size = CACHELINE_SIZE * ctx->associativity; + ctx->cache_size = ctx->sets * ctx->set_size; + + // printk(KERN_WARNING "CachePC: Getting ctx done\n"); + + return ctx; +} + +void +cachepc_release_ctx(cache_ctx *ctx) +{ + kfree(ctx); +} + + +/* + * Initialises the complete cache data structure for the given context + */ +cacheline * +cachepc_prepare_ds(cache_ctx *ctx) +{ + cacheline **cacheline_ptr_arr; + cacheline *cache_ds; + + //printk(KERN_WARNING "CachePC: Preparing ds..\n"); + + cacheline_ptr_arr = allocate_cache_ds(ctx); + cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); + kfree(cacheline_ptr_arr); + + // printk(KERN_WARNING "CachePC: Preparing ds done\n"); + + return cache_ds; +} + +void +cachepc_release_ds(cache_ctx *ctx, cacheline *ds) +{ + kfree(remove_cache_set(ctx, ds)); +} + +cacheline * +cachepc_prepare_victim(cache_ctx *ctx, uint32_t set) +{ + cacheline *victim_set, *victim_cl; + cacheline *curr_cl, *next_cl; + + victim_set = prepare_cache_set_ds(ctx, &set, 1); + victim_cl = victim_set; + + // Free the other lines in the same set that are not used. + if (ctx->addressing == PHYSICAL) { + curr_cl = victim_cl->next; + do { + next_cl = curr_cl->next; + // Here, it is ok to free them directly, as every line in the same + // set is from a different page anyway. 
+ kfree(remove_cache_group_set(curr_cl)); + curr_cl = next_cl; + } while(curr_cl != victim_cl); + } + + return victim_cl; +} + +void +cachepc_release_victim(cache_ctx *ctx, cacheline *victim) +{ + kfree(remove_cache_set(ctx, victim)); +} + +void +cachepc_save_msrmts(cacheline *head) +{ + cacheline *curr_cl; + + printk(KERN_WARNING "CachePC: Updating /proc/cachepc\n"); + + curr_cl = head; + do { + if (IS_FIRST(curr_cl->flags)) { + BUG_ON(curr_cl->cache_set >= cachepc_msrmts_count); + cachepc_msrmts[curr_cl->cache_set] = curr_cl->count; + } + + curr_cl = curr_cl->prev; + } while (curr_cl != head); +} + +void +cachepc_print_msrmts(cacheline *head) +{ + cacheline *curr_cl; + + curr_cl = head; + do { + if (IS_FIRST(curr_cl->flags)) { + printk(KERN_WARNING "CachePC: Count for cache set %i: %llu\n", + curr_cl->cache_set, curr_cl->count); + } + + curr_cl = curr_cl->prev; + } while (curr_cl != head); +} + + +cacheline * +prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len) +{ + cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets; + cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds; + uint32_t i, cache_groups_len, cache_groups_max_len; + uint32_t *cache_groups; + + cache_ds = cachepc_prepare_ds(ctx); + + first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(first_cl_in_sets == NULL); + + last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(last_cl_in_sets == NULL); + + // Find the cache groups that are used, so that we can delete the other ones + // later (to avoid memory leaks) + cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE; + cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(cache_groups == NULL); + + cache_groups_len = 0; + for (i = 0; i < sets_len; ++i) { + if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) { + cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE; + ++cache_groups_len; + } + } + + to_del_cls = NULL; + curr_cl = cache_ds; + + // Extract the partial data structure for the cache sets and ensure correct freeing + do { + next_cl = curr_cl->next; + + if (IS_FIRST(curr_cl->flags)) { + first_cl_in_sets[curr_cl->cache_set] = curr_cl; + } + if (IS_LAST(curr_cl->flags)) { + last_cl_in_sets[curr_cl->cache_set] = curr_cl; + } + + if (ctx->addressing == PHYSICAL && !is_in_arr( + curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) + { + // Already free all unused blocks of the cache ds for physical + // addressing, because we loose their refs + cl_insert(to_del_cls, curr_cl); + to_del_cls = curr_cl; + } + curr_cl = next_cl; + + } while(curr_cl != cache_ds); + + // Fix partial cache set ds + for (i = 0; i < sets_len; ++i) { + last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]]; + first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]]; + } + cache_set_ds = first_cl_in_sets[sets[0]]; + + // Free unused cache lines + if (ctx->addressing == PHYSICAL) { + cachepc_release_ds(ctx, to_del_cls); + } + + kfree(first_cl_in_sets); + kfree(last_cl_in_sets); + kfree(cache_groups); + + return cache_set_ds; +} + +void +cl_insert(cacheline *last_cl, cacheline *new_cl) +{ + if (last_cl == NULL) { + // Adding the first entry is a special case + new_cl->next = new_cl; + new_cl->prev = new_cl; + } else { + new_cl->next = last_cl->next; + new_cl->prev = last_cl; + last_cl->next->prev = new_cl; + last_cl->next = new_cl; + } +} + +void * +remove_cache_set(cache_ctx *ctx, void *ptr) +{ + return 
(void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets)); +} + +void * +remove_cache_group_set(void *ptr) +{ + return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE)); +} + + +/* + * Create a randomized doubly linked list with the following structure: + * set A <--> set B <--> ... <--> set X <--> set A + * where each set is one of the cache sets, in a random order. + * The sets are a doubly linked list of cachelines themselves: + * set A: + * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ... + * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D + * and D = Associativity = | cache set | + */ +cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) { + cacheline **cl_ptr_arr_sorted; + cacheline *curr_cl, *next_cl; + cacheline *cache_ds; + uint32_t *idx_per_set; + uint32_t idx_curr_set, set_offset; + uint32_t i, j, set, set_len; + uint32_t *idx_map; + + idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_per_set == NULL); + + cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(cl_ptr_arr_sorted == NULL); + + set_len = ctx->associativity; + for (i = 0; i < ctx->nr_of_cachelines; ++i) { + set_offset = cl_ptr_arr[i]->cache_set * set_len; + idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set]; + + cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i]; + idx_per_set[cl_ptr_arr[i]->cache_set] += 1; + } + + // Build doubly linked list for every set + for (set = 0; set < ctx->sets; ++set) { + set_offset = set * set_len; + build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset); + } + + // Relink the sets among each other + idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_map == NULL); + + gen_random_indices(idx_map, ctx->sets); + + curr_cl = cl_ptr_arr_sorted[idx_map[0] * set_len]->prev; + for (j = 0; j < ctx->sets; ++j) { + curr_cl->next = cl_ptr_arr_sorted[idx_map[(j + 1) % ctx->sets] * set_len]; + next_cl = curr_cl->next->prev; + curr_cl->next->prev = curr_cl; + curr_cl = next_cl; + } + + cache_ds = cl_ptr_arr_sorted[idx_map[0] * set_len]; + + kfree(cl_ptr_arr_sorted); + kfree(idx_per_set); + kfree(idx_map); + + return cache_ds; +} + +/* + * Helper function to build a randomised list of cacheline structs for a set + */ +void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr) +{ + cacheline *curr_cl; + uint32_t len, *idx_map; + uint16_t i; + + len = ctx->associativity; + idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL); + BUG_ON(idx_map == NULL); + + gen_random_indices(idx_map, len); + + for (i = 0; i < len; ++i) { + curr_cl = cacheline_ptr_arr[idx_map[i]]; + curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]]; + curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]]; + curr_cl->count = 0; + + if (idx_map[i] == 0) { + curr_cl->flags = SET_FIRST(DEFAULT_FLAGS); + curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS); + } else { + curr_cl->flags = curr_cl->flags | DEFAULT_FLAGS; + } + } + + kfree(idx_map); +} + +/* + * Allocate a data structure that fills the complete cache, i.e. consisting + * of `associativity` many cache lines for each cache set. 
+ */ +cacheline ** +allocate_cache_ds(cache_ctx *ctx) +{ + cacheline **cl_ptr_arr, *cl_arr; + uint32_t i; + + cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); + BUG_ON(cl_ptr_arr == NULL); + + BUG_ON(ctx->addressing != VIRTUAL); + + // For virtual addressing, allocating a consecutive chunk of memory is enough + cl_arr = aligned_alloc(PAGE_SIZE, ctx->cache_size); + BUG_ON(cl_arr == NULL); + + for (i = 0; i < ctx->nr_of_cachelines; ++i) { + cl_ptr_arr[i] = cl_arr + i; + cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]); + } + + return cl_ptr_arr; +} + +uint16_t +get_virt_cache_set(cache_ctx *ctx, void *ptr) +{ + return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE); +} + +void * +aligned_alloc(size_t alignment, size_t size) +{ + void *p; + + if (size % alignment != 0) + size = size - (size % alignment) + alignment; + p = kzalloc(size, GFP_KERNEL); + BUG_ON(((uintptr_t) p) % alignment != 0); + + return p; +} + diff --git a/kmod/cachepc.h b/kmod/cachepc.h new file mode 100755 index 0000000..a88edb8 --- /dev/null +++ b/kmod/cachepc.h @@ -0,0 +1,150 @@ +#pragma once + +#include "asm.h" +#include "cache_types.h" +#include "util.h" +#include "cachepc_user.h" + +void cachepc_init_counters(void); + +cache_ctx *cachepc_get_ctx(cache_level cl); +void cachepc_release_ctx(cache_ctx *ctx); + +cacheline *cachepc_prepare_ds(cache_ctx *ctx); +void cachepc_release_ds(cache_ctx *ctx, cacheline *ds); + +cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set); +void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr); + +void cachepc_save_msrmts(cacheline *head); +void cachepc_print_msrmts(cacheline *head); + +__attribute__((always_inline)) +static inline cacheline *cachepc_prime(cacheline *head); + +__attribute__((always_inline)) +static inline cacheline *cachepc_prime_rev(cacheline *head); + +__attribute__((always_inline)) +static inline cacheline *cachepc_probe(cacheline *head); + +__attribute__((always_inline)) +static inline void cachepc_victim(void *p); + +extern uint16_t *cachepc_msrmts; +extern size_t cachepc_msrmts_count; + +extern cache_ctx *cachepc_ctx; +extern cacheline *cachepc_ds; + +/* + * Prime phase: fill the target cache (encoded in the size of the data structure) + * with the prepared data structure, i.e. with attacker data. + */ +cacheline * +cachepc_prime(cacheline *head) +{ + cacheline *curr_cl; + + //printk(KERN_WARNING "CachePC: Priming..\n"); + + cachepc_cpuid(); + curr_cl = head; + do { + curr_cl = curr_cl->next; + cachepc_mfence(); + } while(curr_cl != head); + cachepc_cpuid(); + + //printk(KERN_WARNING "CachePC: Priming done\n"); + + return curr_cl->prev; +} + +/* + * Same as prime, but in the reverse direction, i.e. the same direction that probe + * uses. This is beneficial for the following scenarios: + * - L1: + * - Trigger collision chain-reaction to amplify an evicted set (but this has + * the downside of more noisy measurements). + * - L2: + * - Always use this for L2, otherwise the first cache sets will still reside + * in L1 unless the victim filled L1 completely. 
In this case, an eviction + * has randomly (depending on where the cache set is placed in the randomised + * data structure) the following effect: + * A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower + * B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower + */ +cacheline * +cachepc_prime_rev(cacheline *head) +{ + cacheline *curr_cl; + + cachepc_cpuid(); + curr_cl = head; + do { + curr_cl = curr_cl->prev; + cachepc_mfence(); + } while(curr_cl != head); + cachepc_cpuid(); + + return curr_cl->prev; +} + +cacheline * +cachepc_probe(cacheline *start_cl) +{ + uint64_t pre, post; + cacheline *next_cl; + cacheline *curr_cl; + volatile register uint64_t i asm("r12"); + + curr_cl = start_cl; + + do { + pre = cachepc_readpmc(0); + pre += cachepc_readpmc(1); + + cachepc_mfence(); + cachepc_cpuid(); + + asm volatile( + "mov 8(%[curr_cl]), %%rax \n\t" // +8 + "mov 8(%%rax), %%rcx \n\t" // +16 + "mov 8(%%rcx), %%rax \n\t" // +24 + "mov 8(%%rax), %%rcx \n\t" // +32 + "mov 8(%%rcx), %%rax \n\t" // +40 + "mov 8(%%rax), %%rcx \n\t" // +48 + "mov 8(%%rcx), %[curr_cl_out] \n\t" // +56 + "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64 + : [next_cl_out] "=r" (next_cl), + [curr_cl_out] "=r" (curr_cl) + : [curr_cl] "r" (curr_cl) + : "rax", "rcx" + ); + + cachepc_mfence(); + cachepc_cpuid(); + + post = cachepc_readpmc(0); + post += cachepc_readpmc(1); + + cachepc_mfence(); + cachepc_cpuid(); + + /* works across size boundary */ + curr_cl->count = post - pre; + + curr_cl = next_cl; + } while (__builtin_expect(curr_cl != start_cl, 1)); + + return curr_cl->next; +} + +void +cachepc_victim(void *p) +{ + cachepc_cpuid(); + cachepc_mfence(); + cachepc_readq(p); +} diff --git a/kmod/cachepc_user.h b/kmod/cachepc_user.h new file mode 100644 index 0000000..cb4e921 --- /dev/null +++ b/kmod/cachepc_user.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +#define CACHEPC_IOCTL_MAGIC 0xBF +#define CACHEPC_IOCTL_ACCESS_TEST _IOR(CACHEPC_IOCTL_MAGIC, 0, int) +#define CACHEPC_IOCTL_EVICTION_TEST _IOR(CACHEPC_IOCTL_MAGIC, 1, int) diff --git a/kmod/device_conf.h b/kmod/device_conf.h new file mode 100755 index 0000000..e24d681 --- /dev/null +++ b/kmod/device_conf.h @@ -0,0 +1,29 @@ +#pragma once + +// TODO: Read from kernel headers + +// General settings +// #define PAGE_SIZE 4096 +#define PROCESSOR_FREQ 2900000000 + +// Cache related settings +#define CACHELINE_SIZE 64 +#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE) + +// Addressing: +// - virtual: 0 +// - physical: 1 +#define L1_ADDRESSING 0 +#define L1_SETS 64 +#define L1_ASSOCIATIVITY 8 +#define L1_ACCESS_TIME 4 + +#define L2_ADDRESSING 1 +#define L2_SETS 512 +#define L2_ASSOCIATIVITY 8 +#define L2_ACCESS_TIME 12 + +#define L3_ADDRESSING 1 +#define L3_SETS 4096 +#define L3_ASSOCIATIVITY 16 +#define L3_ACCESS_TIME 30 diff --git a/kmod/util.c b/kmod/util.c new file mode 100755 index 0000000..166ec3e --- /dev/null +++ b/kmod/util.c @@ -0,0 +1,55 @@ +#include "util.h" + +static size_t random_pos = 0; +static uint8_t random[] = { 90, 227, 179, 229, 27, 117, 69, 81, 188, 253, 129, 140, 140, 180, 191, 152, 194, 98, 169, 205, 254, 155, 249, 81, 208, 245, 186, 80, 81, 50, 63, 67, 200, 108, 70, 32, 239, 158, 38, 234, 183, 130, 141, 175, 39, 230, 107, 199, 59, 43, 238, 122, 103, 25, 184, 66, 31, 239, 57, 92, 119, 101, 147, 188, 171, 112, 209, 227, 92, 224, 9, 150, 220, 10, 154, 92, 86, 39, 154, 140, 65, 57, 158, 47, 142, 168, 222, 200, 69, 183, 160, 249, 103, 45, 241, 112, 49, 85, 2, 73, 255, 16, 132, 215, 190, 143, 215, 128, 119, 75, 136, 112, 67, 27, 213, 78, 127, 
1, 197, 18, 122, 216, 123, 244, 11, 154, 124, 212, 171, 29, 184, 45, 42, 128, 124, 168, 112, 191, 139, 136, 20, 127, 169, 75, 220, 4, 162, 207, 80, 147, 25, 39, 232, 219, 100, 13, 199, 88, 19, 40, 141, 2, 16, 109, 40, 127, 47, 60, 221, 151, 156, 115, 182, 198, 231, 193, 36, 89, 127, 31, 187, 47, 109, 70, 75, 115, 221, 236, 46, 65, 151, 48, 185, 157, 177, 152, 134, 38, 246, 146, 15, 67, 80, 192, 74, 244, 250, 194, 21, 19, 151, 199, 124, 9, 174, 171, 239, 146, 213, 214, 226, 137, 237, 13, 92, 87, 10, 144, 21, 143, 158, 130, 129, 176, 40, 25, 247, 182, 90, 226, 14, 199, 219, 242, 52, 225, 154, 218, 242, 191, 53, 253, 36, 62, 154, 13, 145, 182, 72, 234, 140, 166, 125, 93, 236, 14, 40, 183, 48, 138, 240, 243, 100, 119, 160, 73, 182, 204, 130, 108, 80, 226, 13, 36, 118, 245, 85, 205, 131, 110, 69, 116, 130, 211, 243, 182, 180, 28, 197, 224, 245, 78, 122, 135, 194, 31, 138, 178, 194, 150, 42, 190, 7, 217, 100, 19, 161, 154, 237, 76, 135, 63, 2, 33, 229, 164, 223, 175, 0, 51, 177, 78, 13, 241, 198, 152, 109, 166, 92, 226, 42, 213, 148, 149, 144, 39, 20, 51, 239, 153, 56, 198, 190, 165, 243, 108, 66, 132, 127, 179, 182, 211, 207, 107, 223, 188, 198, 103, 147, 127, 87, 187, 137, 123, 72, 141, 156, 28, 76, 234, 244, 108, 176, 227, 221, 26, 110, 81, 28, 187, 14, 24, 82, 218, 201, 156, 20, 184, 105, 117, 188, 132, 243, 11, 13, 188, 243, 181, 98, 136, 124, 152, 254, 228, 221, 114, 140, 103, 44, 55, 147, 227, 241, 96, 198, 27, 98, 35, 179, 6, 244, 17, 152, 128, 44, 75, 8, 18, 122, 79, 244, 210, 8, 168, 99, 80, 19, 100, 38, 6, 243, 216, 200, 105, 164, 29, 171, 232, 247, 218, 17, 133, 232, 68, 140, 100, 106, 49, 17, 90, 178, 38, 69, 238, 23, 174, 180, 90, 18, 12, 71, 45, 101, 200, 83, 77, 95, 218, 91, 176, 63, 179, 203, 125, 56, 171, 218, 98, 135, 127, 214, 63, 41, 151, 197, 157, 192, 152, 67, 67, 157, 54, 123, 111, 118, 45, 94, 15, 81, 123, 125, 169, 67, 50, 150, 113, 147, 13, 16, 86, 2, 135, 129, 88, 154, 246, 170, 223, 47, 247, 190, 187, 35, 213, 194, 67, 226, 181, 208, 135, 75, 30, 233, 136, 45, 222, 121, 60, 157, 48, 171, 244, 52, 40, 187, 8, 23, 173, 41, 157, 165, 158, 92, 139, 22, 95, 72, 164, 142, 213, 156, 102, 196, 108, 228, 203, 99, 72, 254, 173, 37, 212, 150, 145, 104, 76, 117, 242, 185, 180, 108, 50, 188, 206, 40, 52, 55, 147, 240, 89, 248, 203, 110, 237, 24, 88, 63, 99, 224, 121, 229, 90, 253, 12, 72, 24, 3, 247, 127, 35, 178, 198, 80, 151, 223, 243, 195, 114, 5, 134, 250, 85, 182, 154, 206, 41, 53, 50, 59, 174, 117, 203, 200, 33, 182, 230, 147, 101, 36, 111, 23, 187, 130, 16, 211, 90, 102, 207, 154, 140, 123, 212, 66, 45, 35, 165, 139, 109, 169, 226, 210, 115, 16, 92, 196, 31, 245, 154, 110, 181, 161, 126, 184, 177, 237, 125, 181, 71, 120, 86, 222, 179, 133, 113, 72, 206, 157, 89, 162, 80, 164, 223, 38, 17, 238, 114, 188, 125, 69, 1, 28, 126, 249, 180, 189, 144, 215, 152, 89, 92, 62, 98, 151, 242, 46, 48, 162, 3, 95, 211, 122, 217, 36, 235, 109, 100, 94, 233, 173, 150, 71, 125, 201, 168, 4, 180, 248, 249, 240, 50, 206, 242, 169, 201, 31, 137, 198, 93, 241, 219, 11, 9, 1, 229, 249, 194, 67, 41, 143, 117, 103, 238, 247, 72, 178, 21, 193, 146, 119, 159, 21, 253, 206, 66, 186, 60, 200, 102, 179, 117, 103, 32, 0, 116, 31, 133, 129, 127, 38, 5, 177, 195, 25, 23, 86, 29, 222, 53, 0, 140, 179, 118, 239, 141, 237, 122, 80, 200, 92, 47, 15, 58, 167, 26, 37, 146, 254, 3, 74, 148, 159, 221, 38, 68, 110, 98, 82, 16, 171, 232, 139, 72, 87, 114, 113, 61, 210, 82, 180, 196, 8, 14, 249, 185, 159, 253, 166, 200, 82, 176, 112, 173, 246, 40, 22, 202, 140, 76, 60, 92, 225, 10, 198, 41, 26, 223, 250, 181, 135, 
196, 230, 10, 103, 197, 128, 155, 148, 121, 150, 51, 196, 143, 183, 153, 229, 93, 118, 12, 235, 237, 105, 73, 27, 24, 86, 248, 39, 190, 71, 184, 212, 74, 196, 181, 46, 140, 9, 18, 168, 110, 30, 93, 166, 44, 153, 88, 82, 148, 237, 146, 173, 158, 29, 215, 202, 3, 224, 240, 186, 202, 52, 123, 244, 226, 109, 79, 174, 245, 35, 242, 82, 187, 101, 69, 245, 104, 139, 118, 134, 236, 135, 243, 10, 149, 162, 212, 245, 132, 3, 90, 38, 96, 28, 98, 200, 80, 141, 252, 40, 214, 80, 152, 221, 239, 166, 135, 104, 105, 227, 248, 102, 53, 78, 186, 95, 15, 97, 58, 129, 98, 219, 233, 167, 89, 198, 175, 98, 77, 20, 182, 112, 104, 165, 34 }; + +void +prng_bytes(uint8_t *dst, size_t size) +{ + size_t i; + + if (random_pos + size > sizeof(random)) + random_pos = 0; + + for (i = 0; i < size; i++) + dst[i] = random[random_pos + i]; + + random_pos += size; +} + +void +random_perm(uint32_t *arr, uint32_t arr_len) +{ + uint32_t i, idx, tmp; + + for (i = arr_len - 1; i > 0; --i) { + prng_bytes((void*)&idx, 4); + idx = idx % i; + + tmp = arr[idx]; + arr[idx] = arr[i]; + arr[i] = tmp; + } +} + +void +gen_random_indices(uint32_t *arr, uint32_t arr_len) +{ + uint32_t i; + + for (i = 0; i < arr_len; ++i) + arr[i] = i; + random_perm(arr, arr_len); +} + + +bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { + uint32_t i; + + for (i = 0; i < arr_len; ++i) { + if (arr[i] == elem) + return true; + } + + return false; +} diff --git a/kmod/util.h b/kmod/util.h new file mode 100755 index 0000000..7b543aa --- /dev/null +++ b/kmod/util.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +void gen_rand_bytes(unsigned char *arr, uint32_t arr_len); +void random_perm(uint32_t *arr, uint32_t arr_len); +void gen_random_indices(uint32_t *arr, uint32_t arr_len); + +bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); diff --git a/read.c b/read.c deleted file mode 100755 index b71271a..0000000 --- a/read.c +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int -main(int argc, const char **argv) -{ - uint16_t counts[64]; - size_t i, len; - int fd; - - fd = open("/proc/cachepc", O_RDONLY); - len = read(fd, counts, sizeof(counts)); - assert(len == sizeof(counts)); - - for (i = 0; i < 64; i++) { - //printf("%d %hu\n", i, counts[i]); - //continue; - if (i % 16 == 0 && i) - printf("\n"); - if (counts[i] > 0) - printf("\x1b[91m"); - printf("%2i ", i); - if (counts[i] > 0) - printf("\x1b[0m"); - } - printf("\n"); - - close(fd); -} diff --git a/src/asm.h b/src/asm.h deleted file mode 100755 index 35f803b..0000000 --- a/src/asm.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include - -#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx" - -__attribute__((always_inline)) -static inline uint64_t cachepc_readpmc(uint64_t event); - -__attribute__((always_inline)) -static inline void cachepc_cpuid(void); - -__attribute__((always_inline)) -static inline void cachepc_lfence(void); - -__attribute__((always_inline)) -static inline void cachepc_sfence(void); - -__attribute__((always_inline)) -static inline void cachepc_mfence(void); - -__attribute__((always_inline)) -static inline void cachepc_readq(void *p); - -uint64_t -cachepc_readpmc(uint64_t event) -{ - uint32_t lo, hi; - - event = 0xC0010201 + 2 * event; - - asm volatile ( - "rdmsr" - : "=a" (lo), "=d" (hi) - : "c"(event) - ); - - return ((uint64_t) hi << 32) | (uint64_t) lo; -} - -void -cachepc_cpuid(void) -{ - asm volatile( - "mov $0x80000005, %%eax\n\t" - "cpuid\n\t" - ::: CPUID_AFFECTED_REGS - ); -} - -void -cachepc_lfence(void) -{ - asm 
volatile( - "lfence\n\t" - ::: "memory" - ); -} - -void -cachepc_sfence(void) -{ - asm volatile( - "sfence\n\t" - ::: "memory" - ); -} - -void -cachepc_mfence(void) -{ - asm volatile( - "mfence\n\t" - ::: "memory" - ); -} - -void -cachepc_readq(void *p) -{ - asm volatile ( - "movq (%0), %%r10\n\t" - : : "r" (p) : "r10" - ); -} diff --git a/src/cache_types.h b/src/cache_types.h deleted file mode 100755 index 33da39b..0000000 --- a/src/cache_types.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "device_conf.h" - -#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1)) - -#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK)) - -#define GET_BIT(b, i) (((b) >> (i)) & 1) -#define SET_BIT(b, i) ((b) | (1 << (i))) - -/* Operate cacheline flags - * Used flags: - * 32 2 1 0 - * | | ... | cache group initialized | last | first | - */ -#define DEFAULT_FLAGS 0 -#define SET_FIRST(flags) SET_BIT(flags, 0) -#define SET_LAST(flags) SET_BIT(flags, 1) -#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2) -#define IS_FIRST(flags) GET_BIT(flags, 0) -#define IS_LAST(flags) GET_BIT(flags, 1) -#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2) - -// Offset of the next and prev field in the cacheline struct -#define CL_NEXT_OFFSET 0 -#define CL_PREV_OFFSET 8 - -typedef enum cache_level cache_level; -typedef enum addressing_type addressing_type; -typedef struct cacheline cacheline; -typedef struct cache_ctx cache_ctx; - -enum cache_level {L1, L2}; -enum addressing_type {VIRTUAL, PHYSICAL}; - -struct cache_ctx { - cache_level cache_level; - addressing_type addressing; - - uint32_t sets; - uint32_t associativity; - uint32_t access_time; - uint32_t nr_of_cachelines; - uint32_t set_size; - uint32_t cache_size; -}; - -struct cacheline { - // Doubly linked list inside same set - // Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET - // must be kept up to date - cacheline *next; - cacheline *prev; - - uint16_t cache_set; - uint16_t flags; - - // Unused padding to fill cache line - uint64_t count; - char padding[32]; -}; - -static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size"); diff --git a/src/cachepc.c b/src/cachepc.c deleted file mode 100755 index 702cfad..0000000 --- a/src/cachepc.c +++ /dev/null @@ -1,442 +0,0 @@ -#include "cachepc.h" - -#include -#include -#include -#include -#include - -static void cl_insert(cacheline *last_cl, cacheline *new_cl); -static void *remove_cache_set(cache_ctx *ctx, void *ptr); -static void *remove_cache_group_set(void *ptr); - -static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len); -static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr); -static cacheline **allocate_cache_ds(cache_ctx *ctx); -static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); -static void *aligned_alloc(size_t alignment, size_t size); - -void -cachepc_init_counters(void) -{ - uint64_t event, event_no, event_mask; - uint64_t reg_addr; - - /* SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166) - * - * performance event selection is done via 0xC001_020X with X = (0..A)[::2] - * performance event reading is done viea 0XC001_020X with X = (1..B)[::2] - * - * 6 slots total - */ - - reg_addr = 0xc0010200; - event_no = 0x70; - event_mask = 0xFF; - event = event_no | (event_mask << 8); - event |= (1ULL << 17); /* OS (kernel) events 
only */ - event |= (1ULL << 22); /* enable performance counter */ - event |= (1ULL << 40); /* Host events only */ - printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); - - reg_addr = 0xc0010202; - event_no = 0x71; - event_mask = 0xFF; - event = event_no | (event_mask << 8); - event |= (1ULL << 17); /* OS (kernel) events only */ - event |= (1ULL << 22); /* enable performance counter */ - event |= (1ULL << 40); /* Host events only */ - printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); - - reg_addr = 0xc0010204; - event_no = 0x72; - event_mask = 0xFF; - event = event_no | (event_mask << 8); - event |= (1ULL << 17); /* OS (kernel) events only */ - event |= (1ULL << 22); /* enable performance counter */ - event |= (1ULL << 40); /* Host events only */ - printk(KERN_WARNING "CachePC: Initialized event %llu\n", event); - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); - -} - -cache_ctx * -cachepc_get_ctx(cache_level cache_level) -{ - cache_ctx *ctx; - - // printk(KERN_WARNING "CachePC: Getting ctx..\n"); - - ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); - BUG_ON(ctx == NULL); - - BUG_ON(cache_level != L1); - if (cache_level == L1) { - ctx->addressing = L1_ADDRESSING; - ctx->sets = L1_SETS; - ctx->associativity = L1_ASSOCIATIVITY; - ctx->access_time = L1_ACCESS_TIME; - } else if (cache_level == L2) { - ctx->addressing = L2_ADDRESSING; - ctx->sets = L2_SETS; - ctx->associativity = L2_ASSOCIATIVITY; - ctx->access_time = L2_ACCESS_TIME; - } else { - return NULL; - } - - ctx->cache_level = cache_level; - ctx->nr_of_cachelines = ctx->sets * ctx->associativity; - ctx->set_size = CACHELINE_SIZE * ctx->associativity; - ctx->cache_size = ctx->sets * ctx->set_size; - - // printk(KERN_WARNING "CachePC: Getting ctx done\n"); - - return ctx; -} - -void -cachepc_release_ctx(cache_ctx *ctx) -{ - kfree(ctx); -} - - -/* - * Initialises the complete cache data structure for the given context - */ -cacheline * -cachepc_prepare_ds(cache_ctx *ctx) -{ - cacheline **cacheline_ptr_arr; - cacheline *cache_ds; - - //printk(KERN_WARNING "CachePC: Preparing ds..\n"); - - cacheline_ptr_arr = allocate_cache_ds(ctx); - cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); - kfree(cacheline_ptr_arr); - - // printk(KERN_WARNING "CachePC: Preparing ds done\n"); - - return cache_ds; -} - -void -cachepc_release_ds(cache_ctx *ctx, cacheline *ds) -{ - kfree(remove_cache_set(ctx, ds)); -} - -cacheline * -cachepc_prepare_victim(cache_ctx *ctx, uint32_t set) -{ - cacheline *victim_set, *victim_cl; - cacheline *curr_cl, *next_cl; - - victim_set = prepare_cache_set_ds(ctx, &set, 1); - victim_cl = victim_set; - - // Free the other lines in the same set that are not used. - if (ctx->addressing == PHYSICAL) { - curr_cl = victim_cl->next; - do { - next_cl = curr_cl->next; - // Here, it is ok to free them directly, as every line in the same - // set is from a different page anyway. 
- kfree(remove_cache_group_set(curr_cl)); - curr_cl = next_cl; - } while(curr_cl != victim_cl); - } - - return victim_cl; -} - -void -cachepc_release_victim(cache_ctx *ctx, cacheline *victim) -{ - kfree(remove_cache_set(ctx, victim)); -} - -void -cachepc_save_msrmts(cacheline *head) -{ - cacheline *curr_cl; - - printk(KERN_WARNING "CachePC: Updating /proc/cachepc\n"); - - curr_cl = head; - do { - if (IS_FIRST(curr_cl->flags)) { - BUG_ON(curr_cl->cache_set >= cachepc_msrmts_count); - cachepc_msrmts[curr_cl->cache_set] = curr_cl->count; - } - - curr_cl = curr_cl->prev; - } while (curr_cl != head); -} - -void -cachepc_print_msrmts(cacheline *head) -{ - cacheline *curr_cl; - - curr_cl = head; - do { - if (IS_FIRST(curr_cl->flags)) { - printk(KERN_WARNING "CachePC: Count for cache set %i: %llu\n", - curr_cl->cache_set, curr_cl->count); - } - - curr_cl = curr_cl->prev; - } while (curr_cl != head); -} - - -cacheline * -prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len) -{ - cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets; - cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds; - uint32_t i, cache_groups_len, cache_groups_max_len; - uint32_t *cache_groups; - - cache_ds = cachepc_prepare_ds(ctx); - - first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(first_cl_in_sets == NULL); - - last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(last_cl_in_sets == NULL); - - // Find the cache groups that are used, so that we can delete the other ones - // later (to avoid memory leaks) - cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE; - cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(cache_groups == NULL); - - cache_groups_len = 0; - for (i = 0; i < sets_len; ++i) { - if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) { - cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE; - ++cache_groups_len; - } - } - - to_del_cls = NULL; - curr_cl = cache_ds; - - // Extract the partial data structure for the cache sets and ensure correct freeing - do { - next_cl = curr_cl->next; - - if (IS_FIRST(curr_cl->flags)) { - first_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - if (IS_LAST(curr_cl->flags)) { - last_cl_in_sets[curr_cl->cache_set] = curr_cl; - } - - if (ctx->addressing == PHYSICAL && !is_in_arr( - curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) - { - // Already free all unused blocks of the cache ds for physical - // addressing, because we loose their refs - cl_insert(to_del_cls, curr_cl); - to_del_cls = curr_cl; - } - curr_cl = next_cl; - - } while(curr_cl != cache_ds); - - // Fix partial cache set ds - for (i = 0; i < sets_len; ++i) { - last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]]; - first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]]; - } - cache_set_ds = first_cl_in_sets[sets[0]]; - - // Free unused cache lines - if (ctx->addressing == PHYSICAL) { - cachepc_release_ds(ctx, to_del_cls); - } - - kfree(first_cl_in_sets); - kfree(last_cl_in_sets); - kfree(cache_groups); - - return cache_set_ds; -} - -void -cl_insert(cacheline *last_cl, cacheline *new_cl) -{ - if (last_cl == NULL) { - // Adding the first entry is a special case - new_cl->next = new_cl; - new_cl->prev = new_cl; - } else { - new_cl->next = last_cl->next; - new_cl->prev = last_cl; - last_cl->next->prev = new_cl; - last_cl->next = new_cl; - } -} - -void * -remove_cache_set(cache_ctx *ctx, void *ptr) -{ - return 
(void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets)); -} - -void * -remove_cache_group_set(void *ptr) -{ - return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE)); -} - - -/* - * Create a randomized doubly linked list with the following structure: - * set A <--> set B <--> ... <--> set X <--> set A - * where each set is one of the cache sets, in a random order. - * The sets are a doubly linked list of cachelines themselves: - * set A: - * line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ... - * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D - * and D = Associativity = | cache set | - */ -cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) { - cacheline **cl_ptr_arr_sorted; - cacheline *curr_cl, *next_cl; - cacheline *cache_ds; - uint32_t *idx_per_set; - uint32_t idx_curr_set, set_offset; - uint32_t i, j, set, set_len; - uint32_t *idx_map; - - idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_per_set == NULL); - - cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr_sorted == NULL); - - set_len = ctx->associativity; - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - set_offset = cl_ptr_arr[i]->cache_set * set_len; - idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set]; - - cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i]; - idx_per_set[cl_ptr_arr[i]->cache_set] += 1; - } - - // Build doubly linked list for every set - for (set = 0; set < ctx->sets; ++set) { - set_offset = set * set_len; - build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset); - } - - // Relink the sets among each other - idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, ctx->sets); - - curr_cl = cl_ptr_arr_sorted[idx_map[0] * set_len]->prev; - for (j = 0; j < ctx->sets; ++j) { - curr_cl->next = cl_ptr_arr_sorted[idx_map[(j + 1) % ctx->sets] * set_len]; - next_cl = curr_cl->next->prev; - curr_cl->next->prev = curr_cl; - curr_cl = next_cl; - } - - cache_ds = cl_ptr_arr_sorted[idx_map[0] * set_len]; - - kfree(cl_ptr_arr_sorted); - kfree(idx_per_set); - kfree(idx_map); - - return cache_ds; -} - -/* - * Helper function to build a randomised list of cacheline structs for a set - */ -void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr) -{ - cacheline *curr_cl; - uint32_t len, *idx_map; - uint16_t i; - - len = ctx->associativity; - idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL); - BUG_ON(idx_map == NULL); - - gen_random_indices(idx_map, len); - - for (i = 0; i < len; ++i) { - curr_cl = cacheline_ptr_arr[idx_map[i]]; - curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]]; - curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]]; - curr_cl->count = 0; - - if (idx_map[i] == 0) { - curr_cl->flags = SET_FIRST(DEFAULT_FLAGS); - curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS); - } else { - curr_cl->flags = curr_cl->flags | DEFAULT_FLAGS; - } - } - - kfree(idx_map); -} - -/* - * Allocate a data structure that fills the complete cache, i.e. consisting - * of `associativity` many cache lines for each cache set. 
- */ -cacheline ** -allocate_cache_ds(cache_ctx *ctx) -{ - cacheline **cl_ptr_arr, *cl_arr; - uint32_t i; - - cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL); - BUG_ON(cl_ptr_arr == NULL); - - BUG_ON(ctx->addressing != VIRTUAL); - - // For virtual addressing, allocating a consecutive chunk of memory is enough - cl_arr = aligned_alloc(PAGE_SIZE, ctx->cache_size); - BUG_ON(cl_arr == NULL); - - for (i = 0; i < ctx->nr_of_cachelines; ++i) { - cl_ptr_arr[i] = cl_arr + i; - cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]); - } - - return cl_ptr_arr; -} - -uint16_t -get_virt_cache_set(cache_ctx *ctx, void *ptr) -{ - return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE); -} - -void * -aligned_alloc(size_t alignment, size_t size) -{ - void *p; - - if (size % alignment != 0) - size = size - (size % alignment) + alignment; - p = kzalloc(size, GFP_KERNEL); - BUG_ON(((uintptr_t) p) % alignment != 0); - - return p; -} - diff --git a/src/cachepc.h b/src/cachepc.h deleted file mode 100755 index a88edb8..0000000 --- a/src/cachepc.h +++ /dev/null @@ -1,150 +0,0 @@ -#pragma once - -#include "asm.h" -#include "cache_types.h" -#include "util.h" -#include "cachepc_user.h" - -void cachepc_init_counters(void); - -cache_ctx *cachepc_get_ctx(cache_level cl); -void cachepc_release_ctx(cache_ctx *ctx); - -cacheline *cachepc_prepare_ds(cache_ctx *ctx); -void cachepc_release_ds(cache_ctx *ctx, cacheline *ds); - -cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set); -void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr); - -void cachepc_save_msrmts(cacheline *head); -void cachepc_print_msrmts(cacheline *head); - -__attribute__((always_inline)) -static inline cacheline *cachepc_prime(cacheline *head); - -__attribute__((always_inline)) -static inline cacheline *cachepc_prime_rev(cacheline *head); - -__attribute__((always_inline)) -static inline cacheline *cachepc_probe(cacheline *head); - -__attribute__((always_inline)) -static inline void cachepc_victim(void *p); - -extern uint16_t *cachepc_msrmts; -extern size_t cachepc_msrmts_count; - -extern cache_ctx *cachepc_ctx; -extern cacheline *cachepc_ds; - -/* - * Prime phase: fill the target cache (encoded in the size of the data structure) - * with the prepared data structure, i.e. with attacker data. - */ -cacheline * -cachepc_prime(cacheline *head) -{ - cacheline *curr_cl; - - //printk(KERN_WARNING "CachePC: Priming..\n"); - - cachepc_cpuid(); - curr_cl = head; - do { - curr_cl = curr_cl->next; - cachepc_mfence(); - } while(curr_cl != head); - cachepc_cpuid(); - - //printk(KERN_WARNING "CachePC: Priming done\n"); - - return curr_cl->prev; -} - -/* - * Same as prime, but in the reverse direction, i.e. the same direction that probe - * uses. This is beneficial for the following scenarios: - * - L1: - * - Trigger collision chain-reaction to amplify an evicted set (but this has - * the downside of more noisy measurements). - * - L2: - * - Always use this for L2, otherwise the first cache sets will still reside - * in L1 unless the victim filled L1 completely. 
In this case, an eviction - * has randomly (depending on where the cache set is placed in the randomised - * data structure) the following effect: - * A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower - * B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower - */ -cacheline * -cachepc_prime_rev(cacheline *head) -{ - cacheline *curr_cl; - - cachepc_cpuid(); - curr_cl = head; - do { - curr_cl = curr_cl->prev; - cachepc_mfence(); - } while(curr_cl != head); - cachepc_cpuid(); - - return curr_cl->prev; -} - -cacheline * -cachepc_probe(cacheline *start_cl) -{ - uint64_t pre, post; - cacheline *next_cl; - cacheline *curr_cl; - volatile register uint64_t i asm("r12"); - - curr_cl = start_cl; - - do { - pre = cachepc_readpmc(0); - pre += cachepc_readpmc(1); - - cachepc_mfence(); - cachepc_cpuid(); - - asm volatile( - "mov 8(%[curr_cl]), %%rax \n\t" // +8 - "mov 8(%%rax), %%rcx \n\t" // +16 - "mov 8(%%rcx), %%rax \n\t" // +24 - "mov 8(%%rax), %%rcx \n\t" // +32 - "mov 8(%%rcx), %%rax \n\t" // +40 - "mov 8(%%rax), %%rcx \n\t" // +48 - "mov 8(%%rcx), %[curr_cl_out] \n\t" // +56 - "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64 - : [next_cl_out] "=r" (next_cl), - [curr_cl_out] "=r" (curr_cl) - : [curr_cl] "r" (curr_cl) - : "rax", "rcx" - ); - - cachepc_mfence(); - cachepc_cpuid(); - - post = cachepc_readpmc(0); - post += cachepc_readpmc(1); - - cachepc_mfence(); - cachepc_cpuid(); - - /* works across size boundary */ - curr_cl->count = post - pre; - - curr_cl = next_cl; - } while (__builtin_expect(curr_cl != start_cl, 1)); - - return curr_cl->next; -} - -void -cachepc_victim(void *p) -{ - cachepc_cpuid(); - cachepc_mfence(); - cachepc_readq(p); -} diff --git a/src/cachepc_user.h b/src/cachepc_user.h deleted file mode 100644 index cb4e921..0000000 --- a/src/cachepc_user.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include - -#define CACHEPC_IOCTL_MAGIC 0xBF -#define CACHEPC_IOCTL_ACCESS_TEST _IOR(CACHEPC_IOCTL_MAGIC, 0, int) -#define CACHEPC_IOCTL_EVICTION_TEST _IOR(CACHEPC_IOCTL_MAGIC, 1, int) diff --git a/src/device_conf.h b/src/device_conf.h deleted file mode 100755 index e24d681..0000000 --- a/src/device_conf.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -// TODO: Read from kernel headers - -// General settings -// #define PAGE_SIZE 4096 -#define PROCESSOR_FREQ 2900000000 - -// Cache related settings -#define CACHELINE_SIZE 64 -#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE) - -// Addressing: -// - virtual: 0 -// - physical: 1 -#define L1_ADDRESSING 0 -#define L1_SETS 64 -#define L1_ASSOCIATIVITY 8 -#define L1_ACCESS_TIME 4 - -#define L2_ADDRESSING 1 -#define L2_SETS 512 -#define L2_ASSOCIATIVITY 8 -#define L2_ACCESS_TIME 12 - -#define L3_ADDRESSING 1 -#define L3_SETS 4096 -#define L3_ASSOCIATIVITY 16 -#define L3_ACCESS_TIME 30 diff --git a/src/util.c b/src/util.c deleted file mode 100755 index 166ec3e..0000000 --- a/src/util.c +++ /dev/null @@ -1,55 +0,0 @@ -#include "util.h" - -static size_t random_pos = 0; -static uint8_t random[] = { 90, 227, 179, 229, 27, 117, 69, 81, 188, 253, 129, 140, 140, 180, 191, 152, 194, 98, 169, 205, 254, 155, 249, 81, 208, 245, 186, 80, 81, 50, 63, 67, 200, 108, 70, 32, 239, 158, 38, 234, 183, 130, 141, 175, 39, 230, 107, 199, 59, 43, 238, 122, 103, 25, 184, 66, 31, 239, 57, 92, 119, 101, 147, 188, 171, 112, 209, 227, 92, 224, 9, 150, 220, 10, 154, 92, 86, 39, 154, 140, 65, 57, 158, 47, 142, 168, 222, 200, 69, 183, 160, 249, 103, 45, 241, 112, 49, 85, 2, 73, 255, 16, 132, 215, 190, 143, 215, 128, 119, 75, 136, 112, 67, 27, 213, 78, 
127, 1, 197, 18, 122, 216, 123, 244, 11, 154, 124, 212, 171, 29, 184, 45, 42, 128, 124, 168, 112, 191, 139, 136, 20, 127, 169, 75, 220, 4, 162, 207, 80, 147, 25, 39, 232, 219, 100, 13, 199, 88, 19, 40, 141, 2, 16, 109, 40, 127, 47, 60, 221, 151, 156, 115, 182, 198, 231, 193, 36, 89, 127, 31, 187, 47, 109, 70, 75, 115, 221, 236, 46, 65, 151, 48, 185, 157, 177, 152, 134, 38, 246, 146, 15, 67, 80, 192, 74, 244, 250, 194, 21, 19, 151, 199, 124, 9, 174, 171, 239, 146, 213, 214, 226, 137, 237, 13, 92, 87, 10, 144, 21, 143, 158, 130, 129, 176, 40, 25, 247, 182, 90, 226, 14, 199, 219, 242, 52, 225, 154, 218, 242, 191, 53, 253, 36, 62, 154, 13, 145, 182, 72, 234, 140, 166, 125, 93, 236, 14, 40, 183, 48, 138, 240, 243, 100, 119, 160, 73, 182, 204, 130, 108, 80, 226, 13, 36, 118, 245, 85, 205, 131, 110, 69, 116, 130, 211, 243, 182, 180, 28, 197, 224, 245, 78, 122, 135, 194, 31, 138, 178, 194, 150, 42, 190, 7, 217, 100, 19, 161, 154, 237, 76, 135, 63, 2, 33, 229, 164, 223, 175, 0, 51, 177, 78, 13, 241, 198, 152, 109, 166, 92, 226, 42, 213, 148, 149, 144, 39, 20, 51, 239, 153, 56, 198, 190, 165, 243, 108, 66, 132, 127, 179, 182, 211, 207, 107, 223, 188, 198, 103, 147, 127, 87, 187, 137, 123, 72, 141, 156, 28, 76, 234, 244, 108, 176, 227, 221, 26, 110, 81, 28, 187, 14, 24, 82, 218, 201, 156, 20, 184, 105, 117, 188, 132, 243, 11, 13, 188, 243, 181, 98, 136, 124, 152, 254, 228, 221, 114, 140, 103, 44, 55, 147, 227, 241, 96, 198, 27, 98, 35, 179, 6, 244, 17, 152, 128, 44, 75, 8, 18, 122, 79, 244, 210, 8, 168, 99, 80, 19, 100, 38, 6, 243, 216, 200, 105, 164, 29, 171, 232, 247, 218, 17, 133, 232, 68, 140, 100, 106, 49, 17, 90, 178, 38, 69, 238, 23, 174, 180, 90, 18, 12, 71, 45, 101, 200, 83, 77, 95, 218, 91, 176, 63, 179, 203, 125, 56, 171, 218, 98, 135, 127, 214, 63, 41, 151, 197, 157, 192, 152, 67, 67, 157, 54, 123, 111, 118, 45, 94, 15, 81, 123, 125, 169, 67, 50, 150, 113, 147, 13, 16, 86, 2, 135, 129, 88, 154, 246, 170, 223, 47, 247, 190, 187, 35, 213, 194, 67, 226, 181, 208, 135, 75, 30, 233, 136, 45, 222, 121, 60, 157, 48, 171, 244, 52, 40, 187, 8, 23, 173, 41, 157, 165, 158, 92, 139, 22, 95, 72, 164, 142, 213, 156, 102, 196, 108, 228, 203, 99, 72, 254, 173, 37, 212, 150, 145, 104, 76, 117, 242, 185, 180, 108, 50, 188, 206, 40, 52, 55, 147, 240, 89, 248, 203, 110, 237, 24, 88, 63, 99, 224, 121, 229, 90, 253, 12, 72, 24, 3, 247, 127, 35, 178, 198, 80, 151, 223, 243, 195, 114, 5, 134, 250, 85, 182, 154, 206, 41, 53, 50, 59, 174, 117, 203, 200, 33, 182, 230, 147, 101, 36, 111, 23, 187, 130, 16, 211, 90, 102, 207, 154, 140, 123, 212, 66, 45, 35, 165, 139, 109, 169, 226, 210, 115, 16, 92, 196, 31, 245, 154, 110, 181, 161, 126, 184, 177, 237, 125, 181, 71, 120, 86, 222, 179, 133, 113, 72, 206, 157, 89, 162, 80, 164, 223, 38, 17, 238, 114, 188, 125, 69, 1, 28, 126, 249, 180, 189, 144, 215, 152, 89, 92, 62, 98, 151, 242, 46, 48, 162, 3, 95, 211, 122, 217, 36, 235, 109, 100, 94, 233, 173, 150, 71, 125, 201, 168, 4, 180, 248, 249, 240, 50, 206, 242, 169, 201, 31, 137, 198, 93, 241, 219, 11, 9, 1, 229, 249, 194, 67, 41, 143, 117, 103, 238, 247, 72, 178, 21, 193, 146, 119, 159, 21, 253, 206, 66, 186, 60, 200, 102, 179, 117, 103, 32, 0, 116, 31, 133, 129, 127, 38, 5, 177, 195, 25, 23, 86, 29, 222, 53, 0, 140, 179, 118, 239, 141, 237, 122, 80, 200, 92, 47, 15, 58, 167, 26, 37, 146, 254, 3, 74, 148, 159, 221, 38, 68, 110, 98, 82, 16, 171, 232, 139, 72, 87, 114, 113, 61, 210, 82, 180, 196, 8, 14, 249, 185, 159, 253, 166, 200, 82, 176, 112, 173, 246, 40, 22, 202, 140, 76, 60, 92, 225, 10, 198, 41, 26, 223, 250, 181, 
135, 196, 230, 10, 103, 197, 128, 155, 148, 121, 150, 51, 196, 143, 183, 153, 229, 93, 118, 12, 235, 237, 105, 73, 27, 24, 86, 248, 39, 190, 71, 184, 212, 74, 196, 181, 46, 140, 9, 18, 168, 110, 30, 93, 166, 44, 153, 88, 82, 148, 237, 146, 173, 158, 29, 215, 202, 3, 224, 240, 186, 202, 52, 123, 244, 226, 109, 79, 174, 245, 35, 242, 82, 187, 101, 69, 245, 104, 139, 118, 134, 236, 135, 243, 10, 149, 162, 212, 245, 132, 3, 90, 38, 96, 28, 98, 200, 80, 141, 252, 40, 214, 80, 152, 221, 239, 166, 135, 104, 105, 227, 248, 102, 53, 78, 186, 95, 15, 97, 58, 129, 98, 219, 233, 167, 89, 198, 175, 98, 77, 20, 182, 112, 104, 165, 34 }; - -void -prng_bytes(uint8_t *dst, size_t size) -{ - size_t i; - - if (random_pos + size > sizeof(random)) - random_pos = 0; - - for (i = 0; i < size; i++) - dst[i] = random[random_pos + i]; - - random_pos += size; -} - -void -random_perm(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i, idx, tmp; - - for (i = arr_len - 1; i > 0; --i) { - prng_bytes((void*)&idx, 4); - idx = idx % i; - - tmp = arr[idx]; - arr[idx] = arr[i]; - arr[i] = tmp; - } -} - -void -gen_random_indices(uint32_t *arr, uint32_t arr_len) -{ - uint32_t i; - - for (i = 0; i < arr_len; ++i) - arr[i] = i; - random_perm(arr, arr_len); -} - - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) { - uint32_t i; - - for (i = 0; i < arr_len; ++i) { - if (arr[i] == elem) - return true; - } - - return false; -} diff --git a/src/util.h b/src/util.h deleted file mode 100755 index 7b543aa..0000000 --- a/src/util.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include - -void gen_rand_bytes(unsigned char *arr, uint32_t arr_len); -void random_perm(uint32_t *arr, uint32_t arr_len); -void gen_random_indices(uint32_t *arr, uint32_t arr_len); - -bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len); diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..54ec702 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,3 @@ +access +eviction +kvm diff --git a/test/access.c b/test/access.c new file mode 100644 index 0000000..636471e --- /dev/null +++ b/test/access.c @@ -0,0 +1,29 @@ +#include "cachepc_user.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +int +main(int argc, const char **argv) +{ + size_t i, len; + int fd, ret; + int count; + + fd = open("/proc/cachepc", O_RDONLY); + if (fd < 0) err(1, "open"); + + for (i = 0; i < 50; i++) { + ret = ioctl(fd, CACHEPC_IOCTL_ACCESS_TEST, &count); + if (ret == -1) err(1, "ioctl fail"); + printf("%i\n", count); + } + + close(fd); +} diff --git a/test/eviction.c b/test/eviction.c new file mode 100644 index 0000000..23e3430 --- /dev/null +++ b/test/eviction.c @@ -0,0 +1,39 @@ +#include "cachepc_user.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +int +main(int argc, const char **argv) +{ + uint16_t counts[64]; + size_t i, len; + int fd, ret; + + fd = open("/proc/cachepc", O_RDONLY); + + ret = ioctl(fd, CACHEPC_IOCTL_EVICTION_TEST, NULL); + if (ret == -1) err(1, "ioctl fail"); + + len = read(fd, counts, sizeof(counts)); + assert(len == sizeof(counts)); + + for (i = 0; i < 64; i++) { + if (i % 16 == 0 && i) + printf("\n"); + if (counts[i] > 0) + printf("\x1b[91m"); + printf("%2i ", i); + if (counts[i] > 0) + printf("\x1b[0m"); + } + printf("\n"); + + close(fd); +} diff --git a/test/kvm.c b/test/kvm.c new file mode 100644 index 0000000..682178c --- /dev/null +++ b/test/kvm.c @@ -0,0 +1,405 @@ +/* for CPU_ZERO macros.. 
diff --git a/test/kvm.c b/test/kvm.c
new file mode 100644
index 0000000..682178c
--- /dev/null
+++ b/test/kvm.c
@@ -0,0 +1,405 @@
+/* for the CPU_ZERO / CPU_SET macros */
+#define _GNU_SOURCE
+
+#include <linux/kvm.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define ARRLEN(x) (sizeof(x) / sizeof((x)[0]))
+
+#define SAMPLE_COUNT 100
+
+#define TARGET_CORE 2
+#define SECONDARY_CORE 2
+
+/* cache level selectors */
+#define L1 1
+#define L2 2
+
+#define TARGET_CACHE L1
+
+struct kvm {
+	int fd;
+	int vmfd;
+	int vcpufd;
+	void *mem;
+};
+
+/* start and end for guest assembly */
+extern uint8_t __start_guest_with[];
+extern uint8_t __stop_guest_with[];
+extern uint8_t __start_guest_without[];
+extern uint8_t __stop_guest_without[];
+
+static const uint8_t kvm_code[] = {
+	0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
+	0x00, 0xd8,       /* add %bl, %al */
+	0x04, '0',        /* add $'0', %al */
+	0xee,             /* out %al, (%dx) */
+	0xb0, '\n',       /* mov $'\n', %al */
+	0xee,             /* out %al, (%dx) */
+	0xf4,             /* hlt */
+};
+
+#if TARGET_CACHE == L1
+static int perf_counters[] = { 400, 401 }; /* L1 miss */
+#elif TARGET_CACHE == L2
+static int perf_counters[] = { 404, 402, 403 }; /* L2 miss */
+#endif
+
+static bool ready = false;
+static bool processed = false;
+
+static ssize_t sysret;
+static pid_t victim_pid;
+
+static struct kvm kvm;
+static struct kvm_run *kvm_run;
+
+#define TARGET_CACHE_LINESIZE 64
+#define TARGET_SET 15
+
+__attribute__((section("guest_with"))) void
+vm_guest_with(void)
+{
+	while (1) {
+		/* touch one line in the target cache set.. */
+		asm volatile("mov %%bl, (%[v])"
+			: : [v] "r" (TARGET_CACHE_LINESIZE * TARGET_SET));
+		/* ..then force an exit to the host via a port write */
+		asm volatile("out %%al, (%%dx)" : : );
+	}
+}
+
+__attribute__((section("guest_without"))) void
+vm_guest_without(void)
+{
+	while (1) {
+		asm volatile("out %%al, (%%dx)" : : );
+	}
+}
+
+static inline uint64_t
+read_pmc(uint64_t event)
+{
+	uint32_t lo, hi;
+
+	asm volatile (
+		"mov %[event], %%rcx\n\t"
+		"rdpmc\n\t"
+		: "=a" (lo), "=d" (hi)
+		: [event] "r" (event)
+		: "rcx"
+	);
+
+	return ((uint64_t) hi << 32) | lo;
+}
+
+bool
+pin_process(pid_t pid, int cpu, bool assert)
+{
+	cpu_set_t cpuset;
+	int status;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	status = sched_setaffinity(pid, sizeof(cpu_set_t), &cpuset);
+	if (status < 0) {
+		if (assert) err(EXIT_FAILURE, "sched_setaffinity");
+		return false;
+	}
+
+	return true;
+}
+
+int
+read_stat_core(pid_t pid)
+{
+	char path[256];
+	char line[2048];
+	FILE *file;
+	char *p;
+	int i, cpu;
+
+	snprintf(path, sizeof(path), "/proc/%u/stat", pid);
+	file = fopen(path, "r");
+	if (!file) return -1;
+
+	if (!fgets(line, sizeof(line), file))
+		err(EXIT_FAILURE, "read stat");
+
+	/* the core a task last ran on is the 39th field of /proc/pid/stat */
+	p = line;
+	for (i = 0; i < 38 && (p = strchr(p, ' ')); i++)
+		p += 1;
+
+	if (!p) errx(EXIT_FAILURE, "stat format");
+	cpu = atoi(p);
+
+	fclose(file);
+
+	return cpu;
+}
+
+void
+clear_cores(uint64_t cpu_mask)
+{
+	DIR *proc_dir, *task_dir;
+	struct dirent *proc_ent, *task_ent;
+	char taskpath[256];
+	pid_t pid, tid;
+	bool res;
+	int cpu;
+
+	/* move all processes from the target cpu to the secondary cpu */
+
+	proc_dir = opendir("/proc");
+	if (!proc_dir) err(EXIT_FAILURE, "opendir");
+
+	while ((proc_ent = readdir(proc_dir))) {
+		pid = atoi(proc_ent->d_name);
+		if (!pid) continue;
+
+		cpu = read_stat_core(pid);
+		if (cpu >= 0 && (1 << cpu) & cpu_mask) {
+			res = pin_process(pid, SECONDARY_CORE, false);
+			if (!res) printf("Failed pin %i from %i\n", pid, cpu);
+			continue;
+		}
+
+		snprintf(taskpath, sizeof(taskpath), "/proc/%u/task", pid);
+		task_dir = opendir(taskpath);
+		if (!task_dir) err(EXIT_FAILURE, "opendir");
+
+		while ((task_ent = readdir(task_dir))) {
+			tid = atoi(task_ent->d_name);
+			if (!tid || tid == pid) continue;
+
+			cpu = read_stat_core(tid);
+			if (cpu >= 0 && (1 << cpu) & cpu_mask) {
+				res = pin_process(tid, SECONDARY_CORE, false);
+				if (!res) printf("Failed pin %i from %i\n", tid, cpu);
+			}
+		}
+
+		closedir(task_dir);
+	}
+
+	closedir(proc_dir);
+}
+
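+/*
+ * kvm_init() below follows the minimal KVM API sequence: open /dev/kvm,
+ * verify the API version, KVM_CREATE_VM, back guest-physical memory with
+ * an anonymous mmap registered at address 0 via KVM_SET_USER_MEMORY_REGION,
+ * KVM_CREATE_VCPU, mmap the shared kvm_run structure, and finally point
+ * CS:RIP at the copied guest code in real mode. Each collect() call
+ * rebuilds the VM from scratch, so every sample starts from the same
+ * machine state.
+ */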
+void
+kvm_init(size_t ramsize, uint8_t *code_start, uint8_t *code_stop)
+{
+	struct kvm_userspace_memory_region region;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int ret;
+
+	kvm.fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
+	if (kvm.fd < 0)
+		err(EXIT_FAILURE, "/dev/kvm");
+
+	/* Make sure we have the stable version of the API */
+	ret = ioctl(kvm.fd, KVM_GET_API_VERSION, NULL);
+	if (ret == -1)
+		err(EXIT_FAILURE, "KVM_GET_API_VERSION");
+	if (ret != 12)
+		errx(EXIT_FAILURE, "KVM_GET_API_VERSION %d, expected 12", ret);
+
+	kvm.vmfd = ioctl(kvm.fd, KVM_CREATE_VM, 0);
+	if (kvm.vmfd < 0)
+		err(EXIT_FAILURE, "KVM_CREATE_VM");
+
+	/* Allocate guest memory and copy in the guest code. */
+	kvm.mem = mmap(NULL, ramsize, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (kvm.mem == MAP_FAILED) err(EXIT_FAILURE, "allocating guest memory");
+	assert(code_stop - code_start <= ramsize);
+	memcpy(kvm.mem, code_start, code_stop - code_start);
+
+	/* Map the memory at guest physical address 0. */
+	memset(&region, 0, sizeof(region));
+	region.slot = 0;
+	region.memory_size = ramsize;
+	region.guest_phys_addr = 0x0000;
+	region.userspace_addr = (uint64_t) kvm.mem;
+	printf("Ramsize %llu\n", (unsigned long long) region.memory_size);
+	printf("Access guest %d\n", TARGET_CACHE_LINESIZE * TARGET_SET);
+	ret = ioctl(kvm.vmfd, KVM_SET_USER_MEMORY_REGION, &region);
+	if (ret < 0) err(EXIT_FAILURE, "KVM_SET_USER_MEMORY_REGION");
+
+	kvm.vcpufd = ioctl(kvm.vmfd, KVM_CREATE_VCPU, 0);
+	if (kvm.vcpufd < 0) err(EXIT_FAILURE, "KVM_CREATE_VCPU");
+
+	/* Map the shared kvm_run structure and following data. */
+	ret = ioctl(kvm.fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+	if (ret < 0) err(EXIT_FAILURE, "KVM_GET_VCPU_MMAP_SIZE");
+	if (ret < sizeof(struct kvm_run))
+		errx(EXIT_FAILURE, "KVM_GET_VCPU_MMAP_SIZE too small");
+	kvm_run = mmap(NULL, ret, PROT_READ | PROT_WRITE,
+		MAP_SHARED, kvm.vcpufd, 0);
+	if (kvm_run == MAP_FAILED) err(EXIT_FAILURE, "mmap vcpu");
+
+	/* Initialize CS to point at 0, via a read-modify-write of sregs. */
+	memset(&sregs, 0, sizeof(sregs));
+	ret = ioctl(kvm.vcpufd, KVM_GET_SREGS, &sregs);
+	if (ret < 0) err(EXIT_FAILURE, "KVM_GET_SREGS");
+	sregs.cs.base = 0;
+	sregs.cs.selector = 0;
+	ret = ioctl(kvm.vcpufd, KVM_SET_SREGS, &sregs);
+	if (ret < 0) err(EXIT_FAILURE, "KVM_SET_SREGS");
+
+	/* Initialize registers: instruction pointer for our code and
+	 * initial flags required by x86 architecture. */
+	memset(&regs, 0, sizeof(regs));
+	regs.rip = 0x0;
+	regs.rax = 0;
+	regs.rdx = 0;
+	regs.rflags = 0x2;
+	ret = ioctl(kvm.vcpufd, KVM_SET_REGS, &regs);
+	if (ret < 0) err(EXIT_FAILURE, "KVM_SET_REGS");
+}
+
+int16_t *
+print_accessed_sets(void)
+{
+	int16_t *counts;
+	size_t i, len;
+	int fd;
+
+	counts = malloc(64 * sizeof(int16_t));
+	if (!counts) err(EXIT_FAILURE, "malloc");
+
+	/* the kernel module exposes one eviction count per cache set */
+	fd = open("/proc/cachepc", O_RDONLY);
+	if (fd < 0) err(EXIT_FAILURE, "open /proc/cachepc");
+	len = read(fd, counts, 64 * sizeof(int16_t));
+	assert(len == 64 * sizeof(int16_t));
+
+	for (i = 0; i < 64; i++) {
+		if (i % 16 == 0 && i)
+			printf("\n");
+		if (counts[i] > 0)
+			printf("\x1b[91m");
+		printf("%2zu ", i);
+		if (counts[i] > 0)
+			printf("\x1b[0m");
+	}
+	printf("\nTarget set count: %d %hu\n", TARGET_SET, counts[TARGET_SET]);
+	printf("\n");
+	close(fd);
+
+	return counts;
+}
+
+void
+collect(const char *prefix, uint8_t *code_start, uint8_t *code_stop)
+{
+	int ret;
+
+	/* ramsize chosen for alignment of kvm memory accesses */
+	kvm_init(131072, code_start, code_stop);
+	printf("KVM init done\n");
+
+	kvm_run->exit_reason = KVM_EXIT_IO;
+
+	printf("Now calling KVM_RUN\n");
+	ret = ioctl(kvm.vcpufd, KVM_RUN, NULL);
+	if (kvm_run->exit_reason == KVM_EXIT_MMIO)
+		errx(EXIT_FAILURE, "Victim access OOB: %llu\n",
+			(unsigned long long) kvm_run->mmio.phys_addr);
+
+	if (ret < 0 || kvm_run->exit_reason != KVM_EXIT_IO)
+		errx(EXIT_FAILURE, "KVM died: %i %i\n",
+			ret, kvm_run->exit_reason);
+
+	close(kvm.fd);
+	close(kvm.vmfd);
+	close(kvm.vcpufd);
+}
+
+void
+dump_msrmt_results_to_log(const char *log_file_path,
+	int16_t msrmt_results[SAMPLE_COUNT][64])
+{
+	FILE *fp;
+	int i, j;
+
+	fp = fopen(log_file_path, "w+");
+	if (!fp)
+		errx(EXIT_FAILURE, "Failed to open log file");
+
+	fprintf(fp, "Number of samples: %d\n", SAMPLE_COUNT);
+	fprintf(fp, "Target set: %d\n", TARGET_SET);
+	fprintf(fp, "Measurements per sample: %d\n", 64);
+	fprintf(fp, "Legend: target set: %d\n", TARGET_SET);
+	fprintf(fp, "Output cache attack data\n");
+	for (i = 0; i < SAMPLE_COUNT; i++) {
+		for (j = 0; j < 64; j++)
+			fprintf(fp, "%d;", (msrmt_results[i][j] > 0));
+		fprintf(fp, "\n");
+	}
+	fclose(fp);
+}
+
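+/*
+ * Measurement flow in main() below: each sample first boots a VM whose
+ * guest only exits (vm_guest_without) and reads the per-set eviction
+ * counts from /proc/cachepc as a baseline, then repeats with a guest
+ * that writes one line in TARGET_SET (vm_guest_with). Comparing the two
+ * traces should isolate the cache set the guest touched.
+ */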
+int
+main(int argc, const char **argv)
+{
+	int16_t msmrt_without_access[SAMPLE_COUNT][64];
+	int16_t msmrt_with_access[SAMPLE_COUNT][64];
+	int16_t *tmp_res;
+	int i, j;
+
+	setvbuf(stdout, NULL, _IONBF, 0);
+
+	clear_cores(1 << TARGET_CORE);
+	pin_process(0, TARGET_CORE, true);
+
+	printf("\n");
+	printf("Number of samples: %d\n", SAMPLE_COUNT);
+	printf("Target set: %d\n", TARGET_SET);
+
+	for (i = 0; i < SAMPLE_COUNT; i++) {
+		printf("First: Testing VM without memory access\n");
+		collect("without", __start_guest_without, __stop_guest_without);
+		tmp_res = print_accessed_sets();
+		memcpy(msmrt_without_access[i], tmp_res, 64 * sizeof(int16_t));
+		free(tmp_res);
+
+		printf("Now: Testing VM with memory access\n");
+		collect("with", __start_guest_with, __stop_guest_with);
+		tmp_res = print_accessed_sets();
+		memcpy(msmrt_with_access[i], tmp_res, 64 * sizeof(int16_t));
+		free(tmp_res);
+	}
+
+	printf("#### MSRT_WITHOUT_ACCESS ####\n");
+	for (i = 0; i < SAMPLE_COUNT; i++) {
+		for (j = 0; j < 64; j++)
+			printf("%d;", (msmrt_without_access[i][j] > 0));
+		putchar('\n');
+	}
+	printf("\n");
+
+	dump_msrmt_results_to_log("msmrt_without_access.out", msmrt_without_access);
+	dump_msrmt_results_to_log("msmrt_with_access.out", msmrt_with_access);
+}
-- 
cgit v1.2.3-71-gd317