Diffstat (limited to 'kmod')
 -rw-r--r--  kmod/asm.h           |  66 -
 -rw-r--r--  kmod/cache_types.h   |  66 -
 -rw-r--r--  kmod/cachepc.c       | 445 -
 -rw-r--r--  kmod/cachepc.h       | 188 -
 -rw-r--r--  kmod/cachepc_user.h  |   8 -
 -rw-r--r--  kmod/device_conf.h   |  29 -
 -rw-r--r--  kmod/kvm.c           | 392 -
 -rw-r--r--  kmod/kvm.h           |   6 -
 -rw-r--r--  kmod/util.c          |  38 -
 -rw-r--r--  kmod/util.h          |   8 -
10 files changed, 0 insertions(+), 1246 deletions(-)
diff --git a/kmod/asm.h b/kmod/asm.h
deleted file mode 100644
index 9e9385a..0000000
--- a/kmod/asm.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <linux/kernel.h>
-
-#define CPUID_AFFECTED_REGS "rax", "rbx", "rcx", "rdx"
-
-__attribute__((always_inline))
-static inline void cachepc_cpuid(void);
-
-__attribute__((always_inline))
-static inline void cachepc_lfence(void);
-
-__attribute__((always_inline))
-static inline void cachepc_sfence(void);
-
-__attribute__((always_inline))
-static inline void cachepc_mfence(void);
-
-__attribute__((always_inline))
-static inline void cachepc_readq(void *p);
-
-void
-cachepc_cpuid(void)
-{
-        asm volatile(
-                "mov $0x80000005, %%eax\n\t"
-                "cpuid\n\t"
-                ::: CPUID_AFFECTED_REGS
-        );
-}
-
-void
-cachepc_lfence(void)
-{
-        asm volatile(
-                "lfence\n\t"
-                ::: "memory"
-        );
-}
-
-void
-cachepc_sfence(void)
-{
-        asm volatile(
-                "sfence\n\t"
-                ::: "memory"
-        );
-}
-
-void
-cachepc_mfence(void)
-{
-        asm volatile(
-                "mfence\n\t"
-                ::: "memory"
-        );
-}
-
-void
-cachepc_readq(void *p)
-{
-        asm volatile (
-                "movq (%0), %%r10\n\t"
-                : : "r" (p) : "r10"
-        );
-}
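
The helpers above exist to serialize the pipeline around a measurement (cpuid) and to order memory accesses (the fences). Below is a minimal userspace analogue of that bracketing discipline, with rdtsc standing in for the module's PMC reads; everything in it is an illustration of the pattern, not part of the module:

/* Sketch: how the fence/cpuid pairs from asm.h bracket a timed access.
 * Userspace, GCC-style inline asm; rdtsc replaces the PMC read used in
 * the module. cpuid here only serves as a serializing instruction. */
#include <stdint.h>
#include <stdio.h>

static inline uint64_t rdtsc(void)
{
        uint32_t lo, hi;
        asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
        return ((uint64_t) hi << 32) | lo;
}

int main(void)
{
        static uint64_t target;
        uint64_t pre, post;

        asm volatile ("mfence; cpuid" ::: "rax", "rbx", "rcx", "rdx", "memory");
        pre = rdtsc();
        asm volatile ("movq (%0), %%r10" : : "r"(&target) : "r10");
        post = rdtsc();
        asm volatile ("mfence; cpuid" ::: "rax", "rbx", "rcx", "rdx", "memory");

        printf("access took ~%lu cycles\n", (unsigned long) (post - pre));
        return 0;
}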
diff --git a/kmod/cache_types.h b/kmod/cache_types.h
deleted file mode 100644
index b337d55..0000000
--- a/kmod/cache_types.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include "device_conf.h"
-
-#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1))
-
-#define REMOVE_PAGE_OFFSET(ptr) ((void *) (((uintptr_t) ptr) & PAGE_MASK))
-
-#define GET_BIT(b, i) (((b) >> (i)) & 1)
-#define SET_BIT(b, i) ((b) | (1 << (i)))
-
-/* Operate cacheline flags
- * Used flags:
- *  32                        2      1       0
- *  | ... | cache group initialized | last | first |
- */
-#define DEFAULT_FLAGS 0
-#define SET_FIRST(flags) SET_BIT(flags, 0)
-#define SET_LAST(flags) SET_BIT(flags, 1)
-#define SET_CACHE_GROUP_INIT(flags) SET_BIT(flags, 2)
-#define IS_FIRST(flags) GET_BIT(flags, 0)
-#define IS_LAST(flags) GET_BIT(flags, 1)
-#define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
-
-#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
-#define CL_PREV_OFFSET offsetof(struct cacheline, prev)
-
-typedef enum cache_level cache_level;
-typedef enum addressing_type addressing_type;
-typedef struct cacheline cacheline;
-typedef struct cache_ctx cache_ctx;
-
-enum cache_level {L1, L2};
-enum addressing_type {VIRTUAL, PHYSICAL};
-
-struct cache_ctx {
-        cache_level cache_level;
-        addressing_type addressing;
-
-        uint32_t sets;
-        uint32_t associativity;
-        uint32_t access_time;
-        uint32_t nr_of_cachelines;
-        uint32_t set_size;
-        uint32_t cache_size;
-};
-
-struct cacheline {
-        // Doubly linked list inside same set
-        // Attention: CL_NEXT_OFFSET and CL_PREV_OFFSET
-        // must be kept up to date
-        cacheline *next;
-        cacheline *prev;
-
-        uint32_t cache_set;
-        uint32_t cache_line;
-        uint32_t flags;
-
-        // Unused padding to fill cache line
-        uint64_t count;
-
-        char padding[24];
-};
-
-static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
-static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
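
The macros above carry the set arithmetic for the whole module: SET_MASK() isolates the set-index bits of an address, and the flag helpers pack the first/last/initialized markers into cacheline.flags. A small standalone sanity check of that arithmetic, assuming the L1 geometry from device_conf.h (64 sets, 64-byte lines):

/* Sketch: what SET_MASK and the flag helpers evaluate to for 64 sets of
 * 64-byte lines. Hypothetical standalone check, not part of the module. */
#include <assert.h>
#include <stdint.h>

#define CACHELINE_SIZE 64
#define SET_MASK(SETS) (((((uintptr_t) SETS) * CACHELINE_SIZE) - 1) ^ (CACHELINE_SIZE - 1))
#define GET_BIT(b, i) (((b) >> (i)) & 1)
#define SET_BIT(b, i) ((b) | (1 << (i)))

int main(void)
{
        /* bits [11:6] select one of 64 sets; bits [5:0] are the line offset */
        assert(SET_MASK(64) == 0xfc0);
        /* a pointer's set index: masked bits, shifted down by the line size */
        assert(((0x1234beefULL & SET_MASK(64)) / CACHELINE_SIZE)
                == ((0x1234beefULL >> 6) & 63));
        /* flags pack (first, last, group-initialized) into the low three bits */
        assert(GET_BIT(SET_BIT(0, 1), 1) == 1);
        return 0;
}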
diff --git a/kmod/cachepc.c b/kmod/cachepc.c
deleted file mode 100644
index 09ed705..0000000
--- a/kmod/cachepc.c
+++ /dev/null
@@ -1,445 +0,0 @@
-#include "cachepc.h"
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/ioctl.h>
-
-static void cl_insert(cacheline *last_cl, cacheline *new_cl);
-static void *remove_cache_set(cache_ctx *ctx, void *ptr);
-static void *remove_cache_group_set(void *ptr);
-
-static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len);
-static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
-static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
-static cacheline **allocate_cache_ds(cache_ctx *ctx);
-static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
-
-void __attribute__((optimize(1))) // prevent instruction reordering
-cachepc_prime_vcall(uintptr_t ret, cacheline *cl)
-{
-        cachepc_prime(cl);
-        asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
-}
-
-void __attribute__((optimize(1))) // prevent instruction reordering
-cachepc_probe_vcall(uintptr_t ret, cacheline *cl)
-{
-        cachepc_probe(cl);
-        asm volatile ("mov %0, %%rax; jmp *%%rax" : : "r"(ret) : "rax");
-}
-
-void
-cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask)
-{
-        uint64_t event;
-        uint64_t reg_addr;
-
-        /* REF: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
-         *
-         * performance event selection via 0xC001_020X with X = (0..A)[::2]
-         * performance event reading via 0xC001_020X with X = (1..B)[::2]
-         */
-
-        WARN_ON(index >= 6);
-        if (index >= 6) return;
-
-        reg_addr = 0xc0010200 + index * 2;
-        event = event_no | (event_mask << 8);
-        event |= (1ULL << 17); /* OS (kernel) events only */
-        event |= (1ULL << 22); /* enable performance counter */
-        event |= (1ULL << 40); /* host events only */
-        printk(KERN_WARNING "CachePC: Initialized %i. PMC %02X:%02X\n",
-                index, event_no, event_mask);
-        /* edx must carry bits 63:32, otherwise the host-only bit is lost */
-        asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(event >> 32));
-}
-
-cache_ctx *
-cachepc_get_ctx(cache_level cache_level)
-{
-        cache_ctx *ctx;
-
-        ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
-        BUG_ON(ctx == NULL);
-
-        BUG_ON(cache_level != L1);
-        if (cache_level == L1) {
-                ctx->addressing = L1_ADDRESSING;
-                ctx->sets = L1_SETS;
-                ctx->associativity = L1_ASSOCIATIVITY;
-                ctx->access_time = L1_ACCESS_TIME;
-        } else if (cache_level == L2) {
-                ctx->addressing = L2_ADDRESSING;
-                ctx->sets = L2_SETS;
-                ctx->associativity = L2_ASSOCIATIVITY;
-                ctx->access_time = L2_ACCESS_TIME;
-        } else {
-                return NULL;
-        }
-
-        ctx->cache_level = cache_level;
-        ctx->nr_of_cachelines = ctx->sets * ctx->associativity;
-        ctx->set_size = CACHELINE_SIZE * ctx->associativity;
-        ctx->cache_size = ctx->sets * ctx->set_size;
-
-        return ctx;
-}
-
-void
-cachepc_release_ctx(cache_ctx *ctx)
-{
-        kfree(ctx);
-}
-
-/*
- * Initialises the complete cache data structure for the given context
- */
-cacheline *
-cachepc_prepare_ds(cache_ctx *ctx)
-{
-        cacheline **cacheline_ptr_arr;
-        cacheline *cache_ds;
-
-        cacheline_ptr_arr = allocate_cache_ds(ctx);
-        cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
-        kfree(cacheline_ptr_arr);
-
-        return cache_ds;
-}
-
-void
-cachepc_release_ds(cache_ctx *ctx, cacheline *ds)
-{
-        kfree(remove_cache_set(ctx, ds));
-}
-
-cacheline *
-cachepc_prepare_victim(cache_ctx *ctx, uint32_t set)
-{
-        cacheline *victim_set, *victim_cl;
-        cacheline *curr_cl, *next_cl;
-
-        victim_set = prepare_cache_set_ds(ctx, &set, 1);
-        victim_cl = victim_set;
-
-        // Free the other lines in the same set that are not used.
-        if (ctx->addressing == PHYSICAL) {
-                curr_cl = victim_cl->next;
-                do {
-                        next_cl = curr_cl->next;
-                        // Here, it is ok to free them directly, as every line
-                        // in the same set is from a different page anyway.
-                        kfree(remove_cache_group_set(curr_cl));
-                        curr_cl = next_cl;
-                } while (curr_cl != victim_cl);
-        }
-
-        return victim_cl;
-}
-
-void
-cachepc_release_victim(cache_ctx *ctx, cacheline *victim)
-{
-        kfree(remove_cache_set(ctx, victim));
-}
-
-void
-cachepc_save_msrmts(cacheline *head)
-{
-        cacheline *curr_cl;
-
-        curr_cl = head;
-        do {
-                if (IS_FIRST(curr_cl->flags)) {
-                        BUG_ON(curr_cl->cache_set >= cachepc_msrmts_count);
-                        cachepc_msrmts[curr_cl->cache_set] = curr_cl->count;
-                }
-
-                curr_cl = curr_cl->prev;
-        } while (curr_cl != head);
-}
-
-void
-cachepc_print_msrmts(cacheline *head)
-{
-        cacheline *curr_cl;
-
-        curr_cl = head;
-        do {
-                if (IS_FIRST(curr_cl->flags)) {
-                        printk(KERN_WARNING "CachePC: Count for cache set %i: %llu\n",
-                                curr_cl->cache_set, curr_cl->count);
-                }
-
-                curr_cl = curr_cl->prev;
-        } while (curr_cl != head);
-}
-
-cacheline *
-prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len)
-{
-        cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets;
-        cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds;
-        uint32_t i, cache_groups_len, cache_groups_max_len;
-        uint32_t *cache_groups;
-
-        cache_ds = cachepc_prepare_ds(ctx);
-
-        first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(first_cl_in_sets == NULL);
-
-        last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(last_cl_in_sets == NULL);
-
-        // Find the cache groups that are used, so that we can delete the
-        // other ones later (to avoid memory leaks)
-        cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE;
-        cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL);
-        BUG_ON(cache_groups == NULL);
-
-        cache_groups_len = 0;
-        for (i = 0; i < sets_len; ++i) {
-                if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) {
-                        cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE;
-                        ++cache_groups_len;
-                }
-        }
-
-        to_del_cls = NULL;
-        curr_cl = cache_ds;
-
-        // Extract the partial data structure for the cache sets and ensure correct freeing
-        do {
-                next_cl = curr_cl->next;
-
-                if (IS_FIRST(curr_cl->flags)) {
-                        first_cl_in_sets[curr_cl->cache_set] = curr_cl;
-                }
-                if (IS_LAST(curr_cl->flags)) {
-                        last_cl_in_sets[curr_cl->cache_set] = curr_cl;
-                }
-
-                if (ctx->addressing == PHYSICAL && !is_in_arr(
-                        curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len))
-                {
-                        // Collect all unused blocks of the cache ds for freeing
-                        // right away, because we lose their refs below
-                        cl_insert(to_del_cls, curr_cl);
-                        to_del_cls = curr_cl;
-                }
-                curr_cl = next_cl;
-
-        } while (curr_cl != cache_ds);
-
-        // Fix partial cache set ds
-        for (i = 0; i < sets_len; ++i) {
-                last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]];
-                first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]];
-        }
-        cache_set_ds = first_cl_in_sets[sets[0]];
-
-        // Free unused cache lines
-        if (ctx->addressing == PHYSICAL) {
-                cachepc_release_ds(ctx, to_del_cls);
-        }
-
-        kfree(first_cl_in_sets);
-        kfree(last_cl_in_sets);
-        kfree(cache_groups);
-
-        return cache_set_ds;
-}
-
-void
-cl_insert(cacheline *last_cl, cacheline *new_cl)
-{
-        if (last_cl == NULL) {
-                // Adding the first entry is a special case
-                new_cl->next = new_cl;
-                new_cl->prev = new_cl;
-        } else {
-                new_cl->next = last_cl->next;
-                new_cl->prev = last_cl;
-                last_cl->next->prev = new_cl;
-                last_cl->next = new_cl;
-        }
-}
-
-void *
-remove_cache_set(cache_ctx *ctx, void *ptr)
-{
-        return (void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets));
-}
-
-void *
-remove_cache_group_set(void *ptr)
-{
-        return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE));
-}
-
-/*
- * Create a randomized doubly linked list with the following structure:
- * set A <--> set B <--> ... <--> set X <--> set A
- * where each set is one of the cache sets, in a random order.
- * The sets are a doubly linked list of cachelines themselves:
- * set A:
- *  line[A + x0 * #sets] <--> line[A + x1 * #sets] <--> ...
- * where x0, x1, ..., xD is a random permutation of 1, 2, ..., D
- * and D = associativity = |cache set|
- */
-cacheline *
-build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr)
-{
-        cacheline **first_cl_in_sets, **last_cl_in_sets;
-        cacheline **cl_ptr_arr_sorted;
-        cacheline *curr_cl;
-        cacheline *cache_ds;
-        uint32_t *idx_per_set;
-        uint32_t idx_curr_set, set_offset;
-        uint32_t i, j, set, set_len;
-        uint32_t *idx_map;
-
-        idx_per_set = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
-        BUG_ON(idx_per_set == NULL);
-
-        cl_ptr_arr_sorted = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(cl_ptr_arr_sorted == NULL);
-
-        set_len = ctx->associativity;
-        for (i = 0; i < ctx->nr_of_cachelines; ++i) {
-                set_offset = cl_ptr_arr[i]->cache_set * set_len;
-                idx_curr_set = idx_per_set[cl_ptr_arr[i]->cache_set];
-
-                cl_ptr_arr_sorted[set_offset + idx_curr_set] = cl_ptr_arr[i];
-                idx_per_set[cl_ptr_arr[i]->cache_set] += 1;
-        }
-
-        // Build doubly linked list for every set
-        for (set = 0; set < ctx->sets; ++set) {
-                set_offset = set * set_len;
-                build_randomized_list_for_cache_set(ctx, cl_ptr_arr_sorted + set_offset);
-        }
-
-        // Relink the sets among each other
-        idx_map = kzalloc(ctx->sets * sizeof(uint32_t), GFP_KERNEL);
-        BUG_ON(idx_map == NULL);
-
-        gen_random_indices(idx_map, ctx->sets);
-
-        first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(first_cl_in_sets == NULL);
-
-        last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(last_cl_in_sets == NULL);
-
-        for (j = 0; j < ctx->nr_of_cachelines; ++j) {
-                curr_cl = cl_ptr_arr_sorted[j];
-                if (IS_FIRST(curr_cl->flags))
-                        first_cl_in_sets[curr_cl->cache_set] = curr_cl;
-                if (IS_LAST(curr_cl->flags))
-                        last_cl_in_sets[curr_cl->cache_set] = curr_cl;
-        }
-
-        /* connect up sets */
-        for (i = 0; i < ctx->sets; ++i) {
-                last_cl_in_sets[idx_map[i]]->next = first_cl_in_sets[idx_map[(i + 1) % ctx->sets]];
-                first_cl_in_sets[idx_map[(i + 1) % ctx->sets]]->prev = last_cl_in_sets[idx_map[i]];
-        }
-        cache_ds = first_cl_in_sets[idx_map[0]];
-
-        kfree(cl_ptr_arr_sorted);
-        kfree(first_cl_in_sets);
-        kfree(last_cl_in_sets);
-        kfree(idx_per_set);
-        kfree(idx_map);
-
-        return cache_ds;
-}
-
-/*
- * Helper function to build a randomised list of cacheline structs for a set
- */
-void
-build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
-{
-        cacheline *curr_cl;
-        uint32_t len, *idx_map;
-        uint16_t i;
-
-        len = ctx->associativity;
-        idx_map = kzalloc(len * sizeof(uint32_t), GFP_KERNEL);
-        BUG_ON(idx_map == NULL);
-
-        gen_random_indices(idx_map, len);
-
-        for (i = 0; i < len; ++i) {
-                curr_cl = cacheline_ptr_arr[idx_map[i]];
-                curr_cl->next = cacheline_ptr_arr[idx_map[(i + 1) % len]];
-                curr_cl->prev = cacheline_ptr_arr[idx_map[(len - 1 + i) % len]];
-
-                if (idx_map[i] == 0) {
-                        curr_cl->flags = SET_FIRST(DEFAULT_FLAGS);
-                        curr_cl->prev->flags = SET_LAST(DEFAULT_FLAGS);
-                } else {
-                        curr_cl->flags |= DEFAULT_FLAGS;
-                }
-        }
-
-        kfree(idx_map);
-}
-
-/*
- * Allocate a data structure that fills the complete cache, i.e. consisting
- * of `associativity` many cache lines for each cache set.
- */
-cacheline **
-allocate_cache_ds(cache_ctx *ctx)
-{
-        cacheline **cl_ptr_arr, *cl_arr;
-        uint32_t i;
-
-        cl_ptr_arr = kzalloc(ctx->nr_of_cachelines * sizeof(cacheline *), GFP_KERNEL);
-        BUG_ON(cl_ptr_arr == NULL);
-
-        BUG_ON(ctx->addressing != VIRTUAL);
-
-        // For virtual addressing, allocating a consecutive chunk of memory is enough
-        cl_arr = cachepc_aligned_alloc(PAGE_SIZE, ctx->cache_size);
-        BUG_ON(cl_arr == NULL);
-
-        for (i = 0; i < ctx->nr_of_cachelines; ++i) {
-                cl_ptr_arr[i] = cl_arr + i;
-                cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
-                cl_ptr_arr[i]->cache_line = i / ctx->sets;
-                cl_ptr_arr[i]->count = 0;
-        }
-
-        return cl_ptr_arr;
-}
-
-uint16_t
-get_virt_cache_set(cache_ctx *ctx, void *ptr)
-{
-        return (uint16_t) ((((uintptr_t) ptr) & SET_MASK(ctx->sets)) / CACHELINE_SIZE);
-}
-
-void *
-cachepc_aligned_alloc(size_t alignment, size_t size)
-{
-        void *p;
-
-        if (size % alignment != 0)
-                size = size - (size % alignment) + alignment;
-        p = kzalloc(size, GFP_KERNEL);
-        BUG_ON(((uintptr_t) p) % alignment != 0);
-
-        return p;
-}
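
Taken together, cachepc.c provides the allocation, prime, probe, and readout building blocks. A sketch of the call sequence they are designed for, modeled on the self-tests in kvm.c further down; it assumes the code runs pinned to a single core with a PMC already programmed via cachepc_init_pmc():

/* Sketch: one full prime+probe round against a chosen L1 set, using only
 * functions defined in this file and its header. Illustration only. */
static void
example_measure_one_eviction(uint32_t target_set)
{
        cacheline *head, *victim;

        cachepc_ctx = cachepc_get_ctx(L1);
        cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
        victim = cachepc_prepare_victim(cachepc_ctx, target_set);

        head = cachepc_prime(cachepc_ds);   /* fill L1 with attacker lines */
        cachepc_victim(victim);             /* victim touches one set */
        cachepc_probe(head);                /* re-walk; per-set deltas land in ->count */

        cachepc_save_msrmts(head);          /* copy counts out for /proc/cachepc */

        cachepc_release_victim(cachepc_ctx, victim);
        cachepc_release_ds(cachepc_ctx, cachepc_ds);
        cachepc_release_ctx(cachepc_ctx);
}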
diff --git a/kmod/cachepc.h b/kmod/cachepc.h
deleted file mode 100644
index 6237eba..0000000
--- a/kmod/cachepc.h
+++ /dev/null
@@ -1,188 +0,0 @@
-#pragma once
-
-#include "asm.h"
-#include "cache_types.h"
-#include "util.h"
-#include "cachepc_user.h"
-
-void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask);
-
-cache_ctx *cachepc_get_ctx(cache_level cl);
-void cachepc_release_ctx(cache_ctx *ctx);
-
-cacheline *cachepc_prepare_ds(cache_ctx *ctx);
-void cachepc_release_ds(cache_ctx *ctx, cacheline *ds);
-
-cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set);
-void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr);
-
-void *cachepc_aligned_alloc(size_t alignment, size_t size);
-
-void cachepc_save_msrmts(cacheline *head);
-void cachepc_print_msrmts(cacheline *head);
-
-void cachepc_prime_vcall(uintptr_t ret, cacheline *cl);
-void cachepc_probe_vcall(uintptr_t ret, cacheline *cl);
-
-__attribute__((always_inline))
-static inline cacheline *cachepc_prime(cacheline *head);
-
-__attribute__((always_inline))
-static inline cacheline *cachepc_prime_rev(cacheline *head);
-
-__attribute__((always_inline))
-static inline cacheline *cachepc_probe(cacheline *head);
-
-__attribute__((always_inline))
-static inline void cachepc_victim(void *p);
-
-__attribute__((always_inline))
-static inline uint64_t cachepc_read_pmc(uint64_t event);
-
-extern uint16_t *cachepc_msrmts;
-extern size_t cachepc_msrmts_count;
-
-extern cache_ctx *cachepc_ctx;
-extern cacheline *cachepc_ds;
-
-extern uint64_t cachepc_regs_tmp[16];
-extern uint64_t cachepc_regs_vm[16];
-
-/*
- * Prime phase: fill the target cache (encoded in the size of the data structure)
- * with the prepared data structure, i.e. with attacker data.
- */
-cacheline *
-cachepc_prime(cacheline *head)
-{
-        cacheline *curr_cl, *prev_cl;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        curr_cl = head;
-        do {
-                prev_cl = curr_cl;
-                curr_cl = curr_cl->next;
-        } while (curr_cl != head);
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        return prev_cl;
-}
-
-/*
- * Same as prime, but in the reverse direction, i.e. the same direction that
- * probe uses. This is beneficial for the following scenarios:
- * - L1:
- *   - Trigger a collision chain-reaction to amplify an evicted set (but this
- *     has the downside of noisier measurements).
- * - L2:
- *   - Always use this for L2, otherwise the first cache sets will still reside
- *     in L1 unless the victim filled L1 completely. In that case, an eviction
- *     has randomly (depending on where the cache set is placed in the
- *     randomised data structure) one of the following effects:
- *     A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower
- *     B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower
- */
-cacheline *
-cachepc_prime_rev(cacheline *head)
-{
-        cacheline *curr_cl;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        curr_cl = head;
-        do {
-                curr_cl = curr_cl->prev;
-        } while (curr_cl != head);
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        return curr_cl->prev;
-}
-
-cacheline *
-cachepc_probe(cacheline *start_cl)
-{
-        uint64_t pre, post;
-        cacheline *next_cl;
-        cacheline *curr_cl;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        curr_cl = start_cl;
-
-        do {
-                pre = cachepc_read_pmc(0);
-
-                asm volatile(
-                        "mov 8(%[curr_cl]), %%rax \n\t"              // +8
-                        "mov 8(%%rax), %%rcx \n\t"                   // +16
-                        "mov 8(%%rcx), %%rax \n\t"                   // +24
-                        "mov 8(%%rax), %%rcx \n\t"                   // +32
-                        "mov 8(%%rcx), %%rax \n\t"                   // +40
-                        "mov 8(%%rax), %%rcx \n\t"                   // +48
-                        "mov 8(%%rcx), %[curr_cl_out] \n\t"          // +56
-                        "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64
-                        : [next_cl_out] "=r" (next_cl),
-                          [curr_cl_out] "=r" (curr_cl)
-                        : [curr_cl] "r" (curr_cl)
-                        : "rax", "rcx"
-                );
-
-                post = cachepc_read_pmc(0);
-
-                /* works across size boundary */
-                curr_cl->count = post - pre;
-
-                curr_cl = next_cl;
-        } while (__builtin_expect(curr_cl != start_cl, 1));
-
-        next_cl = curr_cl->next;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        return next_cl;
-}
-
-void
-cachepc_victim(void *p)
-{
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        cachepc_readq(p);
-
-        cachepc_mfence();
-        cachepc_cpuid();
-}
-
-uint64_t
-cachepc_read_pmc(uint64_t event)
-{
-        uint32_t lo, hi;
-        uint64_t res;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        event = 0xC0010201 + 2 * event;
-
-        asm volatile (
-                "rdmsr"
-                : "=a" (lo), "=d" (hi)
-                : "c"(event)
-        );
-        res = ((uint64_t) hi << 32) | (uint64_t) lo;
-
-        cachepc_mfence();
-        cachepc_cpuid();
-
-        return res;
-}
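
cachepc_init_pmc() and cachepc_read_pmc() address a PERF_CTL/PERF_CTR MSR pair at 0xC001_0200 + 2*index and 0xC001_0201 + 2*index. The event encoding they rely on, spelled out as a standalone illustration (values as in the files above, not module code):

/* Sketch: the PERF_CTL value cachepc_init_pmc(0, 0x64, 0xD8) builds,
 * using the "l2 hit/miss" event the self-tests program. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t index = 0, event_no = 0x64, event_mask = 0xD8;
        uint64_t event;

        event  = event_no | ((uint64_t) event_mask << 8);
        event |= 1ULL << 17; /* count in OS (CPL 0) mode only */
        event |= 1ULL << 22; /* counter enable */
        event |= 1ULL << 40; /* host-only: exclude SEV guest events */

        printf("wrmsr 0x%08x <- 0x%016llx\n", 0xc0010200 + index * 2,
                (unsigned long long) event);
        printf("rdmsr 0x%08x reads the running count\n", 0xc0010201 + index * 2);
        return 0;
}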
diff --git a/kmod/cachepc_user.h b/kmod/cachepc_user.h
deleted file mode 100644
index f815839..0000000
--- a/kmod/cachepc_user.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include <linux/ioctl.h>
-
-#define CACHEPC_IOCTL_MAGIC 0xBF
-#define CACHEPC_IOCTL_TEST_ACCESS _IOWR(CACHEPC_IOCTL_MAGIC, 0, uint32_t)
-#define CACHEPC_IOCTL_TEST_EVICTION _IOWR(CACHEPC_IOCTL_MAGIC, 1, uint32_t)
-#define CACHEPC_IOCTL_INIT_PMC _IOW(CACHEPC_IOCTL_MAGIC, 2, uint32_t)
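
These ioctls, together with the /proc/cachepc file created in kvm.c, form the entire userspace interface. A hypothetical userspace driver for it, assuming the module is loaded; error handling is trimmed. The INIT_PMC argument packs index into bits 31:24, the event number into bits 15:8, and the unit mask into bits 7:0, matching cachepc_kvm_init_pmc_ioctl() below:

/* Sketch: exercising the ioctl interface from userspace. */
#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "cachepc_user.h"

int main(void)
{
        uint32_t arg;
        int fd;

        fd = open("/proc/cachepc", O_RDONLY);
        if (fd < 0) return 1;

        arg = (0u << 24) | (0x64 << 8) | 0xD8; /* PMC 0, event 0x64, mask 0xD8 */
        ioctl(fd, CACHEPC_IOCTL_INIT_PMC, &arg);

        arg = 48; /* cache set the victim access should land in */
        ioctl(fd, CACHEPC_IOCTL_TEST_EVICTION, &arg);
        printf("evictions counted: %u\n", arg);

        close(fd);
        return 0;
}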
diff --git a/kmod/device_conf.h b/kmod/device_conf.h
deleted file mode 100644
index e24d681..0000000
--- a/kmod/device_conf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-// TODO: Read from kernel headers
-
-// General settings
-// #define PAGE_SIZE 4096
-#define PROCESSOR_FREQ 2900000000
-
-// Cache related settings
-#define CACHELINE_SIZE 64
-#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
-
-// Addressing:
-// - virtual: 0
-// - physical: 1
-#define L1_ADDRESSING 0
-#define L1_SETS 64
-#define L1_ASSOCIATIVITY 8
-#define L1_ACCESS_TIME 4
-
-#define L2_ADDRESSING 1
-#define L2_SETS 512
-#define L2_ASSOCIATIVITY 8
-#define L2_ACCESS_TIME 12
-
-#define L3_ADDRESSING 1
-#define L3_SETS 4096
-#define L3_ASSOCIATIVITY 16
-#define L3_ACCESS_TIME 30
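
From these constants cachepc_get_ctx() derives the sizes used everywhere else. The arithmetic for the L1 configuration, spelled out as a standalone check:

/* Sketch: derived cache geometry for the L1 constants above. */
#include <assert.h>

#define CACHELINE_SIZE 64
#define L1_SETS 64
#define L1_ASSOCIATIVITY 8

int main(void)
{
        int nr_of_cachelines = L1_SETS * L1_ASSOCIATIVITY;   /* 512 lines */
        int set_size = CACHELINE_SIZE * L1_ASSOCIATIVITY;    /* 512 bytes per set */
        int cache_size = L1_SETS * set_size;                 /* 32 KiB in total */

        assert(nr_of_cachelines == 512);
        assert(cache_size == 32 * 1024);
        return 0;
}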
diff --git a/kmod/kvm.c b/kmod/kvm.c
deleted file mode 100644
index 4deb4fa..0000000
--- a/kmod/kvm.c
+++ /dev/null
@@ -1,392 +0,0 @@
-#include "kvm.h"
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-
-struct proc_ops cachepc_proc_ops;
-
-uint16_t *cachepc_msrmts;
-size_t cachepc_msrmts_count;
-EXPORT_SYMBOL(cachepc_msrmts);
-EXPORT_SYMBOL(cachepc_msrmts_count);
-
-cache_ctx *cachepc_ctx;
-cacheline *cachepc_ds;
-EXPORT_SYMBOL(cachepc_ctx);
-EXPORT_SYMBOL(cachepc_ds);
-
-uint64_t cachepc_regs_tmp[16];
-uint64_t cachepc_regs_vm[16];
-EXPORT_SYMBOL(cachepc_regs_tmp);
-EXPORT_SYMBOL(cachepc_regs_vm);
-
-int
-cachepc_kvm_proc_open(struct inode *inode, struct file *file)
-{
-        try_module_get(THIS_MODULE);
-
-        return 0;
-}
-
-int
-cachepc_kvm_proc_close(struct inode *inode, struct file *file)
-{
-        module_put(THIS_MODULE);
-
-        return 0;
-}
-
-ssize_t
-cachepc_kvm_proc_read(struct file *file, char *buf, size_t buflen, loff_t *off)
-{
-        size_t len, left;
-        size_t size;
-
-        printk(KERN_WARNING "CachePC: Reading entries (%lu:%lli)\n",
-                buflen, off ? *off : 0);
-
-        size = cachepc_msrmts_count * sizeof(uint16_t);
-        if (!off || *off >= size || *off < 0)
-                return 0;
-
-        len = size - *off;
-        if (len > buflen) len = buflen;
-
-        left = copy_to_user(buf, (uint8_t *) cachepc_msrmts + *off, len);
-
-        len -= left;
-        *off += len;
-
-        return len;
-}
-
-ssize_t
-cachepc_kvm_proc_write(struct file *file, const char *buf, size_t buflen, loff_t *off)
-{
-        return 0;
-}
-
-loff_t
-cachepc_kvm_proc_lseek(struct file *file, loff_t off, int mode)
-{
-        switch (mode) {
-        case SEEK_SET:
-                file->f_pos = off;
-                break;
-        case SEEK_CUR:
-                file->f_pos += off;
-                break;
-        case SEEK_END:
-                file->f_pos = cachepc_msrmts_count * sizeof(uint16_t) + off;
-                break;
-        default:
-                return -EINVAL;
-        }
-
-        return file->f_pos;
-}
-
-void
-cachepc_kvm_prime_probe_test(void *p)
-{
-        cacheline *lines;
-        cacheline *cl, *head;
-        uint32_t count;
-        uint32_t *arg;
-        int i, max;
-
-        arg = p;
-
-        /* l2 data cache, hit or miss */
-        cachepc_init_pmc(0, 0x64, 0xD8);
-
-        lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
-        BUG_ON(lines == NULL);
-
-        max = cachepc_ctx->nr_of_cachelines;
-
-        cachepc_cpuid();
-        cachepc_mfence();
-
-        for (i = 0; i < max; i++)
-                asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
-
-        head = cachepc_prime(cachepc_ds);
-        cachepc_probe(head);
-
-        count = 0;
-        cl = head = cachepc_ds;
-        do {
-                count += cl->count;
-                cl = cl->next;
-        } while (cl != head);
-
-        printk(KERN_WARNING "CachePC: Prime-probe test done (%u vs. %u => %s)\n",
-                count, 0, (count == 0) ? "passed" : "failed");
-
-        if (arg) *arg = (count == 0);
-
-        kfree(lines);
-}
-
-void
-cachepc_kvm_stream_hwpf_test(void *p)
-{
-        cacheline *lines;
-        uint32_t count = 0; /* must start at zero: the PMC deltas below accumulate into it */
-        uint32_t *arg;
-        uint32_t i, max;
-
-        arg = p;
-
-        /* TODO: accurately detect hwpf */
-
-        /* l2 data cache, hit or miss */
-        cachepc_init_pmc(0, 0x64, 0xD8);
-
-        lines = cachepc_aligned_alloc(PAGE_SIZE, cachepc_ctx->cache_size);
-        BUG_ON(lines == NULL);
-
-        max = cachepc_ctx->nr_of_cachelines;
-
-        cachepc_prime(cachepc_ds);
-
-        count -= cachepc_read_pmc(0);
-        for (i = 0; i < max; i++)
-                asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
-        count += cachepc_read_pmc(0);
-
-        printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n",
-                count, max, (count == max) ? "passed" : "failed");
-
-        if (arg) *arg = (count == max);
-
-        kfree(lines);
-}
-
-void
-cachepc_kvm_single_access_test(void *p)
-{
-        cacheline *ptr;
-        uint64_t pre, post;
-        uint32_t *arg;
-
-        /* l2 data cache, hit or miss */
-        cachepc_init_pmc(0, 0x64, 0xD8);
-
-        arg = p;
-
-        WARN_ON(arg && *arg >= L1_SETS);
-        if (arg && *arg >= L1_SETS) return;
-        ptr = cachepc_prepare_victim(cachepc_ctx, arg ? *arg : 48);
-
-        cachepc_prime(cachepc_ds);
-
-        pre = cachepc_read_pmc(0);
-        cachepc_victim(ptr);
-        post = cachepc_read_pmc(0);
-
-        printk(KERN_WARNING "CachePC: Single access test done (%llu vs %u => %s)\n",
-                post - pre, 1, (post - pre == 1) ? "passed" : "failed");
-
-        if (arg) *arg = post - pre;
-
-        cachepc_release_victim(cachepc_ctx, ptr);
-}
-
-void
-cachepc_kvm_single_eviction_test(void *p)
-{
-        cacheline *head, *cl, *evicted;
-        cacheline *ptr;
-        uint32_t target;
-        uint32_t *arg;
-        int count;
-
-        arg = p;
-
-        /* l2 data cache, hit or miss */
-        cachepc_init_pmc(0, 0x64, 0xD8);
-
-        WARN_ON(arg && *arg >= L1_SETS);
-        if (arg && *arg >= L1_SETS) return;
-        target = arg ? *arg : 48;
-
-        ptr = cachepc_prepare_victim(cachepc_ctx, target);
-
-        head = cachepc_prime(cachepc_ds);
-        cachepc_victim(ptr);
-        cachepc_probe(head);
-
-        count = 0;
-        evicted = NULL;
-        cl = head = cachepc_ds;
-        do {
-                if (IS_FIRST(cl->flags) && cl->count > 0) {
-                        evicted = cl;
-                        count += cl->count;
-                }
-                cl = cl->next;
-        } while (cl != head);
-
-        printk(KERN_WARNING "CachePC: Single eviction test done (%u vs %u => %s)\n",
-                count, 1, (count == 1 && evicted->cache_set == target) ? "passed" : "failed");
-        cachepc_save_msrmts(head);
-
-        if (arg) *arg = count;
-
-        cachepc_release_victim(cachepc_ctx, ptr);
-}
-
-void
-cachepc_kvm_system_setup(void)
-{
-        uint64_t reg_addr, val;
-        uint32_t lo, hi;
-
-        /* disable streaming store */
-        reg_addr = 0xc0011020;
-        asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-        val = (uint64_t) lo | ((uint64_t) hi << 32);
-        val |= 1 << 13;
-        /* write back both halves; edx = 0 would clear bits 63:32 */
-        asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
-        printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
-
-        /* disable speculative data cache tlb reloads */
-        reg_addr = 0xc0011022;
-        asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-        val = (uint64_t) lo | ((uint64_t) hi << 32);
-        val |= 1 << 4;
-        asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
-        printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
-
-        /* disable data cache hardware prefetcher */
-        reg_addr = 0xc0011022;
-        asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr));
-        val = (uint64_t) lo | ((uint64_t) hi << 32);
-        val |= 1 << 13;
-        asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(val >> 32));
-        printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val);
-}
-
-void
-cachepc_kvm_init_pmc_ioctl(void *p)
-{
-        uint32_t event;
-        uint8_t index, event_no, event_mask;
-
-        WARN_ON(p == NULL);
-        if (!p) return;
-
-        event = *(uint32_t *)p;
-
-        index = (event & 0xFF000000) >> 24;
-        event_no = (event & 0x0000FF00) >> 8;
-        event_mask = (event & 0x000000FF) >> 0;
-
-        cachepc_init_pmc(index, event_no, event_mask);
-}
-
-long
-cachepc_kvm_ioctl(struct file *file, unsigned int cmd, unsigned long argp)
-{
-        void __user *arg_user;
-        uint32_t u32;
-        int ret;
-
-        arg_user = (void __user *)argp;
-        switch (cmd) {
-        case CACHEPC_IOCTL_TEST_ACCESS:
-                printk(KERN_WARNING "CachePC: Called ioctl access test\n");
-                if (!arg_user) return -EINVAL;
-                if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
-                        return -EFAULT;
-                ret = smp_call_function_single(2,
-                        cachepc_kvm_single_access_test, &u32, true);
-                WARN_ON(ret != 0);
-                if (copy_to_user(arg_user, &u32, sizeof(uint32_t)))
-                        return -EFAULT;
-                break;
-        case CACHEPC_IOCTL_TEST_EVICTION:
-                printk(KERN_WARNING "CachePC: Called ioctl eviction test\n");
-                if (!arg_user) return -EINVAL;
-                if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
-                        return -EFAULT;
-                ret = smp_call_function_single(2,
-                        cachepc_kvm_single_eviction_test, &u32, true);
-                WARN_ON(ret != 0);
-                if (copy_to_user(arg_user, &u32, sizeof(uint32_t)))
-                        return -EFAULT;
-                break;
-        case CACHEPC_IOCTL_INIT_PMC:
-                printk(KERN_WARNING "CachePC: Called ioctl init counter\n");
-                if (!arg_user) return -EINVAL;
-                if (copy_from_user(&u32, arg_user, sizeof(uint32_t)))
-                        return -EFAULT;
-                ret = smp_call_function_single(2,
-                        cachepc_kvm_init_pmc_ioctl, &u32, true);
-                WARN_ON(ret != 0);
-                break;
-        default:
-                return -EINVAL;
-        }
-
-        return 0;
-}
-
-void
-cachepc_kvm_setup_test(void *p)
-{
-        int cpu;
-
-        cpu = get_cpu();
-
-        printk(KERN_WARNING "CachePC: Running on core %i\n", cpu);
-
-        cachepc_ctx = cachepc_get_ctx(L1);
-        cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
-
-        cachepc_kvm_system_setup();
-
-        cachepc_kvm_prime_probe_test(NULL);
-        cachepc_kvm_single_access_test(NULL);
-        cachepc_kvm_single_eviction_test(NULL);
-        cachepc_kvm_stream_hwpf_test(NULL);
-
-        put_cpu();
-}
-
-void
-cachepc_kvm_init(void)
-{
-        int ret;
-
-        cachepc_msrmts_count = L1_SETS;
-        cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL);
-        BUG_ON(cachepc_msrmts == NULL);
-
-        ret = smp_call_function_single(2, cachepc_kvm_setup_test, NULL, true);
-        WARN_ON(ret != 0);
-
-        memset(&cachepc_proc_ops, 0, sizeof(cachepc_proc_ops));
-        cachepc_proc_ops.proc_open = cachepc_kvm_proc_open;
-        cachepc_proc_ops.proc_read = cachepc_kvm_proc_read;
-        cachepc_proc_ops.proc_write = cachepc_kvm_proc_write;
-        cachepc_proc_ops.proc_lseek = cachepc_kvm_proc_lseek;
-        cachepc_proc_ops.proc_release = cachepc_kvm_proc_close;
-        cachepc_proc_ops.proc_ioctl = cachepc_kvm_ioctl;
-        proc_create("cachepc", 0644, NULL, &cachepc_proc_ops);
-}
-
-void
-cachepc_kvm_exit(void)
-{
-        remove_proc_entry("cachepc", NULL);
-        kfree(cachepc_msrmts);
-
-        cachepc_release_ds(cachepc_ctx, cachepc_ds);
-        cachepc_release_ctx(cachepc_ctx);
-}
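
The proc read handler above streams out cachepc_msrmts as raw uint16_t values, one per L1 set. A sketch of the matching userspace consumer, assuming one of the eviction tests has already populated the buffer:

/* Sketch: reading the per-set measurements back out of /proc/cachepc. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t msrmts[64]; /* L1_SETS entries */
        FILE *f;
        size_t n, i;

        f = fopen("/proc/cachepc", "rb");
        if (!f) return 1;

        n = fread(msrmts, sizeof(uint16_t), 64, f);
        for (i = 0; i < n; i++)
                if (msrmts[i]) printf("set %2zu: %u\n", i, msrmts[i]);

        fclose(f);
        return 0;
}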
"passed" : "failed"); - - if (arg) *arg = (count == max); - - kfree(lines); -} - -void -cachepc_kvm_single_access_test(void *p) -{ - cacheline *ptr; - uint64_t pre, post; - uint32_t *arg; - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - arg = p; - - WARN_ON(arg && *arg >= L1_SETS); - if (arg && *arg >= L1_SETS) return; - ptr = cachepc_prepare_victim(cachepc_ctx, arg ? *arg : 48); - - cachepc_prime(cachepc_ds); - - pre = cachepc_read_pmc(0); - cachepc_victim(ptr); - post = cachepc_read_pmc(0); - - printk(KERN_WARNING "CachePC: Single access test done (%llu vs %u => %s)", - post - pre, 1, (post - pre == 1) ? "passed" : "failed"); - - if (arg) *arg = post - pre; - - cachepc_release_victim(cachepc_ctx, ptr); -} - -void -cachepc_kvm_single_eviction_test(void *p) -{ - cacheline *head, *cl, *evicted; - cacheline *ptr; - uint32_t target; - uint32_t *arg; - int count; - - arg = p; - - /* l2 data cache, hit or miss */ - cachepc_init_pmc(0, 0x64, 0xD8); - - WARN_ON(arg && *arg >= L1_SETS); - if (arg && *arg >= L1_SETS) return; - target = arg ? *arg : 48; - - ptr = cachepc_prepare_victim(cachepc_ctx, target); - - head = cachepc_prime(cachepc_ds); - cachepc_victim(ptr); - cachepc_probe(head); - - count = 0; - evicted = NULL; - cl = head = cachepc_ds; - do { - if (IS_FIRST(cl->flags) && cl->count > 0) { - evicted = cl; - count += cl->count; - } - cl = cl->next; - } while (cl != head); - - printk(KERN_WARNING "CachePC: Single eviction test done (%u vs %u => %s)\n", - count, 1, (count == 1 && evicted->cache_set == target) ? "passed" : "failed"); - cachepc_save_msrmts(head); - - if (arg) *arg = count; - - cachepc_release_victim(cachepc_ctx, ptr); -} - -void -cachepc_kvm_system_setup(void) -{ - uint64_t reg_addr, val; - uint32_t lo, hi; - - /* disable streaming store */ - reg_addr = 0xc0011020; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 13; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); - - /* disable speculative data cache tlb reloads */ - reg_addr = 0xc0011022; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 4; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); - - /* disable data cache hardware prefetcher */ - reg_addr = 0xc0011022; - asm volatile ("rdmsr" : "=a"(lo), "=d"(hi) : "c"(reg_addr)); - val = (uint64_t) lo | ((uint64_t) hi << 32); - val |= 1 << 13; - asm volatile ("wrmsr" : : "c"(reg_addr), "a"(val), "d"(0x00)); - printk("CachePC: Writing MSR %08llX: %016llX\n", reg_addr, val); -} - -void -cachepc_kvm_init_pmc_ioctl(void *p) -{ - uint32_t event; - uint8_t index, event_no, event_mask; - - WARN_ON(p == NULL); - if (!p) return; - - event = *(uint32_t *)p; - - index = (event & 0xFF000000) >> 24; - event_no = (event & 0x0000FF00) >> 8; - event_mask = (event & 0x000000FF) >> 0; - - cachepc_init_pmc(index, event_no, event_mask); -} - -long -cachepc_kvm_ioctl(struct file *file, unsigned int cmd, unsigned long argp) -{ - void __user *arg_user; - uint32_t u32; - int ret; - - arg_user = (void __user *)argp; - switch (cmd) { - case CACHEPC_IOCTL_TEST_ACCESS: - printk(KERN_WARNING "CachePC: Called ioctl access test\n"); - if (!arg_user) return -EINVAL; - if (copy_from_user(&u32, arg_user, sizeof(uint32_t))) - return -EFAULT; - ret = smp_call_function_single(2, - 
diff --git a/kmod/util.c b/kmod/util.c
deleted file mode 100644
index abf2b71..0000000
--- a/kmod/util.c
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "util.h"
-
-void
-random_perm(uint32_t *arr, uint32_t arr_len)
-{
-        uint32_t i;
-
-        /* no special ordering needed when prefetcher is disabled */
-        for (i = 0; i < arr_len; i++)
-                arr[i] = i;
-
-        // /* prevent stream prefetching by alternating access direction */
-        // mid = arr_len / 2;
-        // for (i = 0; i < arr_len; i++)
-        //         arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
-}
-
-void
-gen_random_indices(uint32_t *arr, uint32_t arr_len)
-{
-        uint32_t i;
-
-        for (i = 0; i < arr_len; ++i)
-                arr[i] = i;
-        random_perm(arr, arr_len);
-}
-
-bool
-is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len)
-{
-        uint32_t i;
-
-        for (i = 0; i < arr_len; ++i) {
-                if (arr[i] == elem)
-                        return true;
-        }
-
-        return false;
-}
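
random_perm() is currently the identity because the hardware prefetchers are disabled in cachepc_kvm_system_setup(). If a genuinely randomized walk order were ever needed (say, with prefetchers left enabled), a Fisher-Yates shuffle over get_random_u32() would be the usual in-kernel choice; a sketch, explicitly an assumption rather than this repository's code:

/* Sketch: in-kernel Fisher-Yates shuffle as a drop-in for random_perm(),
 * hypothetical replacement only. */
#include <linux/random.h>
#include <linux/types.h>

static void example_random_perm(uint32_t *arr, uint32_t arr_len)
{
        uint32_t i, j, tmp;

        if (arr_len < 2)
                return;

        for (i = arr_len - 1; i > 0; i--) {
                j = get_random_u32() % (i + 1); /* slight modulo bias, fine here */
                tmp = arr[i];
                arr[i] = arr[j];
                arr[j] = tmp;
        }
}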
diff --git a/kmod/util.h b/kmod/util.h
deleted file mode 100644
index a0ff8be..0000000
--- a/kmod/util.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include <linux/kernel.h>
-
-void random_perm(uint32_t *arr, uint32_t arr_len);
-void gen_random_indices(uint32_t *arr, uint32_t arr_len);
-
-bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
