Diffstat (limited to 'kmod/cachepc.h')
-rwxr-xr-x  kmod/cachepc.h  150
1 files changed, 150 insertions, 0 deletions
diff --git a/kmod/cachepc.h b/kmod/cachepc.h
new file mode 100755
index 0000000..a88edb8
--- /dev/null
+++ b/kmod/cachepc.h
@@ -0,0 +1,150 @@
+#pragma once
+
+#include "asm.h"
+#include "cache_types.h"
+#include "util.h"
+#include "cachepc_user.h"
+
+void cachepc_init_counters(void);
+
+cache_ctx *cachepc_get_ctx(cache_level cl);
+void cachepc_release_ctx(cache_ctx *ctx);
+
+cacheline *cachepc_prepare_ds(cache_ctx *ctx);
+void cachepc_release_ds(cache_ctx *ctx, cacheline *ds);
+
+cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set);
+void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr);
+
+void cachepc_save_msrmts(cacheline *head);
+void cachepc_print_msrmts(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_prime_rev(cacheline *head);
+
+__attribute__((always_inline))
+static inline cacheline *cachepc_probe(cacheline *head);
+
+__attribute__((always_inline))
+static inline void cachepc_victim(void *p);
+
+extern uint16_t *cachepc_msrmts;
+extern size_t cachepc_msrmts_count;
+
+extern cache_ctx *cachepc_ctx;
+extern cacheline *cachepc_ds;
+
+/*
+ * Prime phase: fill the target cache (encoded in the size of the data structure)
+ * with the prepared data structure, i.e. with attacker data.
+ */
+cacheline *
+cachepc_prime(cacheline *head)
+{
+        cacheline *curr_cl;
+
+        //printk(KERN_WARNING "CachePC: Priming..\n");
+
+        cachepc_cpuid();
+        curr_cl = head;
+        do {
+                curr_cl = curr_cl->next;
+                cachepc_mfence();
+        } while (curr_cl != head);
+        cachepc_cpuid();
+
+        //printk(KERN_WARNING "CachePC: Priming done\n");
+
+        return curr_cl->prev;
+}
+
+/*
+ * Same as prime, but in the reverse direction, i.e. the same direction that
+ * probe uses. This is beneficial in the following scenarios:
+ *   - L1:
+ *     - Trigger a collision chain-reaction to amplify an evicted set (at the
+ *       cost of noisier measurements).
+ *   - L2:
+ *     - Always use this for L2, otherwise the first cache sets will still
+ *       reside in L1 unless the victim filled L1 completely. In that case an
+ *       eviction has, at random (depending on where the cache set is placed
+ *       in the randomised data structure), one of the following effects:
+ *       A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower
+ *       B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower
+ */
+cacheline *
+cachepc_prime_rev(cacheline *head)
+{
+        cacheline *curr_cl;
+
+        cachepc_cpuid();
+        curr_cl = head;
+        do {
+                curr_cl = curr_cl->prev;
+                cachepc_mfence();
+        } while (curr_cl != head);
+        cachepc_cpuid();
+
+        return curr_cl->prev;
+}
+
+cacheline *
+cachepc_probe(cacheline *start_cl)
+{
+        uint64_t pre, post;
+        cacheline *next_cl;
+        cacheline *curr_cl;
+        volatile register uint64_t i asm("r12");
+
+        curr_cl = start_cl;
+
+        do {
+                pre = cachepc_readpmc(0);
+                pre += cachepc_readpmc(1);
+
+                cachepc_mfence();
+                cachepc_cpuid();
+
+                asm volatile(
+                        "mov 8(%[curr_cl]), %%rax \n\t"              // +8
+                        "mov 8(%%rax), %%rcx \n\t"                   // +16
+                        "mov 8(%%rcx), %%rax \n\t"                   // +24
+                        "mov 8(%%rax), %%rcx \n\t"                   // +32
+                        "mov 8(%%rcx), %%rax \n\t"                   // +40
+                        "mov 8(%%rax), %%rcx \n\t"                   // +48
+                        "mov 8(%%rcx), %[curr_cl_out] \n\t"          // +56
+                        "mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64
+                        : [next_cl_out] "=r" (next_cl),
+                          [curr_cl_out] "=r" (curr_cl)
+                        : [curr_cl] "r" (curr_cl)
+                        : "rax", "rcx"
+                );
+
+                cachepc_mfence();
+                cachepc_cpuid();
+
+                post = cachepc_readpmc(0);
+                post += cachepc_readpmc(1);
+
+                cachepc_mfence();
+                cachepc_cpuid();
+
+                /* works across size boundary */
+                curr_cl->count = post - pre;
+
+                curr_cl = next_cl;
+        } while (__builtin_expect(curr_cl != start_cl, 1));
+
+        return curr_cl->next;
+}
+
+void
+cachepc_victim(void *p)
+{
+        cachepc_cpuid();
+        cachepc_mfence();
+        cachepc_readq(p);
+}
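
For orientation, the helpers declared in this header compose into a single Prime+Probe measurement pass: prime the cache with the eviction data structure, let the victim access memory, then probe and save the per-set counter deltas. Below is a minimal sketch of such a pass using only functions from this header; the cache_level constant L1 and the set index 15 are illustrative assumptions (they are not defined here), and the pass would normally run with preemption and interrupts disabled inside the kernel module.

/*
 * Hedged sketch of one Prime+Probe pass over a single cache set.
 * The constant L1 and the set index 15 are assumptions made for
 * illustration only; they are not taken from this header.
 */
static void
cachepc_example_pass(void)
{
        cache_ctx *ctx;
        cacheline *ds, *victim, *head;

        ctx = cachepc_get_ctx(L1);                /* describe the targeted cache */
        ds = cachepc_prepare_ds(ctx);             /* attacker eviction structure */
        victim = cachepc_prepare_victim(ctx, 15); /* line mapping to the observed set */

        head = cachepc_prime(ds);                 /* fill the cache with attacker lines */
        cachepc_victim(victim);                   /* victim access evicts one attacker line */
        head = cachepc_probe(head);               /* PMC deltas land in each cacheline's count */

        cachepc_save_msrmts(head);                /* copy counts into cachepc_msrmts[] */

        cachepc_release_victim(ctx, victim);
        cachepc_release_ds(ctx, ds);
        cachepc_release_ctx(ctx);
}

The set whose probe walk shows the largest counter delta is the one the victim touched; reading those values back out of cachepc_msrmts is left to the userspace interface declared in cachepc_user.h.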
