| author | Louis Burda <quent.burda@gmail.com> | 2022-08-13 20:05:27 +0200 |
|---|---|---|
| committer | Louis Burda <quent.burda@gmail.com> | 2022-08-13 20:05:27 +0200 |
| commit | 476f6c892d90e66fbd17ba616b82b000a990f63e | (patch) |
| tree | 268efc588158ded4bf88aec234d44baf9584473f | /kmod |
| parent | 0f3b9caf389b486541614836bf180b64544615cb | (diff) |
| download | cachepc-476f6c892d90e66fbd17ba616b82b000a990f63e.tar.gz, cachepc-476f6c892d90e66fbd17ba616b82b000a990f63e.zip | |
Add cache line ordering that prevents hardware prefetching, fix cachepc counts read
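The "cache line ordering" in the subject line is the middle-out visiting order produced by the `random_perm()` rewrite in `kmod/util.c` (see the diff below): instead of a random Fisher-Yates shuffle, indices now alternate outwards around the midpoint, so the access stream never settles into a single direction for a hardware stream prefetcher to lock onto. A minimal userspace sketch of just that index formula — the helper name, `main()` driver and includes are ours, not part of the commit:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Same index formula the patch puts into random_perm(): start at the
 * middle and alternate outwards, so consecutive accesses never move in
 * one direction for long. (Function name is ours, not from the patch.) */
static void middle_out_order(uint32_t *arr, uint32_t arr_len)
{
	uint32_t i, mid;

	mid = arr_len / 2;
	for (i = 0; i < arr_len; i++)
		arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
}

int main(void)
{
	uint32_t order[8];
	uint32_t i;

	middle_out_order(order, 8);
	for (i = 0; i < 8; i++)
		printf("%" PRIu32 " ", order[i]);
	printf("\n"); /* prints: 4 3 5 2 6 1 7 0 */
	return 0;
}
```

Every index from 0 to arr_len - 1 is produced exactly once, for even and odd lengths alike, but the direction of travel flips on every step.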
Diffstat (limited to 'kmod')
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | kmod/cache_types.h | 14 |
| -rwxr-xr-x | kmod/cachepc.c | 5 |
| -rwxr-xr-x | kmod/cachepc.h | 19 |
| -rwxr-xr-x | kmod/util.c | 22 |
4 files changed, 30 insertions, 30 deletions
```diff
diff --git a/kmod/cache_types.h b/kmod/cache_types.h
index 33da39b..b337d55 100755
--- a/kmod/cache_types.h
+++ b/kmod/cache_types.h
@@ -22,9 +22,8 @@
 #define IS_LAST(flags) GET_BIT(flags, 1)
 #define IS_CACHE_GROUP_INIT(flags) GET_BIT(flags, 2)
 
-// Offset of the next and prev field in the cacheline struct
-#define CL_NEXT_OFFSET 0
-#define CL_PREV_OFFSET 8
+#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
+#define CL_PREV_OFFSET offsetof(struct cacheline, prev)
 
 typedef enum cache_level cache_level;
 typedef enum addressing_type addressing_type;
@@ -53,12 +52,15 @@ struct cacheline {
         cacheline *next;
         cacheline *prev;
 
-        uint16_t cache_set;
-        uint16_t flags;
+        uint32_t cache_set;
+        uint32_t cache_line;
+        uint32_t flags;
 
         // Unused padding to fill cache line
         uint64_t count;
-        char padding[32];
+
+        char padding[24];
 };
 
 static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
+static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
diff --git a/kmod/cachepc.c b/kmod/cachepc.c
index ed36ac7..9c896f3 100755
--- a/kmod/cachepc.c
+++ b/kmod/cachepc.c
@@ -46,8 +46,6 @@ cache_ctx *
 cachepc_get_ctx(cache_level cache_level)
 {
         cache_ctx *ctx;
-
-        // printk(KERN_WARNING "CachePC: Getting ctx..\n");
 
         ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
         BUG_ON(ctx == NULL);
@@ -72,8 +70,6 @@ cachepc_get_ctx(cache_level cache_level)
         ctx->set_size = CACHELINE_SIZE * ctx->associativity;
         ctx->cache_size = ctx->sets * ctx->set_size;
 
-        // printk(KERN_WARNING "CachePC: Getting ctx done\n");
-
         return ctx;
 }
 
@@ -395,6 +391,7 @@ allocate_cache_ds(cache_ctx *ctx)
         for (i = 0; i < ctx->nr_of_cachelines; ++i) {
                 cl_ptr_arr[i] = cl_arr + i;
                 cl_ptr_arr[i]->cache_set = get_virt_cache_set(ctx, cl_ptr_arr[i]);
+                cl_ptr_arr[i]->cache_line = i / ctx->sets;
         }
 
         return cl_ptr_arr;
diff --git a/kmod/cachepc.h b/kmod/cachepc.h
index 5deb712..8a9521c 100755
--- a/kmod/cachepc.h
+++ b/kmod/cachepc.h
@@ -49,13 +49,12 @@ cachepc_prime(cacheline *head)
 {
         cacheline *curr_cl;
 
-        cachepc_cpuid();
+        //cachepc_cpuid();
         curr_cl = head;
         do {
                 curr_cl = curr_cl->next;
-                cachepc_mfence();
         } while(curr_cl != head);
-        cachepc_cpuid();
+        //cachepc_cpuid();
 
         return curr_cl->prev;
 }
@@ -79,13 +78,12 @@ cachepc_prime_rev(cacheline *head)
 {
         cacheline *curr_cl;
 
-        cachepc_cpuid();
+        //cachepc_cpuid();
         curr_cl = head;
         do {
                 curr_cl = curr_cl->prev;
-                cachepc_mfence();
         } while(curr_cl != head);
-        cachepc_cpuid();
+        //cachepc_cpuid();
 
         return curr_cl->prev;
 }
@@ -96,16 +94,14 @@ cachepc_probe(cacheline *start_cl)
         uint64_t pre, post;
         cacheline *next_cl;
         cacheline *curr_cl;
-        volatile register uint64_t i asm("r12");
 
         curr_cl = start_cl;
 
         do {
                 pre = cachepc_read_pmc(0);
-                pre += cachepc_read_pmc(1);
 
                 cachepc_mfence();
-                cachepc_cpuid();
+                //cachepc_cpuid();
 
                 asm volatile(
                         "mov 8(%[curr_cl]), %%rax \n\t" // +8
@@ -123,13 +119,12 @@ cachepc_probe(cacheline *start_cl)
                 );
 
                 cachepc_mfence();
-                cachepc_cpuid();
+                //cachepc_cpuid();
 
                 post = cachepc_read_pmc(0);
-                post += cachepc_read_pmc(1);
 
                 cachepc_mfence();
-                cachepc_cpuid();
+                //cachepc_cpuid();
 
                 /* works across size boundary */
                 curr_cl->count = post - pre;
diff --git a/kmod/util.c b/kmod/util.c
index 166ec3e..1932c9b 100755
--- a/kmod/util.c
+++ b/kmod/util.c
@@ -20,16 +20,22 @@ prng_bytes(uint8_t *dst, size_t size)
 void
 random_perm(uint32_t *arr, uint32_t arr_len)
 {
-        uint32_t i, idx, tmp;
+        uint32_t i, mid; // idx, tmp;
 
-        for (i = arr_len - 1; i > 0; --i) {
-                prng_bytes((void*)&idx, 4);
-                idx = idx % i;
-
-                tmp = arr[idx];
-                arr[idx] = arr[i];
-                arr[i] = tmp;
+        /* defeat stream prefetcher by preventing access direction */
+        mid = arr_len / 2;
+        for (i = 0; i < arr_len; i++) {
+                arr[i] = mid + (i % 2 ? -1 : 1) * ((i + 1) / 2);
         }
+
+        // for (i = arr_len - 1; i > 0; --i) {
+        //         prng_bytes((void*)&idx, 4);
+        //         idx = idx % i;
+
+        //         tmp = arr[idx];
+        //         arr[idx] = arr[i];
+        //         arr[i] = tmp;
+        // }
 }
 
 void
```
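The cache_types.h hunk is tighter than it looks: the widened fields still leave `struct cacheline` exactly one cache line large, and the new `static_assert` pins the `next`/`prev` offsets that `cachepc_probe()`'s inline asm hard-codes as `8(%[curr_cl])`. Below is a minimal sketch of the layout arithmetic — a standalone userspace mock, with `CACHELINE_SIZE` assumed to be 64 and the module's typedefs simplified to plain struct pointers: 16 bytes of list pointers, 12 bytes of `uint32_t` fields, 4 bytes of implicit alignment padding, the 8-byte `count`, and 24 bytes of explicit padding add up to 64.

```c
/* Userspace mock of the reworked struct from kmod/cache_types.h;
 * CACHELINE_SIZE and the plain-struct pointer declarations are
 * assumptions made for illustration, not the module's real header. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CACHELINE_SIZE 64

struct cacheline {
	struct cacheline *next;   /* offset 0, chased by the probe asm      */
	struct cacheline *prev;   /* offset 8, the "mov 8(%[curr_cl])" load */

	uint32_t cache_set;
	uint32_t cache_line;
	uint32_t flags;
	/* 4 bytes of implicit padding here so count stays 8-byte aligned */

	uint64_t count;
	char padding[24];         /* pads the struct to one cache line */
};

#define CL_NEXT_OFFSET offsetof(struct cacheline, next)
#define CL_PREV_OFFSET offsetof(struct cacheline, prev)

static_assert(sizeof(struct cacheline) == CACHELINE_SIZE,
    "Bad cache line struct size");
static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8,
    "probe asm hard-codes these offsets");

int main(void) { return 0; }
```

Deriving `CL_NEXT_OFFSET`/`CL_PREV_OFFSET` via `offsetof()` keeps the constants from drifting if the struct is rearranged again, while the added assert catches any layout change that would silently break the hand-written pointer chase.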
