diff options
| author | Louis Burda <quent.burda@gmail.com> | 2022-07-25 13:58:41 +0200 |
|---|---|---|
| committer | Louis Burda <quent.burda@gmail.com> | 2022-07-25 13:58:41 +0200 |
| commit | 7e2719c913c833bdd93b463f5a7dc878d5a22273 (patch) | |
| tree | a008dcd0d07c6be5b19b9eec0e7330b025d9e215 /src | |
| parent | b98fabdf4b91a6093b3d91f15cb55b21ab89c0f3 (diff) | |
| download | cachepc-7e2719c913c833bdd93b463f5a7dc878d5a22273.tar.gz cachepc-7e2719c913c833bdd93b463f5a7dc878d5a22273.zip | |
Read program counters directly, add print and remove procfs approach for testing
Diffstat (limited to 'src')
| -rwxr-xr-x | src/asm.h | 5 | ||||
| -rwxr-xr-x | src/cachepc.c | 37 | ||||
| -rwxr-xr-x | src/cachepc.h | 25 |
3 files changed, 54 insertions, 13 deletions
@@ -25,10 +25,9 @@ cachepc_readpmc(uint64_t event) uint32_t lo, hi; asm volatile ( - "mov %[event], %%rcx\t\n" - "rdpmc\t\n" + "rdmsr" : "=a" (lo), "=d" (hi) - : [event] "r" (event) + : "c"(event) ); return ((uint64_t) hi << 32) | lo; diff --git a/src/cachepc.c b/src/cachepc.c index 0905d5a..bd5f4d2 100755 --- a/src/cachepc.c +++ b/src/cachepc.c @@ -6,10 +6,45 @@ static cacheline **allocate_cache_ds(cache_ctx *ctx); static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr); static void *aligned_alloc(size_t alignment, size_t size); +void +cachepc_init_counters(void) +{ + uint32_t event, event_no, event_mask; + uint64_t reg_addr; + + /* SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166) + * + * performance event selection is done via 0xC001_020X with X = (0..A)[::2] + * performance event reading is done viea 0XC001_020X with X = (1..B)[::2] + * + * 6 slots total + */ + + reg_addr = 0xc0010200; /* first slot */ + event_no = 0x64; + event_mask = 0x08; + event = event_no | (event_mask << 8); + event |= (1<< 17); /* OsUserMode bit */ + event |= (1 << 22); /* enable performance counter */ + printk(KERN_INFO "Writing to msr event %d", event); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); + + reg_addr = 0xc0010202; + event_no = 0x64; + event_mask = 0xC8; + event = event_no | (event_mask << 8); + event |= (1<< 17); /* OsUserMode bit */ + event |= (1 << 22); /* enable performance counter */ + printk(KERN_INFO "Writing to msr event %d", event); + asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); +} + cache_ctx * cachepc_get_ctx(cache_level cache_level) { cache_ctx *ctx; + + printk(KERN_INFO "CACHEPC_GET_CTX"); ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL); BUG_ON(ctx == NULL); @@ -44,6 +79,8 @@ cachepc_prepare_ds(cache_ctx *ctx) { cacheline **cacheline_ptr_arr; cacheline *cache_ds; + + printk(KERN_INFO "CACHEPC_BUILD_CACHE_DS"); cacheline_ptr_arr = allocate_cache_ds(ctx); cache_ds = build_cache_ds(ctx, cacheline_ptr_arr); diff --git a/src/cachepc.h b/src/cachepc.h index 44d34ca..06f85f0 100755 --- a/src/cachepc.h +++ b/src/cachepc.h @@ -8,6 +8,11 @@ #include "cache_types.h" #include "util.h" +#define L2_HIT_CNTR 0xC0010201 +#define L2_MISS_CNTR 0xC0010203 + +void cachepc_init_counters(void); + cache_ctx *cachepc_get_ctx(cache_level cl); cacheline *cachepc_prepare_ds(cache_ctx *ctx); void cachepc_save_msrmts(cacheline *head, const char *prefix, int index); @@ -34,6 +39,8 @@ cachepc_prime(cacheline *head) { cacheline *curr_cl; + printk(KERN_WARNING "PROBE"); + cachepc_cpuid(); curr_cl = head; do { @@ -78,13 +85,12 @@ cachepc_prime_rev(cacheline *head) static inline cacheline * cachepc_probe_set(cacheline *curr_cl) { - uint64_t pre1, pre2, pre3; - uint64_t post1, post2, post3; + uint64_t pre1, pre2; + uint64_t post1, post2; cacheline *next_cl; - pre1 = cachepc_readpmc(0); - pre2 = cachepc_readpmc(1); - pre3 = cachepc_readpmc(2); + pre1 = cachepc_readpmc(L2_HIT_CNTR); + pre2 = cachepc_readpmc(L2_MISS_CNTR); cachepc_mfence(); asm volatile( @@ -104,18 +110,15 @@ cachepc_probe_set(cacheline *curr_cl) cachepc_mfence(); cachepc_cpuid(); - post1 = cachepc_readpmc(0); + post1 = cachepc_readpmc(L2_HIT_CNTR); cachepc_cpuid(); - post2 = cachepc_readpmc(1); - cachepc_cpuid(); - post3 = cachepc_readpmc(2); + post2 = cachepc_readpmc(L2_MISS_CNTR); cachepc_cpuid(); /* works across size boundary */ curr_cl->count = 0; curr_cl->count += post1 - pre1; curr_cl->count += post2 - pre2; - curr_cl->count += post3 - pre3; return next_cl; } @@ -124,6 +127,8 @@ static inline cacheline * cachepc_probe(cacheline *head) { cacheline *curr_cs; + + printk(KERN_WARNING "PROBE"); curr_cs = head; do { |
