summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Louis Burda <quent.burda@gmail.com> 2022-07-25 13:58:41 +0200
committer Louis Burda <quent.burda@gmail.com> 2022-07-25 13:58:41 +0200
commit 7e2719c913c833bdd93b463f5a7dc878d5a22273 (patch)
tree a008dcd0d07c6be5b19b9eec0e7330b025d9e215 /src
parent b98fabdf4b91a6093b3d91f15cb55b21ab89c0f3 (diff)
download cachepc-7e2719c913c833bdd93b463f5a7dc878d5a22273.tar.gz
cachepc-7e2719c913c833bdd93b463f5a7dc878d5a22273.zip
Read performance counters directly, add debug prints, and remove the procfs approach for testing
Diffstat (limited to 'src')
-rwxr-xr-x src/asm.h 5
-rwxr-xr-x src/cachepc.c 37
-rwxr-xr-x src/cachepc.h 25
3 files changed, 54 insertions, 13 deletions
diff --git a/src/asm.h b/src/asm.h
index 9509952..14c6593 100755
--- a/src/asm.h
+++ b/src/asm.h
@@ -25,10 +25,9 @@ cachepc_readpmc(uint64_t event)
uint32_t lo, hi;
asm volatile (
- "mov %[event], %%rcx\t\n"
- "rdpmc\t\n"
+ "rdmsr"
: "=a" (lo), "=d" (hi)
- : [event] "r" (event)
+ : "c"(event)
);
return ((uint64_t) hi << 32) | lo;
diff --git a/src/cachepc.c b/src/cachepc.c
index 0905d5a..bd5f4d2 100755
--- a/src/cachepc.c
+++ b/src/cachepc.c
@@ -6,10 +6,45 @@ static cacheline **allocate_cache_ds(cache_ctx *ctx);
static uint16_t get_virt_cache_set(cache_ctx *ctx, void *ptr);
static void *aligned_alloc(size_t alignment, size_t size);
+void
+cachepc_init_counters(void)
+{
+ uint32_t event, event_no, event_mask;
+ uint64_t reg_addr;
+
+ /* Program two performance-counter slots by writing their event-select MSRs.
+ * SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
+ * performance event selection is done via 0xC001_020X with X = (0..A)[::2]
+ * performance event reading is done via 0xC001_020X with X = (1..B)[::2]
+ * 6 slots total; only the selectors at 0xC0010200 and 0xC0010202 are written here.
+ * Each selector value is event_no | (event_mask << 8) with bits 17 and 22 set.
+ */
+
+ reg_addr = 0xc0010200; /* first slot: event-select MSR */
+ event_no = 0x64;
+ event_mask = 0x08;
+ event = event_no | (event_mask << 8); /* event number in the low byte, mask shifted up by 8 */
+ event |= (1<< 17); /* OsUserMode bit; NOTE(review): confirm bit 17 alone selects the intended privilege mode */
+ event |= (1 << 22); /* enable performance counter */
+ printk(KERN_INFO "Writing to msr event %d", event); /* NOTE(review): event is uint32_t but printed with %d; format has no trailing newline */
+ asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); /* MSR address in c, value in a, d forced to zero */
+
+ reg_addr = 0xc0010202; /* second slot: event-select MSR */
+ event_no = 0x64;
+ event_mask = 0xC8; /* only the mask differs from the first slot's configuration */
+ event = event_no | (event_mask << 8); /* same layout as for the first slot */
+ event |= (1<< 17); /* OsUserMode bit */
+ event |= (1 << 22); /* enable performance counter */
+ printk(KERN_INFO "Writing to msr event %d", event); /* NOTE(review): same %d / missing newline remark as above */
+ asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00)); /* MSR address in c, value in a, d forced to zero */
+}
+
cache_ctx *
cachepc_get_ctx(cache_level cache_level)
{
cache_ctx *ctx;
+
+ printk(KERN_INFO "CACHEPC_GET_CTX");
ctx = kzalloc(sizeof(cache_ctx), GFP_KERNEL);
BUG_ON(ctx == NULL);
@@ -44,6 +79,8 @@ cachepc_prepare_ds(cache_ctx *ctx)
{
cacheline **cacheline_ptr_arr;
cacheline *cache_ds;
+
+ printk(KERN_INFO "CACHEPC_BUILD_CACHE_DS");
cacheline_ptr_arr = allocate_cache_ds(ctx);
cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
diff --git a/src/cachepc.h b/src/cachepc.h
index 44d34ca..06f85f0 100755
--- a/src/cachepc.h
+++ b/src/cachepc.h
@@ -8,6 +8,11 @@
#include "cache_types.h"
#include "util.h"
+#define L2_HIT_CNTR 0xC0010201
+#define L2_MISS_CNTR 0xC0010203
+
+void cachepc_init_counters(void);
+
cache_ctx *cachepc_get_ctx(cache_level cl);
cacheline *cachepc_prepare_ds(cache_ctx *ctx);
void cachepc_save_msrmts(cacheline *head, const char *prefix, int index);
@@ -34,6 +39,8 @@ cachepc_prime(cacheline *head)
{
cacheline *curr_cl;
+ printk(KERN_WARNING "PROBE");
+
cachepc_cpuid();
curr_cl = head;
do {
@@ -78,13 +85,12 @@ cachepc_prime_rev(cacheline *head)
static inline cacheline *
cachepc_probe_set(cacheline *curr_cl)
{
- uint64_t pre1, pre2, pre3;
- uint64_t post1, post2, post3;
+ uint64_t pre1, pre2;
+ uint64_t post1, post2;
cacheline *next_cl;
- pre1 = cachepc_readpmc(0);
- pre2 = cachepc_readpmc(1);
- pre3 = cachepc_readpmc(2);
+ pre1 = cachepc_readpmc(L2_HIT_CNTR);
+ pre2 = cachepc_readpmc(L2_MISS_CNTR);
cachepc_mfence();
asm volatile(
@@ -104,18 +110,15 @@ cachepc_probe_set(cacheline *curr_cl)
cachepc_mfence();
cachepc_cpuid();
- post1 = cachepc_readpmc(0);
+ post1 = cachepc_readpmc(L2_HIT_CNTR);
cachepc_cpuid();
- post2 = cachepc_readpmc(1);
- cachepc_cpuid();
- post3 = cachepc_readpmc(2);
+ post2 = cachepc_readpmc(L2_MISS_CNTR);
cachepc_cpuid();
/* works across size boundary */
curr_cl->count = 0;
curr_cl->count += post1 - pre1;
curr_cl->count += post2 - pre2;
- curr_cl->count += post3 - pre3;
return next_cl;
}
@@ -124,6 +127,8 @@ static inline cacheline *
cachepc_probe(cacheline *head)
{
cacheline *curr_cs;
+
+ printk(KERN_WARNING "PROBE");
curr_cs = head;
do {