commit 4804320a2e3c89664afc93214d3ad81b3a36c670
parent 1c198ee120f69d435289dbb4510058b536d091d9
Author: Louis Burda <quent.burda@gmail.com>
Date: Fri, 21 Oct 2022 18:48:02 +0200
Validate cache topology at module init
Diffstat:
4 files changed, 124 insertions(+), 29 deletions(-)
diff --git a/cachepc/cachepc.c b/cachepc/cachepc.c
@@ -24,6 +24,92 @@ static void random_perm(uint32_t *arr, uint32_t arr_len);
static void gen_random_indices(uint32_t *arr, uint32_t arr_len);
static bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len);
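+/* compare the CPU's reported L1/L2 cache geometry against the
+ * compile-time constants in cachepc.h; returns true on mismatch */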
+bool
+cachepc_verify_topology(void)
+{
+ uint32_t eax, val;
+ uint32_t assoc;
+ uint32_t linesize;
+ uint32_t size;
+ uint32_t sets;
+
+ /* REF: https://developer.amd.com/resources/developer-guides-manuals
+ * (PPR 17H 31H, P.81) */
+
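+ /* CPUID 0x80000005 ECX describes the L1 data cache: size in KB
+ * in bits 31:24, ways in 23:16, lines per tag in 15:8, line size
+ * in bits 7:0 */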
+ eax = 0x80000005;
+ asm volatile ("cpuid" : "+a"(eax), "=c"(val) : : "ebx", "edx");
+ size = ((val >> 24) & 0xFF) * 1024;
+ assoc = (val >> 16) & 0xFF;
+ linesize = val & 0xFF;
+ sets = size / (linesize * assoc);
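+ /* e.g. 32 KiB / (64 B lines * 8 ways) = 64 sets */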
+ if (size != L1_SIZE || assoc != L1_ASSOC
+ || linesize != L1_LINESIZE || sets != L1_SETS) {
+ pr_warn("CachePC: L1 topology is invalid!\n");
+ pr_warn("CachePC: L1_SIZE (expected) %u vs. (real) %u\n",
+ L1_SIZE, size);
+ pr_warn("CachePC: L1_ASSOC (expected) %u vs. (real) %u\n",
+ L1_ASSOC, assoc);
+ pr_warn("CachePC: L1_LINESIZE (expected) %u vs. (real) %u\n",
+ L1_LINESIZE, linesize);
+ pr_warn("CachePC: L1_SETS (expected) %u vs. (real) %u\n",
+ L1_SETS, sets);
+ return true;
+ }
+
+ eax = 0x80000006;
+ asm volatile ("cpuid" : "+a"(eax), "=c"(val) : : "ebx", "edx");
+ size = ((val >> 16) & 0xFFFF) * 1024;
+ assoc = (val >> 12) & 0xF;
+ linesize = val & 0xFF;
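+ /* the L2 way count is encoded (0x6 => 8-way, 0x8 => 16-way, ...,
+ * 0xF => fully associative); decode it before comparing */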
+ switch (assoc) {
+ case 0x1:
+ case 0x2:
+ case 0x4:
+ break;
+ case 0x6:
+ assoc = 8;
+ break;
+ case 0x8:
+ assoc = 16;
+ break;
+ case 0xA:
+ assoc = 32;
+ break;
+ case 0xB:
+ assoc = 48;
+ break;
+ case 0xC:
+ assoc = 64;
+ break;
+ case 0xD:
+ assoc = 96;
+ break;
+ case 0xE:
+ assoc = 128;
+ break;
+ case 0xF:
+ assoc = size / linesize;
+ break;
+ default:
+ pr_warn("CachePC: Read invalid L2 associativity: %i\n", assoc);
+ return true;
+ }
+ sets = size / (linesize * assoc);
+ if (size != L2_SIZE || assoc != L2_ASSOC
+ || linesize != L2_LINESIZE || sets != L2_SETS) {
+ pr_warn("CachePC: L2 topology is invalid!\n");
+ pr_warn("CachePC: L2_SIZE (expected) %u vs. (real) %u\n",
+ L2_SIZE, size);
+ pr_warn("CachePC: L2_ASSOC (expected) %u vs. (real) %u\n",
+ L2_ASSOC, assoc);
+ pr_warn("CachePC: L2_LINESIZE (expected) %u vs. (real) %u\n",
+ L2_LINESIZE, linesize);
+ pr_warn("CachePC: L2_SETS (expected) %u vs. (real) %u\n",
+ L2_SETS, sets);
+ return true;
+ }
+
+ return false;
+}
+
void
cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
uint8_t host_guest, uint8_t kernel_user)
@@ -31,7 +117,8 @@ cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
uint64_t event;
uint64_t reg_addr;
- /* REF: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
+ /* REF: https://developer.amd.com/resources/developer-guides-manuals
+ * (PPR 17H 31H, P.166)
*
* performance event selection via 0xC001_020X with X = (0..A)[::2]
* performance event reading via 0xC001_020X with X = (1..B)[::2]
@@ -62,13 +149,11 @@ cachepc_get_ctx(int cache_level)
if (cache_level == L1_CACHE) {
ctx->addressing = L1_ADDRESSING;
ctx->sets = L1_SETS;
- ctx->associativity = L1_ASSOCIATIVITY;
- ctx->access_time = L1_ACCESS_TIME;
+ ctx->associativity = L1_ASSOC;
} else if (cache_level == L2_CACHE) {
ctx->addressing = L2_ADDRESSING;
ctx->sets = L2_SETS;
- ctx->associativity = L2_ASSOCIATIVITY;
- ctx->access_time = L2_ACCESS_TIME;
+ ctx->associativity = L2_ASSOC;
} else {
return NULL;
}
@@ -97,14 +182,10 @@ cachepc_prepare_ds(cache_ctx *ctx)
cacheline **cacheline_ptr_arr;
cacheline *cache_ds;
- //printk(KERN_WARNING "CachePC: Preparing ds..\n");
-
cacheline_ptr_arr = allocate_cache_ds(ctx);
cache_ds = build_cache_ds(ctx, cacheline_ptr_arr);
kfree(cacheline_ptr_arr);
- // printk(KERN_WARNING "CachePC: Preparing ds done\n");
-
return cache_ds;
}
@@ -317,7 +398,8 @@ remove_cache_group_set(void *ptr)
* where x0, x1, ..., xD is a random permutation of 1, 2, ..., D
* and D = Associativity = | cache set |
*/
-cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
+cacheline *
+build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr)
+{
cacheline **first_cl_in_sets, **last_cl_in_sets;
cacheline **cl_ptr_arr_sorted;
cacheline *curr_cl;
@@ -387,7 +469,8 @@ cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cl_ptr_arr) {
/*
* Helper function to build a randomised list of cacheline structs for a set
*/
-void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
+void
+build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr)
{
cacheline *curr_cl;
uint32_t len, *idx_map;
@@ -476,7 +559,9 @@ gen_random_indices(uint32_t *arr, uint32_t arr_len)
}
-bool is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len) {
+bool
+is_in_arr(uint32_t elem, uint32_t *arr, uint32_t arr_len)
+{
uint32_t i;
for (i = 0; i < arr_len; ++i) {
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -3,9 +3,6 @@
#include "asm.h"
#include "uapi.h"
-#define CACHELINE_SIZE 64
-#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
-
#define L1_CACHE 0
#define L2_CACHE 1
@@ -13,19 +10,19 @@
#define PHYSICAL_ADDRESSING 1
#define L1_ADDRESSING VIRTUAL_ADDRESSING
+#define L1_ASSOC 8
+#define L1_LINESIZE 64
#define L1_SETS 64
-#define L1_ASSOCIATIVITY 8
-#define L1_ACCESS_TIME 4
+#define L1_SIZE (L1_SETS * L1_ASSOC * L1_LINESIZE)
#define L2_ADDRESSING PHYSICAL_ADDRESSING
-#define L2_SETS 512
-#define L2_ASSOCIATIVITY 8
-#define L2_ACCESS_TIME 12
+#define L2_ASSOC 8
+#define L2_LINESIZE 64
+#define L2_SETS 1024
+#define L2_SIZE (L2_SETS * L2_ASSOC * L2_LINESIZE)
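+/* 32 KiB 8-way L1D and 512 KiB 8-way L2, matching Zen family 17h parts */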
-#define L3_ADDRESSING PHYSICAL_ADDRESSING
-#define L3_SETS 4096
-#define L3_ASSOCIATIVITY 16
-#define L3_ACCESS_TIME 30
+#define CACHELINE_SIZE L1_LINESIZE
+#define CACHE_GROUP_SIZE (PAGE_SIZE / CACHELINE_SIZE)
#define CACHEPC_GET_BIT(b, i) (((b) >> (i)) & 1)
#define CACHEPC_SET_BIT(b, i) ((b) | (1 << (i)))
@@ -61,7 +58,6 @@ struct cache_ctx {
uint32_t sets;
uint32_t associativity;
- uint32_t access_time;
uint32_t nr_of_cachelines;
uint32_t set_size;
uint32_t cache_size;
@@ -85,6 +81,8 @@ struct cacheline {
static_assert(sizeof(struct cacheline) == CACHELINE_SIZE, "Bad cache line struct size");
static_assert(CL_NEXT_OFFSET == 0 && CL_PREV_OFFSET == 8);
+bool cachepc_verify_topology(void);
+
void cachepc_init_pmc(uint8_t index, uint8_t event_no, uint8_t event_mask,
uint8_t host_guest, uint8_t kernel_user);
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -391,8 +391,9 @@ cachepc_kvm_stream_hwpf_test(void *p)
asm volatile ("mov (%0), %%rbx" : : "r"(lines + i) : "rbx");
count += cachepc_read_pmc(CPC_L1MISS_PMC);
+ pass = (count == max) || (count == max + 1); /* +1 for a potential counter miss */
printk(KERN_WARNING "CachePC: HWPF test done (%u vs. %u => %s)\n",
- count, max, (count == max) ? "passed" : "failed");
+ count, max, pass ? "passed" : "failed");
- if (arg) *arg = (count == max);
+ if (arg) *arg = pass;
@@ -968,7 +969,10 @@ cachepc_kvm_setup_test(void *p)
cpu = get_cpu();
- printk(KERN_WARNING "CachePC: Running on core %i\n", cpu);
+ pr_warn("CachePC: Running on core %i\n", cpu);
+
+ if (cachepc_verify_topology())
+ goto exit;
cachepc_ctx = cachepc_get_ctx(L1_CACHE);
cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
@@ -980,6 +984,7 @@ cachepc_kvm_setup_test(void *p)
cachepc_kvm_single_eviction_test(NULL);
cachepc_kvm_stream_hwpf_test(NULL);
+exit:
put_cpu();
}
@@ -988,6 +993,9 @@ cachepc_kvm_init(void)
{
int ret;
+ cachepc_ctx = NULL;
+ cachepc_ds = NULL;
+
cachepc_msrmts_count = L1_SETS;
cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL);
BUG_ON(cachepc_msrmts == NULL);
@@ -1001,6 +1009,9 @@ cachepc_kvm_exit(void)
{
kfree(cachepc_msrmts);
- cachepc_release_ds(cachepc_ctx, cachepc_ds);
- cachepc_release_ctx(cachepc_ctx);
+ if (cachepc_ds)
+ cachepc_release_ds(cachepc_ctx, cachepc_ds);
+
+ if (cachepc_ctx)
+ cachepc_release_ctx(cachepc_ctx);
}
diff --git a/compile_commands.json b/compile_commands.json
@@ -0,0 +1 @@
+[]