commit 5ad426b7b5859275739616e5f62041317b5511a1
parent 7eed418586797f379093a7073ead428ed7a5caea
Author: Louis Burda <quent.burda@gmail.com>
Date: Thu, 9 Feb 2023 06:22:52 -0600
Added more tests and sanity checks
Diffstat:
5 files changed, 434 insertions(+), 125 deletions(-)
diff --git a/cachepc/asm.S b/cachepc/asm.S
@@ -3,119 +3,206 @@
#include <linux/linkage.h>
.global cpc_read_pmc
+.global cpc_eviction_prio_test_asm
+.global cpc_prime_test1_asm
+.global cpc_prime_test2_asm
+.global cpc_prime_test3_asm
.global cpc_prime_probe_test_asm
.global cpc_stream_hwpf_test_asm
.global cpc_single_eviction_test_asm
+# the x86-64 System V calling convention specifies that only these
+# registers are callee-saved and must be preserved:
+# rbx, rbp, rsp and r12-r15
+
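+# cpc_read_pmc(pmc): return the current value of performance
+# counter %rdi in %rax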
SYM_FUNC_START(cpc_read_pmc)
push %rbx
- push %rcx
- push %rdx
- push %r8
readpmc %rdi %r8
mov %r8, %rax
- pop %r8
- pop %rdx
- pop %rcx
pop %rbx
ret
SYM_FUNC_END(cpc_read_pmc)
-SYM_FUNC_START(cpc_prime_probe_test_asm)
+SYM_FUNC_START(cpc_stream_hwpf_test_asm)
+ push %rbx
+
+ wbinvd
+
+ readpmc $0 %r8
+
+ mov cpc_ds, %rdi
+ prime stream_hwpf_test %rdi %rsi %r11
+
+ readpmc $0 %r9
+
+ mov %r9, %rax
+ sub %r8, %rax
+
+ pop %rbx
+
+ ret
+SYM_FUNC_END(cpc_stream_hwpf_test_asm)
+
+SYM_FUNC_START(cpc_prime_test1_asm)
push %rbx
- push %rcx
- push %rdx
- push %r8
- push %r9
- push %r10
- push %r11
push %r12
+ # this test checks whether the correct number of cache lines
+ # was evicted by prime, however it does not guarantee that the
+ # L1 is filled completely with only our cache lines
+
+ mov cpc_ds, %r10
+
wbinvd
- mov cpc_ds, %r9
- prime prime_probe_test %r9 %r10 %r8
- prime prime_probe_test1 %r9 %r10 %r8
- prime prime_probe_test2 %r9 %r10 %r8
- probe prime_probe_test %r8 %r9 %r10 %r11 %r12
+ readpmc $0 %r8
+ prime_pass prime_test1_2 %r10 %r11 %r12
+ readpmc $0 %r9
+
+ mov %r9, %rax
+ sub %r8, %rax
pop %r12
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdx
- pop %rcx
pop %rbx
ret
-SYM_FUNC_END(cpc_prime_probe_test_asm)
+SYM_FUNC_END(cpc_prime_test1_asm)
-SYM_FUNC_START(cpc_stream_hwpf_test_asm)
+SYM_FUNC_START(cpc_prime_test2_asm)
push %rbx
- push %rcx
- push %rdx
- push %r8
- push %r9
+ push %r12
+
+ # this test checks whether the L1 is filled *completely* with our cache
+ # lines by prime, it does not however guarantee that the eviction
+ # preference of the cache lines is in line-order (0 first, 7 last)
+
+ mov cpc_ds, %r10
wbinvd
- readpmc $CPC_L1MISS_PMC %r8
+ # access this line repeatedly to convince the
+ # replacement policy to keep it cached
+ mov (%rdi), %rax
+ mov (%rdi), %rax
+ mov (%rdi), %rax
+ mov (%rdi), %rax
+ mov (%rdi), %rax
+ mov (%rdi), %rax
+ mov (%rdi), %rax
- mov 0x000(%rdi), %rax
- mov 0x040(%rdi), %rax
- mov 0x080(%rdi), %rax
- mov 0x0c0(%rdi), %rax
- mov 0x100(%rdi), %rax
- mov 0x140(%rdi), %rax
- mov 0x180(%rdi), %rax
- mov 0x1c0(%rdi), %rax
- mov 0x200(%rdi), %rax
- mov 0x240(%rdi), %rax
+ prime_pass prime_test2_1 %r10 %r11 %r12
- readpmc $CPC_L1MISS_PMC %r9
+ readpmc $0 %r8
+ prime_pass prime_test2_2 %r12 %r11 %r10
+ readpmc $0 %r9
mov %r9, %rax
sub %r8, %rax
- pop %r9
- pop %r8
- pop %rdx
- pop %rcx
+ pop %r12
pop %rbx
ret
-SYM_FUNC_END(cpc_stream_hwpf_test_asm)
+SYM_FUNC_END(cpc_prime_test2_asm)
-SYM_FUNC_START(cpc_single_eviction_test_asm)
+SYM_FUNC_START(cpc_prime_test3_asm)
+ push %rbx
+ push %r12
+
+ # this test is similar to test2 in that it checks whether
+ # the cache is completely filled with our cache lines after prime,
+ # but it does so for just one set (prime + evict) at a time
+
+ mov cpc_ds, %r10
+
+ wbinvd
+ barrier
+
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+ mov (%rsi), %rax
+
+ prime_pass prime_test3 %r10 %r11 %r12
+
+ readpmc $0 %r8
+.rept L1_ASSOC
+ mov CPC_CL_NEXT_OFFSET(%rdi), %rdi
+.endr
+ readpmc $0 %r9
+
+ mov %r9, %rax
+ sub %r8, %rax
+
+ pop %r12
+ pop %rbx
+
+ ret
+SYM_FUNC_END(cpc_prime_test3_asm)
+
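+# cpc_eviction_prio_test_asm(access, cl): prime, touch access (%rdi)
+# once, then measure whether loading cl (%rsi) misses in L1; a nonzero
+# return means cl was the line evicted by the access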
+SYM_FUNC_START(cpc_eviction_prio_test_asm)
push %rbx
- push %rcx
- push %rdx
- push %r8
- push %r9
- push %r10
- push %r11
push %r12
+ mov cpc_ds, %r10
+
wbinvd
+ prime eviction_prio_test %r10 %r11 %r12
+
+ mov (%rdi), %rax
+
+ readpmc $0 %r8
+ mov (%rsi), %rax
+ readpmc $0 %r9
+
+ mov %r9, %rax
+ sub %r8, %rax
+
+ pop %r12
+ pop %rbx
+
+ ret
+SYM_FUNC_END(cpc_eviction_prio_test_asm)
+
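+# full prime+probe pass over every set; with no victim access in
+# between, all per-set probe counts should come out zero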
+SYM_FUNC_START(cpc_prime_probe_test_asm)
+ push %rbx
+ push %r12
+
+ mov cpc_ds, %r9
+ prime prime_probe_test %r9 %r10 %r8
+ probe prime_probe_test $0 %r8 %r9 %r10 %r11 %r12
+
+ pop %r12
+ pop %rbx
+
+ ret
+SYM_FUNC_END(cpc_prime_probe_test_asm)
+
+SYM_FUNC_START(cpc_single_eviction_test_asm)
+ push %rbx
+ push %r12
+ push %r13
+
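+ # counter 1 counts interrupts taken (programmed by the C caller);
+ # the returned delta is used to flag noisy runs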
+ readpmc $1 %r13
+
mov cpc_ds, %r9
prime single_eviction_test %r9 %r10 %r8
- prime single_eviction_test2 %r9 %r10 %r8
- prime single_eviction_test3 %r9 %r10 %r8
mov (%rdi), %rax
- probe single_eviction_test %r8 %r9 %r10 %r11 %r12
+ probe single_eviction_test $0 %r8 %r9 %r10 %r11 %r12
+
+ readpmc $1 %r12
+ mov %r12, %rax
+ sub %r13, %rax
+ pop %r13
pop %r12
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdx
- pop %rcx
pop %rbx
ret
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -11,6 +11,18 @@
#define PMC_HOST (1 << 1)
#define PMC_GUEST (1 << 0)
+#define CPC_L1MISS_PMC_INIT(pmc) \
+ cpc_init_pmc(pmc, 0x60, 0b10000000, 0, PMC_KERNEL)
+
+#define CPC_L1MISS_GUEST_PMC_INIT(pmc) \
+ cpc_init_pmc(pmc, 0x60, 0b10000000, PMC_GUEST, PMC_USER | PMC_KERNEL)
+
+#define CPC_RETINST_PMC_INIT(pmc) \
+ cpc_init_pmc(pmc, 0xC0, 0x00, PMC_GUEST, PMC_USER | PMC_KERNEL)
+
+#define CPC_RETINST_USER_PMC_INIT(pmc) \
+ cpc_init_pmc(pmc, 0xC0, 0x00, PMC_GUEST, PMC_USER)
+
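+/* cpc_init_pmc() arguments, as used above: counter index, event select,
+ * unit mask, host/guest filter (PMC_HOST / PMC_GUEST) and privilege
+ * filter (PMC_USER / PMC_KERNEL) */
+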
#define CPC_DBG(...) do { \
if (cpc_loglevel >= CPC_LOGLVL_DBG) \
pr_info("CachePC: " __VA_ARGS__); \
diff --git a/cachepc/const.h b/cachepc/const.h
@@ -9,9 +9,9 @@
#define CPC_ISOLCPU 2
#define CPC_L1MISS_PMC 0
-#define CPC_RETINST_PMC 1
-#define CPC_RETINST_USER_PMC 2
-#define CPC_L1MISS_GUEST_PMC 3
+#define CPC_L1MISS_GUEST_PMC 1
+#define CPC_RETINST_PMC 2
+#define CPC_RETINST_USER_PMC 3
#define CPC_VMSA_MAGIC_ADDR ((void *) 0xC0FFEE)
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -16,7 +16,7 @@
#include <linux/types.h>
#include <asm/uaccess.h>
-#define TEST_REPEAT_MAX 1000
+#define TEST_REPEAT_MAX 50
uint32_t cpc_loglevel = 0;
EXPORT_SYMBOL(cpc_loglevel);
@@ -100,13 +100,25 @@ struct cpc_cl *cpc_ds_probe = NULL;
EXPORT_SYMBOL(cpc_ds);
EXPORT_SYMBOL(cpc_ds_probe);
+uint64_t cpc_stream_hwpf_test_asm(struct cpc_cl *lines);
+static noinline void cpc_stream_hwpf_test(void);
+
+uint64_t cpc_prime_test1_asm(void);
+static noinline void cpc_prime_test1(void);
+
+uint64_t cpc_prime_test2_asm(struct cpc_cl *lines);
+static noinline void cpc_prime_test2(void);
+
+uint64_t cpc_prime_test3_asm(struct cpc_cl *cl, struct cpc_cl *lines);
+static noinline void cpc_prime_test3(void);
+
+uint64_t cpc_eviction_prio_test_asm(struct cpc_cl *access, struct cpc_cl *cl);
+static noinline void cpc_eviction_prio_test(void);
+
void cpc_prime_probe_test_asm(void);
static noinline void cpc_prime_probe_test(void);
-uint64_t cpc_stream_hwpf_test_asm(void *lines);
-static noinline void cpc_stream_hwpf_test(void);
-
-void cpc_single_eviction_test_asm(void *ptr);
+uint64_t cpc_single_eviction_test_asm(void *ptr);
static noinline void cpc_single_eviction_test(void *p);
static void cpc_pmc_setup(void *p);
@@ -133,75 +145,215 @@ static int cpc_track_mode_ioctl(void __user *arg_user);
static int cpc_req_pause_ioctl(void __user *arg_user);
+
void
-cpc_prime_probe_test(void)
+cpc_stream_hwpf_test(void)
{
- int i, n, count;
+ const uint32_t max = 0;
+ struct cpc_cl *lines;
+ uint32_t count;
+ int n;
+
+ /* l1 hardware prefetches */
+ cpc_init_pmc(0, 0x70, 0xE0, 0, PMC_KERNEL);
+ cpc_init_pmc(1, 0x71, 0xE0, 0, PMC_KERNEL);
+ cpc_init_pmc(2, 0x72, 0xE0, 0, PMC_KERNEL);
- /* l2 data cache hit & miss */
- cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+ /* l2 hardware prefetches */
+ cpc_init_pmc(3, 0x70, 0x1F, 0, PMC_KERNEL);
+ cpc_init_pmc(4, 0x71, 0x1F, 0, PMC_KERNEL);
+ cpc_init_pmc(5, 0x72, 0x1F, 0, PMC_KERNEL);
+
+ lines = cpc_aligned_alloc(L1_SIZE, L1_SIZE);
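+
+ /* the pointer-chasing prime pass should trigger no hardware prefetches */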
for (n = 0; n < TEST_REPEAT_MAX; n++) {
- memset(cpc_msrmts, 0, L1_SETS);
- cpc_prime_probe_test_asm();
- cpc_save_msrmts(cpc_ds);
+ count = cpc_stream_hwpf_test_asm(lines);
+ if (count != max) {
+ CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
+ n, count, max);
+ break;
+ }
+ }
- count = 0;
- for (i = 0; i < L1_SETS; i++)
- count += cpc_msrmts[i];
+ if (n == TEST_REPEAT_MAX)
+ CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);
- if (count != 0) {
- CPC_ERR("Prime-probe %i. test failed (%u vs. %u)\n",
- n, count, 0);
+ kfree(lines);
+}
+
+void
+cpc_prime_test1(void)
+{
+ const uint64_t max = L1_SETS * L1_ASSOC;
+ uint64_t ret;
+ int n;
+
+ CPC_L1MISS_PMC_INIT(0);
+
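+ /* after wbinvd every primed line should miss exactly once; with a
+ * 32 KiB 8-way L1D and 64-byte lines (Zen-like geometry, assumed
+ * here) that is 64 sets * 8 ways = 512 misses */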
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ ret = cpc_prime_test1_asm();
+ if (ret != max) {
+ CPC_ERR("Prime (1) %i. test failed (%llu vs. %llu)\n",
+ n, ret, max);
break;
}
}
if (n == TEST_REPEAT_MAX)
- CPC_INFO("Prime-probe test ok (%u vs. %u)\n", count, 0);
+ CPC_INFO("Prime (1) test ok (%llu vs. %llu)\n", ret, max);
}
void
-cpc_stream_hwpf_test(void)
+cpc_prime_test2(void)
{
- const uint32_t max = 10;
+ const uint64_t max = 0;
struct cpc_cl *lines;
- uint32_t count;
+ uint64_t ret;
int n;
- /* l2 data cache hit & miss */
- cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+ CPC_L1MISS_PMC_INIT(0);
- lines = cpc_aligned_alloc(L1_SIZE, L1_SIZE);
+ lines = cpc_aligned_alloc(PAGE_SIZE, L1_SIZE);
- count = 0;
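+ /* max stays 0: a second prime pass must hit everywhere, since the
+ * first pass should leave the whole L1 filled with our lines */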
for (n = 0; n < TEST_REPEAT_MAX; n++) {
- count = cpc_stream_hwpf_test_asm(lines);
- if (count != max) {
- CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
- n, count, max);
+ ret = cpc_prime_test2_asm(lines);
+ if (ret != max) {
+ CPC_ERR("Prime (2) %i. test failed (%llu vs. %llu)\n",
+ n, ret, max);
break;
}
}
if (n == TEST_REPEAT_MAX)
- CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);
+ CPC_INFO("Prime (2) test ok (%llu vs. %llu)\n", ret, max);
+
+ kfree(lines);
+}
+
+void
+cpc_prime_test3(void)
+{
+ uint64_t count;
+ struct cpc_cl *lines;
+ struct cpc_cl *cl;
+ int n, set, line;
+
+ CPC_L1MISS_PMC_INIT(0);
+
+ lines = cpc_aligned_alloc(PAGE_SIZE, L1_SIZE);
+
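+ /* one set at a time: after prime, re-walking a set's L1_ASSOC lines
+ * should yield zero misses even though the set was polluted with a
+ * conflicting line beforehand */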
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ cl = cpc_ds;
+ for (set = 0; set < L1_SETS; set++) {
+ count = cpc_prime_test3_asm(cl, &lines[set]);
+ for (line = 0; line < L1_ASSOC; line++)
+ cl = cl->next;
+ if (count) {
+ CPC_ERR("Prime (3) %u. test failed "
+ "(set %u, count %llu)\n",
+ n, set, count);
+ goto exit;
+ }
+ }
+ }
+
+exit:
+ if (n == TEST_REPEAT_MAX)
+ CPC_INFO("Prime (3) test ok\n");
kfree(lines);
}
void
+cpc_eviction_prio_test(void)
+{
+ uint64_t ret, count;
+ struct cpc_cl *access_ul, *access;
+ struct cpc_cl *cl;
+ int n, set, line, evic;
+
+ CPC_L1MISS_PMC_INIT(0);
+
+ access_ul = cpc_aligned_alloc(L1_SIZE, L1_SIZE);
+
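+ /* prime, touch one conflicting line, then check each of the set's
+ * lines: exactly one should have been evicted, and it should be
+ * line 0, i.e. eviction priority follows prime order */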
+ for (n = 0; n < TEST_REPEAT_MAX / 2; n++) {
+ cl = cpc_ds;
+ for (set = 0; set < L1_SETS; set++) {
+ access = &access_ul[set];
+ count = 0;
+ evic = -1;
+ for (line = 0; line < L1_ASSOC; line++) {
+ ret = cpc_eviction_prio_test_asm(access, cl);
+ if (ret) {
+ evic = line;
+ count++;
+ }
+ cl = cl->next;
+ }
+ if (!count) {
+ CPC_ERR("Eviction prio %u. test failed "
+ "(set %u, count %llu)\n",
+ n, set, count);
+ break;
+ }
+ if (count > 1 || evic != 0) {
+ CPC_ERR("Eviction prio %u. test failed "
+ "(set %u, count %llu, evic %u vs %u)\n",
+ n, set, count, evic, 0);
+ break;
+ }
+ }
+ if (set != L1_SETS) break;
+ }
+
+ if (n == TEST_REPEAT_MAX / 2)
+ CPC_INFO("Eviction prio test ok\n");
+
+ kfree(access_ul);
+}
+
+void
+cpc_prime_probe_test(void)
+{
+ int i, n, count;
+
+ CPC_L1MISS_PMC_INIT(0);
+
+ for (n = 0; n < TEST_REPEAT_MAX; n++) {
+ memset(cpc_msrmts, 0, L1_SETS);
+ cpc_prime_probe_test_asm();
+ cpc_save_msrmts(cpc_ds);
+
+ count = 0;
+ for (i = 0; i < L1_SETS; i++)
+ count += cpc_msrmts[i];
+
+ if (count != 0) {
+ CPC_ERR("Prime+Probe %i. test failed (%u vs. %u)\n",
+ n, count, 0);
+ break;
+ }
+ }
+
+ if (n == TEST_REPEAT_MAX)
+ CPC_INFO("Prime+Probe test ok (%u vs. %u)\n", count, 0);
+}
+
+void
cpc_single_eviction_test(void *p)
{
struct cpc_cl *victim_ul;
struct cpc_cl *victim;
uint32_t target, *arg;
+ uint64_t ret;
int n, i, count;
arg = p;
- /* l2 data cache hit & miss */
- cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+ CPC_L1MISS_PMC_INIT(0);
+
+ /* interrupts taken */
+ cpc_init_pmc(1, 0x2C, 0x00, 0, 0);
WARN_ON(arg && *arg >= L1_SETS);
if (arg && *arg >= L1_SETS) return;
@@ -212,7 +364,7 @@ cpc_single_eviction_test(void *p)
for (n = 0; n < TEST_REPEAT_MAX; n++) {
memset(cpc_msrmts, 0, L1_SETS);
- cpc_single_eviction_test_asm(victim);
+ ret = cpc_single_eviction_test_asm(victim);
cpc_save_msrmts(cpc_ds);
count = 0;
@@ -220,11 +372,14 @@ cpc_single_eviction_test(void *p)
count += cpc_msrmts[i];
if (count != 1 || cpc_msrmts[target] != 1) {
- CPC_ERR("Single eviction %i. test failed (%u vs %u)\n",
- n, count, 1);
+ CPC_ERR("Single eviction %i. test failed (%u vs %u) (intr %llu)\n",
+ n, count, 1, ret);
if (arg) *arg = 1;
break;
}
+
+ if (ret != 0)
+ CPC_INFO("Single eviction: %llu interrupts but no failure\n", ret);
}
if (n == TEST_REPEAT_MAX) {
@@ -239,26 +394,23 @@ void
cpc_pmc_setup(void *p)
{
/* L1 misses in host kernel */
- cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8,
- PMC_HOST, PMC_KERNEL);
+ CPC_L1MISS_PMC_INIT(CPC_L1MISS_PMC);
+
+ /* L1 misses in guest */
+ CPC_L1MISS_GUEST_PMC_INIT(CPC_L1MISS_GUEST_PMC);
/* retired instructions in guest */
- cpc_init_pmc(CPC_RETINST_PMC, 0xC0, 0x00,
- PMC_GUEST, PMC_USER | PMC_KERNEL);
+ CPC_RETINST_PMC_INIT(CPC_RETINST_PMC);
/* retired instructions in guest userspace */
- cpc_init_pmc(CPC_RETINST_USER_PMC, 0xC0, 0x00,
- PMC_GUEST, PMC_USER);
-
- /* L1 misses in guest */
- cpc_init_pmc(CPC_L1MISS_GUEST_PMC, 0x64, 0xD8,
- PMC_GUEST, PMC_USER | PMC_KERNEL);
-
+ CPC_RETINST_USER_PMC_INIT(CPC_RETINST_USER_PMC);
}
void
cpc_system_setup(void)
{
+ uint32_t n;
+
/* NOTE: since most of these MSRs are poorly documented and some
* guesswork was involved, it is likely that one or more of
* these operations are not needed */
@@ -284,6 +436,10 @@ cpc_system_setup(void)
/* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27.2021 P.111 */
/* disable speculation */
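/* MSR 0x48 is SPEC_CTRL: bits 0, 1, 2 and 7 set IBRS, STIBP,
 * SSBD and PSFD respectively */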
cpc_write_msr(0x00000048, 0, 0b10000111);
+
+ //for (n = 0; n < 8; n++)
+ // CPC_INFO("MASK %u: %08llx\n", n, __rdmsr(0x848 + n) & 0xffffffff);
+ (void)n;
}
int
@@ -623,7 +779,12 @@ void
cpc_setup_test(void *p)
{
spinlock_t lock;
- int cpu;
+ uint64_t intr_count;
+ uint64_t shared_l2;
+ uint32_t taskpri;
+ uint32_t prev_ctrl, ctrl;
+ uint32_t prev_mask[8];
+ int n, cpu;
spin_lock_init(&lock);
@@ -636,12 +797,54 @@ cpc_setup_test(void *p)
cpc_system_setup();
+ (void) taskpri;
+ //taskpri = native_apic_mem_read(APIC_TASKPRI);
+ //CPC_INFO("TASK PRI %u\n", taskpri);
+ //native_apic_mem_write(APIC_TASKPRI, 0xff);
+
+ (void) prev_ctrl;
+ (void) ctrl;
+ (void) prev_mask;
+ // prev_ctrl = native_apic_mem_read(APIC_ECTRL);
+ // ctrl = prev_ctrl | 1;
+ // native_apic_mem_write(APIC_ECTRL, ctrl);
+
+ (void) n;
+ // for (n = 0; n < 8; n++) {
+ // prev_mask[n] = native_apic_mem_read(0x480 + 0x10 * n);
+ // native_apic_mem_write(0x480 + 0x10 * n, 0x00);
+ // }
+
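+ /* count shared L2 accesses (counter 4) and interrupts (counter 5)
+ * across the whole test run, reported below */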
+ cpc_init_pmc(4, 0x60, 0b100000, 0, 0);
+ cpc_init_pmc(5, 0x2C, 0x00, 0, 0);
+
spin_lock_irq(&lock);
- cpc_prime_probe_test();
+
+ shared_l2 = cpc_read_pmc(4);
+ intr_count = cpc_read_pmc(5);
+
cpc_stream_hwpf_test();
+ cpc_prime_test1();
+ cpc_prime_test2();
+ cpc_prime_test3();
+ cpc_eviction_prio_test();
+ cpc_prime_probe_test();
cpc_single_eviction_test(NULL);
+
+ shared_l2 = cpc_read_pmc(4) - shared_l2;
+ intr_count = cpc_read_pmc(5) - intr_count;
+ CPC_INFO("Shared L2 accesses: %llu\n", shared_l2);
+ CPC_INFO("Interrupts during test: %llu\n", intr_count);
+
spin_unlock_irq(&lock);
+ // native_apic_mem_write(APIC_ECTRL, prev_ctrl);
+ // for (n = 0; n < 8; n++) {
+ // native_apic_mem_write(0x480 + 0x10 * n, prev_mask[n]);
+ // }
+
+ //native_apic_mem_write(APIC_TASKPRI, taskpri);
+
exit:
put_cpu();
}
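
A minimal sketch of driving the single-eviction test for one chosen set
(hypothetical wrapper, not part of this commit; it assumes the interface
above, where the argument is the target set index and is overwritten
with 1 on failure):

	static int
	cpc_single_eviction_test_one_set(uint32_t set)
	{
		uint32_t arg = set;

		if (set >= L1_SETS)
			return -EINVAL;

		cpc_single_eviction_test(&arg);

		/* on failure the test overwrites *arg with 1; this is
		 * ambiguous when set == 1, a quirk of the interface */
		return (set != 1 && arg == 1) ? -EIO : 0;
	}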
diff --git a/cachepc/macro.S b/cachepc/macro.S
@@ -3,8 +3,8 @@
# clobbers rax, rbx, rcx, rdx
.macro barrier
mfence # finish loads and stores
- lfence # prevent reordering
- rdtsc # prevent reordering
+ mov $0x80000005, %eax
+ cpuid # fully serializing, prevents reordering
.endm
# clobbers rax, rbx, rcx, rdx, (out)
@@ -27,34 +27,41 @@
.endm
# clobbers rax, rbx, rcx, rdx, cl_tmp, (cl_out)
-.macro prime name cl_in cl_tmp cl_out
+.macro prime_pass name cl_in cl_tmp cl_out
barrier
mov \cl_in, \cl_tmp
-prime_loop_\name:
+.rept L1_SETS * L1_ASSOC - 1
+ mov CPC_CL_NEXT_OFFSET(\cl_tmp), \cl_tmp
+.endr
mov \cl_tmp, \cl_out
mov CPC_CL_NEXT_OFFSET(\cl_tmp), \cl_tmp
- cmp \cl_tmp, \cl_in
- jne prime_loop_\name
barrier
.endm
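+
+# Rough C equivalent of one prime_pass, for orientation (assuming the
+# line list is circular, as the old loop's termination check implied;
+# "next" stands for the pointer at CPC_CL_NEXT_OFFSET):
+#
+#	struct cpc_cl *cl = cl_in;
+#	for (i = 0; i < L1_SETS * L1_ASSOC - 1; i++)
+#		cl = cl->next;
+#	cl_out = cl;    /* last line touched */
+#	cl = cl->next;  /* final load wraps back to cl_in */
+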
+# clobbers rax, rbx, rcx, rdx, cl_tmp, (cl_out)
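+# repeated passes make it much more likely that every way of every set
+# ends up holding one of our lines, despite replacement-policy noise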
+.macro prime name cl_in cl_tmp cl_out
+ prime_pass pass1_\name \cl_in \cl_tmp \cl_out
+ prime_pass pass2_\name \cl_in \cl_tmp \cl_out
+ prime_pass pass3_\name \cl_in \cl_tmp \cl_out
+.endm
+
# clobbers rax, rbx, rcx, rdx, cl_tmp1, cl_tmp2, pmc_tmp, pmc_tmp2
-.macro probe name cl_in cl_tmp1 cl_tmp2 pmc_tmp1 pmc_tmp2
+.macro probe name pmc cl_in cl_tmp1 cl_tmp2 pmc_tmp1 pmc_tmp2
barrier
mov \cl_in, \cl_tmp1
probe_loop_\name:
- readpmc $CPC_L1MISS_PMC \pmc_tmp1
+ readpmc \pmc \pmc_tmp1
-.rept L1_ASSOC-1
+.rept L1_ASSOC - 1
mov CPC_CL_PREV_OFFSET(\cl_tmp1), \cl_tmp1
.endr
mov CPC_CL_PREV_OFFSET(\cl_tmp1), \cl_tmp2
- readpmc $CPC_L1MISS_PMC \pmc_tmp2
+ readpmc \pmc \pmc_tmp2
sub \pmc_tmp1, \pmc_tmp2
mov \pmc_tmp2, CPC_CL_COUNT_OFFSET(\cl_tmp1)