cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines
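For readers new to the technique: Prime+Probe fills every way of a cache set with attacker-owned lines, lets the victim run, then re-accesses those lines and counts evictions to learn which sets the victim touched. Below is a minimal user-space sketch of that primitive. It is illustrative only — this module instead primes the L1D from kernel context via the macros in cachepc/macro.S and counts misses with a performance counter rather than timing — and the constants and helper names are assumptions for a 32 KiB, 8-way, 64-set L1D.

    #include <stdint.h>
    #include <stdlib.h>

    #define L1_SETS  64
    #define L1_ASSOC 8
    #define L1_LINE  64
    #define L1_SIZE  (L1_SETS * L1_ASSOC * L1_LINE)

    static inline uint64_t timestamp(void)
    {
            uint32_t lo, hi;

            /* lfence keeps earlier loads from drifting past the read */
            __asm__ volatile("lfence\n\trdtsc" : "=a"(lo), "=d"(hi));
            return ((uint64_t)hi << 32) | lo;
    }

    /* prime: load one line per way so the set holds only our data */
    static void prime_set(volatile uint8_t *buf, int set)
    {
            int way;

            for (way = 0; way < L1_ASSOC; way++)
                    (void)buf[way * L1_SETS * L1_LINE + set * L1_LINE];
    }

    /* probe: re-load the same lines; evictions show up as extra latency */
    static uint64_t probe_set(volatile uint8_t *buf, int set)
    {
            uint64_t start = timestamp();

            prime_set(buf, set);
            return timestamp() - start;
    }

    int main(void)
    {
            volatile uint8_t *buf = aligned_alloc(L1_SIZE, L1_SIZE);
            int set;

            for (set = 0; set < L1_SETS; set++) {
                    prime_set(buf, set);
                    /* ... victim runs here and touches some sets ... */
                    (void)probe_set(buf, set); /* compare to a threshold */
            }
            free((void *)buf);
            return 0;
    }

Real code must additionally pin itself to a core and contend with noise, the hardware prefetchers and the replacement policy — which is exactly what the tests added in the commit below verify.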
git clone https://git.sinitax.com/sinitax/cachepc

commit 5ad426b7b5859275739616e5f62041317b5511a1
parent 7eed418586797f379093a7073ead428ed7a5caea
Author: Louis Burda <quent.burda@gmail.com>
Date:   Thu,  9 Feb 2023 06:22:52 -0600

Added more tests and sanity checks

Diffstat:
M cachepc/asm.S     | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M cachepc/cachepc.h |  12 ++++++++++++
M cachepc/const.h   |   6 +++---
M cachepc/kvm.c     | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
M cachepc/macro.S   |  27 +++++++++++++++++----------
5 files changed, 434 insertions(+), 125 deletions(-)

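A note on the data structure every hunk below keeps walking: cpc_ds points to a circular chain of cache-line-sized elements, one per set and way of the L1D, linked through next/prev pointers; the CPC_CL_NEXT_OFFSET, CPC_CL_PREV_OFFSET and CPC_CL_COUNT_OFFSET constants used by the prime/probe macros index into one such element. The layout sketched here is inferred from those accesses, not copied from cachepc.h:

    #include <stdint.h>

    /* Assumed shape of one eviction-set element (inferred from the
     * CPC_CL_*_OFFSET accesses in macro.S): exactly one cache line,
     * chained through every set and way of the L1D. */
    struct cpc_cl {
            struct cpc_cl *next; /* walked forward by prime / prime_pass */
            struct cpc_cl *prev; /* walked backward by probe */
            uint64_t count;      /* per-line miss count stored by probe */
            uint8_t pad[64 - 2 * sizeof(struct cpc_cl *) - sizeof(uint64_t)];
    };

Chasing pointers through such a chain makes every load depend on the previous one: the walk is serialized and presents no regular stride, so the hardware prefetcher cannot pull lines in early. The stream_hwpf test touched by this commit verifies exactly that property.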
diff --git a/cachepc/asm.S b/cachepc/asm.S
@@ -3,119 +3,206 @@
 #include <linux/linkage.h>

 .global cpc_read_pmc
+.global cpc_eviction_prio_test_asm
+.global cpc_prime_test1_asm
+.global cpc_prime_test2_asm
+.global cpc_prime_test3_asm
 .global cpc_prime_probe_test_asm
 .global cpc_stream_hwpf_test_asm
 .global cpc_single_eviction_test_asm

+# the x86-64 calling convention specifies that only these registers
+# need to be preserved by the callee:
+# rbx, r12, r13, r14, r15, rbp and rsp
+
 SYM_FUNC_START(cpc_read_pmc)
         push %rbx
-        push %rcx
-        push %rdx
-        push %r8

         readpmc %rdi %r8
         mov %r8, %rax

-        pop %r8
-        pop %rdx
-        pop %rcx
         pop %rbx

         ret
 SYM_FUNC_END(cpc_read_pmc)

-SYM_FUNC_START(cpc_prime_probe_test_asm)
+SYM_FUNC_START(cpc_stream_hwpf_test_asm)
+        push %rbx
+
+        wbinvd
+
+        readpmc $0 %r8
+
+        mov cpc_ds, %rdi
+        prime stream_hwpf_test %rdi %rsi %r15
+
+        readpmc $0 %r9
+
+        mov %r9, %rax
+        sub %r8, %rax
+
+        pop %rbx
+
+        ret
+SYM_FUNC_END(cpc_stream_hwpf_test_asm)
+
+SYM_FUNC_START(cpc_prime_test1_asm)
         push %rbx
-        push %rcx
-        push %rdx
-        push %r8
-        push %r9
-        push %r10
-        push %r11
         push %r12

+        # this test checks whether the correct number of cache lines
+        # was evicted by prime; it does not, however, guarantee that
+        # the L1 is filled completely with only our cache lines
+
+        mov cpc_ds, %r10
+
         wbinvd

-        mov cpc_ds, %r9
-        prime prime_probe_test %r9 %r10 %r8
-        prime prime_probe_test1 %r9 %r10 %r8
-        prime prime_probe_test2 %r9 %r10 %r8
-        probe prime_probe_test %r8 %r9 %r10 %r11 %r12
+        readpmc $0 %r8
+        prime_pass prime_test1_2 %r10 %r11 %r12
+        readpmc $0 %r9
+
+        mov %r9, %rax
+        sub %r8, %rax

         pop %r12
-        pop %r11
-        pop %r10
-        pop %r9
-        pop %r8
-        pop %rdx
-        pop %rcx
         pop %rbx

         ret
-SYM_FUNC_END(cpc_prime_probe_test_asm)
+SYM_FUNC_END(cpc_prime_test1_asm)

-SYM_FUNC_START(cpc_stream_hwpf_test_asm)
+SYM_FUNC_START(cpc_prime_test2_asm)
         push %rbx
-        push %rcx
-        push %rdx
-        push %r8
-        push %r9
+        push %r12
+
+        # this test checks whether the L1 is filled *completely* with our
+        # cache lines by prime; it does not, however, guarantee that the
+        # eviction preference of the cache lines is in line-order
+        # (0 first, 7 last)
+
+        mov cpc_ds, %r10

         wbinvd

-        readpmc $CPC_L1MISS_PMC %r8
+        # try to convince replacement policy to
+        # keep this line in
+        mov (%rdi), %rax
+        mov (%rdi), %rax
+        mov (%rdi), %rax
+        mov (%rdi), %rax
+        mov (%rdi), %rax
+        mov (%rdi), %rax
+        mov (%rdi), %rax

-        mov 0x000(%rdi), %rax
-        mov 0x040(%rdi), %rax
-        mov 0x080(%rdi), %rax
-        mov 0x0c0(%rdi), %rax
-        mov 0x100(%rdi), %rax
-        mov 0x140(%rdi), %rax
-        mov 0x180(%rdi), %rax
-        mov 0x1c0(%rdi), %rax
-        mov 0x200(%rdi), %rax
-        mov 0x240(%rdi), %rax
+        prime_pass prime_test2_1 %r10 %r11 %r12

-        readpmc $CPC_L1MISS_PMC %r9
+        readpmc $0 %r8
+        prime_pass prime_test2_2 %r12 %r11 %r10
+        readpmc $0 %r9

         mov %r9, %rax
         sub %r8, %rax

-        pop %r9
-        pop %r8
-        pop %rdx
-        pop %rcx
+        pop %r12
         pop %rbx

         ret
-SYM_FUNC_END(cpc_stream_hwpf_test_asm)
+SYM_FUNC_END(cpc_prime_test2_asm)

-SYM_FUNC_START(cpc_single_eviction_test_asm)
+SYM_FUNC_START(cpc_prime_test3_asm)
+        push %rbx
+        push %r12
+
+        # this test is similar to test2 in that it checks whether
+        # the cache is completely filled with our cache lines after
+        # prime, this time just one prime+evict at a time
+
+        mov cpc_ds, %r10
+
+        wbinvd
+        barrier
+
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+        mov (%rsi), %rax
+
+        prime_pass prime_test3 %r10 %r11 %r12
+
+        readpmc $0 %r8
+.rept L1_ASSOC
+        mov CPC_CL_NEXT_OFFSET(%rdi), %rdi
+.endr
+        readpmc $0 %r9
+
+        mov %r9, %rax
+        sub %r8, %rax
+
+        pop %r12
+        pop %rbx
+
+        ret
+SYM_FUNC_END(cpc_prime_test3_asm)
+
+SYM_FUNC_START(cpc_eviction_prio_test_asm)
         push %rbx
-        push %rcx
-        push %rdx
-        push %r8
-        push %r9
-        push %r10
-        push %r11
         push %r12

+        mov cpc_ds, %r10
+
         wbinvd
+        prime eviction_prio_test %r10 %r11 %r12
+
+        mov (%rdi), %rax
+
+        readpmc $0 %r8
+        mov (%rsi), %rax
+        readpmc $0 %r9
+
+        mov %r9, %rax
+        sub %r8, %rax
+
+        pop %r12
+        pop %rbx
+
+        ret
+SYM_FUNC_END(cpc_eviction_prio_test_asm)
+
+SYM_FUNC_START(cpc_prime_probe_test_asm)
+        push %rbx
+        push %r12
+
+        mov cpc_ds, %r9
+        prime prime_probe_test %r9 %r10 %r8
+        probe prime_probe_test $0 %r8 %r9 %r10 %r11 %r12
+
+        pop %r12
+        pop %rbx
+
+        ret
+SYM_FUNC_END(cpc_prime_probe_test_asm)
+
+SYM_FUNC_START(cpc_single_eviction_test_asm)
+        push %rbx
+        push %r12
+        push %r13
+
+        readpmc $1 %r13
+
         mov cpc_ds, %r9
         prime single_eviction_test %r9 %r10 %r8
-        prime single_eviction_test2 %r9 %r10 %r8
-        prime single_eviction_test3 %r9 %r10 %r8

         mov (%rdi), %rax

-        probe single_eviction_test %r8 %r9 %r10 %r11 %r12
+        probe single_eviction_test $0 %r8 %r9 %r10 %r11 %r12
+
+        readpmc $1 %r12
+        mov %r12, %rax
+        sub %r13, %rax
+
+        pop %r13
         pop %r12
-        pop %r11
-        pop %r10
-        pop %r9
-        pop %r8
-        pop %rdx
-        pop %rcx
         pop %rbx

         ret
diff --git a/cachepc/cachepc.h b/cachepc/cachepc.h
@@ -11,6 +11,18 @@
 #define PMC_HOST (1 << 1)
 #define PMC_GUEST (1 << 0)

+#define CPC_L1MISS_PMC_INIT(pmc) \
+        cpc_init_pmc(pmc, 0x60, 0b10000000, 0, PMC_KERNEL)
+
+#define CPC_L1MISS_GUEST_PMC_INIT(pmc) \
+        cpc_init_pmc(pmc, 0x60, 0b10000000, PMC_GUEST, PMC_USER | PMC_KERNEL)
+
+#define CPC_RETINST_PMC_INIT(pmc) \
+        cpc_init_pmc(pmc, 0xC0, 0x00, PMC_GUEST, PMC_USER | PMC_KERNEL)
+
+#define CPC_RETINST_USER_PMC_INIT(pmc) \
+        cpc_init_pmc(pmc, 0xC0, 0x00, PMC_GUEST, PMC_USER)
+
 #define CPC_DBG(...) \
         do { \
                 if (cpc_loglevel >= CPC_LOGLVL_DBG) \
                         pr_info("CachePC: " __VA_ARGS__); \
diff --git a/cachepc/const.h b/cachepc/const.h
@@ -9,9 +9,9 @@
 #define CPC_ISOLCPU 2

 #define CPC_L1MISS_PMC 0
-#define CPC_RETINST_PMC 1
-#define CPC_RETINST_USER_PMC 2
-#define CPC_L1MISS_GUEST_PMC 3
+#define CPC_L1MISS_GUEST_PMC 1
+#define CPC_RETINST_PMC 2
+#define CPC_RETINST_USER_PMC 3

 #define CPC_VMSA_MAGIC_ADDR ((void *) 0xC0FFEE)
diff --git a/cachepc/kvm.c b/cachepc/kvm.c
@@ -16,7 +16,7 @@
 #include <linux/types.h>
 #include <asm/uaccess.h>

-#define TEST_REPEAT_MAX 1000
+#define TEST_REPEAT_MAX 50

 uint32_t cpc_loglevel = 0;
 EXPORT_SYMBOL(cpc_loglevel);
@@ -100,13 +100,25 @@ struct cpc_cl *cpc_ds_probe = NULL;
 EXPORT_SYMBOL(cpc_ds);
 EXPORT_SYMBOL(cpc_ds_probe);

+uint64_t cpc_stream_hwpf_test_asm(struct cpc_cl *lines);
+static noinline void cpc_stream_hwpf_test(void);
+
+uint64_t cpc_prime_test1_asm(void);
+static noinline void cpc_prime_test1(void);
+
+uint64_t cpc_prime_test2_asm(struct cpc_cl *lines);
+static noinline void cpc_prime_test2(void);
+
+uint64_t cpc_prime_test3_asm(struct cpc_cl *cl, struct cpc_cl *lines);
+static noinline void cpc_prime_test3(void);
+
+uint64_t cpc_eviction_prio_test_asm(struct cpc_cl *access, struct cpc_cl *cl);
+static noinline void cpc_eviction_prio_test(void);
+
 void cpc_prime_probe_test_asm(void);
 static noinline void cpc_prime_probe_test(void);

-uint64_t cpc_stream_hwpf_test_asm(void *lines);
-static noinline void cpc_stream_hwpf_test(void);
-
-void cpc_single_eviction_test_asm(void *ptr);
+uint64_t cpc_single_eviction_test_asm(void *ptr);
 static noinline void cpc_single_eviction_test(void *p);

 static void cpc_pmc_setup(void *p);
@@ -133,75 +145,215 @@ static int cpc_track_mode_ioctl(void __user *arg_user);
 static int cpc_req_pause_ioctl(void __user *arg_user);

+
 void
-cpc_prime_probe_test(void)
+cpc_stream_hwpf_test(void)
 {
-        int i, n, count;
+        const uint32_t max = 0;
+        struct cpc_cl *lines;
+        uint32_t count;
+        int n;
+
+        /* l1 hardware prefetches */
+        cpc_init_pmc(0, 0x70, 0xE0, 0, PMC_KERNEL);
+        cpc_init_pmc(1, 0x71, 0xE0, 0, PMC_KERNEL);
+        cpc_init_pmc(2, 0x72, 0xE0, 0, PMC_KERNEL);

-        /* l2 data cache hit & miss */
-        cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+        /* l2 hardware prefetches */
+        cpc_init_pmc(3, 0x70, 0x1F, 0, PMC_KERNEL);
+        cpc_init_pmc(4, 0x71, 0x1F, 0, PMC_KERNEL);
+        cpc_init_pmc(5, 0x72, 0x1F, 0, PMC_KERNEL);
+
+        lines = cpc_aligned_alloc(L1_SIZE, L1_SIZE);

         for (n = 0; n < TEST_REPEAT_MAX; n++) {
-                memset(cpc_msrmts, 0, L1_SETS);
-                cpc_prime_probe_test_asm();
-                cpc_save_msrmts(cpc_ds);
+                count = cpc_stream_hwpf_test_asm(lines);
+                if (count != max) {
+                        CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
+                                n, count, max);
+                        break;
+                }
+        }

-                count = 0;
-                for (i = 0; i < L1_SETS; i++)
-                        count += cpc_msrmts[i];
+        if (n == TEST_REPEAT_MAX)
+                CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);

-                if (count != 0) {
-                        CPC_ERR("Prime-probe %i. test failed (%u vs. %u)\n",
-                                n, count, 0);
+        kfree(lines);
+}
+
+void
+cpc_prime_test1(void)
+{
+        const uint64_t max = L1_SETS * L1_ASSOC;
+        uint64_t ret;
+        int n;
+
+        CPC_L1MISS_PMC_INIT(0);
+
+        for (n = 0; n < TEST_REPEAT_MAX; n++) {
+                ret = cpc_prime_test1_asm();
+                if (ret != max) {
+                        CPC_ERR("Prime (1) %i. test failed (%llu vs. %llu)\n",
+                                n, ret, max);
                         break;
                 }
         }

         if (n == TEST_REPEAT_MAX)
-                CPC_INFO("Prime-probe test ok (%u vs. %u)\n", count, 0);
+                CPC_INFO("Prime (1) test ok (%llu vs. %llu)\n", ret, max);
 }

 void
-cpc_stream_hwpf_test(void)
+cpc_prime_test2(void)
 {
-        const uint32_t max = 10;
+        const uint64_t max = 0;
         struct cpc_cl *lines;
-        uint32_t count;
+        uint64_t ret;
         int n;

-        /* l2 data cache hit & miss */
-        cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+        CPC_L1MISS_PMC_INIT(0);

-        lines = cpc_aligned_alloc(L1_SIZE, L1_SIZE);
+        lines = cpc_aligned_alloc(PAGE_SIZE, L1_SIZE);

-        count = 0;
         for (n = 0; n < TEST_REPEAT_MAX; n++) {
-                count = cpc_stream_hwpf_test_asm(lines);
-                if (count != max) {
-                        CPC_ERR("HWPF %i. test failed (%u vs. %u)\n",
-                                n, count, max);
+                ret = cpc_prime_test2_asm(lines);
+                if (ret != max) {
+                        CPC_ERR("Prime (2) %i. test failed (%llu vs. %llu)\n",
+                                n, ret, max);
                         break;
                 }
         }

         if (n == TEST_REPEAT_MAX)
-                CPC_INFO("HWPF test ok (%u vs. %u)\n", count, max);
+                CPC_INFO("Prime (2) test ok (%llu vs. %llu)\n", ret, max);
+
+        kfree(lines);
+}
+
+void
+cpc_prime_test3(void)
+{
+        uint64_t count;
+        struct cpc_cl *lines;
+        struct cpc_cl *cl;
+        int n, set, line;
+
+        CPC_L1MISS_PMC_INIT(0);
+
+        lines = cpc_aligned_alloc(PAGE_SIZE, L1_SIZE);
+
+        for (n = 0; n < TEST_REPEAT_MAX; n++) {
+                cl = cpc_ds;
+                for (set = 0; set < L1_SETS; set++) {
+                        count = cpc_prime_test3_asm(cl, &lines[set]);
+                        for (line = 0; line < L1_ASSOC; line++)
+                                cl = cl->next;
+                        if (count) {
+                                CPC_ERR("Prime (3) %u. test failed "
+                                        "(set %u, count %llu)\n",
+                                        n, set, count);
+                                goto exit;
+                        }
+                }
+        }
+
+exit:
+        if (n == TEST_REPEAT_MAX)
+                CPC_INFO("Prime (3) test ok\n");

         kfree(lines);
 }

 void
+cpc_eviction_prio_test(void)
+{
+        uint64_t ret, count;
+        struct cpc_cl *access_ul, *access;
+        struct cpc_cl *cl;
+        int n, set, line, evic;
+
+        CPC_L1MISS_PMC_INIT(0);
+
+        access_ul = cpc_aligned_alloc(L1_SIZE, L1_SIZE);
+
+        for (n = 0; n < TEST_REPEAT_MAX / 2; n++) {
+                cl = cpc_ds;
+                for (set = 0; set < L1_SETS; set++) {
+                        access = &access_ul[set];
+                        count = 0;
+                        evic = -1;
+                        for (line = 0; line < L1_ASSOC; line++) {
+                                ret = cpc_eviction_prio_test_asm(access, cl);
+                                if (ret) {
+                                        evic = line;
+                                        count++;
+                                }
+                                cl = cl->next;
+                        }
+                        if (!count) {
+                                CPC_ERR("Eviction prio %u. test failed "
+                                        "(set %u, count %llu)\n",
+                                        n, set, count);
+                                break;
+                        }
+                        if (count > 1 || evic != 0) {
+                                CPC_ERR("Eviction prio %u. test failed "
+                                        "(set %u, count %llu, evic %u vs %u)\n",
+                                        n, set, count, evic, 0);
+                                break;
+                        }
+                }
+                if (set != L1_SETS) break;
+        }
+
+        if (n == TEST_REPEAT_MAX / 2)
+                CPC_INFO("Eviction prio test ok\n");
+
+        kfree(access_ul);
+}
+
+void
+cpc_prime_probe_test(void)
+{
+        int i, n, count;
+
+        CPC_L1MISS_PMC_INIT(0);
+
+        for (n = 0; n < TEST_REPEAT_MAX; n++) {
+                memset(cpc_msrmts, 0, L1_SETS);
+                cpc_prime_probe_test_asm();
+                cpc_save_msrmts(cpc_ds);
+
+                count = 0;
+                for (i = 0; i < L1_SETS; i++)
+                        count += cpc_msrmts[i];
+
+                if (count != 0) {
+                        CPC_ERR("Prime+Probe %i. test failed (%u vs. %u)\n",
+                                n, count, 0);
+                        break;
+                }
+        }
+
+        if (n == TEST_REPEAT_MAX)
+                CPC_INFO("Prime+Probe test ok (%u vs. %u)\n", count, 0);
+}
+
+void
 cpc_single_eviction_test(void *p)
 {
         struct cpc_cl *victim_ul;
         struct cpc_cl *victim;
         uint32_t target, *arg;
+        uint64_t ret;
         int n, i, count;

         arg = p;

-        /* l2 data cache hit & miss */
-        cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8, 0, PMC_KERNEL);
+        CPC_L1MISS_PMC_INIT(0);
+
+        /* interrupts taken */
+        cpc_init_pmc(1, 0x2C, 0x00, 0, 0);

         WARN_ON(arg && *arg >= L1_SETS);
         if (arg && *arg >= L1_SETS) return;
@@ -212,7 +364,7 @@ cpc_single_eviction_test(void *p)

         for (n = 0; n < TEST_REPEAT_MAX; n++) {
                 memset(cpc_msrmts, 0, L1_SETS);
-                cpc_single_eviction_test_asm(victim);
+                ret = cpc_single_eviction_test_asm(victim);
                 cpc_save_msrmts(cpc_ds);

                 count = 0;
@@ -220,11 +372,14 @@ cpc_single_eviction_test(void *p)
                         count += cpc_msrmts[i];

                 if (count != 1 || cpc_msrmts[target] != 1) {
-                        CPC_ERR("Single eviction %i. test failed (%u vs %u)\n",
-                                n, count, 1);
+                        CPC_ERR("Single eviction %i. test failed (%u vs %u) (intr %llu)\n",
+                                n, count, 1, ret);
                         if (arg) *arg = 1;
                         break;
                 }
+
+                if (ret != 0)
+                        CPC_INFO("Single eviction %llu interrupts but no issue\n", ret);
         }

         if (n == TEST_REPEAT_MAX) {
@@ -239,26 +394,23 @@
 void
 cpc_pmc_setup(void *p)
 {
         /* L1 misses in host kernel */
-        cpc_init_pmc(CPC_L1MISS_PMC, 0x64, 0xD8,
-                PMC_HOST, PMC_KERNEL);
+        CPC_L1MISS_PMC_INIT(CPC_L1MISS_PMC);
+
+        /* L1 misses in guest */
+        CPC_L1MISS_GUEST_PMC_INIT(CPC_L1MISS_GUEST_PMC);

         /* retired instructions in guest */
-        cpc_init_pmc(CPC_RETINST_PMC, 0xC0, 0x00,
-                PMC_GUEST, PMC_USER | PMC_KERNEL);
+        CPC_RETINST_PMC_INIT(CPC_RETINST_PMC);

         /* retired instructions in guest userspace */
-        cpc_init_pmc(CPC_RETINST_USER_PMC, 0xC0, 0x00,
-                PMC_GUEST, PMC_USER);
-
-        /* L1 misses in guest */
-        cpc_init_pmc(CPC_L1MISS_GUEST_PMC, 0x64, 0xD8,
-                PMC_GUEST, PMC_USER | PMC_KERNEL);
-
+        CPC_RETINST_USER_PMC_INIT(CPC_RETINST_USER_PMC);
 }

 void
 cpc_system_setup(void)
 {
+        uint32_t n;
+
         /* NOTE: since most of these MSRs are poorly documented and some
          * guessing work was involved, it is likely that one or more of
          * these operations are not needed */
@@ -284,6 +436,10 @@ cpc_system_setup(void)
         /* REF: PPR Family 19h Model 01h Vol 1/2 Rev 0.50 May 27, 2021 P.111 */
         /* disable speculation */
         cpc_write_msr(0x00000048, 0, 0b10000111);
+
+        //for (n = 0; n < 8; n++)
+        //        CPC_INFO("MASK %u: %08llx\n", n, __rdmsr(0x848 + n) & 0xffffffff);
+        (void)n;
 }

 int
@@ -623,7 +779,12 @@
 void
 cpc_setup_test(void *p)
 {
         spinlock_t lock;
-        int cpu;
+        uint64_t intr_count;
+        uint64_t shared_l2;
+        uint32_t taskpri;
+        uint32_t prev_ctrl, ctrl;
+        uint32_t prev_mask[8];
+        int n, cpu;

         spin_lock_init(&lock);

@@ -636,12 +797,54 @@ cpc_setup_test(void *p)

         cpc_system_setup();

+        (void) taskpri;
+        //taskpri = native_apic_mem_read(APIC_TASKPRI);
+        //CPC_INFO("TASK PRI %u\n", taskpri);
+        //native_apic_mem_write(APIC_TASKPRI, 0xff);
+
+        (void) prev_ctrl;
+        (void) ctrl;
+        (void) prev_mask;
+        // prev_ctrl = native_apic_mem_read(APIC_ECTRL);
+        // ctrl = prev_ctrl | 1;
+        // native_apic_mem_write(APIC_ECTRL, ctrl);
+
+        (void) n;
+        // for (n = 0; n < 8; n++) {
+        //         prev_mask[n] = native_apic_mem_read(0x480 + 0x10 * n);
+        //         native_apic_mem_write(0x480 + 0x10 * n, 0x00);
+        // }
+
+        cpc_init_pmc(4, 0x60, 0b100000, 0, 0);
+        cpc_init_pmc(5, 0x2C, 0x00, 0, 0);
+
         spin_lock_irq(&lock);
-        cpc_prime_probe_test();
+
+        shared_l2 = cpc_read_pmc(4);
+        intr_count = cpc_read_pmc(5);
+
         cpc_stream_hwpf_test();
+        cpc_prime_test1();
+        cpc_prime_test2();
+        cpc_prime_test3();
+        cpc_eviction_prio_test();
+        cpc_prime_probe_test();
         cpc_single_eviction_test(NULL);
+
+        shared_l2 = cpc_read_pmc(4) - shared_l2;
+        intr_count = cpc_read_pmc(5) - intr_count;
+        CPC_INFO("Shared L2 accesses: %llu\n", shared_l2);
+        CPC_INFO("Interrupts during test: %llu\n", intr_count);
+
         spin_unlock_irq(&lock);

+        // native_apic_mem_write(APIC_ECTRL, prev_ctrl);
+        // for (n = 0; n < 8; n++) {
+        //         native_apic_mem_write(0x480 + 0x10 * n, prev_mask[n]);
+        // }
+
+        //native_apic_mem_write(APIC_TASKPRI, taskpri);
+
 exit:
         put_cpu();
 }
diff --git a/cachepc/macro.S b/cachepc/macro.S
@@ -3,8 +3,8 @@
 # clobbers rax, rbx, rcx, rdx
 .macro barrier
         mfence # finish loads and stores
-        lfence # prevent reordering
-        rdtsc # prevent reordering
+        mov $0x80000005, %eax
+        cpuid # prevent reordering
 .endm

 # clobbers rax, rbx, rcx, rdx, (out)
@@ -27,34 +27,41 @@
 .endm

 # clobbers rax, rbx, rcx, rdx, cl_tmp, (cl_out)
-.macro prime name cl_in cl_tmp cl_out
+.macro prime_pass name cl_in cl_tmp cl_out
         barrier
         mov \cl_in, \cl_tmp
-prime_loop_\name:
+.rept L1_SETS * L1_ASSOC - 1
+        mov CPC_CL_NEXT_OFFSET(\cl_tmp), \cl_tmp
+.endr
         mov \cl_tmp, \cl_out
         mov CPC_CL_NEXT_OFFSET(\cl_tmp), \cl_tmp
-        cmp \cl_tmp, \cl_in
-        jne prime_loop_\name
         barrier
 .endm

+# clobbers rax, rbx, rcx, rdx, cl_tmp, (cl_out)
+.macro prime name cl_in cl_tmp cl_out
+        prime_pass pass1_\name \cl_in \cl_tmp \cl_out
+        prime_pass pass2_\name \cl_in \cl_tmp \cl_out
+        prime_pass pass3_\name \cl_in \cl_tmp \cl_out
+.endm
+
 # clobbers rax, rbx, rcx, rdx, cl_tmp1, cl_tmp2, pmc_tmp1, pmc_tmp2
-.macro probe name cl_in cl_tmp1 cl_tmp2 pmc_tmp1 pmc_tmp2
+.macro probe name pmc cl_in cl_tmp1 cl_tmp2 pmc_tmp1 pmc_tmp2
         barrier
         mov \cl_in, \cl_tmp1

probe_loop_\name:
-        readpmc $CPC_L1MISS_PMC \pmc_tmp1
+        readpmc \pmc \pmc_tmp1

-.rept L1_ASSOC-1
+.rept L1_ASSOC - 1
         mov CPC_CL_PREV_OFFSET(\cl_tmp1), \cl_tmp1
 .endr
         mov CPC_CL_PREV_OFFSET(\cl_tmp1), \cl_tmp2

-        readpmc $CPC_L1MISS_PMC \pmc_tmp2
+        readpmc \pmc \pmc_tmp2

         sub \pmc_tmp1, \pmc_tmp2
         mov \pmc_tmp2, CPC_CL_COUNT_OFFSET(\cl_tmp1)
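The pmc argument added to probe (and readpmc's first operand throughout) selects which hardware counter brackets the L1_ASSOC pointer-chasing loads of a set; the difference of the two reads becomes that line's count. As a rough sketch of what such a counter read boils down to on AMD (an assumption about the readpmc macro, not its verbatim body):

    #include <stdint.h>

    /* rdpmc returns the performance counter selected by ecx in edx:eax.
     * AMD core PMCs are 48 bits wide, hence the mask. Outside ring 0
     * the instruction faults unless CR4.PCE is set. */
    static inline uint64_t read_pmc(uint32_t idx)
    {
            uint32_t lo, hi;

            __asm__ volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(idx));
            return (((uint64_t)hi << 32) | lo) & 0xffffffffffffULL;
    }

The barrier change in the hunk above swaps lfence/rdtsc for cpuid: cpuid is architecturally fully serializing, so no load or store can be reordered across a prime or probe boundary.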