cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kmmio.c (16547B)


// SPDX-License-Identifier: GPL-2.0
/* Support for MMIO probes.
 * Borrows much code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long addr; /* the requested address */
	pteval_t old_presence; /* page presence prior to arming */
	bool armed;

	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU) and post_kmmio_handler().
	 * Protected by kmmio_lock, when linked into kmmio_page_table.
	 */
	int count;

	bool scheduled_for_release;
};

struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long addr)
{
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);

	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * this is basically a dynamic stabbing problem:
 * Could use the existing prio tree code or
 * Possible better implementations:
 * The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point (might be simple)
 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;
	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr < (p->addr + p->len))
			return p;
	}
	return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
	struct list_head *head;
	struct kmmio_fault_page *f;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);
	head = kmmio_page_list(addr);
	list_for_each_entry_rcu(f, head, list) {
		if (f->addr == addr)
			return f;
	}
	return NULL;
}

static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
	pmd_t new_pmd;
	pmdval_t v = pmd_val(*pmd);
	if (clear) {
		*old = v;
		new_pmd = pmd_mkinvalid(*pmd);
	} else {
		/* Presume this has been called with clear==true previously */
		new_pmd = __pmd(*old);
	}
	set_pmd(pmd, new_pmd);
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
	pteval_t v = pte_val(*pte);
	if (clear) {
		*old = v;
		/* Nothing should care about address */
		pte_clear(&init_mm, 0, pte);
	} else {
		/* Presume this has been called with clear==true previously */
		set_pte_atomic(pte, __pte(*old));
	}
}

static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
	unsigned int level;
	pte_t *pte = lookup_address(f->addr, &level);

	if (!pte) {
		pr_err("no pte for addr 0x%08lx\n", f->addr);
		return -1;
	}

	switch (level) {
	case PG_LEVEL_2M:
		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
		break;
	case PG_LEVEL_4K:
		clear_pte_presence(pte, clear, &f->old_presence);
		break;
	default:
		pr_err("unexpected page level 0x%x.\n", level);
		return -1;
	}

	flush_tlb_one_kernel(f->addr);
	return 0;
}

/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. RCU read lock is assumed held, so the struct
 * will not disappear unexpectedly. Furthermore, the caller must guarantee
 * that double arming the same virtual address (page) cannot occur.
 *
 * Double disarming on the other hand is allowed, and may occur when a fault
 * and mmiotrace shutdown happen simultaneously.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret;
	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
	if (f->armed) {
		pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
			f->addr, f->count, !!f->old_presence);
	}
	ret = clear_page_presence(f, true);
	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
		  f->addr);
	f->armed = true;
	return ret;
}

/** Restore the given page to saved presence state. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret = clear_page_presence(f, false);
	WARN_ONCE(ret < 0,
			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
	f->armed = false;
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be executing
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry, as the page fault is taken through
 * an interrupt gate, and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;
	int ret = 0; /* default to fault not handled */
	unsigned long page_base = addr;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);
	if (!pte)
		return -EINVAL;
	page_base &= page_level_mask(l);

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run. We also hold the RCU read lock over single
	 * stepping to avoid looking up the probe and kmmio_fault_page
	 * again.
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(page_base);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. The latter case should not be possible.
		 */
		goto no_kmmio;
	}

	ctx = this_cpu_ptr(&kmmio_ctx);
	if (ctx->active) {
		if (page_base == ctx->addr) {
			/*
			 * A second fault on the same page means some other
			 * condition needs handling by do_page_fault(); the
			 * page really not being present is the most common.
			 */
			pr_debug("secondary hit for 0x%08lx CPU %d.\n",
				 addr, smp_processor_id());

			if (!faultpage->old_presence)
				pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
					addr, smp_processor_id());
		} else {
			/*
			 * Prevent overwriting an already in-flight context.
			 * This should not happen; let's hope disarming at
			 * least prevents a panic.
			 */
			pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
				 smp_processor_id(), addr);
			pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
			disarm_kmmio_fault_page(faultpage);
		}
		goto no_kmmio;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(page_base);
	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	ctx->addr = page_base;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	/*
	 * Enable single-stepping and disable interrupts for the faulting
	 * context. Local interrupts must not get enabled during stepping.
	 */
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;

	/* Now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage);

	/*
	 * If another cpu accesses the same page while we are stepping,
	 * the access will not be caught. It will simply succeed and the
	 * only downside is we lose the event. If this becomes a problem,
	 * the user should drop to single cpu before tracing.
	 */

	return 1; /* fault handled */

no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return ret;
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);

	if (!ctx->active) {
		/*
		 * Debug traps without an active context are due to either
		 * something external causing them (e.g. using a debugger while
		 * mmio tracing is enabled), or erroneous behaviour.
		 */
		pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
		goto out;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	/* Prevent racing against release_kmmio_fault_page(). */
	spin_lock(&kmmio_lock);
	if (ctx->fpage->count)
		arm_kmmio_fault_page(ctx->fpage);
	spin_unlock(&kmmio_lock);

	regs->flags &= ~X86_EFLAGS_TF;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set, in which case continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		ret = 1;
out:
	return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long addr)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f);
		f->count++;
		return 0;
	}

	f = kzalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->addr = addr;

	if (arm_kmmio_fault_page(f)) {
		kfree(f);
		return -1;
	}

	list_add_rcu(&f->list, kmmio_page_list(f->addr));

	return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long addr,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f);
		if (!f->scheduled_for_release) {
			f->release_next = *release_list;
			*release_list = f;
			f->scheduled_for_release = true;
		}
	}
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. The events may result only from programming
 * mistakes by accessing addresses before the beginning or past the end of a
 * mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	int ret = 0;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	unsigned int l;
	pte_t *pte;

	spin_lock_irqsave(&kmmio_lock, flags);
	if (get_kmmio_probe(addr)) {
		ret = -EEXIST;
		goto out;
	}

	pte = lookup_address(addr, &l);
	if (!pte) {
		ret = -EINVAL;
		goto out;
	}

	kmmio_count++;
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < size_lim) {
		if (add_kmmio_fault_page(addr + size))
			pr_err("Unable to set page fault.\n");
		size += page_level_size(l);
	}
out:
	spin_unlock_irqrestore(&kmmio_lock, flags);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore. It seems it's not needed after all.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
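
/*
 * Illustrative sketch of how a caller might use register_kmmio_probe()
 * (example only, not part of this file). Field names follow struct
 * kmmio_probe as declared in <linux/mmiotrace.h>; the my_* identifiers
 * are made up. A probe covers [addr, addr + len) of an ioremapped
 * region, and both handlers run with interrupts disabled, so they must
 * not sleep.
 */
#if 0	/* example only */
static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
		   unsigned long addr)
{
	/* Called from the page fault, before the traced access executes. */
	pr_info("mmio access at 0x%lx\n", addr);
}

static void my_post(struct kmmio_probe *p, unsigned long condition,
		    struct pt_regs *regs)
{
	/* Called from the debug trap, after the single-stepped access. */
}

static struct kmmio_probe my_probe;

static int my_trace_start(void __iomem *io, unsigned long len)
{
	my_probe.addr = (unsigned long)io;	/* virtual address of the mapping */
	my_probe.len = len;
	my_probe.pre_handler = my_pre;
	my_probe.post_handler = my_post;
	return register_kmmio_probe(&my_probe);
}
#endif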

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *f = dr->release_list;
	while (f) {
		struct kmmio_fault_page *next = f->release_next;
		BUG_ON(f->count);
		kfree(f);
		f = next;
	}
	kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr =
		container_of(head, struct kmmio_delayed_release, rcu);
	struct kmmio_fault_page *f = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (f) {
		if (!f->count) {
			list_del_rcu(&f->list);
			prevp = &f->release_next;
		} else {
			*prevp = f->release_next;
			f->release_next = NULL;
			f->scheduled_for_release = false;
		}
		f = *prevp;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU destroy call. */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, again via RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;
	unsigned int l;
	pte_t *pte;

	pte = lookup_address(addr, &l);
	if (!pte)
		return;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (size < size_lim) {
		release_kmmio_fault_page(addr + size, &release_list);
		size += page_level_size(l);
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irqrestore(&kmmio_lock, flags);

	if (!release_list)
		return;

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. RCU grace period sounds like a
	 * good choice.
	 *
	 * If we removed the pages too early, kmmio page fault handler might
	 * not find the respective kmmio_fault_page and determine it's not
	 * a kmmio fault, when it actually is. This would lead to madness.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
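
/*
 * Illustrative teardown sketch for the rule documented above (example
 * only, not part of this file): a dynamically allocated probe may only
 * be freed or reused after an RCU grace period has elapsed, so that no
 * pre/post handler can still be running.
 */
#if 0	/* example only */
static void my_trace_stop(struct kmmio_probe *p)
{
	unregister_kmmio_probe(p);
	synchronize_rcu();	/* wait for in-flight handlers to finish */
	kfree(p);		/* now the probe may safely be released */
}
#endif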

static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
	struct die_args *arg = args;
	unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

	if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
		if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
			/*
			 * Reset the BS bit in dr6 (pointed to by arg->err) to
			 * denote completion of processing.
			 */
			*dr6_p &= ~DR_STEP;
			return NOTIFY_STOP;
		}

	return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};

int kmmio_init(void)
{
	int i;

	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&kmmio_page_table[i]);

	return register_die_notifier(&nb_die);
}

void kmmio_cleanup(void)
{
	int i;

	unregister_die_notifier(&nb_die);
	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
	}
}