cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

opt.c (15273B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
#include <linux/static_call.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

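/*
 * Recover the original bytes at @addr when they are hidden under a
 * jump-optimized kprobe's JMP32. Returns @addr if no optprobe covers it,
 * 0 on copy failure, or the address of @buf holding the reconstructed bytes.
 */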
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is still being optimized */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe was optimized, the original bytes may have been
	 * overwritten by the jump destination address. In this case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}

static void synthesize_clac(kprobe_opcode_t *addr)
{
	/*
	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
	 * This isn't a fast path anyway.
	 */
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return;

	/* Replace the NOP3 with CLAC */
	addr[0] = 0x0f;
	addr[1] = 0x01;
	addr[2] = 0xca;
}

/* Insert a move instruction which loads a pointer into eax/rdi (the 1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
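	/* 48 bf <imm64>: movabsq $val, %rdi */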
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
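	/* b8 <imm32>: movl $val, %eax */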
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			"       pushq $" __stringify(__KERNEL_DS) "\n"
			/* Save 'sp - 8'; this will be fixed up later. */
			"	pushq %rsp\n"
			"	pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 20*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addq $16, %rsp\n"
			/* And pop the flags register from 'regs->ss'. */
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %ss\n"
			/* Save 'sp - 4'; this will be fixed up later. */
			"	pushl %esp\n"
			"	pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Copy 'regs->flags' into 'regs->ss'. */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 16*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip 'regs->flags' and 'regs->sp'. */
			"	addl $8, %esp\n"
			/* And pop the flags register from 'regs->ss'. */
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

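/*
 * Byte offsets of each patch site within the template above: the CLAC
 * NOP3, the arg1-load NOPs, the call NOPs, and the end of the template
 * (where the copied original instructions are appended).
 */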
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/* Optimized kprobe callback function: called from the optinsn slot */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimization */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Adjust the stack pointer: the template saved 'sp - sizeof(long)' */
		regs->sp += sizeof(long);
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

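/*
 * Copy at least JMP32_INSN_SIZE bytes' worth of whole instructions from
 * @src into @dest, refusing instructions that cannot be boosted (executed
 * out of line) and ranges reserved by other text-patching users. Returns
 * the copied length, or a negative error code.
 */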
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1) ||
	    static_call_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* jmp near/far indirect (ModRM reg 4 or 5) */
		insn->opcode.bytes[0] == 0xea);	/* Segment-based (far direct) jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
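	/* Relative branches: target = address of the next instruction + signed displacement */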
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to an x86_indirect_thunk_* symbol is treated as an indirect
	 * jump. Note that even with CONFIG_RETPOLINE=y, a kernel compiled
	 * with an older gcc may still emit indirect jumps, so this check is
	 * added to, rather than replacing, the indirect-jump check above.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}

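/* Return true if every byte in [addr, eaddr) is an INT3 (0xcc) padding byte. */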
static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
{
	unsigned char ops;

	for (; addr < eaddr; addr++) {
		if (get_kernel_nofault(ops, (void *)addr) < 0 ||
		    ops != INT3_INSN_OPCODE)
			return false;
	}

	return true;
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup the symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to unstable stack
	 * handling and register setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		int ret;

		if (search_exception_tables(addr))
			/*
			 * Since some fixup code jumps into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;

		ret = insn_decode_kernel(&insn, (void *)recovered_insn);
		if (ret < 0)
			return 0;

		/*
		 * If an unknown breakpoint is detected, it could be a
		 * padding INT3 between functions. Check that all the
		 * rest of the bytes are also INT3.
		 */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
			return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;

		/* Recover the address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that the instruction doesn't jump into the target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 kprobe_opcode_t *addr)
{
	return (op->kp.addr <= addr &&
		op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	u8 *slot = op->optinsn.insn;
	if (slot) {
		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;

		/* Record the perf event before freeing the slot */
		if (dirty)
			perf_event_text_poke(slot, slot, len, NULL, 0);

		free_optinsn_slot(slot, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the jump.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the ±2GB range, because
	 * this uses a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

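	/*
	 * Out-of-line buffer layout: [template][copied original insns]
	 * [jmp back to the probed function]. The template is patched
	 * below via the TMPL_*_IDX offsets.
	 */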
	/* Copy the arch-dependent instance from the template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	synthesize_clac(buf + TMPL_CLAC_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set the probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set the returning jmp instruction at the tail of the out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += JMP32_INSN_SIZE;

	/*
	 * Note that len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE
	 * is also used in __arch_remove_optimized_kprobe().
	 */

	/* We have to use text_poke() for the instruction buffer because it is RO */
	perf_event_text_poke(slot, NULL, 0, buf, len);
	text_poke(slot, buf, len);

	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (INT3) with relative jumps (JMP.d32).
 * The caller must hold kprobe_mutex and text_mutex.
 *
 * The caller will have installed a regular kprobe and after that issued
 * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
 * the 4 bytes after the INT3 are unused and can now be overwritten.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[JMP32_INSN_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + JMP32_INSN_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Back up the instructions which will be replaced by the jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
		       DISP32_SIZE);

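		/* e9 <rel32>: jmp.d32 to the out-of-line buffer */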
		insn_buff[0] = JMP32_INSN_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

		list_del_init(&op->list);
	}
}

/*
 * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
 *
 * After that, we can restore the 4 bytes after the INT3 to undo what
 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
 * unused once the INT3 lands.
 */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
	u8 old[JMP32_INSN_SIZE];
	u8 *addr = op->kp.addr;

	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
	memcpy(new + INT3_INSN_SIZE,
	       op->optinsn.copied_insn,
	       JMP32_INSN_SIZE - INT3_INSN_SIZE);

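	/*
	 * Two-step patch: first punch in the INT3 byte and sync all CPUs,
	 * then restore the 4-byte tail and sync again. Once the INT3 is
	 * globally visible, the tail bytes can no longer be executed.
	 */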
	text_poke(addr, new, INT3_INSN_SIZE);
	text_poke_sync();
	text_poke(addr + INT3_INSN_SIZE,
		  new + INT3_INSN_SIZE,
		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
	text_poke_sync();

	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

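/*
 * Called from the INT3 (kprobe) exception handler. If the probe has an
 * optimized buffer, resume execution in the out-of-line copy (past the
 * template prologue) instead of single-stepping the original instruction.
 */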
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe really can run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through the copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);