cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (31336B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *  Kernel Probes (KProbes)
      4 *
      5 * Copyright (C) IBM Corporation, 2002, 2004
      6 *
      7 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
      8 *		Probes initial implementation ( includes contributions from
      9 *		Rusty Russell).
     10 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
     11 *		interface to access function arguments.
     12 * 2004-Oct	Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
     13 *		<prasanna@in.ibm.com> adapted for x86_64 from i386.
     14 * 2005-Mar	Roland McGrath <roland@redhat.com>
     15 *		Fixed to handle %rip-relative addressing mode correctly.
     16 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
     17 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
     18 *		<prasanna@in.ibm.com> added function-return probes.
     19 * 2005-May	Rusty Lynch <rusty.lynch@intel.com>
     20 *		Added function return probes functionality
     21 * 2006-Feb	Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
     22 *		kprobe-booster and kretprobe-booster for i386.
     23 * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
     24 *		and kretprobe-booster for x86-64
     25 * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
     26 *		<arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
     27 *		unified x86 kprobes code.
     28 */
     29#include <linux/kprobes.h>
     30#include <linux/ptrace.h>
     31#include <linux/string.h>
     32#include <linux/slab.h>
     33#include <linux/hardirq.h>
     34#include <linux/preempt.h>
     35#include <linux/sched/debug.h>
     36#include <linux/perf_event.h>
     37#include <linux/extable.h>
     38#include <linux/kdebug.h>
     39#include <linux/kallsyms.h>
     40#include <linux/ftrace.h>
     41#include <linux/kasan.h>
     42#include <linux/moduleloader.h>
     43#include <linux/objtool.h>
     44#include <linux/vmalloc.h>
     45#include <linux/pgtable.h>
     46
     47#include <asm/text-patching.h>
     48#include <asm/cacheflush.h>
     49#include <asm/desc.h>
     50#include <linux/uaccess.h>
     51#include <asm/alternative.h>
     52#include <asm/insn.h>
     53#include <asm/debugreg.h>
     54#include <asm/set_memory.h>
     55#include <asm/ibt.h>
     56
     57#include "common.h"
     58
     59DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
     60DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
     61
     62#define stack_addr(regs) ((unsigned long *)regs->sp)
     63
     64#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
     65	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
     66	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
     67	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
     68	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
     69	 << (row % 32))
     70	/*
     71	 * Undefined/reserved opcodes, conditional jumps, Opcode Extension
     72	 * Groups, and some special opcodes cannot be boosted.
     73	 * This is non-const and volatile to keep gcc from statically
     74	 * optimizing it out, as variable_test_bit makes gcc think only
     75	 * *(unsigned long*) is used.
     76	 */
     77static volatile u32 twobyte_is_boostable[256 / 32] = {
     78	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
     79	/*      ----------------------------------------------          */
     80	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
     81	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
     82	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
     83	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
     84	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
     85	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
     86	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
     87	W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
     88	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
     89	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
     90	W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
     91	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
     92	W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
     93	W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
     94	W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
     95	W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
     96	/*      -----------------------------------------------         */
     97	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
     98};
     99#undef W
    100
    101struct kretprobe_blackpoint kretprobe_blacklist[] = {
    102	{"__switch_to", }, /* This function switches only current task, but
    103			      doesn't switch kernel stack.*/
    104	{NULL, NULL}	/* Terminator */
    105};
    106
    107const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
    108
    109static nokprobe_inline void
    110__synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
    111{
    112	struct __arch_relative_insn {
    113		u8 op;
    114		s32 raddr;
    115	} __packed *insn;
    116
    117	insn = (struct __arch_relative_insn *)dest;
    118	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
    119	insn->op = op;
    120}
    121
    122/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
    123void synthesize_reljump(void *dest, void *from, void *to)
    124{
    125	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
    126}
    127NOKPROBE_SYMBOL(synthesize_reljump);
    128
    129/* Insert a call instruction at address 'from', which calls address 'to'.*/
    130void synthesize_relcall(void *dest, void *from, void *to)
    131{
    132	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
    133}
    134NOKPROBE_SYMBOL(synthesize_relcall);
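
/*
 * A minimal user-space sketch (not kernel code) of the rel32 arithmetic used
 * by __synthesize_relative_insn() above: the displacement is relative to the
 * end of the 5-byte jmp/call. The addresses below are made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned long from = 0xffffffff81000000UL;	/* where the jmp is placed */
	unsigned long to   = 0xffffffff81000040UL;	/* branch target */

	/* rel32 is relative to the next instruction, i.e. from + 5. */
	int32_t raddr = (int32_t)((long)to - ((long)from + 5));

	printf("e9 rel32 = %#x\n", (unsigned int)raddr);	/* 0x3b here */
	return 0;
}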
    135
    136/*
    137 * Returns non-zero if INSN is boostable.
    138 * RIP-relative instructions are adjusted at copy time in 64-bit mode.
    139 */
    140int can_boost(struct insn *insn, void *addr)
    141{
    142	kprobe_opcode_t opcode;
    143	insn_byte_t prefix;
    144	int i;
    145
    146	if (search_exception_tables((unsigned long)addr))
    147		return 0;	/* Page fault may occur on this address. */
    148
    149	/* 2nd-byte opcode */
    150	if (insn->opcode.nbytes == 2)
    151		return test_bit(insn->opcode.bytes[1],
    152				(unsigned long *)twobyte_is_boostable);
    153
    154	if (insn->opcode.nbytes != 1)
    155		return 0;
    156
    157	for_each_insn_prefix(insn, i, prefix) {
    158		insn_attr_t attr;
    159
    160		attr = inat_get_opcode_attribute(prefix);
    161		/* Can't boost Address-size override prefix and CS override prefix */
    162		if (prefix == 0x2e || inat_is_address_size_prefix(attr))
    163			return 0;
    164	}
    165
    166	opcode = insn->opcode.bytes[0];
    167
    168	switch (opcode) {
    169	case 0x62:		/* bound */
    170	case 0x70 ... 0x7f:	/* Conditional jumps */
    171	case 0x9a:		/* Call far */
    172	case 0xc0 ... 0xc1:	/* Grp2 */
    173	case 0xcc ... 0xce:	/* software exceptions */
    174	case 0xd0 ... 0xd3:	/* Grp2 */
    175	case 0xd6:		/* (UD) */
    176	case 0xd8 ... 0xdf:	/* ESC */
    177	case 0xe0 ... 0xe3:	/* LOOP*, JCXZ */
    178	case 0xe8 ... 0xe9:	/* near Call, JMP */
    179	case 0xeb:		/* Short JMP */
    180	case 0xf0 ... 0xf4:	/* LOCK/REP, HLT */
    181	case 0xf6 ... 0xf7:	/* Grp3 */
    182	case 0xfe:		/* Grp4 */
    183		/* ... are not boostable */
    184		return 0;
    185	case 0xff:		/* Grp5 */
    186		/* Only indirect jmp is boostable */
    187		return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
    188	default:
    189		return 1;
    190	}
    191}
    192
    193static unsigned long
    194__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
    195{
    196	struct kprobe *kp;
    197	bool faddr;
    198
    199	kp = get_kprobe((void *)addr);
    200	faddr = ftrace_location(addr) == addr;
    201	/*
    202	 * Use the current code if it is not modified by Kprobe
    203	 * and it cannot be modified by ftrace.
    204	 */
    205	if (!kp && !faddr)
    206		return addr;
    207
    208	/*
    209	 * Basically, kp->ainsn.insn has an original instruction.
    210	 * However, a RIP-relative instruction cannot be single-stepped
    211	 * at a different place, so __copy_instruction() tweaks the displacement
    212	 * of that instruction. In that case, we can't recover the instruction
    213	 * from kp->ainsn.insn.
    214	 *
    215	 * On the other hand, in the case of a normal kprobe, kp->opcode holds a
    216	 * copy of the first byte of the probed instruction, which is overwritten
    217	 * by int3. Since the instruction at kp->addr is not modified by kprobes
    218	 * except for that first byte, we can recover the original instruction
    219	 * from it and kp->opcode.
    220	 *
    221	 * In case of Kprobes using ftrace, we do not have a copy of
    222	 * the original instruction. In fact, the ftrace location might
    223	 * be modified at any time and could even be in an inconsistent state.
    224	 * Fortunately, we know that the original code is the ideal 5-byte
    225	 * long NOP.
    226	 */
    227	if (copy_from_kernel_nofault(buf, (void *)addr,
    228		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
    229		return 0UL;
    230
    231	if (faddr)
    232		memcpy(buf, x86_nops[5], 5);
    233	else
    234		buf[0] = kp->opcode;
    235	return (unsigned long)buf;
    236}
    237
    238/*
    239 * Recover the probed instruction at addr for further analysis.
    240 * Caller must hold the kprobe_mutex, or disable preemption, to prevent
    241 * the referenced kprobes from being released.
    242 * Returns zero if the instruction cannot be recovered (or access failed).
    243 */
    244unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
    245{
    246	unsigned long __addr;
    247
    248	__addr = __recover_optprobed_insn(buf, addr);
    249	if (__addr != addr)
    250		return __addr;
    251
    252	return __recover_probed_insn(buf, addr);
    253}
    254
    255/* Check if paddr is at an instruction boundary */
    256static int can_probe(unsigned long paddr)
    257{
    258	unsigned long addr, __addr, offset = 0;
    259	struct insn insn;
    260	kprobe_opcode_t buf[MAX_INSN_SIZE];
    261
    262	if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
    263		return 0;
    264
    265	/* Decode instructions */
    266	addr = paddr - offset;
    267	while (addr < paddr) {
    268		int ret;
    269
    270		/*
    271		 * Check if the instruction has been modified by another
    272		 * kprobe, in which case we replace the breakpoint by the
    273		 * original instruction in our buffer.
    274		 * Also, jump optimization will change the breakpoint to
    275		 * relative-jump. Since the relative-jump itself is
    276		 * normally used, we just go through if there is no kprobe.
    277		 */
    278		__addr = recover_probed_instruction(buf, addr);
    279		if (!__addr)
    280			return 0;
    281
    282		ret = insn_decode_kernel(&insn, (void *)__addr);
    283		if (ret < 0)
    284			return 0;
    285
    286		/*
    287		 * Another debugging subsystem might insert this breakpoint.
    288		 * In that case, we can't recover it.
    289		 */
    290		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
    291			return 0;
    292		addr += insn.length;
    293	}
    294
    295	return (addr == paddr);
    296}
    297
    298/* If x86 supports IBT (ENDBR), the leading ENDBR instruction must be skipped. */
    299kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
    300					 bool *on_func_entry)
    301{
    302	if (is_endbr(*(u32 *)addr)) {
    303		*on_func_entry = !offset || offset == 4;
    304		if (*on_func_entry)
    305			offset = 4;
    306
    307	} else {
    308		*on_func_entry = !offset;
    309	}
    310
    311	return (kprobe_opcode_t *)(addr + offset);
    312}
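
/*
 * A minimal user-space sketch (not kernel code) of the adjustment above: with
 * IBT, functions start with a 4-byte endbr64 (f3 0f 1e fa), so a probe placed
 * at offset 0 is moved to offset 4. This simplified check only recognises the
 * plain endbr64 encoding, unlike the kernel's is_endbr().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool starts_with_endbr64(const uint8_t *func)
{
	uint32_t word;

	/* endbr64 read as a little-endian u32 is 0xfa1e0ff3. */
	memcpy(&word, func, sizeof(word));
	return word == 0xfa1e0ff3u;
}

int main(void)
{
	/* endbr64; push %rbp -- a typical IBT-enabled function entry. */
	const uint8_t entry[] = { 0xf3, 0x0f, 0x1e, 0xfa, 0x55 };
	unsigned long offset = 0;

	if (starts_with_endbr64(entry))
		offset = 4;	/* skip the landing pad, as above */

	printf("probe lands at entry+%lu\n", offset);
	return 0;
}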
    313
    314/*
    315 * Copy an instruction, recovering the original if it was modified by kprobes,
    316 * and adjust the displacement if the instruction uses the %rip-relative
    317 * addressing mode. Note that since @real will be the final place of the copied
    318 * instruction, the displacement must be adjusted against @real, not @dest.
    319 * This returns the length of the copied instruction, or 0 on error.
    320 */
    321int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
    322{
    323	kprobe_opcode_t buf[MAX_INSN_SIZE];
    324	unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
    325	int ret;
    326
    327	if (!recovered_insn || !insn)
    328		return 0;
    329
    330	/* This can access kernel text if given address is not recovered */
    331	if (copy_from_kernel_nofault(dest, (void *)recovered_insn,
    332			MAX_INSN_SIZE))
    333		return 0;
    334
    335	ret = insn_decode_kernel(insn, dest);
    336	if (ret < 0)
    337		return 0;
    338
    339	/* We cannot probe an instruction with a forced-emulation prefix */
    340	if (insn_has_emulate_prefix(insn))
    341		return 0;
    342
    343	/* Another subsystem has put a breakpoint there; we failed to recover it */
    344	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
    345		return 0;
    346
    347	/* We should not singlestep on the exception masking instructions */
    348	if (insn_masking_exception(insn))
    349		return 0;
    350
    351#ifdef CONFIG_X86_64
    352	/* Only x86_64 has RIP relative instructions */
    353	if (insn_rip_relative(insn)) {
    354		s64 newdisp;
    355		u8 *disp;
    356		/*
    357		 * The copied instruction uses the %rip-relative addressing
    358		 * mode.  Adjust the displacement for the difference between
    359		 * the original location of this instruction and the location
    360		 * of the copy that will actually be run.  The tricky bit here
    361		 * is making sure that the sign extension happens correctly in
    362		 * this calculation, since we need a signed 32-bit result to
    363		 * be sign-extended to 64 bits when it's added to the %rip
    364		 * value and yield the same 64-bit result that the sign-
    365		 * extension of the original signed 32-bit displacement would
    366		 * have given.
    367		 */
    368		newdisp = (u8 *) src + (s64) insn->displacement.value
    369			  - (u8 *) real;
    370		if ((s64) (s32) newdisp != newdisp) {
    371			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
    372			return 0;
    373		}
    374		disp = (u8 *) dest + insn_offset_displacement(insn);
    375		*(s32 *) disp = (s32) newdisp;
    376	}
    377#endif
    378	return insn->length;
    379}
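
/*
 * The displacement fix-up above reduces to: real + newdisp must reach the same
 * target as src + disp (the instruction lengths cancel because both copies are
 * identical). A user-space sketch (not kernel code) of that computation and of
 * the s32 range check, with made-up addresses:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned long src  = 0xffffffff81234560UL;	/* original instruction */
	unsigned long real = 0xffffffffc0a00000UL;	/* copy in the insn slot */
	int32_t disp = 0x1000;				/* original rip-relative disp */

	int64_t newdisp = (int64_t)src + disp - (int64_t)real;

	if ((int64_t)(int32_t)newdisp != newdisp)
		printf("newdisp does not fit in s32, cannot relocate\n");
	else
		printf("newdisp = %lld\n", (long long)newdisp);
	return 0;
}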
    380
    381/* Prepare reljump or int3 right after instruction */
    382static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
    383			      struct insn *insn)
    384{
    385	int len = insn->length;
    386
    387	if (!IS_ENABLED(CONFIG_PREEMPTION) &&
    388	    !p->post_handler && can_boost(insn, p->addr) &&
    389	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
    390		/*
    391		 * These instructions can be executed directly if they
    392		 * jump back to the correct address.
    393		 */
    394		synthesize_reljump(buf + len, p->ainsn.insn + len,
    395				   p->addr + insn->length);
    396		len += JMP32_INSN_SIZE;
    397		p->ainsn.boostable = 1;
    398	} else {
    399		/* Otherwise, put an int3 for trapping singlestep */
    400		if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
    401			return -ENOSPC;
    402
    403		buf[len] = INT3_INSN_OPCODE;
    404		len += INT3_INSN_SIZE;
    405	}
    406
    407	return len;
    408}
    409
    410/* Make the page read-only when allocating it */
    411void *alloc_insn_page(void)
    412{
    413	void *page;
    414
    415	page = module_alloc(PAGE_SIZE);
    416	if (!page)
    417		return NULL;
    418
    419	set_vm_flush_reset_perms(page);
    420	/*
    421	 * First make the page read-only, and only then make it executable to
    422	 * prevent it from being W+X in between.
    423	 */
    424	set_memory_ro((unsigned long)page, 1);
    425
    426	/*
    427	 * TODO: Once additional kernel code protection mechanisms are set, ensure
    428	 * that the page was not maliciously altered and it is still zeroed.
    429	 */
    430	set_memory_x((unsigned long)page, 1);
    431
    432	return page;
    433}
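
/*
 * The RO-before-X ordering above keeps the slot from ever being writable and
 * executable at the same time. A user-space analogue of the same W^X
 * discipline (sketch only, assuming an x86-64 Linux host):
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned char ret_insn = 0xc3;			/* x86 'ret' */
	void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED)
		return 1;

	memcpy(page, &ret_insn, 1);			/* fill while writable */
	mprotect(page, 4096, PROT_READ);		/* drop write first... */
	mprotect(page, 4096, PROT_READ | PROT_EXEC);	/* ...then add exec */

	((void (*)(void))page)();			/* runs the single 'ret' */
	puts("executed from the now read-only, executable page");
	return 0;
}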
    434
    435/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
    436
    437static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
    438{
    439	switch (p->ainsn.opcode) {
    440	case 0xfa:	/* cli */
    441		regs->flags &= ~(X86_EFLAGS_IF);
    442		break;
    443	case 0xfb:	/* sti */
    444		regs->flags |= X86_EFLAGS_IF;
    445		break;
    446	case 0x9c:	/* pushf */
    447		int3_emulate_push(regs, regs->flags);
    448		break;
    449	case 0x9d:	/* popf */
    450		regs->flags = int3_emulate_pop(regs);
    451		break;
    452	}
    453	regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
    454}
    455NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
    456
    457static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
    458{
    459	int3_emulate_ret(regs);
    460}
    461NOKPROBE_SYMBOL(kprobe_emulate_ret);
    462
    463static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
    464{
    465	unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
    466
    467	func += p->ainsn.rel32;
    468	int3_emulate_call(regs, func);
    469}
    470NOKPROBE_SYMBOL(kprobe_emulate_call);
    471
    472static nokprobe_inline
    473void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
    474{
    475	unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
    476
    477	if (cond)
    478		ip += p->ainsn.rel32;
    479	int3_emulate_jmp(regs, ip);
    480}
    481
    482static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
    483{
    484	__kprobe_emulate_jmp(p, regs, true);
    485}
    486NOKPROBE_SYMBOL(kprobe_emulate_jmp);
    487
    488static const unsigned long jcc_mask[6] = {
    489	[0] = X86_EFLAGS_OF,
    490	[1] = X86_EFLAGS_CF,
    491	[2] = X86_EFLAGS_ZF,
    492	[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
    493	[4] = X86_EFLAGS_SF,
    494	[5] = X86_EFLAGS_PF,
    495};
    496
    497static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
    498{
    499	bool invert = p->ainsn.jcc.type & 1;
    500	bool match;
    501
    502	if (p->ainsn.jcc.type < 0xc) {
    503		match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
    504	} else {
    505		match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
    506			((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
    507		if (p->ainsn.jcc.type >= 0xe)
    508			match = match || (regs->flags & X86_EFLAGS_ZF);
    509	}
    510	__kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
    511}
    512NOKPROBE_SYMBOL(kprobe_emulate_jcc);
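
/*
 * The jcc_mask[] table plus the SF/OF/ZF special case cover all sixteen Jcc
 * condition codes: the low bit of the nibble inverts the test, conditions
 * 0x0..0xb test a single mask from the table, and 0xc..0xf compare SF with OF
 * (JLE/JG additionally accept ZF). A self-contained user-space sketch (not
 * kernel code) of the same evaluation, with local stand-ins for the flag bits:
 */
#include <stdbool.h>
#include <stdio.h>

#define SK_EFLAGS_CF		0x0001UL
#define SK_EFLAGS_PF		0x0004UL
#define SK_EFLAGS_ZF		0x0040UL
#define SK_EFLAGS_SF		0x0080UL
#define SK_EFLAGS_OF		0x0800UL
#define SK_EFLAGS_SF_BIT	7
#define SK_EFLAGS_OF_BIT	11

static const unsigned long sk_jcc_mask[6] = {
	SK_EFLAGS_OF, SK_EFLAGS_CF, SK_EFLAGS_ZF,
	SK_EFLAGS_CF | SK_EFLAGS_ZF, SK_EFLAGS_SF, SK_EFLAGS_PF,
};

/* Evaluate a Jcc condition nibble (0x0..0xf) against an eflags value. */
static bool jcc_taken(unsigned int type, unsigned long flags)
{
	bool invert = type & 1;
	bool match;

	if (type < 0xc) {
		match = flags & sk_jcc_mask[type >> 1];
	} else {
		match = ((flags & SK_EFLAGS_SF) >> SK_EFLAGS_SF_BIT) ^
			((flags & SK_EFLAGS_OF) >> SK_EFLAGS_OF_BIT);
		if (type >= 0xe)	/* JLE/JG also accept ZF */
			match = match || (flags & SK_EFLAGS_ZF);
	}
	return invert ? !match : match;
}

int main(void)
{
	/* JNE is condition 0x5: taken exactly when ZF is clear. */
	printf("jne, ZF clear: %d\n", jcc_taken(0x5, 0));		/* 1 */
	printf("jne, ZF set:   %d\n", jcc_taken(0x5, SK_EFLAGS_ZF));	/* 0 */
	return 0;
}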
    513
    514static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
    515{
    516	bool match;
    517
    518	if (p->ainsn.loop.type != 3) {	/* LOOP* */
    519		if (p->ainsn.loop.asize == 32)
    520			match = ((*(u32 *)&regs->cx)--) != 0;
    521#ifdef CONFIG_X86_64
    522		else if (p->ainsn.loop.asize == 64)
    523			match = ((*(u64 *)&regs->cx)--) != 0;
    524#endif
    525		else
    526			match = ((*(u16 *)&regs->cx)--) != 0;
    527	} else {			/* JCXZ */
    528		if (p->ainsn.loop.asize == 32)
    529			match = *(u32 *)(&regs->cx) == 0;
    530#ifdef CONFIG_X86_64
    531		else if (p->ainsn.loop.asize == 64)
    532			match = *(u64 *)(&regs->cx) == 0;
    533#endif
    534		else
    535			match = *(u16 *)(&regs->cx) == 0;
    536	}
    537
    538	if (p->ainsn.loop.type == 0)	/* LOOPNE */
    539		match = match && !(regs->flags & X86_EFLAGS_ZF);
    540	else if (p->ainsn.loop.type == 1)	/* LOOPE */
    541		match = match && (regs->flags & X86_EFLAGS_ZF);
    542
    543	__kprobe_emulate_jmp(p, regs, match);
    544}
    545NOKPROBE_SYMBOL(kprobe_emulate_loop);
    546
    547static const int addrmode_regoffs[] = {
    548	offsetof(struct pt_regs, ax),
    549	offsetof(struct pt_regs, cx),
    550	offsetof(struct pt_regs, dx),
    551	offsetof(struct pt_regs, bx),
    552	offsetof(struct pt_regs, sp),
    553	offsetof(struct pt_regs, bp),
    554	offsetof(struct pt_regs, si),
    555	offsetof(struct pt_regs, di),
    556#ifdef CONFIG_X86_64
    557	offsetof(struct pt_regs, r8),
    558	offsetof(struct pt_regs, r9),
    559	offsetof(struct pt_regs, r10),
    560	offsetof(struct pt_regs, r11),
    561	offsetof(struct pt_regs, r12),
    562	offsetof(struct pt_regs, r13),
    563	offsetof(struct pt_regs, r14),
    564	offsetof(struct pt_regs, r15),
    565#endif
    566};
    567
    568static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
    569{
    570	unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
    571
    572	int3_emulate_call(regs, regs_get_register(regs, offs));
    573}
    574NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
    575
    576static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
    577{
    578	unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
    579
    580	int3_emulate_jmp(regs, regs_get_register(regs, offs));
    581}
    582NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
    583
    584static int prepare_emulation(struct kprobe *p, struct insn *insn)
    585{
    586	insn_byte_t opcode = insn->opcode.bytes[0];
    587
    588	switch (opcode) {
    589	case 0xfa:		/* cli */
    590	case 0xfb:		/* sti */
    591	case 0x9c:		/* pushfl */
    592	case 0x9d:		/* popf/popfd */
    593		/*
    594		 * IF modifiers must be emulated since they would enable interrupts
    595		 * during int3 single stepping.
    596		 */
    597		p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
    598		p->ainsn.opcode = opcode;
    599		break;
    600	case 0xc2:	/* ret/lret */
    601	case 0xc3:
    602	case 0xca:
    603	case 0xcb:
    604		p->ainsn.emulate_op = kprobe_emulate_ret;
    605		break;
    606	case 0x9a:	/* far call absolute -- segment is not supported */
    607	case 0xea:	/* far jmp absolute -- segment is not supported */
    608	case 0xcc:	/* int3 */
    609	case 0xcf:	/* iret -- in-kernel IRET is not supported */
    610		return -EOPNOTSUPP;
    611		break;
    612	case 0xe8:	/* near call relative */
    613		p->ainsn.emulate_op = kprobe_emulate_call;
    614		if (insn->immediate.nbytes == 2)
    615			p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
    616		else
    617			p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
    618		break;
    619	case 0xeb:	/* short jump relative */
    620	case 0xe9:	/* near jump relative */
    621		p->ainsn.emulate_op = kprobe_emulate_jmp;
    622		if (insn->immediate.nbytes == 1)
    623			p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
    624		else if (insn->immediate.nbytes == 2)
    625			p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
    626		else
    627			p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
    628		break;
    629	case 0x70 ... 0x7f:
    630		/* 1 byte conditional jump */
    631		p->ainsn.emulate_op = kprobe_emulate_jcc;
    632		p->ainsn.jcc.type = opcode & 0xf;
    633		p->ainsn.rel32 = *(char *)insn->immediate.bytes;
    634		break;
    635	case 0x0f:
    636		opcode = insn->opcode.bytes[1];
    637		if ((opcode & 0xf0) == 0x80) {
    638			/* 2 bytes Conditional Jump */
    639			p->ainsn.emulate_op = kprobe_emulate_jcc;
    640			p->ainsn.jcc.type = opcode & 0xf;
    641			if (insn->immediate.nbytes == 2)
    642				p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
    643			else
    644				p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
    645		} else if (opcode == 0x01 &&
    646			   X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
    647			   X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
    648			/* VM extensions - not supported */
    649			return -EOPNOTSUPP;
    650		}
    651		break;
    652	case 0xe0:	/* Loop NZ */
    653	case 0xe1:	/* Loop */
    654	case 0xe2:	/* Loop */
    655	case 0xe3:	/* J*CXZ */
    656		p->ainsn.emulate_op = kprobe_emulate_loop;
    657		p->ainsn.loop.type = opcode & 0x3;
    658		p->ainsn.loop.asize = insn->addr_bytes * 8;
    659		p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
    660		break;
    661	case 0xff:
    662		/*
    663		 * Since the 0xff is an extended group opcode, the instruction
    664		 * is determined by the MOD/RM byte.
    665		 */
    666		opcode = insn->modrm.bytes[0];
    667		if ((opcode & 0x30) == 0x10) {
    668			if ((opcode & 0x8) == 0x8)
    669				return -EOPNOTSUPP;	/* far call */
    670			/* call absolute, indirect */
    671			p->ainsn.emulate_op = kprobe_emulate_call_indirect;
    672		} else if ((opcode & 0x30) == 0x20) {
    673			if ((opcode & 0x8) == 0x8)
    674				return -EOPNOTSUPP;	/* far jmp */
    675			/* jmp near absolute indirect */
    676			p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
    677		} else
    678			break;
    679
    680		if (insn->addr_bytes != sizeof(unsigned long))
    681			return -EOPNOTSUPP;	/* Don't support different size */
    682		if (X86_MODRM_MOD(opcode) != 3)
    683			return -EOPNOTSUPP;	/* TODO: support memory addressing */
    684
    685		p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
    686#ifdef CONFIG_X86_64
    687		if (X86_REX_B(insn->rex_prefix.value))
    688			p->ainsn.indirect.reg += 8;
    689#endif
    690		break;
    691	default:
    692		break;
    693	}
    694	p->ainsn.size = insn->length;
    695
    696	return 0;
    697}
    698
    699static int arch_copy_kprobe(struct kprobe *p)
    700{
    701	struct insn insn;
    702	kprobe_opcode_t buf[MAX_INSN_SIZE];
    703	int ret, len;
    704
    705	/* Copy the instruction, recovering it if another optprobe has modified it. */
    706	len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
    707	if (!len)
    708		return -EINVAL;
    709
    710	/* Analyze the opcode and setup emulate functions */
    711	ret = prepare_emulation(p, &insn);
    712	if (ret < 0)
    713		return ret;
    714
    715	/* Add int3 for single-step or booster jmp */
    716	len = prepare_singlestep(buf, p, &insn);
    717	if (len < 0)
    718		return len;
    719
    720	/* Also, displacement change doesn't affect the first byte */
    721	p->opcode = buf[0];
    722
    723	p->ainsn.tp_len = len;
    724	perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
    725
    726	/* OK, write back the instruction(s) into ROX insn buffer */
    727	text_poke(p->ainsn.insn, buf, len);
    728
    729	return 0;
    730}
    731
    732int arch_prepare_kprobe(struct kprobe *p)
    733{
    734	int ret;
    735
    736	if (alternatives_text_reserved(p->addr, p->addr))
    737		return -EINVAL;
    738
    739	if (!can_probe((unsigned long)p->addr))
    740		return -EILSEQ;
    741
    742	memset(&p->ainsn, 0, sizeof(p->ainsn));
    743
    744	/* insn: must be on special executable page on x86. */
    745	p->ainsn.insn = get_insn_slot();
    746	if (!p->ainsn.insn)
    747		return -ENOMEM;
    748
    749	ret = arch_copy_kprobe(p);
    750	if (ret) {
    751		free_insn_slot(p->ainsn.insn, 0);
    752		p->ainsn.insn = NULL;
    753	}
    754
    755	return ret;
    756}
    757
    758void arch_arm_kprobe(struct kprobe *p)
    759{
    760	u8 int3 = INT3_INSN_OPCODE;
    761
    762	text_poke(p->addr, &int3, 1);
    763	text_poke_sync();
    764	perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
    765}
    766
    767void arch_disarm_kprobe(struct kprobe *p)
    768{
    769	u8 int3 = INT3_INSN_OPCODE;
    770
    771	perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
    772	text_poke(p->addr, &p->opcode, 1);
    773	text_poke_sync();
    774}
    775
    776void arch_remove_kprobe(struct kprobe *p)
    777{
    778	if (p->ainsn.insn) {
    779		/* Record the perf event before freeing the slot */
    780		perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
    781				     p->ainsn.tp_len, NULL, 0);
    782		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
    783		p->ainsn.insn = NULL;
    784	}
    785}
    786
    787static nokprobe_inline void
    788save_previous_kprobe(struct kprobe_ctlblk *kcb)
    789{
    790	kcb->prev_kprobe.kp = kprobe_running();
    791	kcb->prev_kprobe.status = kcb->kprobe_status;
    792	kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
    793	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
    794}
    795
    796static nokprobe_inline void
    797restore_previous_kprobe(struct kprobe_ctlblk *kcb)
    798{
    799	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
    800	kcb->kprobe_status = kcb->prev_kprobe.status;
    801	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
    802	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
    803}
    804
    805static nokprobe_inline void
    806set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
    807		   struct kprobe_ctlblk *kcb)
    808{
    809	__this_cpu_write(current_kprobe, p);
    810	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
    811		= (regs->flags & X86_EFLAGS_IF);
    812}
    813
    814static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
    815			       struct kprobe_ctlblk *kcb)
    816{
    817	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
    818		kcb->kprobe_status = KPROBE_HIT_SSDONE;
    819		cur->post_handler(cur, regs, 0);
    820	}
    821
    822	/* Restore back the original saved kprobes variables and continue. */
    823	if (kcb->kprobe_status == KPROBE_REENTER)
    824		restore_previous_kprobe(kcb);
    825	else
    826		reset_current_kprobe();
    827}
    828NOKPROBE_SYMBOL(kprobe_post_process);
    829
    830static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
    831			     struct kprobe_ctlblk *kcb, int reenter)
    832{
    833	if (setup_detour_execution(p, regs, reenter))
    834		return;
    835
    836#if !defined(CONFIG_PREEMPTION)
    837	if (p->ainsn.boostable) {
    838		/* Boost up -- we can execute copied instructions directly */
    839		if (!reenter)
    840			reset_current_kprobe();
    841		/*
    842		 * Reentering boosted probe doesn't reset current_kprobe,
    843		 * nor set current_kprobe, because it doesn't use single
    844		 * stepping.
    845		 */
    846		regs->ip = (unsigned long)p->ainsn.insn;
    847		return;
    848	}
    849#endif
    850	if (reenter) {
    851		save_previous_kprobe(kcb);
    852		set_current_kprobe(p, regs, kcb);
    853		kcb->kprobe_status = KPROBE_REENTER;
    854	} else
    855		kcb->kprobe_status = KPROBE_HIT_SS;
    856
    857	if (p->ainsn.emulate_op) {
    858		p->ainsn.emulate_op(p, regs);
    859		kprobe_post_process(p, regs, kcb);
    860		return;
    861	}
    862
    863	/* Disable interrupt, and set ip register on trampoline */
    864	regs->flags &= ~X86_EFLAGS_IF;
    865	regs->ip = (unsigned long)p->ainsn.insn;
    866}
    867NOKPROBE_SYMBOL(setup_singlestep);
    868
    869/*
    870 * Called after single-stepping.  p->addr is the address of the
    871 * instruction whose first byte has been replaced by the "int3"
    872 * instruction.  To avoid the SMP problems that can occur when we
    873 * temporarily put back the original opcode to single-step, we
    874 * single-stepped a copy of the instruction.  The address of this
    875 * copy is p->ainsn.insn. We also don't use the trap flag, but another "int3"
    876 * placed right after the copied instruction.
    877 * Unlike trap-based single-stepping, "int3" single-stepping cannot
    878 * handle instructions that change the ip register, e.g. jmp,
    879 * call, conditional jmp, nor instructions that change the IF
    880 * flag, because interrupts must stay disabled around the single-stepping.
    881 * Such instructions are software emulated, but others are single-stepped
    882 * using "int3".
    883 *
    884 * When the 2nd "int3" is handled, regs->ip and regs->flags need to
    885 * be adjusted so that we can resume execution at the correct code.
    886 */
    887static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
    888			      struct kprobe_ctlblk *kcb)
    889{
    890	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
    891	unsigned long orig_ip = (unsigned long)p->addr;
    892
    893	/* Restore saved interrupt flag and ip register */
    894	regs->flags |= kcb->kprobe_saved_flags;
    895	/* Note that regs->ip points just past the executed int3, so step back by its size */
    896	regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
    897}
    898NOKPROBE_SYMBOL(resume_singlestep);
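
/*
 * A small worked example (user-space sketch, made-up addresses) of the ip
 * fix-up above: the trailing int3 traps with regs->ip just past the copied
 * instruction, and the adjustment must land at p->addr plus the insn length.
 */
#include <stdio.h>

#define SK_INT3_INSN_SIZE 1

int main(void)
{
	unsigned long orig_ip  = 0xffffffff81234560UL;	/* p->addr */
	unsigned long copy_ip  = 0xffffffffc0a00000UL;	/* p->ainsn.insn */
	unsigned long insn_len = 3;			/* copied insn length */

	/* ip after the copied instruction and its trailing int3 executed: */
	unsigned long ip = copy_ip + insn_len + SK_INT3_INSN_SIZE;

	/* The same adjustment resume_singlestep() applies: */
	ip += (orig_ip - copy_ip) - SK_INT3_INSN_SIZE;

	printf("resumes at orig_ip + insn_len: %d\n", ip == orig_ip + insn_len); /* 1 */
	return 0;
}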
    899
    900/*
    901 * We have reentered the kprobe_handler(), since another probe was hit while
    902 * within the handler. We save the original kprobes variables and just single
    903 * step on the instruction of the new probe without calling any user handlers.
    904 */
    905static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
    906			  struct kprobe_ctlblk *kcb)
    907{
    908	switch (kcb->kprobe_status) {
    909	case KPROBE_HIT_SSDONE:
    910	case KPROBE_HIT_ACTIVE:
    911	case KPROBE_HIT_SS:
    912		kprobes_inc_nmissed_count(p);
    913		setup_singlestep(p, regs, kcb, 1);
    914		break;
    915	case KPROBE_REENTER:
    916		/* A probe has been hit in the codepath leading up to, or just
    917		 * after, single-stepping of a probed instruction. This entire
    918		 * codepath should strictly reside in .kprobes.text section.
    919		 * Raise a BUG or we'll continue in an endless reentering loop
    920		 * and eventually a stack overflow.
    921		 */
    922		pr_err("Unrecoverable kprobe detected.\n");
    923		dump_kprobe(p);
    924		BUG();
    925	default:
    926		/* impossible cases */
    927		WARN_ON(1);
    928		return 0;
    929	}
    930
    931	return 1;
    932}
    933NOKPROBE_SYMBOL(reenter_kprobe);
    934
    935static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
    936{
    937	return (kcb->kprobe_status == KPROBE_HIT_SS ||
    938		kcb->kprobe_status == KPROBE_REENTER);
    939}
    940
    941/*
    942 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
    943 * remain disabled throughout this function.
    944 */
    945int kprobe_int3_handler(struct pt_regs *regs)
    946{
    947	kprobe_opcode_t *addr;
    948	struct kprobe *p;
    949	struct kprobe_ctlblk *kcb;
    950
    951	if (user_mode(regs))
    952		return 0;
    953
    954	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
    955	/*
    956	 * We don't want to be preempted for the entire duration of kprobe
    957	 * processing. Since the int3 and debug traps disable irqs and we clear
    958	 * IF while single-stepping, this must not be preemptible.
    959	 */
    960
    961	kcb = get_kprobe_ctlblk();
    962	p = get_kprobe(addr);
    963
    964	if (p) {
    965		if (kprobe_running()) {
    966			if (reenter_kprobe(p, regs, kcb))
    967				return 1;
    968		} else {
    969			set_current_kprobe(p, regs, kcb);
    970			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
    971
    972			/*
    973			 * If we have no pre-handler or it returned 0, we
    974			 * continue with normal processing.  If we have a
    975			 * pre-handler and it returned non-zero, that means
    976			 * the user handler set up the registers to continue at another
    977			 * instruction, so we must skip the single stepping.
    978			 */
    979			if (!p->pre_handler || !p->pre_handler(p, regs))
    980				setup_singlestep(p, regs, kcb, 0);
    981			else
    982				reset_current_kprobe();
    983			return 1;
    984		}
    985	} else if (kprobe_is_ss(kcb)) {
    986		p = kprobe_running();
    987		if ((unsigned long)p->ainsn.insn < regs->ip &&
    988		    (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
    989			/* Most probably this is the second int3 for single-stepping */
    990			resume_singlestep(p, regs, kcb);
    991			kprobe_post_process(p, regs, kcb);
    992			return 1;
    993		}
    994	}
    995
    996	if (*addr != INT3_INSN_OPCODE) {
    997		/*
    998		 * The breakpoint instruction was removed right
    999		 * after we hit it.  Another cpu has removed
   1000		 * either a probepoint or a debugger breakpoint
   1001		 * at this address.  In either case, no further
   1002		 * handling of this interrupt is appropriate.
   1003		 * Back up over the (now missing) int3 and run
   1004		 * the original instruction.
   1005		 */
   1006		regs->ip = (unsigned long)addr;
   1007		return 1;
   1008	} /* else: not a kprobe fault; let the kernel handle it */
   1009
   1010	return 0;
   1011}
   1012NOKPROBE_SYMBOL(kprobe_int3_handler);
   1013
   1014int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
   1015{
   1016	struct kprobe *cur = kprobe_running();
   1017	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
   1018
   1019	if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) {
   1020		/* This must happen on single-stepping */
   1021		WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
   1022			kcb->kprobe_status != KPROBE_REENTER);
   1023		/*
   1024		 * We are here because the instruction being single
   1025		 * stepped caused a page fault. We reset the current
   1026		 * kprobe and the ip points back to the probe address
   1027		 * and allow the page fault handler to continue as a
   1028		 * normal page fault.
   1029		 */
   1030		regs->ip = (unsigned long)cur->addr;
   1031
   1032		/*
   1033		 * If the IF flag was set before the kprobe hit,
   1034		 * don't touch it:
   1035		 */
   1036		regs->flags |= kcb->kprobe_old_flags;
   1037
   1038		if (kcb->kprobe_status == KPROBE_REENTER)
   1039			restore_previous_kprobe(kcb);
   1040		else
   1041			reset_current_kprobe();
   1042	}
   1043
   1044	return 0;
   1045}
   1046NOKPROBE_SYMBOL(kprobe_fault_handler);
   1047
   1048int __init arch_populate_kprobe_blacklist(void)
   1049{
   1050	return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
   1051					 (unsigned long)__entry_text_end);
   1052}
   1053
   1054int __init arch_init_kprobes(void)
   1055{
   1056	return 0;
   1057}
   1058
   1059int arch_trampoline_kprobe(struct kprobe *p)
   1060{
   1061	return 0;
   1062}
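
/*
 * For completeness, a minimal kprobes usage sketch in the style of
 * samples/kprobes/kprobe_example.c: a module that registers a probe whose hits
 * are handled by the int3/single-step machinery implemented above. The probed
 * symbol is only an example; any probe-able (non-blacklisted) function works.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
	.symbol_name = "kernel_clone",	/* example symbol, pick any probe-able one */
};

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %pS, ip=%lx\n", p->addr, regs->ip);
	return 0;	/* 0: continue with single-step/emulation as usual */
}

static int __init kprobe_demo_init(void)
{
	kp.pre_handler = handler_pre;
	return register_kprobe(&kp);
}

static void __exit kprobe_demo_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_demo_init);
module_exit(kprobe_demo_exit);
MODULE_LICENSE("GPL");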