cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kprobes.c (75238B)
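
For orientation before the listing: this file implements the core kprobes machinery, including the register_kprobe()/unregister_kprobe() API exported further down. The following is a minimal usage sketch, modeled on the kernel's samples/kprobes/kprobe_example.c; the probed symbol ("kernel_clone") and the handler/function names are illustrative assumptions, not part of kprobes.c itself.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
	/* Any non-blacklisted kernel text symbol; "kernel_clone" is just an example. */
	.symbol_name = "kernel_clone",
};

/* Runs just before the probed instruction is executed. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;	/* 0: continue with the normal single-step/optimized path */
}

static int __init kprobe_usage_init(void)
{
	int ret;

	kp.pre_handler = handler_pre;
	ret = register_kprobe(&kp);	/* implemented in this file */
	if (ret < 0) {
		pr_err("register_kprobe failed: %d\n", ret);
		return ret;
	}
	pr_info("kprobe planted at %p\n", kp.addr);
	return 0;
}

static void __exit kprobe_usage_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_usage_init);
module_exit(kprobe_usage_exit);
MODULE_LICENSE("GPL");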


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *  Kernel Probes (KProbes)
      4 *
      5 * Copyright (C) IBM Corporation, 2002, 2004
      6 *
      7 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
      8 *		Probes initial implementation (includes suggestions from
      9 *		Rusty Russell).
     10 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
     11 *		hlists and exceptions notifier as suggested by Andi Kleen.
     12 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
     13 *		interface to access function arguments.
     14 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
     15 *		exceptions notifier to be first on the priority list.
     16 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
     17 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
     18 *		<prasanna@in.ibm.com> added function-return probes.
     19 */
     20
     21#define pr_fmt(fmt) "kprobes: " fmt
     22
     23#include <linux/kprobes.h>
     24#include <linux/hash.h>
     25#include <linux/init.h>
     26#include <linux/slab.h>
     27#include <linux/stddef.h>
     28#include <linux/export.h>
     29#include <linux/moduleloader.h>
     30#include <linux/kallsyms.h>
     31#include <linux/freezer.h>
     32#include <linux/seq_file.h>
     33#include <linux/debugfs.h>
     34#include <linux/sysctl.h>
     35#include <linux/kdebug.h>
     36#include <linux/memory.h>
     37#include <linux/ftrace.h>
     38#include <linux/cpu.h>
     39#include <linux/jump_label.h>
     40#include <linux/static_call.h>
     41#include <linux/perf_event.h>
     42
     43#include <asm/sections.h>
     44#include <asm/cacheflush.h>
     45#include <asm/errno.h>
     46#include <linux/uaccess.h>
     47
     48#define KPROBE_HASH_BITS 6
     49#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
     50
     51#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
     52#define kprobe_sysctls_init() do { } while (0)
     53#endif
     54
     55static int kprobes_initialized;
     56/* kprobe_table can be accessed by
     57 * - Normal hlist traversal and RCU add/del while 'kprobe_mutex' is held.
     58 * Or
     59 * - RCU hlist traversal with preemption disabled (breakpoint handlers)
     60 */
     61static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
     62
     63/* NOTE: change this value only with 'kprobe_mutex' held */
     64static bool kprobes_all_disarmed;
     65
     66/* This protects 'kprobe_table' and 'optimizing_list' */
     67static DEFINE_MUTEX(kprobe_mutex);
     68static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
     69
     70kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
     71					unsigned int __unused)
     72{
     73	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
     74}
     75
     76/*
     77 * Blacklist -- list of 'struct kprobe_blacklist_entry' recording where
     78 * kprobes can not probe.
     79 */
     80static LIST_HEAD(kprobe_blacklist);
     81
     82#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
     83/*
     84 * 'kprobe::ainsn.insn' points to the copy of the instruction to be
     85 * single-stepped. x86_64, POWER4 and above have no-exec support and
     86 * stepping on the instruction on a vmalloced/kmalloced/data page
     87 * is a recipe for disaster
     88 */
     89struct kprobe_insn_page {
     90	struct list_head list;
     91	kprobe_opcode_t *insns;		/* Page of instruction slots */
     92	struct kprobe_insn_cache *cache;
     93	int nused;
     94	int ngarbage;
     95	char slot_used[];
     96};
     97
     98#define KPROBE_INSN_PAGE_SIZE(slots)			\
     99	(offsetof(struct kprobe_insn_page, slot_used) +	\
    100	 (sizeof(char) * (slots)))
    101
    102static int slots_per_page(struct kprobe_insn_cache *c)
    103{
    104	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
    105}
    106
    107enum kprobe_slot_state {
    108	SLOT_CLEAN = 0,
    109	SLOT_DIRTY = 1,
    110	SLOT_USED = 2,
    111};
    112
    113void __weak *alloc_insn_page(void)
    114{
    115	/*
    116	 * Use module_alloc() so this page is within +/- 2GB of where the
    117	 * kernel image and loaded module images reside. This is required
    118	 * for most of the architectures.
    119	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
    120	 */
    121	return module_alloc(PAGE_SIZE);
    122}
    123
    124static void free_insn_page(void *page)
    125{
    126	module_memfree(page);
    127}
    128
    129struct kprobe_insn_cache kprobe_insn_slots = {
    130	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
    131	.alloc = alloc_insn_page,
    132	.free = free_insn_page,
    133	.sym = KPROBE_INSN_PAGE_SYM,
    134	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
    135	.insn_size = MAX_INSN_SIZE,
    136	.nr_garbage = 0,
    137};
    138static int collect_garbage_slots(struct kprobe_insn_cache *c);
    139
    140/**
    141 * __get_insn_slot() - Find a slot on an executable page for an instruction.
    142 * We allocate an executable page if there's no room on existing ones.
    143 */
    144kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
    145{
    146	struct kprobe_insn_page *kip;
    147	kprobe_opcode_t *slot = NULL;
    148
    149	/* Since the slot array is not protected by rcu, we need a mutex */
    150	mutex_lock(&c->mutex);
    151 retry:
    152	rcu_read_lock();
    153	list_for_each_entry_rcu(kip, &c->pages, list) {
    154		if (kip->nused < slots_per_page(c)) {
    155			int i;
    156
    157			for (i = 0; i < slots_per_page(c); i++) {
    158				if (kip->slot_used[i] == SLOT_CLEAN) {
    159					kip->slot_used[i] = SLOT_USED;
    160					kip->nused++;
    161					slot = kip->insns + (i * c->insn_size);
    162					rcu_read_unlock();
    163					goto out;
    164				}
    165			}
    166			/* kip->nused is broken. Fix it. */
    167			kip->nused = slots_per_page(c);
    168			WARN_ON(1);
    169		}
    170	}
    171	rcu_read_unlock();
    172
    173	/* If there are any garbage slots, collect them and try again. */
    174	if (c->nr_garbage && collect_garbage_slots(c) == 0)
    175		goto retry;
    176
    177	/* All out of space.  Need to allocate a new page. */
    178	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
    179	if (!kip)
    180		goto out;
    181
    182	kip->insns = c->alloc();
    183	if (!kip->insns) {
    184		kfree(kip);
    185		goto out;
    186	}
    187	INIT_LIST_HEAD(&kip->list);
    188	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
    189	kip->slot_used[0] = SLOT_USED;
    190	kip->nused = 1;
    191	kip->ngarbage = 0;
    192	kip->cache = c;
    193	list_add_rcu(&kip->list, &c->pages);
    194	slot = kip->insns;
    195
    196	/* Record the perf ksymbol register event after adding the page */
    197	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
    198			   PAGE_SIZE, false, c->sym);
    199out:
    200	mutex_unlock(&c->mutex);
    201	return slot;
    202}
    203
    204/* Return true if the page became unused after collecting this slot, otherwise false. */
    205static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
    206{
    207	kip->slot_used[idx] = SLOT_CLEAN;
    208	kip->nused--;
    209	if (kip->nused == 0) {
    210		/*
    211		 * Page is no longer in use.  Free it unless
    212		 * it's the last one.  We keep the last one
    213		 * so as not to have to set it up again the
    214		 * next time somebody inserts a probe.
    215		 */
    216		if (!list_is_singular(&kip->list)) {
    217			/*
    218			 * Record perf ksymbol unregister event before removing
    219			 * the page.
    220			 */
    221			perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
    222					   (unsigned long)kip->insns, PAGE_SIZE, true,
    223					   kip->cache->sym);
    224			list_del_rcu(&kip->list);
    225			synchronize_rcu();
    226			kip->cache->free(kip->insns);
    227			kfree(kip);
    228		}
    229		return true;
    230	}
    231	return false;
    232}
    233
    234static int collect_garbage_slots(struct kprobe_insn_cache *c)
    235{
    236	struct kprobe_insn_page *kip, *next;
    237
    238	/* Ensure no task is still executing within the garbage slots */
    239	synchronize_rcu();
    240
    241	list_for_each_entry_safe(kip, next, &c->pages, list) {
    242		int i;
    243
    244		if (kip->ngarbage == 0)
    245			continue;
    246		kip->ngarbage = 0;	/* we will collect all garbage slots */
    247		for (i = 0; i < slots_per_page(c); i++) {
    248			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
    249				break;
    250		}
    251	}
    252	c->nr_garbage = 0;
    253	return 0;
    254}
    255
    256void __free_insn_slot(struct kprobe_insn_cache *c,
    257		      kprobe_opcode_t *slot, int dirty)
    258{
    259	struct kprobe_insn_page *kip;
    260	long idx;
    261
    262	mutex_lock(&c->mutex);
    263	rcu_read_lock();
    264	list_for_each_entry_rcu(kip, &c->pages, list) {
    265		idx = ((long)slot - (long)kip->insns) /
    266			(c->insn_size * sizeof(kprobe_opcode_t));
    267		if (idx >= 0 && idx < slots_per_page(c))
    268			goto out;
    269	}
    270	/* Could not find this slot. */
    271	WARN_ON(1);
    272	kip = NULL;
    273out:
    274	rcu_read_unlock();
    275	/* Mark and sweep: this may sleep */
    276	if (kip) {
    277		/* Check double free */
    278		WARN_ON(kip->slot_used[idx] != SLOT_USED);
    279		if (dirty) {
    280			kip->slot_used[idx] = SLOT_DIRTY;
    281			kip->ngarbage++;
    282			if (++c->nr_garbage > slots_per_page(c))
    283				collect_garbage_slots(c);
    284		} else {
    285			collect_one_slot(kip, idx);
    286		}
    287	}
    288	mutex_unlock(&c->mutex);
    289}
    290
    291/*
    292 * Check whether the given address is on a page of kprobe instruction slots.
    293 * This is used to check whether an address found on a stack
    294 * is in a text area or not.
    295 */
    296bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
    297{
    298	struct kprobe_insn_page *kip;
    299	bool ret = false;
    300
    301	rcu_read_lock();
    302	list_for_each_entry_rcu(kip, &c->pages, list) {
    303		if (addr >= (unsigned long)kip->insns &&
    304		    addr < (unsigned long)kip->insns + PAGE_SIZE) {
    305			ret = true;
    306			break;
    307		}
    308	}
    309	rcu_read_unlock();
    310
    311	return ret;
    312}
    313
    314int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
    315			     unsigned long *value, char *type, char *sym)
    316{
    317	struct kprobe_insn_page *kip;
    318	int ret = -ERANGE;
    319
    320	rcu_read_lock();
    321	list_for_each_entry_rcu(kip, &c->pages, list) {
    322		if ((*symnum)--)
    323			continue;
    324		strscpy(sym, c->sym, KSYM_NAME_LEN);
    325		*type = 't';
    326		*value = (unsigned long)kip->insns;
    327		ret = 0;
    328		break;
    329	}
    330	rcu_read_unlock();
    331
    332	return ret;
    333}
    334
    335#ifdef CONFIG_OPTPROBES
    336void __weak *alloc_optinsn_page(void)
    337{
    338	return alloc_insn_page();
    339}
    340
    341void __weak free_optinsn_page(void *page)
    342{
    343	free_insn_page(page);
    344}
    345
    346/* For optimized_kprobe buffer */
    347struct kprobe_insn_cache kprobe_optinsn_slots = {
    348	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
    349	.alloc = alloc_optinsn_page,
    350	.free = free_optinsn_page,
    351	.sym = KPROBE_OPTINSN_PAGE_SYM,
    352	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
    353	/* .insn_size is initialized later */
    354	.nr_garbage = 0,
    355};
    356#endif
    357#endif
    358
    359/* We have preemption disabled, so it is safe to use the __ versions */
    360static inline void set_kprobe_instance(struct kprobe *kp)
    361{
    362	__this_cpu_write(kprobe_instance, kp);
    363}
    364
    365static inline void reset_kprobe_instance(void)
    366{
    367	__this_cpu_write(kprobe_instance, NULL);
    368}
    369
    370/*
    371 * This routine is called either:
    372 *	- under the 'kprobe_mutex' - during kprobe_[un]register().
    373 *				OR
    374 *	- with preemption disabled - from architecture specific code.
    375 */
    376struct kprobe *get_kprobe(void *addr)
    377{
    378	struct hlist_head *head;
    379	struct kprobe *p;
    380
    381	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
    382	hlist_for_each_entry_rcu(p, head, hlist,
    383				 lockdep_is_held(&kprobe_mutex)) {
    384		if (p->addr == addr)
    385			return p;
    386	}
    387
    388	return NULL;
    389}
    390NOKPROBE_SYMBOL(get_kprobe);
    391
    392static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
    393
    394/* Return true if 'p' is an aggregator */
    395static inline bool kprobe_aggrprobe(struct kprobe *p)
    396{
    397	return p->pre_handler == aggr_pre_handler;
    398}
    399
    400/* Return true if 'p' is unused */
    401static inline bool kprobe_unused(struct kprobe *p)
    402{
    403	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
    404	       list_empty(&p->list);
    405}
    406
    407/* Keep all fields in the kprobe consistent. */
    408static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
    409{
    410	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
    411	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
    412}
    413
    414#ifdef CONFIG_OPTPROBES
    415/* NOTE: This is protected by 'kprobe_mutex'. */
    416static bool kprobes_allow_optimization;
    417
    418/*
    419 * Call all 'kprobe::pre_handler' handlers on the list, but ignore their return values.
    420 * This must be called from the arch-dependent optimized caller.
    421 */
    422void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
    423{
    424	struct kprobe *kp;
    425
    426	list_for_each_entry_rcu(kp, &p->list, list) {
    427		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
    428			set_kprobe_instance(kp);
    429			kp->pre_handler(kp, regs);
    430		}
    431		reset_kprobe_instance();
    432	}
    433}
    434NOKPROBE_SYMBOL(opt_pre_handler);
    435
    436/* Free optimized instructions and optimized_kprobe */
    437static void free_aggr_kprobe(struct kprobe *p)
    438{
    439	struct optimized_kprobe *op;
    440
    441	op = container_of(p, struct optimized_kprobe, kp);
    442	arch_remove_optimized_kprobe(op);
    443	arch_remove_kprobe(p);
    444	kfree(op);
    445}
    446
    447/* Return true if the kprobe is ready for optimization. */
    448static inline int kprobe_optready(struct kprobe *p)
    449{
    450	struct optimized_kprobe *op;
    451
    452	if (kprobe_aggrprobe(p)) {
    453		op = container_of(p, struct optimized_kprobe, kp);
    454		return arch_prepared_optinsn(&op->optinsn);
    455	}
    456
    457	return 0;
    458}
    459
    460/* Return true if the kprobe is disarmed. Note: p must be on hash list */
    461static inline bool kprobe_disarmed(struct kprobe *p)
    462{
    463	struct optimized_kprobe *op;
    464
    465	/* If the kprobe is not an aggr/opt probe, just return whether it is disabled */
    466	if (!kprobe_aggrprobe(p))
    467		return kprobe_disabled(p);
    468
    469	op = container_of(p, struct optimized_kprobe, kp);
    470
    471	return kprobe_disabled(p) && list_empty(&op->list);
    472}
    473
    474/* Return true if the probe is queued on (un)optimizing lists */
    475static bool kprobe_queued(struct kprobe *p)
    476{
    477	struct optimized_kprobe *op;
    478
    479	if (kprobe_aggrprobe(p)) {
    480		op = container_of(p, struct optimized_kprobe, kp);
    481		if (!list_empty(&op->list))
    482			return true;
    483	}
    484	return false;
    485}
    486
    487/*
    488 * Return an optimized kprobe whose optimizing code replaces
    489 * instructions including 'addr' (excluding the breakpoint address itself).
    490 */
    491static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
    492{
    493	int i;
    494	struct kprobe *p = NULL;
    495	struct optimized_kprobe *op;
    496
    497	/* Don't check i == 0, since that is a breakpoint case. */
    498	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
    499		p = get_kprobe(addr - i);
    500
    501	if (p && kprobe_optready(p)) {
    502		op = container_of(p, struct optimized_kprobe, kp);
    503		if (arch_within_optimized_kprobe(op, addr))
    504			return p;
    505	}
    506
    507	return NULL;
    508}
    509
    510/* Optimization staging list, protected by 'kprobe_mutex' */
    511static LIST_HEAD(optimizing_list);
    512static LIST_HEAD(unoptimizing_list);
    513static LIST_HEAD(freeing_list);
    514
    515static void kprobe_optimizer(struct work_struct *work);
    516static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
    517#define OPTIMIZE_DELAY 5
    518
    519/*
    520 * Optimize (replace a breakpoint with a jump) kprobes listed on
    521 * 'optimizing_list'.
    522 */
    523static void do_optimize_kprobes(void)
    524{
    525	lockdep_assert_held(&text_mutex);
    526	/*
    527	 * Optimization/unoptimization refer to 'online_cpus' via
    528	 * stop_machine(), while cpu-hotplug modifies 'online_cpus'.
    529	 * At the same time, 'text_mutex' is held both during cpu-hotplug and here.
    530	 * This combination can cause a deadlock (cpu-hotplug tries to lock
    531	 * 'text_mutex' but stop_machine() cannot proceed because
    532	 * 'online_cpus' has been changed).
    533	 * To avoid this deadlock, the caller must hold the cpu-hotplug lock
    534	 * to prevent cpu-hotplug from running while 'text_mutex' is held.
    535	 */
    536	lockdep_assert_cpus_held();
    537
    538	/* Optimization is never done while kprobes are disarmed */
    539	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
    540	    list_empty(&optimizing_list))
    541		return;
    542
    543	arch_optimize_kprobes(&optimizing_list);
    544}
    545
    546/*
    547 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
    548 * if needed) kprobes listed on 'unoptimizing_list'.
    549 */
    550static void do_unoptimize_kprobes(void)
    551{
    552	struct optimized_kprobe *op, *tmp;
    553
    554	lockdep_assert_held(&text_mutex);
    555	/* See comment in do_optimize_kprobes() */
    556	lockdep_assert_cpus_held();
    557
    558	/* Unoptimization must be done regardless of the disarmed state */
    559	if (list_empty(&unoptimizing_list))
    560		return;
    561
    562	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
    563	/* Loop on 'freeing_list' for disarming */
    564	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
    565		/* Switching from detour code to origin */
    566		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
    567		/* Disarm probes if marked disabled */
    568		if (kprobe_disabled(&op->kp))
    569			arch_disarm_kprobe(&op->kp);
    570		if (kprobe_unused(&op->kp)) {
    571			/*
    572			 * Remove unused probes from hash list. After waiting
    573			 * for synchronization, these probes are reclaimed.
    574			 * (reclaiming is done by do_free_cleaned_kprobes().)
    575			 */
    576			hlist_del_rcu(&op->kp.hlist);
    577		} else
    578			list_del_init(&op->list);
    579	}
    580}
    581
    582/* Reclaim all kprobes on the 'freeing_list' */
    583static void do_free_cleaned_kprobes(void)
    584{
    585	struct optimized_kprobe *op, *tmp;
    586
    587	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
    588		list_del_init(&op->list);
    589		if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
    590			/*
    591			 * This must not happen, but if there is a kprobe
    592			 * still in use, keep it on kprobes hash list.
    593			 */
    594			continue;
    595		}
    596		free_aggr_kprobe(&op->kp);
    597	}
    598}
    599
    600/* Start optimizer after OPTIMIZE_DELAY passed */
    601static void kick_kprobe_optimizer(void)
    602{
    603	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
    604}
    605
    606/* Kprobe jump optimizer */
    607static void kprobe_optimizer(struct work_struct *work)
    608{
    609	mutex_lock(&kprobe_mutex);
    610	cpus_read_lock();
    611	mutex_lock(&text_mutex);
    612
    613	/*
    614	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
    615	 * kprobes before waiting for the quiescence period.
    616	 */
    617	do_unoptimize_kprobes();
    618
    619	/*
    620	 * Step 2: Wait for a quiescence period to ensure that all potentially
    621	 * preempted tasks have been scheduled normally. Because an optprobe
    622	 * may modify multiple instructions, there is a chance that the Nth
    623	 * instruction is preempted. In that case, such tasks can return
    624	 * to the 2nd-Nth byte of the jump instruction. This wait avoids that.
    625	 * Note that on a non-preemptive kernel, this is transparently converted
    626	 * to synchronize_sched() to wait for all interrupts to have completed.
    627	 */
    628	synchronize_rcu_tasks();
    629
    630	/* Step 3: Optimize kprobes after the quiescence period */
    631	do_optimize_kprobes();
    632
    633	/* Step 4: Free cleaned kprobes after the quiescence period */
    634	do_free_cleaned_kprobes();
    635
    636	mutex_unlock(&text_mutex);
    637	cpus_read_unlock();
    638
    639	/* Step 5: Kick optimizer again if needed */
    640	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
    641		kick_kprobe_optimizer();
    642
    643	mutex_unlock(&kprobe_mutex);
    644}
    645
    646/* Wait for completing optimization and unoptimization */
    647void wait_for_kprobe_optimizer(void)
    648{
    649	mutex_lock(&kprobe_mutex);
    650
    651	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
    652		mutex_unlock(&kprobe_mutex);
    653
    654		/* This will also make 'optimizing_work' execute immediately */
    655		flush_delayed_work(&optimizing_work);
    656		/* 'optimizing_work' might not have been queued yet, relax */
    657		cpu_relax();
    658
    659		mutex_lock(&kprobe_mutex);
    660	}
    661
    662	mutex_unlock(&kprobe_mutex);
    663}
    664
    665static bool optprobe_queued_unopt(struct optimized_kprobe *op)
    666{
    667	struct optimized_kprobe *_op;
    668
    669	list_for_each_entry(_op, &unoptimizing_list, list) {
    670		if (op == _op)
    671			return true;
    672	}
    673
    674	return false;
    675}
    676
    677/* Optimize kprobe if p is ready to be optimized */
    678static void optimize_kprobe(struct kprobe *p)
    679{
    680	struct optimized_kprobe *op;
    681
    682	/* Check if the kprobe is disabled or not ready for optimization. */
    683	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
    684	    (kprobe_disabled(p) || kprobes_all_disarmed))
    685		return;
    686
    687	/* kprobes with 'post_handler' can not be optimized */
    688	if (p->post_handler)
    689		return;
    690
    691	op = container_of(p, struct optimized_kprobe, kp);
    692
    693	/* Check that there are no other kprobes within the optimized instruction range */
    694	if (arch_check_optimized_kprobe(op) < 0)
    695		return;
    696
    697	/* Check if it is already optimized. */
    698	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
    699		if (optprobe_queued_unopt(op)) {
    700			/* This is being unoptimized. Just dequeue the probe. */
    701			list_del_init(&op->list);
    702		}
    703		return;
    704	}
    705	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
    706
    707	/*
    708	 * On the 'unoptimizing_list' and 'optimizing_list',
    709	 * 'op' must have OPTIMIZED flag
    710	 */
    711	if (WARN_ON_ONCE(!list_empty(&op->list)))
    712		return;
    713
    714	list_add(&op->list, &optimizing_list);
    715	kick_kprobe_optimizer();
    716}
    717
    718/* Short cut to direct unoptimizing */
    719static void force_unoptimize_kprobe(struct optimized_kprobe *op)
    720{
    721	lockdep_assert_cpus_held();
    722	arch_unoptimize_kprobe(op);
    723	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
    724}
    725
    726/* Unoptimize a kprobe if p is optimized */
    727static void unoptimize_kprobe(struct kprobe *p, bool force)
    728{
    729	struct optimized_kprobe *op;
    730
    731	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
    732		return; /* This is not an optprobe nor optimized */
    733
    734	op = container_of(p, struct optimized_kprobe, kp);
    735	if (!kprobe_optimized(p))
    736		return;
    737
    738	if (!list_empty(&op->list)) {
    739		if (optprobe_queued_unopt(op)) {
    740			/* Queued in unoptimizing queue */
    741			if (force) {
    742				/*
    743				 * Forcibly unoptimize the kprobe here, and queue it
    744				 * in the freeing list for release afterwards.
    745				 */
    746				force_unoptimize_kprobe(op);
    747				list_move(&op->list, &freeing_list);
    748			}
    749		} else {
    750			/* Dequeue from the optimizing queue */
    751			list_del_init(&op->list);
    752			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
    753		}
    754		return;
    755	}
    756
    757	/* Optimized kprobe case */
    758	if (force) {
    759		/* Forcibly update the code: this is a special case */
    760		force_unoptimize_kprobe(op);
    761	} else {
    762		list_add(&op->list, &unoptimizing_list);
    763		kick_kprobe_optimizer();
    764	}
    765}
    766
    767/* Cancel unoptimization so the kprobe can be reused */
    768static int reuse_unused_kprobe(struct kprobe *ap)
    769{
    770	struct optimized_kprobe *op;
    771
    772	/*
    773	 * An unused kprobe MUST be on its way through delayed unoptimization (meaning
    774	 * there is still a relative jump in place) and disabled.
    775	 */
    776	op = container_of(ap, struct optimized_kprobe, kp);
    777	WARN_ON_ONCE(list_empty(&op->list));
    778	/* Enable the probe again */
    779	ap->flags &= ~KPROBE_FLAG_DISABLED;
    780	/* Optimize it again. (remove from 'op->list') */
    781	if (!kprobe_optready(ap))
    782		return -EINVAL;
    783
    784	optimize_kprobe(ap);
    785	return 0;
    786}
    787
    788/* Remove optimized instructions */
    789static void kill_optimized_kprobe(struct kprobe *p)
    790{
    791	struct optimized_kprobe *op;
    792
    793	op = container_of(p, struct optimized_kprobe, kp);
    794	if (!list_empty(&op->list))
    795		/* Dequeue from the (un)optimization queue */
    796		list_del_init(&op->list);
    797	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
    798
    799	if (kprobe_unused(p)) {
    800		/* Enqueue if it is unused */
    801		list_add(&op->list, &freeing_list);
    802		/*
    803		 * Remove unused probes from the hash list. After waiting
    804		 * for synchronization, this probe is reclaimed.
    805		 * (reclaiming is done by do_free_cleaned_kprobes().)
    806		 */
    807		hlist_del_rcu(&op->kp.hlist);
    808	}
    809
    810	/* Don't touch the code, because it is already freed. */
    811	arch_remove_optimized_kprobe(op);
    812}
    813
    814static inline
    815void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
    816{
    817	if (!kprobe_ftrace(p))
    818		arch_prepare_optimized_kprobe(op, p);
    819}
    820
    821/* Try to prepare optimized instructions */
    822static void prepare_optimized_kprobe(struct kprobe *p)
    823{
    824	struct optimized_kprobe *op;
    825
    826	op = container_of(p, struct optimized_kprobe, kp);
    827	__prepare_optimized_kprobe(op, p);
    828}
    829
    830/* Allocate new optimized_kprobe and try to prepare optimized instructions. */
    831static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
    832{
    833	struct optimized_kprobe *op;
    834
    835	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
    836	if (!op)
    837		return NULL;
    838
    839	INIT_LIST_HEAD(&op->list);
    840	op->kp.addr = p->addr;
    841	__prepare_optimized_kprobe(op, p);
    842
    843	return &op->kp;
    844}
    845
    846static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
    847
    848/*
    849 * Prepare an optimized_kprobe and optimize it.
    850 * NOTE: 'p' must be a normal registered kprobe.
    851 */
    852static void try_to_optimize_kprobe(struct kprobe *p)
    853{
    854	struct kprobe *ap;
    855	struct optimized_kprobe *op;
    856
    857	/* Impossible to optimize ftrace-based kprobe. */
    858	if (kprobe_ftrace(p))
    859		return;
    860
    861	/* For preparing optimization, jump_label_text_reserved() is called. */
    862	cpus_read_lock();
    863	jump_label_lock();
    864	mutex_lock(&text_mutex);
    865
    866	ap = alloc_aggr_kprobe(p);
    867	if (!ap)
    868		goto out;
    869
    870	op = container_of(ap, struct optimized_kprobe, kp);
    871	if (!arch_prepared_optinsn(&op->optinsn)) {
    872		/* If failed to setup optimizing, fallback to kprobe. */
    873		arch_remove_optimized_kprobe(op);
    874		kfree(op);
    875		goto out;
    876	}
    877
    878	init_aggr_kprobe(ap, p);
    879	optimize_kprobe(ap);	/* This just kicks optimizer thread. */
    880
    881out:
    882	mutex_unlock(&text_mutex);
    883	jump_label_unlock();
    884	cpus_read_unlock();
    885}
    886
    887static void optimize_all_kprobes(void)
    888{
    889	struct hlist_head *head;
    890	struct kprobe *p;
    891	unsigned int i;
    892
    893	mutex_lock(&kprobe_mutex);
    894	/* If optimization is already allowed, just return. */
    895	if (kprobes_allow_optimization)
    896		goto out;
    897
    898	cpus_read_lock();
    899	kprobes_allow_optimization = true;
    900	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
    901		head = &kprobe_table[i];
    902		hlist_for_each_entry(p, head, hlist)
    903			if (!kprobe_disabled(p))
    904				optimize_kprobe(p);
    905	}
    906	cpus_read_unlock();
    907	pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
    908out:
    909	mutex_unlock(&kprobe_mutex);
    910}
    911
    912#ifdef CONFIG_SYSCTL
    913static void unoptimize_all_kprobes(void)
    914{
    915	struct hlist_head *head;
    916	struct kprobe *p;
    917	unsigned int i;
    918
    919	mutex_lock(&kprobe_mutex);
    920	/* If optimization is already prohibited, just return. */
    921	if (!kprobes_allow_optimization) {
    922		mutex_unlock(&kprobe_mutex);
    923		return;
    924	}
    925
    926	cpus_read_lock();
    927	kprobes_allow_optimization = false;
    928	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
    929		head = &kprobe_table[i];
    930		hlist_for_each_entry(p, head, hlist) {
    931			if (!kprobe_disabled(p))
    932				unoptimize_kprobe(p, false);
    933		}
    934	}
    935	cpus_read_unlock();
    936	mutex_unlock(&kprobe_mutex);
    937
    938	/* Wait for unoptimizing completion. */
    939	wait_for_kprobe_optimizer();
    940	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
    941}
    942
    943static DEFINE_MUTEX(kprobe_sysctl_mutex);
    944static int sysctl_kprobes_optimization;
    945static int proc_kprobes_optimization_handler(struct ctl_table *table,
    946					     int write, void *buffer,
    947					     size_t *length, loff_t *ppos)
    948{
    949	int ret;
    950
    951	mutex_lock(&kprobe_sysctl_mutex);
    952	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
    953	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
    954
    955	if (sysctl_kprobes_optimization)
    956		optimize_all_kprobes();
    957	else
    958		unoptimize_all_kprobes();
    959	mutex_unlock(&kprobe_sysctl_mutex);
    960
    961	return ret;
    962}
    963
    964static struct ctl_table kprobe_sysctls[] = {
    965	{
    966		.procname	= "kprobes-optimization",
    967		.data		= &sysctl_kprobes_optimization,
    968		.maxlen		= sizeof(int),
    969		.mode		= 0644,
    970		.proc_handler	= proc_kprobes_optimization_handler,
    971		.extra1		= SYSCTL_ZERO,
    972		.extra2		= SYSCTL_ONE,
    973	},
    974	{}
    975};
    976
    977static void __init kprobe_sysctls_init(void)
    978{
    979	register_sysctl_init("debug", kprobe_sysctls);
    980}
    981#endif /* CONFIG_SYSCTL */
    982
    983/* Put a breakpoint for a probe. */
    984static void __arm_kprobe(struct kprobe *p)
    985{
    986	struct kprobe *_p;
    987
    988	lockdep_assert_held(&text_mutex);
    989
    990	/* Find the overlapping optimized kprobes. */
    991	_p = get_optimized_kprobe(p->addr);
    992	if (unlikely(_p))
    993		/* Fallback to unoptimized kprobe */
    994		unoptimize_kprobe(_p, true);
    995
    996	arch_arm_kprobe(p);
    997	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
    998}
    999
   1000/* Remove the breakpoint of a probe. */
   1001static void __disarm_kprobe(struct kprobe *p, bool reopt)
   1002{
   1003	struct kprobe *_p;
   1004
   1005	lockdep_assert_held(&text_mutex);
   1006
   1007	/* Try to unoptimize */
   1008	unoptimize_kprobe(p, kprobes_all_disarmed);
   1009
   1010	if (!kprobe_queued(p)) {
   1011		arch_disarm_kprobe(p);
   1012		/* If another kprobe was blocked, re-optimize it. */
   1013		_p = get_optimized_kprobe(p->addr);
   1014		if (unlikely(_p) && reopt)
   1015			optimize_kprobe(_p);
   1016	}
   1017	/*
   1018	 * TODO: Since unoptimization and real disarming will be done by
   1019	 * the worker thread, we cannot check here whether another probe was
   1020	 * unoptimized because of this probe. It should be re-optimized
   1021	 * by the worker thread.
   1022	 */
   1023}
   1024
   1025#else /* !CONFIG_OPTPROBES */
   1026
   1027#define optimize_kprobe(p)			do {} while (0)
   1028#define unoptimize_kprobe(p, f)			do {} while (0)
   1029#define kill_optimized_kprobe(p)		do {} while (0)
   1030#define prepare_optimized_kprobe(p)		do {} while (0)
   1031#define try_to_optimize_kprobe(p)		do {} while (0)
   1032#define __arm_kprobe(p)				arch_arm_kprobe(p)
   1033#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
   1034#define kprobe_disarmed(p)			kprobe_disabled(p)
   1035#define wait_for_kprobe_optimizer()		do {} while (0)
   1036
   1037static int reuse_unused_kprobe(struct kprobe *ap)
   1038{
   1039	/*
   1040	 * If optimized kprobes are NOT supported, the aggr kprobe is
   1041	 * released at the same time that the last aggregated kprobe is
   1042	 * unregistered.
   1043	 * Thus there should be no chance to reuse unused kprobe.
   1044	 */
   1045	WARN_ON_ONCE(1);
   1046	return -EINVAL;
   1047}
   1048
   1049static void free_aggr_kprobe(struct kprobe *p)
   1050{
   1051	arch_remove_kprobe(p);
   1052	kfree(p);
   1053}
   1054
   1055static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
   1056{
   1057	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
   1058}
   1059#endif /* CONFIG_OPTPROBES */
   1060
   1061#ifdef CONFIG_KPROBES_ON_FTRACE
   1062static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
   1063	.func = kprobe_ftrace_handler,
   1064	.flags = FTRACE_OPS_FL_SAVE_REGS,
   1065};
   1066
   1067static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
   1068	.func = kprobe_ftrace_handler,
   1069	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
   1070};
   1071
   1072static int kprobe_ipmodify_enabled;
   1073static int kprobe_ftrace_enabled;
   1074
   1075static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
   1076			       int *cnt)
   1077{
   1078	int ret = 0;
   1079
   1080	lockdep_assert_held(&kprobe_mutex);
   1081
   1082	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
   1083	if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
   1084		return ret;
   1085
   1086	if (*cnt == 0) {
   1087		ret = register_ftrace_function(ops);
   1088		if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret))
   1089			goto err_ftrace;
   1090	}
   1091
   1092	(*cnt)++;
   1093	return ret;
   1094
   1095err_ftrace:
   1096	/*
   1097	 * At this point, since ops is not registered, we should be safe from
   1098	 * registering an empty filter.
   1099	 */
   1100	ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
   1101	return ret;
   1102}
   1103
   1104static int arm_kprobe_ftrace(struct kprobe *p)
   1105{
   1106	bool ipmodify = (p->post_handler != NULL);
   1107
   1108	return __arm_kprobe_ftrace(p,
   1109		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
   1110		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
   1111}
   1112
   1113static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
   1114				  int *cnt)
   1115{
   1116	int ret = 0;
   1117
   1118	lockdep_assert_held(&kprobe_mutex);
   1119
   1120	if (*cnt == 1) {
   1121		ret = unregister_ftrace_function(ops);
   1122		if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
   1123			return ret;
   1124	}
   1125
   1126	(*cnt)--;
   1127
   1128	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
   1129	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
   1130		  p->addr, ret);
   1131	return ret;
   1132}
   1133
   1134static int disarm_kprobe_ftrace(struct kprobe *p)
   1135{
   1136	bool ipmodify = (p->post_handler != NULL);
   1137
   1138	return __disarm_kprobe_ftrace(p,
   1139		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
   1140		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
   1141}
   1142#else	/* !CONFIG_KPROBES_ON_FTRACE */
   1143static inline int arm_kprobe_ftrace(struct kprobe *p)
   1144{
   1145	return -ENODEV;
   1146}
   1147
   1148static inline int disarm_kprobe_ftrace(struct kprobe *p)
   1149{
   1150	return -ENODEV;
   1151}
   1152#endif
   1153
   1154static int prepare_kprobe(struct kprobe *p)
   1155{
   1156	/* Must ensure p->addr is really on ftrace */
   1157	if (kprobe_ftrace(p))
   1158		return arch_prepare_kprobe_ftrace(p);
   1159
   1160	return arch_prepare_kprobe(p);
   1161}
   1162
   1163static int arm_kprobe(struct kprobe *kp)
   1164{
   1165	if (unlikely(kprobe_ftrace(kp)))
   1166		return arm_kprobe_ftrace(kp);
   1167
   1168	cpus_read_lock();
   1169	mutex_lock(&text_mutex);
   1170	__arm_kprobe(kp);
   1171	mutex_unlock(&text_mutex);
   1172	cpus_read_unlock();
   1173
   1174	return 0;
   1175}
   1176
   1177static int disarm_kprobe(struct kprobe *kp, bool reopt)
   1178{
   1179	if (unlikely(kprobe_ftrace(kp)))
   1180		return disarm_kprobe_ftrace(kp);
   1181
   1182	cpus_read_lock();
   1183	mutex_lock(&text_mutex);
   1184	__disarm_kprobe(kp, reopt);
   1185	mutex_unlock(&text_mutex);
   1186	cpus_read_unlock();
   1187
   1188	return 0;
   1189}
   1190
   1191/*
   1192 * Aggregate handlers for multiple kprobes support - these handlers
   1193 * take care of invoking the individual kprobe handlers on p->list
   1194 */
   1195static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
   1196{
   1197	struct kprobe *kp;
   1198
   1199	list_for_each_entry_rcu(kp, &p->list, list) {
   1200		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
   1201			set_kprobe_instance(kp);
   1202			if (kp->pre_handler(kp, regs))
   1203				return 1;
   1204		}
   1205		reset_kprobe_instance();
   1206	}
   1207	return 0;
   1208}
   1209NOKPROBE_SYMBOL(aggr_pre_handler);
   1210
   1211static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
   1212			      unsigned long flags)
   1213{
   1214	struct kprobe *kp;
   1215
   1216	list_for_each_entry_rcu(kp, &p->list, list) {
   1217		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
   1218			set_kprobe_instance(kp);
   1219			kp->post_handler(kp, regs, flags);
   1220			reset_kprobe_instance();
   1221		}
   1222	}
   1223}
   1224NOKPROBE_SYMBOL(aggr_post_handler);
   1225
   1226/* Walks the list and increments 'nmissed' if 'p' has child probes. */
   1227void kprobes_inc_nmissed_count(struct kprobe *p)
   1228{
   1229	struct kprobe *kp;
   1230
   1231	if (!kprobe_aggrprobe(p)) {
   1232		p->nmissed++;
   1233	} else {
   1234		list_for_each_entry_rcu(kp, &p->list, list)
   1235			kp->nmissed++;
   1236	}
   1237}
   1238NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
   1239
   1240static struct kprobe kprobe_busy = {
   1241	.addr = (void *) get_kprobe,
   1242};
   1243
   1244void kprobe_busy_begin(void)
   1245{
   1246	struct kprobe_ctlblk *kcb;
   1247
   1248	preempt_disable();
   1249	__this_cpu_write(current_kprobe, &kprobe_busy);
   1250	kcb = get_kprobe_ctlblk();
   1251	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
   1252}
   1253
   1254void kprobe_busy_end(void)
   1255{
   1256	__this_cpu_write(current_kprobe, NULL);
   1257	preempt_enable();
   1258}
   1259
   1260/* Add the new probe to 'ap->list'. */
   1261static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
   1262{
   1263	if (p->post_handler)
   1264		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */
   1265
   1266	list_add_rcu(&p->list, &ap->list);
   1267	if (p->post_handler && !ap->post_handler)
   1268		ap->post_handler = aggr_post_handler;
   1269
   1270	return 0;
   1271}
   1272
   1273/*
   1274 * Fill in the required fields of the aggregator kprobe. Replace the
   1275 * earlier kprobe in the hlist with the aggregator kprobe.
   1276 */
   1277static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
   1278{
   1279	/* Copy the insn slot of 'p' to 'ap'. */
   1280	copy_kprobe(p, ap);
   1281	flush_insn_slot(ap);
   1282	ap->addr = p->addr;
   1283	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
   1284	ap->pre_handler = aggr_pre_handler;
   1285	/* We don't care about a kprobe which has gone. */
   1286	if (p->post_handler && !kprobe_gone(p))
   1287		ap->post_handler = aggr_post_handler;
   1288
   1289	INIT_LIST_HEAD(&ap->list);
   1290	INIT_HLIST_NODE(&ap->hlist);
   1291
   1292	list_add_rcu(&p->list, &ap->list);
   1293	hlist_replace_rcu(&p->hlist, &ap->hlist);
   1294}
   1295
   1296/*
   1297 * This registers the second or subsequent kprobe at the same address.
   1298 */
   1299static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
   1300{
   1301	int ret = 0;
   1302	struct kprobe *ap = orig_p;
   1303
   1304	cpus_read_lock();
   1305
   1306	/* For preparing optimization, jump_label_text_reserved() is called */
   1307	jump_label_lock();
   1308	mutex_lock(&text_mutex);
   1309
   1310	if (!kprobe_aggrprobe(orig_p)) {
   1311		/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
   1312		ap = alloc_aggr_kprobe(orig_p);
   1313		if (!ap) {
   1314			ret = -ENOMEM;
   1315			goto out;
   1316		}
   1317		init_aggr_kprobe(ap, orig_p);
   1318	} else if (kprobe_unused(ap)) {
   1319		/* This probe is going to die. Rescue it */
   1320		ret = reuse_unused_kprobe(ap);
   1321		if (ret)
   1322			goto out;
   1323	}
   1324
   1325	if (kprobe_gone(ap)) {
   1326		/*
   1327		 * We are attempting to insert a new probe at the same location as
   1328		 * a probe in the module vaddr area which has already been
   1329		 * freed. So, the instruction slot has already been
   1330		 * released. We need a new slot for the new probe.
   1331		 */
   1332		ret = arch_prepare_kprobe(ap);
   1333		if (ret)
   1334			/*
   1335			 * Even if we fail to allocate a new slot, we don't need to
   1336			 * free the 'ap'. It will be used next time, or
   1337			 * freed by unregister_kprobe().
   1338			 */
   1339			goto out;
   1340
   1341		/* Prepare optimized instructions if possible. */
   1342		prepare_optimized_kprobe(ap);
   1343
   1344		/*
   1345		 * Clear gone flag to prevent allocating new slot again, and
   1346		 * set disabled flag because it is not armed yet.
   1347		 */
   1348		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
   1349			    | KPROBE_FLAG_DISABLED;
   1350	}
   1351
   1352	/* Copy the insn slot of 'ap' to 'p'. */
   1353	copy_kprobe(ap, p);
   1354	ret = add_new_kprobe(ap, p);
   1355
   1356out:
   1357	mutex_unlock(&text_mutex);
   1358	jump_label_unlock();
   1359	cpus_read_unlock();
   1360
   1361	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
   1362		ap->flags &= ~KPROBE_FLAG_DISABLED;
   1363		if (!kprobes_all_disarmed) {
   1364			/* Arm the breakpoint again. */
   1365			ret = arm_kprobe(ap);
   1366			if (ret) {
   1367				ap->flags |= KPROBE_FLAG_DISABLED;
   1368				list_del_rcu(&p->list);
   1369				synchronize_rcu();
   1370			}
   1371		}
   1372	}
   1373	return ret;
   1374}
   1375
   1376bool __weak arch_within_kprobe_blacklist(unsigned long addr)
   1377{
   1378	/* The '__kprobes' functions and entry code must not be probed. */
   1379	return addr >= (unsigned long)__kprobes_text_start &&
   1380	       addr < (unsigned long)__kprobes_text_end;
   1381}
   1382
   1383static bool __within_kprobe_blacklist(unsigned long addr)
   1384{
   1385	struct kprobe_blacklist_entry *ent;
   1386
   1387	if (arch_within_kprobe_blacklist(addr))
   1388		return true;
   1389	/*
   1390	 * If 'kprobe_blacklist' is defined, check the address and
   1391	 * reject any probe registration in the prohibited area.
   1392	 */
   1393	list_for_each_entry(ent, &kprobe_blacklist, list) {
   1394		if (addr >= ent->start_addr && addr < ent->end_addr)
   1395			return true;
   1396	}
   1397	return false;
   1398}
   1399
   1400bool within_kprobe_blacklist(unsigned long addr)
   1401{
   1402	char symname[KSYM_NAME_LEN], *p;
   1403
   1404	if (__within_kprobe_blacklist(addr))
   1405		return true;
   1406
   1407	/* Check if the address is on a suffixed-symbol */
   1408	if (!lookup_symbol_name(addr, symname)) {
   1409		p = strchr(symname, '.');
   1410		if (!p)
   1411			return false;
   1412		*p = '\0';
   1413		addr = (unsigned long)kprobe_lookup_name(symname, 0);
   1414		if (addr)
   1415			return __within_kprobe_blacklist(addr);
   1416	}
   1417	return false;
   1418}
   1419
   1420/*
   1421 * arch_adjust_kprobe_addr - adjust the address
   1422 * @addr: symbol base address
   1423 * @offset: offset within the symbol
   1424 * @on_func_entry: was this @addr+@offset on the function entry
   1425 *
   1426 * Typically returns @addr + @offset, except for special cases where the
   1427 * function might be prefixed by a CFI landing pad, in that case any offset
   1428 * inside the landing pad is mapped to the first 'real' instruction of the
   1429 * symbol.
   1430 *
   1431 * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C
   1432 * instruction at +0.
   1433 */
   1434kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr,
   1435						unsigned long offset,
   1436						bool *on_func_entry)
   1437{
   1438	*on_func_entry = !offset;
   1439	return (kprobe_opcode_t *)(addr + offset);
   1440}
   1441
   1442/*
   1443 * If 'symbol_name' is specified, look it up and add the 'offset'
   1444 * to it. This way, we can specify a relative address to a symbol.
   1445 * This returns an encoded error if it fails to look up the symbol or if the
   1446 * combination of parameters is invalid.
   1447 */
   1448static kprobe_opcode_t *
   1449_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name,
   1450	     unsigned long offset, bool *on_func_entry)
   1451{
   1452	if ((symbol_name && addr) || (!symbol_name && !addr))
   1453		goto invalid;
   1454
   1455	if (symbol_name) {
   1456		/*
   1457		 * Input: @sym + @offset
   1458		 * Output: @addr + @offset
   1459		 *
   1460		 * NOTE: kprobe_lookup_name() does *NOT* fold the offset
   1461		 *       argument into its output!
   1462		 */
   1463		addr = kprobe_lookup_name(symbol_name, offset);
   1464		if (!addr)
   1465			return ERR_PTR(-ENOENT);
   1466	}
   1467
   1468	/*
   1469	 * So here we have @addr + @offset, displace it into a new
   1470	 * @addr' + @offset' where @addr' is the symbol start address.
   1471	 */
   1472	addr = (void *)addr + offset;
   1473	if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
   1474		return ERR_PTR(-ENOENT);
   1475	addr = (void *)addr - offset;
   1476
   1477	/*
   1478	 * Then ask the architecture to re-combine them, taking care of
   1479	 * magical function entry details while telling us if this was indeed
   1480	 * at the start of the function.
   1481	 */
   1482	addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry);
   1483	if (addr)
   1484		return addr;
   1485
   1486invalid:
   1487	return ERR_PTR(-EINVAL);
   1488}
   1489
   1490static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
   1491{
   1492	bool on_func_entry;
   1493	return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
   1494}
   1495
   1496/*
   1497 * Check that 'p' is valid and return the aggregator kprobe
   1498 * at the same address.
   1499 */
   1500static struct kprobe *__get_valid_kprobe(struct kprobe *p)
   1501{
   1502	struct kprobe *ap, *list_p;
   1503
   1504	lockdep_assert_held(&kprobe_mutex);
   1505
   1506	ap = get_kprobe(p->addr);
   1507	if (unlikely(!ap))
   1508		return NULL;
   1509
   1510	if (p != ap) {
   1511		list_for_each_entry(list_p, &ap->list, list)
   1512			if (list_p == p)
   1513			/* kprobe p is a valid probe */
   1514				goto valid;
   1515		return NULL;
   1516	}
   1517valid:
   1518	return ap;
   1519}
   1520
   1521/*
   1522 * Warn and return error if the kprobe is being re-registered since
   1523 * there must be a software bug.
   1524 */
   1525static inline int warn_kprobe_rereg(struct kprobe *p)
   1526{
   1527	int ret = 0;
   1528
   1529	mutex_lock(&kprobe_mutex);
   1530	if (WARN_ON_ONCE(__get_valid_kprobe(p)))
   1531		ret = -EINVAL;
   1532	mutex_unlock(&kprobe_mutex);
   1533
   1534	return ret;
   1535}
   1536
   1537static int check_ftrace_location(struct kprobe *p)
   1538{
   1539	unsigned long addr = (unsigned long)p->addr;
   1540
   1541	if (ftrace_location(addr) == addr) {
   1542#ifdef CONFIG_KPROBES_ON_FTRACE
   1543		p->flags |= KPROBE_FLAG_FTRACE;
   1544#else	/* !CONFIG_KPROBES_ON_FTRACE */
   1545		return -EINVAL;
   1546#endif
   1547	}
   1548	return 0;
   1549}
   1550
   1551static int check_kprobe_address_safe(struct kprobe *p,
   1552				     struct module **probed_mod)
   1553{
   1554	int ret;
   1555
   1556	ret = check_ftrace_location(p);
   1557	if (ret)
   1558		return ret;
   1559	jump_label_lock();
   1560	preempt_disable();
   1561
   1562	/* Ensure that it is neither in a reserved area nor outside of kernel text */
   1563	if (!kernel_text_address((unsigned long) p->addr) ||
   1564	    within_kprobe_blacklist((unsigned long) p->addr) ||
   1565	    jump_label_text_reserved(p->addr, p->addr) ||
   1566	    static_call_text_reserved(p->addr, p->addr) ||
   1567	    find_bug((unsigned long)p->addr)) {
   1568		ret = -EINVAL;
   1569		goto out;
   1570	}
   1571
   1572	/* Check if 'p' is probing a module. */
   1573	*probed_mod = __module_text_address((unsigned long) p->addr);
   1574	if (*probed_mod) {
   1575		/*
   1576		 * We must hold a refcount of the probed module while updating
   1577		 * its code to prohibit unexpected unloading.
   1578		 */
   1579		if (unlikely(!try_module_get(*probed_mod))) {
   1580			ret = -ENOENT;
   1581			goto out;
   1582		}
   1583
   1584		/*
   1585		 * If the module has freed '.init.text', we can't insert
   1586		 * kprobes there.
   1587		 */
   1588		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
   1589		    (*probed_mod)->state != MODULE_STATE_COMING) {
   1590			module_put(*probed_mod);
   1591			*probed_mod = NULL;
   1592			ret = -ENOENT;
   1593		}
   1594	}
   1595out:
   1596	preempt_enable();
   1597	jump_label_unlock();
   1598
   1599	return ret;
   1600}
   1601
   1602int register_kprobe(struct kprobe *p)
   1603{
   1604	int ret;
   1605	struct kprobe *old_p;
   1606	struct module *probed_mod;
   1607	kprobe_opcode_t *addr;
   1608
   1609	/* Adjust probe address from symbol */
   1610	addr = kprobe_addr(p);
   1611	if (IS_ERR(addr))
   1612		return PTR_ERR(addr);
   1613	p->addr = addr;
   1614
   1615	ret = warn_kprobe_rereg(p);
   1616	if (ret)
   1617		return ret;
   1618
   1619	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
   1620	p->flags &= KPROBE_FLAG_DISABLED;
   1621	p->nmissed = 0;
   1622	INIT_LIST_HEAD(&p->list);
   1623
   1624	ret = check_kprobe_address_safe(p, &probed_mod);
   1625	if (ret)
   1626		return ret;
   1627
   1628	mutex_lock(&kprobe_mutex);
   1629
   1630	old_p = get_kprobe(p->addr);
   1631	if (old_p) {
   1632		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
   1633		ret = register_aggr_kprobe(old_p, p);
   1634		goto out;
   1635	}
   1636
   1637	cpus_read_lock();
   1638	/* Prevent text modification */
   1639	mutex_lock(&text_mutex);
   1640	ret = prepare_kprobe(p);
   1641	mutex_unlock(&text_mutex);
   1642	cpus_read_unlock();
   1643	if (ret)
   1644		goto out;
   1645
   1646	INIT_HLIST_NODE(&p->hlist);
   1647	hlist_add_head_rcu(&p->hlist,
   1648		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
   1649
   1650	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
   1651		ret = arm_kprobe(p);
   1652		if (ret) {
   1653			hlist_del_rcu(&p->hlist);
   1654			synchronize_rcu();
   1655			goto out;
   1656		}
   1657	}
   1658
   1659	/* Try to optimize kprobe */
   1660	try_to_optimize_kprobe(p);
   1661out:
   1662	mutex_unlock(&kprobe_mutex);
   1663
   1664	if (probed_mod)
   1665		module_put(probed_mod);
   1666
   1667	return ret;
   1668}
   1669EXPORT_SYMBOL_GPL(register_kprobe);
   1670
   1671/* Check if all probes on the 'ap' are disabled. */
   1672static bool aggr_kprobe_disabled(struct kprobe *ap)
   1673{
   1674	struct kprobe *kp;
   1675
   1676	lockdep_assert_held(&kprobe_mutex);
   1677
   1678	list_for_each_entry(kp, &ap->list, list)
   1679		if (!kprobe_disabled(kp))
   1680			/*
   1681			 * Since there is an active probe on the list,
   1682			 * we can't disable this 'ap'.
   1683			 */
   1684			return false;
   1685
   1686	return true;
   1687}
   1688
   1689static struct kprobe *__disable_kprobe(struct kprobe *p)
   1690{
   1691	struct kprobe *orig_p;
   1692	int ret;
   1693
   1694	lockdep_assert_held(&kprobe_mutex);
   1695
   1696	/* Get an original kprobe for return */
   1697	orig_p = __get_valid_kprobe(p);
   1698	if (unlikely(orig_p == NULL))
   1699		return ERR_PTR(-EINVAL);
   1700
   1701	if (!kprobe_disabled(p)) {
   1702		/* Disable probe if it is a child probe */
   1703		if (p != orig_p)
   1704			p->flags |= KPROBE_FLAG_DISABLED;
   1705
   1706		/* Try to disarm and disable this/parent probe */
   1707		if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
   1708			/*
   1709			 * If 'kprobes_all_disarmed' is set, 'orig_p'
   1710			 * should have already been disarmed, so
   1711			 * skip the unneeded disarming process.
   1712			 */
   1713			if (!kprobes_all_disarmed) {
   1714				ret = disarm_kprobe(orig_p, true);
   1715				if (ret) {
   1716					p->flags &= ~KPROBE_FLAG_DISABLED;
   1717					return ERR_PTR(ret);
   1718				}
   1719			}
   1720			orig_p->flags |= KPROBE_FLAG_DISABLED;
   1721		}
   1722	}
   1723
   1724	return orig_p;
   1725}
   1726
   1727/*
   1728 * Unregister a kprobe without a scheduler synchronization.
   1729 */
   1730static int __unregister_kprobe_top(struct kprobe *p)
   1731{
   1732	struct kprobe *ap, *list_p;
   1733
   1734	/* Disable kprobe. This will disarm it if needed. */
   1735	ap = __disable_kprobe(p);
   1736	if (IS_ERR(ap))
   1737		return PTR_ERR(ap);
   1738
   1739	if (ap == p)
   1740		/*
   1741		 * This probe is an independent (and non-optimized) kprobe
   1742		 * (not an aggrprobe). Remove from the hash list.
   1743		 */
   1744		goto disarmed;
   1745
   1746	/* The following process expects this probe to be an aggrprobe */
   1747	WARN_ON(!kprobe_aggrprobe(ap));
   1748
   1749	if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
   1750		/*
   1751		 * !disarmed can happen if the probe is under delayed
   1752		 * unoptimization.
   1753		 */
   1754		goto disarmed;
   1755	else {
   1756		/* If the probe being disabled has special handlers, update the aggrprobe */
   1757		if (p->post_handler && !kprobe_gone(p)) {
   1758			list_for_each_entry(list_p, &ap->list, list) {
   1759				if ((list_p != p) && (list_p->post_handler))
   1760					goto noclean;
   1761			}
   1762			ap->post_handler = NULL;
   1763		}
   1764noclean:
   1765		/*
   1766		 * Remove from the aggrprobe: this path will do nothing in
   1767		 * __unregister_kprobe_bottom().
   1768		 */
   1769		list_del_rcu(&p->list);
   1770		if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
   1771			/*
   1772			 * Try to optimize this probe again, because post
   1773			 * handler may have been changed.
   1774			 */
   1775			optimize_kprobe(ap);
   1776	}
   1777	return 0;
   1778
   1779disarmed:
   1780	hlist_del_rcu(&ap->hlist);
   1781	return 0;
   1782}
   1783
   1784static void __unregister_kprobe_bottom(struct kprobe *p)
   1785{
   1786	struct kprobe *ap;
   1787
   1788	if (list_empty(&p->list))
   1789		/* This is an independent kprobe */
   1790		arch_remove_kprobe(p);
   1791	else if (list_is_singular(&p->list)) {
   1792		/* This is the last child of an aggrprobe */
   1793		ap = list_entry(p->list.next, struct kprobe, list);
   1794		list_del(&p->list);
   1795		free_aggr_kprobe(ap);
   1796	}
   1797	/* Otherwise, do nothing. */
   1798}
   1799
   1800int register_kprobes(struct kprobe **kps, int num)
   1801{
   1802	int i, ret = 0;
   1803
   1804	if (num <= 0)
   1805		return -EINVAL;
   1806	for (i = 0; i < num; i++) {
   1807		ret = register_kprobe(kps[i]);
   1808		if (ret < 0) {
   1809			if (i > 0)
   1810				unregister_kprobes(kps, i);
   1811			break;
   1812		}
   1813	}
   1814	return ret;
   1815}
   1816EXPORT_SYMBOL_GPL(register_kprobes);
   1817
   1818void unregister_kprobe(struct kprobe *p)
   1819{
   1820	unregister_kprobes(&p, 1);
   1821}
   1822EXPORT_SYMBOL_GPL(unregister_kprobe);
   1823
   1824void unregister_kprobes(struct kprobe **kps, int num)
   1825{
   1826	int i;
   1827
   1828	if (num <= 0)
   1829		return;
   1830	mutex_lock(&kprobe_mutex);
   1831	for (i = 0; i < num; i++)
   1832		if (__unregister_kprobe_top(kps[i]) < 0)
   1833			kps[i]->addr = NULL;
   1834	mutex_unlock(&kprobe_mutex);
   1835
   1836	synchronize_rcu();
   1837	for (i = 0; i < num; i++)
   1838		if (kps[i]->addr)
   1839			__unregister_kprobe_bottom(kps[i]);
   1840}
   1841EXPORT_SYMBOL_GPL(unregister_kprobes);
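/*
 * Editor's sketch, not part of the original file: batch registration via
 * register_kprobes()/unregister_kprobes(). On failure, register_kprobes()
 * rolls back the probes it already registered. Symbol names are illustrative.
 */
#if 0	/* illustration only, never compiled */
static struct kprobe example_kp_a = { .symbol_name = "kernel_clone" };
static struct kprobe example_kp_b = { .symbol_name = "do_exit" };
static struct kprobe *example_kps[] = { &example_kp_a, &example_kp_b };

static int __init example_batch_init(void)
{
	return register_kprobes(example_kps, ARRAY_SIZE(example_kps));
}

static void __exit example_batch_exit(void)
{
	unregister_kprobes(example_kps, ARRAY_SIZE(example_kps));
}
#endif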
   1842
   1843int __weak kprobe_exceptions_notify(struct notifier_block *self,
   1844					unsigned long val, void *data)
   1845{
   1846	return NOTIFY_DONE;
   1847}
   1848NOKPROBE_SYMBOL(kprobe_exceptions_notify);
   1849
   1850static struct notifier_block kprobe_exceptions_nb = {
   1851	.notifier_call = kprobe_exceptions_notify,
   1852	.priority = 0x7fffffff /* we need to be notified first */
   1853};
   1854
   1855#ifdef CONFIG_KRETPROBES
   1856
   1857#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
   1858static void free_rp_inst_rcu(struct rcu_head *head)
   1859{
   1860	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
   1861
   1862	if (refcount_dec_and_test(&ri->rph->ref))
   1863		kfree(ri->rph);
   1864	kfree(ri);
   1865}
   1866NOKPROBE_SYMBOL(free_rp_inst_rcu);
   1867
   1868static void recycle_rp_inst(struct kretprobe_instance *ri)
   1869{
   1870	struct kretprobe *rp = get_kretprobe(ri);
   1871
   1872	if (likely(rp))
   1873		freelist_add(&ri->freelist, &rp->freelist);
   1874	else
   1875		call_rcu(&ri->rcu, free_rp_inst_rcu);
   1876}
   1877NOKPROBE_SYMBOL(recycle_rp_inst);
   1878
   1879/*
   1880 * This function is called from delayed_put_task_struct() when a task is
   1881 * dead and cleaned up to recycle any kretprobe instances associated with
   1882 * this task. These left over instances represent probed functions that
   1883 * have been called but will never return.
   1884 */
   1885void kprobe_flush_task(struct task_struct *tk)
   1886{
   1887	struct kretprobe_instance *ri;
   1888	struct llist_node *node;
   1889
   1890	/* Early boot, not yet initialized. */
   1891	if (unlikely(!kprobes_initialized))
   1892		return;
   1893
   1894	kprobe_busy_begin();
   1895
   1896	node = __llist_del_all(&tk->kretprobe_instances);
   1897	while (node) {
   1898		ri = container_of(node, struct kretprobe_instance, llist);
   1899		node = node->next;
   1900
   1901		recycle_rp_inst(ri);
   1902	}
   1903
   1904	kprobe_busy_end();
   1905}
   1906NOKPROBE_SYMBOL(kprobe_flush_task);
   1907
   1908static inline void free_rp_inst(struct kretprobe *rp)
   1909{
   1910	struct kretprobe_instance *ri;
   1911	struct freelist_node *node;
   1912	int count = 0;
   1913
   1914	node = rp->freelist.head;
   1915	while (node) {
   1916		ri = container_of(node, struct kretprobe_instance, freelist);
   1917		node = node->next;
   1918
   1919		kfree(ri);
   1920		count++;
   1921	}
   1922
   1923	if (refcount_sub_and_test(count, &rp->rph->ref)) {
   1924		kfree(rp->rph);
   1925		rp->rph = NULL;
   1926	}
   1927}
   1928
    1929/* This assumes the 'tsk' is the current task or a task that is not running. */
   1930static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
   1931						  struct llist_node **cur)
   1932{
   1933	struct kretprobe_instance *ri = NULL;
   1934	struct llist_node *node = *cur;
   1935
   1936	if (!node)
   1937		node = tsk->kretprobe_instances.first;
   1938	else
   1939		node = node->next;
   1940
   1941	while (node) {
   1942		ri = container_of(node, struct kretprobe_instance, llist);
   1943		if (ri->ret_addr != kretprobe_trampoline_addr()) {
   1944			*cur = node;
   1945			return ri->ret_addr;
   1946		}
   1947		node = node->next;
   1948	}
   1949	return NULL;
   1950}
   1951NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
   1952
   1953/**
   1954 * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
   1955 * @tsk: Target task
   1956 * @fp: A frame pointer
   1957 * @cur: a storage of the loop cursor llist_node pointer for next call
   1958 *
    1959 * Find the correct return address modified by a kretprobe on @tsk, returned
    1960 * as an unsigned long. If the return address is found, this returns that
    1961 * address value; otherwise it returns 0.
    1962 * The @tsk must be 'current' or a task which is not running. @fp is a hint
    1963 * to get the correct return address - it is compared with the
    1964 * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
    1965 * kretprobe return addresses on @tsk. '*@cur' should be NULL on the
    1966 * first call, but '@cur' itself must NOT be NULL.
   1967 */
   1968unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
   1969				      struct llist_node **cur)
   1970{
   1971	struct kretprobe_instance *ri = NULL;
   1972	kprobe_opcode_t *ret;
   1973
   1974	if (WARN_ON_ONCE(!cur))
   1975		return 0;
   1976
   1977	do {
   1978		ret = __kretprobe_find_ret_addr(tsk, cur);
   1979		if (!ret)
   1980			break;
   1981		ri = container_of(*cur, struct kretprobe_instance, llist);
   1982	} while (ri->fp != fp);
   1983
   1984	return (unsigned long)ret;
   1985}
   1986NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
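/*
 * Editor's sketch, not part of the original file: how an arch stack unwinder
 * might use kretprobe_find_ret_addr() to replace the trampoline address with
 * the real return address. The 'state' layout is an assumption; real users
 * keep the llist cursor in their unwind state across frames.
 */
#if 0	/* illustration only, never compiled */
struct example_unwind_state {
	struct task_struct *task;
	struct llist_node *kr_cur;	/* loop cursor, NULL before first use */
};

static unsigned long example_fixup_addr(struct example_unwind_state *state,
					unsigned long addr, void *fp)
{
	if (is_kretprobe_trampoline(addr))
		return kretprobe_find_ret_addr(state->task, fp, &state->kr_cur);
	return addr;
}
#endif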
   1987
   1988void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
   1989					kprobe_opcode_t *correct_ret_addr)
   1990{
   1991	/*
    1992	 * Do nothing by default. Each arch may override this to update the fake
    1993	 * return address on the stack with the correct one, if possible.
   1994	 */
   1995}
   1996
   1997unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
   1998					     void *frame_pointer)
   1999{
   2000	kprobe_opcode_t *correct_ret_addr = NULL;
   2001	struct kretprobe_instance *ri = NULL;
   2002	struct llist_node *first, *node = NULL;
   2003	struct kretprobe *rp;
   2004
   2005	/* Find correct address and all nodes for this frame. */
   2006	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
   2007	if (!correct_ret_addr) {
    2008		pr_err("kretprobe: Return address not found; not executing the handler. There may be a bug in the kernel.\n");
   2009		BUG_ON(1);
   2010	}
   2011
   2012	/*
   2013	 * Set the return address as the instruction pointer, because if the
   2014	 * user handler calls stack_trace_save_regs() with this 'regs',
   2015	 * the stack trace will start from the instruction pointer.
   2016	 */
   2017	instruction_pointer_set(regs, (unsigned long)correct_ret_addr);
   2018
    2019	/* Run the user handlers of the nodes. */
   2020	first = current->kretprobe_instances.first;
   2021	while (first) {
   2022		ri = container_of(first, struct kretprobe_instance, llist);
   2023
   2024		if (WARN_ON_ONCE(ri->fp != frame_pointer))
   2025			break;
   2026
   2027		rp = get_kretprobe(ri);
   2028		if (rp && rp->handler) {
   2029			struct kprobe *prev = kprobe_running();
   2030
   2031			__this_cpu_write(current_kprobe, &rp->kp);
   2032			ri->ret_addr = correct_ret_addr;
   2033			rp->handler(ri, regs);
   2034			__this_cpu_write(current_kprobe, prev);
   2035		}
   2036		if (first == node)
   2037			break;
   2038
   2039		first = first->next;
   2040	}
   2041
   2042	arch_kretprobe_fixup_return(regs, correct_ret_addr);
   2043
   2044	/* Unlink all nodes for this frame. */
   2045	first = current->kretprobe_instances.first;
   2046	current->kretprobe_instances.first = node->next;
   2047	node->next = NULL;
   2048
   2049	/* Recycle free instances. */
   2050	while (first) {
   2051		ri = container_of(first, struct kretprobe_instance, llist);
   2052		first = first->next;
   2053
   2054		recycle_rp_inst(ri);
   2055	}
   2056
   2057	return (unsigned long)correct_ret_addr;
   2058}
   2059NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
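/*
 * Editor's sketch, not part of the original file: the general shape of an arch
 * trampoline callback invoking the handler above. The frame-pointer expression
 * is an assumption; each arch passes whatever it recorded in
 * arch_prepare_kretprobe().
 */
#if 0	/* illustration only, never compiled */
__used __visible void example_trampoline_callback(struct pt_regs *regs)
{
	unsigned long orig_ret;

	/* Must match the 'fp' saved into kretprobe_instance::fp at entry. */
	orig_ret = __kretprobe_trampoline_handler(regs, (void *)kernel_stack_pointer(regs));

	/* Arch code then resumes execution at the restored return address. */
	instruction_pointer_set(regs, orig_ret);
}
#endif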
   2060
   2061/*
    2062 * This kprobe pre_handler is registered with every kretprobe. When the probe
    2063 * hits, it will set up the return probe.
   2064 */
   2065static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
   2066{
   2067	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
   2068	struct kretprobe_instance *ri;
   2069	struct freelist_node *fn;
   2070
   2071	fn = freelist_try_get(&rp->freelist);
   2072	if (!fn) {
   2073		rp->nmissed++;
   2074		return 0;
   2075	}
   2076
   2077	ri = container_of(fn, struct kretprobe_instance, freelist);
   2078
   2079	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
   2080		freelist_add(&ri->freelist, &rp->freelist);
   2081		return 0;
   2082	}
   2083
   2084	arch_prepare_kretprobe(ri, regs);
   2085
   2086	__llist_add(&ri->llist, &current->kretprobe_instances);
   2087
   2088	return 0;
   2089}
   2090NOKPROBE_SYMBOL(pre_handler_kretprobe);
   2091#else /* CONFIG_KRETPROBE_ON_RETHOOK */
   2092/*
    2093 * This kprobe pre_handler is registered with every kretprobe. When the probe
    2094 * hits, it will set up the return probe.
   2095 */
   2096static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
   2097{
   2098	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
   2099	struct kretprobe_instance *ri;
   2100	struct rethook_node *rhn;
   2101
   2102	rhn = rethook_try_get(rp->rh);
   2103	if (!rhn) {
   2104		rp->nmissed++;
   2105		return 0;
   2106	}
   2107
   2108	ri = container_of(rhn, struct kretprobe_instance, node);
   2109
   2110	if (rp->entry_handler && rp->entry_handler(ri, regs))
   2111		rethook_recycle(rhn);
   2112	else
   2113		rethook_hook(rhn, regs, kprobe_ftrace(p));
   2114
   2115	return 0;
   2116}
   2117NOKPROBE_SYMBOL(pre_handler_kretprobe);
   2118
   2119static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
   2120				      struct pt_regs *regs)
   2121{
   2122	struct kretprobe *rp = (struct kretprobe *)data;
   2123	struct kretprobe_instance *ri;
   2124	struct kprobe_ctlblk *kcb;
   2125
    2126	/* The data must NOT be NULL. If it is, the rethook data structure is broken. */
   2127	if (WARN_ON_ONCE(!data) || !rp->handler)
   2128		return;
   2129
   2130	__this_cpu_write(current_kprobe, &rp->kp);
   2131	kcb = get_kprobe_ctlblk();
   2132	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
   2133
   2134	ri = container_of(rh, struct kretprobe_instance, node);
   2135	rp->handler(ri, regs);
   2136
   2137	__this_cpu_write(current_kprobe, NULL);
   2138}
   2139NOKPROBE_SYMBOL(kretprobe_rethook_handler);
   2140
   2141#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
   2142
   2143/**
   2144 * kprobe_on_func_entry() -- check whether given address is function entry
   2145 * @addr: Target address
   2146 * @sym:  Target symbol name
   2147 * @offset: The offset from the symbol or the address
   2148 *
    2149 * This checks whether the given @addr+@offset or @sym+@offset is the
    2150 * function entry address or not.
    2151 * This returns 0 if it is the function entry, or -EINVAL if it is not.
    2152 * It also returns -ENOENT if the symbol or address lookup fails.
    2153 * The caller must pass either @addr or @sym (the other must be NULL),
    2154 * or this returns -EINVAL.
   2155 */
   2156int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
   2157{
   2158	bool on_func_entry;
   2159	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry);
   2160
   2161	if (IS_ERR(kp_addr))
   2162		return PTR_ERR(kp_addr);
   2163
   2164	if (!on_func_entry)
   2165		return -EINVAL;
   2166
   2167	return 0;
   2168}
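/*
 * Editor's sketch, not part of the original file: a pre-registration check
 * using kprobe_on_func_entry(). Exactly one of the address and the symbol
 * name may be passed; here only a symbol name is used.
 */
#if 0	/* illustration only, never compiled */
static bool example_can_kretprobe(const char *sym, unsigned long offset)
{
	return kprobe_on_func_entry(NULL, sym, offset) == 0;
}
#endif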
   2169
   2170int register_kretprobe(struct kretprobe *rp)
   2171{
   2172	int ret;
   2173	struct kretprobe_instance *inst;
   2174	int i;
   2175	void *addr;
   2176
   2177	ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
   2178	if (ret)
   2179		return ret;
   2180
    2181	/* If only 'rp->kp.addr' is specified, check whether the kprobe is being re-registered */
   2182	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
   2183		return -EINVAL;
   2184
   2185	if (kretprobe_blacklist_size) {
   2186		addr = kprobe_addr(&rp->kp);
   2187		if (IS_ERR(addr))
   2188			return PTR_ERR(addr);
   2189
   2190		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
   2191			if (kretprobe_blacklist[i].addr == addr)
   2192				return -EINVAL;
   2193		}
   2194	}
   2195
   2196	if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
   2197		return -E2BIG;
   2198
   2199	rp->kp.pre_handler = pre_handler_kretprobe;
   2200	rp->kp.post_handler = NULL;
   2201
   2202	/* Pre-allocate memory for max kretprobe instances */
   2203	if (rp->maxactive <= 0) {
   2204#ifdef CONFIG_PREEMPTION
   2205		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
   2206#else
   2207		rp->maxactive = num_possible_cpus();
   2208#endif
   2209	}
   2210#ifdef CONFIG_KRETPROBE_ON_RETHOOK
   2211	rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler);
   2212	if (!rp->rh)
   2213		return -ENOMEM;
   2214
   2215	for (i = 0; i < rp->maxactive; i++) {
   2216		inst = kzalloc(sizeof(struct kretprobe_instance) +
   2217			       rp->data_size, GFP_KERNEL);
   2218		if (inst == NULL) {
   2219			rethook_free(rp->rh);
   2220			rp->rh = NULL;
   2221			return -ENOMEM;
   2222		}
   2223		rethook_add_node(rp->rh, &inst->node);
   2224	}
   2225	rp->nmissed = 0;
   2226	/* Establish function entry probe point */
   2227	ret = register_kprobe(&rp->kp);
   2228	if (ret != 0) {
   2229		rethook_free(rp->rh);
   2230		rp->rh = NULL;
   2231	}
   2232#else	/* !CONFIG_KRETPROBE_ON_RETHOOK */
   2233	rp->freelist.head = NULL;
   2234	rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
   2235	if (!rp->rph)
   2236		return -ENOMEM;
   2237
   2238	rp->rph->rp = rp;
   2239	for (i = 0; i < rp->maxactive; i++) {
   2240		inst = kzalloc(sizeof(struct kretprobe_instance) +
   2241			       rp->data_size, GFP_KERNEL);
   2242		if (inst == NULL) {
   2243			refcount_set(&rp->rph->ref, i);
   2244			free_rp_inst(rp);
   2245			return -ENOMEM;
   2246		}
   2247		inst->rph = rp->rph;
   2248		freelist_add(&inst->freelist, &rp->freelist);
   2249	}
   2250	refcount_set(&rp->rph->ref, i);
   2251
   2252	rp->nmissed = 0;
   2253	/* Establish function entry probe point */
   2254	ret = register_kprobe(&rp->kp);
   2255	if (ret != 0)
   2256		free_rp_inst(rp);
   2257#endif
   2258	return ret;
   2259}
   2260EXPORT_SYMBOL_GPL(register_kretprobe);
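/*
 * Editor's sketch, not part of the original file: a minimal kretprobe that
 * times a call using per-instance data. The probed symbol, handler bodies
 * and 'maxactive' value are illustrative assumptions.
 */
#if 0	/* illustration only, never compiled */
struct example_rp_data {
	ktime_t entry_time;
};

static int example_entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct example_rp_data *d = (struct example_rp_data *)ri->data;

	d->entry_time = ktime_get();
	return 0;	/* 0: track this instance until the function returns */
}

static int example_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct example_rp_data *d = (struct example_rp_data *)ri->data;

	pr_info("returned 0x%lx after %lld ns\n", regs_return_value(regs),
		ktime_to_ns(ktime_sub(ktime_get(), d->entry_time)));
	return 0;
}

static struct kretprobe example_rp = {
	.kp.symbol_name	= "kernel_clone",	/* assumed target */
	.entry_handler	= example_entry_handler,
	.handler	= example_ret_handler,
	.data_size	= sizeof(struct example_rp_data),
	.maxactive	= 20,
};

/* register_kretprobe(&example_rp) in module init, unregister_kretprobe() on exit. */
#endif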
   2261
   2262int register_kretprobes(struct kretprobe **rps, int num)
   2263{
   2264	int ret = 0, i;
   2265
   2266	if (num <= 0)
   2267		return -EINVAL;
   2268	for (i = 0; i < num; i++) {
   2269		ret = register_kretprobe(rps[i]);
   2270		if (ret < 0) {
   2271			if (i > 0)
   2272				unregister_kretprobes(rps, i);
   2273			break;
   2274		}
   2275	}
   2276	return ret;
   2277}
   2278EXPORT_SYMBOL_GPL(register_kretprobes);
   2279
   2280void unregister_kretprobe(struct kretprobe *rp)
   2281{
   2282	unregister_kretprobes(&rp, 1);
   2283}
   2284EXPORT_SYMBOL_GPL(unregister_kretprobe);
   2285
   2286void unregister_kretprobes(struct kretprobe **rps, int num)
   2287{
   2288	int i;
   2289
   2290	if (num <= 0)
   2291		return;
   2292	mutex_lock(&kprobe_mutex);
   2293	for (i = 0; i < num; i++) {
   2294		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
   2295			rps[i]->kp.addr = NULL;
   2296#ifdef CONFIG_KRETPROBE_ON_RETHOOK
   2297		rethook_free(rps[i]->rh);
   2298#else
   2299		rps[i]->rph->rp = NULL;
   2300#endif
   2301	}
   2302	mutex_unlock(&kprobe_mutex);
   2303
   2304	synchronize_rcu();
   2305	for (i = 0; i < num; i++) {
   2306		if (rps[i]->kp.addr) {
   2307			__unregister_kprobe_bottom(&rps[i]->kp);
   2308#ifndef CONFIG_KRETPROBE_ON_RETHOOK
   2309			free_rp_inst(rps[i]);
   2310#endif
   2311		}
   2312	}
   2313}
   2314EXPORT_SYMBOL_GPL(unregister_kretprobes);
   2315
   2316#else /* CONFIG_KRETPROBES */
   2317int register_kretprobe(struct kretprobe *rp)
   2318{
   2319	return -EOPNOTSUPP;
   2320}
   2321EXPORT_SYMBOL_GPL(register_kretprobe);
   2322
   2323int register_kretprobes(struct kretprobe **rps, int num)
   2324{
   2325	return -EOPNOTSUPP;
   2326}
   2327EXPORT_SYMBOL_GPL(register_kretprobes);
   2328
   2329void unregister_kretprobe(struct kretprobe *rp)
   2330{
   2331}
   2332EXPORT_SYMBOL_GPL(unregister_kretprobe);
   2333
   2334void unregister_kretprobes(struct kretprobe **rps, int num)
   2335{
   2336}
   2337EXPORT_SYMBOL_GPL(unregister_kretprobes);
   2338
   2339static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
   2340{
   2341	return 0;
   2342}
   2343NOKPROBE_SYMBOL(pre_handler_kretprobe);
   2344
   2345#endif /* CONFIG_KRETPROBES */
   2346
   2347/* Set the kprobe gone and remove its instruction buffer. */
   2348static void kill_kprobe(struct kprobe *p)
   2349{
   2350	struct kprobe *kp;
   2351
   2352	lockdep_assert_held(&kprobe_mutex);
   2353
   2354	p->flags |= KPROBE_FLAG_GONE;
   2355	if (kprobe_aggrprobe(p)) {
   2356		/*
   2357		 * If this is an aggr_kprobe, we have to list all the
   2358		 * chained probes and mark them GONE.
   2359		 */
   2360		list_for_each_entry(kp, &p->list, list)
   2361			kp->flags |= KPROBE_FLAG_GONE;
   2362		p->post_handler = NULL;
   2363		kill_optimized_kprobe(p);
   2364	}
   2365	/*
   2366	 * Here, we can remove insn_slot safely, because no thread calls
   2367	 * the original probed function (which will be freed soon) any more.
   2368	 */
   2369	arch_remove_kprobe(p);
   2370
   2371	/*
   2372	 * The module is going away. We should disarm the kprobe which
    2373	 * is using ftrace, because the ftrace framework is still available at
    2374	 * the 'MODULE_STATE_GOING' notification.
   2375	 */
   2376	if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
   2377		disarm_kprobe_ftrace(p);
   2378}
   2379
   2380/* Disable one kprobe */
   2381int disable_kprobe(struct kprobe *kp)
   2382{
   2383	int ret = 0;
   2384	struct kprobe *p;
   2385
   2386	mutex_lock(&kprobe_mutex);
   2387
   2388	/* Disable this kprobe */
   2389	p = __disable_kprobe(kp);
   2390	if (IS_ERR(p))
   2391		ret = PTR_ERR(p);
   2392
   2393	mutex_unlock(&kprobe_mutex);
   2394	return ret;
   2395}
   2396EXPORT_SYMBOL_GPL(disable_kprobe);
   2397
   2398/* Enable one kprobe */
   2399int enable_kprobe(struct kprobe *kp)
   2400{
   2401	int ret = 0;
   2402	struct kprobe *p;
   2403
   2404	mutex_lock(&kprobe_mutex);
   2405
   2406	/* Check whether specified probe is valid. */
   2407	p = __get_valid_kprobe(kp);
   2408	if (unlikely(p == NULL)) {
   2409		ret = -EINVAL;
   2410		goto out;
   2411	}
   2412
   2413	if (kprobe_gone(kp)) {
    2414		/* This kprobe is gone; we can't enable it. */
   2415		ret = -EINVAL;
   2416		goto out;
   2417	}
   2418
   2419	if (p != kp)
   2420		kp->flags &= ~KPROBE_FLAG_DISABLED;
   2421
   2422	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
   2423		p->flags &= ~KPROBE_FLAG_DISABLED;
   2424		ret = arm_kprobe(p);
   2425		if (ret)
   2426			p->flags |= KPROBE_FLAG_DISABLED;
   2427	}
   2428out:
   2429	mutex_unlock(&kprobe_mutex);
   2430	return ret;
   2431}
   2432EXPORT_SYMBOL_GPL(enable_kprobe);
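/*
 * Editor's sketch, not part of the original file: temporarily muting a
 * registered probe with disable_kprobe()/enable_kprobe() instead of paying
 * the full unregister/register cost.
 */
#if 0	/* illustration only, never compiled */
static void example_pause_probe(struct kprobe *kp, bool pause)
{
	int ret = pause ? disable_kprobe(kp) : enable_kprobe(kp);

	if (ret)
		pr_warn("%s kprobe failed: %d\n",
			pause ? "disabling" : "enabling", ret);
}
#endif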
   2433
    2434/* Callers must NOT call this in the usual path. This is only for critical cases. */
   2435void dump_kprobe(struct kprobe *kp)
   2436{
   2437	pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
   2438	       kp->symbol_name, kp->offset, kp->addr);
   2439}
   2440NOKPROBE_SYMBOL(dump_kprobe);
   2441
   2442int kprobe_add_ksym_blacklist(unsigned long entry)
   2443{
   2444	struct kprobe_blacklist_entry *ent;
   2445	unsigned long offset = 0, size = 0;
   2446
   2447	if (!kernel_text_address(entry) ||
   2448	    !kallsyms_lookup_size_offset(entry, &size, &offset))
   2449		return -EINVAL;
   2450
   2451	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
   2452	if (!ent)
   2453		return -ENOMEM;
   2454	ent->start_addr = entry;
   2455	ent->end_addr = entry + size;
   2456	INIT_LIST_HEAD(&ent->list);
   2457	list_add_tail(&ent->list, &kprobe_blacklist);
   2458
   2459	return (int)size;
   2460}
   2461
   2462/* Add all symbols in given area into kprobe blacklist */
   2463int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
   2464{
   2465	unsigned long entry;
   2466	int ret = 0;
   2467
   2468	for (entry = start; entry < end; entry += ret) {
   2469		ret = kprobe_add_ksym_blacklist(entry);
   2470		if (ret < 0)
   2471			return ret;
   2472		if (ret == 0)	/* In case of alias symbol */
   2473			ret = 1;
   2474	}
   2475	return 0;
   2476}
   2477
   2478/* Remove all symbols in given area from kprobe blacklist */
   2479static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
   2480{
   2481	struct kprobe_blacklist_entry *ent, *n;
   2482
   2483	list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
   2484		if (ent->start_addr < start || ent->start_addr >= end)
   2485			continue;
   2486		list_del(&ent->list);
   2487		kfree(ent);
   2488	}
   2489}
   2490
   2491static void kprobe_remove_ksym_blacklist(unsigned long entry)
   2492{
   2493	kprobe_remove_area_blacklist(entry, entry + 1);
   2494}
   2495
   2496int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
   2497				   char *type, char *sym)
   2498{
   2499	return -ERANGE;
   2500}
   2501
   2502int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
   2503		       char *sym)
   2504{
   2505#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
   2506	if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
   2507		return 0;
   2508#ifdef CONFIG_OPTPROBES
   2509	if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
   2510		return 0;
   2511#endif
   2512#endif
   2513	if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
   2514		return 0;
   2515	return -ERANGE;
   2516}
   2517
   2518int __init __weak arch_populate_kprobe_blacklist(void)
   2519{
   2520	return 0;
   2521}
   2522
   2523/*
   2524 * Lookup and populate the kprobe_blacklist.
   2525 *
   2526 * Unlike the kretprobe blacklist, we'll need to determine
    2527 * the range of addresses that belong to these functions,
   2528 * since a kprobe need not necessarily be at the beginning
   2529 * of a function.
   2530 */
   2531static int __init populate_kprobe_blacklist(unsigned long *start,
   2532					     unsigned long *end)
   2533{
   2534	unsigned long entry;
   2535	unsigned long *iter;
   2536	int ret;
   2537
   2538	for (iter = start; iter < end; iter++) {
   2539		entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
   2540		ret = kprobe_add_ksym_blacklist(entry);
   2541		if (ret == -EINVAL)
   2542			continue;
   2543		if (ret < 0)
   2544			return ret;
   2545	}
   2546
   2547	/* Symbols in '__kprobes_text' are blacklisted */
   2548	ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
   2549					(unsigned long)__kprobes_text_end);
   2550	if (ret)
   2551		return ret;
   2552
   2553	/* Symbols in 'noinstr' section are blacklisted */
   2554	ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
   2555					(unsigned long)__noinstr_text_end);
   2556
   2557	return ret ? : arch_populate_kprobe_blacklist();
   2558}
   2559
   2560static void add_module_kprobe_blacklist(struct module *mod)
   2561{
   2562	unsigned long start, end;
   2563	int i;
   2564
   2565	if (mod->kprobe_blacklist) {
   2566		for (i = 0; i < mod->num_kprobe_blacklist; i++)
   2567			kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
   2568	}
   2569
   2570	start = (unsigned long)mod->kprobes_text_start;
   2571	if (start) {
   2572		end = start + mod->kprobes_text_size;
   2573		kprobe_add_area_blacklist(start, end);
   2574	}
   2575
   2576	start = (unsigned long)mod->noinstr_text_start;
   2577	if (start) {
   2578		end = start + mod->noinstr_text_size;
   2579		kprobe_add_area_blacklist(start, end);
   2580	}
   2581}
   2582
   2583static void remove_module_kprobe_blacklist(struct module *mod)
   2584{
   2585	unsigned long start, end;
   2586	int i;
   2587
   2588	if (mod->kprobe_blacklist) {
   2589		for (i = 0; i < mod->num_kprobe_blacklist; i++)
   2590			kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
   2591	}
   2592
   2593	start = (unsigned long)mod->kprobes_text_start;
   2594	if (start) {
   2595		end = start + mod->kprobes_text_size;
   2596		kprobe_remove_area_blacklist(start, end);
   2597	}
   2598
   2599	start = (unsigned long)mod->noinstr_text_start;
   2600	if (start) {
   2601		end = start + mod->noinstr_text_size;
   2602		kprobe_remove_area_blacklist(start, end);
   2603	}
   2604}
   2605
    2606/* Module notifier callback, checking kprobes on the module */
   2607static int kprobes_module_callback(struct notifier_block *nb,
   2608				   unsigned long val, void *data)
   2609{
   2610	struct module *mod = data;
   2611	struct hlist_head *head;
   2612	struct kprobe *p;
   2613	unsigned int i;
   2614	int checkcore = (val == MODULE_STATE_GOING);
   2615
   2616	if (val == MODULE_STATE_COMING) {
   2617		mutex_lock(&kprobe_mutex);
   2618		add_module_kprobe_blacklist(mod);
   2619		mutex_unlock(&kprobe_mutex);
   2620	}
   2621	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
   2622		return NOTIFY_DONE;
   2623
   2624	/*
    2625	 * When 'MODULE_STATE_GOING' is notified, both the module's '.text' and
    2626	 * '.init.text' sections will be freed. When 'MODULE_STATE_LIVE' is
    2627	 * notified, only the '.init.text' section will be freed. We need to
    2628	 * disable the kprobes which have been inserted in those sections.
   2629	 */
   2630	mutex_lock(&kprobe_mutex);
   2631	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
   2632		head = &kprobe_table[i];
   2633		hlist_for_each_entry(p, head, hlist)
   2634			if (within_module_init((unsigned long)p->addr, mod) ||
   2635			    (checkcore &&
   2636			     within_module_core((unsigned long)p->addr, mod))) {
   2637				/*
    2638				 * The vaddr this probe is installed at will soon
    2639				 * be vfreed but not synced to disk. Hence,
   2640				 * disarming the breakpoint isn't needed.
   2641				 *
   2642				 * Note, this will also move any optimized probes
   2643				 * that are pending to be removed from their
   2644				 * corresponding lists to the 'freeing_list' and
   2645				 * will not be touched by the delayed
   2646				 * kprobe_optimizer() work handler.
   2647				 */
   2648				kill_kprobe(p);
   2649			}
   2650	}
   2651	if (val == MODULE_STATE_GOING)
   2652		remove_module_kprobe_blacklist(mod);
   2653	mutex_unlock(&kprobe_mutex);
   2654	return NOTIFY_DONE;
   2655}
   2656
   2657static struct notifier_block kprobe_module_nb = {
   2658	.notifier_call = kprobes_module_callback,
   2659	.priority = 0
   2660};
   2661
   2662void kprobe_free_init_mem(void)
   2663{
   2664	void *start = (void *)(&__init_begin);
   2665	void *end = (void *)(&__init_end);
   2666	struct hlist_head *head;
   2667	struct kprobe *p;
   2668	int i;
   2669
   2670	mutex_lock(&kprobe_mutex);
   2671
   2672	/* Kill all kprobes on initmem because the target code has been freed. */
   2673	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
   2674		head = &kprobe_table[i];
   2675		hlist_for_each_entry(p, head, hlist) {
   2676			if (start <= (void *)p->addr && (void *)p->addr < end)
   2677				kill_kprobe(p);
   2678		}
   2679	}
   2680
   2681	mutex_unlock(&kprobe_mutex);
   2682}
   2683
   2684static int __init init_kprobes(void)
   2685{
   2686	int i, err = 0;
   2687
   2688	/* FIXME allocate the probe table, currently defined statically */
   2689	/* initialize all list heads */
   2690	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
   2691		INIT_HLIST_HEAD(&kprobe_table[i]);
   2692
   2693	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
   2694					__stop_kprobe_blacklist);
   2695	if (err)
   2696		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
   2697
   2698	if (kretprobe_blacklist_size) {
   2699		/* lookup the function address from its name */
   2700		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
   2701			kretprobe_blacklist[i].addr =
   2702				kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
   2703			if (!kretprobe_blacklist[i].addr)
   2704				pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
   2705				       kretprobe_blacklist[i].name);
   2706		}
   2707	}
   2708
   2709	/* By default, kprobes are armed */
   2710	kprobes_all_disarmed = false;
   2711
   2712#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
   2713	/* Init 'kprobe_optinsn_slots' for allocation */
   2714	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
   2715#endif
   2716
   2717	err = arch_init_kprobes();
   2718	if (!err)
   2719		err = register_die_notifier(&kprobe_exceptions_nb);
   2720	if (!err)
   2721		err = register_module_notifier(&kprobe_module_nb);
   2722
   2723	kprobes_initialized = (err == 0);
   2724	kprobe_sysctls_init();
   2725	return err;
   2726}
   2727early_initcall(init_kprobes);
   2728
   2729#if defined(CONFIG_OPTPROBES)
   2730static int __init init_optprobes(void)
   2731{
   2732	/*
    2733	 * Enable kprobe optimization - this kicks the optimizer, which
    2734	 * depends on synchronize_rcu_tasks() and ksoftirqd, and ksoftirqd is
    2735	 * not spawned at early initcall time. So delay the optimization.
   2736	 */
   2737	optimize_all_kprobes();
   2738
   2739	return 0;
   2740}
   2741subsys_initcall(init_optprobes);
   2742#endif
   2743
   2744#ifdef CONFIG_DEBUG_FS
   2745static void report_probe(struct seq_file *pi, struct kprobe *p,
   2746		const char *sym, int offset, char *modname, struct kprobe *pp)
   2747{
   2748	char *kprobe_type;
   2749	void *addr = p->addr;
   2750
   2751	if (p->pre_handler == pre_handler_kretprobe)
   2752		kprobe_type = "r";
   2753	else
   2754		kprobe_type = "k";
   2755
   2756	if (!kallsyms_show_value(pi->file->f_cred))
   2757		addr = NULL;
   2758
   2759	if (sym)
   2760		seq_printf(pi, "%px  %s  %s+0x%x  %s ",
   2761			addr, kprobe_type, sym, offset,
   2762			(modname ? modname : " "));
   2763	else	/* try to use %pS */
   2764		seq_printf(pi, "%px  %s  %pS ",
   2765			addr, kprobe_type, p->addr);
   2766
   2767	if (!pp)
   2768		pp = p;
   2769	seq_printf(pi, "%s%s%s%s\n",
   2770		(kprobe_gone(p) ? "[GONE]" : ""),
   2771		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
   2772		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
   2773		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
   2774}
   2775
   2776static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
   2777{
   2778	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
   2779}
   2780
   2781static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
   2782{
   2783	(*pos)++;
   2784	if (*pos >= KPROBE_TABLE_SIZE)
   2785		return NULL;
   2786	return pos;
   2787}
   2788
   2789static void kprobe_seq_stop(struct seq_file *f, void *v)
   2790{
   2791	/* Nothing to do */
   2792}
   2793
   2794static int show_kprobe_addr(struct seq_file *pi, void *v)
   2795{
   2796	struct hlist_head *head;
   2797	struct kprobe *p, *kp;
   2798	const char *sym = NULL;
   2799	unsigned int i = *(loff_t *) v;
   2800	unsigned long offset = 0;
   2801	char *modname, namebuf[KSYM_NAME_LEN];
   2802
   2803	head = &kprobe_table[i];
   2804	preempt_disable();
   2805	hlist_for_each_entry_rcu(p, head, hlist) {
   2806		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
   2807					&offset, &modname, namebuf);
   2808		if (kprobe_aggrprobe(p)) {
   2809			list_for_each_entry_rcu(kp, &p->list, list)
   2810				report_probe(pi, kp, sym, offset, modname, p);
   2811		} else
   2812			report_probe(pi, p, sym, offset, modname, NULL);
   2813	}
   2814	preempt_enable();
   2815	return 0;
   2816}
   2817
   2818static const struct seq_operations kprobes_sops = {
   2819	.start = kprobe_seq_start,
   2820	.next  = kprobe_seq_next,
   2821	.stop  = kprobe_seq_stop,
   2822	.show  = show_kprobe_addr
   2823};
   2824
   2825DEFINE_SEQ_ATTRIBUTE(kprobes);
   2826
    2827/* kprobes/blacklist -- shows which functions cannot be probed */
   2828static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
   2829{
   2830	mutex_lock(&kprobe_mutex);
   2831	return seq_list_start(&kprobe_blacklist, *pos);
   2832}
   2833
   2834static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
   2835{
   2836	return seq_list_next(v, &kprobe_blacklist, pos);
   2837}
   2838
   2839static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
   2840{
   2841	struct kprobe_blacklist_entry *ent =
   2842		list_entry(v, struct kprobe_blacklist_entry, list);
   2843
   2844	/*
    2845	 * If '/proc/kallsyms' is not showing kernel addresses, we won't
   2846	 * show them here either.
   2847	 */
   2848	if (!kallsyms_show_value(m->file->f_cred))
   2849		seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
   2850			   (void *)ent->start_addr);
   2851	else
   2852		seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
   2853			   (void *)ent->end_addr, (void *)ent->start_addr);
   2854	return 0;
   2855}
   2856
   2857static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
   2858{
   2859	mutex_unlock(&kprobe_mutex);
   2860}
   2861
   2862static const struct seq_operations kprobe_blacklist_sops = {
   2863	.start = kprobe_blacklist_seq_start,
   2864	.next  = kprobe_blacklist_seq_next,
   2865	.stop  = kprobe_blacklist_seq_stop,
   2866	.show  = kprobe_blacklist_seq_show,
   2867};
   2868DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
   2869
   2870static int arm_all_kprobes(void)
   2871{
   2872	struct hlist_head *head;
   2873	struct kprobe *p;
   2874	unsigned int i, total = 0, errors = 0;
   2875	int err, ret = 0;
   2876
   2877	mutex_lock(&kprobe_mutex);
   2878
   2879	/* If kprobes are armed, just return */
   2880	if (!kprobes_all_disarmed)
   2881		goto already_enabled;
   2882
   2883	/*
   2884	 * optimize_kprobe() called by arm_kprobe() checks
   2885	 * kprobes_all_disarmed, so set kprobes_all_disarmed before
   2886	 * arm_kprobe.
   2887	 */
   2888	kprobes_all_disarmed = false;
   2889	/* Arming kprobes doesn't optimize kprobe itself */
   2890	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
   2891		head = &kprobe_table[i];
   2892		/* Arm all kprobes on a best-effort basis */
   2893		hlist_for_each_entry(p, head, hlist) {
   2894			if (!kprobe_disabled(p)) {
   2895				err = arm_kprobe(p);
   2896				if (err)  {
   2897					errors++;
   2898					ret = err;
   2899				}
   2900				total++;
   2901			}
   2902		}
   2903	}
   2904
   2905	if (errors)
   2906		pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
   2907			errors, total);
   2908	else
   2909		pr_info("Kprobes globally enabled\n");
   2910
   2911already_enabled:
   2912	mutex_unlock(&kprobe_mutex);
   2913	return ret;
   2914}
   2915
   2916static int disarm_all_kprobes(void)
   2917{
   2918	struct hlist_head *head;
   2919	struct kprobe *p;
   2920	unsigned int i, total = 0, errors = 0;
   2921	int err, ret = 0;
   2922
   2923	mutex_lock(&kprobe_mutex);
   2924
   2925	/* If kprobes are already disarmed, just return */
   2926	if (kprobes_all_disarmed) {
   2927		mutex_unlock(&kprobe_mutex);
   2928		return 0;
   2929	}
   2930
   2931	kprobes_all_disarmed = true;
   2932
   2933	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
   2934		head = &kprobe_table[i];
   2935		/* Disarm all kprobes on a best-effort basis */
   2936		hlist_for_each_entry(p, head, hlist) {
   2937			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
   2938				err = disarm_kprobe(p, false);
   2939				if (err) {
   2940					errors++;
   2941					ret = err;
   2942				}
   2943				total++;
   2944			}
   2945		}
   2946	}
   2947
   2948	if (errors)
   2949		pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
   2950			errors, total);
   2951	else
   2952		pr_info("Kprobes globally disabled\n");
   2953
   2954	mutex_unlock(&kprobe_mutex);
   2955
    2956	/* Wait for the optimizer to finish disarming all kprobes */
   2957	wait_for_kprobe_optimizer();
   2958
   2959	return ret;
   2960}
   2961
   2962/*
   2963 * XXX: The debugfs bool file interface doesn't allow for callbacks
    2964 * when the bool state is switched. We can reuse that facility when it
    2965 * becomes available.
   2966 */
   2967static ssize_t read_enabled_file_bool(struct file *file,
   2968	       char __user *user_buf, size_t count, loff_t *ppos)
   2969{
   2970	char buf[3];
   2971
   2972	if (!kprobes_all_disarmed)
   2973		buf[0] = '1';
   2974	else
   2975		buf[0] = '0';
   2976	buf[1] = '\n';
   2977	buf[2] = 0x00;
   2978	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
   2979}
   2980
   2981static ssize_t write_enabled_file_bool(struct file *file,
   2982	       const char __user *user_buf, size_t count, loff_t *ppos)
   2983{
   2984	bool enable;
   2985	int ret;
   2986
   2987	ret = kstrtobool_from_user(user_buf, count, &enable);
   2988	if (ret)
   2989		return ret;
   2990
   2991	ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
   2992	if (ret)
   2993		return ret;
   2994
   2995	return count;
   2996}
   2997
   2998static const struct file_operations fops_kp = {
   2999	.read =         read_enabled_file_bool,
   3000	.write =        write_enabled_file_bool,
   3001	.llseek =	default_llseek,
   3002};
   3003
   3004static int __init debugfs_kprobe_init(void)
   3005{
   3006	struct dentry *dir;
   3007
   3008	dir = debugfs_create_dir("kprobes", NULL);
   3009
   3010	debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
   3011
   3012	debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
   3013
   3014	debugfs_create_file("blacklist", 0400, dir, NULL,
   3015			    &kprobe_blacklist_fops);
   3016
   3017	return 0;
   3018}
   3019
   3020late_initcall(debugfs_kprobe_init);
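/*
 * Editor's note, not part of the original file: the files created above live
 * under /sys/kernel/debug/kprobes/. 'list' (0400) shows each registered probe
 * with its [GONE]/[DISABLED]/[OPTIMIZED]/[FTRACE] flags, 'blacklist' (0400)
 * shows the unprobeable address ranges, and writing '0' or '1' to 'enabled'
 * (0600) calls disarm_all_kprobes()/arm_all_kprobes() respectively.
 */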
   3021#endif /* CONFIG_DEBUG_FS */