From b3e55c727ff7349c5db722fbdb8d99a151e8e0bf Mon Sep 17 00:00:00 2001 From: "Mao, Bibo" Date: Mon, 12 Dec 2005 00:37:00 -0800 Subject: [PATCH] Kprobes: Reference count the modules when probed on it When kprobes are inserted into or removed from a module, the module must be reference counted so that it cannot be unloaded while probes are registered on it. Without this patch, the probed module is free to unload, and when the probing module unregisters the probe, the kprobes code might crash while trying to restore the original instruction. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Mao Bibo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5beda378cc75..fde5a16a2913 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -462,9 +462,16 @@ int __kprobes register_kprobe(struct kprobe *p) int ret = 0; unsigned long flags = 0; struct kprobe *old_p; + struct module *mod; + + if ((!kernel_text_address((unsigned long) p->addr)) || + in_kprobes_functions((unsigned long) p->addr)) + return -EINVAL; + + if ((mod = module_text_address((unsigned long) p->addr)) && + (unlikely(!try_module_get(mod)))) + return -EINVAL; - if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0) - return ret; if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; @@ -488,6 +495,8 @@ out: rm_kprobe: if (ret == -EEXIST) arch_remove_kprobe(p); + if (ret && mod) + module_put(mod); return ret; } @@ -495,6 +504,7 @@ void __kprobes unregister_kprobe(struct kprobe *p) { unsigned long flags; struct kprobe *old_p; + struct module *mod; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); @@ -506,6 +516,10 @@ void __kprobes unregister_kprobe(struct kprobe *p) cleanup_kprobe(p, flags); synchronize_sched(); + + if ((mod = module_text_address((unsigned long)p->addr))) + module_put(mod); + if (old_p->pre_handler == aggr_pre_handler && list_empty(&old_p->list)) kfree(old_p); -- cgit v1.2.3-71-gd317
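A minimal, hypothetical sketch (not part of the patch) of the situation the reference counting protects: a probing module placing a kprobe on a function that lives in another module. The symbol target_module_func and the handler name are assumptions; with this patch, register_kprobe() pins the probed module via try_module_get(), and unregister_kprobe() releases it via module_put().

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

/* Assumed to be exported by the module we want to probe. */
extern int target_module_func(void);

static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "probe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.addr = (kprobe_opcode_t *) target_module_func,
	.pre_handler = my_pre_handler,
};

static int __init probe_init(void)
{
	/* Takes a reference on the probed module, so it cannot be unloaded. */
	return register_kprobe(&kp);
}

static void __exit probe_exit(void)
{
	/* Drops the reference taken at registration time. */
	unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");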
From ab4720ec76b756e1f8705e207a7b392b0453afd6 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Mon, 12 Dec 2005 00:37:05 -0800 Subject: [PATCH] add rcu_barrier() synchronization point This introduces a new interface - rcu_barrier() - which waits until all RCU callbacks queued before this call have completed. Reiser4 needs this because we do more than just free a memory object in our RCU callback: we also remove it from a list hanging off the super-block. This means that before freeing the reiser4-specific portion of the super-block (during umount) we have to wait until all pending RCU callbacks have executed. The only reiser4 change to the original patch is the export of rcu_barrier(). Cc: Hans Reiser Cc: Vladimir V. Saveliev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 2 ++ kernel/rcupdate.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'kernel') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index cce25591eec2..a471f3bb713e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -100,6 +100,7 @@ struct rcu_data { struct rcu_head *donelist; struct rcu_head **donetail; int cpu; + struct rcu_head barrier; }; DECLARE_PER_CPU(struct rcu_data, rcu_data); @@ -285,6 +286,7 @@ extern void FASTCALL(call_rcu_bh(struct rcu_head *head, extern __deprecated_for_modules void synchronize_kernel(void); extern void synchronize_rcu(void); void synchronize_idle(void); +extern void rcu_barrier(void); #endif /* __KERNEL__ */ #endif /* __LINUX_RCUPDATE_H */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c4d159a21e04..f45b91723dc6 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -116,6 +116,10 @@ void fastcall call_rcu(struct rcu_head *head, local_irq_restore(flags); } +static atomic_t rcu_barrier_cpu_count; +static struct semaphore rcu_barrier_sema; +static struct completion rcu_barrier_completion; + /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -162,6 +166,42 @@ long rcu_batches_completed(void) return rcu_ctrlblk.completed; } +static void rcu_barrier_callback(struct rcu_head *notused) +{ + if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + complete(&rcu_barrier_completion); +} + +/* + * Called with preemption disabled, and from cross-cpu IRQ context. + */ +static void rcu_barrier_func(void *notused) +{ + int cpu = smp_processor_id(); + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_head *head; + + head = &rdp->barrier; + atomic_inc(&rcu_barrier_cpu_count); + call_rcu(head, rcu_barrier_callback); +} + +/** + * rcu_barrier - Wait until all the in-flight RCUs are complete. + */ +void rcu_barrier(void) +{ + BUG_ON(in_interrupt()); + /* Take cpucontrol semaphore to protect against CPU hotplug */ + down(&rcu_barrier_sema); + init_completion(&rcu_barrier_completion); + atomic_set(&rcu_barrier_cpu_count, 0); + on_each_cpu(rcu_barrier_func, NULL, 0, 1); + wait_for_completion(&rcu_barrier_completion); + up(&rcu_barrier_sema); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + /* * Invoke the completed RCU callbacks. They are expected to be in * a per-cpu list. @@ -457,6 +497,7 @@ static struct notifier_block __devinitdata rcu_nb = { */ void __init rcu_init(void) { + sema_init(&rcu_barrier_sema, 1); rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)smp_processor_id()); /* Register notifier for non-boot CPUs */ -- cgit v1.2.3-71-gd317
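To show where rcu_barrier() fits, here is a hedged sketch of the teardown pattern described above; the names (my_obj, my_subsystem_shutdown) are invented for illustration. Objects are freed from RCU callbacks, so the code that destroys the surrounding data structure must wait for every callback already queued: a plain synchronize_rcu() only waits for a grace period, not for already-queued callbacks to finish running.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct list_head link;	/* on a list hanging off some parent object */
	struct rcu_head rcu;
};

static void my_obj_free_rcu(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu);

	kfree(obj);
}

static void my_obj_release(struct my_obj *obj)
{
	list_del_rcu(&obj->link);
	call_rcu(&obj->rcu, my_obj_free_rcu);	/* freed after a grace period */
}

static void my_subsystem_shutdown(void)
{
	/* ... stop queueing new call_rcu() requests ... */

	/* Wait for every callback queued so far to finish executing. */
	rcu_barrier();

	/* Only now is it safe to tear down the parent object and its list. */
}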
From 89d46b8778f65223f732d82c0166e0abba20fb1e Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 12 Dec 2005 00:37:06 -0800 Subject: [PATCH] Fix bug in RCU torture test While doing some tests of the RCU torture module, I hit an OOPS in rcu_do_batch, which was trying to process a callback of a module that had just been removed. This is because we weren't waiting long enough for all callbacks to fire. Signed-off-by: Srivatsa Vaddagiri Cc: Dipankar Sarma Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcutorture.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 88c28d476550..49fbbeff201c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -409,9 +409,8 @@ rcu_torture_cleanup(void) stats_task = NULL; /* Wait for all RCU callbacks to fire. */ + rcu_barrier(); - for (i = 0; i < RCU_TORTURE_PIPE_LEN; i++) - synchronize_rcu(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ printk(KERN_ALERT TORTURE_FLAG "--- End of test: %s\n", -- cgit v1.2.3-71-gd317
From c3f5902325d3053986e7359f706581d8f032e72f Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 12 Dec 2005 00:37:07 -0800 Subject: [PATCH] Fix RCU race in access of nohz_cpu_mask Accessing nohz_cpu_mask before incrementing rcp->cur is racy. It can cause tickless idle CPUs to be included in rsp->cpumask, which will extend grace periods unnecessarily. Fix this race. It has been tested using extensions to the RCU torture module that force various CPUs to become idle. Signed-off-by: Srivatsa Vaddagiri Cc: Dipankar Sarma Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f45b91723dc6..48d3bce465b8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -257,15 +257,23 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, if (rcp->next_pending && rcp->completed == rcp->cur) { - /* Can't change, since spin lock held. */ - cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); - rcp->next_pending = 0; - /* next_pending == 0 must be visible in __rcu_process_callbacks() - * before it can see new value of cur. + /* + * next_pending == 0 must be visible in + * __rcu_process_callbacks() before it can see new value of cur. */ smp_wmb(); rcp->cur++; + + /* + * Accessing nohz_cpu_mask before incrementing rcp->cur needs a + * Barrier Otherwise it can cause tickless idle CPUs to be + * included in rsp->cpumask, which will extend graceperiods + * unnecessarily. + */ + smp_mb(); + cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); + } } -- cgit v1.2.3-71-gd317
From 64123fd42c7a1e4ebf6acd2399c98caddc7e0c26 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 12 Dec 2005 00:37:09 -0800 Subject: [PATCH] Add getnstimestamp function There are several functions that might seem appropriate for a timestamp: get_cycles() current_kernel_time() do_gettimeofday() Each has problems with combinations of SMP-safety, low resolution, and monotonicity. This patch adds a new function that returns a monotonic SMP-safe timestamp with nanosecond resolution where available.
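A possible use of the new helper, sketched under the assumption of a caller that wants to time an operation without being disturbed by settimeofday(); the op_begin()/op_elapsed_ns() names are illustrative only.

#include <linux/time.h>

static struct timespec op_start;

static void op_begin(void)
{
	getnstimestamp(&op_start);
}

static long long op_elapsed_ns(void)
{
	struct timespec now;

	getnstimestamp(&now);
	/* Timestamps are monotonic, so the difference never goes negative. */
	return (long long)(now.tv_sec - op_start.tv_sec) * NSEC_PER_SEC +
	       (now.tv_nsec - op_start.tv_nsec);
}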
Changes: Split timestamp into separate patch; Moved to kernel/time.c; Renamed to getnstimestamp; Fixed unintended-pointer-arithmetic bug. Signed-off-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 1 + kernel/time.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'kernel') diff --git a/include/linux/time.h b/include/linux/time.h index bfbe92d0767c..797ccd813bb0 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -95,6 +95,7 @@ struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday (struct timespec *tv); +extern void getnstimestamp(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); diff --git a/kernel/time.c b/kernel/time.c index 245d595a13cb..b94bfa8c03e0 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -561,6 +561,28 @@ void getnstimeofday(struct timespec *tv) EXPORT_SYMBOL_GPL(getnstimeofday); #endif +void getnstimestamp(struct timespec *ts) +{ + unsigned int seq; + struct timespec wall2mono; + + /* synchronize with settimeofday() changes */ + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(ts); + wall2mono = wall_to_monotonic; + } while(unlikely(read_seqretry(&xtime_lock, seq))); + + /* adjust to monotonicaly-increasing values */ + ts->tv_sec += wall2mono.tv_sec; + ts->tv_nsec += wall2mono.tv_nsec; + while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) { + ts->tv_nsec -= NSEC_PER_SEC; + ts->tv_sec++; + } +} +EXPORT_SYMBOL_GPL(getnstimestamp); + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.2.3-71-gd317
From adad0f331f9c693129e81e233c5461e2e7c3e443 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:12 -0800 Subject: [PATCH] kprobes: fix race in aggregate kprobe registration When registering multiple kprobes at the same address, we leave a small window where the kprobe hlist will not contain a reference to the registered kprobe, potentially leading to a system crash if the breakpoint is hit on another processor. The patch below now atomically replaces the old kprobe with the new, aggregate kprobe in the hash list. Signed-off-by: Anil S Keshavamurthy Acked-by: Ananth N Mavinakayanahalli Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index fde5a16a2913..e4f0fc62bd3e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -399,10 +399,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) INIT_LIST_HEAD(&ap->list); list_add_rcu(&p->list, &ap->list); - INIT_HLIST_NODE(&ap->hlist); - hlist_del_rcu(&p->hlist); - hlist_add_head_rcu(&ap->hlist, - &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); + hlist_replace_rcu(&p->hlist, &ap->hlist); } /* -- cgit v1.2.3-71-gd317
From 7a4ae749a478f8bca73d4b5b8c1b8cbb178b2db5 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 12 Dec 2005 00:37:22 -0800 Subject: [PATCH] Add try_to_freeze to kauditd kauditd was causing suspends to fail because it refused to freeze. Adding a try_to_freeze() to its sleep loop solves the issue.
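For reference, a hedged sketch of the general pattern this fix applies: a kernel daemon's sleep loop should call try_to_freeze() before blocking, otherwise the process freezer times out during suspend while waiting for the thread. The wait queue and helpers below (my_wait, my_work_available, my_do_work) are made up; the shape mirrors the kauditd loop touched by the patch.

#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/kthread.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wait);

static int my_work_available(void)
{
	return 0;	/* stand-in: replace with a real "work queued?" check */
}

static void my_do_work(void)
{
	/* stand-in: drain whatever was queued */
}

static int my_daemon(void *unused)
{
	DECLARE_WAITQUEUE(wait, current);

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&my_wait, &wait);

		if (!my_work_available()) {
			try_to_freeze();	/* let the freezer catch us before we block */
			schedule();
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&my_wait, &wait);

		my_do_work();
	}
	return 0;
}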
Signed-off-by: Pierre Ossman Acked-by: Pavel Machek Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/audit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 0c56320d38dc..32fa03ad1984 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -291,8 +291,10 @@ int kauditd_thread(void *dummy) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&kauditd_wait, &wait); - if (!skb_queue_len(&audit_skb_queue)) + if (!skb_queue_len(&audit_skb_queue)) { + try_to_freeze(); schedule(); + } __set_current_state(TASK_RUNNING); remove_wait_queue(&kauditd_wait, &wait); -- cgit v1.2.3-71-gd317
From 00d7c05ab168c10f9b520e07400923267bc04419 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:33 -0800 Subject: [PATCH] kprobes: no probes on critical path For kprobes, the critical path is the path from the debug break exception handler until control reaches the kprobes exception code. No probes can be supported on this path, since we would end up in recursion. This patch prevents this by moving the function below into the safe __kprobes section, on which no probes can be inserted. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 7 ++++++- kernel/sys.c | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e373c4a9de53..434ecfd7cf48 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -37,6 +37,7 @@ #include #include +#ifdef CONFIG_KPROBES #include /* kprobe_status settings */ @@ -147,7 +148,6 @@ struct kretprobe_instance { struct task_struct *task; }; -#ifdef CONFIG_KPROBES extern spinlock_t kretprobe_lock; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_copy_kprobe(struct kprobe *p); @@ -195,6 +195,11 @@ void add_rp_inst(struct kretprobe_instance *ri); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri); #else /* CONFIG_KPROBES */ + +#define __kprobes /**/ +struct jprobe; +struct kretprobe; + static inline struct kprobe *kprobe_running(void) { return NULL; diff --git a/kernel/sys.c b/kernel/sys.c index bce933ebb29f..eecf84526afe 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -168,7 +169,7 @@ EXPORT_SYMBOL(notifier_chain_unregister); * of the last notifier function called. */ -int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) +int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) { int ret=NOTIFY_DONE; struct notifier_block *nb = *n; -- cgit v1.2.3-71-gd317
From bf8d5c52c3b6b27061e3b7d779057fd9a6cac164 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:34 -0800 Subject: [PATCH] kprobes: increment kprobe missed count for multiprobes When multiple probes are registered at the same address and a probe is triggered recursively (a probe firing from within a probe handler), we skip calling the pre_handlers and just increment the nmissed field. The patch below makes sure the whole list of probes is walked in that case. Without it, the nmissed counts are incorrect when multiple probes are registered at the same address.
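To make the multiprobe case concrete, a hypothetical sketch (not from the patch) of two kprobes registered at the same address. Kprobes chains them behind one aggregate probe internally; if the probed function is hit recursively from a handler the hit is skipped and counted as missed, and with this change each probe's own nmissed field is incremented rather than only the aggregate's. do_fork is used only as a convenient, well-known probe target.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/sched.h>

static int pre_a(struct kprobe *p, struct pt_regs *regs) { return 0; }
static int pre_b(struct kprobe *p, struct pt_regs *regs) { return 0; }

static struct kprobe kp_a = {
	.addr = (kprobe_opcode_t *) do_fork,	/* illustrative probe point */
	.pre_handler = pre_a,
};

static struct kprobe kp_b = {
	.addr = (kprobe_opcode_t *) do_fork,	/* same address: forms an aggregate probe */
	.pre_handler = pre_b,
};

static int __init multiprobe_init(void)
{
	int ret = register_kprobe(&kp_a);

	if (ret)
		return ret;
	ret = register_kprobe(&kp_b);
	if (ret)
		unregister_kprobe(&kp_a);
	return ret;
}

static void __exit multiprobe_exit(void)
{
	unregister_kprobe(&kp_b);
	unregister_kprobe(&kp_a);
	/* With this patch both counters reflect recursive (missed) hits. */
	printk(KERN_INFO "nmissed: a=%lu b=%lu\n", kp_a.nmissed, kp_b.nmissed);
}

module_init(multiprobe_init);
module_exit(multiprobe_exit);
MODULE_LICENSE("GPL");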
Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 2 +- arch/ia64/kernel/kprobes.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/sparc64/kernel/kprobes.c | 2 +- arch/x86_64/kernel/kprobes.c | 2 +- include/linux/kprobes.h | 1 + kernel/kprobes.c | 13 +++++++++++++ 7 files changed, 19 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 32b0c24ab9a6..19edcd526ba4 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -191,7 +191,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 2895d6e6062f..89a70400c4f6 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -630,7 +630,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) */ save_previous_kprobe(kcb); set_current_kprobe(p, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_ss(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 511af54e6230..5368f9c2e6bf 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -177,7 +177,7 @@ static inline int kprobe_handler(struct pt_regs *regs) save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); kcb->kprobe_saved_msr = regs->msr; - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 96bd09b098f4..a97b0f0727ab 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -138,7 +138,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); kcb->kprobe_status = KPROBE_REENTER; prepare_singlestep(p, regs, kcb); return 1; diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index dddeb678b440..afe11f4fbd1d 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -329,7 +329,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 434ecfd7cf48..c03f2dc933de 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -158,6 +158,7 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot); +extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e4f0fc62bd3e..3bb71e63a37e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -246,6 +246,19 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) return ret; } +/* Walks the list and increments nmissed count for multiprobe case */ +void __kprobes 
kprobes_inc_nmissed_count(struct kprobe *p) +{ + struct kprobe *kp; + if (p->pre_handler != aggr_pre_handler) { + p->nmissed++; + } else { + list_for_each_entry_rcu(kp, &p->list, list) + kp->nmissed++; + } + return; +} + /* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { -- cgit v1.2.3-71-gd317
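Closing the series, a brief hedged illustration of the __kprobes annotation used by the "no probes on critical path" patch above. Marking a function __kprobes places it in the protected kprobes text section, so register_kprobe() refuses any address inside it (via in_kprobes_functions()); the helper below is a made-up example of code that sits on the breakpoint-handling path and therefore must never be probed.

#include <linux/kprobes.h>

/* Placed in the kprobes text section by the __kprobes attribute; probing it
 * would recurse back into the kprobes exception handling, so it is refused. */
static int __kprobes my_trap_path_helper(int val)
{
	return val * 2;
}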