From adf5091e6ccaa02905e7a28f9ff44f46c7f4c230 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Jun 2012 11:20:21 -0700 Subject: rcu: New rcu_user_enter() and rcu_user_exit() APIs RCU currently insists that only idle tasks can enter RCU idle mode, which prohibits an adaptive tickless kernel (AKA nohz cpusets), which in turn would mean that usermode execution would always take scheduling-clock interrupts, even when there is only one task runnable on the CPU in question. This commit therefore adds rcu_user_enter() and rcu_user_exit(), which allow non-idle tasks to enter RCU idle mode. These are quite similar to rcu_idle_enter() and rcu_idle_exit(), respectively, except that they omit the idle-task checks. [ Updated to use "user" flag rather than separate check functions. ] [ paulmck: Updated to drop exports of new functions based on Josh's patch getting rid of the need for them. ] Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0fbbd52e01f9..d8b20bfd4795 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -191,6 +191,8 @@ extern void rcu_idle_enter(void); extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +extern void rcu_user_enter(void); +extern void rcu_user_exit(void); extern void exit_rcu(void); /** -- cgit v1.2.3-71-gd317 From 19dd1591fc379f1d89f39cd99cbbe97433baa3c3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 4 Jun 2012 16:42:35 -0700 Subject: rcu: New rcu_user_enter_after_irq() and rcu_user_exit_after_irq() APIs In some cases, it is necessary to enter or exit userspace-RCU-idle mode from an interrupt handler, for example, if some other CPU sends this CPU a resched IPI. In this case, the current CPU would enter the IPI handler in userspace-RCU-idle mode, but would need to exit the IPI handler after having exited that mode. To allow this to work, this commit adds two new APIs to TREE_RCU: - rcu_user_enter_after_irq(). This must be called from an interrupt between rcu_irq_enter() and rcu_irq_exit(). After the irq calls rcu_irq_exit(), the irq handler will return into an RCU extended quiescent state. In theory, this interrupt is never a nested interrupt, but in practice it might interrupt softirq, which looks to RCU like a nested interrupt. - rcu_user_exit_after_irq(). This must be called from a non-nesting interrupt, interrupting an RCU extended quiescent state, also between rcu_irq_enter() and rcu_irq_exit(). After the irq calls rcu_irq_exit(), the irq handler will return in an RCU non-quiescent state. [ Combined with "Allow calls to rcu_exit_user_irq from nesting irqs." ] Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 2 ++ kernel/rcutree.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d8b20bfd4795..f818dd165b44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -193,6 +193,8 @@ extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_user_enter(void); extern void rcu_user_exit(void); +extern void rcu_user_enter_after_irq(void); +extern void rcu_user_exit_after_irq(void); extern void exit_rcu(void); /** diff --git a/kernel/rcutree.c b/kernel/rcutree.c index af0dc3472a4b..4138f59fa2f4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -425,6 +425,27 @@ void rcu_user_enter(void) } +/** + * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace + * after the current irq returns. + * + * This is similar to rcu_user_enter() but in the context of a non-nesting + * irq. After this call, RCU enters into idle mode when the interrupt + * returns. + */ +void rcu_user_enter_after_irq(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + /* Ensure this irq is interrupting a non-idle RCU state. */ + WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK)); + rdtp->dynticks_nesting = 1; + local_irq_restore(flags); +} + /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle * @@ -549,6 +570,28 @@ void rcu_user_exit(void) rcu_eqs_exit(1); } +/** + * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace + * idle mode after the current non-nesting irq returns. + * + * This is similar to rcu_user_exit() but in the context of an irq. + * This is called when the irq has interrupted a userspace RCU idle mode + * context. When the current non-nesting interrupt returns after this call, + * the CPU won't restore the RCU idle mode. + */ +void rcu_user_exit_after_irq(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + /* Ensure we are interrupting an RCU idle mode. */ + WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK); + rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; + local_irq_restore(flags); +} + /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle * -- cgit v1.2.3-71-gd317 From 2b1d5024e17be459aa6385763ca3faa8f01c52d9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Jul 2012 20:26:30 +0200 Subject: rcu: Settle config for userspace extended quiescent state Create a new config option under the RCU menu that put CPUs under RCU extended quiescent state (as in dynticks idle mode) when they run in userspace. This require some contribution from architectures to hook into kernel and userspace boundaries. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Josh Triplett Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/Kconfig | 10 ++++++++++ include/linux/rcupdate.h | 9 +++++++++ init/Kconfig | 10 ++++++++++ kernel/rcutree.c | 5 ++++- 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc5..1401a7587973 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -281,4 +281,14 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_RCU_USER_QS + bool + help + Provide kernel entry/exit hooks necessary for userspace + RCU extended quiescent state. Syscalls need to be wrapped inside + rcu_user_exit()-rcu_user_enter() through the slow path using + TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs + are already protected inside rcu_irq_enter/rcu_irq_exit() but + preemption or signal handling on irq exit still need to be protected. + source "kernel/gcov/Kconfig" diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f818dd165b44..f5034f22e94b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -191,10 +191,19 @@ extern void rcu_idle_enter(void); extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); + +#ifdef CONFIG_RCU_USER_QS extern void rcu_user_enter(void); extern void rcu_user_exit(void); extern void rcu_user_enter_after_irq(void); extern void rcu_user_exit_after_irq(void); +#else +static inline void rcu_user_enter(void) { } +static inline void rcu_user_exit(void) { } +static inline void rcu_user_enter_after_irq(void) { } +static inline void rcu_user_exit_after_irq(void) { } +#endif /* CONFIG_RCU_USER_QS */ + extern void exit_rcu(void); /** diff --git a/init/Kconfig b/init/Kconfig index af6c7f8ba019..f6a1830165ce 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -441,6 +441,16 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. +config RCU_USER_QS + bool "Consider userspace as in RCU extended quiescent state" + depends on HAVE_RCU_USER_QS && SMP + help + This option sets hooks on kernel / userspace boundaries and + puts RCU in extended quiescent state when the CPU runs in + userspace. It means that when a CPU runs in userspace, it is + excluded from the global RCU state machine and thus doesn't + to keep the timer tick on for RCU. + config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4138f59fa2f4..79fa2db1595b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -400,6 +400,7 @@ void rcu_idle_enter(void) } EXPORT_SYMBOL_GPL(rcu_idle_enter); +#ifdef CONFIG_RCU_USER_QS /** * rcu_user_enter - inform RCU that we are resuming userspace. * @@ -424,7 +425,6 @@ void rcu_user_enter(void) rcu_eqs_enter(1); } - /** * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace * after the current irq returns. @@ -445,6 +445,7 @@ void rcu_user_enter_after_irq(void) rdtp->dynticks_nesting = 1; local_irq_restore(flags); } +#endif /* CONFIG_RCU_USER_QS */ /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle @@ -548,6 +549,7 @@ void rcu_idle_exit(void) } EXPORT_SYMBOL_GPL(rcu_idle_exit); +#ifdef CONFIG_RCU_USER_QS /** * rcu_user_exit - inform RCU that we are exiting userspace. * @@ -591,6 +593,7 @@ void rcu_user_exit_after_irq(void) rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; local_irq_restore(flags); } +#endif /* CONFIG_RCU_USER_QS */ /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle -- cgit v1.2.3-71-gd317 From 04e7e951532b390b16feb070be9972b8fad2fc57 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Jul 2012 15:06:40 -0700 Subject: rcu: Switch task's syscall hooks on context switch Clear the syscalls hook of a task when it's scheduled out so that if the task migrates, it doesn't run the syscall slow path on a CPU that might not need it. Also set the syscalls hook on the next task if needed. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Josh Triplett Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/um/drivers/mconsole_kern.c | 1 + include/linux/rcupdate.h | 2 ++ include/linux/sched.h | 8 ++++++++ kernel/rcutree.c | 15 +++++++++++++++ kernel/sched/core.c | 1 + 5 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 664a60e8dfb4..c17de0db6736 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,6 +705,7 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; + rcu_switch(from, to); switch_to(from, to, from); } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f5034f22e94b..7c968e4f929e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -197,6 +197,8 @@ extern void rcu_user_enter(void); extern void rcu_user_exit(void); extern void rcu_user_enter_after_irq(void); extern void rcu_user_exit_after_irq(void); +extern void rcu_user_hooks_switch(struct task_struct *prev, + struct task_struct *next); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 23bddac4bad8..335720a1fc33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p) #endif +static inline void rcu_switch(struct task_struct *prev, + struct task_struct *next) +{ +#ifdef CONFIG_RCU_USER_QS + rcu_user_hooks_switch(prev, next); +#endif +} + static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6b82a9565149..d2e74c8d4b0e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -717,6 +717,21 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); +#ifdef CONFIG_RCU_USER_QS +void rcu_user_hooks_switch(struct task_struct *prev, + struct task_struct *next) +{ + struct rcu_dynticks *rdtp; + + /* Interrupts are disabled in context switch */ + rdtp = &__get_cpu_var(rcu_dynticks); + if (!rdtp->ignore_user_qs) { + clear_tsk_thread_flag(prev, TIF_NOHZ); + set_tsk_thread_flag(next, TIF_NOHZ); + } +} +#endif /* #ifdef CONFIG_RCU_USER_QS */ + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a48cdbc8631..ea2213b07d9d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2081,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev, #endif /* Here we just switch the register state and the stack. */ + rcu_switch(prev, next); switch_to(prev, next, prev); barrier(); -- cgit v1.2.3-71-gd317