From cee4393989333795ae04dc9f3b83a578afe3fca6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 2 Mar 2018 16:35:27 -0800 Subject: rcu: Rename cond_resched_rcu_qs() to cond_resched_tasks_rcu_qs() Commit e31d28b6ab8f ("trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched()") substituted cond_resched() for the earlier call to cond_resched_rcu_qs(). However, the new-age cond_resched() does not do anything to help RCU-tasks grace periods because (1) RCU-tasks is only enabled when CONFIG_PREEMPT=y and (2) cond_resched() is a complete no-op when preemption is enabled. This situation results in hangs when running the trace benchmarks. A number of potential fixes were discussed on LKML (https://lkml.kernel.org/r/20180224151240.0d63a059@vmware.local.home), including making cond_resched() not be a no-op; making cond_resched() not be a no-op, but only when running tracing benchmarks; reverting the aforementioned commit (which works because cond_resched_rcu_qs() does provide an RCU-tasks quiescent state; and adding a call to the scheduler/RCU rcu_note_voluntary_context_switch() function. All were deemed unsatisfactory, either due to added cond_resched() overhead or due to magic functions inviting cargo culting. This commit renames cond_resched_rcu_qs() to cond_resched_tasks_rcu_qs(), which provides a clear hint as to what this function is doing and why and where it should be used, and then replaces the call to cond_resched() with cond_resched_tasks_rcu_qs() in the trace benchmark's benchmark_event_kthread() function. Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Tested-by: Nicholas Piggin --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 36360d07f25b..19d235fefdb9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -188,13 +188,13 @@ static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** - * cond_resched_rcu_qs - Report potential quiescent states to RCU + * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() * machinery were to be shut off, as some advocate for PREEMPT kernels. */ -#define cond_resched_rcu_qs() \ +#define cond_resched_tasks_rcu_qs() \ do { \ if (!cond_resched()) \ rcu_note_voluntary_context_switch_lite(current); \ -- cgit v1.2.3-71-gd317 From c3442697c2d73d3cdb9d4135cf630ad36ba8552f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 5 Mar 2018 11:29:40 -0800 Subject: softirq: Eliminate unused cond_resched_softirq() macro The cond_resched_softirq() macro is not used anywhere in mainline, so this commit simplifies the kernel by eliminating it. Suggested-by: Eric Dumazet Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Reviewed-by: Eric Dumazet Tested-by: Nicholas Piggin --- include/linux/sched.h | 8 -------- kernel/sched/core.c | 14 -------------- kernel/softirq.c | 3 +-- 3 files changed, 1 insertion(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b3d697f3b573..6fc99045658a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1613,7 +1613,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) * explicit rescheduling in places that are safe. The return * value indicates whether a reschedule was done in fact. * cond_resched_lock() will drop the spinlock before scheduling, - * cond_resched_softirq() will enable bhs before scheduling. */ #ifndef CONFIG_PREEMPT extern int _cond_resched(void); @@ -1633,13 +1632,6 @@ extern int __cond_resched_lock(spinlock_t *lock); __cond_resched_lock(lock); \ }) -extern int __cond_resched_softirq(void); - -#define cond_resched_softirq() ({ \ - ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ - __cond_resched_softirq(); \ -}) - static inline void cond_resched_rcu(void) { #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5e10aaeebfcc..6a09e6af64b9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5012,20 +5012,6 @@ int __cond_resched_lock(spinlock_t *lock) } EXPORT_SYMBOL(__cond_resched_lock); -int __sched __cond_resched_softirq(void) -{ - BUG_ON(!in_softirq()); - - if (should_resched(SOFTIRQ_DISABLE_OFFSET)) { - local_bh_enable(); - preempt_schedule_common(); - local_bh_disable(); - return 1; - } - return 0; -} -EXPORT_SYMBOL(__cond_resched_softirq); - /** * yield - yield the current processor to other threads. * diff --git a/kernel/softirq.c b/kernel/softirq.c index 177de3640c78..03981f1c39ea 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt) } /* - * Special-case - softirqs can safely be enabled in - * cond_resched_softirq(), or by __do_softirq(), + * Special-case - softirqs can safely be enabled by __do_softirq(), * without processing still-pending softirqs: */ void _local_bh_enable(void) -- cgit v1.2.3-71-gd317 From 17672480fb1e953f999623b598a98130f8aacfbc Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 25 Mar 2018 20:50:03 +0300 Subject: rcu: Declare rcu_eqs_special_set() in public header Because rcu_eqs_special_set() is declared only in internal header kernel/rcu/tree.h and stubbed in include/linux/rcutiny.h, it is inaccessible outside of the RCU implementation. This patch therefore moves the rcu_eqs_special_set() declaration to include/linux/rcutree.h, which allows it to be used in non-rcu kernel code. Signed-off-by: Yury Norov Signed-off-by: Paul E. McKenney Tested-by: Nicholas Piggin --- include/linux/rcutree.h | 1 + kernel/rcu/tree.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index fd996cdf1833..448f20f27396 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void) void rcu_barrier(void); void rcu_barrier_bh(void); void rcu_barrier_sched(void); +bool rcu_eqs_special_set(int cpu); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); unsigned long get_state_synchronize_sched(void); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 5fd374c71404..0b3a90ebe225 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -404,7 +404,6 @@ extern struct rcu_state rcu_preempt_state; #endif /* #ifdef CONFIG_PREEMPT_RCU */ int rcu_dynticks_snap(struct rcu_dynticks *rdtp); -bool rcu_eqs_special_set(int cpu); #ifdef CONFIG_RCU_BOOST DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); -- cgit v1.2.3-71-gd317 From f7194ac32ca241d28765a98e42a7fe13debc85a7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Apr 2018 17:19:17 -0700 Subject: srcu: Add cleanup_srcu_struct_quiesced() The current cleanup_srcu_struct() flushes work, which prevents it from being invoked from some workqueue contexts, as well as from atomic (non-blocking) contexts. This patch therefore introduced a cleanup_srcu_struct_quiesced(), which can be invoked only after all activity on the specified srcu_struct has completed. This restriction allows cleanup_srcu_struct_quiesced() to be invoked from workqueue contexts as well as from atomic contexts. Suggested-by: Christoph Hellwig Signed-off-by: Paul E. McKenney Tested-by: Nitzan Carmi Tested-by: Nicholas Piggin --- include/linux/srcu.h | 36 +++++++++++++++++++++++++++++++++++- kernel/rcu/rcutorture.c | 7 ++++++- kernel/rcu/srcutiny.c | 9 ++++++--- kernel/rcu/srcutree.c | 30 +++++++++++++++++------------- 4 files changed, 64 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 33c1c698df09..91494d7e8e41 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -69,11 +69,45 @@ struct srcu_struct { }; void call_srcu(struct srcu_struct *sp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void cleanup_srcu_struct(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +static inline void cleanup_srcu_struct(struct srcu_struct *sp) +{ + _cleanup_srcu_struct(sp, false); +} + +/** + * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. Also, + * all grace-period processing must have completed. + * + * "Completed" means that the last synchronize_srcu() and + * synchronize_srcu_expedited() calls must have returned before the call + * to cleanup_srcu_struct_quiesced(). It also means that the callback + * from the last call_srcu() must have been invoked before the call to + * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help + * with this last. Violating these rules will get you a WARN_ON() splat + * (with high probability, anyway), and will also cause the srcu_struct + * to be leaked. + */ +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +{ + _cleanup_srcu_struct(sp, true); +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC /** diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 680c96d8c00f..f0e1d44459f8 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -593,7 +593,12 @@ static void srcu_torture_init(void) static void srcu_torture_cleanup(void) { - cleanup_srcu_struct(&srcu_ctld); + static DEFINE_TORTURE_RANDOM(rand); + + if (torture_random(&rand) & 0x800) + cleanup_srcu_struct(&srcu_ctld); + else + cleanup_srcu_struct_quiesced(&srcu_ctld); srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 76ac5f50b2c7..622792abe41a 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void cleanup_srcu_struct(struct srcu_struct *sp) +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) { WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); - flush_work(&sp->srcu_work); + if (quiesced) + WARN_ON(work_pending(&sp->srcu_work)); + else + flush_work(&sp->srcu_work); WARN_ON(sp->srcu_gp_running); WARN_ON(sp->srcu_gp_waiting); WARN_ON(sp->srcu_cb_head); WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); } -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); /* * Removes the count for the old reader from the appropriate element of diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index fb560fca9ef4..b4123d7a2cec 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp) return SRCU_INTERVAL; } -/** - * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. - */ -void cleanup_srcu_struct(struct srcu_struct *sp) +/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ +void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) { int cpu; if (WARN_ON(!srcu_get_delay(sp))) - return; /* Leakage unless caller handles error. */ + return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(sp))) - return; /* Leakage unless caller handles error. */ - flush_delayed_work(&sp->work); + return; /* Just leak it! */ + if (quiesced) { + if (WARN_ON(delayed_work_pending(&sp->work))) + return; /* Just leak it! */ + } else { + flush_delayed_work(&sp->work); + } for_each_possible_cpu(cpu) - flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + if (quiesced) { + if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) + return; /* Just leak it! */ + } else { + flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + } if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(sp))) { pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); @@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) free_percpu(sp->sda); sp->sda = NULL; } -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the -- cgit v1.2.3-71-gd317