From a831881be220358a1d28c5d95d69449fb6d623ca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Dec 2012 17:32:19 +0100 Subject: nohz: Basic full dynticks interface For extreme use cases such as Real Time or HPC, having the ability to shut down the tick when a single task runs on a CPU is a desired feature: * Reducing the number of interrupts improves throughput for CPU-bound tasks. The CPU is less distracted from its real job, both from an execution-time and from a cache point of view. * This also improves latency response as we have fewer critical sections. Start with introducing a very simple interface to define full dynticks CPUs: use a boot time option defined cpumask through the "nohz_extended=" kernel parameter. CPUs that are part of this range will have their tick shut down whenever possible: provided they run a single task and they don't do kernel activity that requires the periodic tick. These details will be later documented in Documentation/*. An online CPU must be kept outside this range to handle the timekeeping. Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 553272e6af55..44bfa8aa439f 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -157,6 +157,13 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +#ifdef CONFIG_NO_HZ_EXTENDED +extern int tick_nohz_extended_cpu(int cpu); +#else +static inline int tick_nohz_extended_cpu(int cpu) { return 0; } +#endif + + # ifdef CONFIG_CPU_IDLE_GOV_MENU extern void menu_hrtimer_cancel(void); # else -- cgit v1.2.3-71-gd317 From 1c20091e77fc5a9b7d7d905176443b4822a23cdb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Wake up full dynticks CPUs when a timer gets enqueued Wake up a CPU when a timer list timer is enqueued there and the target is part of the full dynticks range. Sending an IPI to it makes it reconsidering the next timer to program on top of recent updates. This may later be improved by checking if the tick is really stopped on the target. This would need some careful synchronization though. So deal with such optimization later and start simple. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 20 +++++++++++++++++++- kernel/timer.c | 12 ++++++------ 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9004f6e19eac..10626e2ee688 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1851,9 +1851,9 @@ static inline void idle_task_exit(void) {} #endif #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -extern void wake_up_idle_cpu(int cpu); +extern void wake_up_nohz_cpu(int cpu); #else -static inline void wake_up_idle_cpu(int cpu) { } +static inline void wake_up_nohz_cpu(int cpu) { } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 849deb96e61e..e91ee589f793 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -587,7 +587,7 @@ unlock: * account when the CPU goes back to idle and evaluates the timer * wheel for the next timer event. 
*/ -void wake_up_idle_cpu(int cpu) +static void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -617,6 +617,24 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static bool wake_up_extended_nohz_cpu(int cpu) +{ + if (tick_nohz_extended_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + smp_send_reschedule(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (!wake_up_extended_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + static inline bool got_nohz_idle_kick(void) { int cpu = smp_processor_id(); diff --git a/kernel/timer.c b/kernel/timer.c index dbf7a78a1ef1..4e3040b40d16 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -930,14 +930,14 @@ void add_timer_on(struct timer_list *timer, int cpu) debug_activate(timer, timer->expires); internal_add_timer(base, timer); /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. + * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. */ - wake_up_idle_cpu(cpu); + wake_up_nohz_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); -- cgit v1.2.3-71-gd317 From 3451d0243c3cdfd729b36f9684a14659d4895ca3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Rename CONFIG_NO_HZ to CONFIG_NO_HZ_COMMON We are planning to convert the dynticks Kconfig options layout into a choice menu. The user must be able to easily pick any of the following implementations: constant periodic tick, idle dynticks, full dynticks. 
As this implies a mutual exclusion, the two dynticks implementations need to converge on the selection of a common Kconfig option in order to ease the sharing of a common infrastructure. It would thus seem pretty natural to reuse CONFIG_NO_HZ to that end. It already implements all the idle dynticks code and the full dynticks code depends on all that code for now. So ideally the choice menu would propose CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED, and both would then select CONFIG_NO_HZ. On the other hand we want to stay backward compatible: if CONFIG_NO_HZ is set in an older config file, we want to enable CONFIG_NO_HZ_IDLE by default. But we can't afford both at the same time or we run into a circular dependency: 1) CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED both select CONFIG_NO_HZ. 2) If CONFIG_NO_HZ is set, we default to CONFIG_NO_HZ_IDLE. We might be able to support that from Kconfig/Kbuild but it may not be wise to introduce such a confusing behaviour. So to solve this, create a new CONFIG_NO_HZ_COMMON option which gathers the common code between idle and full dynticks (that common code for now is simply the idle dynticks code) and select it from their respective Kconfig entries. Then we'll later create CONFIG_NO_HZ_IDLE and map CONFIG_NO_HZ to it for backward compatibility. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/RCU/stallwarn.txt | 2 +- Documentation/cpu-freq/governors.txt | 4 ++-- arch/um/include/shared/common-offsets.h | 4 ++-- arch/um/os-Linux/time.c | 2 +- include/linux/sched.h | 8 ++++---- include/linux/tick.h | 8 ++++---- init/Kconfig | 2 +- kernel/hrtimer.c | 4 ++-- kernel/sched/core.c | 18 +++++++++--------- kernel/sched/fair.c | 10 +++++----- kernel/sched/sched.h | 4 ++-- kernel/softirq.c | 2 +- kernel/time/Kconfig | 13 +++++++++---- kernel/time/tick-sched.c | 12 ++++++------ kernel/timer.c | 4 ++-- 15 files changed, 51 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1927151b386b..b336755b71ed 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -176,7 +176,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index c7a2eb8450c2..e3e5d9ae50cd 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. 
+If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a586..c92306809029 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464f..e9824d5dd7d5 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index 10626e2ee688..1ff9e0a5de27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -230,7 +230,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); @@ -1758,13 +1758,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef 
CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1850,7 +1850,7 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) extern void wake_up_nohz_cpu(int cpu); #else static inline void wake_up_nohz_cpu(int cpu) { } diff --git a/include/linux/tick.h b/include/linux/tick.h index 44bfa8aa439f..5e403339ee14 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { return 0; @@ -155,7 +155,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_EXTENDED extern int tick_nohz_extended_cpu(int cpu); diff --git a/init/Kconfig b/init/Kconfig index 
8a1dac2f80a9..edc8132584f1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -580,7 +580,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option causes RCU to attempt to accelerate grace periods in diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..ec60482d8b03 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -160,7 +160,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1106,7 +1106,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e91ee589f793..9bb397da63d6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -549,7 +549,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. 
@@ -641,14 +641,14 @@ static inline bool got_nohz_idle_kick(void) return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ void sched_avg_update(struct rq *rq) { @@ -2139,7 +2139,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2365,12 +2365,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2530,7 +2530,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2590,7 +2590,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -7023,7 +7023,7 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 539760ef00c4..5c97fca091a7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5331,7 +5331,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notice 
that there may be an idle rebalancing @@ -5541,9 +5541,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. */ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5686,7 +5686,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6156,7 +6156,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3bd15a43eebc..889904dd6d77 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -404,7 +404,7 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif @@ -1333,7 +1333,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index b4d252fd195b..de15813f2a66 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -348,7 +348,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* Make 
sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_irq_exit(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 726c33e00da2..c88fc43494c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,16 +64,21 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool +config NO_HZ_COMMON + bool + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select TICK_ONESHOT + config NO_HZ bool "Tickless System (Dynamic Ticks)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT + select NO_HZ_COMMON help This option enables a tickless system: timer interrupts will only trigger on an as-needed basis both when the system is @@ -81,14 +86,14 @@ config NO_HZ config NO_HZ_EXTENDED bool "Full dynticks system" - # NO_HZ dependency + # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS depends on HAVE_CONTEXT_TRACKING && SMP # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN - select NO_HZ + select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 57bb3fe5aaa3..ccfc2086cd4b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -104,7 +104,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. 
We don't care about * concurrency: This happens only when the cpu in charge went @@ -124,7 +124,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -235,7 +235,7 @@ core_initcall(init_tick_nohz_extended); /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -907,7 +907,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -992,14 +992,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 4e3040b40d16..1b7489fdea41 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -738,7 +738,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -1188,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. 
This * is used on S/390 to stop all activity when a CPU is idle. -- cgit v1.2.3-71-gd317 From c5bfece2d6129131b4ade985e63bc35ddf5868d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 16:45:34 +0200 Subject: nohz: Switch from "extended nohz" to "full nohz" based naming "Extended nohz" was used as a naming base for the full dynticks API and Kconfig symbols. It reflects the fact the system tries to stop the tick in more places than just idle. But that "extended" name is a bit opaque and vague. Rename it to "full" makes it clearer what the system tries to do under this config: try to shutdown the tick anytime it can. The various constraints that prevent that to happen shouldn't be considered as fundamental properties of this feature but rather technical issues that may be solved in the future. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 4 +-- include/linux/tick.h | 6 ++--- kernel/sched/core.c | 6 ++--- kernel/time/Kconfig | 4 +-- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-sched.c | 54 ++++++++++++++++++------------------- 7 files changed, 39 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 231698feaddc..82365dde00a8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1913,8 +1913,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. 
Valid arguments: on, off Default: on - nohz_extended= [KNL,BOOT] - In kernels built with CONFIG_NO_HZ_EXTENDED=y, set + nohz_full= [KNL,BOOT] + In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped whenever possible. You need to keep at least one online CPU outside the range to maintain the timekeeping. diff --git a/include/linux/tick.h b/include/linux/tick.h index 5e403339ee14..b4e3b0c9639e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -157,10 +157,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !CONFIG_NO_HZ_COMMON */ -#ifdef CONFIG_NO_HZ_EXTENDED -extern int tick_nohz_extended_cpu(int cpu); +#ifdef CONFIG_NO_HZ_FULL +extern int tick_nohz_full_cpu(int cpu); #else -static inline int tick_nohz_extended_cpu(int cpu) { return 0; } +static inline int tick_nohz_full_cpu(int cpu) { return 0; } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9bb397da63d6..0f0a5b3fd62c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -617,9 +617,9 @@ static void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } -static bool wake_up_extended_nohz_cpu(int cpu) +static bool wake_up_full_nohz_cpu(int cpu) { - if (tick_nohz_extended_cpu(cpu)) { + if (tick_nohz_full_cpu(cpu)) { if (cpu != smp_processor_id() || tick_nohz_tick_stopped()) smp_send_reschedule(cpu); @@ -631,7 +631,7 @@ static bool wake_up_extended_nohz_cpu(int cpu) void wake_up_nohz_cpu(int cpu) { - if (!wake_up_extended_nohz_cpu(cpu)) + if (!wake_up_full_nohz_cpu(cpu)) wake_up_idle_cpu(cpu); } diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index cbe64be17d1f..4a17b5069466 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -96,7 +96,7 @@ config NO_HZ_IDLE Most of the time you want to say Y here. 
-config NO_HZ_EXTENDED +config NO_HZ_FULL bool "Full dynticks system (tickless single task)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS @@ -115,7 +115,7 @@ config NO_HZ_EXTENDED task on the CPU. Chances for running tickless are maximized when the task mostly runs in userspace and has few kernel activity. - You need to fill up the nohz_extended boot parameter with the + You need to fill up the nohz_full boot parameter with the desired range of dynticks CPUs. This is implemented at the expense of some overhead in user <-> kernel diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8a6875cc1879..a3a3123f6272 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -573,7 +573,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b7dc0cbdb59b..83f2bd967161 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,7 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; else tick_do_timer_cpu = TICK_DO_TIMER_NONE; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e057d338daa4..369b5769fc97 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -113,7 +113,7 @@ static void tick_sched_do_timer(ktime_t now) * jiffies_lock. 
*/ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) - && !tick_nohz_extended_cpu(cpu)) + && !tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -143,29 +143,29 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } -#ifdef CONFIG_NO_HZ_EXTENDED -static cpumask_var_t nohz_extended_mask; -bool have_nohz_extended_mask; +#ifdef CONFIG_NO_HZ_FULL +static cpumask_var_t nohz_full_mask; +bool have_nohz_full_mask; -int tick_nohz_extended_cpu(int cpu) +int tick_nohz_full_cpu(int cpu) { - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; - return cpumask_test_cpu(cpu, nohz_extended_mask); + return cpumask_test_cpu(cpu, nohz_full_mask); } /* Parse the boot-time nohz CPU list from the kernel parameters. */ -static int __init tick_nohz_extended_setup(char *str) +static int __init tick_nohz_full_setup(char *str) { - alloc_bootmem_cpumask_var(&nohz_extended_mask); - if (cpulist_parse(str, nohz_extended_mask) < 0) - pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); + alloc_bootmem_cpumask_var(&nohz_full_mask); + if (cpulist_parse(str, nohz_full_mask) < 0) + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); else - have_nohz_extended_mask = true; + have_nohz_full_mask = true; return 1; } -__setup("nohz_extended=", tick_nohz_extended_setup); +__setup("nohz_full=", tick_nohz_full_setup); static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, unsigned long action, @@ -179,7 +179,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * If we handle the timekeeping duty for full dynticks CPUs, * we can't safely shutdown that CPU. */ - if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) + if (have_nohz_full_mask && tick_do_timer_cpu == cpu) return -EINVAL; break; } @@ -191,20 +191,20 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * separations: 0,2,4,6,... 
* This is NR_CPUS + sizeof('\0') */ -static char __initdata nohz_ext_buf[NR_CPUS + 1]; +static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_extended(void) +static int __init init_tick_nohz_full(void) { cpumask_var_t online_nohz; int cpu; - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; cpu_notifier(tick_nohz_cpu_down_callback, 0); if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { - pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); + pr_warning("NO_HZ: Not enough memory to check full nohz mask\n"); return -ENOMEM; } @@ -215,31 +215,31 @@ static int __init init_tick_nohz_extended(void) get_online_cpus(); /* Ensure we keep a CPU outside the dynticks range for timekeeping */ - cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); + cpumask_and(online_nohz, cpu_online_mask, nohz_full_mask); if (cpumask_equal(online_nohz, cpu_online_mask)) { pr_warning("NO_HZ: Must keep at least one online CPU " - "out of nohz_extended range\n"); + "out of nohz_full range\n"); /* * We know the current CPU doesn't have its tick stopped. * Let's use it for the timekeeping duty. 
*/ preempt_disable(); cpu = smp_processor_id(); - pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); - cpumask_clear_cpu(cpu, nohz_extended_mask); + pr_warning("NO_HZ: Clearing %d from nohz_full range\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); preempt_enable(); } put_online_cpus(); free_cpumask_var(online_nohz); - cpulist_scnprintf(nohz_ext_buf, sizeof(nohz_ext_buf), nohz_extended_mask); - pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_ext_buf); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); return 0; } -core_initcall(init_tick_nohz_extended); +core_initcall(init_tick_nohz_full); #else -#define have_nohz_extended_mask (0) +#define have_nohz_full_mask (0) #endif /* @@ -589,7 +589,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (have_nohz_extended_mask) { + if (have_nohz_full_mask) { /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around -- cgit v1.2.3-71-gd317 From 76c24fb054b52b34af4dcde589cbb9e2b98fc74c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Apr 2013 00:15:40 +0200 Subject: nohz: New APIs to re-evaluate the tick on full dynticks CPUs Provide two new helpers in order to notify the full dynticks CPUs about some internal system changes against which they may reconsider the state of their tick. Some practical examples include: posix cpu timers, perf tick and sched clock tick. For now the notifying handler, implemented through IPIs, is a stub that will be implemented when we get the tick stop/restart infrastructure in. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/tick.h | 4 ++++ kernel/time/Kconfig | 1 + kernel/time/tick-sched.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index b4e3b0c9639e..c2dcfb18f65b 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -159,8 +159,12 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern int tick_nohz_full_cpu(int cpu); +extern void tick_nohz_full_kick(void); +extern void tick_nohz_full_kick_all(void); #else static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline void tick_nohz_full_kick(void) { } +static inline void tick_nohz_full_kick_all(void) { } #endif diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..fbb4c7eb92a0 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -111,6 +111,7 @@ config NO_HZ_FULL select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE + select IRQ_WORK help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 369b5769fc97..2bcad5b904d8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -147,6 +147,57 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) static cpumask_var_t nohz_full_mask; bool have_nohz_full_mask; +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. 
+ */ +static void tick_nohz_full_check(void) +{ + /* + * STUB for now, will be filled with the full tick stop/restart + * infrastructure patches + */ +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!have_nohz_full_mask) + return; + + preempt_disable(); + smp_call_function_many(nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + preempt_enable(); +} + int tick_nohz_full_cpu(int cpu) { if (!have_nohz_full_mask) -- cgit v1.2.3-71-gd317 From d1e43fa5f8bb25f83a86a29f11fcfb57ed4d7566 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Mar 2013 23:47:24 +0100 Subject: nohz: Ensure full dynticks CPUs are RCU nocbs We need full dynticks CPU to also be RCU nocb so that we don't have to keep the tick to handle RCU callbacks. Make sure the range passed to nohz_full= boot parameter is a subset of rcu_nocbs= The CPUs that fail to meet this requirement will be excluded from the nohz_full range. This is checked early in boot time, before any CPU has the opportunity to stop its tick. Suggested-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- Documentation/kernel-parameters.txt | 2 ++ include/linux/rcupdate.h | 7 +++++++ include/linux/tick.h | 2 ++ init/main.c | 1 + kernel/rcutree.c | 6 +++--- kernel/rcutree.h | 1 - kernel/rcutree_plugin.h | 13 ++++--------- kernel/time/tick-sched.c | 22 ++++++++++++++++------ 8 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 887b29708447..4865e9bfd08d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1918,6 +1918,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the specified list of CPUs whose tick will be stopped whenever possible. The boot CPU will be forced outside the range to maintain the timekeeping. + The CPUs in this range must also be included in the + rcu_nocbs= set. noiotrap [SH] Disables trapped I/O port accesses. 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..8e0948c872fc 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -999,4 +999,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#ifdef CONFIG_RCU_NOCB_CPU +extern bool rcu_is_nocb_cpu(int cpu); +#else +static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index b4e3b0c9639e..0b6873cbf512 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -158,8 +158,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); extern int tick_nohz_full_cpu(int cpu); #else +static inline void tick_nohz_init(void) { } static inline int tick_nohz_full_cpu(int cpu) { return 0; } #endif diff --git a/init/main.c b/init/main.c index 63534a141b4e..2acb5bbde99b 100644 --- a/init/main.c +++ b/init/main.c @@ -547,6 +547,7 @@ asmlinkage void __init start_kernel(void) idr_init_cache(); perf_event_init(); rcu_init(); + tick_nohz_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f5ab50235cba..1d4ceff793a4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1695,7 +1695,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (is_nocb_cpu(rdp->cpu)) + if (rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2757,10 +2757,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * corresponding CPU's preceding callbacks have been invoked. 
*/ for_each_possible_cpu(cpu) { - if (!cpu_online(cpu) && !is_nocb_cpu(cpu)) + if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) continue; rdp = per_cpu_ptr(rsp->rda, cpu); - if (is_nocb_cpu(cpu)) { + if (rcu_is_nocb_cpu(cpu)) { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index f993c0ac47db..38acc49da2c6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -529,7 +529,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); -static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a5745e9b5d5a..0cd91cc18db4 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2167,7 +2167,7 @@ static int __init parse_rcu_nocb_poll(char *arg) early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* Is the specified CPU a no-CPUs CPU? */ -static bool is_nocb_cpu(int cpu) +bool rcu_is_nocb_cpu(int cpu) { if (have_rcu_nocb_mask) return cpumask_test_cpu(cpu, rcu_nocb_mask); @@ -2225,7 +2225,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { - if (!is_nocb_cpu(rdp->cpu)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); return 1; @@ -2242,7 +2242,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, long qll = rsp->qlen_lazy; /* If this is not a no-CBs CPU, tell the caller to do it the old way. 
*/ - if (!is_nocb_cpu(smp_processor_id())) + if (!rcu_is_nocb_cpu(smp_processor_id())) return 0; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2282,7 +2282,7 @@ static bool nocb_cpu_expendable(int cpu) * If there are no no-CB CPUs or if this CPU is not a no-CB CPU, * then offlining this CPU is harmless. Let it happen. */ - if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) + if (!have_rcu_nocb_mask || rcu_is_nocb_cpu(cpu)) return 1; /* If no memory, play it safe and keep the CPU around. */ @@ -2464,11 +2464,6 @@ static void __init rcu_init_nocb(void) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static bool is_nocb_cpu(int cpu) -{ - return false; -} - static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2bac5ea2c9af..d71a5f2bd7b2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,17 +203,27 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_full(void) +void __init tick_nohz_init(void) { - if (have_nohz_full_mask) - cpu_notifier(tick_nohz_cpu_down_callback, 0); + int cpu; + + if (!have_nohz_full_mask) + return; + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + + /* Make sure full dynticks CPU are also RCU nocbs */ + for_each_cpu(cpu, nohz_full_mask) { + if (!rcu_is_nocb_cpu(cpu)) { + pr_warning("NO_HZ: CPU %d is not RCU nocb: " + "cleared from nohz_full range", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + } cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); - - return 0; } -core_initcall(init_tick_nohz_full); #else #define have_nohz_full_mask (0) #endif -- cgit v1.2.3-71-gd317 From 555347f6c080d2f25265f981c963605b4dd3610d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 19 Apr 2013 16:17:38 +0200 Subject: posix_timers: New API to prevent 
from stopping the tick when timers are running Bring a new helper that the full dynticks infrastructure can call in order to know if it can safely stop the tick from the posix cpu timers subsystem point of view. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/posix-timers.h | 2 ++ kernel/posix-cpu-timers.c | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 042058fdb0af..3698d9d08978 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -122,6 +122,8 @@ void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); + void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0bc33561a435..84d5cb372ed5 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -155,6 +155,21 @@ static void bump_cpu_timer(struct k_itimer *timer, } } +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. 
+ */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) + return 1; + return 0; +} + static inline cputime_t prof_ticks(struct task_struct *p) { cputime_t utime, stime; @@ -654,6 +669,17 @@ static void posix_cpu_timer_kick_nohz(void) { schedule_work(&nohz_kick_work); } + +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) +{ + if (!task_cputime_zero(&tsk->cputime_expires)) + return true; + + if (tsk->signal->cputimer.running) + return true; + + return false; +} #else static inline void posix_cpu_timer_kick_nohz(void) { } #endif @@ -1032,21 +1058,6 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, } } -/** - * task_cputime_zero - Check a task_cputime struct for all zero fields. - * - * @cputime: The struct to compare. - * - * Checks @cputime to see if all fields are zero. Returns true if all fields - * are zero, false if any field is nonzero. - */ -static inline int task_cputime_zero(const struct task_cputime *cputime) -{ - if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) - return 1; - return 0; -} - /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers -- cgit v1.2.3-71-gd317 From 026249ef100b5384b6c74c360db46728e98354da Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 15:58:34 +0200 Subject: perf: New helper to prevent full dynticks CPUs from stopping tick Provide a new helper that helps full dynticks CPUs avoid stopping their tick in case there are events in the local rotation list. This way we make sure that perf_event_task_tick() is serviced on demand. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Stephane Eranian Cc: Jiri Olsa --- include/linux/perf_event.h | 6 ++++++ kernel/events/core.c | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ee462c2f2..0140830225e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -799,6 +799,12 @@ static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) +extern bool perf_event_can_stop_tick(void); +#else +static inline bool perf_event_can_stop_tick(void) { return true; } +#endif + #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 75b58bb75b32..ddb993b52190 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2560,6 +2560,16 @@ done: list_del_init(&cpuctx->rotation_list); } +#ifdef CONFIG_NO_HZ_FULL +bool perf_event_can_stop_tick(void) +{ + if (list_empty(&__get_cpu_var(rotation_list))) + return true; + else + return false; +} +#endif + void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); -- cgit v1.2.3-71-gd317 From ce831b38ca4920739a7a5b0c73b921da41f03718 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 15:15:35 +0200 Subject: sched: New helper to prevent from stopping the tick in full dynticks Provide a new helper to be called from the full dynticks engine before stopping the tick in order to make sure we don't stop it when there is more than one task running on the CPU. This way we make sure that the tick stays alive to maintain fairness. 
Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ kernel/sched/core.c | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1ff9e0a5de27..a74adedcdd10 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1856,6 +1856,12 @@ extern void wake_up_nohz_cpu(int cpu); static inline void wake_up_nohz_cpu(int cpu) { } #endif +#ifdef CONFIG_NO_HZ_FULL +extern bool sched_can_stop_tick(void); +#else +static inline bool sched_can_stop_tick(void) { return false; } +#endif + #ifdef CONFIG_SCHED_AUTOGROUP extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f0a5b3fd62c..69f71335984f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -650,6 +650,24 @@ static inline bool got_nohz_idle_kick(void) #endif /* CONFIG_NO_HZ_COMMON */ +#ifdef CONFIG_NO_HZ_FULL +bool sched_can_stop_tick(void) +{ + struct rq *rq; + + rq = this_rq(); + + /* Make sure rq->nr_running update is visible after the IPI */ + smp_rmb(); + + /* More than one running task need preemption */ + if (rq->nr_running > 1) + return false; + + return true; +} +#endif /* CONFIG_NO_HZ_FULL */ + void sched_avg_update(struct rq *rq) { s64 period = sched_avg_period(); -- cgit v1.2.3-71-gd317 From ff442c51f6543378cf23107c75b7949dc64a9119 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 15:27:08 +0200 Subject: nohz: Re-evaluate the tick from the scheduler IPI The scheduler IPI is used by the scheduler to kick full dynticks CPUs asynchronously when more than one task is running or 
when a new timer list timer is enqueued. This way the destination CPU can decide to restart the tick to handle this new situation. Now let's call that kick in the scheduler IPI. (Reusing the scheduler IPI rather than implementing a new IPI was suggested by Peter Zijlstra a while ago) Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/tick.h | 2 ++ kernel/sched/core.c | 4 +++- kernel/time/tick-sched.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index d290168335bc..e31e67623ea1 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -160,11 +160,13 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern void tick_nohz_init(void); extern int tick_nohz_full_cpu(int cpu); +extern void tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_all(void); #else static inline void tick_nohz_init(void) { } static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline void tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 69f71335984f..9ad35005f1cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1398,7 +1398,8 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() + && !tick_nohz_full_cpu(smp_processor_id())) return; /* @@ -1415,6 +1416,7 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched 
case. */ irq_enter(); + tick_nohz_full_check(); sched_ttwu_pending(); /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 884a9f302a06..4d74a68b2c34 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -151,7 +151,7 @@ bool have_nohz_full_mask; * Re-evaluate the need for the tick on the current CPU * and restart it if necessary. */ -static void tick_nohz_full_check(void) +void tick_nohz_full_check(void) { /* * STUB for now, will be filled with the full tick stop/restart -- cgit v1.2.3-71-gd317 From 99e5ada9407cc19d7c4c05ce2165f20dc46fc093 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 17:11:50 +0200 Subject: nohz: Re-evaluate the tick for the new task after a context switch When a task is scheduled in, it may have some properties of its own that could make the CPU reconsider the need for the tick: posix cpu timers, perf events, ... So notify the full dynticks subsystem when a task gets scheduled in and re-check the tick dependency at this stage. This is done through a self IPI to avoid messing up with any current lock scenario. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/tick.h | 2 ++ kernel/sched/core.c | 2 ++ kernel/time/tick-sched.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index e31e67623ea1..9180f4b85e6d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -163,12 +163,14 @@ extern int tick_nohz_full_cpu(int cpu); extern void tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_all(void); +extern void tick_nohz_task_switch(struct task_struct *tsk); #else static inline void tick_nohz_init(void) { } static inline int tick_nohz_full_cpu(int cpu) { return 0; } static inline void tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } +static inline void tick_nohz_task_switch(struct task_struct *tsk) { } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9ad35005f1cb..dd09def88567 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1896,6 +1896,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } + + tick_nohz_task_switch(current); } #ifdef CONFIG_SMP diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d0ed1905a85c..12a900dbb819 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -232,6 +232,26 @@ void tick_nohz_full_kick_all(void) preempt_enable(); } +/* + * Re-evaluate the need for the tick as we switch the current task. + * It might need the tick due to per task/process properties: + * perf events, posix cpu timers, ... 
+ */ +void tick_nohz_task_switch(struct task_struct *tsk) +{ + unsigned long flags; + + if (!tick_nohz_full_cpu(smp_processor_id())) + return; + + local_irq_save(flags); + + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + tick_nohz_full_kick(); + + local_irq_restore(flags); +} + int tick_nohz_full_cpu(int cpu) { if (!have_nohz_full_mask) -- cgit v1.2.3-71-gd317 From 265f22a975c1e4cc3a4d1f94a3ec53ffbb6f5b9f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 May 2013 03:39:05 +0200 Subject: sched: Keep at least 1 tick per second for active dynticks tasks The scheduler doesn't yet fully support environments with a single task running without a periodic tick. In order to ensure we still maintain the duties of scheduler_tick(), keep at least 1 tick per second. This makes sure that we keep the progression of various scheduler accounting and background maintenance even with a very low granularity. Examples include cpu load, sched average, CFS entity vruntime, avenrun and events such as load balancing, amongst other details handled in sched_class::task_tick(). This limitation will be removed in the future once we get these individual items to work in full dynticks CPUs. Suggested-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. 
McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/sched.h | 1 + kernel/sched/core.c | 30 ++++++++++++++++++++++++++++++ kernel/sched/idle_task.c | 1 + kernel/sched/sched.h | 10 ++++++++++ kernel/time/tick-sched.c | 7 +++++++ 5 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ebf7095158a9..af008d7bad57 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1862,6 +1862,7 @@ static inline void wake_up_nohz_cpu(int cpu) { } #ifdef CONFIG_NO_HZ_FULL extern bool sched_can_stop_tick(void); +extern u64 scheduler_tick_max_deferment(void); #else static inline bool sched_can_stop_tick(void) { return false; } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e94842d4400c..3bdf986a091a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2736,8 +2736,35 @@ void scheduler_tick(void) rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif + rq_last_tick_reset(rq); } +#ifdef CONFIG_NO_HZ_FULL +/** + * scheduler_tick_max_deferment + * + * Keep at least one tick per second when a single + * active task is running because the scheduler doesn't + * yet completely support full dynticks environment. + * + * This makes sure that uptime, CFS vruntime, load + * balancing, etc... continue to move forward, even + * with a very low granularity. 
+ */ +u64 scheduler_tick_max_deferment(void) +{ + struct rq *rq = this_rq(); + unsigned long next, now = ACCESS_ONCE(jiffies); + + next = rq->last_sched_tick + HZ; + + if (time_before_eq(next, now)) + return 0; + + return jiffies_to_usecs(next - now) * NSEC_PER_USEC; +} +#endif + notrace unsigned long get_parent_ip(unsigned long addr) { if (in_lock_functions(addr)) { @@ -6993,6 +7020,9 @@ void __init sched_init(void) #ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = 0; +#endif #endif init_rq_hrtick(rq); atomic_set(&rq->nr_iowait, 0); diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b8ce77328341..d8da01008d39 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -17,6 +17,7 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) { idle_exit_fair(rq); + rq_last_tick_reset(rq); } static void post_schedule_idle(struct rq *rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 24dc29897749..ce39224d6155 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -409,6 +409,9 @@ struct rq { #ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; +#endif +#ifdef CONFIG_NO_HZ_FULL + unsigned long last_sched_tick; #endif int skip_clock_update; @@ -1090,6 +1093,13 @@ static inline void dec_nr_running(struct rq *rq) rq->nr_running--; } +static inline void rq_last_tick_reset(struct rq *rq) +{ +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = jiffies; +#endif +} + extern void update_rq_clock(struct rq *rq); extern void activate_task(struct rq *rq, struct task_struct *p, int flags); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1c9f53b2ddb7..07929c633570 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -600,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = KTIME_MAX; } +#ifdef 
CONFIG_NO_HZ_FULL + if (!ts->inidle) { + time_delta = min(time_delta, + scheduler_tick_max_deferment()); + } +#endif + /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals -- cgit v1.2.3-71-gd317