From 8f4d37ec073c17e2d4aa8851df5837d798606d6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:29 +0100 Subject: sched: high-res preemption tick Use HR-timers (when available) to deliver an accurate preemption tick. The regular scheduler tick that runs at 1/HZ can be too coarse when nice levels are used. The fairness system will still keep the cpu utilisation 'fair' by then delaying the task that got an excessive amount of CPU time, but we try to minimize this by delivering preemption points spot-on. The average frequency of this extra interrupt is sched_latency / nr_latency, which need not be higher than 1/HZ; it's just that the distribution within the sched_latency period is important. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7a9398e19704..ecc8e2685e2b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -217,6 +217,11 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return timer->base->cpu_base->hres_active; +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -248,6 +253,10 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->softirq_time; } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return 0; +} #endif extern ktime_t ktime_get(void); -- cgit v1.2.3-71-gd317 From d3d74453c34f8fd87674a8cf5b8a327c68f22e99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:31 +0100 Subject: hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback Currently all highres=off timers are run from softirq context, but HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers expect to run from irq context. Fix this up by splitting it similar to the highres=on case. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 5 +- kernel/hrtimer.c | 270 +++++++++++++++++++++++++----------------------- kernel/timer.c | 3 +- 3 files changed, 143 insertions(+), 135 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index ecc8e2685e2b..49067f14fac1 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -115,10 +115,8 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; -#ifdef CONFIG_HIGH_RES_TIMERS enum hrtimer_cb_mode cb_mode; struct list_head cb_entry; -#endif #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -194,10 +192,10 @@ struct hrtimer_cpu_base { spinlock_t lock; struct lock_class_key lock_key; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + struct list_head cb_pending; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; - struct list_head cb_pending; unsigned long nr_events; #endif }; @@ -319,6 +317,7 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, /* Soft interrupt function to run the hrtimer queues: */ extern void 
hrtimer_run_queues(void); +extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9f850ca032b6..061ae28a36a0 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) } #endif /* BITS_PER_LONG >= 64 */ +/* + * Check, whether the timer is on the callback pending list + */ +static inline int hrtimer_cb_pending(const struct hrtimer *timer) +{ + return timer->state & HRTIMER_STATE_PENDING; +} + +/* + * Remove a timer from the callback pending list + */ +static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) +{ + list_del_init(&timer->cb_entry); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -493,22 +509,6 @@ void hres_timers_resume(void) retrigger_next_event(NULL); } -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; -} - -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) -{ - list_del_init(&timer->cb_entry); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -516,7 +516,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; base->hres_active = 0; - INIT_LIST_HEAD(&base->cb_pending); } /* @@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) */ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { - INIT_LIST_HEAD(&timer->cb_entry); } /* @@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { return 0; } -static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { } static inline void 
hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static inline int hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + return 0; +} #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; + INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS @@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); +static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) +{ + spin_lock_irq(&cpu_base->lock); + + while (!list_empty(&cpu_base->cb_pending)) { + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer *timer; + int restart; + + timer = list_entry(cpu_base->cb_pending.next, + struct hrtimer, cb_entry); + + timer_stats_account_hrtimer(timer); + + fn = timer->function; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); + spin_unlock_irq(&cpu_base->lock); + + restart = fn(timer); + + spin_lock_irq(&cpu_base->lock); + + timer->state &= ~HRTIMER_STATE_CALLBACK; + if (restart == HRTIMER_RESTART) { + BUG_ON(hrtimer_active(timer)); + /* + * Enqueue the timer, allow reprogramming of the event + * device + */ + enqueue_hrtimer(timer, timer->base, 1); + } else if (hrtimer_active(timer)) { + /* + * If the timer was rearmed on another CPU, reprogram + * the event device. 
+ */ + if (timer->base->first == &timer->node) + hrtimer_reprogram(timer, timer->base); + } + } + spin_unlock_irq(&cpu_base->lock); +} + +static void __run_hrtimer(struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + struct hrtimer_cpu_base *cpu_base = base->cpu_base; + enum hrtimer_restart (*fn)(struct hrtimer *); + int restart; + + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + + fn = timer->function; + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + /* + * Used for scheduler timers, avoid lock inversion with + * rq->lock and tasklist_lock. + * + * These timers are required to deal with enqueue expiry + * themselves and are not allowed to migrate. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); + } else + restart = fn(timer); + + /* + * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid + * reprogramming of the event hardware. This happens at the end of this + * function anyway. + */ + if (restart != HRTIMER_NORESTART) { + BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); + enqueue_hrtimer(timer, base, 0); + } + timer->state &= ~HRTIMER_STATE_CALLBACK; +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1063,9 +1144,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) basenow = ktime_add(now, base->offset); while ((node = base->first)) { - enum hrtimer_restart (*fn)(struct hrtimer *); struct hrtimer *timer; - int restart; timer = rb_entry(node, struct hrtimer, node); @@ -1089,37 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) continue; } - __remove_hrtimer(timer, base, - HRTIMER_STATE_CALLBACK, 0); - timer_stats_account_hrtimer(timer); - - fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { - /* - * Used for scheduler timers, avoid lock - * inversion with rq->lock and tasklist_lock. 
- * - * These timers are required to deal with - * enqueue expiry themselves and are not - * allowed to migrate. - */ - spin_unlock(&cpu_base->lock); - restart = fn(timer); - spin_lock(&cpu_base->lock); - } else - restart = fn(timer); - - /* - * Note: We clear the CALLBACK bit after - * enqueue_hrtimer to avoid reprogramming of - * the event hardware. This happens at the end - * of this function anyway. - */ - if (restart != HRTIMER_NORESTART) { - BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); - } - timer->state &= ~HRTIMER_STATE_CALLBACK; + __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); base++; @@ -1140,52 +1189,41 @@ void hrtimer_interrupt(struct clock_event_device *dev) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); + run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); +} - timer_stats_account_hrtimer(timer); +#endif /* CONFIG_HIGH_RES_TIMERS */ - fn = timer->function; - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); +/* + * Called from timer softirq every jiffy, expire hrtimers: + * + * For HRT its the fall back code to run the softirq in the timer + * softirq context in case the hrtimer initialization failed or has + * not been done yet. + */ +void hrtimer_run_pending(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - restart = fn(timer); + if (hrtimer_hres_active()) + return; - spin_lock_irq(&cpu_base->lock); + /* + * This _is_ ugly: We have to check in the softirq context, + * whether we can switch to highres and / or nohz mode. 
The + * clocksource switch happens in the timer interrupt with + * xtime_lock held. Notification from there only sets the + * check bit in the tick_oneshot code, otherwise we might + * deadlock vs. xtime_lock. + */ + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) + hrtimer_switch_to_hres(); - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - if (timer->base->first == &timer->node) - hrtimer_reprogram(timer, timer->base); - } - } - spin_unlock_irq(&cpu_base->lock); + run_hrtimer_pending(cpu_base); } -#endif /* CONFIG_HIGH_RES_TIMERS */ - /* - * Expire the per base hrtimer-queue: + * Called from hardirq context every jiffy */ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, int index) @@ -1199,46 +1237,27 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, if (base->get_softirq_time) base->softirq_time = base->get_softirq_time(); - spin_lock_irq(&cpu_base->lock); + spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; - enum hrtimer_restart (*fn)(struct hrtimer *); - int restart; timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= timer->expires.tv64) break; -#ifdef CONFIG_HIGH_RES_TIMERS - WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ); -#endif - timer_stats_account_hrtimer(timer); - - fn = timer->function; - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart != HRTIMER_NORESTART) { - BUG_ON(hrtimer_active(timer)); - enqueue_hrtimer(timer, base, 0); + if (timer->cb_mode == 
HRTIMER_CB_SOFTIRQ) { + __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + continue; } + + __run_hrtimer(timer); } - spin_unlock_irq(&cpu_base->lock); + spin_unlock(&cpu_base->lock); } -/* - * Called from timer softirq every jiffy, expire hrtimers: - * - * For HRT its the fall back code to run the softirq in the timer - * softirq context in case the hrtimer initialization failed or has - * not been done yet. - */ void hrtimer_run_queues(void) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); @@ -1247,18 +1266,6 @@ void hrtimer_run_queues(void) if (hrtimer_hres_active()) return; - /* - * This _is_ ugly: We have to check in the softirq context, - * whether we can switch to highres and / or nohz mode. The - * clocksource switch happens in the timer interrupt with - * xtime_lock held. Notification from there only sets the - * check bit in the tick_oneshot code, otherwise we might - * deadlock vs. xtime_lock. 
- */ - if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) - if (hrtimer_switch_to_hres()) - return; - hrtimer_get_softirq_time(cpu_base); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) @@ -1407,6 +1414,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; + INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } diff --git a/kernel/timer.c b/kernel/timer.c index 2a00c22203f3..f739dfb539ce 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -896,7 +896,7 @@ static void run_timer_softirq(struct softirq_action *h) { tvec_base_t *base = __get_cpu_var(tvec_bases); - hrtimer_run_queues(); + hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); @@ -907,6 +907,7 @@ static void run_timer_softirq(struct softirq_action *h) */ void run_local_timers(void) { + hrtimer_run_queues(); raise_softirq(TIMER_SOFTIRQ); softlockup_tick(); } -- cgit v1.2.3-71-gd317