From ae9d67aff60af59548b6c7d1a74febea09660122 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 06:48:12 +0100 Subject: audit: export symbol for use with xt_AUDIT When xt_AUDIT is built as a module, modpost reports a problem. MODPOST 322 modules ERROR: "audit_enabled" [net/netfilter/x_tables.ko] undefined! WARNING: modpost: Found 1 section mismatch(es). Cc: Thomas Graf Signed-off-by: Jan Engelhardt --- kernel/audit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 77770a034d59..5842f65bedcb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -74,6 +74,8 @@ static int audit_initialized; int audit_enabled; int audit_ever_enabled; +EXPORT_SYMBOL_GPL(audit_enabled); + /* Default state when kernel boots without any parameters. */ static int audit_default; -- cgit v1.2.3-71-gd317 From 42b16b3fbb5ee4555f5dee6220f3ccaa6e1ebe47 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 17 Jan 2011 00:09:38 +0100 Subject: Kill off warning: ‘inline’ is not at beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bunch of warning: ‘inline’ is not at beginning of declaration messages when building a 'make allyesconfig' kernel with -Wextra. These warnings are trivial to kill, yet rather annoying when building with -Wextra. The more we can cut down on pointless crap like this the better (IMHO). A previous patch to do this for a 'allnoconfig' build has already been merged. This just takes the cleanup a little further. 
Signed-off-by: Jesper Juhl Signed-off-by: Jiri Kosina --- arch/x86/oprofile/op_model_p4.c | 2 +- drivers/bluetooth/btusb.c | 4 ++-- drivers/cpuidle/sysfs.c | 2 +- drivers/edac/i7300_edac.c | 2 +- fs/ocfs2/dir.c | 2 +- kernel/trace/ring_buffer.c | 2 +- net/ipv6/inet6_hashtables.c | 2 +- net/mac80211/tx.c | 2 +- sound/pci/au88x0/au88x0.h | 4 ++-- sound/pci/au88x0/au88x0_core.c | 4 ++-- 10 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9fadec074142..98ab13058f89 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -50,7 +50,7 @@ static inline void setup_num_counters(void) #endif } -static int inline addr_increment(void) +static inline int addr_increment(void) { #ifdef CONFIG_SMP return smp_num_siblings == 2 ? 2 : 1; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1da773f899a2..92d29bfa2f48 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -424,7 +424,7 @@ static void btusb_isoc_complete(struct urb *urb) } } -static void inline __fill_isoc_descriptor(struct urb *urb, int len, int mtu) +static inline void __fill_isoc_descriptor(struct urb *urb, int len, int mtu) { int i, offset = 0; @@ -775,7 +775,7 @@ static void btusb_notify(struct hci_dev *hdev, unsigned int evt) } } -static int inline __set_isoc_interface(struct hci_dev *hdev, int altsetting) +static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) { struct btusb_data *data = hdev->driver_data; struct usb_interface *intf = data->isoc; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 0310ffaec9df..be7917ec40c9 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -300,7 +300,7 @@ static struct kobj_type ktype_state_cpuidle = { .release = cpuidle_state_sysfs_release, }; -static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i) +static inline void 
cpuidle_free_state_kobj(struct cpuidle_device *device, int i) { kobject_put(&device->kobjs[i]->kobj); wait_for_completion(&device->kobjs[i]->kobj_unregister); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 05523b504271..76d1f576cdc8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -162,7 +162,7 @@ static struct edac_pci_ctl_info *i7300_pci; #define AMBPRESENT_0 0x64 #define AMBPRESENT_1 0x66 -const static u16 mtr_regs[MAX_SLOTS] = { +static const u16 mtr_regs[MAX_SLOTS] = { 0x80, 0x84, 0x88, 0x8c, 0x82, 0x86, 0x8a, 0x8e }; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d417b3f9b0c7..f97b6f1c61dd 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -354,7 +354,7 @@ static inline int ocfs2_match(int len, /* * Returns 0 if not found, -1 on failure, and 1 on success */ -static int inline ocfs2_search_dirblock(struct buffer_head *bh, +static inline int ocfs2_search_dirblock(struct buffer_head *bh, struct inode *dir, const char *name, int namelen, unsigned long offset, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a4fbcc..6ee56b4ad136 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -669,7 +669,7 @@ static struct list_head *rb_list_head(struct list_head *list) * the reader page). But if the next page is a header page, * its flags will be non zero. 
*/ -static int inline +static inline int rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *page, struct list_head *list) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 633a6c266136..b53197233709 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -124,7 +124,7 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); -static int inline compute_score(struct sock *sk, struct net *net, +static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3abead9..a449dd508682 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -173,7 +173,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, return cpu_to_le16(dur); } -static int inline is_ieee80211_device(struct ieee80211_local *local, +static inline int is_ieee80211_device(struct ieee80211_local *local, struct net_device *dev) { return local == wdev_priv(dev->ieee80211_ptr); diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h index cf46bba563cf..ecb8f4daf408 100644 --- a/sound/pci/au88x0/au88x0.h +++ b/sound/pci/au88x0/au88x0.h @@ -211,7 +211,7 @@ static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma); //static void vortex_adbdma_stopfifo(vortex_t *vortex, int adbdma); static void vortex_adbdma_pausefifo(vortex_t * vortex, int adbdma); static void vortex_adbdma_resumefifo(vortex_t * vortex, int adbdma); -static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma); +static inline int vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma); static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma); #ifndef CHIP_AU8810 @@ -219,7 +219,7 @@ static void vortex_wtdma_startfifo(vortex_t * vortex, int wtdma); static void vortex_wtdma_stopfifo(vortex_t * vortex, int wtdma); static void vortex_wtdma_pausefifo(vortex_t * vortex, int 
wtdma); static void vortex_wtdma_resumefifo(vortex_t * vortex, int wtdma); -static int inline vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma); +static inline int vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma); #endif /* global stuff. */ diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index 23f49f356e0f..d43252a08b58 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1249,7 +1249,7 @@ static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma) { } } -static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma) +static inline int vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma) { stream_t *dma = &vortex->dma_adb[adbdma]; int temp; @@ -1498,7 +1498,7 @@ static int vortex_wtdma_getcursubuffer(vortex_t * vortex, int wtdma) POS_SHIFT) & POS_MASK); } #endif -static int inline vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma) +static inline int vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma) { stream_t *dma = &vortex->dma_wt[wtdma]; int temp; -- cgit v1.2.3-71-gd317 From 490da40d82b31c0562d3f5edb37810f492ca1c34 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 19 Jan 2011 10:51:44 +0800 Subject: blktrace: Don't output messages if NOTIFY isn't set. Now if we enable blktrace, cfq has too many messages output to the trace buffer. It is fine if we don't specify any action mask. 
But if I do like this: blktrace /dev/sdb -a issue -a complete -o - | blkparse -i - I only want to see 'D' and 'C', while with the following command dd if=/mnt/ocfs2/test of=/dev/null bs=4k count=1 iflag=direct I will get(with a 2.6.37 vanilla kernel): 8,16 0 0 0.000000000 0 m N cfq3805 alloced 8,16 0 0 0.000004126 0 m N cfq3805 insert_request 8,16 0 0 0.000004884 0 m N cfq3805 add_to_rr 8,16 0 0 0.000008417 0 m N cfq workload slice:300 8,16 0 0 0.000009557 0 m N cfq3805 set_active wl_prio:0 wl_type:2 8,16 0 0 0.000010640 0 m N cfq3805 fifo= (null) 8,16 0 0 0.000011193 0 m N cfq3805 dispatch_insert 8,16 0 0 0.000012221 0 m N cfq3805 dispatched a request 8,16 0 0 0.000012802 0 m N cfq3805 activate rq, drv=1 8,16 0 1 0.000013181 3805 D R 114759 + 8 [dd] 8,16 0 2 0.000164244 0 C R 114759 + 8 [0] 8,16 0 0 0.000167997 0 m N cfq3805 complete rqnoidle 0 8,16 0 0 0.000168782 0 m N cfq3805 set_slice=100 8,16 0 0 0.000169874 0 m N cfq3805 arm_idle: 8 group_idle: 0 8,16 0 0 0.000170189 0 m N cfq schedule dispatch 8,16 0 0 0.000397938 0 m N cfq3805 slice expired t=0 8,16 0 0 0.000399763 0 m N cfq3805 sl_used=1 disp=1 charge=1 iops=0 sect=8 8,16 0 0 0.000400227 0 m N cfq3805 del_from_rr 8,16 0 0 0.000400882 0 m N cfq3805 put_queue See, there are 19 lines while I only need 2. I don't think it is appropriate for a user. So this patch will disable any messages if the BLK_TC_NOTIFY isn't set. Now the output for the same command will look like: 8,16 0 1 0.000000000 4908 D R 114759 + 8 [dd] 8,16 0 2 0.000146827 0 C R 114759 + 8 [0] Yes, it is what I want to see. Cc: Steven Rostedt Cc: Jeff Moyer Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93c..d95721f33702 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) 
!blk_tracer_enabled)) return; + /* + * If the BLK_TC_NOTIFY action mask isn't set, don't send any note + * message to the trace. + */ + if (!(bt->act_mask & BLK_TC_NOTIFY)) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); -- cgit v1.2.3-71-gd317 From cd7eab44e9946c28d595abe3e9a43e945bc49141 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Jan 2011 21:01:44 +0000 Subject: genirq: Add IRQ affinity notifiers When initiating I/O on a multiqueue and multi-IRQ device, we may want to select a queue for which the response will be handled on the same or a nearby CPU. This requires a reverse-map of IRQ affinity. Add a notification mechanism to support this. This is based closely on work by Thomas Gleixner . Signed-off-by: Ben Hutchings Cc: linux-net-drivers@solarflare.com Cc: Tom Herbert Cc: David Miller LKML-Reference: <1295470904.11126.84.camel@bwh-desktop> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 33 ++++++++++++++++++- include/linux/irqdesc.h | 3 ++ kernel/irq/manage.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 55e0d4253e49..63c5ad78e37c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -240,6 +242,35 @@ extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); + +/** + * struct irq_affinity_notify - context for notification of IRQ affinity changes + * @irq: Interrupt to which notification applies + * @kref: Reference count, for internal use + * @work: Work item, for internal use + * @notify: Function to be called on change. This will be + * called in process context. 
+ * @release: Function to be called on release. This will be + * called in process context. Once registered, the + * structure must only be freed when this function is + * called or later. + */ +struct irq_affinity_notify { + unsigned int irq; + struct kref kref; + struct work_struct work; + void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); + void (*release)(struct kref *ref); +}; + +extern int +irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); + +static inline void irq_run_affinity_notifiers(void) +{ + flush_scheduled_work(); +} + #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) @@ -255,7 +286,7 @@ static inline int irq_can_set_affinity(unsigned int irq) static inline int irq_select_affinity(unsigned int irq) { return 0; } static inline int irq_set_affinity_hint(unsigned int irq, - const struct cpumask *m) + const struct cpumask *m) { return -EINVAL; } diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c1a95b7b58de..bfef56dadddb 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -8,6 +8,7 @@ * For now it's included from */ +struct irq_affinity_notify; struct proc_dir_entry; struct timer_rand_state; /** @@ -24,6 +25,7 @@ struct timer_rand_state; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP + * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers @@ -70,6 +72,7 @@ struct irq_desc { raw_spinlock_t lock; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; + struct irq_affinity_notify *affinity_notify; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 
0caa59f747dd..0587c5ceaed8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -134,6 +134,10 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) irq_set_thread_affinity(desc); } #endif + if (desc->affinity_notify) { + kref_get(&desc->affinity_notify->kref); + schedule_work(&desc->affinity_notify->work); + } desc->status |= IRQ_AFFINITY_SET; raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; @@ -155,6 +159,79 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +static void irq_affinity_notify(struct work_struct *work) +{ + struct irq_affinity_notify *notify = + container_of(work, struct irq_affinity_notify, work); + struct irq_desc *desc = irq_to_desc(notify->irq); + cpumask_var_t cpumask; + unsigned long flags; + + if (!desc) + goto out; + + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) + goto out; + + raw_spin_lock_irqsave(&desc->lock, flags); +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (desc->status & IRQ_MOVE_PENDING) + cpumask_copy(cpumask, desc->pending_mask); + else +#endif + cpumask_copy(cpumask, desc->affinity); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + notify->notify(notify, cpumask); + + free_cpumask_var(cpumask); +out: + kref_put(&notify->kref, notify->release); +} + +/** + * irq_set_affinity_notifier - control notification of IRQ affinity changes + * @irq: Interrupt for which to enable/disable notification + * @notify: Context for notification, or %NULL to disable + * notification. Function pointers must be initialised; + * the other fields will be initialised by this function. + * + * Must be called in process context. Notification may only be enabled + * after the IRQ is allocated and must be disabled before the IRQ is + * freed using free_irq(). 
+ */ +int +irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_affinity_notify *old_notify; + unsigned long flags; + + /* The release function is promised process context */ + might_sleep(); + + if (!desc) + return -EINVAL; + + /* Complete initialisation of *notify */ + if (notify) { + notify->irq = irq; + kref_init(&notify->kref); + INIT_WORK(&notify->work, irq_affinity_notify); + } + + raw_spin_lock_irqsave(&desc->lock, flags); + old_notify = desc->affinity_notify; + desc->affinity_notify = notify; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + if (old_notify) + kref_put(&old_notify->kref, old_notify->release); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); + #ifndef CONFIG_AUTO_IRQ_AFFINITY /* * Generic version of the affinity autoselector. @@ -1004,6 +1081,11 @@ void free_irq(unsigned int irq, void *dev_id) if (!desc) return; +#ifdef CONFIG_SMP + if (WARN_ON(desc->affinity_notify)) + desc->affinity_notify = NULL; +#endif + chip_bus_lock(desc); kfree(__free_irq(irq, dev_id)); chip_bus_sync_unlock(desc); -- cgit v1.2.3-71-gd317 From 6d5ab2932a21ea54406ab95c43ecff90a3eddfda Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Fri, 21 Jan 2011 20:45:01 -0800 Subject: sched: Simplify update_cfs_shares parameters Re-visiting this: Since update_cfs_shares will now only ever re-weight an entity that is a relative parent of the current entity in enqueue_entity; we can safely issue the account_entity_enqueue relative to that cfs_rq and avoid the requirement for special handling of the enqueue case in update_cfs_shares. 
Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20110122044851.915214637@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- kernel/sched_fair.c | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7ba..e0fa3ff7f194 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8510,7 +8510,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) /* Propagate contribution to hierarchy */ raw_spin_lock_irqsave(&rq->lock, flags); for_each_sched_entity(se) - update_cfs_shares(group_cfs_rq(se), 0); + update_cfs_shares(group_cfs_rq(se)); raw_spin_unlock_irqrestore(&rq->lock, flags); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c26e2df450e..0c550c841eee 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -540,7 +540,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) } static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); +static void update_cfs_shares(struct cfs_rq *cfs_rq); /* * Update the current task's runtime statistics. 
Skip current tasks that @@ -763,16 +763,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } -static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, - long weight_delta) +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { long load_weight, load, shares; - load = cfs_rq->load.weight + weight_delta; + load = cfs_rq->load.weight; load_weight = atomic_read(&tg->load_weight); - load_weight -= cfs_rq->load_contribution; load_weight += load; + load_weight -= cfs_rq->load_contribution; shares = (tg->shares * load); if (load_weight) @@ -790,7 +789,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq) { if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } } # else /* CONFIG_SMP */ @@ -798,8 +797,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { } -static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, - long weight_delta) +static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { return tg->shares; } @@ -824,7 +822,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, account_entity_enqueue(cfs_rq, se); } -static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) +static void update_cfs_shares(struct cfs_rq *cfs_rq) { struct task_group *tg; struct sched_entity *se; @@ -838,7 +836,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) if (likely(se->load.weight == tg->shares)) return; #endif - shares = calc_cfs_shares(cfs_rq, tg, weight_delta); + shares = calc_cfs_shares(cfs_rq, tg); reweight_entity(cfs_rq_of(se), se, shares); } @@ -847,7 +845,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { } -static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) +static inline void 
update_cfs_shares(struct cfs_rq *cfs_rq) { } @@ -978,8 +976,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ update_curr(cfs_rq); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, se->load.weight); account_entity_enqueue(cfs_rq, se); + update_cfs_shares(cfs_rq); if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); @@ -1041,7 +1039,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1282,7 +1280,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq = cfs_rq_of(se); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } hrtick_update(rq); @@ -1312,7 +1310,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq = cfs_rq_of(se); update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); } hrtick_update(rq); @@ -2123,7 +2121,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) * We need to update shares after updating tg->load_weight in * order to adjust the weight of groups with long running tasks. */ - update_cfs_shares(cfs_rq, 0); + update_cfs_shares(cfs_rq); raw_spin_unlock_irqrestore(&rq->lock, flags); -- cgit v1.2.3-71-gd317 From f07333bf6ee66d9b49286cec4371cf375e745b7a Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Fri, 21 Jan 2011 20:45:03 -0800 Subject: sched: Avoid expensive initial update_cfs_load() Since cfs->{load_stamp,load_last} are zero-initialized the initial load update will consider the delta to be 'since the beginning of time'. This results in a lot of pointless divisions to bring this large period to be within the sysctl_sched_shares_window. 
Fix this by initializing load_stamp to be 1 at cfs_rq initialization, this allows for an initial load_stamp > load_last which then lets standard idle truncation proceed. We avoid spinning (and slightly improve consistency) by fixing delta to be [period - 1] in this path resulting in a slightly more predictable shares ramp. (Previously the amount of idle time preserved by the overflow would range between [period/2,period-1].) Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20110122044852.102126037@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ kernel/sched_fair.c | 1 + 2 files changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e0fa3ff7f194..6820b5b3a969 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7796,6 +7796,8 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) INIT_LIST_HEAD(&cfs_rq->tasks); #ifdef CONFIG_FAIR_GROUP_SCHED cfs_rq->rq = rq; + /* allow initial update_cfs_load() to truncate */ + cfs_rq->load_stamp = 1; #endif cfs_rq->min_vruntime = (u64)(-(1LL << 20)); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c550c841eee..4cbc9121094c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -733,6 +733,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) now - cfs_rq->load_last > 4 * period) { cfs_rq->load_period = 0; cfs_rq->load_avg = 0; + delta = period - 1; } cfs_rq->load_stamp = now; -- cgit v1.2.3-71-gd317 From 4dd53d891ca46dcc1fde0376a33540d3fd83cb9a Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 21 Dec 2010 17:09:00 -0800 Subject: softirqs: Free up pf flag PF_KSOFTIRQD Cleanup patch, freeing up PF_KSOFTIRQD and use per_cpu ksoftirqd pointer instead, as suggested by Eric Dumazet. 
Tested-by: Shaun Ruffell Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1292980144-28796-2-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 7 +++++++ include/linux/sched.h | 1 - kernel/sched.c | 2 +- kernel/softirq.c | 3 +-- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 55e0d4253e49..a1382b9b5813 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -426,6 +426,13 @@ extern void raise_softirq(unsigned int nr); */ DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); +DECLARE_PER_CPU(struct task_struct *, ksoftirqd); + +static inline struct task_struct *this_cpu_ksoftirqd(void) +{ + return this_cpu_read(ksoftirqd); +} + /* Try to send a softirq to a remote cpu. If this cannot be done, the * work will be queued to the local cpu. */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f948b34e..af6e15fbfb78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1715,7 +1715,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ diff --git a/kernel/sched.c b/kernel/sched.c index 6820b5b3a969..8b89b3bba565 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1880,7 +1880,7 @@ void account_system_vtime(struct task_struct *curr) */ if (hardirq_count()) __this_cpu_add(cpu_hardirq_time, delta); - else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) + else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) __this_cpu_add(cpu_softirq_time, delta); irq_time_write_end(); diff --git a/kernel/softirq.c b/kernel/softirq.c index 68eb5efec388..0cee50487629 
100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", @@ -721,7 +721,6 @@ static int run_ksoftirqd(void * __bind_cpu) { set_current_state(TASK_INTERRUPTIBLE); - current->flags |= PF_KSOFTIRQD; while (!kthread_should_stop()) { preempt_disable(); if (!local_softirq_pending()) { -- cgit v1.2.3-71-gd317 From a1dabb6bfffccb897eff3e1d102dacf2a4bedf3b Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 21 Dec 2010 17:09:01 -0800 Subject: time: Add nsecs_to_cputime64 interface for asm-generic Add nsecs_to_cputime64 interface. This is used in following patches that updates cpu irq stat based on ns granularity info in IRQ_TIME_ACCOUNTING. Tested-by: Shaun Ruffell Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1292980144-28796-3-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- include/asm-generic/cputime.h | 3 +++ include/linux/jiffies.h | 1 + kernel/time.c | 23 +++++++++++++++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 2bcc5c7c22a6..61e03dd7939e 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -30,6 +30,9 @@ typedef u64 cputime64_t; #define cputime64_to_jiffies64(__ct) (__ct) #define jiffies64_to_cputime64(__jif) (__jif) #define cputime_to_cputime64(__ct) ((u64) __ct) +#define cputime64_gt(__a, __b) ((__a) > (__b)) + +#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct) /* diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 6811f4bfc6e7..922aa313c9f9 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ 
-307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x); extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); +extern u64 nsecs_to_jiffies64(u64 n); extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 diff --git a/kernel/time.c b/kernel/time.c index 32174359576f..55337a816b20 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x) } /** - * nsecs_to_jiffies - Convert nsecs in u64 to jiffies + * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 * * @n: nsecs in u64 * @@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x) * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years */ -unsigned long nsecs_to_jiffies(u64 n) +u64 nsecs_to_jiffies64(u64 n) { #if (NSEC_PER_SEC % HZ) == 0 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ @@ -674,6 +674,25 @@ unsigned long nsecs_to_jiffies(u64 n) #endif } + +/** + * nsecs_to_jiffies - Convert nsecs in u64 to jiffies + * + * @n: nsecs in u64 + * + * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. + * And this doesn't return MAX_JIFFY_OFFSET since this function is designed + * for scheduler, not for use in device drivers to calculate timeout value. + * + * note: + * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) + * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years + */ +unsigned long nsecs_to_jiffies(u64 n) +{ + return (unsigned long)nsecs_to_jiffies64(n); +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.2.3-71-gd317 From 70a89a6620f658d47a1488515bada4b8ee6291d8 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 21 Dec 2010 17:09:02 -0800 Subject: sched: Refactor account_system_time separating id-update Refactor account_system_time, to separate out the logic of identifying the update needed and code that does actual update. 
This is used by following patch for IRQ_TIME_ACCOUNTING, which has different identification logic and same update logic. Tested-by: Shaun Ruffell Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1292980144-28796-4-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8b89b3bba565..e3fa92106ed7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3567,6 +3567,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, } } +/* + * Account system cpu time to a process and desired cpustat field + * @p: the process that the cpu time gets accounted to + * @cputime: the cpu time spent in kernel space since the last update + * @cputime_scaled: cputime scaled by cpu frequency + * @target_cputime64: pointer to cpustat field that has to be updated + */ +static inline +void __account_system_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled, cputime64_t *target_cputime64) +{ + cputime64_t tmp = cputime_to_cputime64(cputime); + + /* Add system time to process. */ + p->stime = cputime_add(p->stime, cputime); + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); + account_group_system_time(p, cputime); + + /* Add system time to cpustat. */ + *target_cputime64 = cputime64_add(*target_cputime64, tmp); + cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); + + /* Account for system time used */ + acct_update_integrals(p); +} + /* * Account system cpu time to a process. 
* @p: the process that the cpu time gets accounted to @@ -3578,31 +3604,21 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cputime_t cputime, cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - cputime64_t tmp; + cputime64_t *target_cputime64; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { account_guest_time(p, cputime, cputime_scaled); return; } - /* Add system time to process. */ - p->stime = cputime_add(p->stime, cputime); - p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); - account_group_system_time(p, cputime); - - /* Add system time to cpustat. */ - tmp = cputime_to_cputime64(cputime); if (hardirq_count() - hardirq_offset) - cpustat->irq = cputime64_add(cpustat->irq, tmp); + target_cputime64 = &cpustat->irq; else if (in_serving_softirq()) - cpustat->softirq = cputime64_add(cpustat->softirq, tmp); + target_cputime64 = &cpustat->softirq; else - cpustat->system = cputime64_add(cpustat->system, tmp); + target_cputime64 = &cpustat->system; - cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); - - /* Account for system time used */ - acct_update_integrals(p); + __account_system_time(p, cputime, cputime_scaled, target_cputime64); } /* -- cgit v1.2.3-71-gd317 From abb74cefa9c682fb38ba86c17ca3c86fed6cc464 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 21 Dec 2010 17:09:03 -0800 Subject: sched: Export ns irqtimes through /proc/stat CONFIG_IRQ_TIME_ACCOUNTING adds ns granularity irq time on each CPU. This info is already used in scheduler to do proper task chargeback (earlier patches). This patch retro-fits this ns granularity hardirq and softirq information to /proc/stat irq and softirq fields. The update is still done on timer tick, where we look at accumulated ns hardirq/softirq time and account the tick to user/system/irq/hardirq/guest accordingly. No new interface added. Earlier versions looked at adding this as new fields in some /proc files. 
This one seems to be the best in terms of impact to existing apps, even though it has somewhat more kernel code than earlier versions. Tested-by: Shaun Ruffell Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1292980144-28796-5-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e3fa92106ed7..2a3c9799d76b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1920,8 +1920,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) sched_rt_avg_update(rq, irq_delta); } +static int irqtime_account_hi_update(void) +{ + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + unsigned long flags; + u64 latest_ns; + int ret = 0; + + local_irq_save(flags); + latest_ns = this_cpu_read(cpu_hardirq_time); + if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq)) + ret = 1; + local_irq_restore(flags); + return ret; +} + +static int irqtime_account_si_update(void) +{ + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + unsigned long flags; + u64 latest_ns; + int ret = 0; + + local_irq_save(flags); + latest_ns = this_cpu_read(cpu_softirq_time); + if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq)) + ret = 1; + local_irq_restore(flags); + return ret; +} + #else /* CONFIG_IRQ_TIME_ACCOUNTING */ +#define sched_clock_irqtime (0) + static void update_rq_clock_task(struct rq *rq, s64 delta) { rq->clock_task += delta; @@ -3621,6 +3653,65 @@ void account_system_time(struct task_struct *p, int hardirq_offset, __account_system_time(p, cputime, cputime_scaled, target_cputime64); } +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * Account a tick to a process and cpustat + * @p: the process that the cpu time gets accounted to + * @user_tick: is the tick from userspace + * @rq: the pointer to rq + * + * Tick demultiplexing follows the 
order + * - pending hardirq update + * - pending softirq update + * - user_time + * - idle_time + * - system time + * - check for guest_time + * - else account as system_time + * + * Check for hardirq is done both for system and user time as there is + * no timer going off while we are on hardirq and hence we may never get an + * opportunity to update it solely in system time. + * p->stime and friends are only updated on system time and not on irq + * softirq as those do not count in task exec_runtime any more. + */ +static void irqtime_account_process_tick(struct task_struct *p, int user_tick, + struct rq *rq) +{ + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + + if (irqtime_account_hi_update()) { + cpustat->irq = cputime64_add(cpustat->irq, tmp); + } else if (irqtime_account_si_update()) { + cpustat->softirq = cputime64_add(cpustat->softirq, tmp); + } else if (user_tick) { + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + } else if (p == rq->idle) { + account_idle_time(cputime_one_jiffy); + } else if (p->flags & PF_VCPU) { /* System time or guest time */ + account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); + } else { + __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, + &cpustat->system); + } +} + +static void irqtime_account_idle_ticks(int ticks) +{ + int i; + struct rq *rq = this_rq(); + + for (i = 0; i < ticks; i++) + irqtime_account_process_tick(current, 0, rq); +} +#else +static void irqtime_account_idle_ticks(int ticks) {} +static void irqtime_account_process_tick(struct task_struct *p, int user_tick, + struct rq *rq) {} +#endif + /* * Account for involuntary wait time. 
* @steal: the cpu time spent in involuntary wait @@ -3661,6 +3752,11 @@ void account_process_tick(struct task_struct *p, int user_tick) cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); + if (sched_clock_irqtime) { + irqtime_account_process_tick(p, user_tick, rq); + return; + } + if (user_tick) account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) @@ -3686,6 +3782,12 @@ void account_steal_ticks(unsigned long ticks) */ void account_idle_ticks(unsigned long ticks) { + + if (sched_clock_irqtime) { + irqtime_account_idle_ticks(ticks); + return; + } + account_idle_time(jiffies_to_cputime(ticks)); } -- cgit v1.2.3-71-gd317 From 414bee9ba613adb3804965e2d84db32d0599f9c6 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 21 Dec 2010 17:09:04 -0800 Subject: softirqs: Account ksoftirqd time as cpustat softirq softirq time in ksoftirqd context is not accounted in ns granularity per cpu softirq stats, as we want that to be a part of ksoftirqd exec_runtime. Accounting them as softirq on /proc/stat separately. Tested-by: Shaun Ruffell Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1292980144-28796-6-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 2a3c9799d76b..8b718b59b09f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3686,6 +3686,14 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, cpustat->irq = cputime64_add(cpustat->irq, tmp); } else if (irqtime_account_si_update()) { cpustat->softirq = cputime64_add(cpustat->softirq, tmp); + } else if (this_cpu_ksoftirqd() == p) { + /* + * ksoftirqd time do not get accounted in cpu_softirq_time. + * So, we have to handle it separately here. + * Also, p->stime needs to be updated for ksoftirqd. 
+ */ + __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, + &cpustat->softirq); } else if (user_tick) { account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); } else if (p == rq->idle) { -- cgit v1.2.3-71-gd317 From a8941d7ec81678fb69aea7183338175f112f3e0d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Jan 2011 16:30:03 +0100 Subject: sched: Simplify the idle scheduling class Since commit 48c5ccae88dcd (sched: Simplify cpu-hot-unplug task migration) this should no longer happen, so remove the code. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/sched_idletask.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 9fa0f402c87c..41eb62a0808b 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -52,31 +52,16 @@ static void set_curr_task_idle(struct rq *rq) { } -static void switched_to_idle(struct rq *rq, struct task_struct *p, - int running) +static void +switched_to_idle(struct rq *rq, struct task_struct *p, int running) { - /* Can this actually happen?? 
*/ - if (running) - resched_task(rq->curr); - else - check_preempt_curr(rq, p, 0); + BUG(); } static void prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio, int running) { - /* This can happen for hot plug CPUS */ - - /* - * Reschedule if we are currently running on this runqueue and - * our priority decreased, or if we are not currently running on - * this runqueue and our priority is higher than the current's - */ - if (running) { - if (p->prio > oldprio) - resched_task(rq->curr); - } else - check_preempt_curr(rq, p, 0); + BUG(); } static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) -- cgit v1.2.3-71-gd317 From da7a735e51f9622eb3e1672594d4a41da01d7e4f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 17 Jan 2011 17:03:27 +0100 Subject: sched: Fix switch_from_fair() When a task is taken out of the fair class we must ensure the vruntime is properly normalized because when we put it back in it will assume to be normalized. The case that goes wrong is when changing away from the fair class while sleeping. Sleeping tasks have non-normalized vruntime in order to make sleeper-fairness work. So treat the switch away from fair as a wakeup and preserve the relative vruntime. Also update sysrq-n to call the ->switch_{to,from} methods. 
Reported-by: Onkalo Samu Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++----- kernel/sched.c | 25 ++++++++++++++----------- kernel/sched_fair.c | 42 ++++++++++++++++++++++++++++++++++++------ kernel/sched_idletask.c | 7 +++---- kernel/sched_rt.c | 19 ++++++++++--------- kernel/sched_stoptask.c | 7 +++---- 6 files changed, 69 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index af6e15fbfb78..0542774914d4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1084,12 +1084,10 @@ struct sched_class { void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_fork) (struct task_struct *p); - void (*switched_from) (struct rq *this_rq, struct task_struct *task, - int running); - void (*switched_to) (struct rq *this_rq, struct task_struct *task, - int running); + void (*switched_from) (struct rq *this_rq, struct task_struct *task); + void (*switched_to) (struct rq *this_rq, struct task_struct *task); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio, int running); + int oldprio); unsigned int (*get_rr_interval) (struct rq *rq, struct task_struct *task); diff --git a/kernel/sched.c b/kernel/sched.c index 8b718b59b09f..78fa75394011 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2057,14 +2057,14 @@ inline int task_curr(const struct task_struct *p) static inline void check_class_changed(struct rq *rq, struct task_struct *p, const struct sched_class *prev_class, - int oldprio, int running) + int oldprio) { if (prev_class != p->sched_class) { if (prev_class->switched_from) - prev_class->switched_from(rq, p, running); - p->sched_class->switched_to(rq, p, running); - } else - p->sched_class->prio_changed(rq, p, oldprio, running); + prev_class->switched_from(rq, p); + p->sched_class->switched_to(rq, p); + } else if (oldprio != p->prio) + p->sched_class->prio_changed(rq, p, oldprio); } 
static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) @@ -2598,6 +2598,7 @@ static void __sched_fork(struct task_struct *p) p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; + p->se.vruntime = 0; #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); @@ -4696,11 +4697,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (running) p->sched_class->set_curr_task(rq); - if (on_rq) { + if (on_rq) enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); - check_class_changed(rq, p, prev_class, oldprio, running); - } + check_class_changed(rq, p, prev_class, oldprio); task_rq_unlock(rq, &flags); } @@ -5028,11 +5028,10 @@ recheck: if (running) p->sched_class->set_curr_task(rq); - if (on_rq) { + if (on_rq) activate_task(rq, p, 0); - check_class_changed(rq, p, prev_class, oldprio, running); - } + check_class_changed(rq, p, prev_class, oldprio); __task_rq_unlock(rq); raw_spin_unlock_irqrestore(&p->pi_lock, flags); @@ -8237,6 +8236,8 @@ EXPORT_SYMBOL(__might_sleep); #ifdef CONFIG_MAGIC_SYSRQ static void normalize_task(struct rq *rq, struct task_struct *p) { + const struct sched_class *prev_class = p->sched_class; + int old_prio = p->prio; int on_rq; on_rq = p->se.on_rq; @@ -8247,6 +8248,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p) activate_task(rq, p, 0); resched_task(rq->curr); } + + check_class_changed(rq, p, prev_class, old_prio); } void normalize_rt_tasks(void) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4cbc9121094c..55040f3938d8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -4078,33 +4078,62 @@ static void task_fork_fair(struct task_struct *p) * Priority of the task has changed. Check to see if we preempt * the current task. 
*/ -static void prio_changed_fair(struct rq *rq, struct task_struct *p, - int oldprio, int running) +static void +prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) { + if (!p->se.on_rq) + return; + /* * Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on * this runqueue and our priority is higher than the current's */ - if (running) { + if (rq->curr == p) { if (p->prio > oldprio) resched_task(rq->curr); } else check_preempt_curr(rq, p, 0); } +static void switched_from_fair(struct rq *rq, struct task_struct *p) +{ + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + /* + * Ensure the task's vruntime is normalized, so that when its + * switched back to the fair class the enqueue_entity(.flags=0) will + * do the right thing. + * + * If it was on_rq, then the dequeue_entity(.flags=0) will already + * have normalized the vruntime, if it was !on_rq, then only when + * the task is sleeping will it still have non-normalized vruntime. + */ + if (!se->on_rq && p->state != TASK_RUNNING) { + /* + * Fix up our vruntime so that the current sleep doesn't + * cause 'unlimited' sleep bonus. + */ + place_entity(cfs_rq, se, 0); + se->vruntime -= cfs_rq->min_vruntime; + } +} + /* * We switched to the sched_fair class. */ -static void switched_to_fair(struct rq *rq, struct task_struct *p, - int running) +static void switched_to_fair(struct rq *rq, struct task_struct *p) { + if (!p->se.on_rq) + return; + /* * We were most likely switched from sched_rt, so * kick off the schedule if running, otherwise just see * if we can still preempt the current task. 
*/ - if (running) + if (rq->curr == p) resched_task(rq->curr); else check_preempt_curr(rq, p, 0); @@ -4190,6 +4219,7 @@ static const struct sched_class fair_sched_class = { .task_fork = task_fork_fair, .prio_changed = prio_changed_fair, + .switched_from = switched_from_fair, .switched_to = switched_to_fair, .get_rr_interval = get_rr_interval_fair, diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 41eb62a0808b..c82f26c1b7c3 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -52,14 +52,13 @@ static void set_curr_task_idle(struct rq *rq) { } -static void -switched_to_idle(struct rq *rq, struct task_struct *p, int running) +static void switched_to_idle(struct rq *rq, struct task_struct *p) { BUG(); } -static void prio_changed_idle(struct rq *rq, struct task_struct *p, - int oldprio, int running) +static void +prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio) { BUG(); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca6..c381fdc18c64 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1595,8 +1595,7 @@ static void rq_offline_rt(struct rq *rq) * When switch from the rt queue, we bring ourselves to a position * that we might want to pull RT tasks from other runqueues. */ -static void switched_from_rt(struct rq *rq, struct task_struct *p, - int running) +static void switched_from_rt(struct rq *rq, struct task_struct *p) { /* * If there are other RT tasks then we will reschedule @@ -1605,7 +1604,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, * we may need to handle the pulling of RT tasks * now. */ - if (!rq->rt.rt_nr_running) + if (p->se.on_rq && !rq->rt.rt_nr_running) pull_rt_task(rq); } @@ -1624,8 +1623,7 @@ static inline void init_sched_rt_class(void) * with RT tasks. In this case we try to push them off to * other runqueues. 
*/ -static void switched_to_rt(struct rq *rq, struct task_struct *p, - int running) +static void switched_to_rt(struct rq *rq, struct task_struct *p) { int check_resched = 1; @@ -1636,7 +1634,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, * If that current running task is also an RT task * then see if we can move to another run queue. */ - if (!running) { + if (p->se.on_rq && rq->curr != p) { #ifdef CONFIG_SMP if (rq->rt.overloaded && push_rt_task(rq) && /* Don't resched if we changed runqueues */ @@ -1652,10 +1650,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p, * Priority of the task has changed. This may cause * us to initiate a push or pull. */ -static void prio_changed_rt(struct rq *rq, struct task_struct *p, - int oldprio, int running) +static void +prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) { - if (running) { + if (!p->se.on_rq) + return; + + if (rq->curr == p) { #ifdef CONFIG_SMP /* * If our priority decreases while running, we diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 2bf6b47058c1..84ec9bcf82d9 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq) { } -static void switched_to_stop(struct rq *rq, struct task_struct *p, - int running) +static void switched_to_stop(struct rq *rq, struct task_struct *p) { BUG(); /* its impossible to change to this class */ } -static void prio_changed_stop(struct rq *rq, struct task_struct *p, - int oldprio, int running) +static void +prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio) { BUG(); /* how!?, what priority? */ } -- cgit v1.2.3-71-gd317 From ccaa8d657117bb1876d471bd91579d774106778d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Jan 2011 23:17:32 +0100 Subject: rtmutex-tester: Remove BKL tests The BKL is going away, no need to test it any more. 
I left the definitions of the test case numbers in, so that the other tests do not get renumbered. Signed-off-by: Arnd Bergmann Cc: Arjan van de Ven Cc: Ingo Molnar Cc: Andrew Morton LKML-Reference: <1295993854-4971-19-git-send-email-arnd@arndb.de> Signed-off-by: Thomas Gleixner --- kernel/rtmutex-tester.c | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 66cb89bc5ef1..d5b543506cbc 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ struct test_thread_data { int opcode; int opdata; int mutexes[MAX_RT_TEST_MUTEXES]; - int bkl; int event; struct sys_device sysdev; }; @@ -46,8 +44,8 @@ enum test_opcodes { RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ - RTTEST_LOCKBKL, /* 9 Lock BKL */ - RTTEST_UNLOCKBKL, /* 10 Unlock BKL */ + RTTEST_LOCKBKL, /* 9 Was: Lock BKL */ + RTTEST_UNLOCKBKL, /* 10 Was: Unlock BKL */ RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */ RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ RTTEST_RESET = 99, /* 99 Reset all pending operations */ @@ -74,13 +72,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) td->mutexes[i] = 0; } } - - if (!lockwakeup && td->bkl == 4) { -#ifdef CONFIG_LOCK_KERNEL - unlock_kernel(); -#endif - td->bkl = 0; - } return 0; case RTTEST_RESETEVENT: @@ -131,25 +122,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) td->mutexes[id] = 0; return 0; - case RTTEST_LOCKBKL: - if (td->bkl) - return 0; - td->bkl = 1; -#ifdef CONFIG_LOCK_KERNEL - lock_kernel(); -#endif - td->bkl = 4; - return 0; - - case RTTEST_UNLOCKBKL: - if (td->bkl != 4) - break; -#ifdef CONFIG_LOCK_KERNEL - 
unlock_kernel(); -#endif - td->bkl = 0; - return 0; - default: break; } @@ -196,7 +168,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex) td->event = atomic_add_return(1, &rttest_event); break; - case RTTEST_LOCKBKL: default: break; } @@ -229,8 +200,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex) td->event = atomic_add_return(1, &rttest_event); return; - case RTTEST_LOCKBKL: - return; default: return; } @@ -380,11 +349,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute spin_lock(&rttest_lock); curr += sprintf(curr, - "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:", + "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", td->opcode, td->event, tsk->state, (MAX_RT_PRIO - 1) - tsk->prio, (MAX_RT_PRIO - 1) - tsk->normal_prio, - tsk->pi_blocked_on, td->bkl); + tsk->pi_blocked_on); for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) curr += sprintf(curr, "%d", td->mutexes[i]); -- cgit v1.2.3-71-gd317 From 10389a15e25fd4784d42de7e0e3fc8c242f2011d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jan 2011 15:25:56 +0100 Subject: cred: Replace deprecated spinlock initialization SPIN_LOCK_UNLOCK is deprecated. Use the lockdep capable variant instead. Signed-off-by: Thomas Gleixner --- kernel/cred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cred.c b/kernel/cred.c index 6a1aa004e376..b5496e81b0f7 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar; static struct thread_group_cred init_tgcred = { .usage = ATOMIC_INIT(2), .tgid = 0, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock), }; #endif -- cgit v1.2.3-71-gd317 From 6ea72f12069306b235151c5b05ac0cca7e1dedfa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 26 Jan 2011 13:36:03 +0100 Subject: sched: Avoid expensive initial update_cfs_load(), on UP too Fix the build on UP. 
Signed-off-by: Peter Zijlstra Cc: Paul Turner LKML-Reference: <20110122044852.102126037@google.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 78fa75394011..477e1bcc63f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7922,7 +7922,9 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) #ifdef CONFIG_FAIR_GROUP_SCHED cfs_rq->rq = rq; /* allow initial update_cfs_load() to truncate */ +#ifdef CONFIG_SMP cfs_rq->load_stamp = 1; +#endif #endif cfs_rq->min_vruntime = (u64)(-(1LL << 20)); } -- cgit v1.2.3-71-gd317 From 88d4f0db7fa8785859c1d637f9aac210932b6216 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jan 2011 19:40:51 +0100 Subject: perf: Fix alloc_callchain_buffers() Commit 927c7a9e92c4 ("perf: Fix race in callchains") introduced a mismatch in the sizing of struct callchain_cpus_entries. nr_cpu_ids must be used instead of num_possible_cpus(), or we might get out of bound memory accesses on some machines. Signed-off-by: Eric Dumazet Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: David Miller Cc: Stephane Eranian CC: stable@kernel.org LKML-Reference: <1295980851.3588.351.camel@edumazet-laptop> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 126a302c481c..852ae8c66502 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1999,8 +1999,7 @@ static int alloc_callchain_buffers(void) * accessed from NMI. Use a temporary manual per cpu allocation * until that gets sorted out. 
*/ - size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * - num_possible_cpus(); + size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); entries = kzalloc(size, GFP_KERNEL); if (!entries) -- cgit v1.2.3-71-gd317 From 8161239a8bcce9ad6b537c04a1fa3b5c68bae693 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 14 Jan 2011 17:09:41 +0800 Subject: rtmutex: Simplify PI algorithm and make highest prio task get lock In current rtmutex, the pending owner may be boosted by the tasks in the rtmutex's waitlist when the pending owner is deboosted or a task in the waitlist is boosted. This boosting is unrelated, because the pending owner does not really take the rtmutex. It is not reasonable. Example. time1: A(high prio) owns the rtmutex. B(mid prio) and C (low prio) in the waitlist. time2 A releases the lock, B becomes the pending owner A(or other high prio task) continues to run. B's prio is lower than A, so B is just queued at the runqueue. time3 A or other high prio task sleeps, but we have passed some time The B and C's prio are changed in the period (time2 ~ time3) due to boosting or deboosting. Now C has the priority higher than B. ***Is it reasonable that C has to boost B and help B to get the rtmutex? NO!! I think, it is unrelated/unneeded boosting before B really owns the rtmutex. We should give C a chance to beat B and win the rtmutex. This is the motivation of this patch. This patch *ensures* only the top waiter or higher priority task can take the lock. How? 1) we don't dequeue the top waiter when unlock, if the top waiter is changed, the old top waiter will fail and go to sleep again. 2) when requiring lock, it will get the lock when the lock is not taken and: there is no waiter OR higher priority than waiters OR it is top waiter. 3) In any time, the top waiter is changed, the top waiter will be woken up. The algorithm is much simpler than before, no pending owner, no boosting for pending owner.
Other advantage of this patch: 1) The states of a rtmutex are reduced a half, easier to read the code. 2) the codes become shorter. 3) top waiter is not dequeued until it really take the lock: they will retain FIFO when it is stolen. Not advantage nor disadvantage 1) Even we may wakeup multiple waiters(any time when top waiter changed), we hardly cause "thundering herd", the number of wokenup task is likely 1 or very little. 2) two APIs are changed. rt_mutex_owner() will not return pending owner, it will return NULL when the top waiter is going to take the lock. rt_mutex_next_owner() always return the top waiter. will not return NULL if we have waiters because the top waiter is not dequeued. I have fixed the code that use these APIs. need updated after this patch is accepted 1) Document/* 2) the testcase scripts/rt-tester/t4-l2-pi-deboost.tst Signed-off-by: Lai Jiangshan LKML-Reference: <4D3012D5.4060709@cn.fujitsu.com> Reviewed-by: Steven Rostedt Signed-off-by: Steven Rostedt --- kernel/futex.c | 22 ++-- kernel/rtmutex-debug.c | 1 - kernel/rtmutex.c | 318 +++++++++++++++++------------------------------- kernel/rtmutex_common.h | 16 +-- 4 files changed, 127 insertions(+), 230 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index b766d28accd6..64c38115c7b6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1556,10 +1556,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, /* * We are here either because we stole the rtmutex from the - * pending owner or we are the pending owner which failed to - * get the rtmutex. We have to replace the pending owner TID - * in the user space variable. This must be atomic as we have - * to preserve the owner died bit here. + * previous highest priority waiter or we are the highest priority + * waiter but failed to get the rtmutex the first time. + * We have to replace the newowner TID in the user space variable. 
+ * This must be atomic as we have to preserve the owner died bit here. * * Note: We write the user space value _before_ changing the pi_state * because we can fault here. Imagine swapped out pages or a fork @@ -1608,8 +1608,8 @@ retry: /* * To handle the page fault we need to drop the hash bucket - * lock here. That gives the other task (either the pending - * owner itself or the task which stole the rtmutex) the + * lock here. That gives the other task (either the highest priority + * waiter itself or the task which stole the rtmutex) the * chance to try the fixup of the pi_state. So once we are * back from handling the fault we need to check the pi_state * after reacquiring the hash bucket lock and before trying to @@ -1685,18 +1685,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) /* * pi_state is incorrect, some other task did a lock steal and * we returned due to timeout or signal without taking the - * rt_mutex. Too late. We can access the rt_mutex_owner without - * locking, as the other task is now blocked on the hash bucket - * lock. Fix the state up. + * rt_mutex. Too late. */ + raw_spin_lock(&q->pi_state->pi_mutex.wait_lock); owner = rt_mutex_owner(&q->pi_state->pi_mutex); + if (!owner) + owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); + raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock); ret = fixup_pi_state_owner(uaddr, q, owner); goto out; } /* * Paranoia check. If we did not take the lock, then we should not be - * the owner, nor the pending owner, of the rt_mutex. + * the owner of the rt_mutex. 
*/ if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index ddabb54bb5c8..3c7cbc2c33be 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) put_pid(waiter->deadlock_task_pid); TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - TRACE_WARN_ON(waiter->task); memset(waiter, 0x22, sizeof(*waiter)); } diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index a9604815786a..ab449117aaf2 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -20,41 +20,34 @@ /* * lock->owner state tracking: * - * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 - * are used to keep track of the "owner is pending" and "lock has - * waiters" state. + * lock->owner holds the task_struct pointer of the owner. Bit 0 + * is used to keep track of the "lock has waiters" state. * - * owner bit1 bit0 - * NULL 0 0 lock is free (fast acquire possible) - * NULL 0 1 invalid state - * NULL 1 0 Transitional State* - * NULL 1 1 invalid state - * taskpointer 0 0 lock is held (fast release possible) - * taskpointer 0 1 task is pending owner - * taskpointer 1 0 lock is held and has waiters - * taskpointer 1 1 task is pending owner and lock has more waiters - * - * Pending ownership is assigned to the top (highest priority) - * waiter of the lock, when the lock is released. The thread is woken - * up and can now take the lock. Until the lock is taken (bit 0 - * cleared) a competing higher priority thread can steal the lock - * which puts the woken up thread back on the waiters list. 
+ * owner bit0 + * NULL 0 lock is free (fast acquire possible) + * NULL 1 lock is free and has waiters and the top waiter + * is going to take the lock* + * taskpointer 0 lock is held (fast release possible) + * taskpointer 1 lock is held and has waiters** * * The fast atomic compare exchange based acquire and release is only - * possible when bit 0 and 1 of lock->owner are 0. + * possible when bit 0 of lock->owner is 0. + * + * (*) It also can be a transitional state when grabbing the lock + * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, + * we need to set the bit0 before looking at the lock, and the owner may be + * NULL in this small time, hence this can be a transitional state. * - * (*) There's a small time where the owner can be NULL and the - * "lock has waiters" bit is set. This can happen when grabbing the lock. - * To prevent a cmpxchg of the owner releasing the lock, we need to set this - * bit before looking at the lock, hence the reason this is a transitional - * state. + * (**) There is a small time when bit 0 is set but there are no + * waiters. This can happen when grabbing the lock in the slow path. + * To prevent a cmpxchg of the owner releasing the lock, we need to + * set this bit before looking at the lock. */ static void -rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, - unsigned long mask) +rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) { - unsigned long val = (unsigned long)owner | mask; + unsigned long val = (unsigned long)owner; if (rt_mutex_has_waiters(lock)) val |= RT_MUTEX_HAS_WAITERS; @@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * reached or the state of the chain has changed while we * dropped the locks. */ - if (!waiter || !waiter->task) + if (!waiter) goto out_unlock_pi; /* * Check the orig_waiter state. 
After we dropped the locks, - * the previous owner of the lock might have released the lock - * and made us the pending owner: + * the previous owner of the lock might have released the lock. */ - if (orig_waiter && !orig_waiter->task) + if (orig_waiter && !rt_mutex_owner(orig_lock)) goto out_unlock_pi; /* @@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* Release the task */ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + if (!rt_mutex_owner(lock)) { + /* + * If the requeue above changed the top waiter, then we need + * to wake the new top waiter up to try to get the lock. + */ + + if (top_waiter != rt_mutex_top_waiter(lock)) + wake_up_process(rt_mutex_top_waiter(lock)->task); + raw_spin_unlock(&lock->wait_lock); + goto out_put_task; + } put_task_struct(task); /* Grab the next task */ @@ -295,79 +298,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, return ret; } -/* - * Optimization: check if we can steal the lock from the - * assigned pending owner [which might not have taken the - * lock yet]: - */ -static inline int try_to_steal_lock(struct rt_mutex *lock, - struct task_struct *task) -{ - struct task_struct *pendowner = rt_mutex_owner(lock); - struct rt_mutex_waiter *next; - unsigned long flags; - - if (!rt_mutex_owner_pending(lock)) - return 0; - - if (pendowner == task) - return 1; - - raw_spin_lock_irqsave(&pendowner->pi_lock, flags); - if (task->prio >= pendowner->prio) { - raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); - return 0; - } - - /* - * Check if a waiter is enqueued on the pending owners - * pi_waiters list. Remove it and readjust pending owners - * priority. 
- */ - if (likely(!rt_mutex_has_waiters(lock))) { - raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); - return 1; - } - - /* No chain handling, pending owner is not blocked on anything: */ - next = rt_mutex_top_waiter(lock); - plist_del(&next->pi_list_entry, &pendowner->pi_waiters); - __rt_mutex_adjust_prio(pendowner); - raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); - - /* - * We are going to steal the lock and a waiter was - * enqueued on the pending owners pi_waiters queue. So - * we have to enqueue this waiter into - * task->pi_waiters list. This covers the case, - * where task is boosted because it holds another - * lock and gets unboosted because the booster is - * interrupted, so we would delay a waiter with higher - * priority as task->normal_prio. - * - * Note: in the rare case of a SCHED_OTHER task changing - * its priority and thus stealing the lock, next->task - * might be task: - */ - if (likely(next->task != task)) { - raw_spin_lock_irqsave(&task->pi_lock, flags); - plist_add(&next->pi_list_entry, &task->pi_waiters); - __rt_mutex_adjust_prio(task); - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - } - return 1; -} - /* * Try to take an rt-mutex * - * This fails - * - when the lock has a real owner - * - when a different pending owner exists and has higher priority than current - * * Must be called with lock->wait_lock held. + * + * @lock: the lock to be acquired. + * @task: the task which wants to acquire the lock + * @waiter: the waiter that is queued to the lock's wait list. 
(could be NULL) */ -static int try_to_take_rt_mutex(struct rt_mutex *lock) +static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + struct rt_mutex_waiter *waiter) { /* * We have to be careful here if the atomic speedups are @@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ mark_rt_mutex_waiters(lock); - if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) + if (rt_mutex_owner(lock)) return 0; + /* + * It will get the lock because of one of these conditions: + * 1) there is no waiter + * 2) higher priority than waiters + * 3) it is top waiter + */ + if (rt_mutex_has_waiters(lock)) { + if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) { + if (!waiter || waiter != rt_mutex_top_waiter(lock)) + return 0; + } + } + + if (waiter || rt_mutex_has_waiters(lock)) { + unsigned long flags; + struct rt_mutex_waiter *top; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + + /* remove the queued waiter. */ + if (waiter) { + plist_del(&waiter->list_entry, &lock->wait_list); + task->pi_blocked_on = NULL; + } + + /* + * We have to enqueue the top waiter(if it exists) into + * task->pi_waiters list. + */ + if (rt_mutex_has_waiters(lock)) { + top = rt_mutex_top_waiter(lock); + top->pi_list_entry.prio = top->list_entry.prio; + plist_add(&top->pi_list_entry, &task->pi_waiters); + } + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + } + /* We got the lock. 
*/ debug_rt_mutex_lock(lock); - rt_mutex_set_owner(lock, current, 0); + rt_mutex_set_owner(lock, task); - rt_mutex_deadlock_account_lock(lock, current); + rt_mutex_deadlock_account_lock(lock, task); return 1; } @@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, raw_spin_unlock_irqrestore(&task->pi_lock, flags); + if (!owner) + return 0; + if (waiter == rt_mutex_top_waiter(lock)) { raw_spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); @@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, /* * Wake up the next waiter on the lock. * - * Remove the top waiter from the current tasks waiter list and from - * the lock waiter list. Set it as pending owner. Then wake it up. + * Remove the top waiter from the current tasks waiter list and wake it up. * * Called with lock->wait_lock held. */ static void wakeup_next_waiter(struct rt_mutex *lock) { struct rt_mutex_waiter *waiter; - struct task_struct *pendowner; unsigned long flags; raw_spin_lock_irqsave(&current->pi_lock, flags); waiter = rt_mutex_top_waiter(lock); - plist_del(&waiter->list_entry, &lock->wait_list); /* * Remove it from current->pi_waiters. We do not adjust a @@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock) * lock->wait_lock. */ plist_del(&waiter->pi_list_entry, &current->pi_waiters); - pendowner = waiter->task; - waiter->task = NULL; - rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); + rt_mutex_set_owner(lock, NULL); raw_spin_unlock_irqrestore(&current->pi_lock, flags); - /* - * Clear the pi_blocked_on variable and enqueue a possible - * waiter into the pi_waiters list of the pending owner. This - * prevents that in case the pending owner gets unboosted a - * waiter with higher priority than pending-owner->normal_prio - * is blocked on the unboosted (pending) owner.
- */ - raw_spin_lock_irqsave(&pendowner->pi_lock, flags); - - WARN_ON(!pendowner->pi_blocked_on); - WARN_ON(pendowner->pi_blocked_on != waiter); - WARN_ON(pendowner->pi_blocked_on->lock != lock); - - pendowner->pi_blocked_on = NULL; - - if (rt_mutex_has_waiters(lock)) { - struct rt_mutex_waiter *next; - - next = rt_mutex_top_waiter(lock); - plist_add(&next->pi_list_entry, &pendowner->pi_waiters); - } - raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); - - wake_up_process(pendowner); + wake_up_process(waiter->task); } /* - * Remove a waiter from a lock + * Remove a waiter from a lock and give up * - * Must be called with lock->wait_lock held + * Must be called with lock->wait_lock held and + * have just failed to try_to_take_rt_mutex(). */ static void remove_waiter(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) @@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock, raw_spin_lock_irqsave(&current->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); - waiter->task = NULL; current->pi_blocked_on = NULL; raw_spin_unlock_irqrestore(&current->pi_lock, flags); - if (first && owner != current) { + if (!owner) + return; + + if (first) { raw_spin_lock_irqsave(&owner->pi_lock, flags); @@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task) * or TASK_UNINTERRUPTIBLE) * @timeout: the pre-initialized and started timer, or NULL for none * @waiter: the pre-initialized rt_mutex_waiter - * @detect_deadlock: passed to task_blocks_on_rt_mutex * * lock->wait_lock must be held by the caller.
*/ static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter, - int detect_deadlock) + struct rt_mutex_waiter *waiter) { int ret = 0; for (;;) { /* Try to acquire the lock: */ - if (try_to_take_rt_mutex(lock)) + if (try_to_take_rt_mutex(lock, current, waiter)) break; /* @@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, break; } - /* - * waiter->task is NULL the first time we come here and - * when we have been woken up by the previous owner - * but the lock got stolen by a higher prio task. - */ - if (!waiter->task) { - ret = task_blocks_on_rt_mutex(lock, waiter, current, - detect_deadlock); - /* - * If we got woken up by the owner then start loop - * all over without going into schedule to try - * to get the lock now: - */ - if (unlikely(!waiter->task)) { - /* - * Reset the return value. We might - * have returned with -EDEADLK and the - * owner released the lock while we - * were walking the pi chain. 
- */ - ret = 0; - continue; - } - if (unlikely(ret)) - break; - } - raw_spin_unlock(&lock->wait_lock); debug_rt_mutex_print_deadlock(waiter); - if (waiter->task) - schedule_rt_mutex(lock); + schedule_rt_mutex(lock); raw_spin_lock(&lock->wait_lock); set_current_state(state); @@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, int ret = 0; debug_rt_mutex_init_waiter(&waiter); - waiter.task = NULL; raw_spin_lock(&lock->wait_lock); /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock)) { + if (try_to_take_rt_mutex(lock, current, NULL)) { raw_spin_unlock(&lock->wait_lock); return 0; } @@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, timeout->task = NULL; } - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, - detect_deadlock); + ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); + + if (likely(!ret)) + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); set_current_state(TASK_RUNNING); - if (unlikely(waiter.task)) + if (unlikely(ret)) remove_waiter(lock, &waiter); /* @@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, if (unlikely(timeout)) hrtimer_cancel(&timeout->timer); - /* - * Readjust priority, when we did not get the lock. We might - * have been the pending owner and boosted. Since we did not - * take the lock, the PI boost has to go. - */ - if (unlikely(ret)) - rt_mutex_adjust_prio(current); - debug_rt_mutex_free_waiter(&waiter); return ret; @@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock) if (likely(rt_mutex_owner(lock) != current)) { - ret = try_to_take_rt_mutex(lock); + ret = try_to_take_rt_mutex(lock, current, NULL); /* * try_to_take_rt_mutex() sets the lock waiters * bit unconditionally. Clean this up. 
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, { __rt_mutex_init(lock, NULL); debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner, 0); + rt_mutex_set_owner(lock, proxy_owner); rt_mutex_deadlock_account_lock(lock, proxy_owner); } @@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner) { debug_rt_mutex_proxy_unlock(lock); - rt_mutex_set_owner(lock, NULL, 0); + rt_mutex_set_owner(lock, NULL); rt_mutex_deadlock_account_unlock(proxy_owner); } @@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_lock(&lock->wait_lock); - mark_rt_mutex_waiters(lock); - - if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { - /* We got the lock for task. */ - debug_rt_mutex_lock(lock); - rt_mutex_set_owner(lock, task, 0); + if (try_to_take_rt_mutex(lock, task, NULL)) { raw_spin_unlock(&lock->wait_lock); - rt_mutex_deadlock_account_lock(lock, task); return 1; } ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); - if (ret && !waiter->task) { + if (ret && !rt_mutex_owner(lock)) { /* * Reset the return value. We might have * returned with -EDEADLK and the owner @@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, */ ret = 0; } + + if (unlikely(ret)) + remove_waiter(lock, waiter); + raw_spin_unlock(&lock->wait_lock); debug_rt_mutex_print_deadlock(waiter); @@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, set_current_state(TASK_INTERRUPTIBLE); - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, - detect_deadlock); + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); set_current_state(TASK_RUNNING); - if (unlikely(waiter->task)) + if (unlikely(ret)) remove_waiter(lock, waiter); /* @@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - /* - * Readjust priority, when we did not get the lock. 
We might have been - * the pending owner and boosted. Since we did not take the lock, the - * PI boost has to go. - */ - if (unlikely(ret)) - rt_mutex_adjust_prio(current); - return ret; } diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 97a2f81866af..53a66c85261b 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p) /* * lock->owner state tracking: */ -#define RT_MUTEX_OWNER_PENDING 1UL -#define RT_MUTEX_HAS_WAITERS 2UL -#define RT_MUTEX_OWNER_MASKALL 3UL +#define RT_MUTEX_HAS_WAITERS 1UL +#define RT_MUTEX_OWNER_MASKALL 1UL static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) { @@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); } -static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) -{ - return (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); -} - -static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock) -{ - return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING; -} - /* * PI-futex support (proxy locking functions, etc.): */ -- cgit v1.2.3-71-gd317 From 1fb0ef31f428f345a7c3666f8e7444a563edd537 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 31 Jan 2011 08:57:41 +0100 Subject: genirq: Fix affinity notifier fallout The new code of commit cd7eab44e(genirq: Add IRQ affinity notifiers) references irq_desc.affinity which fails to compile with CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED=y. Use irq_desc.irq_data.affinity instead. 
Signed-off-by: Thomas Gleixner Cc: Ben Hutchings --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0587c5ceaed8..538fce2db51c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -179,7 +179,7 @@ static void irq_affinity_notify(struct work_struct *work) cpumask_copy(cpumask, desc->pending_mask); else #endif - cpumask_copy(cpumask, desc->affinity); + cpumask_copy(cpumask, desc->irq_data.affinity); raw_spin_unlock_irqrestore(&desc->lock, flags); notify->notify(notify, cpumask); -- cgit v1.2.3-71-gd317 From 4135038a582c20ffdadfcf6564852e0b72a20968 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Fri, 28 Jan 2011 11:00:31 -0500 Subject: watchdog: Fix broken nowatchdog logic Passing nowatchdog to kernel disables 2 things: creation of watchdog threads AND initialization of percpu watchdog_hrtimer. As hrtimers are initialized only at boot it's not possible to enable watchdog later - for me all watchdog threads started to eat 100% of CPU time, but they could just crash. Additionally, even if these threads would start properly, watchdog_disable_all_cpus was guarded by no_watchdog check, so you couldn't disable watchdog. To fix this, remove no_watchdog variable and use already existing watchdog_enabled variable. 
Signed-off-by: Marcin Slusarz [ removed another no_watchdog instance ] Signed-off-by: Don Zickus Cc: Stephane Eranian Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: LKML-Reference: <1296230433-6261-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea98..d9961ea1c3f4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,7 +27,7 @@ #include #include -int watchdog_enabled; +int watchdog_enabled = 1; int __read_mostly softlockup_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int no_watchdog; - - /* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: @@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str) if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; else if (!strncmp(str, "0", 1)) - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -476,9 +473,6 @@ static void watchdog_disable_all_cpus(void) { int cpu; - if (no_watchdog) - return; - for_each_online_cpu(cpu) watchdog_disable(cpu); @@ -530,7 +524,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - err = watchdog_enable(hotcpu); + if 
(watchdog_enabled) + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -555,9 +550,6 @@ void __init lockup_detector_init(void) void *cpu = (void *)(long)smp_processor_id(); int err; - if (no_watchdog) - return; - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); -- cgit v1.2.3-71-gd317 From 397357666de6b5b6adb5fa99f9758ec8cf30ac34 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Fri, 28 Jan 2011 11:00:32 -0500 Subject: watchdog: Fix sysctl consistency If it was not possible to enable watchdog for any cpu, switch watchdog_enabled back to 0, because it's visible via kernel.watchdog sysctl. Signed-off-by: Marcin Slusarz Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: LKML-Reference: <1296230433-6261-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d9961ea1c3f4..c7e0049344bd 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -429,9 +429,6 @@ static int watchdog_enable(int cpu) wake_up_process(p); } - /* if any cpu succeeds, watchdog is considered enabled for the system */ - watchdog_enabled = 1; - return 0; } @@ -459,12 +456,16 @@ static void watchdog_disable(int cpu) static void watchdog_enable_all_cpus(void) { int cpu; - int result = 0; + + watchdog_enabled = 0; for_each_online_cpu(cpu) - result += watchdog_enable(cpu); + if (!watchdog_enable(cpu)) + /* if any cpu succeeds, watchdog is considered + enabled for the system */ + watchdog_enabled = 1; - if (result) + if (!watchdog_enabled) printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); } -- cgit v1.2.3-71-gd317 From 9ffdc6c37df131f89d52001e0ef03091b158826f Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Fri, 28 Jan 2011 11:00:33 -0500 Subject: watchdog: Don't change watchdog state on read of sysctl Signed-off-by: 
Marcin Slusarz [ add {}'s to fix a warning ] Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: LKML-Reference: <1296230433-6261-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c7e0049344bd..f37f974aa81b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -493,10 +493,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, { proc_dointvec(table, write, buffer, length, ppos); - if (watchdog_enabled) - watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); + if (write) { + if (watchdog_enabled) + watchdog_enable_all_cpus(); + else + watchdog_disable_all_cpus(); + } return 0; } -- cgit v1.2.3-71-gd317 From 871cf1e5f2a17702f58539a3af8b18fc8666ad4c Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Thu, 27 Jan 2011 15:58:55 +0100 Subject: time: Move do_timer() to kernel/time/timekeeping.c do_timer() is primary timekeeping related. calc_global_load() is called from do_timer() as well, but that's more for historical reasons. 
[ tglx: Fixed up the calc_global_load() reject and massaged changelog ] Signed-off-by: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org LKML-Reference: <20110127145855.23248.56933.stgit@localhost> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 - kernel/time/timekeeping.c | 14 +++++++++++++- kernel/timer.c | 13 ------------- 3 files changed, 13 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/include/linux/time.h b/include/linux/time.h index 1e6d3b59238d..86a9c487fdd8 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -166,7 +166,6 @@ extern void monotonic_to_bootbased(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); -extern void update_wall_time(void); extern void timekeeping_leap_insert(int leapsecond); struct tms; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d27c7562902c..c1a178ca0f50 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -779,7 +779,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) * * Called from the timer interrupt, must hold a write on xtime_lock. */ -void update_wall_time(void) +static void update_wall_time(void) { struct clocksource *clock; cycle_t offset; @@ -946,3 +946,15 @@ struct timespec get_monotonic_coarse(void) now.tv_nsec + mono.tv_nsec); return now; } + +/* + * The 64-bit jiffies value is not atomic - you MUST NOT read it + * without sampling the sequence number in xtime_lock. + * jiffies is defined in the linker script...
+ */ +void do_timer(unsigned long ticks) +{ + jiffies_64 += ticks; + update_wall_time(); + calc_global_load(ticks); +} diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d0..87f656cc2a55 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1293,19 +1293,6 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } -/* - * The 64-bit jiffies value is not atomic - you MUST NOT read it - * without sampling the sequence number in xtime_lock. - * jiffies is defined in the linker script... - */ - -void do_timer(unsigned long ticks) -{ - jiffies_64 += ticks; - update_wall_time(); - calc_global_load(ticks); -} - #ifdef __ARCH_WANT_SYS_ALARM /* -- cgit v1.2.3-71-gd317 From fbad1ea94159a71bc0f68b00e57ae803606af9fb Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Thu, 27 Jan 2011 15:59:00 +0100 Subject: time: Move get_jiffies_64 to kernel/time/jiffies.c Move the jiffies access functions to the jiffies clocksource code. [ tglx: Add missing include ] Signed-off-by: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org LKML-Reference: <20110127145900.23248.73352.stgit@localhost> Signed-off-by: Thomas Gleixner --- kernel/time.c | 17 ----------------- kernel/time/jiffies.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index 32174359576f..a31b51220ac6 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -674,23 +674,6 @@ unsigned long nsecs_to_jiffies(u64 n) #endif } -#if (BITS_PER_LONG < 64) -u64 get_jiffies_64(void) -{ - unsigned long seq; - u64 ret; - - do { - seq = read_seqbegin(&xtime_lock); - ret = jiffies_64; - } while (read_seqretry(&xtime_lock, seq)); - return ret; -} -EXPORT_SYMBOL(get_jiffies_64); -#endif - -EXPORT_SYMBOL(jiffies); - /* * Add two timespec values and do a safety check for overflow. 
* It's assumed that both values are valid (>= 0) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 5404a8456909..2fbc20744797 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -22,6 +22,7 @@ ************************************************************************/ #include #include +#include #include /* The Jiffies based clocksource is the lowest common @@ -64,6 +65,23 @@ struct clocksource clocksource_jiffies = { .shift = JIFFIES_SHIFT, }; +#if (BITS_PER_LONG < 64) +u64 get_jiffies_64(void) +{ + unsigned long seq; + u64 ret; + + do { + seq = read_seqbegin(&xtime_lock); + ret = jiffies_64; + } while (read_seqretry(&xtime_lock, seq)); + return ret; +} +EXPORT_SYMBOL(get_jiffies_64); +#endif + +EXPORT_SYMBOL(jiffies); + static int __init init_jiffies_clocksource(void) { return clocksource_register(&clocksource_jiffies); -- cgit v1.2.3-71-gd317 From 48cf76f7104f655bbd48a75c7759dce82c3e1ab6 Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Thu, 27 Jan 2011 15:59:05 +0100 Subject: time: Provide get_xtime_and_monotonic_offset() The hrtimer code accesses timekeeping variables under xtime_lock. Provide a sensible accessor function and use it. 
[ tglx: Removed the conditionals, unused variable, fixed codingstyle and massaged changelog ] Signed-off-by: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org LKML-Reference: <20110127145905.23248.30458.stgit@localhost> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/hrtimer.c | 13 ++----------- kernel/time/timekeeping.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/include/linux/time.h b/include/linux/time.h index 86a9c487fdd8..4007a12a1b50 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -127,6 +127,7 @@ struct timespec current_kernel_time(void); struct timespec __current_kernel_time(void); /* does not take xtime_lock */ struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); +void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0c8d7c048615..57c4d33c9a9d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -85,13 +85,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { ktime_t xtim, tomono; struct timespec xts, tom; - unsigned long seq; - do { - seq = read_seqbegin(&xtime_lock); - xts = __current_kernel_time(); - tom = __get_wall_to_monotonic(); - } while (read_seqretry(&xtime_lock, seq)); + get_xtime_and_monotonic_offset(&xts, &tom); xtim = timespec_to_ktime(xts); tomono = timespec_to_ktime(tom); @@ -612,15 +607,11 @@ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base; struct timespec realtime_offset, wtm; - unsigned long seq; if (!hrtimer_hres_active()) return; - do { - seq = read_seqbegin(&xtime_lock); - wtm = __get_wall_to_monotonic(); - } while (read_seqretry(&xtime_lock, seq)); + 
get_xtime_and_monotonic_offset(&realtime_offset, &wtm); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); base = &__get_cpu_var(hrtimer_bases); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c1a178ca0f50..c50aaf6cd01d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -958,3 +958,19 @@ void do_timer(unsigned long ticks) update_wall_time(); calc_global_load(ticks); } + +/** + * get_xtime_and_monotonic_offset() - get xtime and wall_to_monotonic + * @xtim: pointer to timespec to be set with xtime + * @wtom: pointer to timespec to be set with wall_to_monotonic + */ +void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom) +{ + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + *xtim = xtime; + *wtom = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); +} -- cgit v1.2.3-71-gd317 From 79ecaf0d15344d78904becf0f25de3fc9b49d430 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 31 Jan 2011 11:07:54 +0100 Subject: time: Remove unused __get_wall_to_monotonic() No users left. Remove it. 
Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 - kernel/time/timekeeping.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/time.h b/include/linux/time.h index 4007a12a1b50..ce29c86882b1 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,7 +125,6 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); struct timespec __current_kernel_time(void); /* does not take xtime_lock */ -struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c50aaf6cd01d..8da35d1b9e16 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -910,11 +910,6 @@ struct timespec __current_kernel_time(void) return xtime; } -struct timespec __get_wall_to_monotonic(void) -{ - return wall_to_monotonic; -} - struct timespec current_kernel_time(void) { struct timespec now; -- cgit v1.2.3-71-gd317 From f0af911a9dec9de702645182c8d269449e24d24b Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Thu, 27 Jan 2011 15:59:10 +0100 Subject: time: Provide xtime_update() xtime_update() takes xtime_lock write locked and calls do_timer(). Provided to replace the do_timer() calls in the architecture code. 
Signed-off-by: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org LKML-Reference: <20110127145910.23248.21379.stgit@localhost> Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 + kernel/time/timekeeping.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f948b34e..9d9a0787eed3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2050,6 +2050,7 @@ extern void release_uids(struct user_namespace *ns); #include extern void do_timer(unsigned long ticks); +extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8da35d1b9e16..02c13a313d15 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -969,3 +969,16 @@ void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom *wtom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); } + +/** + * xtime_update() - advances the timekeeping infrastructure + * @ticks: number of ticks, that have elapsed since the last call. + * + * Must be called with interrupts disabled. + */ +void xtime_update(unsigned long ticks) +{ + write_seqlock(&xtime_lock); + do_timer(ticks); + write_sequnlock(&xtime_lock); +} -- cgit v1.2.3-71-gd317 From e2830b5c1b2b2217894370a3b95af87d4a958401 Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Thu, 27 Jan 2011 16:00:32 +0100 Subject: time: Make do_timer() and xtime_lock local to kernel/time/ All callers of do_timer() are converted to xtime_update(). The only users of xtime_lock are in kernel/time/. Make both local to kernel/time/ and remove them from the global header files. [ tglx: Reuse tick-internal.h instead of creating another local header file. 
Massaged changelog ] Signed-off-by: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - include/linux/time.h | 2 -- kernel/time/clockevents.c | 1 - kernel/time/jiffies.c | 2 ++ kernel/time/ntp.c | 2 ++ kernel/time/tick-broadcast.c | 1 - kernel/time/tick-common.c | 1 - kernel/time/tick-internal.h | 5 +++++ kernel/time/tick-oneshot.c | 1 - kernel/time/tick-sched.c | 1 - 10 files changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9d9a0787eed3..cdef640aa446 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2049,7 +2049,6 @@ extern void release_uids(struct user_namespace *ns); #include -extern void do_timer(unsigned long ticks); extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); diff --git a/include/linux/time.h b/include/linux/time.h index ce29c86882b1..38c5206c2673 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs, #define timespec_valid(ts) \ (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) -extern seqlock_t xtime_lock; - extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index d7395fdfb9f3..0d74b9ba90c8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "tick-internal.h" diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 2fbc20744797..b2fa506667c0 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -25,6 +25,8 @@ #include #include +#include "tick-internal.h" + /* The Jiffies based clocksource 
is the lowest common * denominator clock source which should function on * all systems. It has the same coarse resolution as diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5c00242fa921..ed8cfdf16983 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -16,6 +16,8 @@ #include #include +#include "tick-internal.h" + /* * NTP timekeeping variables: */ diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 48b2761b5668..92ef9a54f0a4 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "tick-internal.h" diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 051bc80a0c43..0e98fac3d479 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 290eefbc1f60..28c578568c9d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -1,6 +1,8 @@ /* * tick internal variable and functions used by low/high res code */ +#include +#include #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 @@ -132,3 +134,6 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) { return !(dev->features & CLOCK_EVT_FEAT_DUMMY); } + +extern void do_timer(unsigned long ticks); +extern seqlock_t xtime_lock; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 5cbc101f908b..2d04411a5f05 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -18,7 +18,6 @@ #include #include #include -#include #include "tick-internal.h" diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c55ea2433471..d5097c44b407 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3-71-gd317 From 
7cf37e87dd2cfa17a64f28ea7f31eed4525f79e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 09:34:58 +0100 Subject: time: Fix legacy arch fallout The xtime/dotimer cleanup broke architectures which do not implement clockevents. Time to send out another __do_IRQ threat. Signed-off-by: Thomas Gleixner Reported-by: Ingo Molnar Cc: Torben Hohn Cc: Peter Zijlstra Cc: johnstul@us.ibm.com Cc: yong.zhang0@gmail.com Cc: hch@infradead.org LKML-Reference: <20110127145905.23248.30458.stgit@localhost> Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 28c578568c9d..f77b93df0006 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -4,6 +4,8 @@ #include #include +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD + #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 @@ -135,5 +137,7 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) return !(dev->features & CLOCK_EVT_FEAT_DUMMY); } +#endif + extern void do_timer(unsigned long ticks); extern seqlock_t xtime_lock; -- cgit v1.2.3-71-gd317 From 4916ca401e3051dad326ddd69765bd0e3f32fb9b Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: Tue, 1 Feb 2011 18:44:56 +0200 Subject: security: remove unused security_sysctl hook The only user for this hook was selinux. sysctl routes every call through /proc/sys/. Selinux and other security modules use the file system checks for sysctl too, so no need for this hook any more. 
Signed-off-by: Lucian Adrian Grijincu Signed-off-by: Eric Paris --- include/linux/security.h | 13 ------------- kernel/sysctl.c | 5 ----- security/capability.c | 6 ------ security/security.c | 5 ----- 4 files changed, 29 deletions(-) (limited to 'kernel') diff --git a/include/linux/security.h b/include/linux/security.h index 05dd5a64aa76..14167f2eb35a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1259,12 +1259,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @cap contains the capability . * @audit: Whether to write an audit message or not * Return 0 if the capability is granted for @tsk. - * @sysctl: - * Check permission before accessing the @table sysctl variable in the - * manner specified by @op. - * @table contains the ctl_table structure for the sysctl variable. - * @op contains the operation (001 = search, 002 = write, 004 = read). - * Return 0 if permission is granted. * @syslog: * Check permission before accessing the kernel message ring or changing * logging to the console. 
@@ -1385,7 +1379,6 @@ struct security_operations { const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, const struct cred *cred, int cap, int audit); - int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); int (*quota_on) (struct dentry *dentry); int (*syslog) (int type); @@ -1668,7 +1661,6 @@ int security_capset(struct cred *new, const struct cred *old, int security_capable(int cap); int security_real_capable(struct task_struct *tsk, int cap); int security_real_capable_noaudit(struct task_struct *tsk, int cap); -int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); @@ -1887,11 +1879,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap) return ret; } -static inline int security_sysctl(struct ctl_table *table, int op) -{ - return 0; -} - static inline int security_quotactl(int cmds, int type, int id, struct super_block *sb) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae5cbb1e3ced..e24254c27eaf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1691,13 +1691,8 @@ static int test_perm(int mode, int op) int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) { - int error; int mode; - error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); - if (error) - return error; - if (root->permissions) mode = root->permissions(root, current->nsproxy, table); else diff --git a/security/capability.c b/security/capability.c index 383d14dc12ef..85b67c8632df 100644 --- a/security/capability.c +++ b/security/capability.c @@ -12,11 +12,6 @@ #include -static int cap_sysctl(ctl_table *table, int op) -{ - return 0; -} - static int cap_syslog(int type) { return 0; @@ -881,7 +876,6 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, capable); 
set_to_cap_if_null(ops, quotactl); set_to_cap_if_null(ops, quota_on); - set_to_cap_if_null(ops, sysctl); set_to_cap_if_null(ops, syslog); set_to_cap_if_null(ops, settime); set_to_cap_if_null(ops, vm_enough_memory); diff --git a/security/security.c b/security/security.c index 4830f36e1ab5..8f28685ee0d9 100644 --- a/security/security.c +++ b/security/security.c @@ -182,11 +182,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap) return ret; } -int security_sysctl(struct ctl_table *table, int op) -{ - return security_ops->sysctl(table, op); -} - int security_quotactl(int cmds, int type, int id, struct super_block *sb) { return security_ops->quotactl(cmds, type, id, sb); -- cgit v1.2.3-71-gd317 From 1e6d767924c74929c0cfe839ae8f37bcee9e544e Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:50:58 +0000 Subject: time: Correct the *settime* parameters Both settimeofday() and clock_settime() promise with a 'const' attribute not to alter the arguments passed in. This patch adds the missing 'const' attribute into the various kernel functions implementing these calls. 
Signed-off-by: Richard Cochran Acked-by: John Stultz LKML-Reference: <20110201134417.545698637@linutronix.de> Signed-off-by: Thomas Gleixner --- drivers/char/mmtimer.c | 2 +- include/linux/posix-timers.h | 5 +++-- include/linux/security.h | 9 +++++---- include/linux/time.h | 5 +++-- kernel/posix-timers.c | 4 ++-- kernel/time.c | 2 +- kernel/time/timekeeping.c | 2 +- security/commoncap.c | 2 +- security/security.c | 2 +- 9 files changed, 18 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index e6d75627c6c8..ecd0082502ef 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -487,7 +487,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp) return 0; }; -static int sgi_clock_set(clockid_t clockid, struct timespec *tp) +static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp) { u64 nsec; diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 3e23844a6990..b2c14cbd47a6 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -69,7 +69,8 @@ struct k_itimer { struct k_clock { int res; /* in nanoseconds */ int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); - int (*clock_set) (const clockid_t which_clock, struct timespec * tp); + int (*clock_set) (const clockid_t which_clock, + const struct timespec *tp); int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, @@ -89,7 +90,7 @@ void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); /* error handlers for timer_create, nanosleep and settime */ int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *, struct timespec __user *); -int do_posix_clock_nosettime(const clockid_t, struct timespec *tp); +int do_posix_clock_nosettime(const clockid_t, const struct timespec *tp); /* function to call 
to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); diff --git a/include/linux/security.h b/include/linux/security.h index c642bb8b8f5a..c096aa6fca60 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,7 +53,7 @@ struct audit_krule; */ extern int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, int audit); -extern int cap_settime(struct timespec *ts, struct timezone *tz); +extern int cap_settime(const struct timespec *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1387,7 +1387,7 @@ struct security_operations { int (*quotactl) (int cmds, int type, int id, struct super_block *sb); int (*quota_on) (struct dentry *dentry); int (*syslog) (int type); - int (*settime) (struct timespec *ts, struct timezone *tz); + int (*settime) (const struct timespec *ts, const struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); int (*bprm_set_creds) (struct linux_binprm *bprm); @@ -1669,7 +1669,7 @@ int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); -int security_settime(struct timespec *ts, struct timezone *tz); +int security_settime(const struct timespec *ts, const struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); @@ -1904,7 +1904,8 @@ static inline int security_syslog(int type) return 0; } -static inline int security_settime(struct timespec *ts, struct timezone *tz) +static inline int security_settime(const struct timespec *ts, + const 
struct timezone *tz) { return cap_settime(ts, tz); } diff --git a/include/linux/time.h b/include/linux/time.h index 38c5206c2673..7c44e7778033 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -145,8 +145,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; } #endif extern void do_gettimeofday(struct timeval *tv); -extern int do_settimeofday(struct timespec *tv); -extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); +extern int do_settimeofday(const struct timespec *tv); +extern int do_sys_settimeofday(const struct timespec *tv, + const struct timezone *tz); #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); struct itimerval; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 93bd2eb2bc53..21b7ca205f38 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -192,7 +192,7 @@ static int common_clock_get(clockid_t which_clock, struct timespec *tp) } static inline int common_clock_set(const clockid_t which_clock, - struct timespec *tp) + const struct timespec *tp) { return do_sys_settimeofday(tp, NULL); } @@ -928,7 +928,7 @@ void exit_itimers(struct signal_struct *sig) } /* Not available / possible... functions */ -int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp) +int do_posix_clock_nosettime(const clockid_t clockid, const struct timespec *tp) { return -EINVAL; } diff --git a/kernel/time.c b/kernel/time.c index a31b51220ac6..5cb80533d8b5 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -150,7 +150,7 @@ static inline void warp_clock(void) * various programs will get confused when the clock gets warped. 
*/ -int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) +int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz) { static int firsttime = 1; int error = 0; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c13a313d15..4f9f65b91323 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday); * * Sets the time of day to the new time and update NTP and notify hrtimers */ -int do_settimeofday(struct timespec *tv) +int do_settimeofday(const struct timespec *tv) { struct timespec ts_delta; unsigned long flags; diff --git a/security/commoncap.c b/security/commoncap.c index 64c2ed9c9015..dbfdaed4cc66 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied. */ -int cap_settime(struct timespec *ts, struct timezone *tz) +int cap_settime(const struct timespec *ts, const struct timezone *tz) { if (!capable(CAP_SYS_TIME)) return -EPERM; diff --git a/security/security.c b/security/security.c index 739e40362f44..b995428f1c96 100644 --- a/security/security.c +++ b/security/security.c @@ -202,7 +202,7 @@ int security_syslog(int type) return security_ops->syslog(type); } -int security_settime(struct timespec *ts, struct timezone *tz) +int security_settime(const struct timespec *ts, const struct timezone *tz) { return security_ops->settime(ts, tz); } -- cgit v1.2.3-71-gd317 From 65da528d7cc94966cf24d2a1e0837b689159b543 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:01 +0000 Subject: posix-timers: Define nanosleep not supported error separate Define the conditional nanosleep not supported error value outside of do_posix_clock_nonanosleep(). Preparatory patch for further cleanups. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134417.643486574@linutronix.de> --- kernel/posix-timers.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 21b7ca205f38..89bff3766d7d 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -81,6 +81,14 @@ static DEFINE_SPINLOCK(idr_lock); #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif +/* + * parisc wants ENOTSUP instead of EOPNOTSUPP + */ +#ifndef ENOTSUP +# define ENANOSLEEP_NOTSUP EOPNOTSUPP +#else +# define ENANOSLEEP_NOTSUP ENOTSUP +#endif /* * The timer ID is turned into a timer address by idr_find(). @@ -937,11 +945,7 @@ EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); int do_posix_clock_nonanosleep(const clockid_t clock, int flags, struct timespec *t, struct timespec __user *r) { -#ifndef ENOTSUP - return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ -#else /* parisc does define it separately. */ - return -ENOTSUP; -#endif + return -ENANOSLEEP_NOTSUP; } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -- cgit v1.2.3-71-gd317 From 2fd1f04089cb657c5d6c484b280ec4d3398aa157 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:03 +0000 Subject: posix-timers: Cleanup struct initializers Cosmetic. 
No functional change Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134417.745627057@linutronix.de> --- drivers/char/mmtimer.c | 14 +++++++------- kernel/posix-cpu-timers.c | 24 ++++++++++++------------ kernel/posix-timers.c | 38 +++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index ecd0082502ef..fd51cd8ee063 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -765,13 +765,13 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, static struct k_clock sgi_clock = { .res = 0, - .clock_set = sgi_clock_set, - .clock_get = sgi_clock_get, - .timer_create = sgi_timer_create, - .nsleep = do_posix_clock_nonanosleep, - .timer_set = sgi_timer_set, - .timer_del = sgi_timer_del, - .timer_get = sgi_timer_get + .clock_set = sgi_clock_set, + .clock_get = sgi_clock_get, + .timer_create = sgi_timer_create, + .nsleep = do_posix_clock_nonanosleep, + .timer_set = sgi_timer_set, + .timer_del = sgi_timer_del, + .timer_get = sgi_timer_get }; /** diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 05bb7173850e..11b91dc0992b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1607,20 +1607,20 @@ static long thread_cpu_nsleep_restart(struct restart_block *restart_block) static __init int init_posix_cpu_timers(void) { struct k_clock process = { - .clock_getres = process_cpu_clock_getres, - .clock_get = process_cpu_clock_get, - .clock_set = do_posix_clock_nosettime, - .timer_create = process_cpu_timer_create, - .nsleep = process_cpu_nsleep, - .nsleep_restart = process_cpu_nsleep_restart, + .clock_getres = process_cpu_clock_getres, + .clock_get = process_cpu_clock_get, + .clock_set = do_posix_clock_nosettime, + .timer_create = process_cpu_timer_create, + .nsleep = process_cpu_nsleep, + .nsleep_restart = process_cpu_nsleep_restart, }; struct k_clock 
thread = { - .clock_getres = thread_cpu_clock_getres, - .clock_get = thread_cpu_clock_get, - .clock_set = do_posix_clock_nosettime, - .timer_create = thread_cpu_timer_create, - .nsleep = thread_cpu_nsleep, - .nsleep_restart = thread_cpu_nsleep_restart, + .clock_getres = thread_cpu_clock_getres, + .clock_get = thread_cpu_clock_get, + .clock_set = do_posix_clock_nosettime, + .timer_create = thread_cpu_timer_create, + .nsleep = thread_cpu_nsleep, + .nsleep_restart = thread_cpu_nsleep_restart, }; struct timespec ts; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 89bff3766d7d..e7d26afd8ee5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -281,33 +281,33 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp static __init int init_posix_timers(void) { struct k_clock clock_realtime = { - .clock_getres = hrtimer_get_res, + .clock_getres = hrtimer_get_res, }; struct k_clock clock_monotonic = { - .clock_getres = hrtimer_get_res, - .clock_get = posix_ktime_get_ts, - .clock_set = do_posix_clock_nosettime, + .clock_getres = hrtimer_get_res, + .clock_get = posix_ktime_get_ts, + .clock_set = do_posix_clock_nosettime, }; struct k_clock clock_monotonic_raw = { - .clock_getres = hrtimer_get_res, - .clock_get = posix_get_monotonic_raw, - .clock_set = do_posix_clock_nosettime, - .timer_create = no_timer_create, - .nsleep = no_nsleep, + .clock_getres = hrtimer_get_res, + .clock_get = posix_get_monotonic_raw, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, }; struct k_clock clock_realtime_coarse = { - .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_realtime_coarse, - .clock_set = do_posix_clock_nosettime, - .timer_create = no_timer_create, - .nsleep = no_nsleep, + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, }; struct k_clock 
clock_monotonic_coarse = { - .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_monotonic_coarse, - .clock_set = do_posix_clock_nosettime, - .timer_create = no_timer_create, - .nsleep = no_nsleep, + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); -- cgit v1.2.3-71-gd317 From 1976945eeaab5fa461735a6225a82c3cf1e65d62 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:06 +0000 Subject: posix-timers: Introduce clock_posix_cpu The CLOCK_DISPATCH() macro is a horrible magic. We call common functions if a function pointer is not set. That's just backwards. To support dynamic file descriptor based clocks we need to cleanup that dispatch logic. Create a k_clock struct clock_posix_cpu which has all the posix-cpu-timer functions filled in. After the cleanup the functions can be made static.
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134417.841974553@linutronix.de> --- include/linux/posix-timers.h | 2 ++ kernel/posix-cpu-timers.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index b2c14cbd47a6..1330ff331526 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -85,6 +85,8 @@ struct k_clock { struct itimerspec * cur_setting); }; +extern struct k_clock clock_posix_cpu; + void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); /* error handlers for timer_create, nanosleep and settime */ diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 11b91dc0992b..816cd49a5ad9 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1604,6 +1604,18 @@ static long thread_cpu_nsleep_restart(struct restart_block *restart_block) return -EINVAL; } +struct k_clock clock_posix_cpu = { + .clock_getres = posix_cpu_clock_getres, + .clock_set = posix_cpu_clock_set, + .clock_get = posix_cpu_clock_get, + .timer_create = posix_cpu_timer_create, + .nsleep = posix_cpu_nsleep, + .nsleep_restart = posix_cpu_nsleep_restart, + .timer_set = posix_cpu_timer_set, + .timer_del = posix_cpu_timer_del, + .timer_get = posix_cpu_timer_get, +}; + static __init int init_posix_cpu_timers(void) { struct k_clock process = { -- cgit v1.2.3-71-gd317 From cc785ac22b17ed53e8ff5c1501e422be6d10be3c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:09 +0000 Subject: posix-timers: Introduce clockid_to_kclock() New function to find the kclock for a given clockid. Returns a pointer to clock_posix_cpu if clockid < 0. If clockid >= MAXCLOCK or if the clock_getres pointer is not set it returns NULL. For valid clocks it returns a pointer to the matching posix_clock. 
Signed-off-by: Thomas Gleixner Cc: John Stultz Acked-by: Richard Cochran LKML-Reference: <20110201134417.938447839@linutronix.de> --- kernel/posix-timers.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index e7d26afd8ee5..14b0a70ffb1e 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -531,6 +531,16 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) kmem_cache_free(posix_timers_cache, tmr); } +static struct k_clock *clockid_to_kclock(const clockid_t id) +{ + if (id < 0) + return &clock_posix_cpu; + + if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres) + return NULL; + return &posix_clocks[id]; +} + /* Create a POSIX.1b interval timer. */ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, -- cgit v1.2.3-71-gd317 From a5cd2880106cb2c79b3fe24f1c53dadba6a542a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:11 +0000 Subject: posix-timers: Convert clock_nanosleep to clockid_to_kclock() Use the new kclock decoding function in clock_nanosleep and cleanup all kclocks which use the default functions. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.034175556@linutronix.de> --- drivers/char/mmtimer.c | 1 - include/linux/posix-timers.h | 2 -- kernel/posix-timers.c | 26 +++++++------------------- 3 files changed, 7 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index fd51cd8ee063..262d10453cb8 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -768,7 +768,6 @@ static struct k_clock sgi_clock = { .clock_set = sgi_clock_set, .clock_get = sgi_clock_get, .timer_create = sgi_timer_create, - .nsleep = do_posix_clock_nonanosleep, .timer_set = sgi_timer_set, .timer_del = sgi_timer_del, .timer_get = sgi_timer_get diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 1330ff331526..cd6da067bce1 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -90,8 +90,6 @@ extern struct k_clock clock_posix_cpu; void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); /* error handlers for timer_create, nanosleep and settime */ -int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *, - struct timespec __user *); int do_posix_clock_nosettime(const clockid_t, const struct timespec *tp); /* function to call to trigger timer event */ diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 14b0a70ffb1e..ee69b216d5c3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -216,12 +216,6 @@ static int no_timer_create(struct k_itimer *new_timer) return -EOPNOTSUPP; } -static int no_nsleep(const clockid_t which_clock, int flags, - struct timespec *tsave, struct timespec __user *rmtp) -{ - return -EOPNOTSUPP; -} - /* * Return nonzero if we know a priori this clockid_t value is bogus. 
*/ @@ -282,32 +276,31 @@ static __init int init_posix_timers(void) { struct k_clock clock_realtime = { .clock_getres = hrtimer_get_res, + .nsleep = common_nsleep, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, .clock_get = posix_ktime_get_ts, .clock_set = do_posix_clock_nosettime, + .nsleep = common_nsleep, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, .clock_get = posix_get_monotonic_raw, .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, - .nsleep = no_nsleep, }; struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_realtime_coarse, .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, - .nsleep = no_nsleep, }; struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_monotonic_coarse, .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, - .nsleep = no_nsleep, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); @@ -952,13 +945,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, const struct timespec *tp) } EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); -int do_posix_clock_nonanosleep(const clockid_t clock, int flags, - struct timespec *t, struct timespec __user *r) -{ - return -ENANOSLEEP_NOTSUP; -} -EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); - SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { @@ -1023,10 +1009,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { + struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec t; - if (invalid_clockid(which_clock)) + if (!kc) return -EINVAL; + if (!kc->nsleep) + return -ENANOSLEEP_NOTSUP; if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; @@ -1034,8 +1023,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, 
int, flags, if (!timespec_valid(&t)) return -EINVAL; - return CLOCK_DISPATCH(which_clock, nsleep, - (which_clock, flags, &t, rmtp)); + return kc->nsleep(which_clock, flags, &t, rmtp); } /* -- cgit v1.2.3-71-gd317 From 59bd5bc24aa69f6c62da1e242c16f09f667def96 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:17 +0000 Subject: posix-timers: Convert clock_nanosleep_restart to clockid_to_kclock() Use the new kclock decoding function in clock_nanosleep_restart. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.131263211@linutronix.de> --- kernel/posix-timers.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ee69b216d5c3..4dd86d15bbd0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -277,12 +277,14 @@ static __init int init_posix_timers(void) struct k_clock clock_realtime = { .clock_getres = hrtimer_get_res, .nsleep = common_nsleep, + .nsleep_restart = hrtimer_nanosleep_restart, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, .clock_get = posix_ktime_get_ts, .clock_set = do_posix_clock_nosettime, .nsleep = common_nsleep, + .nsleep_restart = hrtimer_nanosleep_restart, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, @@ -1026,23 +1028,17 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, return kc->nsleep(which_clock, flags, &t, rmtp); } -/* - * nanosleep_restart for monotonic and realtime clocks - */ -static int common_nsleep_restart(struct restart_block *restart_block) -{ - return hrtimer_nanosleep_restart(restart_block); -} - /* * This will restart clock_nanosleep. This is required only by * compat_clock_nanosleep_restart for now. 
*/ -long -clock_nanosleep_restart(struct restart_block *restart_block) +long clock_nanosleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; + struct k_clock *kc = clockid_to_kclock(which_clock); + + if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) + return -EINVAL; - return CLOCK_DISPATCH(which_clock, nsleep_restart, - (restart_block)); + return kc->nsleep_restart(restart_block); } -- cgit v1.2.3-71-gd317 From 3751f9f29bcbc19bd10e92254a273486f150c245 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:20 +0000 Subject: posix-timers: Cleanup restart_block usage posix timers still use the legacy arg0-arg3 members of restart_block. Use restart_block.nanosleep instead Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.232288779@linutronix.de> --- kernel/posix-cpu-timers.c | 38 +++++++++++++++----------------------- kernel/posix-timers.c | 2 +- 2 files changed, 16 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 816cd49a5ad9..9e617b00afa9 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1485,7 +1485,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = - &current_thread_info()->restart_block; + &current_thread_info()->restart_block; struct itimerspec it; int error; @@ -1501,50 +1501,42 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, if (error == -ERESTART_RESTARTBLOCK) { - if (flags & TIMER_ABSTIME) + if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; /* - * Report back to the user the time still remaining. - */ - if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + * Report back to the user the time still remaining.
+ */ + if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; - restart_block->arg0 = which_clock; - restart_block->arg1 = (unsigned long) rmtp; - restart_block->arg2 = rqtp->tv_sec; - restart_block->arg3 = rqtp->tv_nsec; + restart_block->nanosleep.index = which_clock; + restart_block->nanosleep.rmtp = rmtp; + restart_block->nanosleep.expires = timespec_to_ns(rqtp); } return error; } long posix_cpu_nsleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->arg0; - struct timespec __user *rmtp; + clockid_t which_clock = restart_block->nanosleep.index; struct timespec t; struct itimerspec it; int error; - rmtp = (struct timespec __user *) restart_block->arg1; - t.tv_sec = restart_block->arg2; - t.tv_nsec = restart_block->arg3; + t = ns_to_timespec(restart_block->nanosleep.expires); - restart_block->fn = do_no_restart_syscall; error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); if (error == -ERESTART_RESTARTBLOCK) { + struct timespec __user *rmtp = restart_block->nanosleep.rmtp; /* - * Report back to the user the time still remaining. - */ - if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + * Report back to the user the time still remaining. 
+ */ + if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; - restart_block->fn = posix_cpu_nsleep_restart; - restart_block->arg0 = which_clock; - restart_block->arg1 = (unsigned long) rmtp; - restart_block->arg2 = t.tv_sec; - restart_block->arg3 = t.tv_nsec; + restart_block->nanosleep.expires = timespec_to_ns(&t); } return error; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4dd86d15bbd0..4762986814c8 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1034,7 +1034,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, */ long clock_nanosleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->arg0; + clockid_t which_clock = restart_block->nanosleep.index; struct k_clock *kc = clockid_to_kclock(which_clock); if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) -- cgit v1.2.3-71-gd317 From 79c9da0d0539fb341a1b48a2a5a23d974726de90 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:45 +0000 Subject: posix-cpu-timers: Remove the stub nanosleep functions CLOCK_THREAD_CPUTIME_ID implements stub functions for nanosleep and nanosleep_restart, which return -EINVAL. That return value is wrong. The correct return value is -ENOTSUP. Remove the stubs and let the new dispatch code return the correct error code. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.422446502@linutronix.de> --- kernel/posix-cpu-timers.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9e617b00afa9..8dc4cd7faf89 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1586,15 +1586,6 @@ static int thread_cpu_timer_create(struct k_itimer *timer) timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } -static int thread_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp) -{ - return -EINVAL; -} -static long thread_cpu_nsleep_restart(struct restart_block *restart_block) -{ - return -EINVAL; -} struct k_clock clock_posix_cpu = { .clock_getres = posix_cpu_clock_getres, @@ -1623,8 +1614,6 @@ static __init int init_posix_cpu_timers(void) .clock_get = thread_cpu_clock_get, .clock_set = do_posix_clock_nosettime, .timer_create = thread_cpu_timer_create, - .nsleep = thread_cpu_nsleep, - .nsleep_restart = thread_cpu_nsleep_restart, }; struct timespec ts; -- cgit v1.2.3-71-gd317 From 26f9a4796af330173d790c8d2b5e2efcc489e755 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:48 +0000 Subject: posix-timers: Convert clock_settime to clockid_to_kclock() Use the new kclock decoding function in clock_settime and cleanup all kclocks which use the default functions. Rename the misnomed common_clock_set() to posix_clock_realtime_set(). 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.518851246@linutronix.de> --- include/linux/posix-timers.h | 3 --- kernel/posix-cpu-timers.c | 2 -- kernel/posix-timers.c | 31 ++++++++++++------------------- 3 files changed, 12 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index cd6da067bce1..4aaf0c5c7cea 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -89,9 +89,6 @@ extern struct k_clock clock_posix_cpu; void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); -/* error handlers for timer_create, nanosleep and settime */ -int do_posix_clock_nosettime(const clockid_t, const struct timespec *tp); - /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8dc4cd7faf89..504fbab10b82 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1604,7 +1604,6 @@ static __init int init_posix_cpu_timers(void) struct k_clock process = { .clock_getres = process_cpu_clock_getres, .clock_get = process_cpu_clock_get, - .clock_set = do_posix_clock_nosettime, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, .nsleep_restart = process_cpu_nsleep_restart, @@ -1612,7 +1611,6 @@ static __init int init_posix_cpu_timers(void) struct k_clock thread = { .clock_getres = thread_cpu_clock_getres, .clock_get = thread_cpu_clock_get, - .clock_set = do_posix_clock_nosettime, .timer_create = thread_cpu_timer_create, }; struct timespec ts; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4762986814c8..49f358c37708 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -199,12 +199,6 @@ static int common_clock_get(clockid_t which_clock, struct timespec *tp) return 0; } -static inline int common_clock_set(const clockid_t 
which_clock, - const struct timespec *tp) -{ - return do_sys_settimeofday(tp, NULL); -} - static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -232,6 +226,13 @@ static inline int invalid_clockid(const clockid_t which_clock) return 1; } +/* Set clock_realtime */ +static int posix_clock_realtime_set(const clockid_t which_clock, + const struct timespec *tp) +{ + return do_sys_settimeofday(tp, NULL); +} + /* * Get monotonic time for posix timers */ @@ -276,32 +277,29 @@ static __init int init_posix_timers(void) { struct k_clock clock_realtime = { .clock_getres = hrtimer_get_res, + .clock_set = posix_clock_realtime_set, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, .clock_get = posix_ktime_get_ts, - .clock_set = do_posix_clock_nosettime, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, .clock_get = posix_get_monotonic_raw, - .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, }; struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_realtime_coarse, - .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, }; struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_monotonic_coarse, - .clock_set = do_posix_clock_nosettime, .timer_create = no_timer_create, }; @@ -940,24 +938,19 @@ void exit_itimers(struct signal_struct *sig) } } -/* Not available / possible... 
functions */ -int do_posix_clock_nosettime(const clockid_t clockid, const struct timespec *tp) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); - SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { + struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec new_tp; - if (invalid_clockid(which_clock)) + if (!kc || !kc->clock_set) return -EINVAL; + if (copy_from_user(&new_tp, tp, sizeof (*tp))) return -EFAULT; - return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); + return kc->clock_set(which_clock, &new_tp); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, -- cgit v1.2.3-71-gd317 From 42285777631aa0654fbb6442057b3e176445c6c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:50 +0000 Subject: posix-timers: Convert clock_gettime() to clockid_to_kclock() Use the new kclock decoding mechanism and rename the misnomed common_clock_get() to posix_clock_realtime_get(). 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.611097203@linutronix.de> --- kernel/posix-timers.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 49f358c37708..d9e5edfe8a1b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -190,15 +190,6 @@ static inline int common_clock_getres(const clockid_t which_clock, return 0; } -/* - * Get real time for posix timers - */ -static int common_clock_get(clockid_t which_clock, struct timespec *tp) -{ - ktime_get_real_ts(tp); - return 0; -} - static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -226,6 +217,13 @@ static inline int invalid_clockid(const clockid_t which_clock) return 1; } +/* Get clock_realtime */ +static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_real_ts(tp); + return 0; +} + /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec *tp) @@ -277,6 +275,7 @@ static __init int init_posix_timers(void) { struct k_clock clock_realtime = { .clock_getres = hrtimer_get_res, + .clock_get = posix_clock_realtime_get, .clock_set = posix_clock_realtime_set, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, @@ -956,18 +955,21 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct timespec __user *,tp) { + struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec kernel_tp; int error; - if (invalid_clockid(which_clock)) + if (!kc) return -EINVAL; - error = CLOCK_DISPATCH(which_clock, clock_get, - (which_clock, &kernel_tp)); + if (!kc->clock_get) + return -EOPNOTSUPP; + + error = kc->clock_get(which_clock, &kernel_tp); + if (!error && copy_to_user(tp, 
&kernel_tp, sizeof (kernel_tp))) error = -EFAULT; return error; - } SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, -- cgit v1.2.3-71-gd317 From 4359ac0ace1a2a267927390ad27f781a2f8e0ab8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Feb 2011 11:45:23 +0100 Subject: posix-timers: Make clock_getres and clock_get mandatory Richard said: "I would think that we can require k_clocks to provide the read function. This could be checked and enforced in register_posix_clock()." Add checks for clock_getres and clock_get in the register function. Suggested-by: Richard Cochran Cc: John Stultz Signed-off-by: Thomas Gleixner --- kernel/posix-timers.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d9e5edfe8a1b..7f66143d1ce5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -485,7 +485,18 @@ static struct pid *good_sigevent(sigevent_t * event) void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { - printk("POSIX clock register failed for clock_id %d\n", + printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n", + clock_id); + return; + } + + if (!new_clock->clock_get) { + printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n", + clock_id); + return; + } + if (!new_clock->clock_getres) { + printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n", clock_id); return; } @@ -961,8 +972,6 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, if (!kc) return -EINVAL; - if (!kc->clock_get) - return -EOPNOTSUPP; error = kc->clock_get(which_clock, &kernel_tp); -- cgit v1.2.3-71-gd317 From e5e542eea9075dd008993c2ee80b2cc9f31fc494 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:53 +0000 Subject: posix-timers: Convert clock_getres() to clockid_to_kclock() Use the new kclock decoding. 
Fixup the fallout in mmtimer.c Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.709802797@linutronix.de> --- drivers/char/mmtimer.c | 10 ++++++++++ kernel/posix-timers.c | 17 ++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 262d10453cb8..141ffaeb976c 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -53,6 +53,8 @@ MODULE_LICENSE("GPL"); #define RTC_BITS 55 /* 55 bits for this implementation */ +static struct k_clock sgi_clock; + extern unsigned long sn_rtc_cycles_per_second; #define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC)) @@ -763,10 +765,18 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, return err; } +static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp) +{ + tp->tv_sec = 0; + tp->tv_nsec = sgi_clock.res; + return 0; +} + static struct k_clock sgi_clock = { .res = 0, .clock_set = sgi_clock_set, .clock_get = sgi_clock_get, + .clock_getres = sgi_clock_getres, .timer_create = sgi_timer_create, .timer_set = sgi_timer_set, .timer_del = sgi_timer_del, diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7f66143d1ce5..748497fffd0f 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -182,14 +182,6 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) * the function pointer CALL in struct k_clock. 
*/ -static inline int common_clock_getres(const clockid_t which_clock, - struct timespec *tp) -{ - tp->tv_sec = 0; - tp->tv_nsec = posix_clocks[which_clock].res; - return 0; -} - static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -984,18 +976,17 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { + struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec rtn_tp; int error; - if (invalid_clockid(which_clock)) + if (!kc) return -EINVAL; - error = CLOCK_DISPATCH(which_clock, clock_getres, - (which_clock, &rtn_tp)); + error = kc->clock_getres(which_clock, &rtn_tp); - if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) { + if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) error = -EFAULT; - } return error; } -- cgit v1.2.3-71-gd317 From ebaac757acae0431e2c79c00e09f1debdabbddd7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:56 +0000 Subject: posix-timers: Remove useless res field from k_clock The res member of kclock is only used by mmtimer.c, but even there it contains redundant information. Remove the field and fixup mmtimer. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.808714587@linutronix.de> --- drivers/char/mmtimer.c | 5 ++--- include/linux/posix-timers.h | 1 - kernel/posix-timers.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 141ffaeb976c..ff41eb3eec92 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -768,12 +768,11 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp) { tp->tv_sec = 0; - tp->tv_nsec = sgi_clock.res; + tp->tv_nsec = sgi_clock_period; return 0; } static struct k_clock sgi_clock = { - .res = 0, .clock_set = sgi_clock_set, .clock_get = sgi_clock_get, .clock_getres = sgi_clock_getres, @@ -840,7 +839,7 @@ static int __init mmtimer_init(void) (unsigned long) node); } - sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second; + sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second; register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock); printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION, diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 4aaf0c5c7cea..ef574d177fb6 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -67,7 +67,6 @@ struct k_itimer { }; struct k_clock { - int res; /* in nanoseconds */ int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); int (*clock_set) (const clockid_t which_clock, const struct timespec *tp); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 748497fffd0f..f9142a99b5cb 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -204,8 +204,6 @@ static inline int invalid_clockid(const clockid_t which_clock) return 1; if (posix_clocks[which_clock].clock_getres != NULL) return 0; - if (posix_clocks[which_clock].res != 0) - return 0; return 1; } -- 
cgit v1.2.3-71-gd317 From 838394fbf989973ec7f5a0ad82cb6ff09e5c39aa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:51:58 +0000 Subject: posix-timers: Convert timer_create() to clockid_to_kclock() Setup timer_create for CLOCK_MONOTONIC and CLOCK_REALTIME kclocks and remove the no_timer_create() implementation. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134418.903604289@linutronix.de> --- kernel/posix-timers.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index f9142a99b5cb..4f71382a4ca8 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -146,6 +146,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS]; */ static int common_nsleep(const clockid_t, int flags, struct timespec *t, struct timespec __user *rmtp); +static int common_timer_create(struct k_itimer *new_timer); static void common_timer_get(struct k_itimer *, struct itimerspec *); static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); @@ -174,25 +175,6 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) (posix_clocks[clock].call != NULL \ ? (*posix_clocks[clock].call) arglist : common_##call arglist)) -/* - * Default clock hook functions when the struct k_clock passed - * to register_posix_clock leaves a function pointer null. - * - * The function common_CALL is the default implementation for - * the function pointer CALL in struct k_clock. - */ - -static int common_timer_create(struct k_itimer *new_timer) -{ - hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); - return 0; -} - -static int no_timer_create(struct k_itimer *new_timer) -{ - return -EOPNOTSUPP; -} - /* * Return nonzero if we know a priori this clockid_t value is bogus. 
*/ @@ -269,27 +251,26 @@ static __init int init_posix_timers(void) .clock_set = posix_clock_realtime_set, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, + .timer_create = common_timer_create, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, .clock_get = posix_ktime_get_ts, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, + .timer_create = common_timer_create, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, .clock_get = posix_get_monotonic_raw, - .timer_create = no_timer_create, }; struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_realtime_coarse, - .timer_create = no_timer_create, }; struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get = posix_get_monotonic_coarse, - .timer_create = no_timer_create, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); @@ -534,19 +515,28 @@ static struct k_clock *clockid_to_kclock(const clockid_t id) return &posix_clocks[id]; } +static int common_timer_create(struct k_itimer *new_timer) +{ + hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); + return 0; +} + /* Create a POSIX.1b interval timer. 
*/ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, struct sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { + struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; sigevent_t event; int it_id_set = IT_ID_NOT_SET; - if (invalid_clockid(which_clock)) + if (!kc) return -EINVAL; + if (!kc->timer_create) + return -EOPNOTSUPP; new_timer = alloc_posix_timer(); if (unlikely(!new_timer)) @@ -608,7 +598,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, goto out; } - error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); + error = kc->timer_create(new_timer); if (error) goto out; @@ -618,7 +608,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, spin_unlock_irq(&current->sighand->siglock); return 0; - /* + /* * In the case of the timer belonging to another task, after * the task is unlocked, the timer is owned by the other task * and may cease to exist at any time. Don't use or modify -- cgit v1.2.3-71-gd317 From 27722df16ef143017db55ac7baac1703a68017ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:52:01 +0000 Subject: posix-timers: Convert timer_settime() to clockid_to_kclock() Set the common function for CLOCK_MONOTONIC and CLOCK_REALTIME kclocks and use the new decoding function.
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134419.001863714@linutronix.de> --- kernel/posix-timers.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4f71382a4ca8..a4dbfe71c5a5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -252,6 +252,7 @@ static __init int init_posix_timers(void) .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, .timer_create = common_timer_create, + .timer_set = common_timer_set, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, @@ -259,6 +260,7 @@ static __init int init_posix_timers(void) .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, .timer_create = common_timer_create, + .timer_set = common_timer_set, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, @@ -814,6 +816,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, int error = 0; unsigned long flag; struct itimerspec *rtn = old_setting ? &old_spec : NULL; + struct k_clock *kc; if (!new_setting) return -EINVAL; @@ -829,8 +832,11 @@ retry: if (!timr) return -EINVAL; - error = CLOCK_DISPATCH(timr->it_clock, timer_set, - (timr, flags, &new_spec, rtn)); + kc = clockid_to_kclock(timr->it_clock); + if (WARN_ON_ONCE(!kc || !kc->timer_set)) + error = -EINVAL; + else + error = kc->timer_set(timr, flags, &new_spec, rtn); unlock_timer(timr, flag); if (error == TIMER_RETRY) { -- cgit v1.2.3-71-gd317 From a7319fa253a549c4c6528fb550ae6e72a9c83811 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:52:04 +0000 Subject: posix-timers: Convert timer_gettime() to clockid_to_kclock() Set the common function for CLOCK_MONOTONIC and CLOCK_REALTIME kclocks and use the new decoding function. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134419.101243181@linutronix.de> --- kernel/posix-timers.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a4dbfe71c5a5..c1e2636f9e45 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -253,6 +253,7 @@ static __init int init_posix_timers(void) .nsleep_restart = hrtimer_nanosleep_restart, .timer_create = common_timer_create, .timer_set = common_timer_set, + .timer_get = common_timer_get, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, @@ -261,6 +262,7 @@ static __init int init_posix_timers(void) .nsleep_restart = hrtimer_nanosleep_restart, .timer_create = common_timer_create, .timer_set = common_timer_set, + .timer_get = common_timer_get, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, @@ -712,22 +714,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { - struct k_itimer *timr; struct itimerspec cur_setting; + struct k_itimer *timr; + struct k_clock *kc; unsigned long flags; + int ret = 0; timr = lock_timer(timer_id, &flags); if (!timr) return -EINVAL; - CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting)); + kc = clockid_to_kclock(timr->it_clock); + if (WARN_ON_ONCE(!kc || !kc->timer_get)) + ret = -EINVAL; + else + kc->timer_get(timr, &cur_setting); unlock_timer(timr, flags); - if (copy_to_user(setting, &cur_setting, sizeof (cur_setting))) + if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting))) return -EFAULT; - return 0; + return ret; } /* -- cgit v1.2.3-71-gd317 From 6761c6702e2c647582e1829abe8cf90794f61d9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:52:07 +0000 Subject: posix-timers: Convert timer_delete() to 
clockid_to_kclock() Set the common function for CLOCK_MONOTONIC and CLOCK_REALTIME kclocks and use the new decoding function. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134419.198999420@linutronix.de> --- kernel/posix-timers.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index c1e2636f9e45..ade7dec49f96 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -254,6 +254,7 @@ static __init int init_posix_timers(void) .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, + .timer_del = common_timer_del, }; struct k_clock clock_monotonic = { .clock_getres = hrtimer_get_res, @@ -263,6 +264,7 @@ static __init int init_posix_timers(void) .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, + .timer_del = common_timer_del, }; struct k_clock clock_monotonic_raw = { .clock_getres = hrtimer_get_res, @@ -859,7 +861,7 @@ retry: return error; } -static inline int common_timer_del(struct k_itimer *timer) +static int common_timer_del(struct k_itimer *timer) { timer->it.real.interval.tv64 = 0; @@ -870,7 +872,11 @@ static inline int common_timer_del(struct k_itimer *timer) static inline int timer_delete_hook(struct k_itimer *timer) { - return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer)); + struct k_clock *kc = clockid_to_kclock(timer->it_clock); + + if (WARN_ON_ONCE(!kc || !kc->timer_del)) + return -EINVAL; + return kc->timer_del(timer); } /* Delete a POSIX.1b interval timer. */ -- cgit v1.2.3-71-gd317 From 0aa3975f02ce78f27be3076fbfa3d94ae5a659d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:52:09 +0000 Subject: posix-timers: Remove CLOCK_DISPATCH leftovers All users gone. Remove the cruft. Huge thanks to Richard Cochran who tackled that maze first. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134419.294620613@linutronix.de> --- kernel/posix-timers.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ade7dec49f96..ad154dfd7c51 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -167,28 +167,6 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) spin_unlock_irqrestore(&timr->it_lock, flags); } -/* - * Call the k_clock hook function if non-null, or the default function. - */ -#define CLOCK_DISPATCH(clock, call, arglist) \ - ((clock) < 0 ? posix_cpu_##call arglist : \ - (posix_clocks[clock].call != NULL \ - ? (*posix_clocks[clock].call) arglist : common_##call arglist)) - -/* - * Return nonzero if we know a priori this clockid_t value is bogus. - */ -static inline int invalid_clockid(const clockid_t which_clock) -{ - if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ - return 0; - if ((unsigned) which_clock >= MAX_CLOCKS) - return 1; - if (posix_clocks[which_clock].clock_getres != NULL) - return 0; - return 1; -} - /* Get clock_realtime */ static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp) { -- cgit v1.2.3-71-gd317 From bc2c8ea483d73e95fc88f1fc9e7755180f89b892 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Feb 2011 13:52:12 +0000 Subject: posix-timers: Make posix-cpu-timers functions static All functions are accessed via clock_posix_cpu now. So make them static. 
Signed-off-by: Thomas Gleixner Acked-by: John Stultz Tested-by: Richard Cochran LKML-Reference: <20110201134419.389755466@linutronix.de> --- include/linux/posix-timers.h | 12 ------------ kernel/posix-cpu-timers.c | 27 +++++++++++++++------------ 2 files changed, 15 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index ef574d177fb6..8206255a547c 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -91,18 +91,6 @@ void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); -int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts); -int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts); -int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts); -int posix_cpu_timer_create(struct k_itimer *timer); -int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp); -long posix_cpu_nsleep_restart(struct restart_block *restart_block); -int posix_cpu_timer_set(struct k_itimer *timer, int flags, - struct itimerspec *new, struct itimerspec *old); -int posix_cpu_timer_del(struct k_itimer *timer); -void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp); - void posix_cpu_timer_schedule(struct k_itimer *timer); void run_posix_cpu_timers(struct task_struct *task); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 504fbab10b82..609e187f43e7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p) return p->utime; } -int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int +posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { int error = 
check_clock(which_clock); if (!error) { @@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) return error; } -int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) +static int +posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock, } -int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) +static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; @@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) * This is called from sys_timer_create() and do_cpu_nanosleep() with the * new timer already all-zeros initialized. */ -int posix_cpu_timer_create(struct k_itimer *new_timer) +static int posix_cpu_timer_create(struct k_itimer *new_timer) { int ret = 0; const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); @@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) */ -int posix_cpu_timer_del(struct k_itimer *timer) +static int posix_cpu_timer_del(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; int ret = 0; @@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock, * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) 
*/ -int posix_cpu_timer_set(struct k_itimer *timer, int flags, - struct itimerspec *new, struct itimerspec *old) +static int posix_cpu_timer_set(struct k_itimer *timer, int flags, + struct itimerspec *new, struct itimerspec *old) { struct task_struct *p = timer->it.cpu.task; union cpu_time_count old_expires, new_expires, old_incr, val; @@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, return ret; } -void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) +static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) { union cpu_time_count now; struct task_struct *p = timer->it.cpu.task; @@ -1481,8 +1483,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, return error; } -int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp) +static long posix_cpu_nsleep_restart(struct restart_block *restart_block); + +static int posix_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = &current_thread_info()->restart_block; @@ -1517,7 +1521,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, return error; } -long posix_cpu_nsleep_restart(struct restart_block *restart_block) +static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->nanosleep.index; struct timespec t; @@ -1542,7 +1546,6 @@ long posix_cpu_nsleep_restart(struct restart_block *restart_block) } - #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) -- cgit v1.2.3-71-gd317 From 0061748dd2400d0bcd4d49d258db5d7b5d106ca0 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:52:15 +0000 Subject: posix-timer: Update comment Pick the cleanup to the comment in posix-timers.c from Richards all in one conversion patch. 
Originally-from: Richard Cochran Signed-off-by: Thomas Gleixner Acked-by: John Stultz LKML-Reference: <20110201134419.487708516@linutronix.de> --- kernel/posix-timers.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ad154dfd7c51..a3fdfd4be0ec 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -102,11 +102,7 @@ static DEFINE_SPINLOCK(idr_lock); /* * CLOCKs: The POSIX standard calls for a couple of clocks and allows us * to implement others. This structure defines the various - * clocks and allows the possibility of adding others. We - * provide an interface to add clocks to the table and expect - * the "arch" code to add at least one clock that is high - * resolution. Here we define the standard CLOCK_REALTIME as a - * 1/HZ resolution clock. + * clocks. * * RESOLUTION: Clock resolution is used to round up timer and interval * times, NOT to report clock times, which are reported with as @@ -116,20 +112,13 @@ static DEFINE_SPINLOCK(idr_lock); * necessary code is written. The standard says we should say * something about this issue in the documentation... * - * FUNCTIONS: The CLOCKs structure defines possible functions to handle - * various clock functions. For clocks that use the standard - * system timer code these entries should be NULL. This will - * allow dispatch without the overhead of indirect function - * calls. CLOCKS that depend on other sources (e.g. WWV or GPS) - * must supply functions here, even if the function just returns - * ENOSYS. The standard POSIX timer management code assumes the - * following: 1.) The k_itimer struct (sched.h) is used for the - * timer. 2.) The list, it_lock, it_clock, it_id and it_pid - * fields are not modified by timer code. + * FUNCTIONS: The CLOCKs structure defines possible functions to + * handle various clock functions. 
* - * At this time all functions EXCEPT clock_nanosleep can be - * redirected by the CLOCKS structure. Clock_nanosleep is in - * there, but the code ignores it. + * The standard POSIX timer management code assumes the + * following: 1.) The k_itimer struct (sched.h) is used for + * the timer. 2.) The list, it_lock, it_clock, it_id and + * it_pid fields are not modified by timer code. * * Permissions: It is assumed that the clock_settime() function defined * for each clock will take care of permission checks. Some -- cgit v1.2.3-71-gd317 From c528f7c6c208f1fae6b4025957173dec045e5f21 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 1 Feb 2011 13:52:17 +0000 Subject: time: Introduce timekeeping_inject_offset This adds a kernel-internal timekeeping interface to add or subtract a fixed amount from CLOCK_REALTIME. This makes it so kernel users or interfaces trying to do so do not have to read the time, then add an offset and then call settimeofday(), which adds some extra error in comparison to just simply adding the offset in the kernel timekeeping core. 
Signed-off-by: John Stultz Signed-off-by: Richard Cochran LKML-Reference: <20110201134419.584311693@linutronix.de> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/timekeeping.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'kernel') diff --git a/include/linux/time.h b/include/linux/time.h index 7c44e7778033..379b9037b5b4 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -166,6 +166,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern void timekeeping_leap_insert(int leapsecond); +extern int timekeeping_inject_offset(struct timespec *ts); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4f9f65b91323..6262c1d18397 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -387,6 +387,42 @@ int do_settimeofday(const struct timespec *tv) EXPORT_SYMBOL(do_settimeofday); + +/** + * timekeeping_inject_offset - Adds or subtracts from the current time. + * @ts: pointer to the timespec variable containing the offset + * + * Adds or subtracts an offset value from the current time. 
+ */ +int timekeeping_inject_offset(struct timespec *ts) +{ + unsigned long flags; + + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return -EINVAL; + + write_seqlock_irqsave(&xtime_lock, flags); + + timekeeping_forward_now(); + + xtime = timespec_add(xtime, *ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); + + timekeeper.ntp_error = 0; + ntp_clear(); + + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); + + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* signal hrtimers about time change */ + clock_was_set(); + + return 0; +} +EXPORT_SYMBOL(timekeeping_inject_offset); + /** * change_clocksource - Swaps clocksources if a new one is available * -- cgit v1.2.3-71-gd317 From 094aa1881fdc1b8889b442eb3511b31f3ec2b762 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:52:20 +0000 Subject: ntp: Add ADJ_SETOFFSET mode bit This patch adds a new mode bit into the timex structure. When set, the bit instructs the kernel to add the given time value to the current time. 
Signed-off-by: Richard Cochran Acked-by: John Stultz LKML-Reference: <20110201134320.688829863@linutronix.de> Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 3 ++- kernel/time/ntp.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/timex.h b/include/linux/timex.h index d23999f9499d..aa60fe7b6ed6 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -73,7 +73,7 @@ struct timex { long tolerance; /* clock frequency tolerance (ppm) * (read only) */ - struct timeval time; /* (read only) */ + struct timeval time; /* (read only, except for ADJ_SETOFFSET) */ long tick; /* (modified) usecs between clock ticks */ long ppsfreq; /* pps frequency (scaled ppm) (ro) */ @@ -102,6 +102,7 @@ struct timex { #define ADJ_STATUS 0x0010 /* clock status */ #define ADJ_TIMECONST 0x0020 /* pll time constant */ #define ADJ_TAI 0x0080 /* set TAI offset */ +#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */ #define ADJ_MICRO 0x1000 /* select microsecond resolution */ #define ADJ_NANO 0x2000 /* select nanosecond resolution */ #define ADJ_TICK 0x4000 /* tick value */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index ed8cfdf16983..5ac593267a26 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -648,6 +648,17 @@ int do_adjtimex(struct timex *txc) hrtimer_cancel(&leap_timer); } + if (txc->modes & ADJ_SETOFFSET) { + struct timespec delta; + if ((unsigned long)txc->time.tv_usec >= NSEC_PER_SEC) + return -EINVAL; + delta.tv_sec = txc->time.tv_sec; + delta.tv_nsec = txc->time.tv_usec; + if (!(txc->modes & ADJ_NANO)) + delta.tv_nsec *= 1000; + timekeeping_inject_offset(&delta); + } + getnstimeofday(&ts); write_seqlock_irq(&xtime_lock); -- cgit v1.2.3-71-gd317 From 65f5d80bdf83ec0d7f3887db10153bf3f36ed73c Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:52:23 +0000 Subject: time: Splitout compat timex accessors Split out the compat timex accessors into separate 
functions. Preparatory patch for a new syscall. [ tglx: Split that patch from Richards "posix-timers: Introduce a syscall for clock tuning.". Keeps the changes strictly separate ] Originally-from: Richard Cochran Acked-by: John Stultz Signed-off-by: Thomas Gleixner LKML-Reference: <20110201134419.772343089@linutronix.de> --- kernel/compat.c | 113 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 48 deletions(-) (limited to 'kernel') diff --git a/kernel/compat.c b/kernel/compat.c index c9e2ec0b34a8..449e853cf41d 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o, put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; } +static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp) +{ + memset(txc, 0, sizeof(struct timex)); + + if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || + __get_user(txc->modes, &utp->modes) || + __get_user(txc->offset, &utp->offset) || + __get_user(txc->freq, &utp->freq) || + __get_user(txc->maxerror, &utp->maxerror) || + __get_user(txc->esterror, &utp->esterror) || + __get_user(txc->status, &utp->status) || + __get_user(txc->constant, &utp->constant) || + __get_user(txc->precision, &utp->precision) || + __get_user(txc->tolerance, &utp->tolerance) || + __get_user(txc->time.tv_sec, &utp->time.tv_sec) || + __get_user(txc->time.tv_usec, &utp->time.tv_usec) || + __get_user(txc->tick, &utp->tick) || + __get_user(txc->ppsfreq, &utp->ppsfreq) || + __get_user(txc->jitter, &utp->jitter) || + __get_user(txc->shift, &utp->shift) || + __get_user(txc->stabil, &utp->stabil) || + __get_user(txc->jitcnt, &utp->jitcnt) || + __get_user(txc->calcnt, &utp->calcnt) || + __get_user(txc->errcnt, &utp->errcnt) || + __get_user(txc->stbcnt, &utp->stbcnt)) + return -EFAULT; + + return 0; +} + +static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc) +{ + if (!access_ok(VERIFY_WRITE, utp, 
sizeof(struct compat_timex)) || + __put_user(txc->modes, &utp->modes) || + __put_user(txc->offset, &utp->offset) || + __put_user(txc->freq, &utp->freq) || + __put_user(txc->maxerror, &utp->maxerror) || + __put_user(txc->esterror, &utp->esterror) || + __put_user(txc->status, &utp->status) || + __put_user(txc->constant, &utp->constant) || + __put_user(txc->precision, &utp->precision) || + __put_user(txc->tolerance, &utp->tolerance) || + __put_user(txc->time.tv_sec, &utp->time.tv_sec) || + __put_user(txc->time.tv_usec, &utp->time.tv_usec) || + __put_user(txc->tick, &utp->tick) || + __put_user(txc->ppsfreq, &utp->ppsfreq) || + __put_user(txc->jitter, &utp->jitter) || + __put_user(txc->shift, &utp->shift) || + __put_user(txc->stabil, &utp->stabil) || + __put_user(txc->jitcnt, &utp->jitcnt) || + __put_user(txc->calcnt, &utp->calcnt) || + __put_user(txc->errcnt, &utp->errcnt) || + __put_user(txc->stbcnt, &utp->stbcnt) || + __put_user(txc->tai, &utp->tai)) + return -EFAULT; + return 0; +} + asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) { @@ -951,58 +1009,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) { struct timex txc; - int ret; + int err, ret; - memset(&txc, 0, sizeof(struct timex)); - - if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || - __get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, 
&utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; + err = compat_get_timex(&txc, utp); + if (err) + return err; ret = do_adjtimex(&txc); - if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || - __put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt) || - __put_user(txc.tai, &utp->tai)) - ret = -EFAULT; + err = compat_put_timex(utp, &txc); + if (err) + return err; return ret; } -- cgit v1.2.3-71-gd317 From f1f1d5ebd10ffa4242bce7a90a56a222d6b7bc77 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:52:26 +0000 Subject: posix-timers: Introduce a syscall for clock tuning. A new syscall is introduced that allows tuning of a POSIX clock. The new call, clock_adjtime, takes two parameters, the clock ID and a pointer to a struct timex. Any ADJTIMEX(2) operation may be requested via this system call, but various POSIX clocks may or may not support tuning. 
[ tglx: Adapted to the posix-timer cleanup series. Avoid copy_to_user in the error case ] Signed-off-by: Richard Cochran Acked-by: John Stultz LKML-Reference: <20110201134419.869804645@linutronix.de> Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 ++ include/linux/syscalls.h | 2 ++ kernel/compat.c | 23 +++++++++++++++++++++++ kernel/posix-timers.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 8206255a547c..79a1cea7f6ed 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -4,6 +4,7 @@ #include #include #include +#include union cpu_time_count { cputime_t cpu; @@ -71,6 +72,7 @@ struct k_clock { int (*clock_set) (const clockid_t which_clock, const struct timespec *tp); int (*clock_get) (const clockid_t which_clock, struct timespec * tp); + int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *, struct timespec __user *); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18cd0684fc4e..bfacab921239 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -311,6 +311,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp); asmlinkage long sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp); +asmlinkage long sys_clock_adjtime(clockid_t which_clock, + struct timex __user *tx); asmlinkage long sys_clock_getres(clockid_t which_clock, struct timespec __user *tp); asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, diff --git a/kernel/compat.c b/kernel/compat.c index 449e853cf41d..38b1d2c1cbe8 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -675,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock, return err; } +long compat_sys_clock_adjtime(clockid_t 
which_clock, + struct compat_timex __user *utp) +{ + struct timex txc; + mm_segment_t oldfs; + int err, ret; + + err = compat_get_timex(&txc, utp); + if (err) + return err; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc); + set_fs(oldfs); + + err = compat_put_timex(utp, &txc); + if (err) + return err; + + return ret; +} + long compat_sys_clock_getres(clockid_t which_clock, struct compat_timespec __user *tp) { diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a3fdfd4be0ec..5a5a4f1c0971 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -170,6 +170,12 @@ static int posix_clock_realtime_set(const clockid_t which_clock, return do_sys_settimeofday(tp, NULL); } +static int posix_clock_realtime_adj(const clockid_t which_clock, + struct timex *t) +{ + return do_adjtimex(t); +} + /* * Get monotonic time for posix timers */ @@ -216,6 +222,7 @@ static __init int init_posix_timers(void) .clock_getres = hrtimer_get_res, .clock_get = posix_clock_realtime_get, .clock_set = posix_clock_realtime_set, + .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, .nsleep_restart = hrtimer_nanosleep_restart, .timer_create = common_timer_create, @@ -948,6 +955,29 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, return error; } +SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, + struct timex __user *, utx) +{ + struct k_clock *kc = clockid_to_kclock(which_clock); + struct timex ktx; + int err; + + if (!kc) + return -EINVAL; + if (!kc->clock_adj) + return -EOPNOTSUPP; + + if (copy_from_user(&ktx, utx, sizeof(ktx))) + return -EFAULT; + + err = kc->clock_adj(which_clock, &ktx); + + if (!err && copy_to_user(utx, &ktx, sizeof(ktx))) + return -EFAULT; + + return err; +} + SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { -- cgit v1.2.3-71-gd317 From 81e294cba2596f5f10848bbe19d98b344c2a2d5c Mon Sep 17 00:00:00 2001 From: Richard 
Cochran Date: Tue, 1 Feb 2011 13:52:32 +0000 Subject: posix-timers: Add support for fd based clocks Extend the negative clockids which are currently used by posix cpu timers to encode the PID with a file descriptor based type which encodes the fd in the upper bits. Originally-from: Richard Cochran Signed-off-by: Thomas Gleixner Acked-by: John Stultz LKML-Reference: <20110201134420.062860200@linutronix.de> --- include/linux/posix-timers.h | 13 +++++++++++++ kernel/posix-timers.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 79a1cea7f6ed..88b9256169f8 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -18,6 +18,17 @@ struct cpu_timer_list { int firing; }; +/* + * Bit fields within a clockid: + * + * The most significant 29 bits hold either a pid or a file descriptor. + * + * Bit 2 indicates whether a cpu clock refers to a thread or a process. + * + * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. + * + * A clockid is invalid if bits 2, 1, and 0 are all set. + */ #define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) #define CPUCLOCK_PERTHREAD(clock) \ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) @@ -29,6 +40,8 @@ struct cpu_timer_list { #define CPUCLOCK_VIRT 1 #define CPUCLOCK_SCHED 2 #define CPUCLOCK_MAX 3 +#define CLOCKFD CPUCLOCK_MAX +#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) #define MAKE_PROCESS_CPUCLOCK(pid, clock) \ ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5a5a4f1c0971..df629d853a81 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -488,7 +488,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) static struct k_clock *clockid_to_kclock(const clockid_t id) { if (id < 0) - return &clock_posix_cpu; + return (id & CLOCKFD_MASK) == CLOCKFD ? 
NULL : &clock_posix_cpu; if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres) return NULL; -- cgit v1.2.3-71-gd317 From 527087374faa488776a789375a7d6ea74fda6f71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Feb 2011 12:10:09 +0100 Subject: posix-timers: Cleanup namespace Rename register_posix_clock() to posix_timers_register_clock(). That's what the function really does. As a side effect this cleans up the posix_clock namespace for the upcoming dynamic posix_clock infrastructure. Signed-off-by: Thomas Gleixner Tested-by: Richard Cochran Cc: John Stultz LKML-Reference: --- drivers/char/mmtimer.c | 2 +- include/linux/posix-timers.h | 2 +- kernel/posix-cpu-timers.c | 4 ++-- kernel/posix-timers.c | 15 ++++++++------- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index ff41eb3eec92..33dc2298af73 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -840,7 +840,7 @@ static int __init mmtimer_init(void) } sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second; - register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock); + posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock); printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION, sn_rtc_cycles_per_second/(unsigned long)1E6); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 88b9256169f8..9d6ffe2c92e5 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -101,7 +101,7 @@ struct k_clock { extern struct k_clock clock_posix_cpu; -void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); +void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock); /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 609e187f43e7..67fea9d25d55 100644 --- a/kernel/posix-cpu-timers.c +++ 
b/kernel/posix-cpu-timers.c @@ -1618,8 +1618,8 @@ static __init int init_posix_cpu_timers(void) }; struct timespec ts; - register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); - register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process); + posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread); cputime_to_timespec(cputime_one_jiffy, &ts); onecputick = ts.tv_nsec; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index df629d853a81..af936fd37140 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -253,11 +253,11 @@ static __init int init_posix_timers(void) .clock_get = posix_get_monotonic_coarse, }; - register_posix_clock(CLOCK_REALTIME, &clock_realtime); - register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); - register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); - register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); - register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); + posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime); + posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic); + posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, @@ -433,7 +433,8 @@ static struct pid *good_sigevent(sigevent_t * event) return task_pid(rtn); } -void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) +void posix_timers_register_clock(const clockid_t clock_id, + struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n", @@ -454,7 +455,7 @@ void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) posix_clocks[clock_id] = 
*new_clock; } -EXPORT_SYMBOL_GPL(register_posix_clock); +EXPORT_SYMBOL_GPL(posix_timers_register_clock); static struct k_itimer * alloc_posix_timer(void) { -- cgit v1.2.3-71-gd317 From 0606f422b453f76c31ab2b1bd52943ff06a2dcf2 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 1 Feb 2011 13:52:35 +0000 Subject: posix clocks: Introduce dynamic clocks This patch adds support for adding and removing posix clocks. The clock lifetime cycle is patterned after usb devices. Each clock is represented by a standard character device. In addition, the driver may optionally implement custom character device operations. The posix clock and timer system calls listed below now work with dynamic posix clocks, as well as the traditional static clocks. The following system calls are affected: - clock_adjtime (brand new syscall) - clock_gettime - clock_getres - clock_settime - timer_create - timer_delete - timer_gettime - timer_settime [ tglx: Adapted to the posix-timer cleanup. Moved clock_posix_dynamic to posix-clock.c and made all referenced functions static ] Signed-off-by: Richard Cochran Acked-by: John Stultz LKML-Reference: <20110201134420.164172635@linutronix.de> Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 150 +++++++++++++++ include/linux/posix-timers.h | 6 +- kernel/posix-timers.c | 4 +- kernel/time/Makefile | 3 +- kernel/time/posix-clock.c | 441 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 601 insertions(+), 3 deletions(-) create mode 100644 include/linux/posix-clock.h create mode 100644 kernel/time/posix-clock.c (limited to 'kernel') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h new file mode 100644 index 000000000000..369e19d3750b --- /dev/null +++ b/include/linux/posix-clock.h @@ -0,0 +1,150 @@ +/* + * posix-clock.h - support for dynamic clock devices + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of 
the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _LINUX_POSIX_CLOCK_H_ +#define _LINUX_POSIX_CLOCK_H_ + +#include +#include +#include +#include + +struct posix_clock; + +/** + * struct posix_clock_operations - functional interface to the clock + * + * Every posix clock is represented by a character device. Drivers may + * optionally offer extended capabilities by implementing the + * character device methods. The character device file operations are + * first handled by the clock device layer, then passed on to the + * driver by calling these functions. 
+ * + * @owner: The clock driver should set to THIS_MODULE + * @clock_adjtime: Adjust the clock + * @clock_gettime: Read the current time + * @clock_getres: Get the clock resolution + * @clock_settime: Set the current time value + * @timer_create: Create a new timer + * @timer_delete: Remove a previously created timer + * @timer_gettime: Get remaining time and interval of a timer + * @timer_settime: Set a timer's initial expiration and interval + * @fasync: Optional character device fasync method + * @mmap: Optional character device mmap method + * @open: Optional character device open method + * @release: Optional character device release method + * @ioctl: Optional character device ioctl method + * @read: Optional character device read method + * @poll: Optional character device poll method + */ +struct posix_clock_operations { + struct module *owner; + + int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); + + int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts); + + int (*clock_getres) (struct posix_clock *pc, struct timespec *ts); + + int (*clock_settime)(struct posix_clock *pc, + const struct timespec *ts); + + int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit); + + int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit); + + void (*timer_gettime)(struct posix_clock *pc, + struct k_itimer *kit, struct itimerspec *tsp); + + int (*timer_settime)(struct posix_clock *pc, + struct k_itimer *kit, int flags, + struct itimerspec *tsp, struct itimerspec *old); + /* + * Optional character device methods: + */ + int (*fasync) (struct posix_clock *pc, + int fd, struct file *file, int on); + + long (*ioctl) (struct posix_clock *pc, + unsigned int cmd, unsigned long arg); + + int (*mmap) (struct posix_clock *pc, + struct vm_area_struct *vma); + + int (*open) (struct posix_clock *pc, fmode_t f_mode); + + uint (*poll) (struct posix_clock *pc, + struct file *file, poll_table *wait); + + int (*release) (struct posix_clock
*pc); + + ssize_t (*read) (struct posix_clock *pc, + uint flags, char __user *buf, size_t cnt); +}; + +/** + * struct posix_clock - represents a dynamic posix clock + * + * @ops: Functional interface to the clock + * @cdev: Character device instance for this clock + * @kref: Reference count. + * @mutex: Protects the 'zombie' field from concurrent access. + * @zombie: If 'zombie' is true, then the hardware has disappeared. + * @release: A function to free the structure when the reference count reaches + * zero. May be NULL if structure is statically allocated. + * + * Drivers should embed their struct posix_clock within a private + * structure, obtaining a reference to it during callbacks using + * container_of(). + */ +struct posix_clock { + struct posix_clock_operations ops; + struct cdev cdev; + struct kref kref; + struct mutex mutex; + bool zombie; + void (*release)(struct posix_clock *clk); +}; + +/** + * posix_clock_register() - register a new clock + * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' + * @devid: Allocated device id + * + * A clock driver calls this function to register itself with the + * clock device subsystem. If 'clk' points to dynamically allocated + * memory, then the caller must provide a 'release' function to free + * that memory. + * + * Returns zero on success, non-zero otherwise. + */ +int posix_clock_register(struct posix_clock *clk, dev_t devid); + +/** + * posix_clock_unregister() - unregister a clock + * @clk: Clock instance previously registered via posix_clock_register() + * + * A clock driver calls this function to remove itself from the clock + * device subsystem. The posix_clock itself will remain (in an + * inactive state) until its reference count drops to zero, at which + * point it will be deallocated with its 'release' method. 
+ */ +void posix_clock_unregister(struct posix_clock *clk); + +#endif diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 9d6ffe2c92e5..d51243ae0726 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -32,7 +32,7 @@ struct cpu_timer_list { #define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) #define CPUCLOCK_PERTHREAD(clock) \ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) -#define CPUCLOCK_PID_MASK 7 + #define CPUCLOCK_PERTHREAD_MASK 4 #define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) #define CPUCLOCK_CLOCK_MASK 3 @@ -48,6 +48,9 @@ struct cpu_timer_list { #define MAKE_THREAD_CPUCLOCK(tid, clock) \ MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) +#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3)) + /* POSIX.1b interval timer structure. */ struct k_itimer { struct list_head list; /* free/ allocate list */ @@ -100,6 +103,7 @@ struct k_clock { }; extern struct k_clock clock_posix_cpu; +extern struct k_clock clock_posix_dynamic; void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index af936fd37140..44fcff131b38 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -489,7 +490,8 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) static struct k_clock *clockid_to_kclock(const clockid_t id) { if (id < 0) - return (id & CLOCKFD_MASK) == CLOCKFD ? NULL : &clock_posix_cpu; + return (id & CLOCKFD_MASK) == CLOCKFD ? 
+ &clock_posix_dynamic : &clock_posix_cpu; if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres) return NULL; diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ee266620b06c..b0425991e9ac 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,4 +1,5 @@ -obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o +obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o +obj-y += timeconv.o posix-clock.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c new file mode 100644 index 000000000000..04498cbf6002 --- /dev/null +++ b/kernel/time/posix-clock.c @@ -0,0 +1,441 @@ +/* + * posix-clock.c - support for dynamic clock devices + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include + +static void delete_clock(struct kref *kref); + +/* + * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. 
+ */ +static struct posix_clock *get_posix_clock(struct file *fp) +{ + struct posix_clock *clk = fp->private_data; + + mutex_lock(&clk->mutex); + + if (!clk->zombie) + return clk; + + mutex_unlock(&clk->mutex); + + return NULL; +} + +static void put_posix_clock(struct posix_clock *clk) +{ + mutex_unlock(&clk->mutex); +} + +static ssize_t posix_clock_read(struct file *fp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct posix_clock *clk = get_posix_clock(fp); + int err = -EINVAL; + + if (!clk) + return -ENODEV; + + if (clk->ops.read) + err = clk->ops.read(clk, fp->f_flags, buf, count); + + put_posix_clock(clk); + + return err; +} + +static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) +{ + struct posix_clock *clk = get_posix_clock(fp); + int result = 0; + + if (!clk) + return -ENODEV; + + if (clk->ops.poll) + result = clk->ops.poll(clk, fp, wait); + + put_posix_clock(clk); + + return result; +} + +static int posix_clock_fasync(int fd, struct file *fp, int on) +{ + struct posix_clock *clk = get_posix_clock(fp); + int err = 0; + + if (!clk) + return -ENODEV; + + if (clk->ops.fasync) + err = clk->ops.fasync(clk, fd, fp, on); + + put_posix_clock(clk); + + return err; +} + +static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct posix_clock *clk = get_posix_clock(fp); + int err = -ENODEV; + + if (!clk) + return -ENODEV; + + if (clk->ops.mmap) + err = clk->ops.mmap(clk, vma); + + put_posix_clock(clk); + + return err; +} + +static long posix_clock_ioctl(struct file *fp, + unsigned int cmd, unsigned long arg) +{ + struct posix_clock *clk = get_posix_clock(fp); + int err = -ENOTTY; + + if (!clk) + return -ENODEV; + + if (clk->ops.ioctl) + err = clk->ops.ioctl(clk, cmd, arg); + + put_posix_clock(clk); + + return err; +} + +#ifdef CONFIG_COMPAT +static long posix_clock_compat_ioctl(struct file *fp, + unsigned int cmd, unsigned long arg) +{ + struct posix_clock *clk = get_posix_clock(fp); + int err = -ENOTTY; + + if 
(!clk) + return -ENODEV; + + if (clk->ops.ioctl) + err = clk->ops.ioctl(clk, cmd, arg); + + put_posix_clock(clk); + + return err; +} +#endif + +static int posix_clock_open(struct inode *inode, struct file *fp) +{ + int err; + struct posix_clock *clk = + container_of(inode->i_cdev, struct posix_clock, cdev); + + mutex_lock(&clk->mutex); + + if (clk->zombie) { + err = -ENODEV; + goto out; + } + if (clk->ops.open) + err = clk->ops.open(clk, fp->f_mode); + else + err = 0; + + if (!err) { + kref_get(&clk->kref); + fp->private_data = clk; + } +out: + mutex_unlock(&clk->mutex); + return err; +} + +static int posix_clock_release(struct inode *inode, struct file *fp) +{ + struct posix_clock *clk = fp->private_data; + int err = 0; + + if (clk->ops.release) + err = clk->ops.release(clk); + + kref_put(&clk->kref, delete_clock); + + fp->private_data = NULL; + + return err; +} + +static const struct file_operations posix_clock_file_operations = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = posix_clock_read, + .poll = posix_clock_poll, + .unlocked_ioctl = posix_clock_ioctl, + .open = posix_clock_open, + .release = posix_clock_release, + .fasync = posix_clock_fasync, + .mmap = posix_clock_mmap, +#ifdef CONFIG_COMPAT + .compat_ioctl = posix_clock_compat_ioctl, +#endif +}; + +int posix_clock_register(struct posix_clock *clk, dev_t devid) +{ + int err; + + kref_init(&clk->kref); + mutex_init(&clk->mutex); + + cdev_init(&clk->cdev, &posix_clock_file_operations); + clk->cdev.owner = clk->ops.owner; + err = cdev_add(&clk->cdev, devid, 1); + if (err) + goto no_cdev; + + return err; +no_cdev: + mutex_destroy(&clk->mutex); + return err; +} +EXPORT_SYMBOL_GPL(posix_clock_register); + +static void delete_clock(struct kref *kref) +{ + struct posix_clock *clk = container_of(kref, struct posix_clock, kref); + mutex_destroy(&clk->mutex); + if (clk->release) + clk->release(clk); +} + +void posix_clock_unregister(struct posix_clock *clk) +{ + cdev_del(&clk->cdev); + + 
mutex_lock(&clk->mutex); + clk->zombie = true; + mutex_unlock(&clk->mutex); + + kref_put(&clk->kref, delete_clock); +} +EXPORT_SYMBOL_GPL(posix_clock_unregister); + +struct posix_clock_desc { + struct file *fp; + struct posix_clock *clk; +}; + +static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd) +{ + struct file *fp = fget(CLOCKID_TO_FD(id)); + int err = -EINVAL; + + if (!fp) + return err; + + if (fp->f_op->open != posix_clock_open || !fp->private_data) + goto out; + + cd->fp = fp; + cd->clk = get_posix_clock(fp); + + err = cd->clk ? 0 : -ENODEV; +out: + if (err) + fput(fp); + return err; +} + +static void put_clock_desc(struct posix_clock_desc *cd) +{ + put_posix_clock(cd->clk); + fput(cd->fp); +} + +static int pc_clock_adjtime(clockid_t id, struct timex *tx) +{ + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.clock_adjtime) + err = cd.clk->ops.clock_adjtime(cd.clk, tx); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +static int pc_clock_gettime(clockid_t id, struct timespec *ts) +{ + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.clock_gettime) + err = cd.clk->ops.clock_gettime(cd.clk, ts); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +static int pc_clock_getres(clockid_t id, struct timespec *ts) +{ + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.clock_getres) + err = cd.clk->ops.clock_getres(cd.clk, ts); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +static int pc_clock_settime(clockid_t id, const struct timespec *ts) +{ + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.clock_settime) + err = cd.clk->ops.clock_settime(cd.clk, ts); + else + err = -EOPNOTSUPP; + + 
put_clock_desc(&cd); + + return err; +} + +static int pc_timer_create(struct k_itimer *kit) +{ + clockid_t id = kit->it_clock; + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.timer_create) + err = cd.clk->ops.timer_create(cd.clk, kit); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +static int pc_timer_delete(struct k_itimer *kit) +{ + clockid_t id = kit->it_clock; + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.timer_delete) + err = cd.clk->ops.timer_delete(cd.clk, kit); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts) +{ + clockid_t id = kit->it_clock; + struct posix_clock_desc cd; + + if (get_clock_desc(id, &cd)) + return; + + if (cd.clk->ops.timer_gettime) + cd.clk->ops.timer_gettime(cd.clk, kit, ts); + + put_clock_desc(&cd); +} + +static int pc_timer_settime(struct k_itimer *kit, int flags, + struct itimerspec *ts, struct itimerspec *old) +{ + clockid_t id = kit->it_clock; + struct posix_clock_desc cd; + int err; + + err = get_clock_desc(id, &cd); + if (err) + return err; + + if (cd.clk->ops.timer_settime) + err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old); + else + err = -EOPNOTSUPP; + + put_clock_desc(&cd); + + return err; +} + +struct k_clock clock_posix_dynamic = { + .clock_getres = pc_clock_getres, + .clock_set = pc_clock_settime, + .clock_get = pc_clock_gettime, + .clock_adj = pc_clock_adjtime, + .timer_create = pc_timer_create, + .timer_set = pc_timer_settime, + .timer_del = pc_timer_delete, + .timer_get = pc_timer_gettime, +}; -- cgit v1.2.3-71-gd317 From f1a06390d013244e721372b3f9b66e39b6429c71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Jan 2011 08:47:15 +0100 Subject: genirq: Prevent irq storm on migration move_native_irq() masks and unmasks the 
interrupt line unconditionally, but the interrupt line might be masked due to a threaded oneshot handler in progress. Unmasking the line in that case can lead to interrupt storms. Observed on PREEMPT_RT. Originally-from: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/irq/migration.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 1d2541940480..441fd629ff04 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -56,6 +56,7 @@ void move_masked_irq(int irq) void move_native_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + bool masked; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -63,8 +64,15 @@ void move_native_irq(int irq) if (unlikely(desc->status & IRQ_DISABLED)) return; - desc->irq_data.chip->irq_mask(&desc->irq_data); + /* + * Be careful vs. already masked interrupts. If this is a + * threaded interrupt with ONESHOT set, we can end up with an + * interrupt storm. + */ + masked = desc->status & IRQ_MASKED; + if (!masked) + desc->irq_data.chip->irq_mask(&desc->irq_data); move_masked_irq(irq); - desc->irq_data.chip->irq_unmask(&desc->irq_data); + if (!masked) + desc->irq_data.chip->irq_unmask(&desc->irq_data); } - -- cgit v1.2.3-71-gd317 From e4a9ea5ee7c8812a7bf0c3fb725ceeaa3d4c2fcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 09:15:30 -0500 Subject: tracing: Replace trace_event struct array with pointer array Currently the trace_event structures are placed in the _ftrace_events section, and at link time, the linker makes one large array of all the trace_event structures. On boot up, this array is read (much like the initcall sections) and the events are processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. 
Two structures in the same file may be placed awkwardly, because gcc has no clue that they are supposed to be in an array. A hack was used previously to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the _ftrace_events section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The _ftrace_events section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Cc: Mathieu Desnoyers Acked-by: David S. Miller Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 7 +++---- include/linux/module.h | 2 +- include/linux/syscalls.h | 10 ++++++---- include/trace/ftrace.h | 24 +++++++++++++----------- kernel/trace/trace_events.c | 12 ++++++------ kernel/trace/trace_export.c | 6 +++--- 6 files changed, 32 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6ebb81030d2d..f53708be95eb 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -124,7 +124,8 @@ #endif #ifdef CONFIG_EVENT_TRACING -#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ +#define FTRACE_EVENTS() .
= ALIGN(8); \ + VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; #else @@ -179,9 +180,6 @@ TRACE_PRINTKS() \ \ STRUCT_ALIGN(); \ - FTRACE_EVENTS() \ - \ - STRUCT_ALIGN(); \ TRACE_SYSCALLS() /* @@ -482,6 +480,7 @@ KERNEL_CTORS() \ *(.init.rodata) \ MCOUNT_REC() \ + FTRACE_EVENTS() \ DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \ diff --git a/include/linux/module.h b/include/linux/module.h index e7c6385c6683..7695a303bb55 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,7 +389,7 @@ struct module unsigned int num_trace_bprintk_fmt; #endif #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call *trace_events; + struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18cd0684fc4e..45508fec366d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,28 +128,30 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_enter_##sname = &event_enter_##sname; \ __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = 
"sys_exit"#sname, \ .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_exit_##sname = &event_exit_##sname; \ __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e16610c208c9..3e68366d485a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -446,14 +446,16 @@ static inline notrace int ftrace_get_offsets_##call( \ * .reg = ftrace_event_reg, * }; * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { + * static struct ftrace_event_call event_ = { * .name = "", * .class = event_class_