From d0959024d8fb6555ba8bfdc6624cc7b7c2e675fd Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer_list: Remove alignment padding on 64 bit when CONFIG_TIMER_STATS Reorder struct timer_list to remove 8 bytes of alignment padding on 64 bit builds when CONFIG_TIMER_STATS is selected. timer_list is widely used across the kernel so many structures will benefit and shrink in size. For example, with my config on x86_64 per_cpu_dm_data shrinks from 136 to 128 bytes and ahci_port_priv shrinks from 1032 to 968 bytes. Signed-off-by: Richard Kennedy Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 38cf093ef62c..f3dccdb44f95 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -24,9 +24,9 @@ struct timer_list { int slack; #ifdef CONFIG_TIMER_STATS + int start_pid; void *start_site; char start_comm[16]; - int start_pid; #endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; -- cgit v1.2.3-71-gd317 From aaabe31c25a439b92cc281b14ca18b85bae7e7a6 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer: Initialize the field slack of timer_list TIMER_INITIALIZER() should initialize the field slack of timer_list as __init_timer() does. 
Signed-off-by: Changli Gao Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index f3dccdb44f95..1794674c1a52 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -54,6 +54,7 @@ extern struct tvec_base boot_tvec_bases; .expires = (_expires), \ .data = (_data), \ .base = &boot_tvec_bases, \ + .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } -- cgit v1.2.3-71-gd317 From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Permit statically-declared work with deferrable timers Currently, you have to just define a delayed_work uninitialised, and then initialise it before first use. That's a tad clumsy. At risk of playing mind-games with the compiler, fooling it into doing pointer arithmetic with compile-time-constants, this lets clients properly initialise delayed work with deferrable timers statically. This patch was inspired by the issues which led Artem Bityutskiy to commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue deferrable"). 
Signed-off-by: Phil Carmody Acked-by: Artem Bityutskiy Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 25 +++++++++++++++++++++++++ include/linux/workqueue.h | 8 ++++++++ kernel/timer.c | 15 +-------------- 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 1794674c1a52..cbfb7a355d30 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. 
+ */ +#define TBASE_DEFERRABLE_FLAG (0x1) + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases; __FILE__ ":" __stringify(__LINE__)) \ } +#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ + ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) + +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ + __TIMER_LOCKDEP_MAP_INITIALIZER( \ + __FILE__ ":" __stringify(__LINE__)) \ + } + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..88238c15ec3e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -127,12 +127,20 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } +#define __DEFERRED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + } + #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) +#define DECLARE_DEFERRED_WORK(n, f) \ + struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) + /* * initialize a work item's function pointer */ diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade7..72853b256ff2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. 
Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. - */ -#define TBASE_DEFERRABLE_FLAG (0x1) - /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + timer->base = TBASE_MAKE_DEFERRED(timer->base); } static inline void -- cgit v1.2.3-71-gd317 From 6f1bc451e6a79470b122a37ee1fc6bbca450f444 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: timer: Make try_to_del_timer_sync() the same on SMP and UP On UP try_to_del_timer_sync() is mapped to del_timer() which does not take the running timer callback into account, so it has different semantics. Remove the SMP dependency of try_to_del_timer_sync() by using base->running_timer in the UP case as well. 
[ tglx: Removed set_running_timer() inline and tweaked the changelog ] Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 4 ++-- kernel/timer.c | 17 +++-------------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index cbfb7a355d30..6abd9138beda 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); +extern int try_to_del_timer_sync(struct timer_list *timer); + #ifdef CONFIG_SMP - extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else -# define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/kernel/timer.c b/kernel/timer.c index 72853b256ff2..47b86c1e3226 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); - -static inline void set_running_timer(struct tvec_base *base, - struct timer_list *timer) -{ -#ifdef CONFIG_SMP - base->running_timer = timer; -#endif -} - static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; @@ -923,15 +914,12 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); -#ifdef CONFIG_SMP /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. - * - * It must not be called from interrupt contexts. 
*/ int try_to_del_timer_sync(struct timer_list *timer) { @@ -960,6 +948,7 @@ out: } EXPORT_SYMBOL(try_to_del_timer_sync); +#ifdef CONFIG_SMP /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1098,7 +1087,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); - set_running_timer(base, timer); + base->running_timer = timer; detach_timer(timer, 1); spin_unlock_irq(&base->lock); @@ -1106,7 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) spin_lock_irq(&base->lock); } } - set_running_timer(base, NULL); + base->running_timer = NULL; spin_unlock_irq(&base->lock); } -- cgit v1.2.3-71-gd317 From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 20 Oct 2010 16:01:12 -0700 Subject: sched: Make sched_param argument static in sched_setscheduler() callers Andrew Morton pointed out almost all sched_setscheduler() callers are using fixed parameters and can be converted to static. It reduces runtime memory use a little. 
Signed-off-by: KOSAKI Motohiro Reported-by: Andrew Morton Acked-by: James Morris Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++-- kernel/irq/manage.c | 4 +++- kernel/kthread.c | 2 +- kernel/sched.c | 6 +++--- kernel/softirq.c | 4 +++- kernel/trace/trace_selftest.c | 2 +- kernel/watchdog.c | 2 +- 7 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0383601a927c..849c8670583d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1942,9 +1942,10 @@ extern int task_nice(const struct task_struct *p); extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler(struct task_struct *, int, + const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, - struct sched_param *); + const struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 644e8d5fa367..850f030fa0c2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -573,7 +573,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } */ static int irq_thread(void *data) { - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; + static struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; diff --git a/kernel/kthread.c b/kernel/kthread.c index 2dc3786349d1..74cf6f5e7ade 100644 --- a/kernel/kthread.c +++ 
b/kernel/kthread.c @@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), wait_for_completion(&create.done); if (!IS_ERR(create.result)) { - struct sched_param param = { .sched_priority = 0 }; + static struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); diff --git a/kernel/sched.c b/kernel/sched.c index d42992bccdfa..51944e8c38a8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4701,7 +4701,7 @@ static bool check_same_owner(struct task_struct *p) } static int __sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param, bool user) + const struct sched_param *param, bool user) { int retval, oldprio, oldpolicy = -1, on_rq, running; unsigned long flags; @@ -4856,7 +4856,7 @@ recheck: * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, true); } @@ -4874,7 +4874,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); * but our caller might not have that capability. 
*/ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, false); } diff --git a/kernel/softirq.c b/kernel/softirq.c index fc978889b194..081869ed3a9f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -851,7 +851,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, cpumask_any(cpu_online_mask)); case CPU_DEAD: case CPU_DEAD_FROZEN: { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { + .sched_priority = MAX_RT_PRIO-1 + }; p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b3209..562c56e048fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - struct sched_param param = { .sched_priority = 5 }; + static struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, &param); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d8..94ca779aa9c2 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -307,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ static int watchdog(void *unused) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); sched_setscheduler(current, SCHED_FIFO, &param); -- cgit v1.2.3-71-gd317 From 4b6ba8aacbb3185703b797286547d0f8f3859b02 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 26 Oct 2010 15:07:13 -0700 Subject: of/net: Move of_get_mac_address() to a common source file. 
There are two identical implementations of of_get_mac_address(), one each in arch/powerpc/kernel/prom_parse.c and arch/microblaze/kernel/prom_parse.c. Move this function to a new common file of_net.{c,h} and adjust all the callers to include the new header. Signed-off-by: David Daney [grant.likely@secretlab.ca: protect header with #ifdef] Signed-off-by: Grant Likely --- arch/microblaze/include/asm/prom.h | 3 --- arch/microblaze/kernel/prom_parse.c | 38 ----------------------------- arch/powerpc/include/asm/prom.h | 3 --- arch/powerpc/kernel/prom_parse.c | 38 ----------------------------- arch/powerpc/sysdev/mv64x60_dev.c | 1 + arch/powerpc/sysdev/tsi108_dev.c | 1 + drivers/net/fs_enet/fs_enet-main.c | 1 + drivers/net/gianfar.c | 1 + drivers/net/ucc_geth.c | 1 + drivers/net/xilinx_emaclite.c | 1 + drivers/of/Kconfig | 4 ++++ drivers/of/Makefile | 1 + drivers/of/of_net.c | 48 +++++++++++++++++++++++++++++++++++++ include/linux/of_net.h | 15 ++++++++++++ 14 files changed, 74 insertions(+), 82 deletions(-) create mode 100644 drivers/of/of_net.c create mode 100644 include/linux/of_net.h (limited to 'include/linux') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index bdc38312ae4a..2e72af078b05 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -64,9 +64,6 @@ extern void kdump_move_device_tree(void); /* CPU OF node matching */ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - /** * of_irq_map_pci - Resolve the interrupt for a PCI device * @pdev: the device whose interrupt is to be resolved diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c index 99d9b61cccb5..9ae24f4b882b 100644 --- a/arch/microblaze/kernel/prom_parse.c +++ b/arch/microblaze/kernel/prom_parse.c @@ -110,41 +110,3 @@ void of_parse_dma_window(struct device_node *dn, const void 
*dma_window_prop, cells = prop ? *(u32 *)prop : of_n_size_cells(dn); *size = of_read_number(dma_window, cells); } - -/** - * Search the device tree for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the device tree, but were not set by U-Boot. For example, the - * DTS could define 'mac-address' and 'local-mac-address', with zero MAC - * addresses. Some older U-Boots only initialized 'local-mac-address'. In - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. 
-*/ -const void *of_get_mac_address(struct device_node *np) -{ - struct property *pp; - - pp = of_find_property(np, "mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "local-mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - return NULL; -} -EXPORT_SYMBOL(of_get_mac_address); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index ae26f2efd089..98264bf0a433 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -63,9 +63,6 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); /* cache lookup */ struct device_node *of_find_next_cache_node(struct device_node *np); -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - #ifdef CONFIG_NUMA extern int of_node_to_nid(struct device_node *device); #else diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 88334af038e5..c2b7a07cc3d3 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -117,41 +117,3 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, cells = prop ? *(u32 *)prop : of_n_size_cells(dn); *size = of_read_number(dma_window, cells); } - -/** - * Search the device tree for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. 
- * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the device tree, but were not set by U-Boot. For example, the - * DTS could define 'mac-address' and 'local-mac-address', with zero MAC - * addresses. Some older U-Boots only initialized 'local-mac-address'. In - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. -*/ -const void *of_get_mac_address(struct device_node *np) -{ - struct property *pp; - - pp = of_find_property(np, "mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "local-mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - return NULL; -} -EXPORT_SYMBOL(of_get_mac_address); diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 1398bc454999..feaee402e2d6 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index d4d15aaf18fa..c2d675b6392c 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index d684f187de57..7a1f3d0ffa78 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git 
a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 49e4ce1246a7..f860072e2f68 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -95,6 +95,7 @@ #include #include #include +#include #include "gianfar.h" #include "fsl_pq_mdio.h" diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index a4c3f5708246..f7e370fd8ddc 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c index 14f0955eca68..2a34b22ea26a 100644 --- a/drivers/net/xilinx_emaclite.c +++ b/drivers/net/xilinx_emaclite.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define DRIVER_NAME "xilinx_emaclite" diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index aa675ebd8eb3..e4b93a0a15d2 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -49,6 +49,10 @@ config OF_I2C help OpenFirmware I2C accessors +config OF_NET + depends on NETDEVICES + def_bool y + config OF_SPI def_tristate SPI depends on SPI && !SPARC diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 7888155bea08..3ab21a0a4907 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_OF_IRQ) += irq.o obj-$(CONFIG_OF_DEVICE) += device.o platform.o obj-$(CONFIG_OF_GPIO) += gpio.o obj-$(CONFIG_OF_I2C) += of_i2c.o +obj-$(CONFIG_OF_NET) += of_net.o obj-$(CONFIG_OF_SPI) += of_spi.o obj-$(CONFIG_OF_MDIO) += of_mdio.o diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c new file mode 100644 index 000000000000..86f334a2769c --- /dev/null +++ b/drivers/of/of_net.c @@ -0,0 +1,48 @@ +/* + * OF helpers for network devices. + * + * This file is released under the GPLv2 + * + * Initially copied out of arch/powerpc/kernel/prom_parse.c + */ +#include +#include +#include + +/** + * Search the device tree for the best MAC address to use. 
'mac-address' is + * checked first, because that is supposed to contain to "most recent" MAC + * address. If that isn't set, then 'local-mac-address' is checked next, + * because that is the default address. If that isn't set, then the obsolete + * 'address' is checked, just in case we're using an old device tree. + * + * Note that the 'address' property is supposed to contain a virtual address of + * the register set, but some DTS files have redefined that property to be the + * MAC address. + * + * All-zero MAC addresses are rejected, because those could be properties that + * exist in the device tree, but were not set by U-Boot. For example, the + * DTS could define 'mac-address' and 'local-mac-address', with zero MAC + * addresses. Some older U-Boots only initialized 'local-mac-address'. In + * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists + * but is all zeros. +*/ +const void *of_get_mac_address(struct device_node *np) +{ + struct property *pp; + + pp = of_find_property(np, "mac-address", NULL); + if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) + return pp->value; + + pp = of_find_property(np, "local-mac-address", NULL); + if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) + return pp->value; + + pp = of_find_property(np, "address", NULL); + if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) + return pp->value; + + return NULL; +} +EXPORT_SYMBOL(of_get_mac_address); diff --git a/include/linux/of_net.h b/include/linux/of_net.h new file mode 100644 index 000000000000..e913081fb52a --- /dev/null +++ b/include/linux/of_net.h @@ -0,0 +1,15 @@ +/* + * OF helpers for network devices. 
+ * + * This file is released under the GPLv2 + */ + +#ifndef __LINUX_OF_NET_H +#define __LINUX_OF_NET_H + +#ifdef CONFIG_OF_NET +#include +extern const void *of_get_mac_address(struct device_node *np); +#endif + +#endif /* __LINUX_OF_NET_H */ -- cgit v1.2.3-71-gd317 From 4a92379bdfb48680a5e6775dd53a586df7b6b0b1 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 21 Oct 2010 10:29:19 +0100 Subject: slub tracing: move trace calls out of always inlined functions to reduce kernel code size Having the trace calls defined in the always inlined kmalloc functions in include/linux/slub_def.h causes a lot of code duplication as the trace functions get instantiated for each kmalloc call site. This can simply be removed by pushing the trace calls down into the functions in slub.c. On my x86_64 build this patch shrinks the code size of the kernel by approx 36K and also shrinks the code size of many modules -- too many to list here ;) size vmlinux (2.6.36) reports text data bss dec hex filename 5410611 743172 828928 6982711 6a8c37 vmlinux 5373738 744244 828928 6946910 6a005e vmlinux + patch The resulting kernel has had some testing & kmalloc trace still seems to work. This patch - moves trace_kmalloc out of the inlined kmalloc() and pushes it down into kmem_cache_alloc_trace() so that it only gets instantiated once. - rename kmem_cache_alloc_notrace() to kmem_cache_alloc_trace() to indicate that it now does have tracing. (maybe this would be better called something like kmalloc_kmem_cache ?) - adds a new function kmalloc_order() to handle allocation and tracing of large allocations of page order. - removes tracing from the inlined kmalloc_large() replacing them with a call to kmalloc_order(); - move tracing out of inlined kmalloc_node() and pushing it down into kmem_cache_alloc_node_trace - rename kmem_cache_alloc_node_notrace() to kmem_cache_alloc_node_trace() - removes the include of trace/events/kmem.h from slub_def.h. 
v2 - keep kmalloc_order_trace inline when !CONFIG_TRACE Signed-off-by: Richard Kennedy Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 55 +++++++++++++++++++++++------------------------- mm/slub.c | 30 ++++++++++++++++++++------ 2 files changed, 49 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index e4f5ed180b9b..8b6e8ae5d5ca 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,9 +10,8 @@ #include #include #include -#include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ @@ -216,31 +215,40 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); +static __always_inline void * +kmalloc_order(size_t size, gfp_t flags, unsigned int order) +{ + void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + kmemleak_alloc(ret, size, 1, flags); + return ret; +} + #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags); +extern void * +kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); #else static __always_inline void * -kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { return kmem_cache_alloc(s, gfpflags); } + +static __always_inline void * +kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +{ + return kmalloc_order(size, flags, order); +} #endif static __always_inline void *kmalloc_large(size_t size, gfp_t flags) { unsigned int order = get_order(size); - void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); - - kmemleak_alloc(ret, size, 1, flags); - trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags); - - return ret; + return 
kmalloc_order_trace(size, flags, order); } static __always_inline void *kmalloc(size_t size, gfp_t flags) { - void *ret; - if (__builtin_constant_p(size)) { if (size > SLUB_MAX_SIZE) return kmalloc_large(size, flags); @@ -251,11 +259,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) if (!s) return ZERO_SIZE_PTR; - ret = kmem_cache_alloc_notrace(s, flags); - - trace_kmalloc(_THIS_IP_, ret, size, s->size, flags); - - return ret; + return kmem_cache_alloc_trace(s, flags, size); } } return __kmalloc(size, flags); @@ -266,14 +270,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, +extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node); + int node, size_t size); #else static __always_inline void * -kmem_cache_alloc_node_notrace(struct kmem_cache *s, +kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node) + int node, size_t size) { return kmem_cache_alloc_node(s, gfpflags, node); } @@ -281,8 +285,6 @@ kmem_cache_alloc_node_notrace(struct kmem_cache *s, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { - void *ret; - if (__builtin_constant_p(size) && size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); @@ -290,12 +292,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) if (!s) return ZERO_SIZE_PTR; - ret = kmem_cache_alloc_node_notrace(s, flags, node); - - trace_kmalloc_node(_THIS_IP_, ret, - size, s->size, flags, node); - - return ret; + return kmem_cache_alloc_node_trace(s, flags, node, size); } return __kmalloc_node(size, flags, node); } diff --git a/mm/slub.c b/mm/slub.c index 8fd5401bb071..7e657aa19475 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -28,6 +28,8 @@ #include #include +#include + /* * Lock order: * 1. 
slab_lock(page) @@ -1774,11 +1776,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) EXPORT_SYMBOL(kmem_cache_alloc); #ifdef CONFIG_TRACING -void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) +void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) +{ + void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); + trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); + return ret; +} +EXPORT_SYMBOL(kmem_cache_alloc_trace); + +void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) { - return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); + void *ret = kmalloc_order(size, flags, order); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); + return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_notrace); +EXPORT_SYMBOL(kmalloc_order_trace); #endif #ifdef CONFIG_NUMA @@ -1794,13 +1806,17 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) EXPORT_SYMBOL(kmem_cache_alloc_node); #ifdef CONFIG_TRACING -void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, +void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node) + int node, size_t size) { - return slab_alloc(s, gfpflags, node, _RET_IP_); + void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); + + trace_kmalloc_node(_RET_IP_, ret, + size, s->size, gfpflags, node); + return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +EXPORT_SYMBOL(kmem_cache_alloc_node_trace); #endif #endif -- cgit v1.2.3-71-gd317 From fc766e4c4965915ab52a1d1fa3c7a7b3e7bc07f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Oct 2010 03:09:24 +0000 Subject: decnet: RCU conversion and get rid of dev_base_lock While tracking dev_base_lock users, I found decnet used it in dnet_select_source(), but for a wrong purpose: Writers only hold RTNL, not dev_base_lock, so readers must use RCU if they cannot use RTNL. Adds an rcu_head in struct dn_ifaddr and handle proper RCU management. 
Adds __rcu annotation in dn_route as well. Signed-off-by: Eric Dumazet Acked-by: Steven Whitehouse Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/dn_dev.h | 27 ++++++++----- include/net/dst.h | 8 ++-- net/decnet/af_decnet.c | 2 +- net/decnet/dn_dev.c | 100 +++++++++++++++++++++++++++------------------- net/decnet/dn_fib.c | 6 ++- net/decnet/dn_neigh.c | 2 +- net/decnet/dn_route.c | 68 +++++++++++++++++-------------- 8 files changed, 127 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8fd2c23a1b9..578debb801f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -951,7 +951,7 @@ struct net_device { #endif void *atalk_ptr; /* AppleTalk link */ struct in_device __rcu *ip_ptr; /* IPv4 specific data */ - void *dn_ptr; /* DECnet specific data */ + struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 0916bbf3bdff..b9e32db03f20 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -5,13 +5,14 @@ struct dn_dev; struct dn_ifaddr { - struct dn_ifaddr *ifa_next; + struct dn_ifaddr __rcu *ifa_next; struct dn_dev *ifa_dev; __le16 ifa_local; __le16 ifa_address; __u8 ifa_flags; __u8 ifa_scope; char ifa_label[IFNAMSIZ]; + struct rcu_head rcu; }; #define DN_DEV_S_RU 0 /* Run - working normally */ @@ -83,7 +84,7 @@ struct dn_dev_parms { struct dn_dev { - struct dn_ifaddr *ifa_list; + struct dn_ifaddr __rcu *ifa_list; struct net_device *dev; struct dn_dev_parms parms; char use_long; @@ -171,19 +172,27 @@ extern int unregister_dnaddr_notifier(struct notifier_block *nb); static inline int dn_dev_islocal(struct net_device *dev, __le16 addr) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; + int res = 
0; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) { printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n"); - return 0; + goto out; } - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) - if ((addr ^ ifa->ifa_local) == 0) - return 1; - - return 0; + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa != NULL; + ifa = rcu_dereference(ifa->ifa_next)) + if ((addr ^ ifa->ifa_local) == 0) { + res = 1; + break; + } +out: + rcu_read_unlock(); + return res; } #endif /* _NET_DN_DEV_H */ diff --git a/include/net/dst.h b/include/net/dst.h index ffe9cb719c0e..a5bd72646d65 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -94,10 +94,10 @@ struct dst_entry { int __use; unsigned long lastuse; union { - struct dst_entry *next; - struct rtable __rcu *rt_next; - struct rt6_info *rt6_next; - struct dn_route *dn_next; + struct dst_entry *next; + struct rtable __rcu *rt_next; + struct rt6_info *rt6_next; + struct dn_route __rcu *dn_next; }; }; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d6b93d19790f..18b8a2cbdf77 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1848,7 +1848,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu) { unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER; if (dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); mtu -= LL_RESERVED_SPACE(dev); if (dn_db->use_long) mtu -= 21; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4c409b46aa35..0ba15633c418 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, if (table->extra1 == NULL) return -EINVAL; - dn_db = dev->dn_ptr; + dn_db = rcu_dereference_raw(dev->dn_ptr); old = dn_db->parms.forwarding; err = proc_dointvec(table, write, buffer, lenp, ppos); @@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void) return ifa; } -static __inline__ void 
dn_dev_free_ifa(struct dn_ifaddr *ifa) +static void dn_dev_free_ifa_rcu(struct rcu_head *head) { - kfree(ifa); + kfree(container_of(head, struct dn_ifaddr, rcu)); } -static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy) +static void dn_dev_free_ifa(struct dn_ifaddr *ifa) { - struct dn_ifaddr *ifa1 = *ifap; + call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu); +} + +static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) +{ + struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap); unsigned char mac_addr[6]; struct net_device *dev = dn_db->dev; @@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Check for duplicates */ - for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + for (ifa1 = rtnl_dereference(dn_db->ifa_list); + ifa1 != NULL; + ifa1 = rtnl_dereference(ifa1->ifa_next)) { if (ifa1->ifa_local == ifa->ifa_local) return -EEXIST; } @@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - dn_db->ifa_list = ifa; + rcu_assign_pointer(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); int rv; if (dn_db == NULL) { @@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; struct dn_dev *dn_db; struct net_device *dev; - struct dn_ifaddr *ifa = NULL, **ifap = NULL; + struct dn_ifaddr *ifa = NULL; + struct dn_ifaddr __rcu **ifap = NULL; int ret = 0; if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) @@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) goto done; 
} - if ((dn_db = dev->dn_ptr) != NULL) { - for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) break; } @@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) dev = __dev_get_by_index(&init_net, ifindex); if (dev) - dn_dev = dev->dn_ptr; + dn_dev = rtnl_dereference(dev->dn_ptr); return dn_dev; } @@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa, **ifap; + struct dn_ifaddr *ifa; + struct dn_ifaddr __rcu **ifap; int err = -EINVAL; if (!net_eq(net, &init_net)) @@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto errout; err = -EADDRNOTAVAIL; - for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) continue; @@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; - if ((dn_db = dev->dn_ptr) == NULL) { + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) { dn_db = dn_dev_create(dev, &err); if (!dn_db) return err; @@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = 0; } - if ((dn_db = dev->dn_ptr) == NULL) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) goto cont; - for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; - ifa = ifa->ifa_next, dn_idx++) { + for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; + ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { if 
(dn_idx < skip_naddr) continue; @@ -773,21 +786,22 @@ done: static int dn_dev_get_first(struct net_device *dev, __le16 *addr) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int rv = -ENODEV; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) goto out; - rtnl_lock(); - ifa = dn_db->ifa_list; + ifa = rcu_dereference(dn_db->ifa_list); if (ifa != NULL) { *addr = ifa->ifa_local; rv = 0; } - rtnl_unlock(); out: + rcu_read_unlock(); return rv; } @@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) struct endnode_hello_message *msg; struct sk_buff *skb = NULL; __le16 *pktlen; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) return; @@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; struct sk_buff *skb; size_t size; @@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dn_send_endnode_hello(dev, ifa); @@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa) static int dn_eth_up(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_add(dev, dn_rt_all_end_mcast); @@ -1012,7 +1026,7 @@ static int dn_eth_up(struct 
net_device *dev) static void dn_eth_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_del(dev, dn_rt_all_end_mcast); @@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev); static void dn_dev_timer_func(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db->t3 <= dn_db->parms.t2) { if (dn_db->parms.timer3) { - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa; + ifa = rcu_dereference(ifa->ifa_next)) { if (!(ifa->ifa_flags & IFA_F_SECONDARY)) dn_db->parms.timer3(dev, ifa); } @@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg) } else { dn_db->t3 -= dn_db->parms.t2; } - + rcu_read_unlock(); dn_dev_set_timer(dev); } static void dn_dev_set_timer(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.t2 > dn_db->parms.t3) dn_db->parms.t2 = dn_db->parms.t3; @@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) return NULL; memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - smp_wmb(); - dev->dn_ptr = dn_db; + + rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - dev->dn_ptr = NULL; + rcu_assign_pointer(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } @@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev) struct dn_ifaddr *ifa; __le16 addr = decnet_address; int maybe_default = 0; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct 
dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) return; @@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev) static void dn_dev_delete(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if (dn_db == NULL) return; @@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev) void dn_dev_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); struct dn_ifaddr *ifa; if (dn_db == NULL) return; - while((ifa = dn_db->ifa_list) != NULL) { + while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) { dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); dn_dev_free_ifa(ifa); } @@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev) } static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) + __acquires(RCU) { int i; struct net_device *dev; @@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(rcu) + __releases(RCU) { rcu_read_unlock(); } @@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) struct net_device *dev = v; char peer_buf[DN_ASCBUF_LEN]; char router_buf[DN_ASCBUF_LEN]; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr); seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" " %04hu %03d %02x %-10s %-7s %-7s\n", diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 4ab96c15166d..0ef0a81bcd72 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - dn_db = dev->dn_ptr; + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) continue; - for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = 
ifa2->ifa_next) { + for (ifa2 = rcu_dereference(dn_db->ifa_list); + ifa2 != NULL; + ifa2 = rcu_dereference(ifa2->ifa_next)) { if (ifa2->ifa_local == ifa->ifa_local) { found_it = 1; break; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index a085dbcf5c7f..602dade7e9a3 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) write_lock(&neigh->lock); neigh->used = jiffies; - dn_db = (struct dn_dev *)neigh->dev->dn_ptr; + dn_db = rcu_dereference(neigh->dev->dn_ptr); if (!(neigh->nud_state & NUD_PERMANENT)) { neigh->updated = jiffies; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index df0f3e54ff8a..94a9eb1d313e 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -93,7 +93,7 @@ struct dn_rt_hash_bucket { - struct dn_route *chain; + struct dn_route __rcu *chain; spinlock_t lock; }; @@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt) static void dn_dst_check_expire(unsigned long dummy) { int i; - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; unsigned long now = jiffies; unsigned long expire = 120 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { rtp = &dn_rt_hash_table[i].chain; spin_lock(&dn_rt_hash_table[i].lock); - while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy) static int dn_dst_gc(struct dst_ops *ops) { - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; int i; unsigned long now = jiffies; unsigned long expire = 10 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); rtp = &dn_rt_hash_table[i].chain; 
- while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { u32 min_mtu = 230; struct dn_dev *dn = dst->neighbour ? - (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL; + rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -277,13 +281,15 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) { - struct dn_route *rth, **rthp; + struct dn_route *rth; + struct dn_route __rcu **rthp; unsigned long now = jiffies; rthp = &dn_rt_hash_table[hash].chain; spin_lock_bh(&dn_rt_hash_table[hash].lock); - while((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ *rthp = rth->dst.dn_next; @@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy) int i; struct dn_route *rt, *next; - for(i = 0; i < dn_rt_hash_mask; i++) { + for (i = 0; i < dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); - if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL) + if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL) goto nothing_to_declare; - for(; rt; rt=next) { - next = rt->dst.dn_next; - rt->dst.dn_next = NULL; + for(; rt; rt = next) { + next = rcu_dereference_raw(rt->dst.dn_next); + RCU_INIT_POINTER(rt->dst.dn_next, NULL); dst_free((struct dst_entry *)rt); } @@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb) */ static int dn_route_rx_packet(struct sk_buff *skb) { - struct dn_skb_cb *cb = DN_SKB_CB(skb); + struct dn_skb_cb *cb; int err; if ((err = dn_route_input(skb)) == 0) return 
dst_input(skb); + cb = DN_SKB_CB(skb); if (decnet_debug_level & 4) { char *devname = skb->dev ? skb->dev->name : "???"; - struct dn_skb_cb *cb = DN_SKB_CB(skb); + printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, @@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_skb_cb *cb; unsigned char flags = 0; __u16 len = le16_to_cpu(*(__le16 *)skb->data); - struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn = rcu_dereference(dev->dn_ptr); unsigned char padlen = 0; if (!net_eq(dev_net(dev), &init_net)) @@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct dst_entry *dst = skb_dst(skb); - struct dn_dev *dn_db = dst->dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; struct neighbour *neigh = dst->neighbour; int header_len; @@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2) static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) { __le16 saddr = 0; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int best_match = 0; int ret; - read_lock(&dev_base_lock); - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa != NULL; + ifa = rcu_dereference(ifa->ifa_next)) { if (ifa->ifa_scope > scope) continue; if (!daddr) { @@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int if (best_match == 0) saddr = ifa->ifa_local; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return saddr; } @@ -1020,7 +1030,7 @@ source_ok: err = -ENODEV; if (dev_out == NULL) goto out; - dn_db = dev_out->dn_ptr; + dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible 
improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); @@ -1233,7 +1243,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(in_dev); - if ((dn_db = in_dev->dn_ptr) == NULL) + if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL) goto out; /* Zero source addresses are not allowed */ @@ -1677,15 +1687,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->dst.dn_next; - while(!rt) { + rt = rcu_dereference_bh(rt->dst.dn_next); + while (!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) break; rcu_read_lock_bh(); - rt = dn_rt_hash_table[s->bucket].chain; + rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); } - return rcu_dereference_bh(rt); + return rt; } static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) -- cgit v1.2.3-71-gd317 From 1da4b1c6a4dfb5a13d7147a27c1ac53fed09befd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 9 Nov 2010 11:22:58 +0000 Subject: x86/mrst: Add SFI platform device parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFI provides a series of tables. These describe the platform devices present including SPI and I²C devices, as well as various sensors, keypads and other glue as well as interfaces provided via the SCU IPC mechanism (intel_scu_ipc.c) This patch is a merge of the core elements and relevant fixes from the Intel development code by Feng, Alek, myself into a single coherent patch for upstream submission. It provides the needed infrastructure to register I2C, SPI and platform devices described by the tables, as well as handlers for some of the hardware already supported in kernel. The 0.8 firmware also provides GPIO tables. Devices are created at boot time or if they are SCU dependant at the point an SCU is discovered. The existing Linux device mechanisms will then handle the device binding. 
At an abstract level this is an SFI to Linux device translator. Device/platform specific setup/glue is in this file. This is done so that the drivers for the generic I²C and SPI bus devices remain cross platform as they should. (Updated from RFC version to correct the emc1403 name used by the firmware and a wrongly used #define) Signed-off-by: Alek Du LKML-Reference: <20101109112158.20013.6158.stgit@localhost.localdomain> [Clean ups, removal of 0.7 support] Signed-off-by: Feng Tang [Clean ups] Signed-off-by: Alan Cox Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 2 + arch/x86/include/asm/mrst.h | 4 + arch/x86/platform/mrst/mrst.c | 515 ++++++++++++++++++++++++++++++++++- drivers/platform/x86/intel_scu_ipc.c | 5 + include/linux/sfi.h | 8 +- 5 files changed, 527 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e8327686d3c5..b306b84fc8c8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -385,6 +385,8 @@ config X86_MRST depends on X86_EXTENDED_PLATFORM depends on X86_IO_APIC select APB_TIMER + select I2C + select SPI ---help--- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin Internet Device(MID) platform. Moorestown consists of two chips: diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 4a711a684b17..283debd29fc0 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -50,4 +50,8 @@ extern void mrst_early_console_init(void); extern struct console early_hsu_console; extern void hsu_early_console_init(void); + +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + #endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 79ae68154e87..cfa1af24edd5 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -9,9 +9,19 @@ * as published by the Free Software Foundation; version 2 * of the License. 
*/ + +#define pr_fmt(fmt) "mrst: " fmt + #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -23,8 +33,10 @@ #include #include #include +#include #include + /* * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, * cmdline option x86_mrst_timer can be used to override the configuration @@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) memcpy(sfi_mtimer_array, pentry, totallen); } - printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); + pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); pentry = sfi_mtimer_array; for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { - printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," + pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," " irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->freq_hz, pentry->irq); if (!pentry->irq) @@ -176,10 +188,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) memcpy(sfi_mrtc_array, pentry, totallen); } - printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); + pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); pentry = sfi_mrtc_array; for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { - printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", + pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->irq); mp_irq.type = MP_IOAPIC; mp_irq.irqtype = mp_INT; @@ -309,3 +321,498 @@ static inline int __init setup_x86_mrst_timer(char *arg) return 0; } __setup("x86_mrst_timer=", setup_x86_mrst_timer); + +/* + * Parsing GPIO table first, since the DEVS table will need this table + * to map the pin name to the actual pin. 
+ */ +static struct sfi_gpio_table_entry *gpio_table; +static int gpio_num_entry; + +static int __init sfi_parse_gpio(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_gpio_table_entry *pentry; + int num, i; + + if (gpio_table) + return 0; + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); + pentry = (struct sfi_gpio_table_entry *)sb->pentry; + + gpio_table = (struct sfi_gpio_table_entry *) + kmalloc(num * sizeof(*pentry), GFP_KERNEL); + if (!gpio_table) + return -1; + memcpy(gpio_table, pentry, num * sizeof(*pentry)); + gpio_num_entry = num; + + pr_debug("GPIO pin info:\n"); + for (i = 0; i < num; i++, pentry++) + pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," + " pin = %d\n", i, + pentry->controller_name, + pentry->pin_name, + pentry->pin_no); + return 0; +} + +static int get_gpio_by_name(const char *name) +{ + struct sfi_gpio_table_entry *pentry = gpio_table; + int i; + + if (!pentry) + return -1; + for (i = 0; i < gpio_num_entry; i++, pentry++) { + if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) + return pentry->pin_no; + } + return -1; +} + +/* + * Here defines the array of devices platform data that IAFW would export + * through SFI "DEVS" table, we use name and type to match the device and + * its platform data. 
+ */ +struct devs_id { + char name[SFI_NAME_LEN + 1]; + u8 type; + u8 delay; + void *(*get_platform_data)(void *info); +}; + +/* the offset for the mapping of global gpio pin to irq */ +#define MRST_IRQ_OFFSET 0x100 + +static void __init *pmic_gpio_platform_data(void *info) +{ + static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; + int gpio_base = get_gpio_by_name("pmic_gpio_base"); + + if (gpio_base == -1) + gpio_base = 64; + pmic_gpio_pdata.gpio_base = gpio_base; + pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; + pmic_gpio_pdata.gpiointr = 0xffffeff8; + + return &pmic_gpio_pdata; +} + +static void __init *max3111_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + int intr = get_gpio_by_name("max3111_int"); + + if (intr == -1) + return NULL; + spi_info->irq = intr + MRST_IRQ_OFFSET; + return NULL; +} + +/* we have multiple max7315 on the board ... */ +#define MAX7315_NUM 2 +static void __init *max7315_platform_data(void *info) +{ + static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; + static int nr; + struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; + struct i2c_board_info *i2c_info = info; + int gpio_base, intr; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + + if (nr == MAX7315_NUM) { + pr_err("too many max7315s, we only support %d\n", + MAX7315_NUM); + return NULL; + } + /* we have several max7315 on the board, we only need load several + * instances of the same pca953x driver to cover them + */ + strcpy(i2c_info->type, "max7315"); + if (nr++) { + sprintf(base_pin_name, "max7315_%d_base", nr); + sprintf(intr_pin_name, "max7315_%d_int", nr); + } else { + strcpy(base_pin_name, "max7315_base"); + strcpy(intr_pin_name, "max7315_int"); + } + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + if (gpio_base == -1) + return NULL; + max7315->gpio_base = gpio_base; + if (intr != -1) { + i2c_info->irq = intr + MRST_IRQ_OFFSET; + 
max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + max7315->irq_base = -1; + } + return max7315; +} + +static void __init *emc1403_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("thermal_int"); + int intr2nd = get_gpio_by_name("thermal_alert"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static void __init *lis331dl_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("accel_int"); + int intr2nd = get_gpio_by_name("accel_2"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static const struct devs_id __initconst device_ids[] = { + {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, + {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, + {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, + {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, + {}, +}; + +#define MAX_IPCDEVS 24 +static struct platform_device *ipc_devs[MAX_IPCDEVS]; +static int ipc_next_dev; + +#define MAX_SCU_SPI 24 +static struct spi_board_info *spi_devs[MAX_SCU_SPI]; +static int spi_next_dev; + +#define MAX_SCU_I2C 24 +static struct i2c_board_info *i2c_devs[MAX_SCU_I2C]; +static int i2c_bus[MAX_SCU_I2C]; +static int i2c_next_dev; + +static void __init intel_scu_device_register(struct platform_device *pdev) +{ + if(ipc_next_dev == MAX_IPCDEVS) + pr_err("too many SCU IPC devices"); + else + ipc_devs[ipc_next_dev++] = pdev; +} + +static void __init intel_scu_spi_device_register(struct 
spi_board_info *sdev) +{ + struct spi_board_info *new_dev; + + if (spi_next_dev == MAX_SCU_SPI) { + pr_err("too many SCU SPI devices"); + return; + } + + new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed spi dev %s\n", + sdev->modalias); + return; + } + memcpy(new_dev, sdev, sizeof(*sdev)); + + spi_devs[spi_next_dev++] = new_dev; +} + +static void __init intel_scu_i2c_device_register(int bus, + struct i2c_board_info *idev) +{ + struct i2c_board_info *new_dev; + + if (i2c_next_dev == MAX_SCU_I2C) { + pr_err("too many SCU I2C devices"); + return; + } + + new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed i2c dev %s\n", + idev->type); + return; + } + memcpy(new_dev, idev, sizeof(*idev)); + + i2c_bus[i2c_next_dev] = bus; + i2c_devs[i2c_next_dev++] = new_dev; +} + +/* Called by IPC driver */ +void intel_scu_devices_create(void) +{ + int i; + + for (i = 0; i < ipc_next_dev; i++) + platform_device_add(ipc_devs[i]); + + for (i = 0; i < spi_next_dev; i++) + spi_register_board_info(spi_devs[i], 1); + + for (i = 0; i < i2c_next_dev; i++) { + struct i2c_adapter *adapter; + struct i2c_client *client; + + adapter = i2c_get_adapter(i2c_bus[i]); + if (adapter) { + client = i2c_new_device(adapter, i2c_devs[i]); + if (!client) + pr_err("can't create i2c device %s\n", + i2c_devs[i]->type); + } else + i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); + } +} +EXPORT_SYMBOL_GPL(intel_scu_devices_create); + +/* Called by IPC driver */ +void intel_scu_devices_destroy(void) +{ + int i; + + for (i = 0; i < ipc_next_dev; i++) + platform_device_del(ipc_devs[i]); +} +EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); + +static void __init install_irq_resource(struct platform_device *pdev, int irq) +{ + /* Single threaded */ + static struct resource __initdata res = { + .name = "IRQ", + .flags = IORESOURCE_IRQ, + }; + res.start = irq; + platform_device_add_resources(pdev, &res, 1); +} + 
+static void __init sfi_handle_ipc_dev(struct platform_device *pdev) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_IPC && + !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(pdev); + break; + } + dev++; + } + pdev->dev.platform_data = pdata; + intel_scu_device_register(pdev); +} + +static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_SPI && + !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(spi_info); + break; + } + dev++; + } + spi_info->platform_data = pdata; + if (dev->delay) + intel_scu_spi_device_register(spi_info); + else + spi_register_board_info(spi_info, 1); +} + +static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_I2C && + !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(i2c_info); + break; + } + dev++; + } + i2c_info->platform_data = pdata; + + if (dev->delay) + intel_scu_i2c_device_register(bus, i2c_info); + else + i2c_register_board_info(bus, i2c_info, 1); + } + + +static int __init sfi_parse_devs(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_device_table_entry *pentry; + struct spi_board_info spi_info; + struct i2c_board_info i2c_info; + struct platform_device *pdev; + int num, i, bus; + int ioapic; + struct io_apic_irq_attr irq_attr; + + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); + pentry = (struct sfi_device_table_entry *)sb->pentry; + + for (i = 0; i < num; i++, pentry++) { + if (pentry->irq != (u8)0xff) { /* native RTE case */ + /* these SPI2 devices are not exposed to system 
as PCI + * devices, but they have separate RTE entry in IOAPIC + * so we have to enable them one by one here + */ + ioapic = mp_find_ioapic(pentry->irq); + irq_attr.ioapic = ioapic; + irq_attr.ioapic_pin = pentry->irq; + irq_attr.trigger = 1; + irq_attr.polarity = 1; + io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); + } + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + /* ID as IRQ is a hack that will go away */ + pdev = platform_device_alloc(pentry->name, pentry->irq); + if (pdev == NULL) { + pr_err("out of memory for SFI platform device '%s'.\n", + pentry->name); + continue; + } + install_irq_resource(pdev, pentry->irq); + pr_debug("info[%2d]: IPC bus, name = %16.16s, " + "irq = 0x%2x\n", i, pentry->name, pentry->irq); + sfi_handle_ipc_dev(pdev); + break; + case SFI_DEV_TYPE_SPI: + memset(&spi_info, 0, sizeof(spi_info)); + strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); + spi_info.irq = pentry->irq; + spi_info.bus_num = pentry->host_num; + spi_info.chip_select = pentry->addr; + spi_info.max_speed_hz = pentry->max_freq; + pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " + "irq = 0x%2x, max_freq = %d, cs = %d\n", i, + spi_info.bus_num, + spi_info.modalias, + spi_info.irq, + spi_info.max_speed_hz, + spi_info.chip_select); + sfi_handle_spi_dev(&spi_info); + break; + case SFI_DEV_TYPE_I2C: + memset(&i2c_info, 0, sizeof(i2c_info)); + bus = pentry->host_num; + strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); + i2c_info.irq = pentry->irq; + i2c_info.addr = pentry->addr; + pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " + "irq = 0x%2x, addr = 0x%x\n", i, bus, + i2c_info.type, + i2c_info.irq, + i2c_info.addr); + sfi_handle_i2c_dev(bus, &i2c_info); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + ; + } + } + return 0; +} + +static int __init mrst_platform_init(void) +{ + sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); + sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); + return 0; +} 
+arch_initcall(mrst_platform_init); + +/* + * we will search these buttons in SFI GPIO table (by name) + * and register them dynamically. Please add all possible + * buttons here, we will shrink them if no GPIO found. + */ +static struct gpio_keys_button gpio_button[] = { + {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, + {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, + {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, + {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, + {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, +}; + +static struct gpio_keys_platform_data mrst_gpio_keys = { + .buttons = gpio_button, + .rep = 1, + .nbuttons = -1, /* will fill it after search */ +}; + +static struct platform_device pb_device = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &mrst_gpio_keys, + }, +}; + +/* + * Shrink the non-existent buttons, register the gpio button + * device if there is some + */ +static int __init pb_keys_init(void) +{ + struct gpio_keys_button *gb = gpio_button; + int i, num, good = 0; + + num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); + for (i = 0; i < num; i++) { + gb[i].gpio = get_gpio_by_name(gb[i].desc); + if (gb[i].gpio == -1) + continue; + + if (i != good) + gb[good] = gb[i]; + good++; + } + + if (good) { + mrst_gpio_keys.nbuttons = good; + return platform_device_register(&pb_device); + } + return 0; +} +late_initcall(pb_keys_init); diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 41a9e34899ac..ca35b0ce944a 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -26,6 +26,7 @@ #include #include #include +#include /* IPC defines the following message types */ #define 
IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ @@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id) iounmap(ipcdev.ipc_base); return -ENOMEM; } + + intel_scu_devices_create(); + return 0; } @@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev) iounmap(ipcdev.ipc_base); iounmap(ipcdev.i2c_base); ipcdev.pdev = NULL; + intel_scu_devices_destroy(); } static const struct pci_device_id pci_ids[] = { diff --git a/include/linux/sfi.h b/include/linux/sfi.h index 7f770c638e99..fe817918b30e 100644 --- a/include/linux/sfi.h +++ b/include/linux/sfi.h @@ -77,6 +77,8 @@ #define SFI_OEM_ID_SIZE 6 #define SFI_OEM_TABLE_ID_SIZE 8 +#define SFI_NAME_LEN 16 + #define SFI_SYST_SEARCH_BEGIN 0x000E0000 #define SFI_SYST_SEARCH_END 0x000FFFFF @@ -156,13 +158,13 @@ struct sfi_device_table_entry { u16 addr; u8 irq; u32 max_freq; - char name[16]; + char name[SFI_NAME_LEN]; } __packed; struct sfi_gpio_table_entry { - char controller_name[16]; + char controller_name[SFI_NAME_LEN]; u16 pin_no; - char pin_name[16]; + char pin_name[SFI_NAME_LEN]; } __packed; typedef int (*sfi_table_handler) (struct sfi_table_header *table); -- cgit v1.2.3-71-gd317 From 751305d9b2fd3e03eaab7808e976241d85ca4353 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 28 Oct 2010 18:23:01 +0100 Subject: viafb: General power management infrastructure Multiple devices need S/R hooks (framebuffer, GPIO, camera). Add infrastructure and convert existing framebuffer code to the new model. This patch should create no functional change. Based on earlier work by Jonathan Corbet. 
Signed-off-by: Daniel Drake Acked-by: Jonathan Corbet Signed-off-by: Florian Tobias Schandinat --- drivers/video/via/via-core.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- drivers/video/via/viafbdev.c | 34 +++++++++---------- drivers/video/via/viafbdev.h | 2 -- include/linux/via-core.h | 15 +++++++++ 4 files changed, 107 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/via/via-core.c b/drivers/video/via/via-core.c index 31e30338e893..42be3d955887 100644 --- a/drivers/video/via/via-core.c +++ b/drivers/video/via/via-core.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include /* * The default port config. @@ -563,6 +565,78 @@ static void via_teardown_subdevs(void) } } +/* + * Power management functions + */ +#ifdef CONFIG_PM +static LIST_HEAD(viafb_pm_hooks); +static DEFINE_MUTEX(viafb_pm_hooks_lock); + +void viafb_pm_register(struct viafb_pm_hooks *hooks) +{ + INIT_LIST_HEAD(&hooks->list); + + mutex_lock(&viafb_pm_hooks_lock); + list_add_tail(&hooks->list, &viafb_pm_hooks); + mutex_unlock(&viafb_pm_hooks_lock); +} +EXPORT_SYMBOL_GPL(viafb_pm_register); + +void viafb_pm_unregister(struct viafb_pm_hooks *hooks) +{ + mutex_lock(&viafb_pm_hooks_lock); + list_del(&hooks->list); + mutex_unlock(&viafb_pm_hooks_lock); +} +EXPORT_SYMBOL_GPL(viafb_pm_unregister); + +static int via_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct viafb_pm_hooks *hooks; + + if (state.event != PM_EVENT_SUSPEND) + return 0; + /* + * "I've occasionally hit a few drivers that caused suspend + * failures, and each and every time it was a driver bug, and + * the right thing to do was to just ignore the error and suspend + * anyway - returning an error code and trying to undo the suspend + * is not what anybody ever really wants, even if our model + *_allows_ for it." + * -- Linus Torvalds, Dec. 
7, 2009 + */ + mutex_lock(&viafb_pm_hooks_lock); + list_for_each_entry_reverse(hooks, &viafb_pm_hooks, list) + hooks->suspend(hooks->private); + mutex_unlock(&viafb_pm_hooks_lock); + + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + return 0; +} + +static int via_resume(struct pci_dev *pdev) +{ + struct viafb_pm_hooks *hooks; + + /* Get the bus side powered up */ + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + if (pci_enable_device(pdev)) + return 0; + + pci_set_master(pdev); + + /* Now bring back any subdevs */ + mutex_lock(&viafb_pm_hooks_lock); + list_for_each_entry(hooks, &viafb_pm_hooks, list) + hooks->resume(hooks->private); + mutex_unlock(&viafb_pm_hooks_lock); + + return 0; +} +#endif /* CONFIG_PM */ static int __devinit via_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -572,6 +646,7 @@ static int __devinit via_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) return ret; + /* * Global device initialization. 
*/ @@ -651,8 +726,8 @@ static struct pci_driver via_driver = { .probe = via_pci_probe, .remove = __devexit_p(via_pci_remove), #ifdef CONFIG_PM - .suspend = viafb_suspend, - .resume = viafb_resume, + .suspend = via_suspend, + .resume = via_resume, #endif }; diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c index d298cfccd6fc..289edd519527 100644 --- a/drivers/video/via/viafbdev.c +++ b/drivers/video/via/viafbdev.c @@ -1672,31 +1672,19 @@ static int parse_mode(const char *str, u32 *xres, u32 *yres) #ifdef CONFIG_PM -int viafb_suspend(struct pci_dev *pdev, pm_message_t state) +static int viafb_suspend(void *unused) { - if (state.event == PM_EVENT_SUSPEND) { - acquire_console_sem(); - fb_set_suspend(viafbinfo, 1); - - viafb_sync(viafbinfo); - - pci_save_state(pdev); - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - release_console_sem(); - } + acquire_console_sem(); + fb_set_suspend(viafbinfo, 1); + viafb_sync(viafbinfo); + release_console_sem(); return 0; } -int viafb_resume(struct pci_dev *pdev) +static int viafb_resume(void *unused) { acquire_console_sem(); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - if (pci_enable_device(pdev)) - goto fail; - pci_set_master(pdev); if (viaparinfo->shared->vdev->engine_mmio) viafb_reset_engine(viaparinfo); viafb_set_par(viafbinfo); @@ -1704,11 +1692,15 @@ int viafb_resume(struct pci_dev *pdev) viafb_set_par(viafbinfo1); fb_set_suspend(viafbinfo, 0); -fail: release_console_sem(); return 0; } +static struct viafb_pm_hooks viafb_fb_pm_hooks = { + .suspend = viafb_suspend, + .resume = viafb_resume +}; + #endif @@ -1899,6 +1891,10 @@ int __devinit via_fb_pci_probe(struct viafb_dev *vdev) viafb_init_proc(viaparinfo->shared); viafb_init_dac(IGA2); + +#ifdef CONFIG_PM + viafb_pm_register(&viafb_fb_pm_hooks); +#endif return 0; out_fb_unreg: diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h index 4960e3da6645..d66f963e930e 100644 --- 
a/drivers/video/via/viafbdev.h +++ b/drivers/video/via/viafbdev.h @@ -108,6 +108,4 @@ void via_fb_pci_remove(struct pci_dev *pdev); /* Temporary */ int viafb_init(void); void viafb_exit(void); -int viafb_suspend(struct pci_dev *pdev, pm_message_t state); -int viafb_resume(struct pci_dev *pdev); #endif /* __VIAFBDEV_H__ */ diff --git a/include/linux/via-core.h b/include/linux/via-core.h index 7ffb521e1a7a..a4327a0c8efc 100644 --- a/include/linux/via-core.h +++ b/include/linux/via-core.h @@ -59,6 +59,21 @@ struct via_port_cfg { u8 ioport_index; }; +/* + * Allow subdevs to register suspend/resume hooks. + */ +#ifdef CONFIG_PM +struct viafb_pm_hooks { + struct list_head list; + int (*suspend)(void *private); + int (*resume)(void *private); + void *private; +}; + +void viafb_pm_register(struct viafb_pm_hooks *hooks); +void viafb_pm_unregister(struct viafb_pm_hooks *hooks); +#endif /* CONFIG_PM */ + /* * This is the global viafb "device" containing stuff needed by * all subdevs. -- cgit v1.2.3-71-gd317 From 4d17aeb1c5b2375769446d13012a98e6d265ec13 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 21 Sep 2010 19:37:15 +0530 Subject: OMAP: I2C: split device registration and convert OMAP2+ to omap_device Split the OMAP1 and OMAP2+ platform_device build and register code. Convert the OMAP2+ variant to use omap_device. This patch was developed in collaboration with Rajendra Nayak . 
Signed-off-by: Paul Walmsley Signed-off-by: Rajendra Nayak Cc: Kevin Hilman Signed-off-by: Kevin Hilman --- arch/arm/plat-omap/i2c.c | 124 +++++++++++++++++++---------------------------- include/linux/i2c-omap.h | 5 ++ 2 files changed, 54 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c index a5ce4f0aad35..a5bff9ce7cbe 100644 --- a/arch/arm/plat-omap/i2c.c +++ b/arch/arm/plat-omap/i2c.c @@ -27,18 +27,18 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include #define OMAP_I2C_SIZE 0x3f #define OMAP1_I2C_BASE 0xfffb3800 -#define OMAP2_I2C_BASE1 0x48070000 -#define OMAP2_I2C_BASE2 0x48072000 -#define OMAP2_I2C_BASE3 0x48060000 -#define OMAP4_I2C_BASE4 0x48350000 static const char name[] = "i2c_omap"; @@ -55,15 +55,6 @@ static const char name[] = "i2c_omap"; static struct resource i2c_resources[][2] = { { I2C_RESOURCE_BUILDER(0, 0) }, -#if defined(CONFIG_ARCH_OMAP2PLUS) - { I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE2, 0) }, -#endif -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) - { I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE3, 0) }, -#endif -#if defined(CONFIG_ARCH_OMAP4) - { I2C_RESOURCE_BUILDER(OMAP4_I2C_BASE4, 0) }, -#endif }; #define I2C_DEV_BUILDER(bus_id, res, data) \ @@ -77,18 +68,11 @@ static struct resource i2c_resources[][2] = { }, \ } -static struct omap_i2c_bus_platform_data i2c_pdata[ARRAY_SIZE(i2c_resources)]; +#define MAX_OMAP_I2C_HWMOD_NAME_LEN 16 +#define OMAP_I2C_MAX_CONTROLLERS 4 +static struct omap_i2c_bus_platform_data i2c_pdata[OMAP_I2C_MAX_CONTROLLERS]; static struct platform_device omap_i2c_devices[] = { I2C_DEV_BUILDER(1, i2c_resources[0], &i2c_pdata[0]), -#if defined(CONFIG_ARCH_OMAP2PLUS) - I2C_DEV_BUILDER(2, i2c_resources[1], &i2c_pdata[1]), -#endif -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) - I2C_DEV_BUILDER(3, i2c_resources[2], &i2c_pdata[2]), -#endif -#if defined(CONFIG_ARCH_OMAP4) - 
I2C_DEV_BUILDER(4, i2c_resources[3], &i2c_pdata[3]), -#endif }; #define OMAP_I2C_CMDLINE_SETUP (BIT(31)) @@ -109,35 +93,20 @@ static int __init omap_i2c_nr_ports(void) return ports; } -/* Shared between omap2 and 3 */ -static resource_size_t omap2_i2c_irq[3] __initdata = { - INT_24XX_I2C1_IRQ, - INT_24XX_I2C2_IRQ, - INT_34XX_I2C3_IRQ, -}; - -static resource_size_t omap4_i2c_irq[4] __initdata = { - OMAP44XX_IRQ_I2C1, - OMAP44XX_IRQ_I2C2, - OMAP44XX_IRQ_I2C3, - OMAP44XX_IRQ_I2C4, -}; - -static inline int omap1_i2c_add_bus(struct platform_device *pdev, int bus_id) +static inline int omap1_i2c_add_bus(int bus_id) { - struct omap_i2c_bus_platform_data *pd; - struct resource *res; - - pd = pdev->dev.platform_data; - res = pdev->resource; - res[0].start = OMAP1_I2C_BASE; - res[0].end = res[0].start + OMAP_I2C_SIZE; - res[1].start = INT_I2C; + struct platform_device *pdev; + struct omap_i2c_bus_platform_data *pdata; + omap1_i2c_mux_pins(bus_id); + pdev = &omap_i2c_devices[bus_id - 1]; + pdata = &i2c_pdata[bus_id - 1]; + return platform_device_register(pdev); } + /* * XXX This function is a temporary compatibility wrapper - only * needed until the I2C driver can be converted to call @@ -148,52 +117,57 @@ static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t) omap_pm_set_max_mpu_wakeup_lat(dev, t); } -static inline int omap2_i2c_add_bus(struct platform_device *pdev, int bus_id) -{ - struct resource *res; - resource_size_t *irq; +static struct omap_device_pm_latency omap_i2c_latency[] = { + [0] = { + .deactivate_func = omap_device_idle_hwmods, + .activate_func = omap_device_enable_hwmods, + .flags = OMAP_DEVICE_LATENCY_AUTO_ADJUST, + }, +}; - res = pdev->resource; +static inline int omap2_i2c_add_bus(int bus_id) +{ + int l; + struct omap_hwmod *oh; + struct omap_device *od; + char oh_name[MAX_OMAP_I2C_HWMOD_NAME_LEN]; + struct omap_i2c_bus_platform_data *pdata; - if (!cpu_is_omap44xx()) - irq = omap2_i2c_irq; - else - irq = omap4_i2c_irq; + 
omap2_i2c_mux_pins(bus_id); - if (bus_id == 1) { - res[0].start = OMAP2_I2C_BASE1; - res[0].end = res[0].start + OMAP_I2C_SIZE; + l = snprintf(oh_name, MAX_OMAP_I2C_HWMOD_NAME_LEN, "i2c%d", bus_id); + WARN(l >= MAX_OMAP_I2C_HWMOD_NAME_LEN, + "String buffer overflow in I2C%d device setup\n", bus_id); + oh = omap_hwmod_lookup(oh_name); + if (!oh) { + pr_err("Could not look up %s\n", oh_name); + return -EEXIST; } - res[1].start = irq[bus_id - 1]; - omap2_i2c_mux_pins(bus_id); - + pdata = &i2c_pdata[bus_id - 1]; /* * When waiting for completion of a i2c transfer, we need to * set a wake up latency constraint for the MPU. This is to * ensure quick enough wakeup from idle, when transfer * completes. + * Only omap3 has support for constraints */ - if (cpu_is_omap34xx()) { - struct omap_i2c_bus_platform_data *pd; - - pd = pdev->dev.platform_data; - pd->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; - } - - return platform_device_register(pdev); + if (cpu_is_omap34xx()) + pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; + od = omap_device_build(name, bus_id, oh, pdata, + sizeof(struct omap_i2c_bus_platform_data), + omap_i2c_latency, ARRAY_SIZE(omap_i2c_latency), 0); + WARN(IS_ERR(od), "Could not build omap_device for %s\n", name); + + return PTR_ERR(od); } static int __init omap_i2c_add_bus(int bus_id) { - struct platform_device *pdev; - - pdev = &omap_i2c_devices[bus_id - 1]; - if (cpu_class_is_omap1()) - return omap1_i2c_add_bus(pdev, bus_id); + return omap1_i2c_add_bus(bus_id); else - return omap2_i2c_add_bus(pdev, bus_id); + return omap2_i2c_add_bus(bus_id); } /** diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 78ebf507ce56..7472449cbb74 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -1,9 +1,14 @@ #ifndef __I2C_OMAP_H__ #define __I2C_OMAP_H__ +#include + struct omap_i2c_bus_platform_data { u32 clkrate; void (*set_mpu_wkup_lat)(struct device *dev, long set); + int (*device_enable) (struct 
platform_device *pdev); + int (*device_shutdown) (struct platform_device *pdev); + int (*device_idle) (struct platform_device *pdev); }; #endif -- cgit v1.2.3-71-gd317 From 074e61ec3751da9ab88ee66d3818574556c03489 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 10 Nov 2010 09:01:31 +1100 Subject: kernel: add roundup() code comment from akpm Add roundup() code comment from akpm. Signed-off-by: Andrew Morton Signed-off-by: James Morris --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b526947bdf48..3f648d204c37 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -58,6 +58,8 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */ #define roundup(x, y) ( \ { \ const typeof(y) __y = y; \ -- cgit v1.2.3-71-gd317 From f6cd24777513fcc673d432cc29ef59881d3e4df1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Nov 2010 11:13:48 +0100 Subject: irq: Better struct irqaction layout We currently use kmalloc-96 slab for struct irqaction allocations on 64bit arches. This is unfortunate because of possible false sharing and two cache line accesses. Move 'name' and 'dir' fields at the end of the structure, and force a suitable alignment. Hot path fields now use one cache line on x86_64. 
Signed-off-by: Eric Dumazet Reviewed-by: Andi Kleen Cc: Peter Zijlstra LKML-Reference: <1288865628.2659.69.camel@edumazet-laptop> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 79d0c4f6d071..55e0d4253e49 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - const char *name; void *dev_id; struct irqaction *next; int irq; - struct proc_dir_entry *dir; irq_handler_t thread_fn; struct task_struct *thread; unsigned long thread_flags; -}; + const char *name; + struct proc_dir_entry *dir; +} ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); -- cgit v1.2.3-71-gd317 From da1d39e3903bc35be2b5e8d2116fdd5d337244d4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 17:47:02 +0900 Subject: mmc, sh: Move constants to sh_mmcif.h This moves some constants from sh_mmcif.c to sh_mmcif.h so that they can be used in sh_mmcif_boot_init(). It also alters the definition of SOFT_RST_OFF from (0 << 31) to ~SOFT_RST_ON (= ~(1 << 31)). The former seems bogus. The latter is consistent with the code in sh_mmcif_boot_init(). 
Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- drivers/mmc/host/sh_mmcif.c | 23 ----------------------- include/linux/mmc/sh_mmcif.h | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index ddd09840520b..3f492730ec05 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -62,25 +62,6 @@ /* CE_BLOCK_SET */ #define BLOCK_SIZE_MASK 0x0000ffff -/* CE_CLK_CTRL */ -#define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ -#define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) -#define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) -#define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ -#define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ - (1 << 9) | (1 << 8)) /* resp busy timeout */ -#define SRWDTO_29 ((1 << 7) | (1 << 6) | \ - (1 << 5) | (1 << 4)) /* read/write timeout */ -#define SCCSTO_29 ((1 << 3) | (1 << 2) | \ - (1 << 1) | (1 << 0)) /* ccs timeout */ - -/* CE_BUF_ACC */ -#define BUF_ACC_DMAWEN (1 << 25) -#define BUF_ACC_DMAREN (1 << 24) -#define BUF_ACC_BUSW_32 (0 << 17) -#define BUF_ACC_BUSW_16 (1 << 17) -#define BUF_ACC_ATYP (1 << 16) - /* CE_INT */ #define INT_CCSDE (1 << 29) #define INT_CMD12DRE (1 << 26) @@ -165,10 +146,6 @@ STS2_AC12BSYTO | STS2_RSPBSYTO | \ STS2_AC12RSPTO | STS2_RSPTO) -/* CE_VERSION */ -#define SOFT_RST_ON (1 << 31) -#define SOFT_RST_OFF (0 << 31) - #define CLKDEV_EMMC_DATA 52000000 /* 52MHz */ #define CLKDEV_MMC_DATA 20000000 /* 20MHz */ #define CLKDEV_INIT 400000 /* 400 KHz */ diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 5c99da1078aa..a6bfa5296495 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -59,6 +59,29 @@ struct sh_mmcif_plat_data { #define MMCIF_CE_HOST_STS2 0x0000004C #define MMCIF_CE_VERSION 0x0000007C +/* CE_BUF_ACC */ +#define 
BUF_ACC_DMAWEN (1 << 25) +#define BUF_ACC_DMAREN (1 << 24) +#define BUF_ACC_BUSW_32 (0 << 17) +#define BUF_ACC_BUSW_16 (1 << 17) +#define BUF_ACC_ATYP (1 << 16) + +/* CE_CLK_CTRL */ +#define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ +#define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ +#define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ + (1 << 9) | (1 << 8)) /* resp busy timeout */ +#define SRWDTO_29 ((1 << 7) | (1 << 6) | \ + (1 << 5) | (1 << 4)) /* read/write timeout */ +#define SCCSTO_29 ((1 << 3) | (1 << 2) | \ + (1 << 1) | (1 << 0)) /* ccs timeout */ + +/* CE_VERSION */ +#define SOFT_RST_ON (1 << 31) +#define SOFT_RST_OFF ~SOFT_RST_ON + static inline u32 sh_mmcif_readl(void __iomem *addr, int reg) { return readl(addr + reg); @@ -149,17 +172,23 @@ static inline void sh_mmcif_boot_init(void __iomem *base) /* reset */ tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | 0x80000000); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & ~0x80000000); + sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON); + sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF); /* byte swap */ - sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, 0x00010000); + sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP); /* Set block size in MMCIF hardware */ sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS); - /* Enable the clock, set it to Bus clock/256 (about 325Khz)*/ - sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01072fff); + /* Enable the clock, set it to Bus clock/256 (about 325Khz). + * It is unclear where 0x70000 comes from or if it is even needed. + * It is there for byte-compatibility with code that is known to + * work. 
+ */ + sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, + CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | + SCCSTO_29 | 0x70000); /* CMD0 */ sh_mmcif_boot_cmd(base, 0x00000040, 0); -- cgit v1.2.3-71-gd317 From 5e4f083f78d03e9f8d2e327daccde16976f9bb00 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Sun, 24 Oct 2010 11:50:53 +0800 Subject: hrtimer: Remove stale comment on curr_timer curr_timer doesn't reside in struct hrtimer_cpu_base anymore. Signed-off-by: Yong Zhang LKML-Reference: <1287892253-2587-1-git-send-email-yong.zhang0@gmail.com> Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0c1b857d3d..dd9954b79342 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -158,7 +158,6 @@ struct hrtimer_clock_base { * @lock: lock protecting the base and associated clock bases * and timers * @clock_base: array of clock bases for this cpu - * @curr_timer: the timer which is executing a callback right now * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode -- cgit v1.2.3-71-gd317 From 0eadcc09203349b11ca477ec367079b23d32ab91 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Mon, 1 Nov 2010 18:18:24 +0200 Subject: usb: USB3.0 ch11 definitions Adding hub SuperSpeed usb definitions as defined by ch10 of the USB3.0 spec. 
Signed-off-by: Tatyana Brokhman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch11.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/hcd.h | 4 ++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h index 119194c85d10..10ec0699bea4 100644 --- a/include/linux/usb/ch11.h +++ b/include/linux/usb/ch11.h @@ -27,6 +27,13 @@ #define HUB_GET_TT_STATE 10 #define HUB_STOP_TT 11 +/* + * Hub class additional requests defined by USB 3.0 spec + * See USB 3.0 spec Table 10-6 + */ +#define HUB_SET_DEPTH 12 +#define HUB_GET_PORT_ERR_COUNT 13 + /* * Hub Class feature numbers * See USB 2.0 spec Table 11-17 @@ -55,6 +62,20 @@ #define USB_PORT_FEAT_INDICATOR 22 #define USB_PORT_FEAT_C_PORT_L1 23 +/* + * Port feature selectors added by USB 3.0 spec. + * See USB 3.0 spec Table 10-7 + */ +#define USB_PORT_FEAT_LINK_STATE 5 +#define USB_PORT_FEAT_U1_TIMEOUT 23 +#define USB_PORT_FEAT_U2_TIMEOUT 24 +#define USB_PORT_FEAT_C_LINK_STATE 25 +#define USB_PORT_FEAT_C_CONFIG_ERR 26 +#define USB_PORT_FEAT_REMOTE_WAKE_MASK 27 +#define USB_PORT_FEAT_BH_PORT_RESET 28 +#define USB_PORT_FEAT_C_BH_PORT_RESET 29 +#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30 + /* * Hub Status and Hub Change results * See USB 2.0 spec Table 11-19 and Table 11-20 @@ -83,6 +104,32 @@ struct usb_port_status { /* bits 13 to 15 are reserved */ #define USB_PORT_STAT_SUPER_SPEED 0x8000 /* Linux-internal */ +/* + * Additions to wPortStatus bit field from USB 3.0 + * See USB 3.0 spec Table 10-10 + */ +#define USB_PORT_STAT_LINK_STATE 0x01e0 +#define USB_SS_PORT_STAT_POWER 0x0200 +#define USB_PORT_STAT_SPEED_5GBPS 0x0000 +/* Valid only if port is enabled */ + +/* + * Definitions for PORT_LINK_STATE values + * (bits 5-8) in wPortStatus + */ +#define USB_SS_PORT_LS_U0 0x0000 +#define USB_SS_PORT_LS_U1 0x0020 +#define USB_SS_PORT_LS_U2 0x0040 +#define USB_SS_PORT_LS_U3 0x0060 +#define USB_SS_PORT_LS_SS_DISABLED 0x0080 +#define 
USB_SS_PORT_LS_RX_DETECT 0x00a0 +#define USB_SS_PORT_LS_SS_INACTIVE 0x00c0 +#define USB_SS_PORT_LS_POLLING 0x00e0 +#define USB_SS_PORT_LS_RECOVERY 0x0100 +#define USB_SS_PORT_LS_HOT_RESET 0x0120 +#define USB_SS_PORT_LS_COMP_MOD 0x0140 +#define USB_SS_PORT_LS_LOOPBACK 0x0160 + /* * wPortChange bit field * See USB 2.0 spec Table 11-22 diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 0b6e751ea0b1..dd6ee49a0844 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -471,6 +471,10 @@ extern void usb_ep0_reinit(struct usb_device *); /*-------------------------------------------------------------------------*/ +/* class requests from USB 3.0 hub spec, table 10-5 */ +#define SetHubDepth (0x3000 | HUB_SET_DEPTH) +#define GetPortErrorCount (0x8000 | HUB_GET_PORT_ERR_COUNT) + /* * Generic bandwidth allocation constants/support */ -- cgit v1.2.3-71-gd317 From af3b8881f4c9852eefe9c7f1a997b3ecf580561b Mon Sep 17 00:00:00 2001 From: Russ Gorby Date: Tue, 26 Oct 2010 14:13:52 +0100 Subject: ifx6x60: SPI protocol driver for Infineon 6x60 modem Prototype driver for the IFX6x60 series of SPI attached modems by Jim Stanley and Russ Gorby Signed-off-by: Russ Gorby [Some reworking and a major cleanup] Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/serial/Kconfig | 6 + drivers/serial/Makefile | 1 + drivers/serial/ifx6x60.c | 1402 +++++++++++++++++++++++++++++++++++++++++ drivers/serial/ifx6x60.h | 129 ++++ include/linux/spi/ifx_modem.h | 14 + 5 files changed, 1552 insertions(+) create mode 100644 drivers/serial/ifx6x60.c create mode 100644 drivers/serial/ifx6x60.h create mode 100644 include/linux/spi/ifx_modem.h (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index aff9dcd051c6..0b9cc17b380b 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1632,4 +1632,10 @@ config SERIAL_ALTERA_UART_CONSOLE help Enable a Altera UART port to be the system console. 
+config SERIAL_IFX6X60 + tristate "SPI protocol driver for Infineon 6x60 modem (EXPERIMENTAL)" + depends on GPIOLIB && EXPERIMENTAL + help + Support for the IFX6x60 modem devices on Intel MID platforms. + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index c5705765454f..783638b10698 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -89,3 +89,4 @@ obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o obj-$(CONFIG_SERIAL_MRST_MAX3110) += mrst_max3110.o obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o +obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o diff --git a/drivers/serial/ifx6x60.c b/drivers/serial/ifx6x60.c new file mode 100644 index 000000000000..b9b7e0601961 --- /dev/null +++ b/drivers/serial/ifx6x60.c @@ -0,0 +1,1402 @@ +/**************************************************************************** + * + * Driver for the IFX 6x60 spi modem. + * + * Copyright (C) 2008 Option International + * Copyright (C) 2008 Filip Aben + * Denis Joseph Barrow + * Jan Dumon + * + * Copyright (C) 2009, 2010 Intel Corp + * Russ Gorby + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA + * + * Driver modified by Intel from Option gtm501l_spi.c + * + * Notes + * o The driver currently assumes a single device only. 
If you need to + * change this then look for saved_ifx_dev and add a device lookup + * o The driver is intended to be big-endian safe but has never been + * tested that way (no suitable hardware). There are a couple of FIXME + * notes by areas that may need addressing + * o Some of the GPIO naming/setup assumptions may need revisiting if + * you need to use this driver for another platform. + * + *****************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ifx6x60.h" + +#define IFX_SPI_MORE_MASK 0x10 +#define IFX_SPI_MORE_BIT 12 /* bit position in u16 */ +#define IFX_SPI_CTS_BIT 13 /* bit position in u16 */ +#define IFX_SPI_TTY_ID 0 +#define IFX_SPI_TIMEOUT_SEC 2 +#define IFX_SPI_HEADER_0 (-1) +#define IFX_SPI_HEADER_F (-2) + +/* forward reference */ +static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev); + +/* local variables */ +static int spi_b16 = 1; /* 8 or 16 bit word length */ +static struct tty_driver *tty_drv; +static struct ifx_spi_device *saved_ifx_dev; +static struct lock_class_key ifx_spi_key; + +/* GPIO/GPE settings */ + +/** + * mrdy_set_high - set MRDY GPIO + * @ifx: device we are controlling + * + */ +static inline void mrdy_set_high(struct ifx_spi_device *ifx) +{ + gpio_set_value(ifx->gpio.mrdy, 1); +} + +/** + * mrdy_set_low - clear MRDY GPIO + * @ifx: device we are controlling + * + */ +static inline void mrdy_set_low(struct ifx_spi_device *ifx) +{ + gpio_set_value(ifx->gpio.mrdy, 0); +} + +/** + * ifx_spi_power_state_set + * @ifx_dev: our SPI device + * @val: bits to set + * + * Set bit in power status and signal power system if status becomes non-0 + */ +static void +ifx_spi_power_state_set(struct ifx_spi_device *ifx_dev, unsigned char val) +{ + unsigned long flags; + 
+ spin_lock_irqsave(&ifx_dev->power_lock, flags); + + /* + * if power status is already non-0, just update, else + * tell power system + */ + if (!ifx_dev->power_status) + pm_runtime_get(&ifx_dev->spi_dev->dev); + ifx_dev->power_status |= val; + + spin_unlock_irqrestore(&ifx_dev->power_lock, flags); +} + +/** + * ifx_spi_power_state_clear - clear power bit + * @ifx_dev: our SPI device + * @val: bits to clear + * + * clear bit in power status and signal power system if status becomes 0 + */ +static void +ifx_spi_power_state_clear(struct ifx_spi_device *ifx_dev, unsigned char val) +{ + unsigned long flags; + + spin_lock_irqsave(&ifx_dev->power_lock, flags); + + if (ifx_dev->power_status) { + ifx_dev->power_status &= ~val; + if (!ifx_dev->power_status) + pm_runtime_put(&ifx_dev->spi_dev->dev); + } + + spin_unlock_irqrestore(&ifx_dev->power_lock, flags); +} + +/** + * swap_buf + * @buf: our buffer + * @len : number of bytes (not words) in the buffer + * @end: end of buffer + * + * Swap the contents of a buffer into big endian format + */ +static inline void swap_buf(u16 *buf, int len, void *end) +{ + int n; + + len = ((len + 1) >> 1); + if ((void *)&buf[len] > end) { + pr_err("swap_buf: swap exceeds boundary (%p > %p)!", + &buf[len], end); + return; + } + for (n = 0; n < len; n++) { + *buf = cpu_to_be16(*buf); + buf++; + } +} + +/** + * mrdy_assert - assert MRDY line + * @ifx_dev: our SPI device + * + * Assert mrdy and set timer to wait for SRDY interrupt, if SRDY is low + * now. + * + * FIXME: Can SRDY even go high as we are running this code ? 
+ */ +static void mrdy_assert(struct ifx_spi_device *ifx_dev) +{ + int val = gpio_get_value(ifx_dev->gpio.srdy); + if (!val) { + if (!test_and_set_bit(IFX_SPI_STATE_TIMER_PENDING, + &ifx_dev->flags)) { + ifx_dev->spi_timer.expires = + jiffies + IFX_SPI_TIMEOUT_SEC*HZ; + add_timer(&ifx_dev->spi_timer); + + } + } + ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_DATA_PENDING); + mrdy_set_high(ifx_dev); +} + +/** + * ifx_spi_hangup - hang up an IFX device + * @ifx_dev: our SPI device + * + * Hang up the tty attached to the IFX device if one is currently + * open. If not take no action + */ +static void ifx_spi_ttyhangup(struct ifx_spi_device *ifx_dev) +{ + struct tty_port *pport = &ifx_dev->tty_port; + struct tty_struct *tty = tty_port_tty_get(pport); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } +} + +/** + * ifx_spi_timeout - SPI timeout + * @arg: our SPI device + * + * The SPI has timed out: hang up the tty. Users will then see a hangup + * and error events. + */ +static void ifx_spi_timeout(unsigned long arg) +{ + struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *)arg; + + dev_warn(&ifx_dev->spi_dev->dev, "*** SPI Timeout ***"); + ifx_spi_ttyhangup(ifx_dev); + mrdy_set_low(ifx_dev); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); +} + +/* char/tty operations */ + +/** + * ifx_spi_tiocmget - get modem lines + * @tty: our tty device + * @filp: file handle issuing the request + * + * Map the signal state into Linux modem flags and report the value + * in Linux terms + */ +static int ifx_spi_tiocmget(struct tty_struct *tty, struct file *filp) +{ + unsigned int value; + struct ifx_spi_device *ifx_dev = tty->driver_data; + + value = + (test_bit(IFX_SPI_RTS, &ifx_dev->signal_state) ? TIOCM_RTS : 0) | + (test_bit(IFX_SPI_DTR, &ifx_dev->signal_state) ? TIOCM_DTR : 0) | + (test_bit(IFX_SPI_CTS, &ifx_dev->signal_state) ? TIOCM_CTS : 0) | + (test_bit(IFX_SPI_DSR, &ifx_dev->signal_state) ? 
TIOCM_DSR : 0) | + (test_bit(IFX_SPI_DCD, &ifx_dev->signal_state) ? TIOCM_CAR : 0) | + (test_bit(IFX_SPI_RI, &ifx_dev->signal_state) ? TIOCM_RNG : 0); + return value; +} + +/** + * ifx_spi_tiocmset - set modem bits + * @tty: the tty structure + * @filp: file handle issuing the request + * @set: bits to set + * @clear: bits to clear + * + * The IFX6x60 only supports DTR and RTS. Set them accordingly + * and flag that an update to the modem is needed. + * + * FIXME: do we need to kick the tranfers when we do this ? + */ +static int ifx_spi_tiocmset(struct tty_struct *tty, struct file *filp, + unsigned int set, unsigned int clear) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + + if (set & TIOCM_RTS) + set_bit(IFX_SPI_RTS, &ifx_dev->signal_state); + if (set & TIOCM_DTR) + set_bit(IFX_SPI_DTR, &ifx_dev->signal_state); + if (clear & TIOCM_RTS) + clear_bit(IFX_SPI_RTS, &ifx_dev->signal_state); + if (clear & TIOCM_DTR) + clear_bit(IFX_SPI_DTR, &ifx_dev->signal_state); + + set_bit(IFX_SPI_UPDATE, &ifx_dev->signal_state); + return 0; +} + +/** + * ifx_spi_open - called on tty open + * @tty: our tty device + * @filp: file handle being associated with the tty + * + * Open the tty interface. We let the tty_port layer do all the work + * for us. + * + * FIXME: Remove single device assumption and saved_ifx_dev + */ +static int ifx_spi_open(struct tty_struct *tty, struct file *filp) +{ + return tty_port_open(&saved_ifx_dev->tty_port, tty, filp); +} + +/** + * ifx_spi_close - called when our tty closes + * @tty: the tty being closed + * @filp: the file handle being closed + * + * Perform the close of the tty. We use the tty_port layer to do all + * our hard work. + */ +static void ifx_spi_close(struct tty_struct *tty, struct file *filp) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + tty_port_close(&ifx_dev->tty_port, tty, filp); + /* FIXME: should we do an ifx_spi_reset here ? 
*/ +} + +/** + * ifx_decode_spi_header - decode received header + * @buffer: the received data + * @length: decoded length + * @more: decoded more flag + * @received_cts: status of cts we received + * + * Note how received_cts is handled -- if header is all F it is left + * the same as it was, if header is all 0 it is set to 0 otherwise it is + * taken from the incoming header. + * + * FIXME: endianness + */ +static int ifx_spi_decode_spi_header(unsigned char *buffer, int *length, + unsigned char *more, unsigned char *received_cts) +{ + u16 h1; + u16 h2; + u16 *in_buffer = (u16 *)buffer; + + h1 = *in_buffer; + h2 = *(in_buffer+1); + + if (h1 == 0 && h2 == 0) { + *received_cts = 0; + return IFX_SPI_HEADER_0; + } else if (h1 == 0xffff && h2 == 0xffff) { + /* spi_slave_cts remains as it was */ + return IFX_SPI_HEADER_F; + } + + *length = h1 & 0xfff; /* upper bits of byte are flags */ + *more = (buffer[1] >> IFX_SPI_MORE_BIT) & 1; + *received_cts = (buffer[3] >> IFX_SPI_CTS_BIT) & 1; + return 0; +} + +/** + * ifx_setup_spi_header - set header fields + * @txbuffer: pointer to start of SPI buffer + * @tx_count: bytes + * @more: indicate if more to follow + * + * Format up an SPI header for a transfer + * + * FIXME: endianness? + */ +static void ifx_spi_setup_spi_header(unsigned char *txbuffer, int tx_count, + unsigned char more) +{ + *(u16 *)(txbuffer) = tx_count; + *(u16 *)(txbuffer+2) = IFX_SPI_PAYLOAD_SIZE; + txbuffer[1] |= (more << IFX_SPI_MORE_BIT) & IFX_SPI_MORE_MASK; +} + +/** + * ifx_spi_wakeup_serial - SPI space made + * @port_data: our SPI device + * + * We have emptied the FIFO enough that we want to get more data + * queued into it. 
Poke the line discipline via tty_wakeup so that + * it will feed us more bits + */ +static void ifx_spi_wakeup_serial(struct ifx_spi_device *ifx_dev) +{ + struct tty_struct *tty; + + tty = tty_port_tty_get(&ifx_dev->tty_port); + if (!tty) + return; + tty_wakeup(tty); + tty_kref_put(tty); +} + +/** + * ifx_spi_prepare_tx_buffer - prepare transmit frame + * @ifx_dev: our SPI device + * + * The transmit buffr needs a header and various other bits of + * information followed by as much data as we can pull from the FIFO + * and transfer. This function formats up a suitable buffer in the + * ifx_dev->tx_buffer + * + * FIXME: performance - should we wake the tty when the queue is half + * empty ? + */ +static int ifx_spi_prepare_tx_buffer(struct ifx_spi_device *ifx_dev) +{ + int temp_count; + int queue_length; + int tx_count; + unsigned char *tx_buffer; + + tx_buffer = ifx_dev->tx_buffer; + memset(tx_buffer, 0, IFX_SPI_TRANSFER_SIZE); + + /* make room for required SPI header */ + tx_buffer += IFX_SPI_HEADER_OVERHEAD; + tx_count = IFX_SPI_HEADER_OVERHEAD; + + /* clear to signal no more data if this turns out to be the + * last buffer sent in a sequence */ + ifx_dev->spi_more = 0; + + /* if modem cts is set, just send empty buffer */ + if (!ifx_dev->spi_slave_cts) { + /* see if there's tx data */ + queue_length = kfifo_len(&ifx_dev->tx_fifo); + if (queue_length != 0) { + /* data to mux -- see if there's room for it */ + temp_count = min(queue_length, IFX_SPI_PAYLOAD_SIZE); + temp_count = kfifo_out_locked(&ifx_dev->tx_fifo, + tx_buffer, temp_count, + &ifx_dev->fifo_lock); + + /* update buffer pointer and data count in message */ + tx_buffer += temp_count; + tx_count += temp_count; + if (temp_count == queue_length) + /* poke port to get more data */ + ifx_spi_wakeup_serial(ifx_dev); + else /* more data in port, use next SPI message */ + ifx_dev->spi_more = 1; + } + } + /* have data and info for header -- set up SPI header in buffer */ + /* spi header needs payload size, not 
entire buffer size */ + ifx_spi_setup_spi_header(ifx_dev->tx_buffer, + tx_count-IFX_SPI_HEADER_OVERHEAD, + ifx_dev->spi_more); + /* swap actual data in the buffer */ + swap_buf((u16 *)(ifx_dev->tx_buffer), tx_count, + &ifx_dev->tx_buffer[IFX_SPI_TRANSFER_SIZE]); + return tx_count; +} + +/** + * ifx_spi_write - line discipline write + * @tty: our tty device + * @buf: pointer to buffer to write (kernel space) + * @count: size of buffer + * + * Write the characters we have been given into the FIFO. If the device + * is not active then activate it, when the SRDY line is asserted back + * this will commence I/O + */ +static int ifx_spi_write(struct tty_struct *tty, const unsigned char *buf, + int count) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + unsigned char *tmp_buf = (unsigned char *)buf; + int tx_count = kfifo_in_locked(&ifx_dev->tx_fifo, tmp_buf, count, + &ifx_dev->fifo_lock); + mrdy_assert(ifx_dev); + return tx_count; +} + +/** + * ifx_spi_chars_in_buffer - line discipline helper + * @tty: our tty device + * + * Report how much data we can accept before we drop bytes. As we use + * a simple FIFO this is nice and easy. + */ +static int ifx_spi_write_room(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + return IFX_SPI_FIFO_SIZE - kfifo_len(&ifx_dev->tx_fifo); +} + +/** + * ifx_spi_chars_in_buffer - line discipline helper + * @tty: our tty device + * + * Report how many characters we have buffered. In our case this is the + * number of bytes sitting in our transmit FIFO. + */ +static int ifx_spi_chars_in_buffer(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + return kfifo_len(&ifx_dev->tx_fifo); +} + +/** + * ifx_port_hangup + * @port: our tty port + * + * tty port hang up. Called when tty_hangup processing is invoked either + * by loss of carrier, or by software (eg vhangup). Serialized against + * activate/shutdown by the tty layer. 
+ */ +static void ifx_spi_hangup(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + tty_port_hangup(&ifx_dev->tty_port); +} + +/** + * ifx_port_activate + * @port: our tty port + * + * tty port activate method - called for first open. Serialized + * with hangup and shutdown by the tty layer. + */ +static int ifx_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = + container_of(port, struct ifx_spi_device, tty_port); + + /* clear any old data; can't do this in 'close' */ + kfifo_reset(&ifx_dev->tx_fifo); + + /* put port data into this tty */ + tty->driver_data = ifx_dev; + + /* allows flip string push from int context */ + tty->low_latency = 1; + + return 0; +} + +/** + * ifx_port_shutdown + * @port: our tty port + * + * tty port shutdown method - called for last port close. Serialized + * with hangup and activate by the tty layer. + */ +static void ifx_port_shutdown(struct tty_port *port) +{ + struct ifx_spi_device *ifx_dev = + container_of(port, struct ifx_spi_device, tty_port); + + mrdy_set_low(ifx_dev); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); + tasklet_kill(&ifx_dev->io_work_tasklet); +} + +static const struct tty_port_operations ifx_tty_port_ops = { + .activate = ifx_port_activate, + .shutdown = ifx_port_shutdown, +}; + +static const struct tty_operations ifx_spi_serial_ops = { + .open = ifx_spi_open, + .close = ifx_spi_close, + .write = ifx_spi_write, + .hangup = ifx_spi_hangup, + .write_room = ifx_spi_write_room, + .chars_in_buffer = ifx_spi_chars_in_buffer, + .tiocmget = ifx_spi_tiocmget, + .tiocmset = ifx_spi_tiocmset, +}; + +/** + * ifx_spi_insert_fip_string - queue received data + * @ifx_ser: our SPI device + * @chars: buffer we have received + * @size: number of chars reeived + * + * Queue bytes to the tty assuming the tty side is currently open. If + * not the discard the data. 
+ */ +static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev, + unsigned char *chars, size_t size) +{ + struct tty_struct *tty = tty_port_tty_get(&ifx_dev->tty_port); + if (!tty) + return; + tty_insert_flip_string(tty, chars, size); + tty_flip_buffer_push(tty); + tty_kref_put(tty); +} + +/** + * ifx_spi_complete - SPI transfer completed + * @ctx: our SPI device + * + * An SPI transfer has completed. Process any received data and kick off + * any further transmits we can commence. + */ +static void ifx_spi_complete(void *ctx) +{ + struct ifx_spi_device *ifx_dev = ctx; + struct tty_struct *tty; + struct tty_ldisc *ldisc = NULL; + int length; + int actual_length; + unsigned char more; + unsigned char cts; + int local_write_pending = 0; + int queue_length; + int srdy; + int decode_result; + + mrdy_set_low(ifx_dev); + + if (!ifx_dev->spi_msg.status) { + /* check header validity, get comm flags */ + swap_buf((u16 *)ifx_dev->rx_buffer, IFX_SPI_HEADER_OVERHEAD, + &ifx_dev->rx_buffer[IFX_SPI_HEADER_OVERHEAD]); + decode_result = ifx_spi_decode_spi_header(ifx_dev->rx_buffer, + &length, &more, &cts); + if (decode_result == IFX_SPI_HEADER_0) { + dev_dbg(&ifx_dev->spi_dev->dev, + "ignore input: invalid header 0"); + ifx_dev->spi_slave_cts = 0; + goto complete_exit; + } else if (decode_result == IFX_SPI_HEADER_F) { + dev_dbg(&ifx_dev->spi_dev->dev, + "ignore input: invalid header F"); + goto complete_exit; + } + + ifx_dev->spi_slave_cts = cts; + + actual_length = min((unsigned int)length, + ifx_dev->spi_msg.actual_length); + swap_buf((u16 *)(ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD), + actual_length, + &ifx_dev->rx_buffer[IFX_SPI_TRANSFER_SIZE]); + ifx_spi_insert_flip_string( + ifx_dev, + ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD, + (size_t)actual_length); + } else { + dev_dbg(&ifx_dev->spi_dev->dev, "SPI transfer error %d", + ifx_dev->spi_msg.status); + } + +complete_exit: + if (ifx_dev->write_pending) { + ifx_dev->write_pending = 0; + local_write_pending = 
1; + } + + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &(ifx_dev->flags)); + + queue_length = kfifo_len(&ifx_dev->tx_fifo); + srdy = gpio_get_value(ifx_dev->gpio.srdy); + if (!srdy) + ifx_spi_power_state_clear(ifx_dev, IFX_SPI_POWER_SRDY); + + /* schedule output if there is more to do */ + if (test_and_clear_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags)) + tasklet_schedule(&ifx_dev->io_work_tasklet); + else { + if (more || ifx_dev->spi_more || queue_length > 0 || + local_write_pending) { + if (ifx_dev->spi_slave_cts) { + if (more) + mrdy_assert(ifx_dev); + } else + mrdy_assert(ifx_dev); + } else { + /* + * poke line discipline driver if any for more data + * may or may not get more data to write + * for now, say not busy + */ + ifx_spi_power_state_clear(ifx_dev, + IFX_SPI_POWER_DATA_PENDING); + tty = tty_port_tty_get(&ifx_dev->tty_port); + if (tty) { + ldisc = tty_ldisc_ref(tty); + if (ldisc) { + ldisc->ops->write_wakeup(tty); + tty_ldisc_deref(ldisc); + } + tty_kref_put(tty); + } + } + } +} + +/** + * ifx_spio_io - I/O tasklet + * @data: our SPI device + * + * Queue data for transmission if possible and then kick off the + * transfer. + */ +static void ifx_spi_io(unsigned long data) +{ + int retval; + struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *) data; + + if (!test_and_set_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags)) { + if (ifx_dev->gpio.unack_srdy_int_nb > 0) + ifx_dev->gpio.unack_srdy_int_nb--; + + ifx_spi_prepare_tx_buffer(ifx_dev); + + spi_message_init(&ifx_dev->spi_msg); + INIT_LIST_HEAD(&ifx_dev->spi_msg.queue); + + ifx_dev->spi_msg.context = ifx_dev; + ifx_dev->spi_msg.complete = ifx_spi_complete; + + /* set up our spi transfer */ + /* note len is BYTES, not transfers */ + ifx_dev->spi_xfer.len = IFX_SPI_TRANSFER_SIZE; + ifx_dev->spi_xfer.cs_change = 0; + ifx_dev->spi_xfer.speed_hz = 12500000; + /* ifx_dev->spi_xfer.speed_hz = 390625; */ + ifx_dev->spi_xfer.bits_per_word = spi_b16 ? 
16 : 8; + + ifx_dev->spi_xfer.tx_buf = ifx_dev->tx_buffer; + ifx_dev->spi_xfer.rx_buf = ifx_dev->rx_buffer; + + /* + * setup dma pointers + */ + if (ifx_dev->is_6160) { + ifx_dev->spi_msg.is_dma_mapped = 1; + ifx_dev->tx_dma = ifx_dev->tx_bus; + ifx_dev->rx_dma = ifx_dev->rx_bus; + ifx_dev->spi_xfer.tx_dma = ifx_dev->tx_dma; + ifx_dev->spi_xfer.rx_dma = ifx_dev->rx_dma; + } else { + ifx_dev->spi_msg.is_dma_mapped = 0; + ifx_dev->tx_dma = (dma_addr_t)0; + ifx_dev->rx_dma = (dma_addr_t)0; + ifx_dev->spi_xfer.tx_dma = (dma_addr_t)0; + ifx_dev->spi_xfer.rx_dma = (dma_addr_t)0; + } + + spi_message_add_tail(&ifx_dev->spi_xfer, &ifx_dev->spi_msg); + + /* Assert MRDY. This may have already been done by the write + * routine. + */ + mrdy_assert(ifx_dev); + + retval = spi_async(ifx_dev->spi_dev, &ifx_dev->spi_msg); + if (retval) { + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, + &ifx_dev->flags); + tasklet_schedule(&ifx_dev->io_work_tasklet); + return; + } + } else + ifx_dev->write_pending = 1; +} + +/** + * ifx_spi_free_port - free up the tty side + * @ifx_dev: IFX device going away + * + * Unregister and free up a port when the device goes away + */ +static void ifx_spi_free_port(struct ifx_spi_device *ifx_dev) +{ + if (ifx_dev->tty_dev) + tty_unregister_device(tty_drv, ifx_dev->minor); + kfifo_free(&ifx_dev->tx_fifo); +} + +/** + * ifx_spi_create_port - create a new port + * @ifx_dev: our spi device + * + * Allocate and initialise the tty port that goes with this interface + * and add it to the tty layer so that it can be opened. 
+ */ +static int ifx_spi_create_port(struct ifx_spi_device *ifx_dev) +{ + int ret = 0; + struct tty_port *pport = &ifx_dev->tty_port; + + spin_lock_init(&ifx_dev->fifo_lock); + lockdep_set_class_and_subclass(&ifx_dev->fifo_lock, + &ifx_spi_key, 0); + + if (kfifo_alloc(&ifx_dev->tx_fifo, IFX_SPI_FIFO_SIZE, GFP_KERNEL)) { + ret = -ENOMEM; + goto error_ret; + } + + pport->ops = &ifx_tty_port_ops; + tty_port_init(pport); + ifx_dev->minor = IFX_SPI_TTY_ID; + ifx_dev->tty_dev = tty_register_device(tty_drv, ifx_dev->minor, + &ifx_dev->spi_dev->dev); + if (IS_ERR(ifx_dev->tty_dev)) { + dev_dbg(&ifx_dev->spi_dev->dev, + "%s: registering tty device failed", __func__); + ret = PTR_ERR(ifx_dev->tty_dev); + goto error_ret; + } + return 0; + +error_ret: + ifx_spi_free_port(ifx_dev); + return ret; +} + +/** + * ifx_spi_handle_srdy - handle SRDY + * @ifx_dev: device asserting SRDY + * + * Check our device state and see what we need to kick off when SRDY + * is asserted. This usually means killing the timer and firing off the + * I/O processing. + */ +static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev) +{ + if (test_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags)) { + del_timer_sync(&ifx_dev->spi_timer); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); + } + + ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_SRDY); + + if (!test_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags)) + tasklet_schedule(&ifx_dev->io_work_tasklet); + else + set_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags); +} + +/** + * ifx_spi_srdy_interrupt - SRDY asserted + * @irq: our IRQ number + * @dev: our ifx device + * + * The modem asserted SRDY. 
Handle the srdy event + */ +static irqreturn_t ifx_spi_srdy_interrupt(int irq, void *dev) +{ + struct ifx_spi_device *ifx_dev = dev; + ifx_dev->gpio.unack_srdy_int_nb++; + ifx_spi_handle_srdy(ifx_dev); + return IRQ_HANDLED; +} + +/** + * ifx_spi_reset_interrupt - Modem has changed reset state + * @irq: interrupt number + * @dev: our device pointer + * + * The modem has either entered or left reset state. Check the GPIO + * line to see which. + * + * FIXME: review locking on MR_INPROGRESS versus + * parallel unsolicited reset/solicited reset + */ +static irqreturn_t ifx_spi_reset_interrupt(int irq, void *dev) +{ + struct ifx_spi_device *ifx_dev = dev; + int val = gpio_get_value(ifx_dev->gpio.reset_out); + int solreset = test_bit(MR_START, &ifx_dev->mdm_reset_state); + + if (val == 0) { + /* entered reset */ + set_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state); + if (!solreset) { + /* unsolicited reset */ + ifx_spi_ttyhangup(ifx_dev); + } + } else { + /* exited reset */ + clear_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state); + if (solreset) { + set_bit(MR_COMPLETE, &ifx_dev->mdm_reset_state); + wake_up(&ifx_dev->mdm_reset_wait); + } + } + return IRQ_HANDLED; +} + +/** + * ifx_spi_free_device - free device + * @ifx_dev: device to free + * + * Free the IFX device + */ +static void ifx_spi_free_device(struct ifx_spi_device *ifx_dev) +{ + ifx_spi_free_port(ifx_dev); + dma_free_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + ifx_dev->tx_buffer, + ifx_dev->tx_bus); + dma_free_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + ifx_dev->rx_buffer, + ifx_dev->rx_bus); +} + +/** + * ifx_spi_reset - reset modem + * @ifx_dev: modem to reset + * + * Perform a reset on the modem + */ +static int ifx_spi_reset(struct ifx_spi_device *ifx_dev) +{ + int ret; + /* + * set up modem power, reset + * + * delays are required on some platforms for the modem + * to reset properly + */ + set_bit(MR_START, &ifx_dev->mdm_reset_state); + gpio_set_value(ifx_dev->gpio.po, 0); + 
gpio_set_value(ifx_dev->gpio.reset, 0); + msleep(25); + gpio_set_value(ifx_dev->gpio.reset, 1); + msleep(1); + gpio_set_value(ifx_dev->gpio.po, 1); + msleep(1); + gpio_set_value(ifx_dev->gpio.po, 0); + ret = wait_event_timeout(ifx_dev->mdm_reset_wait, + test_bit(MR_COMPLETE, + &ifx_dev->mdm_reset_state), + IFX_RESET_TIMEOUT); + if (!ret) + dev_warn(&ifx_dev->spi_dev->dev, "Modem reset timeout: (state:%lx)", + ifx_dev->mdm_reset_state); + + ifx_dev->mdm_reset_state = 0; + return ret; +} + +/** + * ifx_spi_spi_probe - probe callback + * @spi: our possible matching SPI device + * + * Probe for a 6x60 modem on SPI bus. Perform any needed device and + * GPIO setup. + * + * FIXME: + * - Support for multiple devices + * - Split out MID specific GPIO handling eventually + */ + +static int ifx_spi_spi_probe(struct spi_device *spi) +{ + int ret; + int srdy; + struct ifx_modem_platform_data *pl_data = NULL; + struct ifx_spi_device *ifx_dev; + + if (saved_ifx_dev) { + dev_dbg(&spi->dev, "ignoring subsequent detection"); + return -ENODEV; + } + + /* initialize structure to hold our device variables */ + ifx_dev = kzalloc(sizeof(struct ifx_spi_device), GFP_KERNEL); + if (!ifx_dev) { + dev_err(&spi->dev, "spi device allocation failed"); + return -ENOMEM; + } + saved_ifx_dev = ifx_dev; + ifx_dev->spi_dev = spi; + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags); + spin_lock_init(&ifx_dev->write_lock); + spin_lock_init(&ifx_dev->power_lock); + ifx_dev->power_status = 0; + init_timer(&ifx_dev->spi_timer); + ifx_dev->spi_timer.function = ifx_spi_timeout; + ifx_dev->spi_timer.data = (unsigned long)ifx_dev; + ifx_dev->is_6160 = pl_data->is_6160; + + /* ensure SPI protocol flags are initialized to enable transfer */ + ifx_dev->spi_more = 0; + ifx_dev->spi_slave_cts = 0; + + /*initialize transfer and dma buffers */ + ifx_dev->tx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + &ifx_dev->tx_bus, + GFP_KERNEL); + if (!ifx_dev->tx_buffer) { + 
dev_err(&spi->dev, "DMA-TX buffer allocation failed"); + ret = -ENOMEM; + goto error_ret; + } + ifx_dev->rx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + &ifx_dev->rx_bus, + GFP_KERNEL); + if (!ifx_dev->rx_buffer) { + dev_err(&spi->dev, "DMA-RX buffer allocation failed"); + ret = -ENOMEM; + goto error_ret; + } + + /* initialize waitq for modem reset */ + init_waitqueue_head(&ifx_dev->mdm_reset_wait); + + spi_set_drvdata(spi, ifx_dev); + tasklet_init(&ifx_dev->io_work_tasklet, ifx_spi_io, + (unsigned long)ifx_dev); + + set_bit(IFX_SPI_STATE_PRESENT, &ifx_dev->flags); + + /* create our tty port */ + ret = ifx_spi_create_port(ifx_dev); + if (ret != 0) { + dev_err(&spi->dev, "create default tty port failed"); + goto error_ret; + } + + pl_data = (struct ifx_modem_platform_data *)spi->dev.platform_data; + if (pl_data) { + ifx_dev->gpio.reset = pl_data->rst_pmu; + ifx_dev->gpio.po = pl_data->pwr_on; + ifx_dev->gpio.mrdy = pl_data->mrdy; + ifx_dev->gpio.srdy = pl_data->srdy; + ifx_dev->gpio.reset_out = pl_data->rst_out; + } else { + dev_err(&spi->dev, "missing platform data!"); + ret = -ENODEV; + goto error_ret; + } + + dev_info(&spi->dev, "gpios %d, %d, %d, %d, %d", + ifx_dev->gpio.reset, ifx_dev->gpio.po, ifx_dev->gpio.mrdy, + ifx_dev->gpio.srdy, ifx_dev->gpio.reset_out); + + /* Configure gpios */ + ret = gpio_request(ifx_dev->gpio.reset, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET)", + ifx_dev->gpio.reset); + goto error_ret; + } + ret += gpio_direction_output(ifx_dev->gpio.reset, 0); + ret += gpio_export(ifx_dev->gpio.reset, 1); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (RESET)", + ifx_dev->gpio.reset); + ret = -EBUSY; + goto error_ret2; + } + + ret = gpio_request(ifx_dev->gpio.po, "ifxModem"); + ret += gpio_direction_output(ifx_dev->gpio.po, 0); + ret += gpio_export(ifx_dev->gpio.po, 1); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (ON)", + ifx_dev->gpio.po); 
+ ret = -EBUSY; + goto error_ret3; + } + + ret = gpio_request(ifx_dev->gpio.mrdy, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (MRDY)", + ifx_dev->gpio.mrdy); + goto error_ret3; + } + ret += gpio_export(ifx_dev->gpio.mrdy, 1); + ret += gpio_direction_output(ifx_dev->gpio.mrdy, 0); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (MRDY)", + ifx_dev->gpio.mrdy); + ret = -EBUSY; + goto error_ret4; + } + + ret = gpio_request(ifx_dev->gpio.srdy, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (SRDY)", + ifx_dev->gpio.srdy); + ret = -EBUSY; + goto error_ret4; + } + ret += gpio_export(ifx_dev->gpio.srdy, 1); + ret += gpio_direction_input(ifx_dev->gpio.srdy); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (SRDY)", + ifx_dev->gpio.srdy); + ret = -EBUSY; + goto error_ret5; + } + + ret = gpio_request(ifx_dev->gpio.reset_out, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET_OUT)", + ifx_dev->gpio.reset_out); + goto error_ret5; + } + ret += gpio_export(ifx_dev->gpio.reset_out, 1); + ret += gpio_direction_input(ifx_dev->gpio.reset_out); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (RESET_OUT)", + ifx_dev->gpio.reset_out); + ret = -EBUSY; + goto error_ret6; + } + + ret = request_irq(gpio_to_irq(ifx_dev->gpio.reset_out), + ifx_spi_reset_interrupt, + IRQF_TRIGGER_RISING|IRQF_TRIGGER_FALLING, DRVNAME, + (void *)ifx_dev); + if (ret) { + dev_err(&spi->dev, "Unable to get irq %x\n", + gpio_to_irq(ifx_dev->gpio.reset_out)); + goto error_ret6; + } + + ret = ifx_spi_reset(ifx_dev); + + ret = request_irq(gpio_to_irq(ifx_dev->gpio.srdy), + ifx_spi_srdy_interrupt, + IRQF_TRIGGER_RISING, DRVNAME, + (void *)ifx_dev); + if (ret) { + dev_err(&spi->dev, "Unable to get irq %x", + gpio_to_irq(ifx_dev->gpio.srdy)); + goto error_ret6; + } + + /* set pm runtime power state and register with power system */ + pm_runtime_set_active(&spi->dev); + 
pm_runtime_enable(&spi->dev); + + /* handle case that modem is already signaling SRDY */ + /* no outgoing tty open at this point, this just satisfies the + * modem's read and should reset communication properly + */ + srdy = gpio_get_value(ifx_dev->gpio.srdy); + + if (srdy) { + mrdy_assert(ifx_dev); + ifx_spi_handle_srdy(ifx_dev); + } else + mrdy_set_low(ifx_dev); + return 0; + +error_ret6: + gpio_free(ifx_dev->gpio.srdy); +error_ret5: + gpio_free(ifx_dev->gpio.mrdy); +error_ret4: + gpio_free(ifx_dev->gpio.reset); +error_ret3: + gpio_free(ifx_dev->gpio.po); +error_ret2: + gpio_free(ifx_dev->gpio.reset_out); +error_ret: + ifx_spi_free_device(ifx_dev); + saved_ifx_dev = NULL; + return ret; +} + +/** + * ifx_spi_spi_remove - SPI device was removed + * @spi: SPI device + * + * FIXME: We should be shutting the device down here not in + * the module unload path. + */ + +static int ifx_spi_spi_remove(struct spi_device *spi) +{ + struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); + /* stop activity */ + tasklet_kill(&ifx_dev->io_work_tasklet); + /* free irq */ + free_irq(gpio_to_irq(ifx_dev->gpio.reset_out), (void *)ifx_dev); + free_irq(gpio_to_irq(ifx_dev->gpio.srdy), (void *)ifx_dev); + + gpio_free(ifx_dev->gpio.srdy); + gpio_free(ifx_dev->gpio.mrdy); + gpio_free(ifx_dev->gpio.reset); + gpio_free(ifx_dev->gpio.po); + gpio_free(ifx_dev->gpio.reset_out); + + /* free allocations */ + ifx_spi_free_device(ifx_dev); + + saved_ifx_dev = NULL; + return 0; +} + +/** + * ifx_spi_spi_shutdown - called on SPI shutdown + * @spi: SPI device + * + * No action needs to be taken here + */ + +static void ifx_spi_spi_shutdown(struct spi_device *spi) +{ +} + +/* + * various suspends and resumes have nothing to do + * no hardware to save state for + */ + +/** + * ifx_spi_spi_suspend - suspend SPI on system suspend + * @dev: device being suspended + * + * Suspend the SPI side. No action needed on Intel MID platforms, may + * need extending for other systems. 
+ */ +static int ifx_spi_spi_suspend(struct spi_device *spi, pm_message_t msg) +{ + return 0; +} + +/** + * ifx_spi_spi_resume - resume SPI side on system resume + * @spi: SPI device being resumed + * + * Resume the SPI side. No action needed on Intel MID platforms, may + * need extending for other systems. + */ +static int ifx_spi_spi_resume(struct spi_device *spi) +{ + return 0; +} + +/** + * ifx_spi_pm_suspend - suspend modem on system suspend + * @dev: device being suspended + * + * Suspend the modem. No action needed on Intel MID platforms, may + * need extending for other systems. + */ +static int ifx_spi_pm_suspend(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_resume - resume modem on system resume + * @dev: device being resumed + * + * Allow the modem to resume. No action needed. + * + * FIXME: do we need to reset anything here ? + */ +static int ifx_spi_pm_resume(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_resume - resume modem + * @dev: device being resumed + * + * Allow the modem to resume. No action needed. + */ +static int ifx_spi_pm_runtime_resume(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_suspend - suspend modem + * @dev: device being suspended + * + * Allow the modem to suspend and thus suspend to continue up the + * device tree. + */ +static int ifx_spi_pm_runtime_suspend(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_idle - check if modem idle + * @dev: our device + * + * Check conditions and queue runtime suspend if idle. 
+ */ +static int ifx_spi_pm_runtime_idle(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); + + if (!ifx_dev->power_status) + pm_runtime_suspend(dev); + + return 0; +} + +static const struct dev_pm_ops ifx_spi_pm = { + .resume = ifx_spi_pm_resume, + .suspend = ifx_spi_pm_suspend, + .runtime_resume = ifx_spi_pm_runtime_resume, + .runtime_suspend = ifx_spi_pm_runtime_suspend, + .runtime_idle = ifx_spi_pm_runtime_idle +}; + +static const struct spi_device_id ifx_id_table[] = { + {"ifx6160", 0}, + {"ifx6260", 0}, + { } +}; +MODULE_DEVICE_TABLE(spi, ifx_id_table); + +/* spi operations */ +static const struct spi_driver ifx_spi_driver_6160 = { + .driver = { + .name = "ifx6160", + .bus = &spi_bus_type, + .pm = &ifx_spi_pm, + .owner = THIS_MODULE}, + .probe = ifx_spi_spi_probe, + .shutdown = ifx_spi_spi_shutdown, + .remove = __devexit_p(ifx_spi_spi_remove), + .suspend = ifx_spi_spi_suspend, + .resume = ifx_spi_spi_resume, + .id_table = ifx_id_table +}; + +/** + * ifx_spi_exit - module exit + * + * Unload the module. 
+ */ + +static void __exit ifx_spi_exit(void) +{ + /* unregister */ + tty_unregister_driver(tty_drv); + spi_unregister_driver((void *)&ifx_spi_driver_6160); +} + +/** + * ifx_spi_init - module entry point + * + * Initialise the SPI and tty interfaces for the IFX SPI driver + * We need to initialize upper-edge spi driver after the tty + * driver because otherwise the spi probe will race + */ + +static int __init ifx_spi_init(void) +{ + int result; + + tty_drv = alloc_tty_driver(1); + if (!tty_drv) { + pr_err("%s: alloc_tty_driver failed", DRVNAME); + return -ENOMEM; + } + + tty_drv->magic = TTY_DRIVER_MAGIC; + tty_drv->owner = THIS_MODULE; + tty_drv->driver_name = DRVNAME; + tty_drv->name = TTYNAME; + tty_drv->minor_start = IFX_SPI_TTY_ID; + tty_drv->num = 1; + tty_drv->type = TTY_DRIVER_TYPE_SERIAL; + tty_drv->subtype = SERIAL_TYPE_NORMAL; + tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; + tty_drv->init_termios = tty_std_termios; + + tty_set_operations(tty_drv, &ifx_spi_serial_ops); + + result = tty_register_driver(tty_drv); + if (result) { + pr_err("%s: tty_register_driver failed(%d)", + DRVNAME, result); + return result; + } + + result = spi_register_driver((void *)&ifx_spi_driver_6160); + if (result) { + pr_err("%s: spi_register_driver failed(%d)", + DRVNAME, result); + tty_unregister_driver(tty_drv); + } + return result; +} + +module_init(ifx_spi_init); +module_exit(ifx_spi_exit); + +MODULE_AUTHOR("Intel"); +MODULE_DESCRIPTION("IFX6x60 spi driver"); +MODULE_LICENSE("GPL"); +MODULE_INFO(Version, "0.1-IFX6x60"); diff --git a/drivers/serial/ifx6x60.h b/drivers/serial/ifx6x60.h new file mode 100644 index 000000000000..deb7b8d977dc --- /dev/null +++ b/drivers/serial/ifx6x60.h @@ -0,0 +1,129 @@ +/**************************************************************************** + * + * Driver for the IFX spi modem. 
+ * + * Copyright (C) 2009, 2010 Intel Corp + * Jim Stanley + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA + * + * + * + *****************************************************************************/ +#ifndef _IFX6X60_H +#define _IFX6X60_H + +#define DRVNAME "ifx6x60" +#define TTYNAME "ttyIFX" + +/* #define IFX_THROTTLE_CODE */ + +#define IFX_SPI_MAX_MINORS 1 +#define IFX_SPI_TRANSFER_SIZE 2048 +#define IFX_SPI_FIFO_SIZE 4096 + +#define IFX_SPI_HEADER_OVERHEAD 4 +#define IFX_RESET_TIMEOUT msecs_to_jiffies(50) + +/* device flags bitfield definitions */ +#define IFX_SPI_STATE_PRESENT 0 +#define IFX_SPI_STATE_IO_IN_PROGRESS 1 +#define IFX_SPI_STATE_IO_READY 2 +#define IFX_SPI_STATE_TIMER_PENDING 3 + +/* flow control bitfields */ +#define IFX_SPI_DCD 0 +#define IFX_SPI_CTS 1 +#define IFX_SPI_DSR 2 +#define IFX_SPI_RI 3 +#define IFX_SPI_DTR 4 +#define IFX_SPI_RTS 5 +#define IFX_SPI_TX_FC 6 +#define IFX_SPI_RX_FC 7 +#define IFX_SPI_UPDATE 8 + +#define IFX_SPI_PAYLOAD_SIZE (IFX_SPI_TRANSFER_SIZE - \ + IFX_SPI_HEADER_OVERHEAD) + +#define IFX_SPI_IRQ_TYPE DETECT_EDGE_RISING +#define IFX_SPI_GPIO_TARGET 0 +#define IFX_SPI_GPIO0 0x105 + +#define IFX_SPI_STATUS_TIMEOUT (2000*HZ) + +/* values for bits in power status byte */ +#define IFX_SPI_POWER_DATA_PENDING 1 +#define IFX_SPI_POWER_SRDY 2 + +struct ifx_spi_device { + /* Our SPI device */ + struct spi_device 
*spi_dev; + + /* Port specific data */ + struct kfifo tx_fifo; + spinlock_t fifo_lock; + unsigned long signal_state; + + /* TTY Layer logic */ + struct tty_port tty_port; + struct device *tty_dev; + int minor; + + /* Low level I/O work */ + struct tasklet_struct io_work_tasklet; + unsigned long flags; + dma_addr_t rx_dma; + dma_addr_t tx_dma; + + int is_6160; /* Modem type */ + + spinlock_t write_lock; + int write_pending; + spinlock_t power_lock; + unsigned char power_status; + + unsigned char *rx_buffer; + unsigned char *tx_buffer; + dma_addr_t rx_bus; + dma_addr_t tx_bus; + unsigned char spi_more; + unsigned char spi_slave_cts; + + struct timer_list spi_timer; + + struct spi_message spi_msg; + struct spi_transfer spi_xfer; + + struct { + /* gpio lines */ + unsigned short srdy; /* slave-ready gpio */ + unsigned short mrdy; /* master-ready gpio */ + unsigned short reset; /* modem-reset gpio */ + unsigned short po; /* modem-on gpio */ + unsigned short reset_out; /* modem-in-reset gpio */ + /* state/stats */ + int unack_srdy_int_nb; + } gpio; + + /* modem reset */ + unsigned long mdm_reset_state; +#define MR_START 0 +#define MR_INPROGRESS 1 +#define MR_COMPLETE 2 + wait_queue_head_t mdm_reset_wait; +}; + +#endif /* _IFX6X60_H */ diff --git a/include/linux/spi/ifx_modem.h b/include/linux/spi/ifx_modem.h new file mode 100644 index 000000000000..a68f3b19d112 --- /dev/null +++ b/include/linux/spi/ifx_modem.h @@ -0,0 +1,14 @@ +#ifndef LINUX_IFX_MODEM_H +#define LINUX_IFX_MODEM_H + +struct ifx_modem_platform_data { + unsigned short rst_out; /* modem reset out */ + unsigned short pwr_on; /* power on */ + unsigned short rst_pmu; /* reset modem */ + unsigned short tx_pwr; /* modem power threshold */ + unsigned short srdy; /* SRDY */ + unsigned short mrdy; /* MRDY */ + unsigned short is_6160; /* Modem type */ +}; + +#endif -- cgit v1.2.3-71-gd317 From 304e12665a4a7b8b25dfe8c64fa4fd56a04a67ea Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Mon, 8 Nov 2010 20:33:20 +0300 
Subject: serial: Add support for UART on VIA VT8500 and compatibles This adds a driver for the serial ports found in VIA and WonderMedia Systems-on-Chip. Interrupt-driven FIFO operation is implemented. The hardware also supports pure register-based operation (which is slower) and DMA-based FIFO operation. As the FIFOs are only 16 bytes long, DMA operation is probably not worth the hassle. Signed-off-by: Alexey Charkov Signed-off-by: Greg Kroah-Hartman --- drivers/serial/Kconfig | 10 + drivers/serial/Makefile | 1 + drivers/serial/vt8500_serial.c | 648 +++++++++++++++++++++++++++++++++++++++++ include/linux/serial_core.h | 3 + 4 files changed, 662 insertions(+) create mode 100644 drivers/serial/vt8500_serial.c (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 0b9cc17b380b..388e37132cc9 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1381,6 +1381,16 @@ config SERIAL_MSM_CONSOLE depends on SERIAL_MSM=y select SERIAL_CORE_CONSOLE +config SERIAL_VT8500 + bool "VIA VT8500 on-chip serial port support" + depends on ARM && ARCH_VT8500 + select SERIAL_CORE + +config SERIAL_VT8500_CONSOLE + bool "VIA VT8500 serial console support" + depends on SERIAL_VT8500=y + select SERIAL_CORE_CONSOLE + config SERIAL_NETX tristate "NetX serial port support" depends on ARM && ARCH_NETX diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 783638b10698..a5e2264b2a80 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o +obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o obj-$(CONFIG_SERIAL_MRST_MAX3110) += mrst_max3110.o obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o diff --git a/drivers/serial/vt8500_serial.c b/drivers/serial/vt8500_serial.c new 
file mode 100644 index 000000000000..322bf56c0d89 --- /dev/null +++ b/drivers/serial/vt8500_serial.c @@ -0,0 +1,648 @@ +/* + * drivers/serial/vt8500_serial.c + * + * Copyright (C) 2010 Alexey Charkov + * + * Based on msm_serial.c, which is: + * Copyright (C) 2007 Google, Inc. + * Author: Robert Love + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#if defined(CONFIG_SERIAL_VT8500_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +# define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * UART Register offsets + */ + +#define VT8500_URTDR 0x0000 /* Transmit data */ +#define VT8500_URRDR 0x0004 /* Receive data */ +#define VT8500_URDIV 0x0008 /* Clock/Baud rate divisor */ +#define VT8500_URLCR 0x000C /* Line control */ +#define VT8500_URICR 0x0010 /* IrDA control */ +#define VT8500_URIER 0x0014 /* Interrupt enable */ +#define VT8500_URISR 0x0018 /* Interrupt status */ +#define VT8500_URUSR 0x001c /* UART status */ +#define VT8500_URFCR 0x0020 /* FIFO control */ +#define VT8500_URFIDX 0x0024 /* FIFO index */ +#define VT8500_URBKR 0x0028 /* Break signal count */ +#define VT8500_URTOD 0x002c /* Time out divisor */ +#define VT8500_TXFIFO 0x1000 /* Transmit FIFO (16x8) */ +#define VT8500_RXFIFO 0x1020 /* Receive FIFO (16x10) */ + +/* + * Interrupt enable and status bits + */ + +#define TXDE (1 << 0) /* Tx Data empty */ +#define RXDF (1 << 1) /* Rx Data full */ +#define TXFAE (1 << 2) /* Tx FIFO almost empty */ +#define TXFE (1 << 3) 
/* Tx FIFO empty */ +#define RXFAF (1 << 4) /* Rx FIFO almost full */ +#define RXFF (1 << 5) /* Rx FIFO full */ +#define TXUDR (1 << 6) /* Tx underrun */ +#define RXOVER (1 << 7) /* Rx overrun */ +#define PER (1 << 8) /* Parity error */ +#define FER (1 << 9) /* Frame error */ +#define TCTS (1 << 10) /* Toggle of CTS */ +#define RXTOUT (1 << 11) /* Rx timeout */ +#define BKDONE (1 << 12) /* Break signal done */ +#define ERR (1 << 13) /* AHB error response */ + +#define RX_FIFO_INTS (RXFAF | RXFF | RXOVER | PER | FER | RXTOUT) +#define TX_FIFO_INTS (TXFAE | TXFE | TXUDR) + +struct vt8500_port { + struct uart_port uart; + char name[16]; + struct clk *clk; + unsigned int ier; +}; + +static inline void vt8500_write(struct uart_port *port, unsigned int val, + unsigned int off) +{ + writel(val, port->membase + off); +} + +static inline unsigned int vt8500_read(struct uart_port *port, unsigned int off) +{ + return readl(port->membase + off); +} + +static void vt8500_stop_tx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void vt8500_stop_rx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~RX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void vt8500_enable_ms(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier |= TCTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void handle_rx(struct uart_port *port) +{ + struct tty_struct *tty = tty_port_tty_get(&port->state->port); + if (!tty) { + /* Discard data: no tty available */ + int count = (vt8500_read(port, VT8500_URFIDX) & 0x1f00) >> 8; + u16 ch; + while (count--) + ch = readw(port->membase + VT8500_RXFIFO); + return; + } + + /* + * Handle 
overrun + */ + if ((vt8500_read(port, VT8500_URISR) & RXOVER)) { + port->icount.overrun++; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + } + + /* and now the main RX loop */ + while (vt8500_read(port, VT8500_URFIDX) & 0x1f00) { + unsigned int c; + char flag = TTY_NORMAL; + + c = readw(port->membase + VT8500_RXFIFO) & 0x3ff; + + /* Mask conditions we're ignoring. */ + c &= ~port->read_status_mask; + + if (c & FER) { + port->icount.frame++; + flag = TTY_FRAME; + } else if (c & PER) { + port->icount.parity++; + flag = TTY_PARITY; + } + port->icount.rx++; + + if (!uart_handle_sysrq_char(port, c)) + tty_insert_flip_char(tty, c, flag); + } + + tty_flip_buffer_push(tty); + tty_kref_put(tty); +} + +static void handle_tx(struct uart_port *port) +{ + struct circ_buf *xmit = &port->state->xmit; + + if (port->x_char) { + writeb(port->x_char, port->membase + VT8500_TXFIFO); + port->icount.tx++; + port->x_char = 0; + } + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + vt8500_stop_tx(port); + return; + } + + while ((vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16) { + if (uart_circ_empty(xmit)) + break; + + writeb(xmit->buf[xmit->tail], port->membase + VT8500_TXFIFO); + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_empty(xmit)) + vt8500_stop_tx(port); +} + +static void vt8500_start_tx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); + handle_tx(port); + vt8500_port->ier |= TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void handle_delta_cts(struct uart_port *port) +{ + port->icount.cts++; + wake_up_interruptible(&port->state->port.delta_msr_wait); +} + +static irqreturn_t vt8500_irq(int irq, void *dev_id) +{ + struct uart_port *port = dev_id; + unsigned 
long isr; + + spin_lock(&port->lock); + isr = vt8500_read(port, VT8500_URISR); + + /* Acknowledge active status bits */ + vt8500_write(port, isr, VT8500_URISR); + + if (isr & RX_FIFO_INTS) + handle_rx(port); + if (isr & TX_FIFO_INTS) + handle_tx(port); + if (isr & TCTS) + handle_delta_cts(port); + + spin_unlock(&port->lock); + + return IRQ_HANDLED; +} + +static unsigned int vt8500_tx_empty(struct uart_port *port) +{ + return (vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16 ? + TIOCSER_TEMT : 0; +} + +static unsigned int vt8500_get_mctrl(struct uart_port *port) +{ + unsigned int usr; + + usr = vt8500_read(port, VT8500_URUSR); + if (usr & (1 << 4)) + return TIOCM_CTS; + else + return 0; +} + +static void vt8500_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ +} + +static void vt8500_break_ctl(struct uart_port *port, int break_ctl) +{ + if (break_ctl) + vt8500_write(port, vt8500_read(port, VT8500_URLCR) | (1 << 9), + VT8500_URLCR); +} + +static int vt8500_set_baud_rate(struct uart_port *port, unsigned int baud) +{ + unsigned long div; + unsigned int loops = 1000; + + div = vt8500_read(port, VT8500_URDIV) & ~(0x3ff); + + if (unlikely((baud < 900) || (baud > 921600))) + div |= 7; + else + div |= (921600 / baud) - 1; + + while ((vt8500_read(port, VT8500_URUSR) & (1 << 5)) && --loops) + cpu_relax(); + vt8500_write(port, div, VT8500_URDIV); + + return baud; +} + +static int vt8500_startup(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + int ret; + + snprintf(vt8500_port->name, sizeof(vt8500_port->name), + "vt8500_serial%d", port->line); + + ret = request_irq(port->irq, vt8500_irq, IRQF_TRIGGER_HIGH, + vt8500_port->name, port); + if (unlikely(ret)) + return ret; + + vt8500_write(port, 0x03, VT8500_URLCR); /* enable TX & RX */ + + return 0; +} + +static void vt8500_shutdown(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + + vt8500_port->ier = 
0; + + /* disable interrupts and FIFOs */ + vt8500_write(&vt8500_port->uart, 0, VT8500_URIER); + vt8500_write(&vt8500_port->uart, 0x880, VT8500_URFCR); + free_irq(port->irq, port); +} + +static void vt8500_set_termios(struct uart_port *port, + struct ktermios *termios, + struct ktermios *old) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + unsigned long flags; + unsigned int baud, lcr; + unsigned int loops = 1000; + + spin_lock_irqsave(&port->lock, flags); + + /* calculate and set baud rate */ + baud = uart_get_baud_rate(port, termios, old, 900, 921600); + baud = vt8500_set_baud_rate(port, baud); + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); + + /* calculate parity */ + lcr = vt8500_read(&vt8500_port->uart, VT8500_URLCR); + lcr &= ~((1 << 5) | (1 << 4)); + if (termios->c_cflag & PARENB) { + lcr |= (1 << 4); + termios->c_cflag &= ~CMSPAR; + if (termios->c_cflag & PARODD) + lcr |= (1 << 5); + } + + /* calculate bits per char */ + lcr &= ~(1 << 2); + switch (termios->c_cflag & CSIZE) { + case CS7: + break; + case CS8: + default: + lcr |= (1 << 2); + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + break; + } + + /* calculate stop bits */ + lcr &= ~(1 << 3); + if (termios->c_cflag & CSTOPB) + lcr |= (1 << 3); + + /* set parity, bits per char, and stop bit */ + vt8500_write(&vt8500_port->uart, lcr, VT8500_URLCR); + + /* Configure status bits to ignore based on termio flags. 
*/ + port->read_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->read_status_mask = FER | PER; + + uart_update_timeout(port, termios->c_cflag, baud); + + /* Reset FIFOs */ + vt8500_write(&vt8500_port->uart, 0x88c, VT8500_URFCR); + while ((vt8500_read(&vt8500_port->uart, VT8500_URFCR) & 0xc) + && --loops) + cpu_relax(); + + /* Every possible FIFO-related interrupt */ + vt8500_port->ier = RX_FIFO_INTS | TX_FIFO_INTS; + + /* + * CTS flow control + */ + if (UART_ENABLE_MS(&vt8500_port->uart, termios->c_cflag)) + vt8500_port->ier |= TCTS; + + vt8500_write(&vt8500_port->uart, 0x881, VT8500_URFCR); + vt8500_write(&vt8500_port->uart, vt8500_port->ier, VT8500_URIER); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *vt8500_type(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + return vt8500_port->name; +} + +static void vt8500_release_port(struct uart_port *port) +{ +} + +static int vt8500_request_port(struct uart_port *port) +{ + return 0; +} + +static void vt8500_config_port(struct uart_port *port, int flags) +{ + port->type = PORT_VT8500; +} + +static int vt8500_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (unlikely(ser->type != PORT_UNKNOWN && ser->type != PORT_VT8500)) + return -EINVAL; + if (unlikely(port->irq != ser->irq)) + return -EINVAL; + return 0; +} + +static struct vt8500_port *vt8500_uart_ports[6]; /* must cover vt8500_uart_driver.nr */ +static struct uart_driver vt8500_uart_driver; + +#ifdef CONFIG_SERIAL_VT8500_CONSOLE + +static inline void wait_for_xmitr(struct uart_port *port) +{ + unsigned int status, tmout = 10000; + + /* Wait up to 10ms for the character(s) to be sent. 
*/ + do { + status = vt8500_read(port, VT8500_URFIDX); + + if (--tmout == 0) + break; + udelay(1); + } while (status & 0x10); +} + +static void vt8500_console_putchar(struct uart_port *port, int c) +{ + wait_for_xmitr(port); + writeb(c, port->membase + VT8500_TXFIFO); +} + +static void vt8500_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct vt8500_port *vt8500_port = vt8500_uart_ports[co->index]; + unsigned long ier; + + BUG_ON(co->index < 0 || co->index >= vt8500_uart_driver.nr); + + ier = vt8500_read(&vt8500_port->uart, VT8500_URIER); + vt8500_write(&vt8500_port->uart, 0, VT8500_URIER); /* args are (port, val, off): clear interrupt enables */ + + uart_console_write(&vt8500_port->uart, s, count, + vt8500_console_putchar); + + /* + * Finally, wait for transmitter to become empty + * and switch back to FIFO + */ + wait_for_xmitr(&vt8500_port->uart); + vt8500_write(&vt8500_port->uart, ier, VT8500_URIER); /* restore saved interrupt enables */ +} + +static int __init vt8500_console_setup(struct console *co, char *options) +{ + struct vt8500_port *vt8500_port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (unlikely(co->index >= vt8500_uart_driver.nr || co->index < 0)) + return -ENXIO; + + vt8500_port = vt8500_uart_ports[co->index]; + + if (!vt8500_port) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(&vt8500_port->uart, + co, baud, parity, bits, flow); +} + +static struct console vt8500_console = { + .name = "ttyWMT", + .write = vt8500_console_write, + .device = uart_console_device, + .setup = vt8500_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &vt8500_uart_driver, +}; + +#define VT8500_CONSOLE (&vt8500_console) + +#else +#define VT8500_CONSOLE NULL +#endif + +static struct uart_ops vt8500_uart_pops = { + .tx_empty = vt8500_tx_empty, + .set_mctrl = vt8500_set_mctrl, + .get_mctrl = vt8500_get_mctrl, + .stop_tx = vt8500_stop_tx, + .start_tx = vt8500_start_tx, + .stop_rx = vt8500_stop_rx, + .enable_ms 
= vt8500_enable_ms, + .break_ctl = vt8500_break_ctl, + .startup = vt8500_startup, + .shutdown = vt8500_shutdown, + .set_termios = vt8500_set_termios, + .type = vt8500_type, + .release_port = vt8500_release_port, + .request_port = vt8500_request_port, + .config_port = vt8500_config_port, + .verify_port = vt8500_verify_port, +}; + +static struct uart_driver vt8500_uart_driver = { + .owner = THIS_MODULE, + .driver_name = "vt8500_serial", + .dev_name = "ttyWMT", + .nr = 6, + .cons = VT8500_CONSOLE, +}; + +static int __init vt8500_serial_probe(struct platform_device *pdev) +{ + struct vt8500_port *vt8500_port; + struct resource *mmres, *irqres; + int ret; + + mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mmres || !irqres) + return -ENODEV; + + vt8500_port = kzalloc(sizeof(struct vt8500_port), GFP_KERNEL); + if (!vt8500_port) + return -ENOMEM; + + vt8500_port->uart.type = PORT_VT8500; + vt8500_port->uart.iotype = UPIO_MEM; + vt8500_port->uart.mapbase = mmres->start; + vt8500_port->uart.irq = irqres->start; + vt8500_port->uart.fifosize = 16; + vt8500_port->uart.ops = &vt8500_uart_pops; + vt8500_port->uart.line = pdev->id; + vt8500_port->uart.dev = &pdev->dev; + vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF; + vt8500_port->uart.uartclk = 24000000; + + snprintf(vt8500_port->name, sizeof(vt8500_port->name), + "VT8500 UART%d", pdev->id); + + vt8500_port->uart.membase = ioremap(mmres->start, + mmres->end - mmres->start + 1); + if (!vt8500_port->uart.membase) { + ret = -ENOMEM; + goto err; + } + + vt8500_uart_ports[pdev->id] = vt8500_port; + + uart_add_one_port(&vt8500_uart_driver, &vt8500_port->uart); + + platform_set_drvdata(pdev, vt8500_port); + + return 0; + +err: + kfree(vt8500_port); + return ret; +} + +static int __devexit vt8500_serial_remove(struct platform_device *pdev) +{ + struct vt8500_port *vt8500_port = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + 
uart_remove_one_port(&vt8500_uart_driver, &vt8500_port->uart); + kfree(vt8500_port); + + return 0; +} + +static struct platform_driver vt8500_platform_driver = { + .probe = vt8500_serial_probe, + .remove = vt8500_serial_remove, + .driver = { + .name = "vt8500_serial", + .owner = THIS_MODULE, + }, +}; + +static int __init vt8500_serial_init(void) +{ + int ret; + + ret = uart_register_driver(&vt8500_uart_driver); + if (unlikely(ret)) + return ret; + + ret = platform_driver_register(&vt8500_platform_driver); + + if (unlikely(ret)) + uart_unregister_driver(&vt8500_uart_driver); + + return ret; +} + +static void __exit vt8500_serial_exit(void) +{ +#ifdef CONFIG_SERIAL_VT8500_CONSOLE + unregister_console(&vt8500_console); +#endif + platform_driver_unregister(&vt8500_platform_driver); + uart_unregister_driver(&vt8500_uart_driver); +} + +module_init(vt8500_serial_init); +module_exit(vt8500_serial_exit); + +MODULE_AUTHOR("Alexey Charkov "); +MODULE_DESCRIPTION("Driver for vt8500 serial device"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 212eb4c67797..41603d690433 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -199,6 +199,9 @@ /* TI OMAP-UART */ #define PORT_OMAP 96 +/* VIA VT8500 SoC */ +#define PORT_VT8500 97 + #ifdef __KERNEL__ #include -- cgit v1.2.3-71-gd317 From 1d7138de878d1d4210727c1200193e69596f93b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Nov 2010 05:46:50 +0000 Subject: igmp: RCU conversion of in_dev->mc_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in_dev->mc_list is protected by one rwlock (in_dev->mc_list_lock). This can easily be converted to a RCU protection. Writers hold RTNL, so mc_list_lock is removed, not replaced by a spinlock. Signed-off-by: Eric Dumazet Cc: Cypher Wu Cc: Américo Wang Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 12 ++- include/linux/inetdevice.h | 5 +- include/net/inet_sock.h | 2 +- net/ipv4/igmp.c | 223 +++++++++++++++++++++------------------------ 4 files changed, 115 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 93fc2449af10..7d164670f264 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -167,10 +167,10 @@ struct ip_sf_socklist { */ struct ip_mc_socklist { - struct ip_mc_socklist *next; + struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ - struct ip_sf_socklist *sflist; + struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; @@ -186,11 +186,14 @@ struct ip_sf_list { struct ip_mc_list { struct in_device *interface; __be32 multiaddr; + unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; - unsigned int sfmode; unsigned long sfcount[2]; - struct ip_mc_list *next; + union { + struct ip_mc_list *next; + struct ip_mc_list __rcu *next_rcu; + }; struct timer_list timer; int users; atomic_t refcnt; @@ -201,6 +204,7 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? 
*/ unsigned char crcount; + struct rcu_head rcu; }; /* V3 exponential field decoding */ diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ccd5b07d678d..380ba6bc5db1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -52,9 +52,8 @@ struct in_device { atomic_t refcnt; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ - rwlock_t mc_list_lock; - struct ip_mc_list *mc_list; /* IP multicast filter chain */ - int mc_count; /* Number of installed mcasts */ + struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1989cfd7405f..8945f9fb192a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -141,7 +141,7 @@ struct inet_sock { nodefrag:1; int mc_index; __be32 mc_addr; - struct ip_mc_socklist *mc_list; + struct ip_mc_socklist __rcu *mc_list; struct { unsigned int flags; unsigned int fragsize; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08d0d81ffc15..6f49d6c087da 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -149,11 +149,17 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_mc_list, rcu)); +} + static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + call_rcu(&im->rcu, ip_mc_list_reclaim); } } @@ -163,7 +169,7 @@ static void ip_ma_put(struct ip_mc_list *im) * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) @@ -496,14 +502,24 @@ empty_source: return skb; } 
+#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +530,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -556,7 +572,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ @@ -593,7 +609,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -813,14 +829,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - 
read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -906,8 +922,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -925,7 +941,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } /* called in rcu_read_lock() section */ @@ -1110,8 +1126,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); @@ -1123,7 +1139,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1209,7 +1225,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1217,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; @@ -1227,26 +1243,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; 
atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next = in_dev->mc_list; - in_dev->mc_list = im; + + im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - write_unlock_bh(&in_dev->mc_list_lock); + rcu_assign_pointer(in_dev->mc_list, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1287,17 +1295,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group); void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; ASSERT_RTNL(); - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; + *ip = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1316,34 +1325,34 @@ EXPORT_SYMBOL(ip_mc_dec_group); void ip_mc_unmap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); 
#ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1374,7 +1383,6 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1382,14 +1390,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1405,17 +1413,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); + igmp_group_dropped(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } /* RTNL is locked */ @@ -1513,18 +1517,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? 
bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1685,18 +1689,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1793,7 +1797,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1807,7 +1811,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; rcu_assign_pointer(inet->mc_list, iml); @@ -1821,17 +1825,14 @@ EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { - struct ip_sf_socklist *psf; - - psf = container_of(rp, struct ip_sf_socklist, rcu); + kfree(container_of(rp, struct ip_sf_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(psf); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { - struct ip_sf_socklist *psf = iml->sflist; + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (psf == NULL) { @@ -1851,11 +1852,8 @@ static int ip_mc_leave_src(struct sock *sk, struct 
ip_mc_socklist *iml, static void ip_mc_socklist_reclaim(struct rcu_head *rp) { - struct ip_mc_socklist *iml; - - iml = container_of(rp, struct ip_mc_socklist, rcu); + kfree(container_of(rp, struct ip_mc_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(iml); } @@ -1866,7 +1864,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp) int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; @@ -1876,7 +1875,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { @@ -1888,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - rcu_assign_pointer(*imlp, iml->next); + *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); @@ -1934,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) @@ -1958,7 +1959,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -2077,7 +2078,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; 
pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2107,7 +2108,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -2155,7 +2156,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2163,7 +2164,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); if (!psl) { len = 0; @@ -2208,7 +2209,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2216,7 +2217,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; @@ -2257,7 +2258,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) goto out; rcu_read_lock(); - for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; @@ -2265,7 +2266,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) ret = inet->mc_all; if (!pmc) goto unlock; - psl = pmc->sflist; + psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; @@ -2300,10 +2301,10 @@ void ip_mc_drop_socket(struct sock *sk) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - rcu_assign_pointer(inet->mc_list, iml->next); + inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { @@ -2323,8 +2324,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } @@ -2345,7 +2346,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return rv; } @@ -2371,13 +2372,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); } return im; } @@ -2385,11 +2384,9 @@ static inline struct ip_mc_list 
*igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; - while (!im) { - if (likely(state->in_dev != NULL)) - read_unlock(&state->in_dev->mc_list_lock); + im = rcu_dereference(im->next_rcu); + while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; @@ -2398,8 +2395,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2435,10 +2431,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } @@ -2460,7 +2454,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } @@ -2519,8 +2513,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2531,7 +2524,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); } return psf; } @@ -2545,9 +2537,6 @@ static struct ip_sf_list 
*igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) - read_unlock(&state->idev->mc_list_lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2556,8 +2545,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2603,10 +2591,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } -- cgit v1.2.3-71-gd317 From 9fbbdde93231ad7f35c217aa6bbbc7995133f483 Mon Sep 17 00:00:00 2001 From: Erik Gilling Date: Thu, 11 Nov 2010 15:44:43 +0100 Subject: video: add fb_edid_add_monspecs for parsing extended edid information Modern monitors/tvs have more extended EDID information blocks which can contain extra detailed modes. This adds a fb_edid_add_monspecs function which drivers can use to parse those additional blocks.
Signed-off-by: Erik Gilling Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/fbmon.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fb.h | 2 ++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c index 563a98b88e9b..a0b5a93b72d2 100644 --- a/drivers/video/fbmon.c +++ b/drivers/video/fbmon.c @@ -973,6 +973,63 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) DPRINTK("========================================\n"); } +void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) +{ + unsigned char *block; + struct fb_videomode *mode, *m; + int num = 0, i, first = 1; + + if (edid == NULL) + return; + + if (!edid_checksum(edid)) + return; + + if (edid[0] != 0x2) + return; + + mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL); + if (mode == NULL) + return; + + block = edid + edid[0x2]; + + DPRINTK(" Extended Detailed Timings\n"); + + for (i = 0; i < (128 - edid[0x2]) / DETAILED_TIMING_DESCRIPTION_SIZE; + i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) { + if (!(block[0] == 0x00 && block[1] == 0x00)) { + get_detailed_timing(block, &mode[num]); + if (first) { + mode[num].flag |= FB_MODE_IS_FIRST; + first = 0; + } + num++; + } + } + + /* Yikes, EDID data is totally useless */ + if (!num) { + kfree(mode); + return; + } + + m = kzalloc((specs->modedb_len + num) * + sizeof(struct fb_videomode), GFP_KERNEL); + + if (!m) { + kfree(mode); + return; + } + + memmove(m, specs->modedb, specs->modedb_len * sizeof(struct fb_videomode)); + memmove(m + specs->modedb_len, mode, num * sizeof(struct fb_videomode)); + kfree(mode); + kfree(specs->modedb); + specs->modedb = m; + specs->modedb_len = specs->modedb_len + num; +} + /* * VESA Generalized Timing Formula (GTF) */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 7fca3dc4e475..6f0274d96f0c 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ 
-1092,6 +1092,8 @@ extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var); extern const unsigned char *fb_firmware_edid(struct device *device); extern void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs); +extern void fb_edid_add_monspecs(unsigned char *edid, + struct fb_monspecs *specs); extern void fb_destroy_modedb(struct fb_videomode *modedb); extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb); extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter); -- cgit v1.2.3-71-gd317 From 0ad83f6882c41df1a7fa387086029e162038c1f2 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 11 Nov 2010 15:45:04 +0100 Subject: fbdev: when parsing E-EDID blocks, also use SVD entries Add parsing of E-EDID SVD entries. In this first version only a few CEA/EIA-861E modes are implemented, more can be added as needed. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/fbmon.c | 37 +++++++++++++++++++++++++++++++++---- drivers/video/modedb.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/fb.h | 1 + 3 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c index b25399abcf49..4f57485f8c54 100644 --- a/drivers/video/fbmon.c +++ b/drivers/video/fbmon.c @@ -983,7 +983,8 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) unsigned char *block; struct fb_videomode *m; int num = 0, i; - u8 edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; + u8 svd[64], edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; + u8 pos = 4, svd_n = 0; if (!edid) return; @@ -995,6 +996,21 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) edid[2] < 4 || edid[2] > 128 - DETAILED_TIMING_DESCRIPTION_SIZE) return; + DPRINTK(" Short Video Descriptors\n"); + + while (pos < edid[2]) { + u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7; + pr_debug("Data block %u of 
%u bytes\n", type, len); + if (type == 2) + for (i = pos; i < pos + len; i++) { + u8 idx = edid[pos + i] & 0x7f; + svd[svd_n++] = idx; + pr_debug("N%sative mode #%d\n", + edid[pos + i] & 0x80 ? "" : "on-n", idx); + } + pos += len + 1; + } + block = edid + edid[2]; DPRINTK(" Extended Detailed Timings\n"); @@ -1005,10 +1021,10 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) edt[num++] = block - edid; /* Yikes, EDID data is totally useless */ - if (!num) + if (!(num + svd_n)) return; - m = kzalloc((specs->modedb_len + num) * + m = kzalloc((specs->modedb_len + num + svd_n) * sizeof(struct fb_videomode), GFP_KERNEL); if (!m) @@ -1023,9 +1039,22 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) pr_debug("Adding %ux%u@%u\n", m[i].xres, m[i].yres, m[i].refresh); } + for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) { + int idx = svd[i - specs->modedb_len - num]; + if (!idx || idx > 63) { + pr_warning("Reserved SVD code %d\n", idx); + } else if (idx > ARRAY_SIZE(cea_modes) || !cea_modes[idx].xres) { + pr_warning("Unimplemented SVD code %d\n", idx); + } else { + memcpy(&m[i], cea_modes + idx, sizeof(m[i])); + pr_debug("Adding SVD #%d: %ux%u@%u\n", idx, + m[i].xres, m[i].yres, m[i].refresh); + } + } + kfree(specs->modedb); specs->modedb = m; - specs->modedb_len = specs->modedb_len + num; + specs->modedb_len = specs->modedb_len + num + svd_n; } /* diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c index 0a4dbdc1693a..9a0ae6ca5427 100644 --- a/drivers/video/modedb.c +++ b/drivers/video/modedb.c @@ -278,6 +278,49 @@ static const struct fb_videomode modedb[] = { }; #ifdef CONFIG_FB_MODE_HELPERS +const struct fb_videomode cea_modes[64] = { + /* #1: 640x480p@59.94/60Hz */ + [1] = { + NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #3: 720x480p@59.94/60Hz */ + [3] = { + NULL, 60, 720, 480, 37037, 60, 16, 30, 9, 62, 6, 0, FB_VMODE_NONINTERLACED, 0, + 
}, + /* #5: 1920x1080i@59.94/60Hz */ + [5] = { + NULL, 60, 1920, 1080, 13763, 148, 88, 15, 2, 44, 5, 0, FB_VMODE_INTERLACED, 0, + }, + /* #7: 720(1440)x480iH@59.94/60Hz */ + [7] = { + NULL, 60, 1440, 480, 18554/*37108*/, 114, 38, 15, 4, 124, 3, 0, FB_VMODE_INTERLACED, 0, + }, + /* #9: 720(1440)x240pH@59.94/60Hz */ + [9] = { + NULL, 60, 1440, 240, 18554, 114, 38, 16, 4, 124, 3, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #18: 720x576pH@50Hz */ + [18] = { + NULL, 50, 720, 576, 37037, 68, 12, 39, 5, 64, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #19: 1280x720p@50Hz */ + [19] = { + NULL, 50, 1280, 720, 13468, 220, 440, 20, 5, 40, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #20: 1920x1080i@50Hz */ + [20] = { + NULL, 50, 1920, 1080, 13480, 148, 528, 15, 5, 528, 5, 0, FB_VMODE_INTERLACED, 0, + }, + /* #32: 1920x1080p@23.98/24Hz */ + [32] = { + NULL, 24, 1920, 1080, 13468, 148, 638, 36, 4, 44, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #35: (2880)x480p4x@59.94/60Hz */ + [35] = { + NULL, 50, 2880, 480, 11100, 240, 64, 30, 9, 248, 6, 0, FB_VMODE_NONINTERLACED, 0, + }, +}; + const struct fb_videomode vesa_modes[] = { /* 0 640x350-85 VESA */ { NULL, 85, 640, 350, 31746, 96, 32, 60, 32, 64, 3, diff --git a/include/linux/fb.h b/include/linux/fb.h index 6f0274d96f0c..e154a79b8322 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1151,6 +1151,7 @@ struct fb_videomode { extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; +extern const struct fb_videomode cea_modes[64]; struct fb_modelist { struct list_head list; -- cgit v1.2.3-71-gd317 From d83447f0944e73d690218d79c07762ffa4ceb9e4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 14 Nov 2010 17:25:46 +0100 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. 
Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS, and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 2 ++ net/dccp/options.c | 24 ++++++++++++++++++++---- net/dccp/output.c | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 749f01ccd26e..eed52bcd35d0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,6 +462,7 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) + * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) @@ -503,6 +504,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + __u8 dccps_sync_scheduled:1; struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; diff --git a/net/dccp/options.c b/net/dccp/options.c index 7743df00f5b1..dabd6ee34d45 100644 --- 
a/net/dccp/options.c +++ b/net/dccp/options.c @@ -427,6 +427,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const u16 buflen = dccp_ackvec_buflen(av); /* Figure out how many options do we need to represent the ackvec */ const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); @@ -435,10 +436,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) const unsigned char *tail, *from; unsigned char *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, + dccp_packet_name(dcb->dccpd_type)); return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; + } + /* + * Since Ack Vectors are variable-length, we can not always predict + * their size. To catch exception cases where the space is running out + * on the skb, a separate Sync is scheduled to carry the Ack Vector. + */ + if (len > DCCPAV_MIN_OPTLEN && + len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { + DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " + "MPS=%u ==> reduce payload size?\n", len, skb->len, + dcb->dccpd_opt_len, dp->dccps_mss_cache); + dp->dccps_sync_scheduled = 1; + return 0; + } + dcb->dccpd_opt_len += len; to = skb_push(skb, len); len = buflen; @@ -479,7 +495,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) /* * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. 
*/ - if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) + if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) return -ENOBUFS; return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5ae..d96dd9d362ae 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) * any local drop will eventually be reported via receiver feedback. */ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); + + /* + * If the CCID needs to transfer additional header options out-of-band + * (e.g. Ack Vectors or feature-negotiation options), it activates this + * flag to schedule a Sync. The Sync will automatically incorporate all + * currently pending header options, thus clearing the backlog. + */ + if (dp->dccps_sync_scheduled) + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /** @@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; + /* + * Clear the flag in case the Sync was scheduled for out-of-band data, + * such as carrying a long Ack Vector. + */ + dccp_sk(sk)->dccps_sync_scheduled = 0; + dccp_transmit_skb(sk, skb); } -- cgit v1.2.3-71-gd317 From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 Oct 2010 12:14:55 +0000 Subject: offloading: Force software GSO for multiple vlan tags. We currently use vlan_features to check for TSO support if there is a vlan tag. However, it's quite likely that the NIC is not able to do TSO when there is an arbitrary number of tags. Therefore if there is more than one tag (in-band or out-of-band), fall back to software emulation. Signed-off-by: Jesse Gross CC: Ben Hutchings Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 7 +++---- net/core/dev.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 578debb801f4..6e4cfbc53d4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2239,6 +2239,8 @@ unsigned long netdev_fix_features(unsigned long features, const char *name); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; @@ -2254,10 +2256,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features) static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { if (skb_is_gso(skb)) { - int features = dev->features; - - if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci) - features &= dev->vlan_features; + int features = netif_get_vlan_features(skb, dev); return (!skb_gso_ok(skb, features) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); diff --git a/net/core/dev.c b/net/core/dev.c index 368930a988e3..8b500c3e0297 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +{ + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else if (!skb->vlan_tci) + return dev->features; + + if (protocol != htons(ETH_P_8021Q)) + return dev->features & dev->vlan_features; + else + return 0; +} + /* * Returns true if either: * 1. 
skb has frag_list and the device doesn't support FRAGLIST, or -- cgit v1.2.3-71-gd317 From 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 20 Oct 2010 10:16:58 -0700 Subject: rfkill: remove dead code The following code is defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: John W. Linville --- include/linux/rfkill.h | 31 ------------------------------- net/rfkill/core.c | 14 -------------- 2 files changed, 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 08c32e4f261a..c6c608482cba 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -354,37 +354,6 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) } #endif /* RFKILL || RFKILL_MODULE */ - -#ifdef CONFIG_RFKILL_LEDS -/** - * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED. - * This function might return a NULL pointer if registering of the - * LED trigger failed. Use this as "default_trigger" for the LED. - */ -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill); - -/** - * rfkill_set_led_trigger_name -- set the LED trigger name - * @rfkill: rfkill struct - * @name: LED trigger name - * - * This function sets the LED trigger name of the radio LED - * trigger that rfkill creates. It is optional, but if called - * must be called before rfkill_register() to be effective. 
- */ -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name); -#else -static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return NULL; -} - -static inline void -rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ -} -#endif - #endif /* __KERNEL__ */ #endif /* RFKILL_H */ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 04f599089e6d..0198191b756d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return rfkill->led_trigger.name; -} -EXPORT_SYMBOL(rfkill_get_led_trigger_name); - -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ - BUG_ON(!rfkill); - - rfkill->ledtrigname = name; -} -EXPORT_SYMBOL(rfkill_set_led_trigger_name); - static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname -- cgit v1.2.3-71-gd317 From ca4ffe8f2848169a8ded0ea8a60b2d81925564c9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 20 Oct 2010 10:18:55 -0700 Subject: cfg80211: fix disabling channels based on hints After a module loads you will have loaded the world roaming regulatory domain or a custom regulatory domain. Further regulatory hints are welcomed and should be respected unless the regulatory hint is coming from a country IE as the IEEE spec allows for a country IE to be a subset of what is allowed by the local regulatory agencies. So disable all channels that do not fit a regulatory domain sent from a unless the hint is from a country IE and the country IE had no information about the band we are currently processing. 
This fixes a few regulatory issues; for example, drivers that depend on CRDA and had no 5 GHz frequencies allowed were not properly disabling 5 GHz at all. Furthermore, it also allows users to restrict devices further as was intended. If you receive a country IE upon association we will also disable the channels that are not allowed if the country IE had at least one channel on the respective band we are processing. This was the original intention behind this design but it was completely overlooked... Cc: David Quan Cc: Jouni Malinen cc: Easwar Krishnan Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 +++++- net/wireless/reg.c | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0edb2566c14c..fb877b5621b7 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1307,7 +1307,11 @@ enum nl80211_bitrate_attr { * wireless core it thinks its knows the regulatory domain we should be in. * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an * 802.11 country information element with regulatory information it - * thinks we should consider. + * thinks we should consider. cfg80211 only processes the country + * code from the IE, and relies on the regulatory domain information + * structure pased by userspace (CRDA) from our wireless-regdb. + * If a channel is enabled but the country code indicates it should + * be disabled we disable the channel and re-enable it upon disassociation.
*/ enum nl80211_reg_initiator { NL80211_REGDOM_SET_BY_CORE, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1bc8131a5185..8ab65f2afe70 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -750,8 +750,26 @@ static void handle_channel(struct wiphy *wiphy, desired_bw_khz, &reg_rule); - if (r) + if (r) { + /* + * We will disable all channels that do not match our + * recieved regulatory rule unless the hint is coming + * from a Country IE and the Country IE had no information + * about a band. The IEEE 802.11 spec allows for an AP + * to send only a subset of the regulatory rules allowed, + * so an AP in the US that only supports 2.4 GHz may only send + * a country IE with information for the 2.4 GHz band + * while 5 GHz is still supported. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + r == -ERANGE) + return; + + REG_DBG_PRINT("cfg80211: Disabling freq %d MHz\n", + chan->center_freq); + chan->flags = IEEE80211_CHAN_DISABLED; return; + } power_rule = &reg_rule->power_rule; freq_range = &reg_rule->freq_range; -- cgit v1.2.3-71-gd317 From c8aea565e8f715d9f10064b1cbfbc15bf75df501 Mon Sep 17 00:00:00 2001 From: Gery Kahn Date: Tue, 5 Oct 2010 16:09:05 +0200 Subject: wl1271: ref_clock cosmetic changes Cosmetic cleanup for ref_clock code while configured by board.
Signed-off-by: Gery Kahn Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/wl1271_boot.c | 10 ++++------ include/linux/wl12xx.h | 8 ++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/wl1271_boot.c b/drivers/net/wireless/wl12xx/wl1271_boot.c index b91021242098..5b190728ca55 100644 --- a/drivers/net/wireless/wl12xx/wl1271_boot.c +++ b/drivers/net/wireless/wl12xx/wl1271_boot.c @@ -471,20 +471,19 @@ int wl1271_boot(struct wl1271 *wl) { int ret = 0; u32 tmp, clk, pause; - int ref_clock = wl->ref_clock; wl1271_boot_hw_version(wl); - if (ref_clock == 0 || ref_clock == 2 || ref_clock == 4) + if (wl->ref_clock == 0 || wl->ref_clock == 2 || wl->ref_clock == 4) /* ref clk: 19.2/38.4/38.4-XTAL */ clk = 0x3; - else if (ref_clock == 1 || ref_clock == 3) + else if (wl->ref_clock == 1 || wl->ref_clock == 3) /* ref clk: 26/52 */ clk = 0x5; else return -EINVAL; - if (ref_clock != 0) { + if (wl->ref_clock != 0) { u16 val; /* Set clock type (open drain) */ val = wl1271_top_reg_read(wl, OCP_REG_CLK_TYPE); @@ -529,8 +528,7 @@ int wl1271_boot(struct wl1271 *wl) wl1271_debug(DEBUG_BOOT, "clk2 0x%x", clk); - /* 2 */ - clk |= (ref_clock << 1) << 4; + clk |= (wl->ref_clock << 1) << 4; wl1271_write32(wl, DRPW_SCRATCH_START, clk); wl1271_set_partition(wl, &part_table[PART_WORK]); diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 4f902e1908aa..bebb8efea0a6 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -24,6 +24,14 @@ #ifndef _LINUX_WL12XX_H #define _LINUX_WL12XX_H +/* The board reference clock values */ +enum { + WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ + WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ + WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ + WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ +}; + struct wl12xx_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ -- cgit v1.2.3-71-gd317 From 
7919a57bc608140aa8614c19eac40c6916fb61d2 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 30 Aug 2010 19:04:01 +0000 Subject: bitops: Provide generic sign_extend32 function This patch moves code out from wireless drivers where two different functions are defined in three code locations for the same purpose and provides a common function to sign extend a 32-bit value. Signed-off-by: Andreas Herrmann Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/phy.c | 8 +------- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 12 ++++++------ drivers/net/wireless/ath/ath9k/ar9002_phy.c | 8 ++++---- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 12 ++++++------ drivers/net/wireless/ath/ath9k/hw.h | 6 ------ drivers/net/wireless/iwlwifi/iwl-4965.c | 20 ++------------------ include/linux/bitops.h | 11 +++++++++++ 7 files changed, 30 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index 219367884e64..6b43f535ff53 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -1102,18 +1102,12 @@ int ath5k_hw_channel(struct ath5k_hw *ah, struct ieee80211_channel *channel) PHY calibration \*****************/ -static int sign_extend(int val, const int nbits) -{ - int order = BIT(nbits-1); - return (val ^ order) - order; -} - static s32 ath5k_hw_read_measured_noise_floor(struct ath5k_hw *ah) { s32 val; val = ath5k_hw_reg_read(ah, AR5K_PHY_NF); - return sign_extend(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 9); + return sign_extend32(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 8); } void ath5k_hw_init_nfcal_hist(struct ath5k_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 777a602176f2..c83a22cfbe1e 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -1490,25 +1490,25 @@ static void ar5008_hw_do_getnf(struct 
ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA), AR_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH2_CCA), AR_PHY_CH2_MINCCA_PWR); - nfarray[2] = sign_extend(nf, 9); + nfarray[2] = sign_extend32(nf, 8); if (!IS_CHAN_HT40(ah->curchan)) return; nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR); - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR_PHY_CH1_EXT_MINCCA_PWR); - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH2_EXT_CCA), AR_PHY_CH2_EXT_MINCCA_PWR); - nfarray[5] = sign_extend(nf, 9); + nfarray[5] = sign_extend32(nf, 8); } /* diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c index c00cdc67b55b..3fb97fdc1240 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c @@ -473,21 +473,21 @@ static void ar9002_hw_do_getnf(struct ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA), AR9280_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR9280_PHY_EXT_MINCCA_PWR); if (IS_CHAN_HT40(ah->curchan)) - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); if (AR_SREV_9285(ah) || AR_SREV_9271(ah)) return; nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR9280_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR9280_PHY_CH1_EXT_MINCCA_PWR); if (IS_CHAN_HT40(ah->curchan)) - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); } static void ar9002_hw_set_nf_limits(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c 
b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 06a9c4cd2f44..44c5454b2ad8 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1023,25 +1023,25 @@ static void ar9003_hw_do_getnf(struct ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA_0), AR_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CCA_1), AR_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CCA_2), AR_PHY_CH2_MINCCA_PWR); - nfarray[2] = sign_extend(nf, 9); + nfarray[2] = sign_extend32(nf, 8); if (!IS_CHAN_HT40(ah->curchan)) return; nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR); - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_1), AR_PHY_CH1_EXT_MINCCA_PWR); - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_2), AR_PHY_CH2_EXT_MINCCA_PWR); - nfarray[5] = sign_extend(nf, 9); + nfarray[5] = sign_extend32(nf, 8); } static void ar9003_hw_set_nf_limits(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index e5b72262fd96..f821a28bcda3 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -825,12 +825,6 @@ static inline struct ath_hw_ops *ath9k_hw_ops(struct ath_hw *ah) return &ah->ops; } -static inline int sign_extend(int val, const int nbits) -{ - int order = BIT(nbits-1); - return (val ^ order) - order; -} - /* Initialization, Detach, Reset */ const char *ath9k_hw_probe(u16 vendorid, u16 devid); void ath9k_hw_deinit(struct ath_hw *ah); diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index cd14843878ae..4748d067eb1d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -1686,22 +1686,6 @@ 
static void iwl4965_txq_update_byte_cnt_tbl(struct iwl_priv *priv, tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent; } -/** - * sign_extend - Sign extend a value using specified bit as sign-bit - * - * Example: sign_extend(9, 3) would return -7 as bit3 of 1001b is 1 - * and bit0..2 is 001b which when sign extended to 1111111111111001b is -7. - * - * @param oper value to sign extend - * @param index 0 based bit index (0<=index<32) to sign bit - */ -static s32 sign_extend(u32 oper, int index) -{ - u8 shift = 31 - index; - - return (s32)(oper << shift) >> shift; -} - /** * iwl4965_hw_get_temperature - return the calibrated temperature (in Kelvin) * @statistics: Provides the temperature reading from the uCode @@ -1739,9 +1723,9 @@ static int iwl4965_hw_get_temperature(struct iwl_priv *priv) * "initialize" ALIVE response. */ if (!test_bit(STATUS_TEMPERATURE, &priv->status)) - vt = sign_extend(R4, 23); + vt = sign_extend32(R4, 23); else - vt = sign_extend(le32_to_cpu(priv->_agn.statistics. + vt = sign_extend32(le32_to_cpu(priv->_agn.statistics. 
general.common.temperature), 23); IWL_DEBUG_TEMP(priv, "Calib values R[1-3]: %d %d %d R4: %d\n", R1, R2, R3, vt); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 827cc95711ef..2184c6b97aeb 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -109,6 +109,17 @@ static inline __u8 ror8(__u8 word, unsigned int shift) return (word >> shift) | (word << (8 - shift)); } +/** + * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit + * @value: value to sign extend + * @index: 0 based bit index (0<=index<32) to sign bit + */ +static inline __s32 sign_extend32(__u32 value, int index) +{ + __u8 shift = 31 - index; + return (__s32)(value << shift) >> shift; +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3-71-gd317 From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 9 Nov 2010 10:47:38 +0000 Subject: net: Simplify RX queue allocation This patch move RX queue allocation to alloc_netdev_mq and freeing of the queues to free_netdev (symmetric to TX queue allocation). Each kobject RX queue takes a reference to the queue's device so that the device can't be freed before all the kobjects have been released-- this obviates the need for reference counts specific to RX queues. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +-- net/core/dev.c | 19 ++++++++++--------- net/core/net-sysfs.c | 7 ++----- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e4cfbc53d4c..fccb11f879e5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -592,8 +592,7 @@ struct netdev_rx_queue { struct rps_map __rcu *rps_map; struct rps_dev_flow_table __rcu *rps_flow_table; struct kobject kobj; - struct netdev_rx_queue *first; - atomic_t count; + struct net_device *dev; } ____cacheline_aligned_in_smp; #endif /* CONFIG_RPS */ diff --git a/net/core/dev.c b/net/core/dev.c index 75490670e0a9..8725d168d1f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev) } dev->_rx = rx; - /* - * Set a pointer to first element in the array which holds the - * reference count. - */ for (i = 0; i < count; i++) - rx[i].first = rx; + rx[i].dev = dev; #endif return 0; } @@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = netif_alloc_rx_queues(dev); - if (ret) - goto out; - netdev_init_queues(dev); /* Init, if this function is available */ @@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; dev->real_num_rx_queues = queue_count; + if (netif_alloc_rx_queues(dev)) + goto free_pcpu; #endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif + free_p: kfree(p); return NULL; @@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif kfree(rcu_dereference_raw(dev->ingress_queue)); diff --git 
a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a5ff5a89f376..3ba526b56fe3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = { static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct netdev_rx_queue *first = queue->first; struct rps_map *map; struct rps_dev_flow_table *flow_table; @@ -719,8 +718,7 @@ static void rx_queue_release(struct kobject *kobj) if (flow_table) call_rcu(&flow_table->rcu, rps_dev_flow_table_release); - if (atomic_dec_and_test(&first->count)) - kfree(first); + dev_put(queue->dev); } static struct kobj_type rx_queue_ktype = { @@ -732,7 +730,6 @@ static struct kobj_type rx_queue_ktype = { static int rx_queue_add_kobject(struct net_device *net, int index) { struct netdev_rx_queue *queue = net->_rx + index; - struct netdev_rx_queue *first = queue->first; struct kobject *kobj = &queue->kobj; int error = 0; @@ -745,7 +742,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - atomic_inc(&first->count); + dev_hold(queue->dev); return error; } -- cgit v1.2.3-71-gd317 From c59504ebc5baa628706d10c2d3c7e1f4bc3c2147 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 14 Nov 2010 17:04:57 +0000 Subject: include/linux/if_macvlan.h: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- include/linux/if_macvlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 8a2fd66a8b5f..ac96a2d76291 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -69,7 +69,7 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, rx_stats = this_cpu_ptr(vlan->rx_stats); if (likely(success)) { u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++;; + rx_stats->rx_packets++; rx_stats->rx_bytes += len; if (multicast) rx_stats->rx_multicast++; -- cgit v1.2.3-71-gd317 From a386f99025f13b32502fe5dedf223c20d7283826 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 06:38:11 +0000 Subject: bridge: add proper RCU annotation to should_route_hook Add br_should_route_hook_t typedef, this is the only way we can get a clean RCU implementation for function pointer. Move route_hook to location where it is used. Signed-off-by: Eric Dumazet Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 4 +++- net/bridge/br.c | 4 ---- net/bridge/br_input.c | 10 +++++++--- net/bridge/netfilter/ebtable_broute.c | 3 ++- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0d241a5c4909..f7e73c338c40 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -102,7 +102,9 @@ struct __fdb_entry { #include extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -extern int (*br_should_route_hook)(struct sk_buff *skb); + +typedef int (*br_should_route_hook_t)(struct sk_buff *skb); +extern br_should_route_hook_t __rcu *br_should_route_hook; #endif diff --git a/net/bridge/br.c b/net/bridge/br.c index c8436fa31344..84bbb82599b2 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,8 +22,6 @@ #include "br_private.h" -int (*br_should_route_hook)(struct sk_buff *skb); - static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; @@ -102,8 +100,6 @@ static void __exit br_deinit(void) br_fdb_fini(); } -EXPORT_SYMBOL(br_should_route_hook); - module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 25207a1f182b..6f6d8e1b776f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,10 @@ /* Bridge group multicast address 802.1d (pg 51). 
*/ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +/* Hook for brouter */ +br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; +EXPORT_SYMBOL(br_should_route_hook); + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; @@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) { struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; - int (*rhook)(struct sk_buff *skb); + br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return skb; @@ -173,8 +177,8 @@ forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); - if (rhook != NULL) { - if (rhook(skb)) + if (rhook) { + if ((*rhook)(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ae3f106c3908..1bcaf36ad612 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, ebt_broute); + rcu_assign_pointer(br_should_route_hook, + (br_should_route_hook_t *)ebt_broute); return 0; } -- cgit v1.2.3-71-gd317 From 61391cde9eefac5cfcf6d214aa80c77e58b1626b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 15 Nov 2010 06:38:12 +0000 Subject: netdev: add rcu annotations to receive handler hook Suggested by Eric's bridge RCU changes. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fccb11f879e5..b45c1b8b1d19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -994,8 +994,8 @@ struct net_device { unsigned int real_num_rx_queues; #endif - rx_handler_func_t *rx_handler; - void *rx_handler_data; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; -- cgit v1.2.3-71-gd317 From 8ffab51b3dfc54876f145f15b351c41f3f703195 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Nov 2010 21:14:04 +0000 Subject: macvlan: lockless tx path macvlan is a stacked device, like tunnels. We should use the lockless mechanism we are using in tunnels and loopback. This patch completely removes locking in TX path. tx stat counters are added into existing percpu stat structure, renamed from rx_stats to pcpu_stats. Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue capability) Note : rx_errors converted to a 32bit counter, like tx_dropped, since they dont need 64bit range. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Ben Greear Cc: Ben Hutchings Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 80 ++++++++++++++++++++-------------------------- include/linux/if_macvlan.h | 34 ++++++++++++-------- 2 files changed, 55 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0fc9dc7f20db..93f0ba25c808 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -243,18 +243,22 @@ xmit_world: netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); unsigned int len = skb->len; int ret; + const struct macvlan_dev *vlan = netdev_priv(dev); ret = macvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + struct macvlan_pcpu_stats *pcpu_stats; + pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(vlan->pcpu_stats->tx_dropped); + } return ret; } EXPORT_SYMBOL_GPL(macvlan_start_xmit); @@ -414,14 +418,15 @@ static int macvlan_init(struct net_device *dev) dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; + dev->features |= NETIF_F_LLTX; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; dev->hard_header_len = lowerdev->hard_header_len; macvlan_set_lockdep_class(dev); - vlan->rx_stats = alloc_percpu(struct macvlan_rx_stats); - if (!vlan->rx_stats) + vlan->pcpu_stats = alloc_percpu(struct macvlan_pcpu_stats); + if (!vlan->pcpu_stats) return -ENOMEM; return 0; @@ -431,7 +436,7 @@ static void macvlan_uninit(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - free_percpu(vlan->rx_stats); + free_percpu(vlan->pcpu_stats); } static struct 
rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, @@ -439,33 +444,38 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, { struct macvlan_dev *vlan = netdev_priv(dev); - dev_txq_stats_fold(dev, stats); - - if (vlan->rx_stats) { - struct macvlan_rx_stats *p, accum = {0}; - u64 rx_packets, rx_bytes, rx_multicast; + if (vlan->pcpu_stats) { + struct macvlan_pcpu_stats *p; + u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; + u32 rx_errors = 0, tx_dropped = 0; unsigned int start; int i; for_each_possible_cpu(i) { - p = per_cpu_ptr(vlan->rx_stats, i); + p = per_cpu_ptr(vlan->pcpu_stats, i); do { start = u64_stats_fetch_begin_bh(&p->syncp); rx_packets = p->rx_packets; rx_bytes = p->rx_bytes; rx_multicast = p->rx_multicast; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; } while (u64_stats_fetch_retry_bh(&p->syncp, start)); - accum.rx_packets += rx_packets; - accum.rx_bytes += rx_bytes; - accum.rx_multicast += rx_multicast; - /* rx_errors is an ulong, updated without syncp protection */ - accum.rx_errors += p->rx_errors; + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->multicast += rx_multicast; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* rx_errors & tx_dropped are u32, updated + * without syncp protection. 
+ */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } - stats->rx_packets = accum.rx_packets; - stats->rx_bytes = accum.rx_bytes; - stats->rx_errors = accum.rx_errors; - stats->rx_dropped = accum.rx_errors; - stats->multicast = accum.rx_multicast; + stats->rx_errors = rx_errors; + stats->rx_dropped = rx_errors; + stats->tx_dropped = tx_dropped; } return stats; } @@ -601,25 +611,6 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int macvlan_get_tx_queues(struct net *net, - struct nlattr *tb[], - unsigned int *num_tx_queues, - unsigned int *real_num_tx_queues) -{ - struct net_device *real_dev; - - if (!tb[IFLA_LINK]) - return -EINVAL; - - real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - - *num_tx_queues = real_dev->num_tx_queues; - *real_num_tx_queues = real_dev->real_num_tx_queues; - return 0; -} - int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], int (*receive)(struct sk_buff *skb), @@ -743,7 +734,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ ops->priv_size = sizeof(struct macvlan_dev); - ops->get_tx_queues = macvlan_get_tx_queues; ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index ac96a2d76291..e28b2e4959d4 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -25,19 +25,25 @@ struct macvlan_port; struct macvtap_queue; /** - * struct macvlan_rx_stats - MACVLAN percpu rx stats + * struct macvlan_pcpu_stats - MACVLAN percpu stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets + * @tx_packets: number of transmitted packets + * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters - * @rx_errors: 
number of errors + * @rx_errors: number of rx errors + * @tx_dropped: number of tx dropped packets */ -struct macvlan_rx_stats { +struct macvlan_pcpu_stats { u64 rx_packets; u64 rx_bytes; u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; struct u64_stats_sync syncp; - unsigned long rx_errors; + u32 rx_errors; + u32 tx_dropped; }; /* @@ -52,7 +58,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; - struct macvlan_rx_stats __percpu *rx_stats; + struct macvlan_pcpu_stats __percpu *pcpu_stats; enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); @@ -64,18 +70,18 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, bool multicast) { - struct macvlan_rx_stats *rx_stats; - - rx_stats = this_cpu_ptr(vlan->rx_stats); if (likely(success)) { - u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += len; + struct macvlan_pcpu_stats *pcpu_stats; + + pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += len; if (multicast) - rx_stats->rx_multicast++; - u64_stats_update_end(&rx_stats->syncp); + pcpu_stats->rx_multicast++; + u64_stats_update_end(&pcpu_stats->syncp); } else { - rx_stats->rx_errors++; + this_cpu_inc(vlan->pcpu_stats->rx_errors); } } -- cgit v1.2.3-71-gd317 From a75d946f42ae1771424a9582129fc5182ff48a1b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 4 Nov 2010 16:20:20 +0100 Subject: console: move for_each_console to linux/console.h Move it out of printk.c so that we can use it all over the code. There are some potential users which will be converted to that macro in next patches. 
Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 6 ++++++ kernel/printk.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 95cf6f08a59d..875cfb1c8132 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -126,6 +126,12 @@ struct console { struct console *next; }; +/* + * for_each_console() allows you to iterate on each console + */ +#define for_each_console(con) \ + for (con = console_drivers; con != NULL; con = con->next) + extern int console_set_on_cmdline; extern int add_preferred_console(char *name, int idx, char *options); diff --git a/kernel/printk.c b/kernel/printk.c index b2ebaee8c377..bf0420a92a1a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -42,12 +42,6 @@ #include -/* - * for_each_console() allows you to iterate on each console - */ -#define for_each_console(con) \ - for (con = console_drivers; con != NULL; con = con->next) - /* * Architectures can override it: */ -- cgit v1.2.3-71-gd317 From 3dfbd044d0d99cad2fe50e4f6c79845703fa0558 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 4 Nov 2010 16:20:23 +0100 Subject: TTY: include termios.h in tty_driver.h We reference termios and termiox in tty_driver.h, but we do not include linux/termios.h where these are defined. Add the #include properly. Otherwise when we include tty_driver.h, we get compile errors. 
Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Greg KH Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index db2d227694da..09678ed370f8 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -235,6 +235,7 @@ #include #include #include +#include <linux/termios.h> struct tty_struct; struct tty_driver; -- cgit v1.2.3-71-gd317 From e44dcb6c377529805bbaae505d5b333daab69111 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 12 Nov 2010 19:47:47 +0100 Subject: serial: mpc52xx: make printout for type more generic The printout for the type should be just "5xxx", so 512x users won't wonder why they have a mpc52xx-type UART. Signed-off-by: Wolfram Sang Cc: Grant Likely Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/serial/mpc52xx_uart.c | 6 +++++- include/linux/serial_core.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index c4399e23565a..126ec7f568ec 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -838,7 +838,11 @@ mpc52xx_uart_set_termios(struct uart_port *port, struct ktermios *new, static const char * mpc52xx_uart_type(struct uart_port *port) { - return port->type == PORT_MPC52xx ? "MPC52xx PSC" : NULL; + /* + * We keep using PORT_MPC52xx for historic reasons although it applies + * for MPC512x, too, but print "MPC5xxx" to not irritate users + */ + return port->type == PORT_MPC52xx ? 
"MPC5xxx PSC" : NULL; } static void diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 41603d690433..9ff9b7db293b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -95,7 +95,7 @@ /* PPC CPM type number */ #define PORT_CPM 58 -/* MPC52xx type numbers */ +/* MPC52xx (and MPC512x) type numbers */ #define PORT_MPC52xx 59 /* IBM icom */ -- cgit v1.2.3-71-gd317 From afe0cbf87500f0585d217deb8c6fd329793a7957 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 10 Nov 2010 12:50:50 +0900 Subject: cfg80211: Add nl80211 antenna configuration Allow setting of TX and RX antennas configuration via nl80211. The antenna configuration is defined as a bitmap of allowed antennas to use. This API can be used to mask out antennas which are not attached or should not be used for other reasons like regulatory concerns or special setups. Separate bitmaps are used for RX and TX to allow configuring different antennas for receiving and transmitting. Each bitmap is 32 bit long, each bit representing one antenna, starting with antenna 1 at the first bit. If an antenna bit is set, this means the driver is allowed to use this antenna for RX or TX respectively; if the bit is not set the hardware is not allowed to use this antenna. Using bitmaps has the benefit of allowing for a flexible configuration interface which can support many different configurations and which can be used for 802.11n as well as non-802.11n devices. Instead of relying on some hardware specific assumptions, drivers can use this information to know which antennas are actually attached to the system and derive their capabilities based on that. 802.11n devices should enable or disable chains, based on which antennas are present (If all antennas belonging to a particular chain are disabled, the entire chain should be disabled). 
HT capabilities (like STBC, TX Beamforming, Antenna selection) should be calculated based on the available chains after applying the antenna masks. Should an 802.11n device have diversity antennas attached to one of its chains, diversity can be enabled or disabled based on the antenna information. Non-802.11n drivers can use the antenna masks to select RX and TX antennas and to enable or disable antenna diversity. While covering chainmasks for 802.11n and the standard "legacy" modes "fixed antenna 1", "fixed antenna 2" and "diversity" this API also allows more rare, but useful configurations as follows: 1) Send on antenna 1, receive on antenna 2 (or vice versa). This can be used to have a low gain antenna for TX in order to keep within the regulatory constraints and a high gain antenna for RX in order to receive weaker signals ("speak softly, but listen harder"). This can be useful for building long-shot outdoor links. Another usage of this setup is having a low-noise pre-amplifier on antenna 1 and a power amplifier on the other antenna. This way transmit noise is mostly kept out of the low noise receive channel. (This would be bitmaps: tx 1 rx 2). 2) Another similar setup is: Use RX diversity on both antennas, but always send on antenna 1. Again that would allow us to benefit from a higher gain RX antenna, while staying within the legal limits. (This would be: tx 1 rx 3). 3) And finally there can be special experimental setups in research and development even with pre 802.11n hardware where more than 2 antennas are available. It's good to keep the API simple, yet flexible. Signed-off-by: Bruno Randolf -- v7: Made bitmasks 32 bit wide and rebased to latest wireless-testing. Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 +++++++++++++++++++++++++ include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index fb877b5621b7..17c5c8849250 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -804,6 +804,28 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly * means support for per-station GTKs. * + * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting. + * This can be used to mask out antennas which are not attached or should + * not be used for transmitting. If an antenna is not selected in this + * bitmap the hardware is not allowed to transmit on this antenna. + * + * Each bit represents one antenna, starting with antenna 1 at the first + * bit. Depending on which antennas are selected in the bitmap, 802.11n + * drivers can derive which chainmasks to use (if all antennas belonging to + * a particular chain are disabled this chain should be disabled) and if + * a chain has diversity antennas whether diversity should be used or not. + * HT capabilities (STBC, TX Beamforming, Antenna selection) can be + * derived from the available chains after applying the antenna mask. + * Non-802.11n drivers can derive whether to use diversity or not. + * Drivers may reject configurations or RX/TX mask combinations they cannot + * support by returning -EINVAL. + * + * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving. + * This can be used to mask out antennas which are not attached or should + * not be used for receiving. If an antenna is not selected in this bitmap + * the hardware should not be configured to receive on this antenna. + * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -973,6 +995,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_IBSS_RSN, + NL80211_ATTR_WIPHY_ANTENNA_TX, + NL80211_ATTR_WIPHY_ANTENNA_RX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e5702f5ac57c..07425e648a09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1304,6 +1304,9 @@ struct cfg80211_ops { void (*mgmt_frame_register)(struct wiphy *wiphy, struct net_device *dev, u16 frame_type, bool reg); + + int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant); }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c506241f8637..5e4dda4c0fd3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -166,7 +166,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, + + [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -526,7 +530,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.rts_threshold); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, dev->wiphy.coverage_class); - NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -545,6 +548,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + if (dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = 
dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); + if (!res) { + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant); + } + } + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -1024,6 +1037,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && + info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { + u32 tx_ant, rx_ant; + if (!rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto bad_res; + } + + tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); + rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { -- cgit v1.2.3-71-gd317 From 885a46d0f7942d76c2f3860acb45f75237d3bb42 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 11 Nov 2010 15:07:22 +0100 Subject: cfg80211: add support for setting the ad-hoc multicast rate Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 17c5c8849250..037b4e498890 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -826,6 +826,8 @@ enum nl80211_commands { * the hardware should not be configured to receive on this antenna. * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX. 
* + * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -998,6 +1000,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_TX, NL80211_ATTR_WIPHY_ANTENNA_RX, + NL80211_ATTR_MCAST_RATE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07425e648a09..8fd9eebd0cc9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -923,6 +923,7 @@ struct cfg80211_disassoc_request { * @privacy: this is a protected network, keys will be configured * after joining * @basic_rates: bitmap of basic rates to use when creating the IBSS + * @mcast_rate: multicast tx rate (in 100 kbps) */ struct cfg80211_ibss_params { u8 *ssid; @@ -934,6 +935,7 @@ struct cfg80211_ibss_params { u32 basic_rates; bool channel_fixed; bool privacy; + int mcast_rate; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e4dda4c0fd3..605553842226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -171,6 +171,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, + + [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3681,6 +3683,9 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[NL80211_ATTR_MCAST_RATE]) + ibss.mcast_rate = + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, -- cgit v1.2.3-71-gd317 From 6ddf27cdbc218a412d7e993fdc08e30eec2042ce Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 15 Nov 2010 15:57:30 -0500 Subject: USB: make usb_mark_last_busy use pm_runtime_mark_last_busy Since the 
runtime-PM core already defines a .last_busy field in device.power, this patch uses it to replace the .last_busy field defined in usb_device and uses pm_runtime_mark_last_busy to implement usb_mark_last_busy. Signed-off-by: Ming Lei Reviewed-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 23 +++++++++++------------ drivers/usb/core/hcd-pci.c | 1 - drivers/usb/core/hcd.c | 1 - drivers/usb/core/hub.c | 1 - drivers/usb/core/message.c | 1 - include/linux/usb.h | 5 ++--- 6 files changed, 13 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index eda2d2c25459..0a63e968c683 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "usb.h" @@ -1329,7 +1328,7 @@ int usb_resume(struct device *dev, pm_message_t msg) pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); do_unbind_rebind(udev, DO_REBIND); } } @@ -1397,7 +1396,7 @@ void usb_autosuspend_device(struct usb_device *udev) { int status; - udev->last_busy = jiffies; + usb_mark_last_busy(udev); status = pm_runtime_put_sync(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), @@ -1482,7 +1481,7 @@ void usb_autopm_put_interface(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); int status; - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", @@ -1512,8 +1511,8 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) unsigned long last_busy; int status = 0; - last_busy = udev->last_busy; - udev->last_busy = jiffies; + last_busy = udev->dev.power.last_busy; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); 
pm_runtime_put_noidle(&intf->dev); @@ -1554,7 +1553,7 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } @@ -1641,7 +1640,7 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } @@ -1697,7 +1696,7 @@ static int autosuspend_check(struct usb_device *udev) * enough, queue a delayed autosuspend request. */ j = ACCESS_ONCE(jiffies); - suspend_time = udev->last_busy + udev->autosuspend_delay; + suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay; if (time_before(j, suspend_time)) { pm_schedule_suspend(&udev->dev, jiffies_to_msecs( round_jiffies_up_relative(suspend_time - j))); @@ -1725,13 +1724,13 @@ static int usb_runtime_suspend(struct device *dev) * away. */ if (status) { - udev->last_busy = jiffies + + udev->dev.power.last_busy = jiffies + (udev->autosuspend_delay == 0 ? HZ/2 : 0); } /* Prevent the parent from suspending immediately after */ else if (udev->parent) - udev->parent->last_busy = jiffies; + usb_mark_last_busy(udev->parent); return status; } @@ -1745,7 +1744,7 @@ static int usb_runtime_resume(struct device *dev) * and all its interfaces. 
*/ status = usb_resume_both(udev, PMSG_AUTO_RESUME); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); return status; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 3799573bd385..b55d46070a25 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 61800f77dac8..e70aeaf3dc1a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 27115b45edc5..7c2405eccc4b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f377e49fcb30..832487423826 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include /* for usbcore internals */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 35fe6ab222bb..7d22b3340a7f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -20,6 +20,7 @@ #include /* for struct completion */ #include /* for current && schedule_timeout */ #include /* for struct mutex */ +#include /* for runtime PM */ struct usb_device; struct usb_driver; @@ -407,7 +408,6 @@ struct usb_tt; * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended - * @last_busy: time of last use * @autosuspend_delay: in jiffies * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled @@ -481,7 +481,6 @@ struct usb_device { unsigned long active_duration; #ifdef CONFIG_PM - unsigned long last_busy; int autosuspend_delay; unsigned long connect_time; @@ 
-527,7 +526,7 @@ extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); static inline void usb_mark_last_busy(struct usb_device *udev) { - udev->last_busy = jiffies; + pm_runtime_mark_last_busy(&udev->dev); } #else -- cgit v1.2.3-71-gd317 From fcc4a01eb8661226e80632327673f67bf6a5840b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Nov 2010 15:57:51 -0500 Subject: USB: use the runtime-PM autosuspend implementation This patch (as1428) converts USB over to the new runtime-PM core autosuspend framework. One slightly awkward aspect of the conversion is that USB devices will now have two suspend-delay attributes: the old power/autosuspend file and the new power/autosuspend_delay_ms file. One expresses the delay time in seconds and the other in milliseconds, but otherwise they do the same thing. The old attribute can be deprecated and then removed eventually. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/power-management.txt | 113 +++++++++++++++++---------------- drivers/usb/core/driver.c | 77 ++-------------------- drivers/usb/core/hub.c | 1 + drivers/usb/core/quirks.c | 6 -- drivers/usb/core/sysfs.c | 34 ++-------- drivers/usb/core/usb.c | 3 +- drivers/usb/core/usb.h | 2 - include/linux/usb.h | 2 - 8 files changed, 71 insertions(+), 167 deletions(-) (limited to 'include/linux') diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index b29d8e56cf28..c9ffa9ced7ee 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -2,7 +2,7 @@ Alan Stern - December 11, 2009 + October 28, 2010 @@ -107,9 +107,14 @@ allowed to issue dynamic suspends. The user interface for controlling dynamic PM is located in the power/ subdirectory of each USB device's sysfs directory, that is, in /sys/bus/usb/devices/.../power/ where "..." is the device's ID. The -relevant attribute files are: wakeup, control, and autosuspend. 
-(There may also be a file named "level"; this file was deprecated -as of the 2.6.35 kernel and replaced by the "control" file.) +relevant attribute files are: wakeup, control, and +autosuspend_delay_ms. (There may also be a file named "level"; this +file was deprecated as of the 2.6.35 kernel and replaced by the +"control" file. In 2.6.38 the "autosuspend" file will be deprecated +and replaced by the "autosuspend_delay_ms" file. The only difference +is that the newer file expresses the delay in milliseconds whereas the +older file uses seconds. Confusingly, both files are present in 2.6.37 +but only "autosuspend" works.) power/wakeup @@ -140,33 +145,36 @@ as of the 2.6.35 kernel and replaced by the "control" file.) suspended and autoresume was not allowed. This setting is no longer supported.) - power/autosuspend + power/autosuspend_delay_ms This file contains an integer value, which is the - number of seconds the device should remain idle before - the kernel will autosuspend it (the idle-delay time). - The default is 2. 0 means to autosuspend as soon as - the device becomes idle, and negative values mean - never to autosuspend. You can write a number to the - file to change the autosuspend idle-delay time. - -Writing "-1" to power/autosuspend and writing "on" to power/control do -essentially the same thing -- they both prevent the device from being -autosuspended. Yes, this is a redundancy in the API. + number of milliseconds the device should remain idle + before the kernel will autosuspend it (the idle-delay + time). The default is 2000. 0 means to autosuspend + as soon as the device becomes idle, and negative + values mean never to autosuspend. You can write a + number to the file to change the autosuspend + idle-delay time. + +Writing "-1" to power/autosuspend_delay_ms and writing "on" to +power/control do essentially the same thing -- they both prevent the +device from being autosuspended. Yes, this is a redundancy in the +API. 
(In 2.6.21 writing "0" to power/autosuspend would prevent the device from being autosuspended; the behavior was changed in 2.6.22. The power/autosuspend attribute did not exist prior to 2.6.21, and the power/level attribute did not exist prior to 2.6.22. power/control -was added in 2.6.34.) +was added in 2.6.34, and power/autosuspend_delay_ms was added in +2.6.37 but did not become functional until 2.6.38.) Changing the default idle-delay time ------------------------------------ -The default autosuspend idle-delay time is controlled by a module -parameter in usbcore. You can specify the value when usbcore is -loaded. For example, to set it to 5 seconds instead of 2 you would +The default autosuspend idle-delay time (in seconds) is controlled by +a module parameter in usbcore. You can specify the value when usbcore +is loaded. For example, to set it to 5 seconds instead of 2 you would do: modprobe usbcore autosuspend=5 @@ -234,25 +242,23 @@ every device. If a driver knows that its device has proper suspend/resume support, it can enable autosuspend all by itself. For example, the video -driver for a laptop's webcam might do this, since these devices are -rarely used and so should normally be autosuspended. +driver for a laptop's webcam might do this (in recent kernels they +do), since these devices are rarely used and so should normally be +autosuspended. Sometimes it turns out that even when a device does work okay with -autosuspend there are still problems. For example, there are -experimental patches adding autosuspend support to the usbhid driver, -which manages keyboards and mice, among other things. Tests with a -number of keyboards showed that typing on a suspended keyboard, while -causing the keyboard to do a remote wakeup all right, would -nonetheless frequently result in lost keystrokes. Tests with mice -showed that some of them would issue a remote-wakeup request in -response to button presses but not to motion, and some in response to -neither. 
+autosuspend there are still problems. For example, the usbhid driver, +which manages keyboards and mice, has autosuspend support. Tests with +a number of keyboards show that typing on a suspended keyboard, while +causing the keyboard to do a remote wakeup all right, will nonetheless +frequently result in lost keystrokes. Tests with mice show that some +of them will issue a remote-wakeup request in response to button +presses but not to motion, and some in response to neither. The kernel will not prevent you from enabling autosuspend on devices that can't handle it. It is even possible in theory to damage a -device by suspending it at the wrong time -- for example, suspending a -USB hard disk might cause it to spin down without parking the heads. -(Highly unlikely, but possible.) Take care. +device by suspending it at the wrong time. (Highly unlikely, but +possible.) Take care. The driver interface for Power Management @@ -336,10 +342,6 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -(There is a similar usage counter field in struct usb_device, -associated with the device itself rather than any of its interfaces. -This counter is used only by the USB core.) - Drivers need not be concerned about balancing changes to the usage counter; the USB core will undo any remaining "get"s when a driver is unbound from its interface. As a corollary, drivers must not call @@ -409,11 +411,11 @@ during autosuspend. For example, there's not much point autosuspending a keyboard if the user can't cause the keyboard to do a remote wakeup by typing on it. If the driver sets intf->needs_remote_wakeup to 1, the kernel won't autosuspend the -device if remote wakeup isn't available or has been disabled through -the power/wakeup attribute. (If the device is already autosuspended, -though, setting this flag won't cause the kernel to autoresume it. 
-Normally a driver would set this flag in its probe method, at which -time the device is guaranteed not to be autosuspended.) +device if remote wakeup isn't available. (If the device is already +autosuspended, though, setting this flag won't cause the kernel to +autoresume it. Normally a driver would set this flag in its probe +method, at which time the device is guaranteed not to be +autosuspended.) If a driver does its I/O asynchronously in interrupt context, it should call usb_autopm_get_interface_async() before starting output and @@ -422,20 +424,19 @@ it receives an input event, it should call usb_mark_last_busy(struct usb_device *udev); -in the event handler. This sets udev->last_busy to the current time. -udev->last_busy is the field used for idle-delay calculations; -updating it will cause any pending autosuspend to be moved back. Most -of the usb_autopm_* routines will also set the last_busy field to the -current time. +in the event handler. This tells the PM core that the device was just +busy and therefore the next autosuspend idle-delay expiration should +be pushed back. Many of the usb_autopm_* routines also make this call, +so drivers need to worry only when interrupt-driven input arrives. Asynchronous operation is always subject to races. For example, a -driver may call one of the usb_autopm_*_interface_async() routines at -a time when the core has just finished deciding the device has been -idle for long enough but not yet gotten around to calling the driver's -suspend method. The suspend method must be responsible for -synchronizing with the output request routine and the URB completion -handler; it should cause autosuspends to fail with -EBUSY if the -driver needs to use the device. +driver may call the usb_autopm_get_interface_async() routine at a time +when the core has just finished deciding the device has been idle for +long enough but not yet gotten around to calling the driver's suspend +method. 
The suspend method must be responsible for synchronizing with +the I/O request routine and the URB completion handler; it should +cause autosuspends to fail with -EBUSY if the driver needs to use the +device. External suspend calls should never be allowed to fail in this way, only autosuspend calls. The driver can tell them apart by checking @@ -472,7 +473,9 @@ Firstly, a device may already be autosuspended when a system suspend occurs. Since system suspends are supposed to be as transparent as possible, the device should remain suspended following the system resume. But this theory may not work out well in practice; over time -the kernel's behavior in this regard has changed. +the kernel's behavior in this regard has changed. As of 2.6.37 the +policy is to resume all devices during a system resume and let them +handle their own runtime suspends afterward. Secondly, a dynamic power-management event may occur as a system suspend is underway. The window for this is short, since system diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 0a63e968c683..43c25c29ac1f 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1397,32 +1397,7 @@ void usb_autosuspend_device(struct usb_device *udev) int status; usb_mark_last_busy(udev); - status = pm_runtime_put_sync(&udev->dev); - dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", - __func__, atomic_read(&udev->dev.power.usage_count), - status); -} - -/** - * usb_try_autosuspend_device - attempt an autosuspend of a USB device and its interfaces - * @udev: the usb_device to autosuspend - * - * This routine should be called when a core subsystem thinks @udev may - * be ready to autosuspend. - * - * @udev's usage counter left unchanged. If it is 0 and all the interfaces - * are inactive then an autosuspend will be attempted. The attempt may - * fail or be delayed. - * - * The caller must hold @udev's device lock. - * - * This routine can run only in process context. 
- */ -void usb_try_autosuspend_device(struct usb_device *udev) -{ - int status; - - status = pm_runtime_idle(&udev->dev); + status = pm_runtime_put_sync_autosuspend(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); @@ -1508,32 +1483,11 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface); void usb_autopm_put_interface_async(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - unsigned long last_busy; - int status = 0; + int status; - last_busy = udev->dev.power.last_busy; usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); - pm_runtime_put_noidle(&intf->dev); - - if (udev->dev.power.runtime_auto) { - /* Optimization: Don't schedule a delayed autosuspend if - * the timer is already running and the expiration time - * wouldn't change. - * - * We have to use the interface's timer. Attempts to - * schedule a suspend for the device would fail because - * the interface is still active. - */ - if (intf->dev.power.timer_expires == 0 || - round_jiffies_up(last_busy) != - round_jiffies_up(jiffies)) { - status = pm_schedule_suspend(&intf->dev, - jiffies_to_msecs( - round_jiffies_up_relative( - udev->autosuspend_delay))); - } - } + status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1651,7 +1605,6 @@ static int autosuspend_check(struct usb_device *udev) { int w, i; struct usb_interface *intf; - unsigned long suspend_time, j; /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. @@ -1691,17 +1644,6 @@ static int autosuspend_check(struct usb_device *udev) return -EOPNOTSUPP; } udev->do_remote_wakeup = w; - - /* If everything is okay but the device hasn't been idle for long - * enough, queue a delayed autosuspend request. 
- */ - j = ACCESS_ONCE(jiffies); - suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay; - if (time_before(j, suspend_time)) { - pm_schedule_suspend(&udev->dev, jiffies_to_msecs( - round_jiffies_up_relative(suspend_time - j))); - return -EAGAIN; - } return 0; } @@ -1719,17 +1661,8 @@ static int usb_runtime_suspend(struct device *dev) status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); - /* If an interface fails the suspend, adjust the last_busy - * time so that we don't get another suspend attempt right - * away. - */ - if (status) { - udev->dev.power.last_busy = jiffies + - (udev->autosuspend_delay == 0 ? HZ/2 : 0); - } - /* Prevent the parent from suspending immediately after */ - else if (udev->parent) + if (status == 0 && udev->parent) usb_mark_last_busy(udev->parent); return status; @@ -1756,7 +1689,7 @@ static int usb_runtime_idle(struct device *dev) * autosuspend checks. */ if (autosuspend_check(udev) == 0) - pm_runtime_suspend(dev); + pm_runtime_autosuspend(dev); return 0; } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7c2405eccc4b..fdb62ca10d86 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1803,6 +1803,7 @@ int usb_new_device(struct usb_device *udev) /* Tell the runtime-PM framework the device is active */ pm_runtime_set_active(&udev->dev); + pm_runtime_use_autosuspend(&udev->dev); pm_runtime_enable(&udev->dev); err = usb_enumerate_device(udev); /* Read descriptors */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 25719da45e33..e3531da16137 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -124,12 +124,6 @@ void usb_detect_quirks(struct usb_device *udev) */ usb_disable_autosuspend(udev); - /* Autosuspend can also be disabled if the initial autosuspend_delay - * is negative. 
- */ - if (udev->autosuspend_delay < 0) - usb_autoresume_device(udev); - #endif /* For the present, all devices default to USB-PERSIST enabled */ diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 9561e087907d..6781c369ce2d 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -334,44 +334,20 @@ static DEVICE_ATTR(active_duration, S_IRUGO, show_active_duration, NULL); static ssize_t show_autosuspend(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_device *udev = to_usb_device(dev); - - return sprintf(buf, "%d\n", udev->autosuspend_delay / HZ); + return sprintf(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t set_autosuspend(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct usb_device *udev = to_usb_device(dev); - int value, old_delay; - int rc; + int value; - if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/HZ || - value <= - INT_MAX/HZ) + if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || + value <= -INT_MAX/1000) return -EINVAL; - value *= HZ; - - usb_lock_device(udev); - old_delay = udev->autosuspend_delay; - udev->autosuspend_delay = value; - - if (old_delay < 0) { /* Autosuspend wasn't allowed */ - if (value >= 0) - usb_autosuspend_device(udev); - } else { /* Autosuspend was allowed */ - if (value < 0) { - rc = usb_autoresume_device(udev); - if (rc < 0) { - count = rc; - udev->autosuspend_delay = old_delay; - } - } else { - usb_try_autosuspend_device(udev); - } - } - usb_unlock_device(udev); + pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index fdd4130fbb7d..079cb57bab4f 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -445,7 +445,8 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, INIT_LIST_HEAD(&dev->filelist); #ifdef CONFIG_PM - dev->autosuspend_delay = usb_autosuspend_delay * HZ; + 
pm_runtime_set_autosuspend_delay(&dev->dev, + usb_autosuspend_delay * 1000); dev->connect_time = jiffies; dev->active_duration = -jiffies; #endif diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index cd882203ad34..b975450f403e 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -75,14 +75,12 @@ static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg) #ifdef CONFIG_USB_SUSPEND extern void usb_autosuspend_device(struct usb_device *udev); -extern void usb_try_autosuspend_device(struct usb_device *udev); extern int usb_autoresume_device(struct usb_device *udev); extern int usb_remote_wakeup(struct usb_device *dev); #else #define usb_autosuspend_device(udev) do {} while (0) -#define usb_try_autosuspend_device(udev) do {} while (0) static inline int usb_autoresume_device(struct usb_device *udev) { return 0; diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d22b3340a7f..5ee2223af08c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -408,7 +408,6 @@ struct usb_tt; * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended - * @autosuspend_delay: in jiffies * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume @@ -481,7 +480,6 @@ struct usb_device { unsigned long active_duration; #ifdef CONFIG_PM - int autosuspend_delay; unsigned long connect_time; unsigned do_remote_wakeup:1; -- cgit v1.2.3-71-gd317 From da6836500414ae734cd9873c2d553db594f831e9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 16 Nov 2010 11:52:38 +0000 Subject: netfilter: allow hooks to pass error code back up the stack SELinux would like to pass certain fatal errors back up the stack. This patch implements the generic netfilter support for this functionality. Based-on-patch-by: Patrick McHardy Signed-off-by: Eric Paris Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 2 ++ net/netfilter/core.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 03317c8d4077..1893837b3966 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -33,6 +33,8 @@ #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) +#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 85dabb86be6f..32fcbe290c04 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -173,9 +173,11 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - } else if (verdict == NF_DROP) { + } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { kfree_skb(skb); - ret = -EPERM; + ret = -(verdict >> NF_VERDICT_BITS); + if (ret == 0) + ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) -- cgit v1.2.3-71-gd317 From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:30:14 +0000 Subject: rtnetlink: Link address family API Each net_device contains address family specific data such as per device settings and statistics. We already expose this data via procfs/sysfs and partially netlink. The netlink method requires the requester to send one RTM_GETLINK request for each address family it wishes to receive data of and then merge this data itself. This patch implements a new API which combines all address family specific link data in a new netlink attribute IFLA_AF_SPEC. IFLA_AF_SPEC contains a sequence of nested attributes, one for each address family which in turn defines the structure of its own attribute. 
Example: [IFLA_AF_SPEC] = { [AF_INET] = { [IFLA_INET_CONF] = ..., }, [AF_INET6] = { [IFLA_INET6_FLAGS] = ..., [IFLA_INET6_CONF] = ..., } } The API also allows for address families to implement a function which parses the IFLA_AF_SPEC attribute sent by userspace to implement address family specific link options. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_link.h | 19 +++++++ include/net/rtnetlink.h | 31 ++++++++++ net/core/rtnetlink.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2fc66dd783ee..443d04a66a79 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -80,6 +80,24 @@ struct rtnl_link_ifmap { __u8 port; }; +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create an attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + enum { IFLA_UNSPEC, IFLA_ADDRESS, @@ -116,6 +134,7 @@ enum { IFLA_STATS64, IFLA_VF_PORTS, IFLA_PORT_SELF, + IFLA_AF_SPEC, __IFLA_MAX }; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e013c68bfb00..35be0bbcd7da 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -83,6 +83,37 @@ extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); +/** + * struct rtnl_af_ops - rtnetlink address family operations + * + * @list: Used internally + * @family: Address family + * @fill_link_af: Function to fill IFLA_AF_SPEC with address family + * specific netlink attributes. 
+ * @get_link_af_size: Function to calculate size of address family specific + * netlink attributes exclusive of the container attribute. + * @parse_link_af: Function to parse an IFLA_AF_SPEC attribute and modify + * net_device accordingly. + */ +struct rtnl_af_ops { + struct list_head list; + int family; + + int (*fill_link_af)(struct sk_buff *skb, + const struct net_device *dev); + size_t (*get_link_af_size)(const struct net_device *dev); + + int (*parse_link_af)(struct net_device *dev, + const struct nlattr *attr); +}; + +extern int __rtnl_af_register(struct rtnl_af_ops *ops); +extern void __rtnl_af_unregister(struct rtnl_af_ops *ops); + +extern int rtnl_af_register(struct rtnl_af_ops *ops); +extern void rtnl_af_unregister(struct rtnl_af_ops *ops); + + extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 841c287ef40a..bf69e5871b1a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev) return size; } +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. 
+ * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } + + return size; +} + static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct 
rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, *af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. 
type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + continue; + + if (!af_ops->parse_link_af) + continue; + + err = af_ops->parse_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: -- cgit v1.2.3-71-gd317 From ca7479ebbd9f7621646bf2792cb7143647f035bb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:31:20 +0000 Subject: inet: Define IPV4_DEVCONF_MAX Define IPV4_DEVCONF_MAX to get rid of MAX - 1 notation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 380ba6bc5db1..2b86eaf11773 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -41,10 +41,12 @@ enum __IPV4_DEVCONF_MAX }; +#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1) + struct ipv4_devconf { void *sysctl; - int data[__IPV4_DEVCONF_MAX - 1]; - DECLARE_BITMAP(state, __IPV4_DEVCONF_MAX - 1); + int data[IPV4_DEVCONF_MAX]; + DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); }; struct in_device { @@ -90,7 +92,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, static inline void ipv4_devconf_setall(struct in_device *in_dev) { - bitmap_fill(in_dev->cnf.state, __IPV4_DEVCONF_MAX - 1); + bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX); } #define IN_DEV_CONF_GET(in_dev, attr) \ -- cgit v1.2.3-71-gd317 From 9f0f7272ac9506f4c8c05cc597b7e376b0b9f3e4 Mon Sep 17 00:00:00 2001 From: 
Thomas Graf Date: Tue, 16 Nov 2010 04:32:48 +0000 Subject: ipv4: AF_INET link address family Implements the AF_INET link address family exposing the per device configuration settings via netlink using the attribute IFLA_INET_CONF. The format of IFLA_INET_CONF differs depending on the direction the attribute is sent. The attribute sent by the kernel consists of a u32 array, basically a 1:1 copy of in_device->cnf.data[]. The attribute expected by the kernel must consist of a sequence of nested u32 attributes, each representing a change request, e.g. [IFLA_INET_CONF] = { [IPV4_DEVCONF_FORWARDING] = 1, [IPV4_DEVCONF_NOXFRM] = 0, } libnl userspace API documentation and example available from: http://www.infradead.org/~tgr/libnl/doc-git/group__link__inet.html Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_link.h | 8 ++++++ net/ipv4/devinet.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 443d04a66a79..2e02e4d7b11e 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -147,6 +147,14 @@ enum { #define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) #endif +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + /* ifi_flags. IFF_* flags. 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dc94b0316b78..71afc26c2df8 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1256,6 +1256,72 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } +static size_t inet_get_link_af_size(const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return 0; + + return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ +} + +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + struct nlattr *nla; + int i; + + if (!in_dev) + return -ENODATA; + + nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); + if (nla == NULL) + return -EMSGSIZE; + + for (i = 0; i < IPV4_DEVCONF_MAX; i++) + ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + + return 0; +} + +static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { + [IFLA_INET_CONF] = { .type = NLA_NESTED }, +}; + +static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int err, rem; + + if (!in_dev) + return -EOPNOTSUPP; + + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + if (err < 0) + return err; + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { + int cfgid = nla_type(a); + + if (nla_len(a) < 4) + return -EINVAL; + + if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) + return -EINVAL; + } + } + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) + ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); + } + + return 0; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1619,6 +1685,13 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; +static struct rtnl_af_ops inet_af_ops = { + .family = AF_INET, + .fill_link_af = 
inet_fill_link_af, + .get_link_af_size = inet_get_link_af_size, + .parse_link_af = inet_parse_link_af, +}; + void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); @@ -1626,6 +1699,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + rtnl_af_register(&inet_af_ops); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); -- cgit v1.2.3-71-gd317 From 6b6e39a6a8da7234c538d14c43d3583da8875f9c Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 15 Nov 2010 23:13:18 +0100 Subject: driver-core: merge private parts of class and bus As classes and busses are pretty much the same thing, and we want to merge them together into a 'subsystem' in the future, let us share the same private data parts to make that merge easier. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 62 ++++++++++++++++++++------------------------------ drivers/base/bus.c | 13 +++++------ drivers/base/class.c | 38 +++++++++++++++---------------- drivers/base/core.c | 22 +++++++++--------- include/linux/device.h | 7 +++--- 5 files changed, 64 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index 2ca7f5b7b824..19f49e41ce5d 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -1,31 +1,46 @@ /** - * struct bus_type_private - structure to hold the private to the driver core portions of the bus_type structure. + * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure. * - * @subsys - the struct kset that defines this bus. 
This is the main kobject - * @drivers_kset - the list of drivers associated with this bus - * @devices_kset - the list of devices associated with this bus + * @subsys - the struct kset that defines this subsystem + * @devices_kset - the list of devices associated + * + * @drivers_kset - the list of drivers associated * @klist_devices - the klist to iterate over the @devices_kset * @klist_drivers - the klist to iterate over the @drivers_kset * @bus_notifier - the bus notifier list for anything that cares about things - * on this bus. + * on this bus. * @bus - pointer back to the struct bus_type that this structure is associated - * with. + * with. + * + * @class_interfaces - list of class_interfaces associated + * @glue_dirs - "glue" directory to put in-between the parent device to + * avoid namespace conflicts + * @class_mutex - mutex to protect the children, devices, and interfaces lists. + * @class - pointer back to the struct class that this structure is associated + * with. * * This structure is the one that is the actual kobject allowing struct - * bus_type to be statically allocated safely. Nothing outside of the driver - * core should ever touch these fields. + * bus_type/class to be statically allocated safely. Nothing outside of the + * driver core should ever touch these fields. 
*/ -struct bus_type_private { +struct subsys_private { struct kset subsys; - struct kset *drivers_kset; struct kset *devices_kset; + + struct kset *drivers_kset; struct klist klist_devices; struct klist klist_drivers; struct blocking_notifier_head bus_notifier; unsigned int drivers_autoprobe:1; struct bus_type *bus; + + struct list_head class_interfaces; + struct kset glue_dirs; + struct mutex class_mutex; + struct class *class; }; +#define to_subsys_private(obj) container_of(obj, struct subsys_private, subsys.kobj) struct driver_private { struct kobject kobj; @@ -36,33 +51,6 @@ struct driver_private { }; #define to_driver(obj) container_of(obj, struct driver_private, kobj) - -/** - * struct class_private - structure to hold the private to the driver core portions of the class structure. - * - * @class_subsys - the struct kset that defines this class. This is the main kobject - * @class_devices - list of devices associated with this class - * @class_interfaces - list of class_interfaces associated with this class - * @class_dirs - "glue" directory for virtual devices associated with this class - * @class_mutex - mutex to protect the children, devices, and interfaces lists. - * @class - pointer back to the struct class that this structure is associated - * with. - * - * This structure is the one that is the actual kobject allowing struct - * class to be statically allocated safely. Nothing outside of the driver - * core should ever touch these fields. - */ -struct class_private { - struct kset class_subsys; - struct klist class_devices; - struct list_head class_interfaces; - struct kset class_dirs; - struct mutex class_mutex; - struct class *class; -}; -#define to_class(obj) \ - container_of(obj, struct class_private, class_subsys.kobj) - /** * struct device_private - structure to hold the private to the driver core portions of the device structure. 
* diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 33c270a64db7..e243bd49764b 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -20,7 +20,6 @@ #include "power/power.h" #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) -#define to_bus(obj) container_of(obj, struct bus_type_private, subsys.kobj) /* * sysfs bindings for drivers @@ -96,11 +95,11 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct bus_attribute *bus_attr = to_bus_attr(attr); - struct bus_type_private *bus_priv = to_bus(kobj); + struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->show) - ret = bus_attr->show(bus_priv->bus, buf); + ret = bus_attr->show(subsys_priv->bus, buf); return ret; } @@ -108,11 +107,11 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct bus_attribute *bus_attr = to_bus_attr(attr); - struct bus_type_private *bus_priv = to_bus(kobj); + struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->store) - ret = bus_attr->store(bus_priv->bus, buf, count); + ret = bus_attr->store(subsys_priv->bus, buf, count); return ret; } @@ -858,9 +857,9 @@ static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store); int bus_register(struct bus_type *bus) { int retval; - struct bus_type_private *priv; + struct subsys_private *priv; - priv = kzalloc(sizeof(struct bus_type_private), GFP_KERNEL); + priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/base/class.c b/drivers/base/class.c index 7975a52bdf5b..4f1df2e8fd74 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -27,7 +27,7 @@ static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *class_attr = to_class_attr(attr); - struct class_private *cp = to_class(kobj); + struct subsys_private *cp = 
to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->show) @@ -39,7 +39,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *class_attr = to_class_attr(attr); - struct class_private *cp = to_class(kobj); + struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->store) @@ -49,7 +49,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, static void class_release(struct kobject *kobj) { - struct class_private *cp = to_class(kobj); + struct subsys_private *cp = to_subsys_private(kobj); struct class *class = cp->class; pr_debug("class '%s': release.\n", class->name); @@ -65,7 +65,7 @@ static void class_release(struct kobject *kobj) static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj) { - struct class_private *cp = to_class(kobj); + struct subsys_private *cp = to_subsys_private(kobj); struct class *class = cp->class; return class->ns_type; @@ -82,7 +82,7 @@ static struct kobj_type class_ktype = { .child_ns_type = class_child_ns_type, }; -/* Hotplug events for classes go to the class class_subsys */ +/* Hotplug events for classes go to the class subsys */ static struct kset *class_kset; @@ -90,7 +90,7 @@ int class_create_file(struct class *cls, const struct class_attribute *attr) { int error; if (cls) - error = sysfs_create_file(&cls->p->class_subsys.kobj, + error = sysfs_create_file(&cls->p->subsys.kobj, &attr->attr); else error = -EINVAL; @@ -100,20 +100,20 @@ int class_create_file(struct class *cls, const struct class_attribute *attr) void class_remove_file(struct class *cls, const struct class_attribute *attr) { if (cls) - sysfs_remove_file(&cls->p->class_subsys.kobj, &attr->attr); + sysfs_remove_file(&cls->p->subsys.kobj, &attr->attr); } static struct class *class_get(struct class *cls) { if (cls) - kset_get(&cls->p->class_subsys); + kset_get(&cls->p->subsys); return cls; } static 
void class_put(struct class *cls) { if (cls) - kset_put(&cls->p->class_subsys); + kset_put(&cls->p->subsys); } static int add_class_attrs(struct class *cls) @@ -162,7 +162,7 @@ static void klist_class_dev_put(struct klist_node *n) int __class_register(struct class *cls, struct lock_class_key *key) { - struct class_private *cp; + struct subsys_private *cp; int error; pr_debug("device class '%s': registering\n", cls->name); @@ -170,11 +170,11 @@ int __class_register(struct class *cls, struct lock_class_key *key) cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; - klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put); + klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->class_interfaces); - kset_init(&cp->class_dirs); + kset_init(&cp->glue_dirs); __mutex_init(&cp->class_mutex, "struct class mutex", key); - error = kobject_set_name(&cp->class_subsys.kobj, "%s", cls->name); + error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); if (error) { kfree(cp); return error; @@ -187,15 +187,15 @@ int __class_register(struct class *cls, struct lock_class_key *key) #if defined(CONFIG_BLOCK) /* let the block class directory show up in the root of sysfs */ if (!sysfs_deprecated || cls != &block_class) - cp->class_subsys.kobj.kset = class_kset; + cp->subsys.kobj.kset = class_kset; #else - cp->class_subsys.kobj.kset = class_kset; + cp->subsys.kobj.kset = class_kset; #endif - cp->class_subsys.kobj.ktype = &class_ktype; + cp->subsys.kobj.ktype = &class_ktype; cp->class = cls; cls->p = cp; - error = kset_register(&cp->class_subsys); + error = kset_register(&cp->subsys); if (error) { kfree(cp); return error; @@ -210,7 +210,7 @@ void class_unregister(struct class *cls) { pr_debug("device class '%s': unregistering\n", cls->name); remove_class_attrs(cls); - kset_unregister(&cls->p->class_subsys); + kset_unregister(&cls->p->subsys); } static void class_create_release(struct class *cls) @@ -295,7 +295,7 @@ void 
class_dev_iter_init(struct class_dev_iter *iter, struct class *class, if (start) start_knode = &start->knode_class; - klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode); + klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); iter->type = type; } EXPORT_SYMBOL_GPL(class_dev_iter_init); diff --git a/drivers/base/core.c b/drivers/base/core.c index 6ed645411c40..46ff6c251932 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -610,7 +610,7 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) dir->class = class; kobject_init(&dir->kobj, &class_dir_ktype); - dir->kobj.kset = &class->p->class_dirs; + dir->kobj.kset = &class->p->glue_dirs; retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); if (retval < 0) { @@ -635,7 +635,7 @@ static struct kobject *get_device_parent(struct device *dev, if (sysfs_deprecated && dev->class == &block_class) { if (parent && parent->class == &block_class) return &parent->kobj; - return &block_class.p->class_subsys.kobj; + return &block_class.p->subsys.kobj; } #endif @@ -654,13 +654,13 @@ static struct kobject *get_device_parent(struct device *dev, mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ - spin_lock(&dev->class->p->class_dirs.list_lock); - list_for_each_entry(k, &dev->class->p->class_dirs.list, entry) + spin_lock(&dev->class->p->glue_dirs.list_lock); + list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } - spin_unlock(&dev->class->p->class_dirs.list_lock); + spin_unlock(&dev->class->p->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); return kobj; @@ -682,7 +682,7 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { /* see if we live in a "glue" directory */ if (!glue_dir || !dev->class || - glue_dir->kset != &dev->class->p->class_dirs) + glue_dir->kset != &dev->class->p->glue_dirs) return; 
kobject_put(glue_dir); @@ -709,7 +709,7 @@ static int device_add_class_symlinks(struct device *dev) return 0; error = sysfs_create_link(&dev->kobj, - &dev->class->p->class_subsys.kobj, + &dev->class->p->subsys.kobj, "subsystem"); if (error) goto out; @@ -728,7 +728,7 @@ static int device_add_class_symlinks(struct device *dev) #endif /* link in the class directory pointing to the device */ - error = sysfs_create_link(&dev->class->p->class_subsys.kobj, + error = sysfs_create_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; @@ -756,7 +756,7 @@ static void device_remove_class_symlinks(struct device *dev) if (sysfs_deprecated && dev->class == &block_class) return; #endif - sysfs_delete_link(&dev->class->p->class_subsys.kobj, &dev->kobj, dev_name(dev)); + sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); } /** @@ -947,7 +947,7 @@ int device_add(struct device *dev) mutex_lock(&dev->class->p->class_mutex); /* tie the class to the device */ klist_add_tail(&dev->knode_class, - &dev->class->p->class_devices); + &dev->class->p->klist_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, @@ -1535,7 +1535,7 @@ int device_rename(struct device *dev, const char *new_name) } if (dev->class) { - error = sysfs_rename_link(&dev->class->p->class_subsys.kobj, + error = sysfs_rename_link(&dev->class->p->subsys.kobj, &dev->kobj, old_device_name, new_name); if (error) goto out; diff --git a/include/linux/device.h b/include/linux/device.h index dd4895313468..1e2d335ab683 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -30,9 +30,8 @@ struct device_private; struct device_driver; struct driver_private; struct class; -struct class_private; +struct subsys_private; struct bus_type; -struct bus_type_private; struct device_node; struct bus_attribute { @@ -65,7 +64,7 @@ struct bus_type { const struct dev_pm_ops *pm; - struct bus_type_private *p; + struct subsys_private *p; }; 
extern int __must_check bus_register(struct bus_type *bus); @@ -213,7 +212,7 @@ struct class { const struct dev_pm_ops *pm; - struct class_private *p; + struct subsys_private *p; }; struct class_dev_iter { -- cgit v1.2.3-71-gd317 From b2c0710c464ede15e1fc52fb1e7ee9ba54cea186 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 9 Sep 2010 13:40:39 -0700 Subject: rcu: move TINY_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 8 ++---- include/linux/rcutree.h | 1 + kernel/rcutiny.c | 71 +++++++++++++++++++++++++++++++++++++++--------- kernel/rcutiny_plugin.h | 15 +++++----- 5 files changed, 70 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03cda7bed985..7142ee3304ab 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -118,7 +118,6 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ -extern void rcu_init(void); extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_check_callbacks(int cpu, int user); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 13877cb93a60..ea025a611fcc 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,7 +27,9 @@ #include -#define rcu_init_sched() do { } while (0) +static inline void rcu_init(void) +{ +} #ifdef 
CONFIG_TINY_RCU @@ -125,16 +127,12 @@ static inline void rcu_cpu_stall_reset(void) } #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern int rcu_scheduler_active __read_mostly; extern void rcu_scheduler_starting(void); - #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - static inline void rcu_scheduler_starting(void) { } - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 95518e628794..c0e96833aa73 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,6 +30,7 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H +extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index d806735342ac..86eef29cdfb2 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,8 +59,15 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_cbs_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); +static unsigned long have_rcu_cbs; +static void invoke_rcu_cbs(void); + /* Forward declarations for rcutiny_plugin.h. 
*/ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static int rcu_cbs(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -123,7 +130,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -132,7 +139,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -152,10 +159,10 @@ void rcu_check_callbacks(int cpu, int user) } /* - * Helper function for rcu_process_callbacks() that operates on the - * specified rcu_ctrlkblk structure. + * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure + * whose grace period has elapsed. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; @@ -180,19 +187,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); + local_bh_disable(); list->func(list); + local_bh_enable(); list = next; } } /* - * Invoke any callbacks whose grace period has completed. + * This kthread invokes RCU callbacks whose grace periods have + * elapsed. It is awakened as needed, and takes the place of the + * RCU_SOFTIRQ that was used previously for this purpose. + * This is a kthread, but it is never stopped, at least not until + * the system goes down. 
+ */ +static int rcu_cbs(void *arg) +{ + unsigned long work; + unsigned long flags; + + for (;;) { + wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + local_irq_save(flags); + work = have_rcu_cbs; + have_rcu_cbs = 0; + local_irq_restore(flags); + if (work) { + rcu_process_callbacks(&rcu_sched_ctrlblk); + rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); + } + } + + return 0; /* Not reached, but needed to shut gcc up. */ +} + +/* + * Wake up rcu_cbs() to process callbacks now eligible for invocation. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static void invoke_rcu_cbs(void) { - __rcu_process_callbacks(&rcu_sched_ctrlblk); - __rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); + unsigned long flags; + + local_irq_save(flags); + have_rcu_cbs = 1; + wake_up(&rcu_cbs_wq); + local_irq_restore(flags); } /* @@ -282,7 +322,12 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __init rcu_init(void) +/* + * Spawn the kthread that invokes RCU callbacks. + */ +static int __init rcu_spawn_kthreads(void) { - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + return 0; } +early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 6ceca4f745ff..95f9239df512 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -22,6 +22,8 @@ * Author: Paul E. McKenney */ +#include + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -164,9 +166,9 @@ static void rcu_preempt_cpu_qs(void) if (!rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; - /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + /* If there are done callbacks, cause them to be invoked. 
*/ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -374,7 +376,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -383,7 +385,7 @@ static void rcu_preempt_check_callbacks(void) /* * TINY_PREEMPT_RCU has an extra callback-list tail pointer to - * update, so this is invoked from __rcu_process_callbacks() to + * update, so this is invoked from rcu_process_callbacks() to * handle that case. Of course, it is invoked for all flavors of * RCU, but RCU callbacks can appear only on one of the lists, and * neither ->nexttail nor ->donetail can possibly be NULL, so there @@ -400,7 +402,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_preempt_process_callbacks(void) { - __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); + rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); } /* @@ -599,14 +601,13 @@ static void rcu_preempt_process_callbacks(void) #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC - #include /* * During boot, we forgive RCU lockdep issues. After this function is * invoked, we start taking RCU lockdep issues seriously. */ -void rcu_scheduler_starting(void) +void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); rcu_scheduler_active = 1; -- cgit v1.2.3-71-gd317 From 5f2b0ba4d94b3ac23cbc4b7f675d98eb677a760a Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:23 -0500 Subject: x86, nmi_watchdog: Remove the old nmi_watchdog Now that we have a new nmi_watchdog that is more generic and sits on top of the perf subsystem, we really do not need the old nmi_watchdog any more. 
In addition, the old nmi_watchdog doesn't really work if you are using the default clocksource, hpet. The old nmi_watchdog code relied on local apic interrupts to determine if the cpu is still alive. With hpet as the clocksource, these interrupts don't increment any more and the old nmi_watchdog triggers false positives. This piece removes the old nmi_watchdog code and stubs out any variables and function calls. The stubs are the same ones used by the new nmi_watchdog code, so it should be well tested. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 4 - arch/x86/kernel/apic/Makefile | 5 +- arch/x86/kernel/apic/hw_nmi.c | 6 +- arch/x86/kernel/apic/nmi.c | 567 ------------------------------------------ arch/x86/kernel/traps.c | 9 - include/linux/nmi.h | 6 +- kernel/sysctl.c | 16 -- 7 files changed, 5 insertions(+), 608 deletions(-) delete mode 100644 arch/x86/kernel/apic/nmi.c (limited to 'include/linux') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f86b4b7..33292ec848ca 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -17,9 +17,6 @@ int do_nmi_callback(struct pt_regs *regs, int cpu); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); -#if !defined(CONFIG_LOCKUP_DETECTOR) -extern int nmi_watchdog_enabled; -#endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); @@ -30,7 +27,6 @@ extern void setup_apic_nmi_watchdog(void *); extern void stop_apic_nmi_watchdog(void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); extern void cpu_nmi_set_wd_enabled(void); extern atomic_t nmi_active; diff
--git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c4..3966b564ea47 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -3,10 +3,7 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) -obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o -endif -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o +obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..b68b17460016 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -20,12 +20,14 @@ /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +#ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } +#endif -#ifdef ARCH_HAS_NMI_WATCHDOG +#ifdef arch_trigger_all_cpu_backtrace void arch_trigger_all_cpu_backtrace(void) { int i; @@ -95,8 +97,6 @@ early_initcall(register_trigger_all_cpu_backtrace); #if defined(CONFIG_X86_LOCAL_APIC) unsigned int nmi_watchdog = NMI_NONE; EXPORT_SYMBOL(nmi_watchdog); -void acpi_nmi_enable(void) { return; } -void acpi_nmi_disable(void) { return; } #endif atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb742..000000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); - -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); - -static int panic_on_timeout; - -static unsigned int nmi_hz = HZ; -static DEFINE_PER_CPU(short, wd_enabled); -static int endflag __initdata; - -static inline unsigned int get_nmi_count(int cpu) -{ - return per_cpu(irq_stat, cpu).__nmi_count; -} - -static inline int mce_in_progress(void) -{ -#if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; -} - -#ifdef CONFIG_SMP -/* - * The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* - * Intentionally don't use cpu_relax here. This is - * to make sure that the performance counter really ticks, - * even if there is a simulator or similar that catches the - * pause instruction. 
On a real HT machine this is fine because - * all other CPUs are busy with "useless" delay loops and don't - * care if they get somewhat less cycles. - */ - while (endflag == 0) - mb(); -} -#endif - -static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) -{ - printk(KERN_CONT "\n"); - - printk(KERN_WARNING - "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); - - printk(KERN_WARNING - "Please report this to bugzilla.kernel.org,\n"); - printk(KERN_WARNING - "and attach the output of the 'dmesg' command.\n"); - - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - goto error; - - printk(KERN_INFO "Testing NMI watchdog ... 
"); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) - report_broken_nmi(cpu, prev_nmi_count); - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - goto error; - } - printk("OK.\n"); - - /* - * now that we know it works we can reduce NMI frequency to - * something more reasonable; makes a difference in some configs - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -error: - if (nmi_watchdog == NMI_IO_APIC) { - if (!timer_through_8259) - legacy_pic->mask(0); - on_each_cpu(__acpi_nmi_disable, NULL, 1); - } - -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - return -1; -} - -static int __init setup_nmi_watchdog(char *str) -{ - unsigned int nmi; - - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - if (!strncmp(str, "lapic", 5)) - nmi_watchdog = NMI_LOCAL_APIC; - else if (!strncmp(str, "ioapic", 6)) - nmi_watchdog = NMI_IO_APIC; - else { - get_option(&str, &nmi); - if (nmi >= NMI_INVALID) - return 0; - nmi_watchdog = nmi; - } - - return 1; -} -__setup("nmi_watchdog=", setup_nmi_watchdog); - -/* - * Suspend/resume support - */ -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* 
only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* - * should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} - -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 1); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 1); -} - -/* - * This function is called as soon the LAPIC NMI watchdog driver has everything - * in place and it's ready to check if the NMIs belong to the NMI watchdog - */ -void cpu_nmi_set_wd_enabled(void) -{ - __get_cpu_var(wd_enabled) = 1; -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - if (lapic_watchdog_init(nmi_hz) < 0) { - 
__get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if (!nmi_watchdog_active()) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - else - __acpi_nmi_disable(NULL); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up here too!] - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(long, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog_active()) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). 
- */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - - raw_spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - dump_stack(); - raw_spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - - rc = 1; - } - - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - touched = 1; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - __this_cpu_inc(alert_counter); - if (__this_cpu_read(alert_counter) == 5 * nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(alert_counter, 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* - * don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. 
- */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static void enable_ioapic_nmi_watchdog_single(void *unused) -{ - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - __acpi_nmi_enable(NULL); -} - -static void enable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); - touch_nmi_watchdog(); -} - -static void disable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); -} - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 
1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { - printk(KERN_WARNING - "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - if (nmi_watchdog_enabled) - enable_ioapic_nmi_watchdog(); - else - disable_ioapic_nmi_watchdog(); - } else { - printk(KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif /* CONFIG_SYSCTL */ - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void arch_trigger_all_cpu_backtrace(void) -{ - int i; - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - } -} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c96..db30d9cb9dd6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -398,15 +398,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) == NOTIFY_STOP) return; -#ifndef CONFIG_LOCKUP_DETECTOR - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. 
- */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) -#endif /* !CONFIG_LOCKUP_DETECTOR */ unknown_nmi_error(reason, regs); #else unknown_nmi_error(reason, regs); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 06aab5eee134..0cb3e5c246d0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -16,10 +16,7 @@ */ #ifdef ARCH_HAS_NMI_WATCHDOG #include -extern void touch_nmi_watchdog(void); -extern void acpi_nmi_disable(void); -extern void acpi_nmi_enable(void); -#else +#endif #ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { @@ -30,7 +27,6 @@ extern void touch_nmi_watchdog(void); #endif static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035e..ce33e2a2afea 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -746,22 +746,6 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR) - { - .procname = "unknown_nmi_panic", - .data = &unknown_nmi_panic, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_nmi_enabled, - }, -#endif #if defined(CONFIG_X86) { .procname = "panic_on_unrecovered_nmi", -- cgit v1.2.3-71-gd317 From 072b198a4ad48bd722ec6d203d65422a4698eae7 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:24 -0500 Subject: x86, nmi_watchdog: Remove all stub function calls from old nmi_watchdog Now that the bulk of the old nmi_watchdog is gone, remove all the stub variables and hooks associated with it. This touches lots of files mainly because of how the io_apic nmi_watchdog was implemented. 
Now that the io_apic nmi_watchdog is forever gone, remove all its fingers. Most of this code was not being exercised by virtue of nmi_watchdog != NMI_IO_APIC, so there shouldn't be anything too risky here. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 47 --- arch/x86/include/asm/smpboot_hooks.h | 1 - arch/x86/include/asm/timer.h | 6 - arch/x86/kernel/apic/apic.c | 15 +- arch/x86/kernel/apic/hw_nmi.c | 10 - arch/x86/kernel/apic/io_apic.c | 46 --- arch/x86/kernel/cpu/perf_event.c | 9 - arch/x86/kernel/cpu/perfctr-watchdog.c | 642 --------------------------------- arch/x86/kernel/smpboot.c | 11 - arch/x86/kernel/time.c | 18 - arch/x86/kernel/traps.c | 2 - arch/x86/oprofile/nmi_timer_int.c | 3 - drivers/acpi/acpica/nsinit.c | 2 - drivers/watchdog/hpwdt.c | 7 +- include/linux/nmi.h | 2 - 15 files changed, 2 insertions(+), 819 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 33292ec848ca..3545838cddeb 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,35 +7,13 @@ #ifdef ARCH_HAS_NMI_WATCHDOG -/** - * do_nmi_callback - * - * Check to see if a callback exists and execute it. Return 1 - * if the handler exists and was handled successfully. 
- */ -int do_nmi_callback(struct pt_regs *regs, int cpu); - extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -extern int check_nmi_watchdog(void); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); -extern void setup_apic_nmi_watchdog(void *); -extern void stop_apic_nmi_watchdog(void *); -extern void disable_timer_nmi_watchdog(void); -extern void enable_timer_nmi_watchdog(void); -extern void cpu_nmi_set_wd_enabled(void); - -extern atomic_t nmi_active; -extern unsigned int nmi_watchdog; -#define NMI_NONE 0 -#define NMI_IO_APIC 1 -#define NMI_LOCAL_APIC 2 -#define NMI_INVALID 3 - struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -43,33 +21,8 @@ extern int unknown_nmi_panic; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace - -static inline void localise_nmi_watchdog(void) -{ - if (nmi_watchdog == NMI_IO_APIC) - nmi_watchdog = NMI_LOCAL_APIC; -} - -/* check if nmi_watchdog is active (ie was specified at boot) */ -static inline int nmi_watchdog_active(void) -{ - /* - * actually it should be: - * return (nmi_watchdog == NMI_LOCAL_APIC || - * nmi_watchdog == NMI_IO_APIC) - * but since they are power of two we could use a - * cheaper way --cvg - */ - return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); -} #endif -void lapic_watchdog_stop(void); -int lapic_watchdog_init(unsigned nmi_hz); -int lapic_wd_event(unsigned nmi_hz); -unsigned lapic_adjust_nmi_hz(unsigned hz); -void disable_lapic_nmi_watchdog(void); -void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 1def60114906..6c22bf353f26 100644 --- 
a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } #endif } diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 5469630b27f5..fa7b9176b76c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -10,12 +10,6 @@ unsigned long long native_sched_clock(void); extern int recalibrate_cpu_khz(void); -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -extern int timer_ack; -#else -# define timer_ack (0) -#endif - extern int no_timer_check; /* Accelerators for sched_clock() diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3f838d537392..e9e2a93783f9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. 
*/ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - pr_warning("APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; /* Setup the lapic or request the broadcast */ setup_APIC_timer(); @@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void) } #endif - setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -1750,17 +1744,10 @@ int __init APIC_init_uniprocessor(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } -#else - localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); -#ifdef CONFIG_X86_64 - check_nmi_watchdog(); -#endif - return 0; } diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index b68b17460016..3e25afe9a62a 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -94,14 +94,4 @@ early_initcall(register_trigger_all_cpu_backtrace); #endif /* STUB calls to mimic old nmi_watchdog behaviour */ -#if defined(CONFIG_X86_LOCAL_APIC) -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); -#endif -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); int unknown_nmi_panic; -void cpu_nmi_set_wd_enabled(void) { return; } -void stop_apic_nmi_watchdog(void *unused) { return; } -void setup_apic_nmi_watchdog(void *unused) { return; } -int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cc0a721f628..e4a040c28de1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -2643,24 +2642,6 @@ static void lapic_register_intr(int irq) "edge"); } -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... 
- * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2766,15 +2747,6 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); -#ifdef CONFIG_X86_32 - { - unsigned int ver; - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - } -#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2822,10 +2794,6 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - legacy_pic->unmask(0); - } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2851,11 +2819,6 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - setup_nmi(); - legacy_pic->unmask(0); - } goto out; } /* @@ -2867,15 +2830,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... 
failed.\n"); } - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183efb..1f129a14e3a2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) { int i; - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; @@ -355,9 +352,6 @@ perfctr_fail: for (i--; i >= 0; i--) release_perfctr_nmi(x86_pmu.perfctr + i); - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - return false; } @@ -369,9 +363,6 @@ static void release_pmc_hardware(void) release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); } #else diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd69..14d45928c282 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -22,26 +22,6 @@ #include #include -struct nmi_watchdog_ctlblk { - unsigned int cccr_msr; - unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ - unsigned int evntsel_msr; /* the MSR to select the events to handle */ -}; - -/* Interface defining a CPU specific perfctr watchdog */ -struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); - unsigned perfctr; - unsigned evntsel; - u64 checkbit; -}; - -static const struct wd_ops 
*wd_ops; - /* * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. @@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); -static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); - /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { @@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } EXPORT_SYMBOL(release_evntsel_nmi); - -void disable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - if (atomic_read(&nmi_active) <= 0) - return; - - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); - - if (wd_ops) - wd_ops->unreserve(); - - BUG_ON(atomic_read(&nmi_active) != 0); -} - -void enable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - /* are we already enabled */ - if (atomic_read(&nmi_active) != 0) - return; - - /* are we lapic aware */ - if (!wd_ops) - return; - if (!wd_ops->reserve()) { - printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); - return; - } - - on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); - touch_nmi_watchdog(); -} - -/* - * Activate the NMI watchdog via the local APIC. - */ - -static unsigned int adjust_for_32bit_ctr(unsigned int hz) -{ - u64 counter_val; - unsigned int retval = hz; - - /* - * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter - * are writable, with higher bits sign extending from bit 31. - * So, we can only program the counter with 31 bit values and - * 32nd bit should be 1, for 33.. to be 1. 
- * Find the appropriate nmi_hz - */ - counter_val = (u64)cpu_khz * 1000; - do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { - u64 count = (u64)cpu_khz * 1000; - do_div(count, 0x7fffffffUL); - retval = count + 1; - } - return retval; -} - -static void write_watchdog_counter(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(perfctr_msr, 0 - count); -} - -static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsr(perfctr_msr, (u32)(-count), 0); -} - -/* - * AMD K7/K8/Family10h/Family11h support. - * AMD keeps this interface nicely stable so there is not much variety - */ -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -static int setup_k7_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - 
wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void single_msr_stop_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int single_msr_reserve(void) -{ - if (!reserve_perfctr_nmi(wd_ops->perfctr)) - return 0; - - if (!reserve_evntsel_nmi(wd_ops->evntsel)) { - release_perfctr_nmi(wd_ops->perfctr); - return 0; - } - return 1; -} - -static void single_msr_unreserve(void) -{ - release_evntsel_nmi(wd_ops->evntsel); - release_perfctr_nmi(wd_ops->perfctr); -} - -static void __kprobes -single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL << 47, -}; - -/* - * Intel Model 6 (PPro+,P2,P3,P-M,Core1) - */ -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -static int setup_p6_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - /* KVM doesn't implement this MSR */ - if (wrmsr_safe(perfctr_msr, 0, 0) < 0) - return 0; - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - 
wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* - * P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant. - * ArchPerfom/Core Duo also needs this - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL << 39, -}; - -/* - * Intel P4 performance counters. - * By far the most complicated of all. 
- */ -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) -#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) -#define P4_ESCR_OS (1 << 3) -#define P4_ESCR_USR (1 << 2) -#define P4_CCCR_OVF_PMI0 (1 << 26) -#define P4_CCCR_OVF_PMI1 (1 << 27) -#define P4_CCCR_THRESHOLD(N) ((N) << 20) -#define P4_CCCR_COMPLEMENT (1 << 19) -#define P4_CCCR_COMPARE (1 << 18) -#define P4_CCCR_REQUIRED (3 << 16) -#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) -#define P4_CCCR_ENABLE (1 << 12) -#define P4_CCCR_OVF (1 << 31) - -#define P4_CONTROLS 18 -static unsigned int p4_controls[18] = { - MSR_P4_BPU_CCCR0, - MSR_P4_BPU_CCCR1, - MSR_P4_BPU_CCCR2, - MSR_P4_BPU_CCCR3, - MSR_P4_MS_CCCR0, - MSR_P4_MS_CCCR1, - MSR_P4_MS_CCCR2, - MSR_P4_MS_CCCR3, - MSR_P4_FLAME_CCCR0, - MSR_P4_FLAME_CCCR1, - MSR_P4_FLAME_CCCR2, - MSR_P4_FLAME_CCCR3, - MSR_P4_IQ_CCCR0, - MSR_P4_IQ_CCCR1, - MSR_P4_IQ_CCCR2, - MSR_P4_IQ_CCCR3, - MSR_P4_IQ_CCCR4, - MSR_P4_IQ_CCCR5, -}; -/* - * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - * CRU_ESCR0 (with any non-null event selector) through a complemented - * max threshold. 
[IA32-Vol3, Section 14.9.9] - */ -static int setup_p4_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr, cccr_msr; - unsigned int evntsel, cccr_val; - unsigned int misc_enable, dummy; - unsigned int ht_num; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - -#ifdef CONFIG_SMP - /* detect which hyperthread we are on */ - if (smp_num_siblings == 2) { - unsigned int ebx, apicid; - - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; - } else -#endif - ht_num = 0; - - /* - * performance counters are shared resources - * assign each hyperthread its own set - * (re-use the ESCR0 register, seems safe - * and keeps the cccr_val the same) - */ - if (!ht_num) { - /* logical cpu 0 */ - perfctr_msr = MSR_P4_IQ_PERFCTR0; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR0; - cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); - - /* - * If we're on the kdump kernel or other situation, we may - * still have other performance counter registers set to - * interrupt and they'll keep interrupting forever because - * of the P4_CCCR_OVF quirk. So we need to ACK all the - * pending interrupts and disable all the registers here, - * before reenabling the NMI delivery. Refer to p4_rearm() - * about the P4_CCCR_OVF quirk. 
- */ - if (reset_devices) { - unsigned int low, high; - int i; - - for (i = 0; i < P4_CONTROLS; i++) { - rdmsr(p4_controls[i], low, high); - low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); - wrmsr(p4_controls[i], low, high); - } - } - } else { - /* logical cpu 1 */ - perfctr_msr = MSR_P4_IQ_PERFCTR1; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR1; - - /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ - if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) - cccr_val = P4_CCCR_OVF_PMI0; - else - cccr_val = P4_CCCR_OVF_PMI1; - cccr_val |= P4_CCCR_ESCR_SELECT(4); - } - - evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS - | P4_ESCR_USR; - - cccr_val |= P4_CCCR_THRESHOLD(15) - | P4_CCCR_COMPLEMENT - | P4_CCCR_COMPARE - | P4_CCCR_REQUIRED; - - wrmsr(evntsel_msr, evntsel, 0); - wrmsr(cccr_msr, cccr_val, 0); - write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = cccr_msr; - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); - return 1; -} - -static void stop_p4_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - wrmsr(wd->cccr_msr, 0, 0); - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int p4_reserve(void) -{ - if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) - return 0; -#ifdef CONFIG_SMP - if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) - goto fail1; -#endif - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) - goto fail2; - /* RED-PEN why is ESCR1 not reserved here? 
*/ - return 1; - fail2: -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); - fail1: -#endif - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); - return 0; -} - -static void p4_unreserve(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); -#endif - release_evntsel_nmi(MSR_P4_CRU_ESCR0); - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); -} - -static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - unsigned dummy; - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - rdmsrl(wd->cccr_msr, dummy); - dummy &= ~P4_CCCR_OVF; - wrmsrl(wd->cccr_msr, dummy); - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, - /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL << 39, -}; - -/* - * Watchdog using the Intel architected PerfMon. - * Used for Core2 and hopefully all future Intel CPUs. - */ -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - -static struct wd_ops intel_arch_wd_ops; - -static int setup_intel_arch_watchdog(unsigned nmi_hz) -{ - unsigned int ebx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. 
- * NOTE: Corresponding bit = 0 in ebx indicates event present. - */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < - (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || - (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); - return 1; -} - -static struct wd_ops intel_arch_wd_ops __read_mostly = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, -}; - -static void probe_nmi_watchdog(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 == 6 || - (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) - wd_ops = &k7_wd_ops; - return; - case X86_VENDOR_INTEL: - /* Work around where perfctr1 doesn't have a working enable - * bit as described in the following errata: - * AE49 Core Duo and Intel Core Solo 65 nm - * AN49 Intel Pentium Dual-Core - * AF49 Dual-Core Intel Xeon Processor LV - */ - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || - ((boot_cpu_data.x86 == 6 && 
boot_cpu_data.x86_model == 15 && - boot_cpu_data.x86_mask == 4))) { - intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; - intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; - } - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - wd_ops = &intel_arch_wd_ops; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 13) - return; - - wd_ops = &p6_wd_ops; - break; - case 15: - wd_ops = &p4_wd_ops; - break; - default: - return; - } - break; - } -} - -/* Interface to nmi.c */ - -int lapic_watchdog_init(unsigned nmi_hz) -{ - if (!wd_ops) { - probe_nmi_watchdog(); - if (!wd_ops) { - printk(KERN_INFO "NMI watchdog: CPU not supported\n"); - return -1; - } - - if (!wd_ops->reserve()) { - printk(KERN_ERR - "NMI watchdog: cannot reserve perfctrs\n"); - return -1; - } - } - - if (!(wd_ops->setup(nmi_hz))) { - printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", - raw_smp_processor_id()); - return -1; - } - - return 0; -} - -void lapic_watchdog_stop(void) -{ - if (wd_ops) - wd_ops->stop(); -} - -unsigned lapic_adjust_nmi_hz(unsigned hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - if (wd->perfctr_msr == MSR_P6_PERFCTR0 || - wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) - hz = adjust_for_32bit_ctr(hz); - return hz; -} - -int __kprobes lapic_wd_event(unsigned nmi_hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - u64 ctr; - - rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) /* perfctr still running? 
*/ - return 0; - - wd_ops->rearm(wd, nmi_hz); - return 1; -} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..f0a0624eea55 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -316,12 +316,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - enable_NMI_through_LVT0(); - legacy_pic->unmask(0); - } - /* This must be done before setting cpu_online_mask */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -1061,8 +1055,6 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); - localise_nmi_watchdog(); - connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); @@ -1196,7 +1188,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif - check_nmi_watchdog(); mtrr_aps_init(); } @@ -1341,8 +1332,6 @@ int native_cpu_disable(void) if (cpu == 0) return -EBUSY; - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cfa..25a28a245937 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -22,10 +22,6 @@ #include #include -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -int timer_ack; -#endif - #ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); - /* Optimized out for !IO_APIC and x86_64 */ - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to deassert NMI lines for the watchdog if run - * on an 82489DX-based system. 
- */ - raw_spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - raw_spin_unlock(&i8259A_lock); - } - global_clock_event->event_handler(global_clock_event); /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index db30d9cb9dd6..f02c179c2552 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -437,14 +437,12 @@ do_nmi(struct pt_regs *regs, long error_code) void stop_nmi(void) { - acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; - acpi_nmi_enable(); } /* May run on IST stack. */ diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b5790..0636dd93cef8 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -58,9 +58,6 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations *ops) { - if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) - return -ENODEV; - ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 660a2728908d..0cac7ec0d2ec 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, * as possible (without an NMI being received in the middle of * this) - so disable NMIs and initialize the device: */ - acpi_nmi_disable(); status = acpi_ns_evaluate(info); - acpi_nmi_enable(); if (ACPI_SUCCESS(status)) { walk_info->num_INI++; diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 3d77116e4634..c19f4a20794a 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -649,12 +649,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) * If nmi_watchdog is turned off then we can turn on * our nmi decoding 
capability. */ - if (!nmi_watchdog_active()) - hpwdt_nmi_decoding = 1; - else - dev_warn(&dev->dev, "NMI decoding is disabled. To enable this " - "functionality you must reboot with nmi_watchdog=0 " - "and load the hpwdt driver with priority=1.\n"); + hpwdt_nmi_decoding = 1; } #else static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0cb3e5c246d0..1c451e6ecc17 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -25,8 +25,6 @@ static inline void touch_nmi_watchdog(void) #else extern void touch_nmi_watchdog(void); #endif -static inline void acpi_nmi_disable(void) { } -static inline void acpi_nmi_enable(void) { } /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3-71-gd317 From 48c5ccae88dcd989d9de507e8510313c6cbd352b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 13 Nov 2010 19:32:29 +0100 Subject: sched: Simplify cpu-hot-unplug task migration While discussing the need for sched_idle_next(), Oleg remarked that since try_to_wake_up() ensures sleeping tasks will end up running on a sane cpu, we can do away with migrate_live_tasks(). If we then extend the existing hack of migrating current from CPU_DYING to migrating the full rq worth of tasks from CPU_DYING, the need for the sched_idle_next() abomination disappears as well, since idle will be the only possible thread left after the migration thread stops. This greatly simplifies the hot-unplug task migration path, as can be seen from the resulting code reduction (and about half the new lines are comments). 
Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <1289851597.2109.547.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 - kernel/cpu.c | 16 ++-- kernel/sched.c | 206 +++++++++++++++----------------------------------- 3 files changed, 67 insertions(+), 158 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3cd70cf91fde..29d953abb5ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1871,14 +1871,11 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #ifdef CONFIG_HOTPLUG_CPU -extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} #endif -extern void sched_idle_next(void); - #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) extern void wake_up_idle_cpu(int cpu); #else diff --git a/kernel/cpu.c b/kernel/cpu.c index f6e726f18491..8615aa65d927 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu) } struct take_cpu_down_param { - struct task_struct *caller; unsigned long mod; void *hcpu; }; @@ -208,11 +207,6 @@ static int __ref take_cpu_down(void *_param) cpu_notify(CPU_DYING | param->mod, param->hcpu); - if (task_cpu(param->caller) == cpu) - move_task_off_dead_cpu(cpu, param->caller); - /* Force idle task to run as soon as we yield: it should - immediately notice cpu is offline and die quickly. */ - sched_idle_next(); return 0; } @@ -223,7 +217,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; struct take_cpu_down_param tcd_param = { - .caller = current, .mod = mod, .hcpu = hcpu, }; @@ -253,9 +246,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) } BUG_ON(cpu_online(cpu)); - /* Wait for it to sleep (leaving idle task). 
*/ - while (!idle_cpu(cpu)) - yield(); + /* + * The migration_call() CPU_DYING callback will have removed all + * runnable tasks from the cpu, there's only the idle task left now + * that the migration thread is done doing the stop_machine thing. + */ + BUG_ON(!idle_cpu(cpu)); /* This actually kills the CPU. */ __cpu_die(cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 41f18695b730..b0d5f1b24a39 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2366,18 +2366,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p) return dest_cpu; /* No more Mr. Nice Guy. */ - if (unlikely(dest_cpu >= nr_cpu_ids)) { - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); - } + dest_cpu = cpuset_cpus_allowed_fallback(p); + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); } return dest_cpu; @@ -5712,29 +5709,20 @@ static int migration_cpu_stop(void *data) } #ifdef CONFIG_HOTPLUG_CPU + /* - * Figure out where task on dead CPU should go, use force if necessary. + * Ensures that the idle task is using init_mm right before its cpu goes + * offline. 
*/ -void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) +void idle_task_exit(void) { - struct rq *rq = cpu_rq(dead_cpu); - int needs_cpu, uninitialized_var(dest_cpu); - unsigned long flags; + struct mm_struct *mm = current->active_mm; - local_irq_save(flags); + BUG_ON(cpu_online(smp_processor_id())); - raw_spin_lock(&rq->lock); - needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); - if (needs_cpu) - dest_cpu = select_fallback_rq(dead_cpu, p); - raw_spin_unlock(&rq->lock); - /* - * It can only fail if we race with set_cpus_allowed(), - * in the racer should migrate the task anyway. - */ - if (needs_cpu) - __migrate_task(p, dead_cpu, dest_cpu); - local_irq_restore(flags); + if (mm != &init_mm) + switch_mm(mm, &init_mm, current); + mmdrop(mm); } /* @@ -5747,128 +5735,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) static void migrate_nr_uninterruptible(struct rq *rq_src) { struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); - unsigned long flags; - local_irq_save(flags); - double_rq_lock(rq_src, rq_dest); rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; rq_src->nr_uninterruptible = 0; - double_rq_unlock(rq_src, rq_dest); - local_irq_restore(flags); -} - -/* Run through task list and migrate tasks from the dead cpu. */ -static void migrate_live_tasks(int src_cpu) -{ - struct task_struct *p, *t; - - read_lock(&tasklist_lock); - - do_each_thread(t, p) { - if (p == current) - continue; - - if (task_cpu(p) == src_cpu) - move_task_off_dead_cpu(src_cpu, p); - } while_each_thread(t, p); - - read_unlock(&tasklist_lock); } /* - * Schedules idle task to be the next runnable task on current CPU. - * It does so by boosting its priority to highest possible. - * Used by CPU offline code. + * remove the tasks which were accounted by rq from calc_load_tasks. 
*/ -void sched_idle_next(void) +static void calc_global_load_remove(struct rq *rq) { - int this_cpu = smp_processor_id(); - struct rq *rq = cpu_rq(this_cpu); - struct task_struct *p = rq->idle; - unsigned long flags; - - /* cpu has to be offline */ - BUG_ON(cpu_online(this_cpu)); - - /* - * Strictly not necessary since rest of the CPUs are stopped by now - * and interrupts disabled on the current cpu. - */ - raw_spin_lock_irqsave(&rq->lock, flags); - - __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); - - activate_task(rq, p, 0); - - raw_spin_unlock_irqrestore(&rq->lock, flags); + atomic_long_sub(rq->calc_load_active, &calc_load_tasks); + rq->calc_load_active = 0; } /* - * Ensures that the idle task is using init_mm right before its cpu goes - * offline. + * Migrate all tasks from the rq, sleeping tasks will be migrated by + * try_to_wake_up()->select_task_rq(). + * + * Called with rq->lock held even though we'er in stop_machine() and + * there's no concurrency possible, we hold the required locks anyway + * because of lock validation efforts. */ -void idle_task_exit(void) -{ - struct mm_struct *mm = current->active_mm; - - BUG_ON(cpu_online(smp_processor_id())); - - if (mm != &init_mm) - switch_mm(mm, &init_mm, current); - mmdrop(mm); -} - -/* called under rq->lock with disabled interrupts */ -static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) +static void migrate_tasks(unsigned int dead_cpu) { struct rq *rq = cpu_rq(dead_cpu); - - /* Must be exiting, otherwise would be on tasklist. */ - BUG_ON(!p->exit_state); - - /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->state == TASK_DEAD); - - get_task_struct(p); + struct task_struct *next, *stop = rq->stop; + int dest_cpu; /* - * Drop lock around migration; if someone else moves it, - * that's OK. No task can be added to this CPU, so iteration is - * fine. 
+ * Fudge the rq selection such that the below task selection loop + * doesn't get stuck on the currently eligible stop task. + * + * We're currently inside stop_machine() and the rq is either stuck + * in the stop_machine_cpu_stop() loop, or we're executing this code, + * either way we should never end up calling schedule() until we're + * done here. */ - raw_spin_unlock_irq(&rq->lock); - move_task_off_dead_cpu(dead_cpu, p); - raw_spin_lock_irq(&rq->lock); - - put_task_struct(p); -} - -/* release_task() removes task from tasklist, so we won't find dead tasks. */ -static void migrate_dead_tasks(unsigned int dead_cpu) -{ - struct rq *rq = cpu_rq(dead_cpu); - struct task_struct *next; + rq->stop = NULL; for ( ; ; ) { - if (!rq->nr_running) + /* + * There's this thread running, bail when that's the only + * remaining thread. + */ + if (rq->nr_running == 1) break; + next = pick_next_task(rq); - if (!next) - break; + BUG_ON(!next); next->sched_class->put_prev_task(rq, next); - migrate_dead(dead_cpu, next); + /* Find suitable destination for @next, with force if needed. */ + dest_cpu = select_fallback_rq(dead_cpu, next); + raw_spin_unlock(&rq->lock); + + __migrate_task(next, dead_cpu, dest_cpu); + + raw_spin_lock(&rq->lock); } -} -/* - * remove the tasks which were accounted by rq from calc_load_tasks. 
- */ -static void calc_global_load_remove(struct rq *rq) -{ - atomic_long_sub(rq->calc_load_active, &calc_load_tasks); - rq->calc_load_active = 0; + rq->stop = stop; } + #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -6078,15 +6007,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned long flags; struct rq *rq = cpu_rq(cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: rq->calc_load_update = calc_load_update; break; case CPU_ONLINE: - case CPU_ONLINE_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { @@ -6098,30 +6025,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - migrate_live_tasks(cpu); - /* Idle task back to normal (off runqueue, low prio) */ - raw_spin_lock_irq(&rq->lock); - deactivate_task(rq, rq->idle, 0); - __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); - rq->idle->sched_class = &idle_sched_class; - migrate_dead_tasks(cpu); - raw_spin_unlock_irq(&rq->lock); - migrate_nr_uninterruptible(rq); - BUG_ON(rq->nr_running != 0); - calc_global_load_remove(rq); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } + migrate_tasks(cpu); + BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); + + migrate_nr_uninterruptible(rq); + calc_global_load_remove(rq); break; #endif } -- cgit v1.2.3-71-gd317 From 2069dd75c7d0f49355939e5586daf5a9ab216db7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Nov 2010 15:47:00 -0800 Subject: sched: Rewrite tg_shares_up) By tracking a per-cpu load-avg for each cfs_rq and folding it into a global task_group load on each tick we can rework 
tg_shares_up to be strictly per-cpu. This should improve cpu-cgroup performance for smp systems significantly. [ Paul: changed to use queueing cfs_rq + bug fixes ] Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20101115234937.580480400@google.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 - kernel/sched.c | 173 ++++++++++++------------------------------------ kernel/sched_debug.c | 15 +++-- kernel/sched_fair.c | 164 +++++++++++++++++++++++++++++---------------- kernel/sched_features.h | 2 - kernel/sysctl.c | 19 ------ 6 files changed, 162 insertions(+), 213 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 29d953abb5ad..8abb8aa59664 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { } extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_shares_ratelimit; -extern unsigned int sysctl_sched_shares_thresh; extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { diff --git a/kernel/sched.c b/kernel/sched.c index b0d5f1b24a39..e2f1a3024a99 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -253,6 +253,8 @@ struct task_group { /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; unsigned long shares; + + atomic_t load_weight; #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -359,15 +361,11 @@ struct cfs_rq { */ unsigned long h_load; - /* - * this cpu's part of tg->shares - */ - unsigned long shares; + u64 load_avg; + u64 load_period; + u64 load_stamp; - /* - * load.weight at the time we set shares - */ - unsigned long rq_weight; + unsigned long load_contribution; #endif #endif }; @@ -806,20 +804,6 @@ late_initcall(sched_init_debug); */ const_debug unsigned int sysctl_sched_nr_migrate = 32; -/* - * ratelimit for updating the group shares. 
- * default: 0.25ms - */ -unsigned int sysctl_sched_shares_ratelimit = 250000; -unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; - -/* - * Inject some fuzzyness into changing the per-cpu group shares - * this avoids remote rq-locks at the expense of fairness. - * default: 4 - */ -unsigned int sysctl_sched_shares_thresh = 4; - /* * period over which we average the RT time consumption, measured * in ms. @@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) lw->inv_weight = 0; } +static inline void update_load_set(struct load_weight *lw, unsigned long w) +{ + lw->weight = w; + lw->inv_weight = 0; +} + /* * To aid in avoiding the subversion of "niceness" due to uneven distribution * of tasks with abnormal "nice" values across CPUs the contribution that @@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long __percpu *update_shares_data; - -static void __set_se_shares(struct sched_entity *se, unsigned long shares); - -/* - * Calculate and set the cpu's group shares. - */ -static void update_group_shares_cpu(struct task_group *tg, int cpu, - unsigned long sd_shares, - unsigned long sd_rq_weight, - unsigned long *usd_rq_weight) -{ - unsigned long shares, rq_weight; - int boost = 0; - - rq_weight = usd_rq_weight[cpu]; - if (!rq_weight) { - boost = 1; - rq_weight = NICE_0_LOAD; - } - - /* - * \Sum_j shares_j * rq_weight_i - * shares_i = ----------------------------- - * \Sum_j rq_weight_j - */ - shares = (sd_shares * rq_weight) / sd_rq_weight; - shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); - - if (abs(shares - tg->se[cpu]->load.weight) > - sysctl_sched_shares_thresh) { - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; - tg->cfs_rq[cpu]->shares = boost ? 
0 : shares; - __set_se_shares(tg->se[cpu], shares); - raw_spin_unlock_irqrestore(&rq->lock, flags); - } -} +static void update_cfs_load(struct cfs_rq *cfs_rq); +static void update_cfs_shares(struct cfs_rq *cfs_rq); /* - * Re-compute the task group their per cpu shares over the given domain. - * This needs to be done in a bottom-up fashion because the rq weight of a - * parent group depends on the shares of its child groups. + * update tg->load_weight by folding this cpu's load_avg */ static int tg_shares_up(struct task_group *tg, void *data) { - unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; - unsigned long *usd_rq_weight; - struct sched_domain *sd = data; + long load_avg; + struct cfs_rq *cfs_rq; unsigned long flags; - int i; + int cpu = (long)data; + struct rq *rq; - if (!tg->se[0]) + if (!tg->se[cpu]) return 0; - local_irq_save(flags); - usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); - - for_each_cpu(i, sched_domain_span(sd)) { - weight = tg->cfs_rq[i]->load.weight; - usd_rq_weight[i] = weight; - - rq_weight += weight; - /* - * If there are currently no tasks on the cpu pretend there - * is one of average load so that when a new task gets to - * run here it will not get delayed by group starvation. 
- */ - if (!weight) - weight = NICE_0_LOAD; + rq = cpu_rq(cpu); + cfs_rq = tg->cfs_rq[cpu]; - sum_weight += weight; - shares += tg->cfs_rq[i]->shares; - } + raw_spin_lock_irqsave(&rq->lock, flags); - if (!rq_weight) - rq_weight = sum_weight; + update_rq_clock(rq); + update_cfs_load(cfs_rq); - if ((!shares && rq_weight) || shares > tg->shares) - shares = tg->shares; + load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); + load_avg -= cfs_rq->load_contribution; - if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) - shares = tg->shares; + atomic_add(load_avg, &tg->load_weight); + cfs_rq->load_contribution += load_avg; - for_each_cpu(i, sched_domain_span(sd)) - update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); + /* + * We need to update shares after updating tg->load_weight in + * order to adjust the weight of groups with long running tasks. + */ + update_cfs_shares(cfs_rq); - local_irq_restore(flags); + raw_spin_unlock_irqrestore(&rq->lock, flags); return 0; } @@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data) load = cpu_rq(cpu)->load.weight; } else { load = tg->parent->cfs_rq[cpu]->h_load; - load *= tg->cfs_rq[cpu]->shares; + load *= tg->se[cpu]->load.weight; load /= tg->parent->cfs_rq[cpu]->load.weight + 1; } @@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data) return 0; } -static void update_shares(struct sched_domain *sd) +static void update_shares(long cpu) { - s64 elapsed; - u64 now; - if (root_task_group_empty()) return; - now = local_clock(); - elapsed = now - sd->last_update; + /* + * XXX: replace with an on-demand list + */ - if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { - sd->last_update = now; - walk_tg_tree(tg_nop, tg_shares_up, sd); - } + walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu); } static void update_h_load(long cpu) @@ -1699,7 +1631,7 @@ static void update_h_load(long cpu) #else -static inline void update_shares(struct sched_domain *sd) 
+static inline void update_shares(int cpu) { } @@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) #endif -#ifdef CONFIG_FAIR_GROUP_SCHED -static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) -{ -#ifdef CONFIG_SMP - cfs_rq->shares = shares; -#endif -} -#endif - static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); @@ -5551,7 +5474,6 @@ static void update_sysctl(void) SET_SYSCTL(sched_min_granularity); SET_SYSCTL(sched_latency); SET_SYSCTL(sched_wakeup_granularity); - SET_SYSCTL(sched_shares_ratelimit); #undef SET_SYSCTL } @@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, se->cfs_rq = parent->my_q; se->my_q = cfs_rq; - se->load.weight = tg->shares; - se->load.inv_weight = 0; + update_load_set(&se->load, tg->shares); se->parent = parent; } #endif @@ -7881,10 +7802,6 @@ void __init sched_init(void) #endif /* CONFIG_CGROUP_SCHED */ -#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP - update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), - __alignof__(unsigned long)); -#endif for_each_possible_cpu(i) { struct rq *rq; @@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) if (on_rq) dequeue_entity(cfs_rq, se, 0); - se->load.weight = shares; - se->load.inv_weight = 0; + update_load_set(&se->load, shares); if (on_rq) enqueue_entity(cfs_rq, se, 0); @@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) /* * force a rebalance */ - cfs_rq_set_shares(tg->cfs_rq[i], 0); set_se_shares(tg->se[i], shares); } diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 2e1b0d17dd9b..e6590e7312e8 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; 
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); - SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); - SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); + SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", + SPLIT_NS(cfs_rq->load_avg)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", + SPLIT_NS(cfs_rq->load_period)); + SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", + cfs_rq->load_contribution); + SEQ_printf(m, " .%-30s: %d\n", "load_tg", + atomic_read(&tg->load_weight)); #endif + print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd0..d86544b4151c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write, WRT_SYSCTL(sched_min_granularity); WRT_SYSCTL(sched_latency); WRT_SYSCTL(sched_wakeup_granularity); - WRT_SYSCTL(sched_shares_ratelimit); #undef WRT_SYSCTL return 0; @@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_add(&se->group_node, &cfs_rq->tasks); } cfs_rq->nr_running++; - se->on_rq = 1; } static void @@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_del_init(&se->group_node); } cfs_rq->nr_running--; - se->on_rq = 0; } +#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED +static void update_cfs_load(struct cfs_rq *cfs_rq) +{ + u64 period = sched_avg_period(); + u64 now, delta; + + if (!cfs_rq) + return; + + now = rq_of(cfs_rq)->clock; + delta = now - cfs_rq->load_stamp; + + cfs_rq->load_stamp = now; + cfs_rq->load_period += delta; + 
cfs_rq->load_avg += delta * cfs_rq->load.weight; + + while (cfs_rq->load_period > period) { + /* + * Inline assembly required to prevent the compiler + * optimising this loop into a divmod call. + * See __iter_div_u64_rem() for another example of this. + */ + asm("" : "+rm" (cfs_rq->load_period)); + cfs_rq->load_period /= 2; + cfs_rq->load_avg /= 2; + } +} + +static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, + unsigned long weight) +{ + if (se->on_rq) + account_entity_dequeue(cfs_rq, se); + + update_load_set(&se->load, weight); + + if (se->on_rq) + account_entity_enqueue(cfs_rq, se); +} + +static void update_cfs_shares(struct cfs_rq *cfs_rq) +{ + struct task_group *tg; + struct sched_entity *se; + long load_weight, load, shares; + + if (!cfs_rq) + return; + + tg = cfs_rq->tg; + se = tg->se[cpu_of(rq_of(cfs_rq))]; + if (!se) + return; + + load = cfs_rq->load.weight; + + load_weight = atomic_read(&tg->load_weight); + load_weight -= cfs_rq->load_contribution; + load_weight += load; + + shares = (tg->shares * load); + if (load_weight) + shares /= load_weight; + + if (shares < MIN_SHARES) + shares = MIN_SHARES; + if (shares > tg->shares) + shares = tg->shares; + + reweight_entity(cfs_rq_of(se), se, shares); +} +#else /* CONFIG_FAIR_GROUP_SCHED */ +static inline void update_cfs_load(struct cfs_rq *cfs_rq) +{ +} + +static inline void update_cfs_shares(struct cfs_rq *cfs_rq) +{ +} +#endif /* CONFIG_FAIR_GROUP_SCHED */ + static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { #ifdef CONFIG_SCHEDSTATS @@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. 
*/ update_curr(cfs_rq); + update_cfs_load(cfs_rq); account_entity_enqueue(cfs_rq, se); + update_cfs_shares(cfs_rq); if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); @@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) check_spread(cfs_rq, se); if (se != cfs_rq->curr) __enqueue_entity(cfs_rq, se); + se->on_rq = 1; } static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); + se->on_rq = 0; + update_cfs_load(cfs_rq); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); + update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) flags = ENQUEUE_WAKEUP; } + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + update_cfs_load(cfs_rq); + update_cfs_shares(cfs_rq); + } + hrtick_update(rq); } @@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); + /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) break; flags |= DEQUEUE_SLEEP; } + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + update_cfs_load(cfs_rq); + update_cfs_shares(cfs_rq); + } + hrtick_update(rq); } @@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. 
- * - * The problem is that perfectly aligning the shares is rather expensive, hence - * we try to avoid doing that too often - see update_shares(), which ratelimits - * this change. - * - * We compensate this by not only taking the current delta into account, but - * also considering the delta between when the shares were last adjusted and - * now. - * - * We still saw a performance dip, some tracing learned us that between - * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased - * significantly. Therefore try to bias the error in direction of failing - * the affine wakeup. - * */ -static long effective_load(struct task_group *tg, int cpu, - long wl, long wg) +static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; if (!tg->parent) return wl; - /* - * By not taking the decrease of shares on the other cpu into - * account our error leans towards reducing the affine wakeups. - */ - if (!wl && sched_feat(ASYM_EFF_LOAD)) - return wl; - for_each_sched_entity(se) { long S, rw, s, a, b; - long more_w; - - /* - * Instead of using this increment, also add the difference - * between when the shares were last updated and now. 
- */ - more_w = se->my_q->load.weight - se->my_q->rq_weight; - wl += more_w; - wg += more_w; S = se->my_q->tg->shares; - s = se->my_q->shares; - rw = se->my_q->rq_weight; + s = se->load.weight; + rw = se->my_q->load.weight; a = S*(rw + wl); b = S*rw + s*wg; @@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ sd = tmp; } -#ifdef CONFIG_FAIR_GROUP_SCHED - if (sched_feat(LB_SHARES_UPDATE)) { - /* - * Pick the largest domain to update shares over - */ - tmp = sd; - if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) - tmp = affine_sd; - - if (tmp) { - raw_spin_unlock(&rq->lock); - update_shares(tmp); - raw_spin_lock(&rq->lock); - } - } -#endif - if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) return select_idle_sibling(p, cpu); @@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_inc(sd, lb_count[idle]); redo: - update_shares(sd); group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, cpus, balance); @@ -3156,8 +3206,6 @@ out_one_pinned: else ld_moved = 0; out: - if (ld_moved) - update_shares(sd); return ld_moved; } @@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) int update_next_balance = 0; int need_serialize; + update_shares(cpu); + for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 185f920ec1a2..68e69acc29b9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0) SCHED_FEAT(HRTICK, 0) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(LB_BIAS, 1) -SCHED_FEAT(LB_SHARES_UPDATE, 1) -SCHED_FEAT(ASYM_EFF_LOAD, 1) /* * Spin-wait on mutex acquisition when the mutex owner is running on diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035e..3132b25193db 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -259,8 +259,6 @@ static int 
min_wakeup_granularity_ns; /* 0 usecs */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -static int min_sched_shares_ratelimit = 100000; /* 100 usec */ -static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ #endif #ifdef CONFIG_COMPACTION @@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, - { - .procname = "sched_shares_ratelimit", - .data = &sysctl_sched_shares_ratelimit, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_proc_update_handler, - .extra1 = &min_sched_shares_ratelimit, - .extra2 = &max_sched_shares_ratelimit, - }, { .procname = "sched_tunable_scaling", .data = &sysctl_sched_tunable_scaling, @@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_tunable_scaling, .extra2 = &max_sched_tunable_scaling, }, - { - .procname = "sched_shares_thresh", - .data = &sysctl_sched_shares_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - }, { .procname = "sched_migration_cost", .data = &sysctl_sched_migration_cost, -- cgit v1.2.3-71-gd317 From a7a4f8a752ec734b2eab904fc863d5dc873de338 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Mon, 15 Nov 2010 15:47:06 -0800 Subject: sched: Add sysctl_sched_shares_window Introduce a new sysctl for the shares window and disambiguate it from sched_time_avg. A 10ms window appears to be a good compromise between accuracy and performance. 
Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20101115234938.112173964@google.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched_fair.c | 9 ++++++++- kernel/sysctl.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8abb8aa59664..840f1277492f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1900,6 +1900,7 @@ extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b320753aa6c9..6c84439ce987 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +/* + * The exponential sliding window over which load is averaged for shares + * distribution. 
+ * (default: 10msec) + */ +unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; + static const struct sched_class fair_sched_class; /************************************************************** @@ -688,7 +695,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED static void update_cfs_load(struct cfs_rq *cfs_rq) { - u64 period = sched_avg_period(); + u64 period = sysctl_sched_shares_window; u64 now, delta; unsigned long load = cfs_rq->load.weight; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3132b25193db..9b520d74f052 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -332,6 +332,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_shares_window", + .data = &sysctl_sched_shares_window, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "timer_migration", .data = &sysctl_timer_migration, -- cgit v1.2.3-71-gd317 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). 
By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch has been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 +++ arch/x86/kernel/ftrace.c | 3 + include/linux/module.h | 11 ++- kernel/module.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414e..45143bbcfe5e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N" +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2d..298448656b60 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b966..ddaa689d71bd 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,6 +308,9 @@ struct module /* The size of the executable code in each section. 
*/ unsigned int init_text_size, core_text_size; + /* Size of RO sections of the module (text+rodata) */ + unsigned int init_ro_size, core_ro_size; + /* Arch-specific module values */ struct mod_arch_specific arch; @@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } - #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -687,6 +689,13 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +extern void set_all_modules_text_rw(void); +extern void set_all_modules_text_ro(void); +#else +static inline void set_all_modules_text_rw(void) { } +static inline void set_all_modules_text_ro(void) { } +#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524a..ba421e6b4ada 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -70,6 +71,26 @@ #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +/* + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. 
+ */ +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +{ + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +/* Setting memory back to RW+NX before releasing it */ +void unset_section_ro_nx(struct module *mod, void *module_region) +{ + unsigned long total_pages; + + if (mod->module_core == module_region) { + /* Set core as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); + set_memory_nx((unsigned long)mod->module_core, total_pages); + set_memory_rw((unsigned long)mod->module_core, total_pages); + + } else if (mod->module_init == module_region) { + /* Set init as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); + set_memory_nx((unsigned long)mod->module_init, total_pages); + set_memory_rw((unsigned long)mod->module_init, total_pages); + } +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw() +{ + struct module *mod; + + mutex_lock(&module_mutex); + 
list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); +} + +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +#endif + /* Free a module, remove from lists, etc. 
*/ static void free_module(struct module *mod) { @@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } DEBUGP("Init section allocation order:\n"); @@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for 
init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + /* Done! */ trace_module_load(mod); return mod; @@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-71-gd317 From 9c0729dc8062bed96189bd14ac6d4920f3958743 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Fri, 5 Nov 2010 05:59:39 -0400 Subject: x86: Eliminate bp argument from the stack tracing routines The various stack tracing routines take a 'bp' argument in which the caller is supposed to provide the base pointer to use, or 0 if it doesn't have one. Since bp is garbage whenever CONFIG_FRAME_POINTER is not defined, this means all callers in principle should either always pass 0, or be conditional on CONFIG_FRAME_POINTER. However, there are only really three use cases for stack tracing: (a) Trace the current task, including IRQ stack if any (b) Trace the current task, but skip IRQ stack (c) Trace some other task In all cases, if CONFIG_FRAME_POINTER is not defined, bp should just be 0. If it _is_ defined, then - in case (a) bp should be gotten directly from the CPU's register, so the caller should pass NULL for regs, - in case (b) the caller should pass the IRQ registers to dump_trace(), - in case (c) bp should be gotten from the top of the task's stack, so the caller should pass NULL for regs. Hence, the bp argument is not necessary because the combination of task and regs is sufficient to determine an appropriate value for bp. This patch introduces a new inline function stack_frame(task, regs) that computes the desired bp. This function is then called from the two versions of dump_trace(). Signed-off-by: Soren Sandmann Acked-by: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H.
Peter Anvin Cc: Peter Zijlstra Cc: Arjan van de Ven , Cc: Frederic Weisbecker , Cc: Arnaldo Carvalho de Melo , LKML-Reference: > Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/kdebug.h | 2 +- arch/x86/include/asm/stacktrace.h | 33 ++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/dumpstack.c | 12 ++++++------ arch/x86/kernel/dumpstack_32.c | 25 +++++++------------------ arch/x86/kernel/dumpstack_64.c | 24 +++++++----------------- arch/x86/kernel/process.c | 3 +-- arch/x86/kernel/stacktrace.c | 8 ++++---- arch/x86/mm/kmemcheck/error.c | 2 +- arch/x86/oprofile/backtrace.c | 2 +- include/linux/stacktrace.h | 4 +++- 11 files changed, 62 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca86581b..f23eb2528464 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_registers(struct pt_regs *regs); extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp, unsigned long bp); + unsigned long *sp); extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 2b16a2ad23dc..52b5c7ed3608 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -7,6 +7,7 @@ #define _ASM_X86_STACKTRACE_H #include +#include extern int kstack_depth_to_print; @@ -46,7 +47,7 @@ struct stacktrace_ops { }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, + unsigned long *stack, const struct stacktrace_ops *ops, void *data); #ifdef CONFIG_X86_32 @@ -57,13 +58,39 @@ void dump_trace(struct 
task_struct *tsk, struct pt_regs *regs, #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + unsigned long bp; + + if (regs) + return regs->bp; + + if (task == current) { + /* Grab bp right from our regs */ + get_bp(bp); + return bp; + } + + /* bp is the last reg pushed by switch_to */ + return *(unsigned long *)task->thread.sp; +} +#else +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + return 0; +} +#endif + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); + unsigned long *stack, char *log_lvl); extern void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); + unsigned long *sp, char *log_lvl); extern unsigned int code_bytes; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183efb..461a85dcaba4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1666,7 +1666,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) perf_callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, &backtrace_ops, entry); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd52..8474c998cbd4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) + unsigned long *stack, char *log_lvl) { printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); + dump_trace(task, 
regs, stack, &print_trace_ops, log_lvl); } void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) + unsigned long *stack) { - show_trace_log_lvl(task, regs, stack, bp, ""); + show_trace_log_lvl(task, regs, stack, ""); } void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, 0, ""); + show_stack_log_lvl(task, NULL, sp, ""); } /* @@ -210,7 +210,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); + show_trace(NULL, NULL, &stack); } EXPORT_SYMBOL(dump_stack); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bda..74cc1eda384b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -17,11 +17,12 @@ #include -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void *data) { int graph = 0; + unsigned long bp; if (!task) task = current; @@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); for (;;) { struct thread_info *context; @@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *stack; int i; @@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, touch_nmi_watchdog(); } printk(KERN_CONT "\n"); - 
show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } @@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) u8 *ip; printk(KERN_EMERG "Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, - 0, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249a..64101335de19 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); @@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned used = 0; struct thread_info *tinfo; int graph = 0; + unsigned long bp; if (!task) task = current; @@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); /* * Print function call entries in all stacks, starting at the * current stack address. 
If the stacks consist of nested @@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *irq_stack_end; unsigned long *irq_stack; @@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, preempt_enable(); printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } void show_registers(struct pt_regs *regs) @@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - regs->bp, KERN_EMERG); + KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86aa..96ed1aac543a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -91,8 +91,7 @@ void exit_thread(void) void show_regs(struct pt_regs *regs) { show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), - regs->bp); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); } void show_regs_common(void) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a7..938c8e10a19a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { */ void save_stack_trace(struct stack_trace *trace) { - dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); + dump_trace(current, NULL, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) +void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) { - dump_trace(current, NULL, NULL, 
bp, &save_stack_ops, trace); + dump_trace(current, regs, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); + dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_bp(&e->trace, regs->bp); + save_stack_trace_regs(&e->trace, regs); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..72cbec14d783 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) - dump_trace(NULL, regs, (unsigned long *)stack, 0, + dump_trace(NULL, regs, (unsigned long *)stack, &backtrace_ops, &depth); return; } diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 51efbef38fb0..25310f1d7f37 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -2,6 +2,7 @@ #define __LINUX_STACKTRACE_H struct task_struct; +struct pt_regs; #ifdef CONFIG_STACKTRACE struct task_struct; @@ -13,7 +14,8 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_bp(struct stack_trace *trace, unsigned 
long bp); +extern void save_stack_trace_regs(struct stack_trace *trace, + struct pt_regs *regs); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); -- cgit v1.2.3-71-gd317 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows events to be used in perf by non privileged users in case of task bound tracing. This is desired for syscall tracepoints because they don't leak global system information, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 2 ++ kernel/perf_event.c | 9 --------- kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8beabb958f61..312dce7e0d52 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -154,12 +154,14 @@ enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, + TRACE_EVENT_FL_CAP_ANY_BIT, }; enum { TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), + TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), }; struct ftrace_event_call { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f4982..ee1e903f983c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4747,15 +4747,6 @@ static int perf_tp_event_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; - /* - * Raw
tracepoint data is a severe data leak, only allow root to - * have these. - */ - if ((event->attr.sample_type & PERF_SAMPLE_RAW) && - perf_paranoid_tracepoint_raw() && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - err = perf_trace_init(event); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670e..19a359d5e6d5 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; +static int perf_trace_event_perm(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + /* No tracing, just counting, so no obvious leak */ + if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) + return 0; + + /* Some events are ok to be traced by non-root users... */ + if (p_event->attach_state == PERF_ATTACH_TASK) { + if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) + return 0; + } + + /* + * ...otherwise raw tracepoint data can be a severe data leak, + * only allow root to have these. + */ + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret = -ENOMEM; + int ret; int cpu; + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; + ret = -ENOMEM; + list = alloc_percpu(struct hlist_head); if (!list) goto fail; -- cgit v1.2.3-71-gd317 From 1ed0c5971159974185653170543a764cc061c857 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:46:57 +0100 Subject: tracing: New macro to set up initial event flags value This introduces the new TRACE_EVENT_FLAGS() macro in order to set up initial event flags value. 
This macro must simply follow the definition of a trace event and take the event name and the flag value as parameters: TRACE_EVENT(my_event, ..... .... ); TRACE_EVENT_FLAGS(my_event, 1) This will set up 1 as the initial my_event->flags value. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/tracepoint.h | 4 ++++ include/trace/ftrace.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a4a90b6726ce..5a6074fcd81d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -234,6 +234,8 @@ do_trace: \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -354,4 +356,6 @@ do_trace: \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a9377c0083ad..6f540123d43e 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -82,6 +82,15 @@ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -129,6 +138,9 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(event, flag) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* -- cgit v1.2.3-71-gd317 From 53cf810b1934f08a68e131aeeb16267a778f43df Mon Sep 17 00:00:00 2001 
From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:11:42 +0100 Subject: tracing: Allow syscall trace events for non privileged users As for the raw syscalls events, individual syscall events won't leak system wide information on task bound tracing. Allow non privileged users to use them in such workflow. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 8 ++++++++ include/linux/syscalls.h | 6 ++++-- include/trace/ftrace.h | 7 +------ 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 312dce7e0d52..725bf6bd39f7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -198,6 +198,14 @@ struct ftrace_event_call { #endif }; +#define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cacc27a0e285..13b9731d30cf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -137,7 +137,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ @@ -152,7 +153,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define 
SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6f540123d43e..e718a917d897 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -84,12 +84,7 @@ #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(name, value) \ - static int __init trace_init_flags_##name(void) \ - { \ - event_##name.flags = value; \ - return 0; \ - } \ - early_initcall(trace_init_flags_##name); + __TRACE_EVENT_FLAGS(name, value) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3-71-gd317 From 423478cde453eebdfcfebf4b8d378d8f5d49b853 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:21:26 +0100 Subject: tracing: Remove useless syscall ftrace_event_call declaration It is defined right after, which makes the declaration completely useless. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/syscalls.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 13b9731d30cf..18cd0684fc4e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -143,8 +141,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) 
event_exit_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ -- cgit v1.2.3-71-gd317 From 866f3b25a2eb60d7529c227a0ecd80c3aba443fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Nov 2010 09:33:19 -0800 Subject: bonding: IGMP handling cleanup Instead of iterating in_dev->mc_list from bonding driver, it's better to call a helper function provided by igmp.c. Details of implementation (locking) are private to igmp code. ip_mc_rejoin_group(struct ip_mc_list *im) becomes ip_mc_rejoin_groups(struct in_device *in_dev); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 8 ++------ include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 32 +++++++++++++++++++------------- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 518844852f06..e588b2e1c3b3 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -873,15 +873,11 @@ static void bond_mc_del(struct bonding *bond, void *addr) static void __bond_resend_igmp_join_requests(struct net_device *dev) { struct in_device *in_dev; - struct ip_mc_list *im; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) { - for (im = in_dev->mc_list; im; im = im->next) - ip_mc_rejoin_group(im); - } - + if (in_dev) + ip_mc_rejoin_groups(in_dev); rcu_read_unlock(); } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 7d164670f264..c4987f265109 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -238,7 +238,7 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); -extern void ip_mc_rejoin_group(struct ip_mc_list *im); +extern void ip_mc_rejoin_groups(struct in_device *in_dev); 
#endif #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index afb1e82a59f9..50f6bc1a002a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1267,26 +1267,32 @@ EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. + * Called with rcu_read_lock() */ -void ip_mc_rejoin_group(struct ip_mc_list *im) +void ip_mc_rejoin_groups(struct in_device *in_dev) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = im->interface; + struct ip_mc_list *im; + int type; - if (im->multiaddr == IGMP_ALL_HOSTS) - return; + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; - /* a failover is happening and switches - * must be notified immediately */ - if (IGMP_V1_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); - else if (IGMP_V2_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); - else - igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); + /* a failover is happening and switches + * must be notified immediately + */ + if (IGMP_V1_SEEN(in_dev)) + type = IGMP_HOST_MEMBERSHIP_REPORT; + else if (IGMP_V2_SEEN(in_dev)) + type = IGMPV2_HOST_MEMBERSHIP_REPORT; + else + type = IGMPV3_HOST_MEMBERSHIP_REPORT; + igmp_send_report(in_dev, im, type); + } #endif } -EXPORT_SYMBOL(ip_mc_rejoin_group); +EXPORT_SYMBOL(ip_mc_rejoin_groups); /* * A socket has left a multicast group on device dev -- cgit v1.2.3-71-gd317 From 4c3710afbc333c33100739dec10662b4ee64e219 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 16 Nov 2010 20:28:24 +0000 Subject: net: move definitions of BPF_S_* to net/core/filter.c BPF_S_* are used internally, should not be exposed to the others. Signed-off-by: Changli Gao Acked-by: Eric Dumazet Acked-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- include/linux/filter.h | 48 ------------------------------------------------ net/core/filter.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 69b43dbea6c6..151f5d703b7e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -91,54 +91,6 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define BPF_TAX 0x00 #define BPF_TXA 0x80 -enum { - BPF_S_RET_K = 0, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, -}; - #ifndef BPF_MAXINSNS #define BPF_MAXINSNS 4096 #endif diff --git a/net/core/filter.c b/net/core/filter.c index 03dc0710194f..15a545d39cd3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -38,6 +38,54 @@ #include #include +enum { + BPF_S_RET_K = 0, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + 
BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, +}; + /* No hurry in this branch */ static void *__load_pointer(struct sk_buff *skb, int k) { -- cgit v1.2.3-71-gd317 From c5485a7e7569ab32eea240c850198519e2a765ef Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 16 Nov 2010 10:58:37 +0900 Subject: lib: Add generic exponentially weighted moving average (EWMA) function This adds generic functions for calculating Exponentially Weighted Moving Averages (EWMA). This implementation makes use of a structure which keeps the EWMA parameters and a scaled up internal representation to reduce rounding errors. The original idea for this implementation came from the rt2x00 driver (rt2x00link.c). I would like to use it in several places in the mac80211 and ath5k code and I hope it can be useful in many other places in the kernel code. Signed-off-by: Bruno Randolf Reviewed-by: KOSAKI Motohiro Signed-off-by: John W. 
Linville --- include/linux/average.h | 32 +++++++++++++++++++++++++++ lib/Kconfig | 3 +++ lib/Makefile | 2 ++ lib/average.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 include/linux/average.h create mode 100644 lib/average.c (limited to 'include/linux') diff --git a/include/linux/average.h b/include/linux/average.h new file mode 100644 index 000000000000..7706e40f95fa --- /dev/null +++ b/include/linux/average.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_AVERAGE_H +#define _LINUX_AVERAGE_H + +#include + +/* Exponentially weighted moving average (EWMA) */ + +/* For more documentation see lib/average.c */ + +struct ewma { + unsigned long internal; + unsigned long factor; + unsigned long weight; +}; + +extern void ewma_init(struct ewma *avg, unsigned long factor, + unsigned long weight); + +extern struct ewma *ewma_add(struct ewma *avg, unsigned long val); + +/** + * ewma_read() - Get average value + * @avg: Average structure + * + * Returns the average value held in @avg. 
+ */ +static inline unsigned long ewma_read(const struct ewma *avg) +{ + return DIV_ROUND_CLOSEST(avg->internal, avg->factor); +} + +#endif /* _LINUX_AVERAGE_H */ diff --git a/lib/Kconfig b/lib/Kconfig index fa9bf2c06199..3116aa631af6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -210,4 +210,7 @@ config GENERIC_ATOMIC64 config LRU_CACHE tristate +config AVERAGE + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index e6a3763b8212..76d3b8514903 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -106,6 +106,8 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o +obj-$(CONFIG_AVERAGE) += average.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/average.c b/lib/average.c new file mode 100644 index 000000000000..f1d1b4660c42 --- /dev/null +++ b/lib/average.c @@ -0,0 +1,57 @@ +/* + * lib/average.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include + +/** + * DOC: Exponentially Weighted Moving Average (EWMA) + * + * These are generic functions for calculating Exponentially Weighted Moving + * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled + * up internal representation of the average value to prevent rounding errors. + * The factor for scaling up and the exponential weight (or decay rate) have to + * be specified thru the init fuction. The structure should not be accessed + * directly but only thru the helper functions. + */ + +/** + * ewma_init() - Initialize EWMA parameters + * @avg: Average structure + * @factor: Factor to use for the scaled up internal value. The maximum value + * of averages can be ULONG_MAX/(factor*weight). + * @weight: Exponential weight, or decay rate. This defines how fast the + * influence of older values decreases. Has to be bigger than 1. + * + * Initialize the EWMA parameters for a given struct ewma @avg. 
+ */ +void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight) +{ + WARN_ON(weight <= 1 || factor == 0); + avg->internal = 0; + avg->weight = weight; + avg->factor = factor; +} +EXPORT_SYMBOL(ewma_init); + +/** + * ewma_add() - Exponentially weighted moving average (EWMA) + * @avg: Average structure + * @val: Current value + * + * Add a sample to the average. + */ +struct ewma *ewma_add(struct ewma *avg, unsigned long val) +{ + avg->internal = avg->internal ? + (((avg->internal * (avg->weight - 1)) + + (val * avg->factor)) / avg->weight) : + (val * avg->factor); + return avg; +} +EXPORT_SYMBOL(ewma_add); -- cgit v1.2.3-71-gd317 From 86107fd170bc379869250eb7e1bd393a3a70e8ae Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 16 Nov 2010 10:58:48 +0900 Subject: nl80211/mac80211: Report signal average Extend nl80211 to report an exponential weighted moving average (EWMA) of the signal value. Since the signal value usually fluctuates between different packets, an average can be more useful than the value of the last packet. This uses the recently added generic EWMA library function. Signed-off-by: Bruno Randolf Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/Kconfig | 1 + net/mac80211/cfg.c | 3 ++- net/mac80211/rx.c | 1 + net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 3 +++ net/wireless/nl80211.c | 3 +++ 8 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e498890..1ce3775e9e26 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,6 +1161,7 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1178,6 +1179,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8fd9eebd0cc9..69e2364889f1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,6 +424,7 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled + * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -439,6 +440,7 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, + STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -485,6 +487,7 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link 
state * @signal: signal strength of last received packet in dBm + * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -505,6 +508,7 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; + s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d6f8653ec88..798d9b9462e2 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 + select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479e..92c9cf6a7d1c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d2fcd22ab06d..9dd60a74181f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,6 +1156,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; + ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eff58571fd7e..f43fca8907f7 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, 
sta->local = local; sta->sdata = sdata; + ewma_init(&sta->avg_signal, 1000, 8); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9265acadef32..84062e2c782c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "key.h" /** @@ -224,6 +225,7 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA + * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -291,6 +293,7 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; + struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 605553842226..d06a40d17002 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,6 +1872,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); + if (sinfo->filled & STATION_INFO_SIGNAL_AVG) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, + sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-71-gd317 From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char 
prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ include/trace/ftrace.h | 14 ++++++++++---- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 725bf6bd39f7..47e3997f7b5c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -225,6 +225,10 @@ enum { FILTER_PTR_STRING, }; +#define EVENT_STORAGE_SIZE 128 +extern struct mutex event_storage_mutex; +extern char event_storage[EVENT_STORAGE_SIZE]; + extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e718a917d897..e16610c208c9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -296,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + mutex_lock(&event_storage_mutex); \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, 
event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __dynamic_array #define __dynamic_array(type, item, len) \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0725eeab1937..35fde09b81de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4ba44deaac25..4b74d71705c0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __array_desc #define __array_desc(type, container, item, len) \ -- cgit v1.2.3-71-gd317 From 93aaae2e01e57483256b7da05c9a7ebd65ad4686 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Nov 2010 09:49:59 -0800 Subject: filter: optimize sk_run_filter Remove pc variable to avoid arithmetic to compute fentry at each filter instruction. Jumps directly manipulate fentry pointer. 
As the last instruction of filter[] is guaranteed to be a RETURN, and all jumps are before the last instruction, we don't need to check filter bounds (number of instructions in filter array) at each iteration, so we remove the filter length from sk_run_filter() params. On x86_32 remove f_k var introduced in commit 57fe93b374a6b871 (filter: make sure filters dont read uninitialized memory) Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to avoid too many ifdefs in this code. This helps the compiler use CPU registers to hold fentry and the A accumulator. On x86_32, this saves 401 bytes, and more importantly, sk_run_filter() runs much faster because of reduced register pressure (One less conditional branch per BPF instruction) # size net/core/filter.o net/core/filter_pre.o text data bss dec hex filename 2948 0 0 2948 b84 net/core/filter.o 3349 0 0 3349 d15 net/core/filter_pre.o on x86_64 : # size net/core/filter.o net/core/filter_pre.o text data bss dec hex filename 5173 0 0 5173 1435 net/core/filter.o 5224 0 0 5224 1468 net/core/filter_pre.o Signed-off-by: Eric Dumazet Acked-by: Changli Gao Signed-off-by: David S. 
Miller --- drivers/isdn/i4l/isdn_ppp.c | 14 +++---- drivers/net/ppp_generic.c | 12 ++---- include/linux/filter.h | 2 +- net/core/filter.c | 93 +++++++++++++++++++++++---------------------- net/core/timestamping.c | 2 +- net/packet/af_packet.c | 2 +- 6 files changed, 61 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 97c5cc2997f5..9e8162c80bb0 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff } if (is->pass_filter - && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) { + && sk_run_filter(skb, is->pass_filter) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && sk_run_filter(skb, is->active_filter, - is->active_len) == 0)) { + && sk_run_filter(skb, is->active_filter) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); lp->huptimer = 0; @@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) { + && sk_run_filter(skb, ipt->pass_filter) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && sk_run_filter(skb, ipt->active_filter, - ipt->active_len) == 0)) { + && sk_run_filter(skb, ipt->active_filter) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); lp->huptimer = 0; @@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0; + && sk_run_filter(skb, is->pass_filter) == 0; drop |= is->active_filter - && sk_run_filter(skb, is->active_filter, 
is->active_len) == 0; + && sk_run_filter(skb, is->active_filter) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 09cf56d0416a..0c91598ae280 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - sk_run_filter(skb, ppp->pass_filter, - ppp->pass_len) == 0) { + sk_run_filter(skb, ppp->pass_filter) == 0) { if (ppp->debug & 1) printk(KERN_DEBUG "PPP: outbound frame not passed\n"); kfree_skb(skb); @@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && - sk_run_filter(skb, ppp->active_filter, - ppp->active_len) == 0)) + sk_run_filter(skb, ppp->active_filter) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - sk_run_filter(skb, ppp->pass_filter, - ppp->pass_len) == 0) { + sk_run_filter(skb, ppp->pass_filter) == 0) { if (ppp->debug & 1) printk(KERN_DEBUG "PPP: inbound frame " "not passed\n"); @@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - sk_run_filter(skb, ppp->active_filter, - ppp->active_len) == 0)) + sk_run_filter(skb, ppp->active_filter) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else diff --git a/include/linux/filter.h b/include/linux/filter.h index 151f5d703b7e..447a775878fb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -147,7 +147,7 @@ struct sock; extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(struct sk_buff *skb, - struct sock_filter *filter, int flen); + const struct sock_filter *filter); extern int 
sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); diff --git a/net/core/filter.c b/net/core/filter.c index 15a545d39cd3..9e77b3c816c5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); + unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter); * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on * @filter: filter to apply - * @flen: length of filter * * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. skb is the data we are - * filtering, filter is the array of filter instructions, and - * len is the number of filter blocks in the array. + * Return length to keep, 0 for none. @skb is the data we are + * filtering, @filter is the array of filter instructions. + * Because all jumps are guaranteed to be before last instruction, + * and last instruction guaranteed to be a RET, we dont need to check + * flen. (We used to pass to this function the length of filter) */ -unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry) { void *ptr; u32 A = 0; /* Accumulator */ @@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int unsigned long memvalid = 0; u32 tmp; int k; - int pc; BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); /* * Process array of filter instructions. 
*/ - for (pc = 0; pc < flen; pc++) { - const struct sock_filter *fentry = &filter[pc]; - u32 f_k = fentry->k; + for (;; fentry++) { +#if defined(CONFIG_X86_32) +#define K (fentry->k) +#else + const u32 K = fentry->k; +#endif switch (fentry->code) { case BPF_S_ALU_ADD_X: A += X; continue; case BPF_S_ALU_ADD_K: - A += f_k; + A += K; continue; case BPF_S_ALU_SUB_X: A -= X; continue; case BPF_S_ALU_SUB_K: - A -= f_k; + A -= K; continue; case BPF_S_ALU_MUL_X: A *= X; continue; case BPF_S_ALU_MUL_K: - A *= f_k; + A *= K; continue; case BPF_S_ALU_DIV_X: if (X == 0) @@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int A /= X; continue; case BPF_S_ALU_DIV_K: - A /= f_k; + A /= K; continue; case BPF_S_ALU_AND_X: A &= X; continue; case BPF_S_ALU_AND_K: - A &= f_k; + A &= K; continue; case BPF_S_ALU_OR_X: A |= X; continue; case BPF_S_ALU_OR_K: - A |= f_k; + A |= K; continue; case BPF_S_ALU_LSH_X: A <<= X; continue; case BPF_S_ALU_LSH_K: - A <<= f_k; + A <<= K; continue; case BPF_S_ALU_RSH_X: A >>= X; continue; case BPF_S_ALU_RSH_K: - A >>= f_k; + A >>= K; continue; case BPF_S_ALU_NEG: A = -A; continue; case BPF_S_JMP_JA: - pc += f_k; + fentry += K; continue; case BPF_S_JMP_JGT_K: - pc += (A > f_k) ? fentry->jt : fentry->jf; + fentry += (A > K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_K: - pc += (A >= f_k) ? fentry->jt : fentry->jf; + fentry += (A >= K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_K: - pc += (A == f_k) ? fentry->jt : fentry->jf; + fentry += (A == K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_K: - pc += (A & f_k) ? fentry->jt : fentry->jf; + fentry += (A & K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGT_X: - pc += (A > X) ? fentry->jt : fentry->jf; + fentry += (A > X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_X: - pc += (A >= X) ? fentry->jt : fentry->jf; + fentry += (A >= X) ? 
fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_X: - pc += (A == X) ? fentry->jt : fentry->jf; + fentry += (A == X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_X: - pc += (A & X) ? fentry->jt : fentry->jf; + fentry += (A & X) ? fentry->jt : fentry->jf; continue; case BPF_S_LD_W_ABS: - k = f_k; + k = K; load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { @@ -268,7 +271,7 @@ load_w: } break; case BPF_S_LD_H_ABS: - k = f_k; + k = K; load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { @@ -277,7 +280,7 @@ load_h: } break; case BPF_S_LD_B_ABS: - k = f_k; + k = K; load_b: ptr = load_pointer(skb, k, 1, &tmp); if (ptr != NULL) { @@ -292,34 +295,34 @@ load_b: X = skb->len; continue; case BPF_S_LD_W_IND: - k = X + f_k; + k = X + K; goto load_w; case BPF_S_LD_H_IND: - k = X + f_k; + k = X + K; goto load_h; case BPF_S_LD_B_IND: - k = X + f_k; + k = X + K; goto load_b; case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, f_k, 1, &tmp); + ptr = load_pointer(skb, K, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; case BPF_S_LD_IMM: - A = f_k; + A = K; continue; case BPF_S_LDX_IMM: - X = f_k; + X = K; continue; case BPF_S_LD_MEM: - A = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + A = (memvalid & (1UL << K)) ? + mem[K] : 0; continue; case BPF_S_LDX_MEM: - X = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + X = (memvalid & (1UL << K)) ? 
+ mem[K] : 0; continue; case BPF_S_MISC_TAX: X = A; @@ -328,16 +331,16 @@ load_b: A = X; continue; case BPF_S_RET_K: - return f_k; + return K; case BPF_S_RET_A: return A; case BPF_S_ST: - memvalid |= 1UL << f_k; - mem[f_k] = A; + memvalid |= 1UL << K; + mem[K] = A; continue; case BPF_S_STX: - memvalid |= 1UL << f_k; - mem[f_k] = X; + memvalid |= 1UL << K; + mem[K] = X; continue; default: WARN_ON(1); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 0ae6c22da85b..dac7ed687f60 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb) if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + return sk_run_filter(skb, ptp_filter); else return PTP_CLASS_NONE; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20964560a0ed..b6372dd128d7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns, filter->len); + res = sk_run_filter(skb, filter->insns); rcu_read_unlock_bh(); return res; -- cgit v1.2.3-71-gd317 From ecf7ace9a8450303a987aa8364e53860cd50e554 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Nov 2010 15:21:07 +0100 Subject: kref: Add a kref_sub function Makes it possible to optimize batched multiple unrefs. Initial user will be drivers/gpu/ttm which accumulates unrefs to be processed outside of atomic code. 
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/linux/kref.h | 2 ++ lib/kref.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 6cc38fc07ab7..d4a62ab2ee5e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,5 +24,7 @@ struct kref { void kref_init(struct kref *kref); void kref_get(struct kref *kref); int kref_put(struct kref *kref, void (*release) (struct kref *kref)); +int kref_sub(struct kref *kref, unsigned int count, + void (*release) (struct kref *kref)); #endif /* _KREF_H_ */ diff --git a/lib/kref.c b/lib/kref.c index d3d227a08a4b..3efb882b11db 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. 
+ */ +int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); +EXPORT_SYMBOL(kref_sub); -- cgit v1.2.3-71-gd317 From eb06acdc85585f28864261f28659157848762ee4 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 28 Oct 2010 13:10:50 +0000 Subject: macvlan: Introduce 'passthru' mode to takeover the underlying device With the current default 'vepa' mode, a KVM guest using virtio with macvtap backend has the following limitations. - cannot change/add a mac address on the guest virtio-net - cannot create a vlan device on the guest virtio-net - cannot enable promiscuous mode on guest virtio-net To address these limitations, this patch introduces a new mode called 'passthru' when creating a macvlan device which allows takeover of the underlying device and passing it to a guest using virtio with macvtap backend. Only one macvlan device is allowed in passthru mode and it inherits the mac address from the underlying device and sets it in promiscuous mode to receive and forward all the packets. Signed-off-by: Sridhar Samudrala ------------------------------------------------------------------------- Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 33 ++++++++++++++++++++++++++++++++- include/linux/if_link.h | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 93f0ba25c808..6ed577b065df 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -38,6 +38,7 @@ struct macvlan_port { struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; struct list_head vlans; struct rcu_head rcu; + bool passthru; }; #define macvlan_port_get_rcu(dev) \ @@ -169,6 +170,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) macvlan_broadcast(skb, port, NULL, MACVLAN_MODE_PRIVATE | MACVLAN_MODE_VEPA | + MACVLAN_MODE_PASSTHRU| MACVLAN_MODE_BRIDGE); else if (src->mode == MACVLAN_MODE_VEPA) /* flood to everyone except source */ @@ -185,7 +187,10 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) return skb; } - vlan = macvlan_hash_lookup(port, eth->h_dest); + if (port->passthru) + vlan = list_first_entry(&port->vlans, struct macvlan_dev, list); + else + vlan = macvlan_hash_lookup(port, eth->h_dest); if (vlan == NULL) return skb; @@ -288,6 +293,11 @@ static int macvlan_open(struct net_device *dev) struct net_device *lowerdev = vlan->lowerdev; int err; + if (vlan->port->passthru) { + dev_set_promiscuity(lowerdev, 1); + goto hash_add; + } + err = -EBUSY; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; @@ -300,6 +310,8 @@ static int macvlan_open(struct net_device *dev) if (err < 0) goto del_unicast; } + +hash_add: macvlan_hash_add(vlan); return 0; @@ -314,12 +326,18 @@ static int macvlan_stop(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; + if (vlan->port->passthru) { + dev_set_promiscuity(lowerdev, -1); + goto hash_del; + } + dev_mc_unsync(lowerdev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); dev_uc_del(lowerdev, dev->dev_addr); +hash_del: macvlan_hash_del(vlan); return 0; } 
@@ -559,6 +577,7 @@ static int macvlan_port_create(struct net_device *dev) if (port == NULL) return -ENOMEM; + port->passthru = false; port->dev = dev; INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) @@ -603,6 +622,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) case MACVLAN_MODE_PRIVATE: case MACVLAN_MODE_VEPA: case MACVLAN_MODE_BRIDGE: + case MACVLAN_MODE_PASSTHRU: break; default: return -EINVAL; @@ -652,6 +672,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, } port = macvlan_port_get(lowerdev); + /* Only 1 macvlan device can be created in passthru mode */ + if (port->passthru) + return -EINVAL; + vlan->lowerdev = lowerdev; vlan->dev = dev; vlan->port = port; @@ -662,6 +686,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (data && data[IFLA_MACVLAN_MODE]) vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); + if (vlan->mode == MACVLAN_MODE_PASSTHRU) { + if (!list_empty(&port->vlans)) + return -EINVAL; + port->passthru = true; + memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN); + } + err = register_netdevice(dev); if (err < 0) goto destroy_port; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2e02e4d7b11e..6485d2a89bec 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -259,6 +259,7 @@ enum macvlan_mode { MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ }; /* SR-IOV virtual function management section */ -- cgit v1.2.3-71-gd317 From 23ed992a5ebe6964ebe312b54142fbc5e8185cdc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 5 Nov 2010 18:04:52 +0100 Subject: drm/i915|intel-gtt: consolidate intel-gtt.h headers ... and a few other defines. 
Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 5 ----- drivers/gpu/drm/i915/i915_gem.c | 1 - include/drm/intel-gtt.h | 12 ++++++++++++ include/linux/intel-gtt.h | 20 -------------------- 4 files changed, 12 insertions(+), 26 deletions(-) delete mode 100644 include/linux/intel-gtt.h (limited to 'include/linux') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 72267c801637..291ac5113576 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -24,7 +24,6 @@ #include #include "agp.h" #include "intel-agp.h" -#include #include /* @@ -39,10 +38,6 @@ #define USE_PCI_DMA_API 0 #endif -#define AGP_DCACHE_MEMORY 1 -#define AGP_PHYS_MEMORY 2 -#define INTEL_AGP_CACHED_MEMORY 3 - struct intel_gtt_driver { unsigned int gen : 8; unsigned int is_g33 : 1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf05ac414b1f..68492357658c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,7 +34,6 @@ #include #include #include -#include struct change_domains { uint32_t invalidate_domains; diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 020f8aab7e5b..9f91cbe35157 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -13,5 +13,17 @@ const struct intel_gtt { unsigned int gtt_mappable_entries; } *intel_gtt_get(void); + +/* Special gtt memory types */ +#define AGP_DCACHE_MEMORY 1 +#define AGP_PHYS_MEMORY 2 + +/* New caching attributes for gen6/sandybridge */ +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + #endif diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h deleted file mode 100644 index 1d19ab2afa39..000000000000 --- a/include/linux/intel-gtt.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Common Intel AGPGART and GTT definitions. 
- */ -#ifndef _INTEL_GTT_H -#define _INTEL_GTT_H - -#include - -/* This is for Intel only GTT controls. - * - * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only - */ - -#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) -#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) - -/* flag for GFDT type */ -#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) - -#endif -- cgit v1.2.3-71-gd317 From f050a8abbda0efcd597c6b1825e3b9ce4d613383 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 5 Nov 2010 18:40:56 +0100 Subject: agp: kill agp_flush_chipset and corresponding ioctl The intel drm calls the chipset functions now directly. Userspace never called the corresponding ioctl, hence it can be killed, too. Cc: Dave Airlie Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/agp.h | 1 - drivers/char/agp/compat_ioctl.c | 1 - drivers/char/agp/compat_ioctl.h | 1 - drivers/char/agp/frontend.c | 8 -------- drivers/char/agp/generic.c | 7 ------- drivers/char/agp/intel-gtt.c | 6 ------ include/linux/agp_backend.h | 1 - 7 files changed, 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 5259065f3c79..3e67ddde9e16 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -120,7 +120,6 @@ struct agp_bridge_driver { void (*agp_destroy_page)(struct page *, int flags); void (*agp_destroy_pages)(struct agp_memory *); int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); - void (*chipset_flush)(struct agp_bridge_data *); }; struct agp_bridge_data { diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c index 9d2c97a69cdd..a48e05b31593 100644 --- a/drivers/char/agp/compat_ioctl.c +++ b/drivers/char/agp/compat_ioctl.c @@ -276,7 +276,6 @@ long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case AGPIOC_CHIPSET_FLUSH32: - ret_val = agpioc_chipset_flush_wrap(curr_priv); break; } diff --git a/drivers/char/agp/compat_ioctl.h 
b/drivers/char/agp/compat_ioctl.h index 0c9678ac0371..f30e0fd97963 100644 --- a/drivers/char/agp/compat_ioctl.h +++ b/drivers/char/agp/compat_ioctl.h @@ -102,6 +102,5 @@ void agp_free_memory_wrap(struct agp_memory *memory); struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type); struct agp_memory *agp_find_mem_by_key(int key); struct agp_client *agp_find_client_by_pid(pid_t id); -int agpioc_chipset_flush_wrap(struct agp_file_private *priv); #endif /* _AGP_COMPAT_H */ diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index 3cb4539a98b2..2e044338753c 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -957,13 +957,6 @@ static int agpioc_unbind_wrap(struct agp_file_private *priv, void __user *arg) return agp_unbind_memory(memory); } -int agpioc_chipset_flush_wrap(struct agp_file_private *priv) -{ - DBG(""); - agp_flush_chipset(agp_bridge); - return 0; -} - static long agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1039,7 +1032,6 @@ static long agp_ioctl(struct file *file, break; case AGPIOC_CHIPSET_FLUSH: - ret_val = agpioc_chipset_flush_wrap(curr_priv); break; } diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 4956f1c8f9d5..78bc8de0f234 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -81,13 +81,6 @@ static int agp_get_key(void) return -1; } -void agp_flush_chipset(struct agp_bridge_data *bridge) -{ - if (bridge->driver->chipset_flush) - bridge->driver->chipset_flush(bridge); -} -EXPORT_SYMBOL(agp_flush_chipset); - /* * Use kmalloc if possible for the page list. Otherwise fall back to * vmalloc. 
This speeds things up and also saves memory for small AGP diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 8e2e208c925b..1603e4f8ae73 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -984,11 +984,6 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem, return 0; } -static void intel_fake_agp_chipset_flush(struct agp_bridge_data *bridge) -{ - intel_private.driver->chipset_flush(); -} - static struct agp_memory *intel_fake_agp_alloc_by_type(size_t pg_count, int type) { @@ -1222,7 +1217,6 @@ static const struct agp_bridge_driver intel_fake_agp_driver = { .agp_alloc_pages = agp_generic_alloc_pages, .agp_destroy_page = agp_generic_destroy_page, .agp_destroy_pages = agp_generic_destroy_pages, - .chipset_flush = intel_fake_agp_chipset_flush, }; static const struct intel_gtt_driver i81x_gtt_driver = { diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 09ea4a1e9505..a479b4885d25 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -106,6 +106,5 @@ extern int agp_rebind_memory(void); extern void agp_enable(struct agp_bridge_data *, u32); extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *); extern void agp_backend_release(struct agp_bridge_data *); -extern void agp_flush_chipset(struct agp_bridge_data *); #endif /* _AGP_BACKEND_H */ -- cgit v1.2.3-71-gd317 From cb16b67b5cb33b7d6732e0c416d29d933eea13ce Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 5 Nov 2010 22:27:10 +0100 Subject: agp: kill agp_rebind_memory Its only user, intel-gtt.c is now gone. 
Cc: Dave Airlie Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/generic.c | 20 -------------------- include/linux/agp_backend.h | 1 - 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 78bc8de0f234..012cba0d6d96 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -480,26 +480,6 @@ int agp_unbind_memory(struct agp_memory *curr) } EXPORT_SYMBOL(agp_unbind_memory); -/** - * agp_rebind_emmory - Rewrite the entire GATT, useful on resume - */ -int agp_rebind_memory(void) -{ - struct agp_memory *curr; - int ret_val = 0; - - spin_lock(&agp_bridge->mapped_lock); - list_for_each_entry(curr, &agp_bridge->mapped_list, mapped_list) { - ret_val = curr->bridge->driver->insert_memory(curr, - curr->pg_start, - curr->type); - if (ret_val != 0) - break; - } - spin_unlock(&agp_bridge->mapped_lock); - return ret_val; -} -EXPORT_SYMBOL(agp_rebind_memory); /* End - Routines for handling swapping of agp_memory into the GATT */ diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index a479b4885d25..eaf6cd75a1b1 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -102,7 +102,6 @@ extern struct agp_memory *agp_allocate_memory(struct agp_bridge_data *, size_t, extern int agp_copy_info(struct agp_bridge_data *, struct agp_kern_info *); extern int agp_bind_memory(struct agp_memory *, off_t); extern int agp_unbind_memory(struct agp_memory *); -extern int agp_rebind_memory(void); extern void agp_enable(struct agp_bridge_data *, u32); extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *); extern void agp_backend_release(struct agp_bridge_data *); -- cgit v1.2.3-71-gd317 From 293bb1c41b728d4aa248fe8a0acd2b9066ff5c34 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 24 Nov 2010 02:38:05 +0000 Subject: stmmac: add init/exit callback in plat_stmmacenet_data struct This patch adds in the 
plat_stmmacenet_data the init and exit callbacks that can be used for invoking specific platform functions. For example, on ST targets, these call the PAD manager functions to set PIO lines and syscfg registers. The patch removes the stmmac_claim_resource only used on STM Kernels as well. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/stmmac.h | 22 ---------------------- drivers/net/stmmac/stmmac_main.c | 18 +++++++++++++----- include/linux/stmmac.h | 6 +++--- 3 files changed, 16 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index 31575670d862..8ae76501eb74 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -87,28 +87,6 @@ struct stmmac_priv { struct plat_stmmacenet_data *plat; }; -#ifdef CONFIG_STM_DRIVERS -#include -static inline int stmmac_claim_resource(struct platform_device *pdev) -{ - int ret = 0; - struct plat_stmmacenet_data *plat_dat = pdev->dev.platform_data; - - /* Pad routing setup */ - if (IS_ERR(devm_stm_pad_claim(&pdev->dev, plat_dat->pad_config, - dev_name(&pdev->dev)))) { - printk(KERN_ERR "%s: Failed to request pads!\n", __func__); - ret = -ENODEV; - } - return ret; -} -#else -static inline int stmmac_claim_resource(struct platform_device *pdev) -{ - return 0; -} -#endif - extern int stmmac_mdio_unregister(struct net_device *ndev); extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index 29ba28660fa9..b806cd3515b4 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1643,7 +1643,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr = NULL; struct net_device *ndev = NULL; - struct stmmac_priv *priv; + struct stmmac_priv *priv = NULL; struct plat_stmmacenet_data 
*plat_dat; pr_info("STMMAC driver:\n\tplatform registration... "); @@ -1708,10 +1708,12 @@ static int stmmac_dvr_probe(struct platform_device *pdev) /* Set the I/O base addr */ ndev->base_addr = (unsigned long)addr; - /* Verify embedded resource for the platform */ - ret = stmmac_claim_resource(pdev); - if (ret < 0) - goto out; + /* Custom initialisation */ + if (priv->plat->init) { + ret = priv->plat->init(pdev); + if (unlikely(ret)) + goto out; + } /* MAC HW revice detection */ ret = stmmac_mac_device_setup(ndev); @@ -1745,6 +1747,9 @@ static int stmmac_dvr_probe(struct platform_device *pdev) out: if (ret < 0) { + if (priv->plat->exit) + priv->plat->exit(pdev); + platform_set_drvdata(pdev, NULL); release_mem_region(res->start, resource_size(res)); if (addr != NULL) @@ -1778,6 +1783,9 @@ static int stmmac_dvr_remove(struct platform_device *pdev) stmmac_mdio_unregister(ndev); + if (priv->plat->exit) + priv->plat->exit(pdev); + platform_set_drvdata(pdev, NULL); unregister_netdev(ndev); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d66c61774d95..e10352915698 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -40,9 +40,9 @@ struct plat_stmmacenet_data { int pmt; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); -#ifdef CONFIG_STM_DRIVERS - struct stm_pad_config *pad_config; -#endif + int (*init)(struct platform_device *pdev); + void (*exit)(struct platform_device *pdev); + void *custom_cfg; void *bsp_priv; }; -- cgit v1.2.3-71-gd317 From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 13:12:15 +0000 Subject: ipv6: mcast: RCU conversion ipv6_sk_mc_lock rwlock becomes a spinlock. Readers (inet6_mc_check()) now take rcu_read_lock() instead of read lock. Writers don't need to disable BH anymore. struct ipv6_mc_socklist objects are reclaimed after one RCU grace period. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 2 +- include/net/if_inet6.h | 3 +- net/ipv6/mcast.c | 75 +++++++++++++++++++++++++++++--------------------- 3 files changed, 47 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e429d0e0405..0c997767429a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -364,7 +364,7 @@ struct ipv6_pinfo { __u32 dst_cookie; - struct ipv6_mc_socklist *ipv6_mc_list; + struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f95ff8d9aa47..04977eefb0ee 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -89,10 +89,11 @@ struct ip6_sf_socklist { struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; - struct ipv6_mc_socklist *next; + struct ipv6_mc_socklist __rcu *next; rwlock_t sflock; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip6_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip6_sf_list { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9c5074528a71..49f986d626a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int 
ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ 
int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 
+481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } -- cgit v1.2.3-71-gd317 From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:29 +0000 Subject: xps: Improvements in TX queue selection In dev_pick_tx, don't do work in calculating queue index or setting the index in the sock unless the device has more than one queue. This allows the sock to be set only with a queue index of a multi-queue device which is desirable if device are stacked like in a tunnel. 
We also allow the mapping of a socket to queue to be changed. To maintain in-order packet transmission a flag (ooo_okay) has been added to the sk_buff structure. If a transport layer sets this flag on a packet, the transmit queue can be changed for the socket. Presumably, the transport would set this if there was no possibility of creating OOO packets (for instance, there are no packets in flight for the socket). This patch includes the modification in TCP output for setting this flag. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- net/core/dev.c | 18 +++++++++++------- net/ipv4/tcp_output.c | 5 ++++- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e6ba898de61c..19f37a6ee6c4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -386,9 +386,10 @@ struct sk_buff { #else __u8 deliver_no_wcard:1; #endif + __u8 ooo_okay:1; kmemcheck_bitfield_end(flags2); - /* 0/14 bit hole */ + /* 0/13 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/net/core/dev.c b/net/core/dev.c index 381b8e280162..7b17674a29ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, int queue_index; const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) { + if (dev->real_num_tx_queues == 1) + queue_index = 0; + else if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); queue_index = dev_cap_txqueue(dev, queue_index); } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { - queue_index = 0; - if (dev->real_num_tx_queues > 1) - queue_index = skb_tx_hash(dev, skb); + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int old_index = queue_index; - if (sk) { - struct dst_entry *dst =
rcu_dereference_check(sk->sk_dst_cache, 1); + queue_index = skb_tx_hash(dev, skb); + + if (queue_index != old_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bb8f547fc7d2..5f29b2e20e23 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -822,8 +822,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) + if (tcp_packets_in_flight(tp) == 0) { tcp_ca_event(sk, CA_EVENT_TX_START); + skb->ooo_okay = 1; + } else + skb->ooo_okay = 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); -- cgit v1.2.3-71-gd317 From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:27 +0000 Subject: xps: Transmit Packet Steering This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS-- where RPS is selecting a CPU based on receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering). Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per queue basis in: /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures where each structure holds an array of queue_indexes for queues which that CPU can use.
The benefits of XPS are improved locality in the per queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS are dependent on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue would only be shared by CPUs which are sharing a cache; the degenerate configuration would be that each CPU has its own queue. Below are some benchmark results which show the potential benefit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. bnx2x on 16 core AMD XPS (16 queues, 1 TX queue per CPU) 1234K at 100% CPU No XPS (16 queues) 996K at 100% CPU Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 30 ++++ net/core/dev.c | 53 ++++++- net/core/net-sysfs.c | 369 +++++++++++++++++++++++++++++++++++++++++++++- net/core/net-sysfs.h | 3 + 4 files changed, 447 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b45c1b8b1d19..badf9285fe0d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -503,6 +503,10 @@ struct netdev_queue { struct Qdisc *qdisc; unsigned long state; struct Qdisc *qdisc_sleeping; +#ifdef CONFIG_RPS + struct kobject kobj; +#endif + /* * write mostly part */ @@ -529,6 +533,30 @@ struct rps_map { }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) +/* + * This structure holds an XPS map which can be of variable length. The + * map is an array of queues.
+ */ +struct xps_map { + unsigned int len; + unsigned int alloc_len; + struct rcu_head rcu; + u16 queues[0]; +}; +#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ + / sizeof(u16)) + +/* + * This structure holds all XPS maps for device. Maps are indexed by CPU. + */ +struct xps_dev_maps { + struct rcu_head rcu; + struct xps_map *cpu_map[0]; +}; +#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * sizeof(struct xps_map *))) + /* * The rps_dev_flow structure contains the mapping of a flow to a CPU and the * tail pointer for that CPU's input queue at the time of last enqueue. @@ -1016,6 +1044,8 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; + struct xps_dev_maps *xps_maps; + /* These may be needed for future network-power-down code. */ /* diff --git a/net/core/dev.c b/net/core/dev.c index 7b17674a29ec..c852f0038a08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + int rc; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, + txq); if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) return queue_index; } +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_RPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) 
+ queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { @@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, queue_index >= dev->real_num_tx_queues) { int old_index = queue_index; - queue_index = skb_tx_hash(dev, skb); + queue_index = get_xps_queue(dev, skb); + if (queue_index < 0) + queue_index = skb_tx_hash(dev, skb); if (queue_index != old_index && sk) { struct dst_entry *dst = @@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + int i; BUG_ON(count < 1); @@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return -ENOMEM; } dev->_tx = tx; + + for (i = 0; i < count; i++) + tx[i].dev = dev; + return 0; } @@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) { - queue->dev = dev; - /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7abeb7ceaa4c..68dbbfdee274 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -772,18 +772,377 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) return error; } -static int rx_queue_register_kobjects(struct net_device *net) +/* + * netdev_queue sysfs structures and functions. 
+ */ +struct netdev_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_netdev_queue_attr(_attr) container_of(_attr, \ + struct netdev_queue_attribute, attr) + +#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) + +static ssize_t netdev_queue_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t netdev_queue_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static const struct sysfs_ops netdev_queue_sysfs_ops = { + .show = netdev_queue_attr_show, + .store = netdev_queue_attr_store, +}; + +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) { + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + + +static ssize_t show_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, char *buf) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + cpumask_var_t mask; + unsigned long index; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + 
for_each_possible_cpu(i) { + struct xps_map *map = + rcu_dereference(dev_maps->cpu_map[i]); + if (map) { + int j; + for (j = 0; j < map->len; j++) { + if (map->queues[j] == index) { + cpumask_set_cpu(i, mask); + break; + } + } + } + } + } + rcu_read_unlock(); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + free_cpumask_var(mask); + return -EINVAL; + } + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void xps_map_release(struct rcu_head *rcu) +{ + struct xps_map *map = container_of(rcu, struct xps_map, rcu); + + kfree(map); +} + +static void xps_dev_maps_release(struct rcu_head *rcu) +{ + struct xps_dev_maps *dev_maps = + container_of(rcu, struct xps_dev_maps, rcu); + + kfree(dev_maps); +} + +static DEFINE_MUTEX(xps_map_mutex); + +static ssize_t store_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + cpumask_var_t mask; + int err, i, cpu, pos, map_len, alloc_len, need_set; + unsigned long index; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + new_dev_maps = kzalloc(max_t(unsigned, + XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); + if (!new_dev_maps) { + free_cpumask_var(mask); + return -ENOMEM; + } + + mutex_lock(&xps_map_mutex); + + dev_maps = dev->xps_maps; + + for_each_possible_cpu(cpu) { + new_map = map = dev_maps ? 
dev_maps->cpu_map[cpu] : NULL; + + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); + + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + new_dev_maps->cpu_map[cpu] = new_map; + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? 
dev_maps->cpu_map[cpu] : NULL; + if (map && new_dev_maps->cpu_map[cpu] != map) + call_rcu(&map->rcu, xps_map_release); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + else { + kfree(new_dev_maps); + rcu_assign_pointer(dev->xps_maps, NULL); + } + + if (dev_maps) + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + + mutex_unlock(&xps_map_mutex); + + free_cpumask_var(mask); + return len; + +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(new_dev_maps->cpu_map[i]); + kfree(new_dev_maps); + free_cpumask_var(mask); + return -ENOMEM; +} + +static struct netdev_queue_attribute xps_cpus_attribute = + __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); + +static struct attribute *netdev_queue_default_attrs[] = { + &xps_cpus_attribute.attr, + NULL +}; + +static void netdev_queue_release(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = dev->xps_maps; + + if (dev_maps) { + for_each_possible_cpu(i) { + map = dev_maps->cpu_map[i]; + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + call_rcu(&map->rcu, xps_map_release); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + } + } + + mutex_unlock(&xps_map_mutex); + + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); +} + +static struct kobj_type netdev_queue_ktype = { + .sysfs_ops = &netdev_queue_sysfs_ops, + 
.release = netdev_queue_release, + .default_attrs = netdev_queue_default_attrs, +}; + +static int netdev_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_queue *queue = net->_tx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, + "tx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + dev_hold(queue->dev); + + return error; +} + +int +netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +{ + int i; + int error = 0; + + for (i = old_num; i < new_num; i++) { + error = netdev_queue_add_kobject(net, i); + if (error) { + new_num = old_num; + break; + } + } + + while (--i >= new_num) + kobject_put(&net->_tx[i].kobj); + + return error; +} + +static int register_queue_kobjects(struct net_device *net) +{ + int error = 0, txq = 0, rxq = 0; + net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; - return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + + error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + if (error) + goto error; + rxq = net->real_num_rx_queues; + + error = netdev_queue_update_kobjects(net, 0, + net->real_num_tx_queues); + if (error) + goto error; + txq = net->real_num_tx_queues; + + return 0; + +error: + netdev_queue_update_kobjects(net, txq, 0); + net_rx_queue_update_kobjects(net, rxq, 0); + return error; } -static void rx_queue_remove_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *net) { net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); + netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); kset_unregister(net->queues_kset); } #endif /* CONFIG_RPS */ @@ -886,7 +1245,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); #ifdef CONFIG_RPS - 
rx_queue_remove_kobjects(net); + remove_queue_kobjects(net); #endif device_del(dev); @@ -927,7 +1286,7 @@ int netdev_register_kobject(struct net_device *net) return error; #ifdef CONFIG_RPS - error = rx_queue_register_kobjects(net); + error = register_queue_kobjects(net); if (error) { device_del(dev); return error; diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 778e1571548d..25ec2ee57df7 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -6,6 +6,9 @@ int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); #ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); +int netdev_queue_update_kobjects(struct net_device *net, + int old_num, int new_num); + #endif #endif -- cgit v1.2.3-71-gd317 From ccb14354017272ddac002e859a2711610b6af174 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 24 Nov 2010 16:18:36 -0500 Subject: Revert "nl80211/mac80211: Report signal average" This reverts commit 86107fd170bc379869250eb7e1bd393a3a70e8ae. This patch inadvertently changed the userland ABI. Signed-off-by: John W.
Linville --- include/linux/nl80211.h | 2 -- include/net/cfg80211.h | 4 ---- net/mac80211/Kconfig | 1 - net/mac80211/cfg.c | 3 +-- net/mac80211/rx.c | 1 - net/mac80211/sta_info.c | 2 -- net/mac80211/sta_info.h | 3 --- net/wireless/nl80211.c | 3 --- 8 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 1ce3775e9e26..037b4e498890 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,7 +1161,6 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) - * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1179,7 +1178,6 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, - NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69e2364889f1..8fd9eebd0cc9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,7 +424,6 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled - * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -440,7 +439,6 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, - STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -487,7 +485,6 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link 
state * @signal: signal strength of last received packet in dBm - * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -508,7 +505,6 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; - s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 798d9b9462e2..4d6f8653ec88 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,7 +6,6 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 - select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 92c9cf6a7d1c..0c544074479e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,9 +343,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + sinfo->filled |= STATION_INFO_SIGNAL; sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dd60a74181f..d2fcd22ab06d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,7 +1156,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; - ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f43fca8907f7..eff58571fd7e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,8 +244,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, 
sta->local = local; sta->sdata = sdata; - ewma_init(&sta->avg_signal, 1000, 8); - if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 84062e2c782c..9265acadef32 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "key.h" /** @@ -225,7 +224,6 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -293,7 +291,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d06a40d17002..605553842226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,9 +1872,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); - if (sinfo->filled & STATION_INFO_SIGNAL_AVG) - NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-71-gd317 From c063dbf52b998b852122dff07a8b8dd430b38437 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Nov 2010 08:10:05 +0100 Subject: cfg80211: allow using CQM event to notify packet loss This adds the ability for drivers to use CQM events to notify about packet loss for 
specific stations (which could be the AP for the managed mode case). Since the threshold might be determined by the driver (it isn't passed in right now) it will be passed out of the driver to userspace in the event. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 +++ include/net/cfg80211.h | 12 ++++++++++++ net/wireless/mlme.c | 12 ++++++++++++ net/wireless/nl80211.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 ++++ 5 files changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e498890..d706bf3badc8 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1819,6 +1819,8 @@ enum nl80211_ps_state { * the minimum amount the RSSI level must change after an event before a * new event may be issued (to reduce effects of RSSI oscillation). * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event + * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many + * consecutive packets were not acknowledged by the peer * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -1827,6 +1829,7 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_RSSI_THOLD, NL80211_ATTR_CQM_RSSI_HYST, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + NL80211_ATTR_CQM_PKT_LOSS_EVENT, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dd4c43f512e2..0663945cfa48 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2601,6 +2601,18 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer + * @dev: network device + * @peer: peer's MAC address + * @num_packets: how many packets were lost -- should be a fixed threshold + * but probably no less than maybe 50, or maybe a throughput 
dependent + * threshold (to account for temporary interference) + * @gfp: context flags + */ +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 26838d903b9a..6980a0c315b2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); } EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8734efa663d1..67ff7e92cb99 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5715,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto 
nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 30d2f939150d..16c2f7190768 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-71-gd317 From a782d688e9c6f9ca9a7a9a28e8e2876969ddef53 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 24 Nov 2010 10:05:22 +0000 Subject: mmc: sh_mmcif: add DMA support The MMCIF controller on sh-mobile platforms can use the DMA controller for data transfers. Interface to the SH dmaengine driver to enable DMA. We also have to lower the maximum number of segments to match with the number of DMA descriptors on SuperH, this doesn't significantly affect driver's PIO performance.
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/mmc/host/Kconfig | 6 ++ drivers/mmc/host/sh_mmcif.c | 246 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/sh_mmcif.h | 15 ++- 3 files changed, 258 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index d618e8673996..859e352d0b5f 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -466,6 +466,12 @@ config MMC_SH_MMCIF This driver supports MMCIF in sh7724/sh7757/sh7372. +config SH_MMCIF_DMA + bool "Use DMA for MMCIF" + depends on MMC_SH_MMCIF + help + Use SH dma-engine driver for data transfer + config MMC_JZ4740 tristate "JZ4740 SD/Multimedia Card Interface support" depends on MACH_JZ4740 diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index b2f261cdaec1..d09a2b38eeeb 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -20,12 +20,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #define DRIVER_NAME "sh_mmcif" @@ -162,8 +164,13 @@ struct sh_mmcif_host { long timeout; void __iomem *addr; struct completion intr_wait; -}; + /* DMA support */ + struct dma_chan *chan_rx; + struct dma_chan *chan_tx; + struct completion dma_complete; + unsigned int dma_sglen; +}; static inline void sh_mmcif_bitset(struct sh_mmcif_host *host, unsigned int reg, u32 val) @@ -177,6 +184,208 @@ static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host, writel(~val & readl(host->addr + reg), host->addr + reg); } +#ifdef CONFIG_SH_MMCIF_DMA +static void mmcif_dma_complete(void *arg) +{ + struct sh_mmcif_host *host = arg; + dev_dbg(&host->pd->dev, "Command completed\n"); + + if (WARN(!host->data, "%s: NULL data in DMA completion!\n", + dev_name(&host->pd->dev))) + return; + + if (host->data->flags & MMC_DATA_READ) + dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen, + DMA_FROM_DEVICE); + 
else + dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen, + DMA_TO_DEVICE); + + complete(&host->dma_complete); +} + +static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) +{ + struct scatterlist *sg = host->data->sg; + struct dma_async_tx_descriptor *desc = NULL; + struct dma_chan *chan = host->chan_rx; + dma_cookie_t cookie = -EINVAL; + int ret; + + ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_FROM_DEVICE); + if (ret > 0) { + host->dma_sglen = ret; + desc = chan->device->device_prep_slave_sg(chan, sg, ret, + DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } + + if (desc) { + desc->callback = mmcif_dma_complete; + desc->callback_param = host; + cookie = desc->tx_submit(desc); + if (cookie < 0) { + desc = NULL; + ret = cookie; + } else { + sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN); + chan->device->device_issue_pending(chan); + } + } + dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", + __func__, host->data->sg_len, ret, cookie); + + if (!desc) { + /* DMA failed, fall back to PIO */ + if (ret >= 0) + ret = -EIO; + host->chan_rx = NULL; + host->dma_sglen = 0; + dma_release_channel(chan); + /* Free the Tx channel too */ + chan = host->chan_tx; + if (chan) { + host->chan_tx = NULL; + dma_release_channel(chan); + } + dev_warn(&host->pd->dev, + "DMA failed: %d, falling back to PIO\n", ret); + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + } + + dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__, + desc, cookie, host->data->sg_len); +} + +static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) +{ + struct scatterlist *sg = host->data->sg; + struct dma_async_tx_descriptor *desc = NULL; + struct dma_chan *chan = host->chan_tx; + dma_cookie_t cookie = -EINVAL; + int ret; + + ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_TO_DEVICE); + if (ret > 0) { + host->dma_sglen = ret; + desc = chan->device->device_prep_slave_sg(chan, sg, ret, + 
DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } + + if (desc) { + desc->callback = mmcif_dma_complete; + desc->callback_param = host; + cookie = desc->tx_submit(desc); + if (cookie < 0) { + desc = NULL; + ret = cookie; + } else { + sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAWEN); + chan->device->device_issue_pending(chan); + } + } + dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", + __func__, host->data->sg_len, ret, cookie); + + if (!desc) { + /* DMA failed, fall back to PIO */ + if (ret >= 0) + ret = -EIO; + host->chan_tx = NULL; + host->dma_sglen = 0; + dma_release_channel(chan); + /* Free the Rx channel too */ + chan = host->chan_rx; + if (chan) { + host->chan_rx = NULL; + dma_release_channel(chan); + } + dev_warn(&host->pd->dev, + "DMA failed: %d, falling back to PIO\n", ret); + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + } + + dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d\n", __func__, + desc, cookie); +} + +static bool sh_mmcif_filter(struct dma_chan *chan, void *arg) +{ + dev_dbg(chan->device->dev, "%s: slave data %p\n", __func__, arg); + chan->private = arg; + return true; +} + +static void sh_mmcif_request_dma(struct sh_mmcif_host *host, + struct sh_mmcif_plat_data *pdata) +{ + host->dma_sglen = 0; + + /* We can only either use DMA for both Tx and Rx or not use it at all */ + if (pdata->dma) { + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + host->chan_tx = dma_request_channel(mask, sh_mmcif_filter, + &pdata->dma->chan_priv_tx); + dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__, + host->chan_tx); + + if (!host->chan_tx) + return; + + host->chan_rx = dma_request_channel(mask, sh_mmcif_filter, + &pdata->dma->chan_priv_rx); + dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__, + host->chan_rx); + + if (!host->chan_rx) { + dma_release_channel(host->chan_tx); + host->chan_tx = NULL; + return; + } + + init_completion(&host->dma_complete); + } 
+} + +static void sh_mmcif_release_dma(struct sh_mmcif_host *host) +{ + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + /* Descriptors are freed automatically */ + if (host->chan_tx) { + struct dma_chan *chan = host->chan_tx; + host->chan_tx = NULL; + dma_release_channel(chan); + } + if (host->chan_rx) { + struct dma_chan *chan = host->chan_rx; + host->chan_rx = NULL; + dma_release_channel(chan); + } + + host->dma_sglen = 0; +} +#else +static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) +{ +} + +static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) +{ +} + +static void sh_mmcif_request_dma(struct sh_mmcif_host *host, + struct sh_mmcif_plat_data *pdata) +{ + /* host->chan_tx, host->chan_rx and host->dma_sglen are all zero */ +} + +static void sh_mmcif_release_dma(struct sh_mmcif_host *host) +{ +} +#endif static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk) { @@ -564,7 +773,20 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host, } sh_mmcif_get_response(host, cmd); if (host->data) { - ret = sh_mmcif_data_trans(host, mrq, cmd->opcode); + if (!host->dma_sglen) { + ret = sh_mmcif_data_trans(host, mrq, cmd->opcode); + } else { + long time = + wait_for_completion_interruptible_timeout(&host->dma_complete, + host->timeout); + if (!time) + ret = -ETIMEDOUT; + else if (time < 0) + ret = time; + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, + BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + host->dma_sglen = 0; + } if (ret < 0) mrq->data->bytes_xfered = 0; else @@ -622,6 +844,15 @@ static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq) break; } host->data = mrq->data; + if (mrq->data) { + if (mrq->data->flags & MMC_DATA_READ) { + if (host->chan_rx) + sh_mmcif_start_dma_rx(host); + } else { + if (host->chan_tx) + sh_mmcif_start_dma_tx(host); + } + } sh_mmcif_start_cmd(host, mrq, mrq->cmd); host->data = NULL; @@ -806,14 +1037,18 @@ static int __devinit sh_mmcif_probe(struct platform_device
*pdev) mmc->caps = MMC_CAP_MMC_HIGHSPEED; if (pd->caps) mmc->caps |= pd->caps; - mmc->max_segs = 128; + mmc->max_segs = 32; mmc->max_blk_size = 512; - mmc->max_blk_count = 65535; - mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; + mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs; + mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size; mmc->max_seg_size = mmc->max_req_size; sh_mmcif_sync_reset(host); platform_set_drvdata(pdev, host); + + /* See if we also get DMA */ + sh_mmcif_request_dma(host, pd); + mmc_add_host(mmc); ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host); @@ -852,6 +1087,7 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev) int irq[2]; mmc_remove_host(host->mmc); + sh_mmcif_release_dma(host); if (host->addr) iounmap(host->addr); diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index a6bfa5296495..f216a8879b58 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -14,8 +14,9 @@ #ifndef __SH_MMCIF_H__ #define __SH_MMCIF_H__ -#include #include +#include +#include /* * MMCIF : CE_CLK_CTRL [19:16] @@ -31,13 +32,19 @@ * 1111 : Peripheral clock (sup_pclk set '1') */ +struct sh_mmcif_dma { + struct sh_dmae_slave chan_priv_tx; + struct sh_dmae_slave chan_priv_rx; +}; + struct sh_mmcif_plat_data { void (*set_pwr)(struct platform_device *pdev, int state); void (*down_pwr)(struct platform_device *pdev); int (*get_cd)(struct platform_device *pdef); - u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ - unsigned long caps; - u32 ocr; + struct sh_mmcif_dma *dma; + u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ + unsigned long caps; + u32 ocr; }; #define MMCIF_CE_CMD_SET 0x00000000 -- cgit v1.2.3-71-gd317 From 6d803ba736abb5e122dede70a4720e4843dd6df4 Mon Sep 17 00:00:00 2001 From: Jean-Christop PLAGNIOL-VILLARD Date: Wed, 17 Nov 2010 10:04:33 +0100 Subject: ARM: 6483/1: arm & sh: factorised duplicated clkdev.c factorise some generic infrastructure to assist looking up 
struct clks for the ARM & SH architecture. as the code is identical at 99% put the arch specific code for allocation as example in asm/clkdev.h Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Paul Mundt Signed-off-by: Russell King --- arch/arm/Kconfig | 42 +++--- arch/arm/common/Kconfig | 4 - arch/arm/common/Makefile | 1 - arch/arm/common/clkdev.c | 179 -------------------------- arch/arm/include/asm/clkdev.h | 22 +--- arch/arm/mach-bcmring/clock.c | 3 +- arch/arm/mach-bcmring/core.c | 2 +- arch/arm/mach-davinci/clock.h | 2 +- arch/arm/mach-ep93xx/clock.c | 2 +- arch/arm/mach-imx/clock-imx1.c | 3 +- arch/arm/mach-imx/clock-imx21.c | 2 +- arch/arm/mach-imx/clock-imx27.c | 2 +- arch/arm/mach-integrator/core.c | 3 +- arch/arm/mach-integrator/impd1.c | 3 +- arch/arm/mach-integrator/integrator_cp.c | 3 +- arch/arm/mach-lpc32xx/clock.c | 3 +- arch/arm/mach-mmp/clock.h | 2 +- arch/arm/mach-mx25/clock.c | 3 +- arch/arm/mach-mx3/clock-imx31.c | 2 +- arch/arm/mach-mx3/clock-imx35.c | 3 +- arch/arm/mach-mx5/clock-mx51.c | 2 +- arch/arm/mach-mxc91231/clock.c | 2 +- arch/arm/mach-nomadik/clock.c | 2 +- arch/arm/mach-nuc93x/clock.h | 2 +- arch/arm/mach-omap1/clock.c | 2 +- arch/arm/mach-omap2/dpll3xxx.c | 2 +- arch/arm/mach-pnx4008/clock.c | 3 +- arch/arm/mach-pxa/clock.c | 2 +- arch/arm/mach-pxa/clock.h | 2 +- arch/arm/mach-realview/core.c | 3 +- arch/arm/mach-shmobile/Kconfig | 6 +- arch/arm/mach-shmobile/clock-sh7367.c | 2 +- arch/arm/mach-shmobile/clock-sh7372.c | 2 +- arch/arm/mach-shmobile/clock-sh7377.c | 2 +- arch/arm/mach-tcc8k/clock.c | 3 +- arch/arm/mach-tegra/clock.c | 2 +- arch/arm/mach-tegra/clock.h | 2 +- arch/arm/mach-tegra/tegra2_clocks.c | 3 +- arch/arm/mach-u300/clock.c | 2 +- arch/arm/mach-ux500/clock.c | 3 +- arch/arm/mach-versatile/core.c | 3 +- arch/arm/mach-vexpress/ct-ca9x4.c | 3 +- arch/arm/mach-vexpress/v2m.c | 3 +- arch/arm/mach-w90x900/clock.h | 2 +- arch/arm/plat-omap/Kconfig | 4 +- arch/arm/plat-omap/include/plat/clkdev_omap.h | 2 +- 
arch/arm/plat-spear/include/plat/clock.h | 2 +- arch/arm/plat-stmp3xxx/clock.c | 2 +- arch/sh/Kconfig | 2 +- arch/sh/boards/mach-highlander/setup.c | 2 +- arch/sh/include/asm/clkdev.h | 38 +++--- arch/sh/kernel/Makefile | 2 +- arch/sh/kernel/clkdev.c | 171 ------------------------ arch/sh/kernel/cpu/clock-cpg.c | 2 +- arch/sh/kernel/cpu/clock.c | 16 --- arch/sh/kernel/cpu/sh4/clock-sh4-202.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7343.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7722.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7723.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7724.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7757.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7763.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7780.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7785.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7786.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-shx3.c | 2 +- drivers/Kconfig | 2 + drivers/Makefile | 2 + drivers/clk/Kconfig | 4 + drivers/clk/Makefile | 2 + drivers/clk/clkdev.c | 176 +++++++++++++++++++++++++ include/linux/clkdev.h | 36 ++++++ 73 files changed, 328 insertions(+), 507 deletions(-) delete mode 100644 arch/arm/common/clkdev.c delete mode 100644 arch/sh/kernel/clkdev.c create mode 100644 drivers/clk/Kconfig create mode 100644 drivers/clk/Makefile create mode 100644 drivers/clk/clkdev.c create mode 100644 include/linux/clkdev.h (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a19a5266d5fc..0e51342b3c02 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -221,7 +221,7 @@ config ARCH_INTEGRATOR bool "ARM Ltd. Integrator family" select ARM_AMBA select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select PLAT_VERSATILE @@ -231,7 +231,7 @@ config ARCH_INTEGRATOR config ARCH_REALVIEW bool "ARM Ltd. 
RealView family" select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -245,7 +245,7 @@ config ARCH_VERSATILE bool "ARM Ltd. Versatile family" select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -259,7 +259,7 @@ config ARCH_VEXPRESS select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_TIMER_SP804 - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK select ICST @@ -280,7 +280,7 @@ config ARCH_BCMRING depends on MMU select CPU_V6 select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB help @@ -327,7 +327,7 @@ config ARCH_EP93XX select CPU_ARM920T select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_USES_GETTIMEOFFSET @@ -347,14 +347,14 @@ config ARCH_MXC bool "Freescale MXC/iMX-based" select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Freescale MXC/iMX-based family of processors config ARCH_STMP3XXX bool "Freescale STMP3xxx" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select USB_ARCH_HAS_EHCI @@ -472,7 +472,7 @@ config ARCH_LPC32XX select HAVE_IDE select ARM_AMBA select USB_ARCH_HAS_OHCI - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_TIME select GENERIC_CLOCKEVENTS help @@ -506,7 +506,7 @@ config ARCH_MMP bool "Marvell PXA168/910/MMP2" depends on MMU select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select TICK_ONESHOT select PLAT_PXA @@ -539,7 +539,7 @@ config ARCH_W90X900 bool "Nuvoton W90X900 CPU" select CPU_ARM926T select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + 
select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Nuvoton (Winbond logic dept.) ARM9 processor, @@ -553,7 +553,7 @@ config ARCH_W90X900 config ARCH_NUC93X bool "Nuvoton NUC93X CPU" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Nuvoton (Winbond logic dept.) NUC93X MCU,The NUC93X is a low-power and high performance MPEG-4/JPEG multimedia controller chip. @@ -564,7 +564,7 @@ config ARCH_TEGRA select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_HAS_BARRIERS if CACHE_L2X0 select ARCH_HAS_CPUFREQ help @@ -574,7 +574,7 @@ config ARCH_TEGRA config ARCH_PNX4008 bool "Philips Nexperia PNX4008 Mobile" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_USES_GETTIMEOFFSET help This enables support for Philips PNX4008 mobile platform. @@ -584,7 +584,7 @@ config ARCH_PXA depends on MMU select ARCH_MTD_XIP select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select TICK_ONESHOT @@ -761,7 +761,7 @@ config ARCH_TCC_926 bool "Telechips TCC ARM926-based systems" select CPU_ARM926T select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Telechips TCC ARM926-based systems. @@ -785,7 +785,7 @@ config ARCH_U300 select ARM_AMBA select ARM_VIC select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_GPIO help Support for ST-Ericsson U300 series mobile platforms. 
@@ -795,7 +795,7 @@ config ARCH_U8500 select CPU_V7 select ARM_AMBA select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB help Support for ST-Ericsson's Ux500 architecture @@ -805,7 +805,7 @@ config ARCH_NOMADIK select ARM_AMBA select ARM_VIC select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB help @@ -817,7 +817,7 @@ config ARCH_DAVINCI select ARCH_REQUIRE_GPIOLIB select ZONE_DMA select HAVE_IDE - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_ALLOCATOR select ARCH_HAS_HOLES_MEMORYMODEL help @@ -837,7 +837,7 @@ config PLAT_SPEAR bool "ST SPEAr" select ARM_AMBA select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK help diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 0a34c8186924..778655f0257a 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -37,7 +37,3 @@ config SHARP_PARAM config SHARP_SCOOP bool - -config COMMON_CLKDEV - bool - select HAVE_CLK diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index e6e8664a9413..799e140274f1 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -16,4 +16,3 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_ARCH_IXP2000) += uengine.o obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c deleted file mode 100644 index e2b2bb66e094..000000000000 --- a/arch/arm/common/clkdev.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * arch/arm/common/clkdev.c - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precedence: dev+con > dev only > con only. - */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - - if (match > best) { - clk = p->clk; - if (match != 3) - best = match; - else - break; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - if (clk && !__clk_get(clk)) - clk = NULL; - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? 
dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - __clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -void __init clkdev_add_table(struct clk_lookup *cl, size_t num) -{ - mutex_lock(&clocks_mutex); - while (num--) { - list_add_tail(&cl->node, &clocks); - cl++; - } - mutex_unlock(&clocks_mutex); -} - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...) -{ - struct clk_lookup_alloc *cla; - - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cl); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/arm/include/asm/clkdev.h b/arch/arm/include/asm/clkdev.h index b56c1389b6fa..765d33222369 100644 --- 
a/arch/arm/include/asm/clkdev.h +++ b/arch/arm/include/asm/clkdev.h @@ -12,23 +12,13 @@ #ifndef __ASM_CLKDEV_H #define __ASM_CLKDEV_H -struct clk; -struct device; +#include -struct clk_lookup { - struct list_head node; - const char *dev_id; - const char *con_id; - struct clk *clk; -}; +#include -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); - -void clkdev_add(struct clk_lookup *cl); -void clkdev_drop(struct clk_lookup *cl); - -void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) +{ + return kzalloc(size, GFP_KERNEL); +} #endif diff --git a/arch/arm/mach-bcmring/clock.c b/arch/arm/mach-bcmring/clock.c index 14bafc38f2dc..ad237a42d265 100644 --- a/arch/arm/mach-bcmring/clock.c +++ b/arch/arm/mach-bcmring/clock.c @@ -21,13 +21,12 @@ #include #include #include +#include #include #include #include #include -#include - #include "clock.h" #define clk_is_primary(x) ((x)->type & CLK_TYPE_PRIMARY) diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index d3f959e92b2d..ed96ef400474 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -30,10 +30,10 @@ #include #include #include +#include #include #include -#include #include #include #include diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h index 11099980b58b..0dd22031ec62 100644 --- a/arch/arm/mach-davinci/clock.h +++ b/arch/arm/mach-davinci/clock.h @@ -68,7 +68,7 @@ #ifndef __ASSEMBLER__ #include -#include +#include #define PLLSTAT_GOSTAT BIT(0) #define PLLCMD_GOSET BIT(0) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index ef06c66a6f16..ca4de7105097 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -19,10 +19,10 @@ #include #include #include +#include #include -#include #include diff --git 
a/arch/arm/mach-imx/clock-imx1.c b/arch/arm/mach-imx/clock-imx1.c index daca30b2d5b1..3938a563b280 100644 --- a/arch/arm/mach-imx/clock-imx1.c +++ b/arch/arm/mach-imx/clock-imx1.c @@ -22,8 +22,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-imx/clock-imx21.c b/arch/arm/mach-imx/clock-imx21.c index cf15ea516a72..d7056559715a 100644 --- a/arch/arm/mach-imx/clock-imx21.c +++ b/arch/arm/mach-imx/clock-imx21.c @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #define IO_ADDR_CCM(off) (MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off))) diff --git a/arch/arm/mach-imx/clock-imx27.c b/arch/arm/mach-imx/clock-imx27.c index 98a25bada783..ca1017b9028d 100644 --- a/arch/arm/mach-imx/clock-imx27.c +++ b/arch/arm/mach-imx/clock-imx27.c @@ -21,8 +21,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 8f4fb6d638f7..b8e884b450da 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -21,9 +21,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index fd684bf205e5..5db574f8ae3f 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -22,9 +22,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 6258c90d020c..9403d2fa13a3 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -21,9 +21,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c index 32d63796430a..da0e6498110a 100644 --- a/arch/arm/mach-lpc32xx/clock.c +++ 
b/arch/arm/mach-lpc32xx/clock.c @@ -90,10 +90,9 @@ #include #include #include +#include #include -#include -#include #include #include "clock.h" #include "common.h" diff --git a/arch/arm/mach-mmp/clock.h b/arch/arm/mach-mmp/clock.h index 016ae94691c0..9b027d7491f5 100644 --- a/arch/arm/mach-mmp/clock.h +++ b/arch/arm/mach-mmp/clock.h @@ -6,7 +6,7 @@ * published by the Free Software Foundation. */ -#include +#include struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 9e4a5578c2fb..00dcb08019e9 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c index 109e98f323e0..1cd8b40b7676 100644 --- a/arch/arm/mach-mx3/clock-imx31.c +++ b/arch/arm/mach-mx3/clock-imx31.c @@ -23,8 +23,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c index 61e4a318980a..819dd809615a 100644 --- a/arch/arm/mach-mx3/clock-imx35.c +++ b/arch/arm/mach-mx3/clock-imx35.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c index 8ac36d882927..5975edb47de8 100644 --- a/arch/arm/mach-mx5/clock-mx51.c +++ b/arch/arm/mach-mx5/clock-mx51.c @@ -14,8 +14,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-mxc91231/clock.c b/arch/arm/mach-mxc91231/clock.c index 5c85075d8a56..9fab505f1eb1 100644 --- a/arch/arm/mach-mxc91231/clock.c +++ b/arch/arm/mach-mxc91231/clock.c @@ -2,12 +2,12 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/arch/arm/mach-nomadik/clock.c b/arch/arm/mach-nomadik/clock.c index 89f793adf776..48a59f24e10c 100644 --- 
a/arch/arm/mach-nomadik/clock.c +++ b/arch/arm/mach-nomadik/clock.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "clock.h" /* diff --git a/arch/arm/mach-nuc93x/clock.h b/arch/arm/mach-nuc93x/clock.h index 18e51be4816f..4de1f1da9dc5 100644 --- a/arch/arm/mach-nuc93x/clock.h +++ b/arch/arm/mach-nuc93x/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. */ -#include +#include void nuc93x_clk_enable(struct clk *clk, int enable); void clks_register(struct clk_lookup *clks, size_t num); diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index b8c7fb9d7921..84ef70476b51 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -17,9 +17,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index ed8d330522f1..ebb888f59365 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -26,10 +26,10 @@ #include #include #include +#include #include #include -#include #include "clock.h" #include "prm.h" diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c index 9d1975fa4d9f..a4a3819c96cb 100644 --- a/arch/arm/mach-pnx4008/clock.c +++ b/arch/arm/mach-pnx4008/clock.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c index abba0089a2ae..4e4a84be96ba 100644 --- a/arch/arm/mach-pxa/clock.c +++ b/arch/arm/mach-pxa/clock.c @@ -11,8 +11,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h index d8488742b807..12cc0e87e6c4 100644 --- a/arch/arm/mach-pxa/clock.h +++ b/arch/arm/mach-pxa/clock.h @@ -1,4 +1,4 @@ -#include +#include struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 
07c08151dfe6..3d915b1ccdb5 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -30,8 +30,8 @@ #include #include #include +#include -#include #include #include #include @@ -47,7 +47,6 @@ #include -#include #include #include #include diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 54b479c35ee0..f8f06e9fec35 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -6,7 +6,7 @@ config ARCH_SH7367 bool "SH-Mobile G3 (SH7367)" select CPU_V6 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -14,7 +14,7 @@ config ARCH_SH7377 bool "SH-Mobile G4 (SH7377)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -22,7 +22,7 @@ config ARCH_SH7372 bool "SH-Mobile AP4 (SH7372)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-shmobile/clock-sh7367.c b/arch/arm/mach-shmobile/clock-sh7367.c index 9f78729098f2..6b186aefcbd6 100644 --- a/arch/arm/mach-shmobile/clock-sh7367.c +++ b/arch/arm/mach-shmobile/clock-sh7367.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* SH7367 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 8565aefa21fd..445112adba46 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* SH7372 registers */ #define FRQCRA 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7377.c b/arch/arm/mach-shmobile/clock-sh7377.c index f91395aeb9ab..95942466e63f 100644 --- a/arch/arm/mach-shmobile/clock-sh7377.c +++ b/arch/arm/mach-shmobile/clock-sh7377.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* 
SH7377 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-tcc8k/clock.c b/arch/arm/mach-tcc8k/clock.c index ba32a15127ab..3970a9cdce26 100644 --- a/arch/arm/mach-tcc8k/clock.c +++ b/arch/arm/mach-tcc8k/clock.c @@ -12,8 +12,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index ae19f95585be..77948e0f4909 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "clock.h" #include "board.h" diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h index 94fd859770f1..083a4cfc6cf0 100644 --- a/arch/arm/mach-tegra/clock.h +++ b/arch/arm/mach-tegra/clock.h @@ -21,7 +21,7 @@ #define __MACH_TEGRA_CLOCK_H #include -#include +#include #define DIV_BUS (1 << 0) #define DIV_U71 (1 << 1) diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c index ae3b308e22a4..f0dae6d8ba52 100644 --- a/arch/arm/mach-tegra/tegra2_clocks.c +++ b/arch/arm/mach-tegra/tegra2_clocks.c @@ -24,8 +24,7 @@ #include #include #include - -#include +#include #include diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c index 7458fc6df5c6..fabcc49abe80 100644 --- a/arch/arm/mach-u300/clock.c +++ b/arch/arm/mach-u300/clock.c @@ -25,8 +25,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c index 1675047daf20..531de5c63641 100644 --- a/arch/arm/mach-ux500/clock.c +++ b/arch/arm/mach-ux500/clock.c @@ -13,8 +13,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index e38acb0f89c8..8c1ca1d63538 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -31,8 +31,8 @@ #include #include #include +#include -#include #include #include #include @@ -46,7 
+46,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index c2e405a9e025..26a02eb57571 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -8,8 +8,8 @@ #include #include #include +#include -#include #include #include #include @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 7eaa232180a5..d374a78986e8 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -11,15 +11,14 @@ #include #include #include +#include -#include #include #include #include #include #include -#include #include #include diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h index c56ddab3d912..b88a1b16b2e9 100644 --- a/arch/arm/mach-w90x900/clock.h +++ b/arch/arm/mach-w90x900/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. 
*/ -#include +#include void nuc900_clk_enable(struct clk *clk, int enable); void nuc900_subclk_enable(struct clk *clk, int enable); diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index 92c5bb7909f5..c9408434a855 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -11,13 +11,13 @@ choice config ARCH_OMAP1 bool "TI OMAP1" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on omap7xx, omap15xx or omap16xx" config ARCH_OMAP2PLUS bool "TI OMAP2/3/4" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on OMAP2, OMAP3 or OMAP4" diff --git a/arch/arm/plat-omap/include/plat/clkdev_omap.h b/arch/arm/plat-omap/include/plat/clkdev_omap.h index bb937f3fabed..4b2028ab4d2b 100644 --- a/arch/arm/plat-omap/include/plat/clkdev_omap.h +++ b/arch/arm/plat-omap/include/plat/clkdev_omap.h @@ -8,7 +8,7 @@ #ifndef __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H #define __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H -#include +#include struct omap_clk { u16 cpu; diff --git a/arch/arm/plat-spear/include/plat/clock.h b/arch/arm/plat-spear/include/plat/clock.h index 298bafc0a52f..2572260f990f 100644 --- a/arch/arm/plat-spear/include/plat/clock.h +++ b/arch/arm/plat-spear/include/plat/clock.h @@ -15,7 +15,7 @@ #define __PLAT_CLOCK_H #include -#include +#include #include /* clk structure flags */ diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c index e593a2a801c6..2e712e17ce72 100644 --- a/arch/arm/plat-stmp3xxx/clock.c +++ b/arch/arm/plat-stmp3xxx/clock.c @@ -25,9 +25,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5c075f562eba..cfc510608039 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,7 +1,7 @@ config SUPERH def_bool y select EMBEDDED - select HAVE_CLK + select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT select HAVE_MEMBLOCK select HAVE_OPROFILE diff --git a/arch/sh/boards/mach-highlander/setup.c 
b/arch/sh/boards/mach-highlander/setup.c index a5ecfbacaf36..87618c91d178 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -24,10 +24,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include diff --git a/arch/sh/include/asm/clkdev.h b/arch/sh/include/asm/clkdev.h index 5645f358128b..6ba91868201c 100644 --- a/arch/sh/include/asm/clkdev.h +++ b/arch/sh/include/asm/clkdev.h @@ -1,9 +1,5 @@ /* - * arch/sh/include/asm/clkdev.h - * - * Cloned from arch/arm/include/asm/clkdev.h: - * - * Copyright (C) 2008 Russell King. + * Copyright (C) 2010 Paul Mundt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,25 +7,25 @@ * * Helper for the clk API to assist looking up a struct clk. */ -#ifndef __ASM_CLKDEV_H -#define __ASM_CLKDEV_H -struct clk; +#ifndef __CLKDEV__H_ +#define __CLKDEV__H_ -struct clk_lookup { - struct list_head node; - const char *dev_id; - const char *con_id; - struct clk *clk; -}; +#include +#include +#include -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); +#include -void clkdev_add(struct clk_lookup *cl); -void clkdev_drop(struct clk_lookup *cl); +static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) +{ + if (!slab_is_available()) + return alloc_bootmem_low_pages(size); + else + return kzalloc(size, GFP_KERNEL); +} -void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +#define __clk_put(clk) +#define __clk_get(clk) ({ 1; }) -#endif +#endif /* __CLKDEV_H__ */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 8eed6a485446..cf6522179523 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_REMOVE_return_address.o = -pg -obj-y := clkdev.o debugtraps.o dma-nommu.o 
dumpstack.o \ +obj-y := debugtraps.o dma-nommu.o dumpstack.o \ idle.o io.o irq.o irq_$(BITS).o kdebugfs.o \ machvec.o nmi_debug.o process.o \ process_$(BITS).o ptrace.o ptrace_$(BITS).o \ diff --git a/arch/sh/kernel/clkdev.c b/arch/sh/kernel/clkdev.c deleted file mode 100644 index 1f800ef4a735..000000000000 --- a/arch/sh/kernel/clkdev.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * arch/sh/kernel/clkdev.c - * - * Cloned from arch/arm/common/clkdev.c: - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precedence: dev+con > dev only > con only. 
- */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - if (match == 0) - continue; - - if (match > best) { - clk = p->clk; - best = match; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -void __init clkdev_add_table(struct clk_lookup *cl, size_t num) -{ - mutex_lock(&clocks_mutex); - while (num--) { - list_add_tail(&cl->node, &clocks); - cl++; - } - mutex_unlock(&clocks_mutex); -} - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup * __init_refok -clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) 
-{ - struct clk_lookup_alloc *cla; - - if (!slab_is_available()) - cla = alloc_bootmem_low_pages(sizeof(*cla)); - else - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - struct clk_lookup_alloc *cla = container_of(cl, struct clk_lookup_alloc, cl); - - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cla); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c index e2f63d68da51..dd0e0f211359 100644 --- a/arch/sh/kernel/cpu/clock-cpg.c +++ b/arch/sh/kernel/cpu/clock-cpg.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include static struct clk master_clk = { diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c index 50f887dda565..4187cf4fe185 100644 --- a/arch/sh/kernel/cpu/clock.c +++ b/arch/sh/kernel/cpu/clock.c @@ -48,20 +48,4 @@ int __init clk_init(void) return ret; } -/* - * Returns a clock. Note that we first try to use device id on the bus - * and clock name. If this fails, we try to use clock name only. - */ -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? 
dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL_GPL(clk_get); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL_GPL(clk_put); diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c index 4eabc68cd753..6c1492b8431a 100644 --- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c index 71291ae201b9..93c646072c1b 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /* SH7343 registers */ diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c index 7ce5bbcd4084..049dc0628ccc 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /* SH7366 registers */ diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c index 2030f3d9fac7..9d23a36f0647 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c index d3938f0d3702..55493cd5bd8f 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c index 2d9700c6b53a..527936bb3ce0 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c @@ -22,7 +22,7 @@ #include #include 
#include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c index ce39a2ae8c6c..e073e3eb4c3d 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c index 1f1df48008cd..599630fc4d3b 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c index 62d706350060..8894926479a6 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c index c3e458aaa2b7..2d960247f3eb 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c index 597c9fbe49c6..42e403be9076 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c index 4f70df6b6169..1afdb93b8ccb 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/Kconfig b/drivers/Kconfig index a2b902f4d437..3d93b3a3d630 100644 
--- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -111,4 +111,6 @@ source "drivers/xen/Kconfig" source "drivers/staging/Kconfig" source "drivers/platform/Kconfig" + +source "drivers/clk/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 14cf9077bb2b..4af7d5b124ce 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -114,3 +114,5 @@ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ obj-y += ieee802154/ +#common clk code +obj-y += clk/ diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig new file mode 100644 index 000000000000..4168c8896e16 --- /dev/null +++ b/drivers/clk/Kconfig @@ -0,0 +1,4 @@ + +config CLKDEV_LOOKUP + bool + select HAVE_CLK diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile new file mode 100644 index 000000000000..07613fa172c9 --- /dev/null +++ b/drivers/clk/Makefile @@ -0,0 +1,2 @@ + +obj-$(CONFIG_CLKDEV_LOOKUP) += clkdev.o diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c new file mode 100644 index 000000000000..0fc0a79852de --- /dev/null +++ b/drivers/clk/clkdev.c @@ -0,0 +1,176 @@ +/* + * drivers/clk/clkdev.c + * + * Copyright (C) 2008 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Helper for the clk API to assist looking up a struct clk. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(clocks); +static DEFINE_MUTEX(clocks_mutex); + +/* + * Find the correct struct clk for the device and connection ID. + * We do slightly fuzzy matching here: + * An entry with a NULL ID is assumed to be a wildcard. + * If an entry has a device ID, it must match + * If an entry has a connection ID, it must match + * Then we take the most specific entry - with the following + * order of precedence: dev+con > dev only > con only. 
+ */ +static struct clk *clk_find(const char *dev_id, const char *con_id) +{ + struct clk_lookup *p; + struct clk *clk = NULL; + int match, best = 0; + + list_for_each_entry(p, &clocks, node) { + match = 0; + if (p->dev_id) { + if (!dev_id || strcmp(p->dev_id, dev_id)) + continue; + match += 2; + } + if (p->con_id) { + if (!con_id || strcmp(p->con_id, con_id)) + continue; + match += 1; + } + + if (match > best) { + clk = p->clk; + if (match != 3) + best = match; + else + break; + } + } + return clk; +} + +struct clk *clk_get_sys(const char *dev_id, const char *con_id) +{ + struct clk *clk; + + mutex_lock(&clocks_mutex); + clk = clk_find(dev_id, con_id); + if (clk && !__clk_get(clk)) + clk = NULL; + mutex_unlock(&clocks_mutex); + + return clk ? clk : ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get_sys); + +struct clk *clk_get(struct device *dev, const char *con_id) +{ + const char *dev_id = dev ? dev_name(dev) : NULL; + + return clk_get_sys(dev_id, con_id); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + __clk_put(clk); +} +EXPORT_SYMBOL(clk_put); + +void clkdev_add(struct clk_lookup *cl) +{ + mutex_lock(&clocks_mutex); + list_add_tail(&cl->node, &clocks); + mutex_unlock(&clocks_mutex); +} +EXPORT_SYMBOL(clkdev_add); + +void __init clkdev_add_table(struct clk_lookup *cl, size_t num) +{ + mutex_lock(&clocks_mutex); + while (num--) { + list_add_tail(&cl->node, &clocks); + cl++; + } + mutex_unlock(&clocks_mutex); +} + +#define MAX_DEV_ID 20 +#define MAX_CON_ID 16 + +struct clk_lookup_alloc { + struct clk_lookup cl; + char dev_id[MAX_DEV_ID]; + char con_id[MAX_CON_ID]; +}; + +struct clk_lookup * __init_refok +clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) 
+{ + struct clk_lookup_alloc *cla; + + cla = __clkdev_alloc(sizeof(*cla)); + if (!cla) + return NULL; + + cla->cl.clk = clk; + if (con_id) { + strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); + cla->cl.con_id = cla->con_id; + } + + if (dev_fmt) { + va_list ap; + + va_start(ap, dev_fmt); + vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); + cla->cl.dev_id = cla->dev_id; + va_end(ap); + } + + return &cla->cl; +} +EXPORT_SYMBOL(clkdev_alloc); + +int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, + struct device *dev) +{ + struct clk *r = clk_get(dev, id); + struct clk_lookup *l; + + if (IS_ERR(r)) + return PTR_ERR(r); + + l = clkdev_alloc(r, alias, alias_dev_name); + clk_put(r); + if (!l) + return -ENODEV; + clkdev_add(l); + return 0; +} +EXPORT_SYMBOL(clk_add_alias); + +/* + * clkdev_drop - remove a clock dynamically allocated + */ +void clkdev_drop(struct clk_lookup *cl) +{ + mutex_lock(&clocks_mutex); + list_del(&cl->node); + mutex_unlock(&clocks_mutex); + kfree(cl); +} +EXPORT_SYMBOL(clkdev_drop); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h new file mode 100644 index 000000000000..457bcb0a310a --- /dev/null +++ b/include/linux/clkdev.h @@ -0,0 +1,36 @@ +/* + * include/linux/clkdev.h + * + * Copyright (C) 2008 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Helper for the clk API to assist looking up a struct clk. 
+ */ +#ifndef __CLKDEV_H +#define __CLKDEV_H + +#include + +struct clk; +struct device; + +struct clk_lookup { + struct list_head node; + const char *dev_id; + const char *con_id; + struct clk *clk; +}; + +struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, + const char *dev_fmt, ...); + +void clkdev_add(struct clk_lookup *cl); +void clkdev_drop(struct clk_lookup *cl); + +void clkdev_add_table(struct clk_lookup *, size_t); +int clk_add_alias(const char *, const char *, char *, struct device *); + +#endif -- cgit v1.2.3-71-gd317 From 65500fa94aaeb3475e39c0c5180f188014164ca4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 4 Nov 2010 13:06:59 +0100 Subject: ARM: 6467/1: amba: optional PrimeCell core voltage switch On some contemporary sub-micron SoCs, peripherals on the chip have power domain switches, i.e. the voltage to the core may be turned off to conserve power. In the Ux500 we have this for our PrimeCell derivatives. This patch makes it possible to specify an (optional) regulator to handle the voltage domain switch on AMBA PrimeCells, modeled very similarly to how block clocks are handled. Additional amba_vcore_[enable|disable] calls are supplied to make it possible to introduce optional powering off of the core voltage. Using this will require code to spool/unspool any core HW state.
Cc: Rabin Vincent Cc: Bengt Jonsson Cc: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/amba/bus.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/amba/bus.h | 8 ++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 2737b9752205..e7df019d29d4 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -147,6 +147,39 @@ static void amba_put_disable_pclk(struct amba_device *pcdev) clk_put(pclk); } +static int amba_get_enable_vcore(struct amba_device *pcdev) +{ + struct regulator *vcore = regulator_get(&pcdev->dev, "vcore"); + int ret; + + pcdev->vcore = vcore; + + if (IS_ERR(vcore)) { + /* It is OK not to supply a vcore regulator */ + if (PTR_ERR(vcore) == -ENODEV) + return 0; + return PTR_ERR(vcore); + } + + ret = regulator_enable(vcore); + if (ret) { + regulator_put(vcore); + pcdev->vcore = ERR_PTR(-ENODEV); + } + + return ret; +} + +static void amba_put_disable_vcore(struct amba_device *pcdev) +{ + struct regulator *vcore = pcdev->vcore; + + if (!IS_ERR(vcore)) { + regulator_disable(vcore); + regulator_put(vcore); + } +} + /* * These are the device model conversion veneers; they convert the * device model structures to our more specific structures. 
@@ -159,6 +192,10 @@ static int amba_probe(struct device *dev) int ret; do { + ret = amba_get_enable_vcore(pcdev); + if (ret) + break; + ret = amba_get_enable_pclk(pcdev); if (ret) break; @@ -168,6 +205,7 @@ static int amba_probe(struct device *dev) break; amba_put_disable_pclk(pcdev); + amba_put_disable_vcore(pcdev); } while (0); return ret; @@ -180,6 +218,7 @@ static int amba_remove(struct device *dev) int ret = drv->remove(pcdev); amba_put_disable_pclk(pcdev); + amba_put_disable_vcore(pcdev); return ret; } diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index c6454cca0447..9e7f259346e1 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -18,6 +18,7 @@ #include #include #include +#include #define AMBA_NR_IRQS 2 #define AMBA_CID 0xb105f00d @@ -28,6 +29,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct regulator *vcore; u64 dma_mask; unsigned int periphid; unsigned int irq[AMBA_NR_IRQS]; @@ -71,6 +73,12 @@ void amba_release_regions(struct amba_device *); #define amba_pclk_disable(d) \ do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) +#define amba_vcore_enable(d) \ + (IS_ERR((d)->vcore) ? 0 : regulator_enable((d)->vcore)) + +#define amba_vcore_disable(d) \ + do { if (!IS_ERR((d)->vcore)) regulator_disable((d)->vcore); } while (0) + /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) -- cgit v1.2.3-71-gd317 From 335d7afbfb71faac833734a94240c1e07cf0ead8 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 22 Nov 2010 15:47:36 +0100 Subject: mutexes, sched: Introduce arch_mutex_cpu_relax() The spinning mutex implementation uses cpu_relax() in busy loops as a compiler barrier. Depending on the architecture, cpu_relax() may do more than needed in these specific mutex spin loops.
On System z we also give up the time slice of the virtual cpu in cpu_relax(), which prevents effective spinning on the mutex. This patch replaces cpu_relax() in the spinning mutex code with arch_mutex_cpu_relax(), which can be defined by each architecture that selects HAVE_ARCH_MUTEX_CPU_RELAX. The default is still cpu_relax(), so this patch should not affect other architectures than System z for now. Signed-off-by: Gerald Schaefer Signed-off-by: Peter Zijlstra LKML-Reference: <1290437256.7455.4.camel@thinkpad> Signed-off-by: Ingo Molnar --- arch/Kconfig | 3 +++ arch/s390/Kconfig | 1 + arch/s390/include/asm/mutex.h | 2 ++ include/linux/mutex.h | 4 ++++ kernel/mutex.c | 2 +- kernel/sched.c | 3 ++- 6 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 8bf0fa652eb6..f78c2be4242b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI config HAVE_ARCH_JUMP_LABEL bool +config HAVE_ARCH_MUTEX_CPU_RELAX + bool + source "kernel/gcov/Kconfig" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e0b98e71ff47..6c6d7b339aae 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -99,6 +99,7 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_GET_USER_PAGES_FAST + select HAVE_ARCH_MUTEX_CPU_RELAX select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 458c1f7fbc18..688271f5f2e4 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,3 +7,5 @@ */ #include + +#define arch_mutex_cpu_relax() barrier() diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f363bc8fdc74..94b48bd40dd7 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); extern int 
atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX +#define arch_mutex_cpu_relax() cpu_relax() +#endif + #endif diff --git a/kernel/mutex.c b/kernel/mutex.c index 200407c1502f..a5889fb28ecf 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax(); + arch_mutex_cpu_relax(); } #endif spin_lock_mutex(&lock->wait_lock, flags); diff --git a/kernel/sched.c b/kernel/sched.c index 3e8a7db951a6..abe7aec55763 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -75,6 +75,7 @@ #include #include +#include #include "sched_cpupri.h" #include "workqueue_sched.h" @@ -3888,7 +3889,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) if (task_thread_info(rq->curr) != owner || need_resched()) return 0; - cpu_relax(); + arch_mutex_cpu_relax(); } return 1; -- cgit v1.2.3-71-gd317 From 6c7e550f13f8ad82efb6a5653ae628c2543c1768 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Tue, 23 Nov 2010 16:21:43 +0100 Subject: perf: Introduce is_sampling_event() and use it when appropriate. 
Signed-off-by: Franck Bui-Huu Signed-off-by: Peter Zijlstra LKML-Reference: <1290525705-6265-1-git-send-email-fbuihuu@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- include/linux/perf_event.h | 5 +++++ kernel/perf_event.c | 10 +++++----- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7c1a4c35fd41..c01dfec635db 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -442,7 +442,7 @@ static int x86_setup_perfctr(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 config; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index de2c41758e29..cbf04cc1e630 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -969,6 +969,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool is_sampling_event(struct perf_event *event) +{ + return event->attr.sample_period != 0; +} + /* * Return 1 for a software event, 0 for a hardware event */ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 43f757ccf831..880698488c91 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2514,7 +2514,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) int ret = 0; u64 value; - if (!event->attr.sample_period) + if (!is_sampling_event(event)) return -EINVAL; if (copy_from_user(&value, arg, sizeof(value))) @@ -4385,7 +4385,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!regs) return; - if (!hwc->sample_period) + if (!is_sampling_event(event)) return; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) @@ 
-4548,7 +4548,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; - if (hwc->sample_period) { + if (is_sampling_event(event)) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); } @@ -4920,7 +4920,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { + if (is_sampling_event(event)) { s64 period = local64_read(&hwc->period_left); if (period) { @@ -4941,7 +4941,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (hwc->sample_period) { + if (is_sampling_event(event)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); -- cgit v1.2.3-71-gd317 From 004417a6d468e24399e383645c068b498eed84ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Nov 2010 18:38:29 +0100 Subject: perf, arch: Cleanup perf-pmu init vs lockup-detector The perf hardware pmu got initialized at various points in the boot, some before early_initcall() some after (notably arch_initcall). The problem is that the NMI lockup detector is run from early_initcall() and expects the hardware pmu to be present. Sanitize this by moving all architecture hardware pmu implementations to initialize at early_initcall() and move the lockup detector to an explicit initcall right after that. 
Cc: paulus Cc: davem Cc: Michael Cree Cc: Deng-Cheng Zhu Acked-by: Paul Mundt Acked-by: Will Deacon Signed-off-by: Peter Zijlstra LKML-Reference: <1290707759.2145.119.camel@laptop> Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/perf_event.h | 6 ------ arch/alpha/kernel/irq_alpha.c | 2 -- arch/alpha/kernel/perf_event.c | 9 ++++++--- arch/arm/kernel/perf_event.c | 2 +- arch/mips/kernel/perf_event_mipsxx.c | 2 +- arch/powerpc/kernel/e500-pmu.c | 2 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/sh/kernel/cpu/sh4/perf_event.c | 2 +- arch/sh/kernel/cpu/sh4a/perf_event.c | 2 +- arch/sparc/include/asm/perf_event.h | 4 ---- arch/sparc/kernel/nmi.c | 2 -- arch/sparc/kernel/perf_event.c | 7 +++++-- arch/x86/include/asm/perf_event.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/perf_event.c | 11 +++++++---- include/linux/sched.h | 4 ++++ init/main.c | 1 + kernel/watchdog.c | 7 +++---- 24 files changed, 38 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index fe792ca818f6..5996e7a6757e 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h @@ -1,10 +1,4 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H -#ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); -#else -static inline void init_hw_perf_events(void) { } -#endif - #endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 5f77afb88e89..4c8bb374eb0a 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -112,8 +112,6 @@ init_IRQ(void) wrent(entInt, 0); alpha_mv.init_irq(); - - init_hw_perf_events(); } /* diff 
--git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 1cc49683fb69..3283059b6e85 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* * Init call to initialise performance events at kernel startup. */ -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_cpu()) { pr_cont("No support for your CPU.\n"); - return; + return 0; } pr_cont("Supported CPU type!\n"); @@ -882,5 +883,7 @@ void __init init_hw_perf_events(void) alpha_pmu = &ev67_pmu; perf_pmu_register(&pmu); -} + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..d45f70e5f2ee 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3038,7 +3038,7 @@ init_hw_perf_events(void) return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); /* * Callchain handling code. diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 5c7c6fc07565..183e0d226669 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1047,6 +1047,6 @@ init_hw_perf_events(void) return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); #endif /* defined(CONFIG_CPU_MIPS32)... 
*/ diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index 7c07de0d8943..b150b510510f 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -arch_initcall(init_e500_pmu); +early_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 09d72028f317..2cc5e0301d0b 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -arch_initcall(init_mpc7450_pmu); +early_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 2a361cdda635..ead8b3c2649e 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -arch_initcall(init_power4_pmu); +early_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 199de527d411..eca0ac595cb6 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -arch_initcall(init_power5p_pmu); +early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 98b6a729a9dd..d5ff0f64a5e6 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -arch_initcall(init_power5_pmu); +early_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 84a607bda8fb..31603927e376 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ 
b/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -arch_initcall(init_power6_pmu); +early_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 852f7b7f6b40..593740fcb799 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -arch_initcall(init_power7_pmu); +early_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3fee685de4df..9a6e093858fe 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -arch_initcall(init_ppc970_pmu); +early_initcall(init_ppc970_pmu); diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c index dbf3b4bb71fe..748955df018d 100644 --- a/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void) return register_sh_pmu(&sh7750_pmu); } -arch_initcall(sh7750_pmu_init); +early_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c index 580276525731..17e6bebfede0 100644 --- a/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void) return register_sh_pmu(&sh4a_pmu); } -arch_initcall(sh4a_pmu_init); +early_initcall(sh4a_pmu_init); diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 6e8bfa1786da..4d3dbe3703e9 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PERF_EVENTS #include -extern void init_hw_perf_events(void); - #define 
perf_arch_fetch_caller_regs(regs, ip) \ do { \ unsigned long _pstate, _asi, _pil, _i7, _fp; \ @@ -26,8 +24,6 @@ do { \ (regs)->u_regs[UREG_I6] = _fp; \ (regs)->u_regs[UREG_I7] = _i7; \ } while (0) -#else -static inline void init_hw_perf_events(void) { } #endif #endif diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a4bd7ba74c89..300f810142f5 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -270,8 +270,6 @@ int __init nmi_init(void) atomic_set(&nmi_active, -1); } } - if (!err) - init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 0d6deb55a2ae..75c5b1263970 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void) return false; } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; + return 0; } pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); + + return 0; } +early_initcall(init_hw_perf_event); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 550e26b1dbb3..d9d4dae305f6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -125,7 +125,6 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 @@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); } #else -static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } #endif diff --git 
a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda30938..1d59834396bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c01dfec635db..817d2b195e8e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1353,7 +1353,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1368,11 +1368,11 @@ void __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return; + return 0; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); - return; + return 0; } pmu_check_apic(); @@ -1380,7 +1380,7 @@ void __init init_hw_perf_events(void) /* sanity check that the hardware exists or is emulated */ if (!check_hw_exists()) { pr_cont("Broken PMU hardware detected, software events only.\n"); - return; + return 0; } pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -1431,7 +1431,10 @@ void __init init_hw_perf_events(void) perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); + + return 0; } +early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c79e921a68b..d2e63d1e725c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; +void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) { @@ -326,6 +327,9 @@ static 
inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } +static inline void lockup_detector_init(void) +{ +} #endif #ifdef CONFIG_DETECT_HUNG_TASK diff --git a/init/main.c b/init/main.c index 8646401f7a0e..261ad7b3fe0b 100644 --- a/init/main.c +++ b/init/main.c @@ -882,6 +882,7 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); + lockup_detector_init(); smp_init(); sched_init_smp(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e3c41a4024c..cad4e42060a9 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -547,13 +547,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -static int __init spawn_watchdog_task(void) +void __init lockup_detector_init(void) { void *cpu = (void *)(long)smp_processor_id(); int err; if (no_watchdog) - return 0; + return; err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); @@ -561,6 +561,5 @@ static int __init spawn_watchdog_task(void) cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - return 0; + return; } -early_initcall(spawn_watchdog_task); -- cgit v1.2.3-71-gd317 From 5a0d2268d259886f0c87131639d19eb4a67b4532 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 10:42:02 +0000 Subject: net: add netif_tx_queue_frozen_or_stopped When testing struct netdev_queue state against FROZEN bit, we also test XOFF bit. We can test both bits at once and save some cycles. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ++++-- net/core/netpoll.c | 3 +-- net/core/pktgen.c | 2 +- net/sched/sch_generic.c | 8 +++----- net/sched/sch_teql.c | 3 +-- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index badf9285fe0d..7c6ae2f4b9ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -493,6 +493,8 @@ static inline void napi_synchronize(const struct napi_struct *n) enum netdev_queue_state_t { __QUEUE_STATE_XOFF, __QUEUE_STATE_FROZEN, +#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF) | \ + (1 << __QUEUE_STATE_FROZEN)) }; struct netdev_queue { @@ -1629,9 +1631,9 @@ static inline int netif_queue_stopped(const struct net_device *dev) return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } -static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue) +static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue) { - return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state); + return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN; } /** diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af0..ee38acb6d463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq) || + if (netif_tx_queue_frozen_or_stopped(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2e57830cbeb2..2953b2abc971 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3527,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { + if 
(unlikely(netif_tx_queue_frozen_or_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5dbb3cd96e59..7f0bd8952646 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { + if (!netif_tx_queue_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else @@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_frozen_or_stopped(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, ret = dev_requeue_skb(skb, q); } - if (ret && (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq))) + if (ret && netif_tx_queue_frozen_or_stopped(txq)) ret = 0; return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 401af9596709..106479a7c94a 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -309,8 +309,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); - if (!netif_tx_queue_stopped(slave_txq) && - !netif_tx_queue_frozen(slave_txq) && + if (!netif_tx_queue_frozen_or_stopped(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); -- cgit v1.2.3-71-gd317 From 85beb5869a4f6abb52a7cf8e01de6fa57e9ee47d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 24 Nov 2010 16:23:34 -0500 Subject: tracing/slab: Move kmalloc tracepoint out of inline code The tracepoint for kmalloc is in the slab inlined code 
which causes every instance of kmalloc to have the tracepoint. This patch moves the tracepoint out of the inline code to the slab C file, which removes a large number of inlined trace points. objdump -dr vmlinux.slab| grep 'jmpq.* Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 33 +++++++++++++-------------------- mm/slab.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 791a502f6906..83203ae9390b 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -138,11 +138,12 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags); +extern void *kmem_cache_alloc_trace(size_t size, + struct kmem_cache *cachep, gfp_t flags); extern size_t slab_buffer_size(struct kmem_cache *cachep); #else static __always_inline void * -kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) +kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags) { return kmem_cache_alloc(cachep, flags); } @@ -179,10 +180,7 @@ found: #endif cachep = malloc_sizes[i].cs_cachep; - ret = kmem_cache_alloc_notrace(cachep, flags); - - trace_kmalloc(_THIS_IP_, ret, - size, slab_buffer_size(cachep), flags); + ret = kmem_cache_alloc_trace(size, cachep, flags); return ret; } @@ -194,14 +192,16 @@ extern void *__kmalloc_node(size_t size, gfp_t flags, int node); extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, - gfp_t flags, - int nodeid); +extern void *kmem_cache_alloc_node_trace(size_t size, + struct kmem_cache *cachep, + gfp_t flags, + int nodeid); #else static __always_inline void * -kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, - gfp_t flags, - 
int nodeid) +kmem_cache_alloc_node_trace(size_t size, + struct kmem_cache *cachep, + gfp_t flags, + int nodeid) { return kmem_cache_alloc_node(cachep, flags, nodeid); } @@ -210,7 +210,6 @@ kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { struct kmem_cache *cachep; - void *ret; if (__builtin_constant_p(size)) { int i = 0; @@ -234,13 +233,7 @@ found: #endif cachep = malloc_sizes[i].cs_cachep; - ret = kmem_cache_alloc_node_notrace(cachep, flags, node); - - trace_kmalloc_node(_THIS_IP_, ret, - size, slab_buffer_size(cachep), - flags, node); - - return ret; + return kmem_cache_alloc_node_trace(size, cachep, flags, node); } return __kmalloc_node(size, flags, node); } diff --git a/mm/slab.c b/mm/slab.c index b1e40dafbab3..dfcc8885d7d5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3653,11 +3653,18 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) EXPORT_SYMBOL(kmem_cache_alloc); #ifdef CONFIG_TRACING -void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) +void * +kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags) { - return __cache_alloc(cachep, flags, __builtin_return_address(0)); + void *ret; + + ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); + + trace_kmalloc(_RET_IP_, ret, + size, slab_buffer_size(cachep), flags); + return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_notrace); +EXPORT_SYMBOL(kmem_cache_alloc_trace); #endif /** @@ -3705,31 +3712,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) EXPORT_SYMBOL(kmem_cache_alloc_node); #ifdef CONFIG_TRACING -void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, - gfp_t flags, - int nodeid) +void *kmem_cache_alloc_node_trace(size_t size, + struct kmem_cache *cachep, + gfp_t flags, + int nodeid) { - return __cache_alloc_node(cachep, flags, nodeid, + void *ret; + + ret = __cache_alloc_node(cachep, flags, nodeid, 
__builtin_return_address(0)); + trace_kmalloc_node(_RET_IP_, ret, + size, slab_buffer_size(cachep), + flags, nodeid); + return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); +EXPORT_SYMBOL(kmem_cache_alloc_node_trace); #endif static __always_inline void * __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) { struct kmem_cache *cachep; - void *ret; cachep = kmem_find_general_cachep(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - ret = kmem_cache_alloc_node_notrace(cachep, flags, node); - - trace_kmalloc_node((unsigned long) caller, ret, - size, cachep->buffer_size, flags, node); - - return ret; + return kmem_cache_alloc_node_trace(size, cachep, flags, node); } #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING) -- cgit v1.2.3-71-gd317 From ce6ada35bdf710d16582cc4869c26722547e6f11 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 25 Nov 2010 17:11:32 +0000 Subject: security: Define CAP_SYSLOG Privileged syslog operations currently require CAP_SYS_ADMIN. Split this off into a new CAP_SYSLOG privilege which we can sanely take away from a container through the capability bounding set. With this patch, an lxc container can be prevented from messing with the host's syslog (i.e. dmesg -c). Changelog: mar 12 2010: add selinux capability2:cap_syslog perm Changelog: nov 22 2010: . port to new kernel . add a WARN_ONCE if userspace isn't using CAP_SYSLOG Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Acked-By: Kees Cook Cc: James Morris Cc: Michael Kerrisk Cc: Stephen Smalley Cc: "Christopher J. 
PeBenito" Cc: Eric Paris Signed-off-by: James Morris --- include/linux/capability.h | 7 +++++-- kernel/printk.c | 8 +++++++- security/selinux/include/classmap.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 90012b9ddbf3..fb16a3699b99 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -246,7 +246,6 @@ struct cpu_vfs_cap_data { /* Allow configuration of the secure attention key */ /* Allow administration of the random device */ /* Allow examination and configuration of disk quotas */ -/* Allow configuring the kernel's syslog (printk behaviour) */ /* Allow setting the domainname */ /* Allow setting the hostname */ /* Allow calling bdflush() */ @@ -352,7 +351,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/kernel/printk.c b/kernel/printk.c index 9a2264fc42ca..0712380737b3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -283,8 +283,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) return -EPERM; if ((type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER) && - !capable(CAP_SYS_ADMIN)) + !capable(CAP_SYSLOG)) { + /* remove after 2.6.38 */ + if (capable(CAP_SYS_ADMIN)) + WARN_ONCE(1, "Attempt to access syslog with " + "CAP_SYS_ADMIN but no CAP_SYSLOG " + "(deprecated and denied).\n"); return -EPERM; + } } error = security_syslog(type); diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 8858d2b2d4b6..7ed3663332ec 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -142,7 +142,7 @@ struct security_class_mapping secclass_map[] = { "node_bind", "name_connect", NULL } }, { 
"memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, - { "capability2", { "mac_override", "mac_admin", NULL } }, + { "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, -- cgit v1.2.3-71-gd317 From dc88e46029486ed475c71fe1bb696d39511ac8fe Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 23 Nov 2010 17:50:31 -0500 Subject: lib: hex2bin converts ascii hexadecimal string to binary Similar to the kgdb_hex2mem() code, hex2bin converts a string to binary using the hex_to_bin() library call. Changelog: - Replace parameter names with src/dst (based on David Howell's comment) - Add 'const' where needed (based on David Howell's comment) - Replace int with size_t (based on David Howell's comment) Signed-off-by: Mimi Zohar Acked-by: Serge E. Hallyn Acked-by: David Howells Signed-off-by: James Morris --- include/linux/kernel.h | 1 + lib/hexdump.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a35b4f7332f0..d0fbc043de60 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -265,6 +265,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte) } extern int hex_to_bin(char ch); +extern void hex2bin(u8 *dst, const char *src, size_t count); /* * General tracing related utility functions - trace_printk(), diff --git a/lib/hexdump.c b/lib/hexdump.c index 5d7a4802c562..b66b2bd67952 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -33,6 +33,22 @@ int hex_to_bin(char ch) } EXPORT_SYMBOL(hex_to_bin); +/** + * hex2bin - convert an ascii hexadecimal string to its binary representation + * @dst: binary result + * @src: ascii hexadecimal string + * @count: result length + */ +void hex2bin(u8 *dst, const char *src, size_t count) +{ + while (count--) { + *dst = hex_to_bin(*src++) << 4; + *dst += hex_to_bin(*src++); + dst++; + } 
+} +EXPORT_SYMBOL(hex2bin); + /** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump -- cgit v1.2.3-71-gd317 From c749ba912e87ccebd674ae24b97462176c63732e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 23 Nov 2010 18:54:16 -0500 Subject: key: add tpm_send command Add internal kernel tpm_send() command used to seal/unseal keys. Changelog: - replaced module_put in tpm_send() with new tpm_chip_put() wrapper (suggested by David Howells) - Make tpm_send() cmd argument a 'void *' (suggested by David Howells) Signed-off-by: David Safford Signed-off-by: Mimi Zohar Acked-by: David Howells Acked-by: Serge E. Hallyn Signed-off-by: James Morris --- drivers/char/tpm/tpm.c | 16 ++++++++++++++++ include/linux/tpm.h | 4 ++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 26c09f3b4a74..068bac858b4a 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -780,6 +780,22 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) } EXPORT_SYMBOL_GPL(tpm_pcr_extend); +int tpm_send(u32 chip_num, void *cmd, size_t buflen) +{ + struct tpm_chip *chip; + int rc; + + chip = tpm_chip_find_get(chip_num); + if (chip == NULL) + return -ENODEV; + + rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd"); + + tpm_chip_put(chip); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_send); + ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/include/linux/tpm.h b/include/linux/tpm.h index ac5d1c1285d9..fdc718abf83b 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -31,6 +31,7 @@ extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +extern int tpm_send(u32 chip_num, void *cmd, size_t buflen); #else static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { return -ENODEV; @@ -38,5 +39,8 @@ static inline int 
tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { return -ENODEV; } +static inline int tpm_send(u32 chip_num, void *cmd, size_t buflen) { + return -ENODEV; +} #endif #endif -- cgit v1.2.3-71-gd317 From d00a1c72f7f4661212299e6cb132dfa58030bcdb Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 23 Nov 2010 17:50:34 -0500 Subject: keys: add new trusted key-type Define a new kernel key-type called 'trusted'. Trusted keys are random number symmetric keys, generated and RSA-sealed by the TPM. The TPM only unseals the keys, if the boot PCRs and other criteria match. Userspace can only ever see encrypted blobs. Based on suggestions by Jason Gunthorpe, several new options have been added to support additional usages. The new options are: migratable= designates that the key may/may not ever be updated (resealed under a new key, new pcrinfo or new auth.) pcrlock=n extends the designated PCR 'n' with a random value, so that a key sealed to that PCR may not be unsealed again until after a reboot. keyhandle= specifies the sealing/unsealing key handle. keyauth= specifies the sealing/unsealing key auth. blobauth= specifies the sealed data auth. Implementation of a kernel reserved locality for trusted keys will be investigated for a possible future extension. Changelog: - Updated and added examples to Documentation/keys-trusted-encrypted.txt - Moved generic TPM constants to include/linux/tpm_command.h (David Howell's suggestion.) - trusted_defined.c: replaced kzalloc with kmalloc, added pcrlock failure error handling, added const qualifiers where appropriate. 
- moved to late_initcall - updated from hash to shash (suggestion by David Howells) - reduced worst stack usage (tpm_seal) from 530 to 312 bytes - moved documentation to Documentation directory (suggestion by David Howells) - all the other code cleanups suggested by David Howells - Add pcrlock CAP_SYS_ADMIN dependency (based on comment by Jason Gunthorpe) - New options: migratable, pcrlock, keyhandle, keyauth, blobauth (based on discussions with Jason Gunthorpe) - Free payload on failure to create key (reported/fixed by Roberto Sassu) - Updated Kconfig and other descriptions (based on Serge Hallyn's suggestion) - Replaced kzalloc() with kmalloc() (reported by Serge Hallyn) Signed-off-by: David Safford Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- Documentation/keys-trusted-encrypted.txt | 145 ++++ include/keys/trusted-type.h | 31 + include/linux/tpm_command.h | 28 + security/Kconfig | 15 + security/keys/Makefile | 1 + security/keys/trusted_defined.c | 1151 ++++++++++++++++++++++++++++++ security/keys/trusted_defined.h | 134 ++++ 7 files changed, 1505 insertions(+) create mode 100644 Documentation/keys-trusted-encrypted.txt create mode 100644 include/keys/trusted-type.h create mode 100644 include/linux/tpm_command.h create mode 100644 security/keys/trusted_defined.c create mode 100644 security/keys/trusted_defined.h (limited to 'include/linux') diff --git a/Documentation/keys-trusted-encrypted.txt b/Documentation/keys-trusted-encrypted.txt new file mode 100644 index 000000000000..8fb79bc1ac4b --- /dev/null +++ b/Documentation/keys-trusted-encrypted.txt @@ -0,0 +1,145 @@ + Trusted and Encrypted Keys + +Trusted and Encrypted Keys are two new key types added to the existing kernel +key ring service. Both of these new types are variable length symmetric keys, +and in both cases all keys are created in the kernel, and user space sees, +stores, and loads only encrypted blobs. 
Trusted Keys require the availability +of a Trusted Platform Module (TPM) chip for greater security, while Encrypted +Keys can be used on any system. All user level blobs are displayed and loaded +in hex ascii for convenience, and are integrity verified. + +Trusted Keys use a TPM both to generate and to seal the keys. Keys are sealed +under a 2048 bit RSA key in the TPM, and optionally sealed to specified PCR +(integrity measurement) values, and only unsealed by the TPM, if PCRs and blob +integrity verifications match. A loaded Trusted Key can be updated with new +(future) PCR values, so keys are easily migrated to new PCR values, such as +when the kernel and initramfs are updated. The same key can have many saved +blobs under different PCR values, so multiple boots are easily supported. + +By default, trusted keys are sealed under the SRK, which has the default +authorization value (20 zeros). This can be set at takeownership time with the +TrouSerS utility: "tpm_takeownership -u -z". + +Usage: + keyctl add trusted name "new keylen [options]" ring + keyctl add trusted name "load hex_blob [pcrlock=pcrnum]" ring + keyctl update keyid "update [options]" + keyctl print keyid + + options: + keyhandle= ascii hex value of sealing key default 0x40000000 (SRK) + keyauth= ascii hex auth for sealing key default 0x00... + (40 ascii zeros) + blobauth= ascii hex auth for sealed data default 0x00... + (40 ascii zeros) + blobauth= ascii hex auth for sealed data default 0x00... + (40 ascii zeros) + pcrinfo= ascii hex of PCR_INFO or PCR_INFO_LONG (no default) + pcrlock= pcr number to be extended to "lock" blob + migratable= 0|1 indicating permission to reseal to new PCR values, + default 1 (resealing allowed) + +"keyctl print" returns an ascii hex copy of the sealed key, which is in standard +TPM_STORED_DATA format. The key length for new keys is always in bytes. 
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits), the upper limit is to fit +within the 2048 bit SRK (RSA) keylength, with all necessary structure/padding. + +Encrypted keys do not depend on a TPM, and are faster, as they use AES for +encryption/decryption. New keys are created from kernel generated random +numbers, and are encrypted/decrypted using a specified 'master' key. The +'master' key can either be a trusted-key or user-key type. The main +disadvantage of encrypted keys is that if they are not rooted in a trusted key, +they are only as secure as the user key encrypting them. The master user key +should therefore be loaded in as secure a way as possible, preferably early in +boot. + +Usage: + keyctl add encrypted name "new key-type:master-key-name keylen" ring + keyctl add encrypted name "load hex_blob" ring + keyctl update keyid "update key-type:master-key-name" + +where 'key-type' is either 'trusted' or 'user'. + +Examples of trusted and encrypted key usage: + +Create and save a trusted key named "kmk" of length 32 bytes: + + $ keyctl add trusted kmk "new 32" @u + 440502848 + + $ keyctl show + Session Keyring + -3 --alswrv 500 500 keyring: _ses + 97833714 --alswrv 500 -1 \_ keyring: _uid.500 + 440502848 --alswrv 500 500 \_ trusted: kmk + + $ keyctl print 440502848 + 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 + 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b + 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 + a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec + d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d + dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 + f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b + e4a8aea2b607ec96931e6f4d4fe563ba + + $ keyctl pipe 440502848 > kmk.blob + +Load a trusted key from the saved blob: + + $ keyctl add trusted kmk "load `cat kmk.blob`" @u + 
268728824 + + $ keyctl print 268728824 + 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915 + 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b + 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722 + a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec + d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d + dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0 + f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b + e4a8aea2b607ec96931e6f4d4fe563ba + +Reseal a trusted key under new pcr values: + + $ keyctl update 268728824 "update pcrinfo=`cat pcr.blob`" + $ keyctl print 268728824 + 010100000000002c0002800093c35a09b70fff26e7a98ae786c641e678ec6ffb6b46d805 + 77c8a6377aed9d3219c6dfec4b23ffe3000001005d37d472ac8a44023fbb3d18583a4f73 + d3a076c0858f6f1dcaa39ea0f119911ff03f5406df4f7f27f41da8d7194f45c9f4e00f2e + df449f266253aa3f52e55c53de147773e00f0f9aca86c64d94c95382265968c354c5eab4 + 9638c5ae99c89de1e0997242edfb0b501744e11ff9762dfd951cffd93227cc513384e7e6 + e782c29435c7ec2edafaa2f4c1fe6e7a781b59549ff5296371b42133777dcc5b8b971610 + 94bc67ede19e43ddb9dc2baacad374a36feaf0314d700af0a65c164b7082401740e489c9 + 7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef + df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8 + +Create and save an encrypted key "evm" using the above trusted key "kmk": + + $ keyctl add encrypted evm "new trusted:kmk 32" @u + 159771175 + + $ keyctl print 159771175 + trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 + be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 + 5972dcb82ab2dde83376d82b2e3c09ffc + + $ keyctl pipe 159771175 > evm.blob + +Load an encrypted key "evm" from saved blob: + + $ keyctl add encrypted evm "load `cat evm.blob`" @u + 831684262 + + $ keyctl print 831684262 + trusted:kmk 32 
2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 + be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 + 5972dcb82ab2dde83376d82b2e3c09ffc + + +The initial consumer of trusted keys is EVM, which at boot time needs a high +quality symmetric key for HMAC protection of file metadata. The use of a +trusted key provides strong guarantees that the EVM key has not been +compromised by a user level problem, and when sealed to specific boot PCR +values, protects against boot and offline attacks. Other uses for trusted and +encrypted keys, such as for disk and file encryption are anticipated. diff --git a/include/keys/trusted-type.h b/include/keys/trusted-type.h new file mode 100644 index 000000000000..56f82e5c9975 --- /dev/null +++ b/include/keys/trusted-type.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2010 IBM Corporation + * Author: David Safford + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. 
+ */ + +#ifndef _KEYS_TRUSTED_TYPE_H +#define _KEYS_TRUSTED_TYPE_H + +#include +#include + +#define MIN_KEY_SIZE 32 +#define MAX_KEY_SIZE 128 +#define MAX_BLOB_SIZE 320 + +struct trusted_key_payload { + struct rcu_head rcu; + unsigned int key_len; + unsigned int blob_len; + unsigned char migratable; + unsigned char key[MAX_KEY_SIZE + 1]; + unsigned char blob[MAX_BLOB_SIZE]; +}; + +extern struct key_type key_type_trusted; + +#endif /* _KEYS_TRUSTED_TYPE_H */ diff --git a/include/linux/tpm_command.h b/include/linux/tpm_command.h new file mode 100644 index 000000000000..727512e249b5 --- /dev/null +++ b/include/linux/tpm_command.h @@ -0,0 +1,28 @@ +#ifndef __LINUX_TPM_COMMAND_H__ +#define __LINUX_TPM_COMMAND_H__ + +/* + * TPM Command constants from specifications at + * http://www.trustedcomputinggroup.org + */ + +/* Command TAGS */ +#define TPM_TAG_RQU_COMMAND 193 +#define TPM_TAG_RQU_AUTH1_COMMAND 194 +#define TPM_TAG_RQU_AUTH2_COMMAND 195 +#define TPM_TAG_RSP_COMMAND 196 +#define TPM_TAG_RSP_AUTH1_COMMAND 197 +#define TPM_TAG_RSP_AUTH2_COMMAND 198 + +/* Command Ordinals */ +#define TPM_ORD_GETRANDOM 70 +#define TPM_ORD_OSAP 11 +#define TPM_ORD_OIAP 10 +#define TPM_ORD_SEAL 23 +#define TPM_ORD_UNSEAL 24 + +/* Other constants */ +#define SRKHANDLE 0x40000000 +#define TPM_NONCE_SIZE 20 + +#endif diff --git a/security/Kconfig b/security/Kconfig index e80da955e687..24b8f9b491b8 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -21,6 +21,21 @@ config KEYS If you are unsure as to whether this is required, answer N. +config TRUSTED_KEYS + tristate "TRUSTED KEYS" + depends on KEYS && TCG_TPM + select CRYPTO + select CRYPTO_HMAC + select CRYPTO_SHA1 + help + This option provides support for creating, sealing, and unsealing + keys in the kernel. Trusted keys are random number symmetric keys, + generated and RSA-sealed by the TPM. The TPM only unseals the keys, + if the boot PCRs and other criteria match. Userspace will only ever + see encrypted blobs. 
+ + If you are unsure as to whether this is required, answer N. + config KEYS_DEBUG_PROC_KEYS bool "Enable the /proc/keys file by which keys may be viewed" depends on KEYS diff --git a/security/keys/Makefile b/security/keys/Makefile index 74d5447d7df7..fcb107020b4a 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -13,6 +13,7 @@ obj-y := \ request_key_auth.o \ user_defined.o +obj-$(CONFIG_TRUSTED_KEYS) += trusted_defined.o obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o diff --git a/security/keys/trusted_defined.c b/security/keys/trusted_defined.c new file mode 100644 index 000000000000..1bec72e7596d --- /dev/null +++ b/security/keys/trusted_defined.c @@ -0,0 +1,1151 @@ +/* + * Copyright (C) 2010 IBM Corporation + * + * Author: + * David Safford + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. 
+ * + * See Documentation/keys-trusted-encrypted.txt + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trusted_defined.h" + +static const char hmac_alg[] = "hmac(sha1)"; +static const char hash_alg[] = "sha1"; + +struct sdesc { + struct shash_desc shash; + char ctx[]; +}; + +static struct crypto_shash *hashalg; +static struct crypto_shash *hmacalg; + +static struct sdesc *init_sdesc(struct crypto_shash *alg) +{ + struct sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return ERR_PTR(-ENOMEM); + sdesc->shash.tfm = alg; + sdesc->shash.flags = 0x0; + return sdesc; +} + +static int TSS_sha1(const unsigned char *data, const unsigned int datalen, + unsigned char *digest) +{ + struct sdesc *sdesc; + int ret; + + sdesc = init_sdesc(hashalg); + if (IS_ERR(sdesc)) { + pr_info("trusted_key: can't alloc %s\n", hash_alg); + return PTR_ERR(sdesc); + } + + ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest); + kfree(sdesc); + return ret; +} + +static int TSS_rawhmac(unsigned char *digest, const unsigned char *key, + const unsigned int keylen, ...) 
+{ + struct sdesc *sdesc; + va_list argp; + unsigned int dlen; + unsigned char *data; + int ret; + + sdesc = init_sdesc(hmacalg); + if (IS_ERR(sdesc)) { + pr_info("trusted_key: can't alloc %s\n", hmac_alg); + return PTR_ERR(sdesc); + } + + ret = crypto_shash_setkey(hmacalg, key, keylen); + if (ret < 0) + goto out; + ret = crypto_shash_init(&sdesc->shash); + if (ret < 0) + goto out; + + va_start(argp, keylen); + for (;;) { + dlen = va_arg(argp, unsigned int); + if (dlen == 0) + break; + data = va_arg(argp, unsigned char *); + if (data == NULL) + return -EINVAL; + ret = crypto_shash_update(&sdesc->shash, data, dlen); + if (ret < 0) + goto out; + } + va_end(argp); + ret = crypto_shash_final(&sdesc->shash, digest); +out: + kfree(sdesc); + return ret; +} + +/* + * calculate authorization info fields to send to TPM + */ +static uint32_t TSS_authhmac(unsigned char *digest, const unsigned char *key, + const unsigned int keylen, unsigned char *h1, + unsigned char *h2, unsigned char h3, ...) +{ + unsigned char paramdigest[SHA1_DIGEST_SIZE]; + struct sdesc *sdesc; + unsigned int dlen; + unsigned char *data; + unsigned char c; + int ret; + va_list argp; + + sdesc = init_sdesc(hashalg); + if (IS_ERR(sdesc)) { + pr_info("trusted_key: can't alloc %s\n", hash_alg); + return PTR_ERR(sdesc); + } + + c = h3; + ret = crypto_shash_init(&sdesc->shash); + if (ret < 0) + goto out; + va_start(argp, h3); + for (;;) { + dlen = va_arg(argp, unsigned int); + if (dlen == 0) + break; + data = va_arg(argp, unsigned char *); + ret = crypto_shash_update(&sdesc->shash, data, dlen); + if (ret < 0) + goto out; + } + va_end(argp); + ret = crypto_shash_final(&sdesc->shash, paramdigest); + if (!ret) + TSS_rawhmac(digest, key, keylen, SHA1_DIGEST_SIZE, + paramdigest, TPM_NONCE_SIZE, h1, + TPM_NONCE_SIZE, h2, 1, &c, 0, 0); +out: + kfree(sdesc); + return ret; +} + +/* + * verify the AUTH1_COMMAND (Seal) result from TPM + */ +static uint32_t TSS_checkhmac1(unsigned char *buffer, + const uint32_t command, + 
const unsigned char *ononce, + const unsigned char *key, + const unsigned int keylen, ...) +{ + uint32_t bufsize; + uint16_t tag; + uint32_t ordinal; + uint32_t result; + unsigned char *enonce; + unsigned char *continueflag; + unsigned char *authdata; + unsigned char testhmac[SHA1_DIGEST_SIZE]; + unsigned char paramdigest[SHA1_DIGEST_SIZE]; + struct sdesc *sdesc; + unsigned int dlen; + unsigned int dpos; + va_list argp; + int ret; + + bufsize = LOAD32(buffer, TPM_SIZE_OFFSET); + tag = LOAD16(buffer, 0); + ordinal = command; + result = LOAD32N(buffer, TPM_RETURN_OFFSET); + if (tag == TPM_TAG_RSP_COMMAND) + return 0; + if (tag != TPM_TAG_RSP_AUTH1_COMMAND) + return -EINVAL; + authdata = buffer + bufsize - SHA1_DIGEST_SIZE; + continueflag = authdata - 1; + enonce = continueflag - TPM_NONCE_SIZE; + + sdesc = init_sdesc(hashalg); + if (IS_ERR(sdesc)) { + pr_info("trusted_key: can't alloc %s\n", hash_alg); + return PTR_ERR(sdesc); + } + ret = crypto_shash_init(&sdesc->shash); + if (ret < 0) + goto out; + ret = crypto_shash_update(&sdesc->shash, (const u8 *)&result, + sizeof result); + if (ret < 0) + goto out; + ret = crypto_shash_update(&sdesc->shash, (const u8 *)&ordinal, + sizeof ordinal); + if (ret < 0) + goto out; + va_start(argp, keylen); + for (;;) { + dlen = va_arg(argp, unsigned int); + if (dlen == 0) + break; + dpos = va_arg(argp, unsigned int); + ret = crypto_shash_update(&sdesc->shash, buffer + dpos, dlen); + if (ret < 0) + goto out; + } + va_end(argp); + ret = crypto_shash_final(&sdesc->shash, paramdigest); + if (ret < 0) + goto out; + ret = TSS_rawhmac(testhmac, key, keylen, SHA1_DIGEST_SIZE, paramdigest, + TPM_NONCE_SIZE, enonce, TPM_NONCE_SIZE, ononce, + 1, continueflag, 0, 0); + if (ret < 0) + goto out; + if (memcmp(testhmac, authdata, SHA1_DIGEST_SIZE)) + ret = -EINVAL; +out: + kfree(sdesc); + return ret; +} + +/* + * verify the AUTH2_COMMAND (unseal) result from TPM + */ +static uint32_t TSS_checkhmac2(unsigned char *buffer, + const uint32_t command, + 
const unsigned char *ononce, + const unsigned char *key1, + const unsigned int keylen1, + const unsigned char *key2, + const unsigned int keylen2, ...) +{ + uint32_t bufsize; + uint16_t tag; + uint32_t ordinal; + uint32_t result; + unsigned char *enonce1; + unsigned char *continueflag1; + unsigned char *authdata1; + unsigned char *enonce2; + unsigned char *continueflag2; + unsigned char *authdata2; + unsigned char testhmac1[SHA1_DIGEST_SIZE]; + unsigned char testhmac2[SHA1_DIGEST_SIZE]; + unsigned char paramdigest[SHA1_DIGEST_SIZE]; + struct sdesc *sdesc; + unsigned int dlen; + unsigned int dpos; + va_list argp; + int ret; + + bufsize = LOAD32(buffer, TPM_SIZE_OFFSET); + tag = LOAD16(buffer, 0); + ordinal = command; + result = LOAD32N(buffer, TPM_RETURN_OFFSET); + + if (tag == TPM_TAG_RSP_COMMAND) + return 0; + if (tag != TPM_TAG_RSP_AUTH2_COMMAND) + return -EINVAL; + authdata1 = buffer + bufsize - (SHA1_DIGEST_SIZE + 1 + + SHA1_DIGEST_SIZE + SHA1_DIGEST_SIZE); + authdata2 = buffer + bufsize - (SHA1_DIGEST_SIZE); + continueflag1 = authdata1 - 1; + continueflag2 = authdata2 - 1; + enonce1 = continueflag1 - TPM_NONCE_SIZE; + enonce2 = continueflag2 - TPM_NONCE_SIZE; + + sdesc = init_sdesc(hashalg); + if (IS_ERR(sdesc)) { + pr_info("trusted_key: can't alloc %s\n", hash_alg); + return PTR_ERR(sdesc); + } + ret = crypto_shash_init(&sdesc->shash); + if (ret < 0) + goto out; + ret = crypto_shash_update(&sdesc->shash, (const u8 *)&result, + sizeof result); + if (ret < 0) + goto out; + ret = crypto_shash_update(&sdesc->shash, (const u8 *)&ordinal, + sizeof ordinal); + if (ret < 0) + goto out; + + va_start(argp, keylen2); + for (;;) { + dlen = va_arg(argp, unsigned int); + if (dlen == 0) + break; + dpos = va_arg(argp, unsigned int); + ret = crypto_shash_update(&sdesc->shash, buffer + dpos, dlen); + if (ret < 0) + goto out; + } + ret = crypto_shash_final(&sdesc->shash, paramdigest); + if (ret < 0) + goto out; + + ret = TSS_rawhmac(testhmac1, key1, keylen1, SHA1_DIGEST_SIZE, + 
paramdigest, TPM_NONCE_SIZE, enonce1, + TPM_NONCE_SIZE, ononce, 1, continueflag1, 0, 0); + if (memcmp(testhmac1, authdata1, SHA1_DIGEST_SIZE)) { + ret = -EINVAL; + goto out; + } + ret = TSS_rawhmac(testhmac2, key2, keylen2, SHA1_DIGEST_SIZE, + paramdigest, TPM_NONCE_SIZE, enonce2, + TPM_NONCE_SIZE, ononce, 1, continueflag2, 0, 0); + if (memcmp(testhmac2, authdata2, SHA1_DIGEST_SIZE)) + ret = -EINVAL; +out: + kfree(sdesc); + return ret; +} + +/* + * For key specific tpm requests, we will generate and send our + * own TPM command packets using the drivers send function. + */ +static int trusted_tpm_send(const u32 chip_num, unsigned char *cmd, + size_t buflen) +{ + int rc; + + dump_tpm_buf(cmd); + rc = tpm_send(chip_num, cmd, buflen); + dump_tpm_buf(cmd); + if (rc > 0) + /* Can't return positive return codes values to keyctl */ + rc = -EPERM; + return rc; +} + +/* + * get a random value from TPM + */ +static int tpm_get_random(struct tpm_buf *tb, unsigned char *buf, uint32_t len) +{ + int ret; + + INIT_BUF(tb); + store16(tb, TPM_TAG_RQU_COMMAND); + store32(tb, TPM_GETRANDOM_SIZE); + store32(tb, TPM_ORD_GETRANDOM); + store32(tb, len); + ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, sizeof tb->data); + memcpy(buf, tb->data + TPM_GETRANDOM_SIZE, len); + + return ret; +} + +static int my_get_random(unsigned char *buf, int len) +{ + struct tpm_buf *tb; + int ret; + + tb = kzalloc(sizeof *tb, GFP_KERNEL); + if (!tb) + return -ENOMEM; + ret = tpm_get_random(tb, buf, len); + + kfree(tb); + return ret; +} + +/* + * Lock a trusted key, by extending a selected PCR. + * + * Prevents a trusted key that is sealed to PCRs from being accessed. + * This uses the tpm driver's extend function. + */ +static int pcrlock(const int pcrnum) +{ + unsigned char hash[SHA1_DIGEST_SIZE]; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + my_get_random(hash, SHA1_DIGEST_SIZE); + return tpm_pcr_extend(TPM_ANY_NUM, pcrnum, hash) ? 
-EINVAL : 0; +} + +/* + * Create an object specific authorisation protocol (OSAP) session + */ +static int osap(struct tpm_buf *tb, struct osapsess *s, + const unsigned char *key, const uint16_t type, + const uint32_t handle) +{ + unsigned char enonce[TPM_NONCE_SIZE]; + unsigned char ononce[TPM_NONCE_SIZE]; + int ret; + + ret = tpm_get_random(tb, ononce, TPM_NONCE_SIZE); + if (ret < 0) + return ret; + + INIT_BUF(tb); + store16(tb, TPM_TAG_RQU_COMMAND); + store32(tb, TPM_OSAP_SIZE); + store32(tb, TPM_ORD_OSAP); + store16(tb, type); + store32(tb, handle); + storebytes(tb, ononce, TPM_NONCE_SIZE); + + ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE); + if (ret < 0) + return ret; + + s->handle = LOAD32(tb->data, TPM_DATA_OFFSET); + memcpy(s->enonce, &(tb->data[TPM_DATA_OFFSET + sizeof(uint32_t)]), + TPM_NONCE_SIZE); + memcpy(enonce, &(tb->data[TPM_DATA_OFFSET + sizeof(uint32_t) + + TPM_NONCE_SIZE]), TPM_NONCE_SIZE); + ret = TSS_rawhmac(s->secret, key, SHA1_DIGEST_SIZE, TPM_NONCE_SIZE, + enonce, TPM_NONCE_SIZE, ononce, 0, 0); + return ret; +} + +/* + * Create an object independent authorisation protocol (oiap) session + */ +static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce) +{ + int ret; + + INIT_BUF(tb); + store16(tb, TPM_TAG_RQU_COMMAND); + store32(tb, TPM_OIAP_SIZE); + store32(tb, TPM_ORD_OIAP); + ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE); + if (ret < 0) + return ret; + + *handle = LOAD32(tb->data, TPM_DATA_OFFSET); + memcpy(nonce, &tb->data[TPM_DATA_OFFSET + sizeof(uint32_t)], + TPM_NONCE_SIZE); + return ret; +} + +struct tpm_digests { + unsigned char encauth[SHA1_DIGEST_SIZE]; + unsigned char pubauth[SHA1_DIGEST_SIZE]; + unsigned char xorwork[SHA1_DIGEST_SIZE * 2]; + unsigned char xorhash[SHA1_DIGEST_SIZE]; + unsigned char nonceodd[TPM_NONCE_SIZE]; +}; + +/* + * Have the TPM seal(encrypt) the trusted key, possibly based on + * Platform Configuration Registers (PCRs). AUTH1 for sealing key. 
+ */ +static int tpm_seal(struct tpm_buf *tb, const uint16_t keytype, + const uint32_t keyhandle, const unsigned char *keyauth, + const unsigned char *data, const uint32_t datalen, + unsigned char *blob, uint32_t *bloblen, + const unsigned char *blobauth, + const unsigned char *pcrinfo, const uint32_t pcrinfosize) +{ + struct osapsess sess; + struct tpm_digests *td; + unsigned char cont; + uint32_t ordinal; + uint32_t pcrsize; + uint32_t datsize; + int sealinfosize; + int encdatasize; + int storedsize; + int ret; + int i; + + /* alloc some work space for all the hashes */ + td = kmalloc(sizeof *td, GFP_KERNEL); + if (!td) + return -ENOMEM; + + /* get session for sealing key */ + ret = osap(tb, &sess, keyauth, keytype, keyhandle); + if (ret < 0) + return ret; + dump_sess(&sess); + + /* calculate encrypted authorization value */ + memcpy(td->xorwork, sess.secret, SHA1_DIGEST_SIZE); + memcpy(td->xorwork + SHA1_DIGEST_SIZE, sess.enonce, SHA1_DIGEST_SIZE); + ret = TSS_sha1(td->xorwork, SHA1_DIGEST_SIZE * 2, td->xorhash); + if (ret < 0) + return ret; + + ret = tpm_get_random(tb, td->nonceodd, TPM_NONCE_SIZE); + if (ret < 0) + return ret; + ordinal = htonl(TPM_ORD_SEAL); + datsize = htonl(datalen); + pcrsize = htonl(pcrinfosize); + cont = 0; + + /* encrypt data authorization key */ + for (i = 0; i < SHA1_DIGEST_SIZE; ++i) + td->encauth[i] = td->xorhash[i] ^ blobauth[i]; + + /* calculate authorization HMAC value */ + if (pcrinfosize == 0) { + /* no pcr info specified */ + TSS_authhmac(td->pubauth, sess.secret, SHA1_DIGEST_SIZE, + sess.enonce, td->nonceodd, cont, sizeof(uint32_t), + &ordinal, SHA1_DIGEST_SIZE, td->encauth, + sizeof(uint32_t), &pcrsize, sizeof(uint32_t), + &datsize, datalen, data, 0, 0); + } else { + /* pcr info specified */ + TSS_authhmac(td->pubauth, sess.secret, SHA1_DIGEST_SIZE, + sess.enonce, td->nonceodd, cont, sizeof(uint32_t), + &ordinal, SHA1_DIGEST_SIZE, td->encauth, + sizeof(uint32_t), &pcrsize, pcrinfosize, + pcrinfo, sizeof(uint32_t), &datsize, 
datalen, + data, 0, 0); + } + + /* build and send the TPM request packet */ + INIT_BUF(tb); + store16(tb, TPM_TAG_RQU_AUTH1_COMMAND); + store32(tb, TPM_SEAL_SIZE + pcrinfosize + datalen); + store32(tb, TPM_ORD_SEAL); + store32(tb, keyhandle); + storebytes(tb, td->encauth, SHA1_DIGEST_SIZE); + store32(tb, pcrinfosize); + storebytes(tb, pcrinfo, pcrinfosize); + store32(tb, datalen); + storebytes(tb, data, datalen); + store32(tb, sess.handle); + storebytes(tb, td->nonceodd, TPM_NONCE_SIZE); + store8(tb, cont); + storebytes(tb, td->pubauth, SHA1_DIGEST_SIZE); + + ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE); + if (ret < 0) + return ret; + + /* calculate the size of the returned Blob */ + sealinfosize = LOAD32(tb->data, TPM_DATA_OFFSET + sizeof(uint32_t)); + encdatasize = LOAD32(tb->data, TPM_DATA_OFFSET + sizeof(uint32_t) + + sizeof(uint32_t) + sealinfosize); + storedsize = sizeof(uint32_t) + sizeof(uint32_t) + sealinfosize + + sizeof(uint32_t) + encdatasize; + + /* check the HMAC in the response */ + ret = TSS_checkhmac1(tb->data, ordinal, td->nonceodd, sess.secret, + SHA1_DIGEST_SIZE, storedsize, TPM_DATA_OFFSET, 0, + 0); + + /* copy the returned blob to caller */ + memcpy(blob, tb->data + TPM_DATA_OFFSET, storedsize); + *bloblen = storedsize; + return ret; +} + +/* + * use the AUTH2_COMMAND form of unseal, to authorize both key and blob + */ +static int tpm_unseal(struct tpm_buf *tb, + const uint32_t keyhandle, const unsigned char *keyauth, + const unsigned char *blob, const int bloblen, + const unsigned char *blobauth, + unsigned char *data, unsigned int *datalen) +{ + unsigned char nonceodd[TPM_NONCE_SIZE]; + unsigned char enonce1[TPM_NONCE_SIZE]; + unsigned char enonce2[TPM_NONCE_SIZE]; + unsigned char authdata1[SHA1_DIGEST_SIZE]; + unsigned char authdata2[SHA1_DIGEST_SIZE]; + uint32_t authhandle1 = 0; + uint32_t authhandle2 = 0; + unsigned char cont = 0; + uint32_t ordinal; + uint32_t keyhndl; + int ret; + + /* sessions for unsealing key and data 
*/ + ret = oiap(tb, &authhandle1, enonce1); + if (ret < 0) { + pr_info("trusted_key: oiap failed (%d)\n", ret); + return ret; + } + ret = oiap(tb, &authhandle2, enonce2); + if (ret < 0) { + pr_info("trusted_key: oiap failed (%d)\n", ret); + return ret; + } + + ordinal = htonl(TPM_ORD_UNSEAL); + keyhndl = htonl(SRKHANDLE); + ret = tpm_get_random(tb, nonceodd, TPM_NONCE_SIZE); + if (ret < 0) { + pr_info("trusted_key: tpm_get_random failed (%d)\n", ret); + return ret; + } + TSS_authhmac(authdata1, keyauth, TPM_NONCE_SIZE, + enonce1, nonceodd, cont, sizeof(uint32_t), + &ordinal, bloblen, blob, 0, 0); + TSS_authhmac(authdata2, blobauth, TPM_NONCE_SIZE, + enonce2, nonceodd, cont, sizeof(uint32_t), + &ordinal, bloblen, blob, 0, 0); + + /* build and send TPM request packet */ + INIT_BUF(tb); + store16(tb, TPM_TAG_RQU_AUTH2_COMMAND); + store32(tb, TPM_UNSEAL_SIZE + bloblen); + store32(tb, TPM_ORD_UNSEAL); + store32(tb, keyhandle); + storebytes(tb, blob, bloblen); + store32(tb, authhandle1); + storebytes(tb, nonceodd, TPM_NONCE_SIZE); + store8(tb, cont); + storebytes(tb, authdata1, SHA1_DIGEST_SIZE); + store32(tb, authhandle2); + storebytes(tb, nonceodd, TPM_NONCE_SIZE); + store8(tb, cont); + storebytes(tb, authdata2, SHA1_DIGEST_SIZE); + + ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE); + if (ret < 0) { + pr_info("trusted_key: authhmac failed (%d)\n", ret); + return ret; + } + + *datalen = LOAD32(tb->data, TPM_DATA_OFFSET); + ret = TSS_checkhmac2(tb->data, ordinal, nonceodd, + keyauth, SHA1_DIGEST_SIZE, + blobauth, SHA1_DIGEST_SIZE, + sizeof(uint32_t), TPM_DATA_OFFSET, + *datalen, TPM_DATA_OFFSET + sizeof(uint32_t), 0, + 0); + if (ret < 0) + pr_info("trusted_key: TSS_checkhmac2 failed (%d)\n", ret); + memcpy(data, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), *datalen); + return ret; +} + +/* + * Have the TPM seal(encrypt) the symmetric key + */ +static int key_seal(struct trusted_key_payload *p, + struct trusted_key_options *o) +{ + struct tpm_buf *tb; + int 
ret; + + tb = kzalloc(sizeof *tb, GFP_KERNEL); + if (!tb) + return -ENOMEM; + + /* include migratable flag at end of sealed key */ + p->key[p->key_len] = p->migratable; + + ret = tpm_seal(tb, o->keytype, o->keyhandle, o->keyauth, + p->key, p->key_len + 1, p->blob, &p->blob_len, + o->blobauth, o->pcrinfo, o->pcrinfo_len); + if (ret < 0) + pr_info("trusted_key: srkseal failed (%d)\n", ret); + + kfree(tb); + return ret; +} + +/* + * Have the TPM unseal(decrypt) the symmetric key + */ +static int key_unseal(struct trusted_key_payload *p, + struct trusted_key_options *o) +{ + struct tpm_buf *tb; + int ret; + + tb = kzalloc(sizeof *tb, GFP_KERNEL); + if (!tb) + return -ENOMEM; + + ret = tpm_unseal(tb, o->keyhandle, o->keyauth, p->blob, p->blob_len, + o->blobauth, p->key, &p->key_len); + /* pull migratable flag out of sealed key */ + p->migratable = p->key[--p->key_len]; + + if (ret < 0) + pr_info("trusted_key: srkunseal failed (%d)\n", ret); + + kfree(tb); + return ret; +} + +enum { + Opt_err = -1, + Opt_new, Opt_load, Opt_update, + Opt_keyhandle, Opt_keyauth, Opt_blobauth, + Opt_pcrinfo, Opt_pcrlock, Opt_migratable +}; + +static const match_table_t key_tokens = { + {Opt_new, "new"}, + {Opt_load, "load"}, + {Opt_update, "update"}, + {Opt_keyhandle, "keyhandle=%s"}, + {Opt_keyauth, "keyauth=%s"}, + {Opt_blobauth, "blobauth=%s"}, + {Opt_pcrinfo, "pcrinfo=%s"}, + {Opt_pcrlock, "pcrlock=%s"}, + {Opt_migratable, "migratable=%s"}, + {Opt_err, NULL} +}; + +/* can have zero or more token= options */ +static int getoptions(char *c, struct trusted_key_payload *pay, + struct trusted_key_options *opt) +{ + substring_t args[MAX_OPT_ARGS]; + char *p = c; + int token; + int res; + unsigned long handle; + unsigned long lock; + + while ((p = strsep(&c, " \t"))) { + if (*p == '\0' || *p == ' ' || *p == '\t') + continue; + token = match_token(p, key_tokens, args); + + switch (token) { + case Opt_pcrinfo: + opt->pcrinfo_len = strlen(args[0].from) / 2; + if (opt->pcrinfo_len > 
MAX_PCRINFO_SIZE) + return -EINVAL; + hex2bin(opt->pcrinfo, args[0].from, opt->pcrinfo_len); + break; + case Opt_keyhandle: + res = strict_strtoul(args[0].from, 16, &handle); + if (res < 0) + return -EINVAL; + opt->keytype = SEAL_keytype; + opt->keyhandle = handle; + break; + case Opt_keyauth: + if (strlen(args[0].from) != 2 * SHA1_DIGEST_SIZE) + return -EINVAL; + hex2bin(opt->keyauth, args[0].from, SHA1_DIGEST_SIZE); + break; + case Opt_blobauth: + if (strlen(args[0].from) != 2 * SHA1_DIGEST_SIZE) + return -EINVAL; + hex2bin(opt->blobauth, args[0].from, SHA1_DIGEST_SIZE); + break; + case Opt_migratable: + if (*args[0].from == '0') + pay->migratable = 0; + else + return -EINVAL; + break; + case Opt_pcrlock: + res = strict_strtoul(args[0].from, 10, &lock); + if (res < 0) + return -EINVAL; + opt->pcrlock = lock; + break; + default: + return -EINVAL; + } + } + return 0; +} + +/* + * datablob_parse - parse the keyctl data and fill in the + * payload and options structures + * + * On success returns 0, otherwise -EINVAL. 
+ */ +static int datablob_parse(char *datablob, struct trusted_key_payload *p, + struct trusted_key_options *o) +{ + substring_t args[MAX_OPT_ARGS]; + long keylen; + int ret = -EINVAL; + int key_cmd; + char *c; + + /* main command */ + c = strsep(&datablob, " \t"); + if (!c) + return -EINVAL; + key_cmd = match_token(c, key_tokens, args); + switch (key_cmd) { + case Opt_new: + /* first argument is key size */ + c = strsep(&datablob, " \t"); + if (!c) + return -EINVAL; + ret = strict_strtol(c, 10, &keylen); + if (ret < 0 || keylen < MIN_KEY_SIZE || keylen > MAX_KEY_SIZE) + return -EINVAL; + p->key_len = keylen; + ret = getoptions(datablob, p, o); + if (ret < 0) + return ret; + ret = Opt_new; + break; + case Opt_load: + /* first argument is sealed blob */ + c = strsep(&datablob, " \t"); + if (!c) + return -EINVAL; + p->blob_len = strlen(c) / 2; + if (p->blob_len > MAX_BLOB_SIZE) + return -EINVAL; + hex2bin(p->blob, c, p->blob_len); + ret = getoptions(datablob, p, o); + if (ret < 0) + return ret; + ret = Opt_load; + break; + case Opt_update: + /* all arguments are options */ + ret = getoptions(datablob, p, o); + if (ret < 0) + return ret; + ret = Opt_update; + break; + case Opt_err: + return -EINVAL; + break; + } + return ret; +} + +static struct trusted_key_options *trusted_options_alloc(void) +{ + struct trusted_key_options *options; + + options = kzalloc(sizeof *options, GFP_KERNEL); + if (!options) + return options; + + /* set any non-zero defaults */ + options->keytype = SRK_keytype; + options->keyhandle = SRKHANDLE; + return options; +} + +static struct trusted_key_payload *trusted_payload_alloc(struct key *key) +{ + struct trusted_key_payload *p = NULL; + int ret; + + ret = key_payload_reserve(key, sizeof *p); + if (ret < 0) + return p; + p = kzalloc(sizeof *p, GFP_KERNEL); + + /* migratable by default */ + p->migratable = 1; + return p; +} + +/* + * trusted_instantiate - create a new trusted key + * + * Unseal an existing trusted blob or, for a new key, get a + 
* random key, then seal and create a trusted key-type key, + * adding it to the specified keyring. + * + * On success, return 0. Otherwise return errno. + */ +static int trusted_instantiate(struct key *key, const void *data, + const size_t datalen) +{ + struct trusted_key_payload *payload = NULL; + struct trusted_key_options *options = NULL; + char *datablob; + int ret = 0; + int key_cmd; + + if (datalen <= 0 || datalen > 32767 || !data) + return -EINVAL; + + datablob = kmalloc(datalen + 1, GFP_KERNEL); + if (!datablob) + return -ENOMEM; + memcpy(datablob, data, datalen); + datablob[datalen] = '\0'; + + options = trusted_options_alloc(); + if (!options) { + ret = -ENOMEM; + goto out; + } + payload = trusted_payload_alloc(key); + if (!payload) { + ret = -ENOMEM; + goto out; + } + + key_cmd = datablob_parse(datablob, payload, options); + if (key_cmd < 0) { + ret = key_cmd; + goto out; + } + + dump_payload(payload); + dump_options(options); + + switch (key_cmd) { + case Opt_load: + ret = key_unseal(payload, options); + dump_payload(payload); + dump_options(options); + if (ret < 0) + pr_info("trusted_key: key_unseal failed (%d)\n", ret); + break; + case Opt_new: + ret = my_get_random(payload->key, payload->key_len); + if (ret < 0) { + pr_info("trusted_key: key_create failed (%d)\n", ret); + goto out; + } + ret = key_seal(payload, options); + if (ret < 0) + pr_info("trusted_key: key_seal failed (%d)\n", ret); + break; + default: + ret = -EINVAL; + goto out; + } + if (!ret && options->pcrlock) + ret = pcrlock(options->pcrlock); +out: + kfree(datablob); + kfree(options); + if (!ret) + rcu_assign_pointer(key->payload.data, payload); + else + kfree(payload); + return ret; +} + +static void trusted_rcu_free(struct rcu_head *rcu) +{ + struct trusted_key_payload *p; + + p = container_of(rcu, struct trusted_key_payload, rcu); + memset(p->key, 0, p->key_len); + kfree(p); +} + +/* + * trusted_update - reseal an existing key with new PCR values + */ +static int 
trusted_update(struct key *key, const void *data, + const size_t datalen) +{ + struct trusted_key_payload *p = key->payload.data; + struct trusted_key_payload *new_p; + struct trusted_key_options *new_o; + char *datablob; + int ret = 0; + + if (!p->migratable) + return -EPERM; + if (datalen <= 0 || datalen > 32767 || !data) + return -EINVAL; + + datablob = kmalloc(datalen + 1, GFP_KERNEL); + if (!datablob) + return -ENOMEM; + new_o = trusted_options_alloc(); + if (!new_o) { + ret = -ENOMEM; + goto out; + } + new_p = trusted_payload_alloc(key); + if (!new_p) { + ret = -ENOMEM; + goto out; + } + + memcpy(datablob, data, datalen); + datablob[datalen] = '\0'; + ret = datablob_parse(datablob, new_p, new_o); + if (ret != Opt_update) { + ret = -EINVAL; + goto out; + } + /* copy old key values, and reseal with new pcrs */ + new_p->migratable = p->migratable; + new_p->key_len = p->key_len; + memcpy(new_p->key, p->key, p->key_len); + dump_payload(p); + dump_payload(new_p); + + ret = key_seal(new_p, new_o); + if (ret < 0) { + pr_info("trusted_key: key_seal failed (%d)\n", ret); + kfree(new_p); + goto out; + } + if (new_o->pcrlock) { + ret = pcrlock(new_o->pcrlock); + if (ret < 0) { + pr_info("trusted_key: pcrlock failed (%d)\n", ret); + kfree(new_p); + goto out; + } + } + rcu_assign_pointer(key->payload.data, new_p); + call_rcu(&p->rcu, trusted_rcu_free); +out: + kfree(datablob); + kfree(new_o); + return ret; +} + +/* + * trusted_read - copy the sealed blob data to userspace in hex. + * On success, return to userspace the trusted key datablob size. 
+ */ +static long trusted_read(const struct key *key, char __user *buffer, + size_t buflen) +{ + struct trusted_key_payload *p; + char *ascii_buf; + char *bufp; + int i; + + p = rcu_dereference_protected(key->payload.data, + rwsem_is_locked(&((struct key *)key)->sem)); + if (!p) + return -EINVAL; + if (!buffer || buflen <= 0) + return 2 * p->blob_len; + ascii_buf = kmalloc(2 * p->blob_len, GFP_KERNEL); + if (!ascii_buf) + return -ENOMEM; + + bufp = ascii_buf; + for (i = 0; i < p->blob_len; i++) + bufp = pack_hex_byte(bufp, p->blob[i]); + if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) { + kfree(ascii_buf); + return -EFAULT; + } + kfree(ascii_buf); + return 2 * p->blob_len; +} + +/* + * trusted_destroy - before freeing the key, clear the decrypted data + */ +static void trusted_destroy(struct key *key) +{ + struct trusted_key_payload *p = key->payload.data; + + if (!p) + return; + memset(p->key, 0, p->key_len); + kfree(key->payload.data); +} + +struct key_type key_type_trusted = { + .name = "trusted", + .instantiate = trusted_instantiate, + .update = trusted_update, + .match = user_match, + .destroy = trusted_destroy, + .describe = user_describe, + .read = trusted_read, +}; + +EXPORT_SYMBOL_GPL(key_type_trusted); + +static void trusted_shash_release(void) +{ + if (hashalg) + crypto_free_shash(hashalg); + if (hmacalg) + crypto_free_shash(hmacalg); +} + +static int __init trusted_shash_alloc(void) +{ + int ret; + + hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hmacalg)) { + pr_info("trusted_key: could not allocate crypto %s\n", + hmac_alg); + return PTR_ERR(hmacalg); + } + + hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hashalg)) { + pr_info("trusted_key: could not allocate crypto %s\n", + hash_alg); + ret = PTR_ERR(hashalg); + goto hashalg_fail; + } + + return 0; + +hashalg_fail: + crypto_free_shash(hmacalg); + return ret; +} + +static int __init init_trusted(void) +{ + int ret; + + ret = 
trusted_shash_alloc(); + if (ret < 0) + return ret; + ret = register_key_type(&key_type_trusted); + if (ret < 0) + trusted_shash_release(); + return ret; +} + +static void __exit cleanup_trusted(void) +{ + trusted_shash_release(); + unregister_key_type(&key_type_trusted); +} + +late_initcall(init_trusted); +module_exit(cleanup_trusted); + +MODULE_LICENSE("GPL"); diff --git a/security/keys/trusted_defined.h b/security/keys/trusted_defined.h new file mode 100644 index 000000000000..3249fbd2b653 --- /dev/null +++ b/security/keys/trusted_defined.h @@ -0,0 +1,134 @@ +#ifndef __TRUSTED_KEY_H +#define __TRUSTED_KEY_H + +/* implementation specific TPM constants */ +#define MAX_PCRINFO_SIZE 64 +#define MAX_BUF_SIZE 512 +#define TPM_GETRANDOM_SIZE 14 +#define TPM_OSAP_SIZE 36 +#define TPM_OIAP_SIZE 10 +#define TPM_SEAL_SIZE 87 +#define TPM_UNSEAL_SIZE 104 +#define TPM_SIZE_OFFSET 2 +#define TPM_RETURN_OFFSET 6 +#define TPM_DATA_OFFSET 10 + +#define LOAD32(buffer, offset) (ntohl(*(uint32_t *)&buffer[offset])) +#define LOAD32N(buffer, offset) (*(uint32_t *)&buffer[offset]) +#define LOAD16(buffer, offset) (ntohs(*(uint16_t *)&buffer[offset])) + +struct tpm_buf { + int len; + unsigned char data[MAX_BUF_SIZE]; +}; + +#define INIT_BUF(tb) (tb->len = 0) + +struct osapsess { + uint32_t handle; + unsigned char secret[SHA1_DIGEST_SIZE]; + unsigned char enonce[TPM_NONCE_SIZE]; +}; + +/* discrete values, but have to store in uint16_t for TPM use */ +enum { + SEAL_keytype = 1, + SRK_keytype = 4 +}; + +struct trusted_key_options { + uint16_t keytype; + uint32_t keyhandle; + unsigned char keyauth[SHA1_DIGEST_SIZE]; + unsigned char blobauth[SHA1_DIGEST_SIZE]; + uint32_t pcrinfo_len; + unsigned char pcrinfo[MAX_PCRINFO_SIZE]; + int pcrlock; +}; + +#define TPM_DEBUG 0 + +#if TPM_DEBUG +static inline void dump_options(struct trusted_key_options *o) +{ + pr_info("trusted_key: sealing key type %d\n", o->keytype); + pr_info("trusted_key: sealing key handle %0X\n", o->keyhandle); + 
pr_info("trusted_key: pcrlock %d\n", o->pcrlock); + pr_info("trusted_key: pcrinfo %d\n", o->pcrinfo_len); + print_hex_dump(KERN_INFO, "pcrinfo ", DUMP_PREFIX_NONE, + 16, 1, o->pcrinfo, o->pcrinfo_len, 0); +} + +static inline void dump_payload(struct trusted_key_payload *p) +{ + pr_info("trusted_key: key_len %d\n", p->key_len); + print_hex_dump(KERN_INFO, "key ", DUMP_PREFIX_NONE, + 16, 1, p->key, p->key_len, 0); + pr_info("trusted_key: bloblen %d\n", p->blob_len); + print_hex_dump(KERN_INFO, "blob ", DUMP_PREFIX_NONE, + 16, 1, p->blob, p->blob_len, 0); + pr_info("trusted_key: migratable %d\n", p->migratable); +} + +static inline void dump_sess(struct osapsess *s) +{ + print_hex_dump(KERN_INFO, "trusted-key: handle ", DUMP_PREFIX_NONE, + 16, 1, &s->handle, 4, 0); + pr_info("trusted-key: secret:\n"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, + 16, 1, &s->secret, SHA1_DIGEST_SIZE, 0); + pr_info("trusted-key: enonce:\n"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, + 16, 1, &s->enonce, SHA1_DIGEST_SIZE, 0); +} + +static inline void dump_tpm_buf(unsigned char *buf) +{ + int len; + + pr_info("\ntrusted-key: tpm buffer\n"); + len = LOAD32(buf, TPM_SIZE_OFFSET); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, len, 0); +} +#else +static inline void dump_options(struct trusted_key_options *o) +{ +} + +static inline void dump_payload(struct trusted_key_payload *p) +{ +} + +static inline void dump_sess(struct osapsess *s) +{ +} + +static inline void dump_tpm_buf(unsigned char *buf) +{ +} +#endif + +static inline void store8(struct tpm_buf *buf, const unsigned char value) +{ + buf->data[buf->len++] = value; +} + +static inline void store16(struct tpm_buf *buf, const uint16_t value) +{ + *(uint16_t *) & buf->data[buf->len] = htons(value); + buf->len += sizeof value; +} + +static inline void store32(struct tpm_buf *buf, const uint32_t value) +{ + *(uint32_t *) & buf->data[buf->len] = htonl(value); + buf->len += sizeof value; +} + +static inline void 
storebytes(struct tpm_buf *buf, const unsigned char *in, + const int len) +{ + memcpy(buf->data + buf->len, in, len); + buf->len += len; +} +#endif -- cgit v1.2.3-71-gd317 From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 26 Nov 2010 08:36:09 +0000 Subject: xps: Add CONFIG_XPS This patch adds the CONFIG_XPS option to enable and disable XPS. This is done in the same manner as CONFIG_RPS. This also fixes a build failure in XPS code when SMP is not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 52 +++++++++++++++++++++++++---------------------- net/Kconfig | 5 +++++ net/core/dev.c | 9 +++++--- net/core/net-sysfs.c | 47 ++++++++++++++++++++++++++++++------------ net/core/net-sysfs.h | 3 --- 5 files changed, 73 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c6ae2f4b9ab..9ae4544f0cf0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,30 +535,6 @@ struct rps_map { }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) -/* - * This structure holds an XPS map which can be of variable length. The - * map is an array of queues. - */ -struct xps_map { - unsigned int len; - unsigned int alloc_len; - struct rcu_head rcu; - u16 queues[0]; -}; -#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) -#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ - / sizeof(u16)) - -/* - * This structure holds all XPS maps for device. Maps are indexed by CPU. - */ -struct xps_dev_maps { - struct rcu_head rcu; - struct xps_map *cpu_map[0]; -}; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) - /* * The rps_dev_flow structure contains the mapping of a flow to a CPU and the * tail pointer for that CPU's input queue at the time of last enqueue. 
@@ -626,6 +602,32 @@ struct netdev_rx_queue { } ____cacheline_aligned_in_smp; #endif /* CONFIG_RPS */ +#ifdef CONFIG_XPS +/* + * This structure holds an XPS map which can be of variable length. The + * map is an array of queues. + */ +struct xps_map { + unsigned int len; + unsigned int alloc_len; + struct rcu_head rcu; + u16 queues[0]; +}; +#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ + / sizeof(u16)) + +/* + * This structure holds all XPS maps for device. Maps are indexed by CPU. + */ +struct xps_dev_maps { + struct rcu_head rcu; + struct xps_map *cpu_map[0]; +}; +#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * sizeof(struct xps_map *))) +#endif /* CONFIG_XPS */ + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1046,7 +1048,9 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; +#ifdef CONFIG_XPS struct xps_dev_maps *xps_maps; +#endif /* These may be needed for future network-power-down code. 
*/ diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd9..126c2af0fc1f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -220,6 +220,11 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config XPS + boolean + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + default y + menu "Network testing" config NET_PKTGEN diff --git a/net/core/dev.c b/net/core/dev.c index c852f0038a08..3259d2c323a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); + if (rc) + return rc; + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { -#ifdef CONFIG_RPS +#ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; struct xps_map *map; int queue_index = -1; @@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { -#ifdef CONFIG_RPS unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; @@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) for (i = 0; i < count; i++) rx[i].dev = dev; -#endif return 0; } +#endif static int netif_alloc_netdev_queues(struct net_device *dev) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 68dbbfdee274..99c11294623f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -751,10 +751,12 @@ static int rx_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_RPS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_RPS int i; int error = 0; @@ -770,8 +772,12 @@ 
net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_rx[i].kobj); return error; +#else + return 0; +#endif } +#ifdef CONFIG_XPS /* * netdev_queue sysfs structures and functions. */ @@ -1090,10 +1096,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_XPS */ int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_XPS int i; int error = 0; @@ -1109,27 +1117,36 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_tx[i].kobj); return error; +#else + return 0; +#endif } static int register_queue_kobjects(struct net_device *net) { - int error = 0, txq = 0, rxq = 0; + int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; +#endif + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; - error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + error = net_rx_queue_update_kobjects(net, 0, real_rx); if (error) goto error; - rxq = net->real_num_rx_queues; + rxq = real_rx; - error = netdev_queue_update_kobjects(net, 0, - net->real_num_tx_queues); + error = netdev_queue_update_kobjects(net, 0, real_tx); if (error) goto error; - txq = net->real_num_tx_queues; + txq = real_tx; return 0; @@ -1141,11 +1158,19 @@ error: static void remove_queue_kobjects(struct net_device *net) { - net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); - netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); + int real_rx = 0, real_tx = 0; + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + net_rx_queue_update_kobjects(net, real_rx, 0); + netdev_queue_update_kobjects(net, real_tx, 0); +#if defined(CONFIG_RPS) || 
defined(CONFIG_XPS) kset_unregister(net->queues_kset); +#endif } -#endif /* CONFIG_RPS */ static const void *net_current_ns(void) { @@ -1244,9 +1269,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); -#ifdef CONFIG_RPS remove_queue_kobjects(net); -#endif device_del(dev); } @@ -1285,13 +1308,11 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_RPS error = register_queue_kobjects(net); if (error) { device_del(dev); return error; } -#endif return error; } diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 25ec2ee57df7..bd7751ec1c4d 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,11 +4,8 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -#ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num); #endif - -#endif -- cgit v1.2.3-71-gd317 From 1ae0affedce1d3e401991fbe7f2674753f0a7641 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 26 Nov 2010 23:02:58 +0000 Subject: mmc, sh: Correct value for reset This resolves a regression that I introduced in "mmc, sh: Move constants to sh_mmcif.h". Having examined the manual and tested the code on an AP4EVB board it seems that the correct sequence is. 
1) Write 1 to bit 31 and zeros to all other bits 2) Write zero to all bits Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- include/linux/mmc/sh_mmcif.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index a6bfa5296495..342ec1a38684 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -80,7 +80,7 @@ struct sh_mmcif_plat_data { /* CE_VERSION */ #define SOFT_RST_ON (1 << 31) -#define SOFT_RST_OFF ~SOFT_RST_ON +#define SOFT_RST_OFF 0 static inline u32 sh_mmcif_readl(void __iomem *addr, int reg) { @@ -168,12 +168,9 @@ static inline int sh_mmcif_boot_do_read(void __iomem *base, static inline void sh_mmcif_boot_init(void __iomem *base) { - unsigned long tmp; - /* reset */ - tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF); + sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_ON); + sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_OFF); /* byte swap */ sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP); -- cgit v1.2.3-71-gd317 From 22efa0fee32d9e7f6f6fbc396a872b5708d86048 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sat, 27 Nov 2010 00:11:55 +0000 Subject: sh, mmc: Use defines when setting CE_CLK_CTRL The 16-19th bits of CE_CLK_CTRL set the MMC clock frequency. 
Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- include/linux/mmc/sh_mmcif.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 342ec1a38684..ffabf8c0a531 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -70,6 +70,9 @@ struct sh_mmcif_plat_data { #define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ #define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) #define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define CLKDIV_4 (1<<16) /* mmc clock frequency. + * n: bus clock/(2^(n+1)) */ +#define CLKDIV_256 (7<<16) /* mmc clock frequency. (see above) */ #define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ #define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ (1 << 9) | (1 << 8)) /* resp busy timeout */ @@ -178,14 +181,10 @@ static inline void sh_mmcif_boot_init(void __iomem *base) /* Set block size in MMCIF hardware */ sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS); - /* Enable the clock, set it to Bus clock/256 (about 325Khz). - * It is unclear where 0x70000 comes from or if it is even needed. - * It is there for byte-compatibility with code that is known to - * work. - */ + /* Enable the clock, set it to Bus clock/256 (about 325Khz). 
*/ sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, - CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | - SCCSTO_29 | 0x70000); + CLK_ENABLE | CLKDIV_256 | SRSPTO_256 | + SRBSYTO_29 | SRWDTO_29 | SCCSTO_29); /* CMD0 */ sh_mmcif_boot_cmd(base, 0x00000040, 0); @@ -210,7 +209,9 @@ static inline void sh_mmcif_boot_slurp(void __iomem *base, unsigned long tmp; /* In data transfer mode: Set clock to Bus clock/4 (about 20Mhz) */ - sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01012fff); + sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, + CLK_ENABLE | CLKDIV_4 | SRSPTO_256 | + SRBSYTO_29 | SRWDTO_29 | SCCSTO_29); /* CMD9 - Get CSD */ sh_mmcif_boot_cmd(base, 0x09806000, 0x00010000); -- cgit v1.2.3-71-gd317 From a41778694806ac1ccd4b1dafed1abef8d5ba98ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Nov 2010 21:43:02 +0000 Subject: xps: add __rcu annotations Avoid sparse warnings : add __rcu annotations and use rcu_dereference_protected() where necessary. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/net-sysfs.c | 24 +++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ae4544f0cf0..4b0c7f3aa32b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -622,7 +622,7 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; - struct xps_map *cpu_map[0]; + struct xps_map __rcu *cpu_map[0]; }; #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ (nr_cpu_ids * sizeof(struct xps_map *))) @@ -1049,7 +1049,7 @@ struct net_device { spinlock_t tx_global_lock; #ifdef CONFIG_XPS - struct xps_dev_maps *xps_maps; + struct xps_dev_maps __rcu *xps_maps; #endif /* These may be needed for future network-power-down code. 
*/ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 35ef42fa0cf3..f85cee3d869e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -899,6 +899,8 @@ static void xps_dev_maps_release(struct rcu_head *rcu) } static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, @@ -935,11 +937,12 @@ static ssize_t store_xps_map(struct netdev_queue *queue, mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); for_each_possible_cpu(cpu) { - new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; if (map) { for (pos = 0; pos < map->len; pos++) if (map->queues[pos] == index) @@ -975,13 +978,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue, else new_map = NULL; } - new_dev_maps->cpu_map[cpu] = new_map; + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); } /* Cleanup old maps */ for_each_possible_cpu(cpu) { - map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - if (map && new_dev_maps->cpu_map[cpu] != map) + map = dev_maps ? 
+ xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) call_rcu(&map->rcu, xps_map_release); if (new_dev_maps->cpu_map[cpu]) nonempty = 1; @@ -1007,7 +1011,9 @@ error: if (new_dev_maps) for_each_possible_cpu(i) - kfree(new_dev_maps->cpu_map[i]); + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); kfree(new_dev_maps); free_cpumask_var(mask); return -ENOMEM; @@ -1033,11 +1039,11 @@ static void netdev_queue_release(struct kobject *kobj) index = get_netdev_queue_index(queue); mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); if (dev_maps) { for_each_possible_cpu(i) { - map = dev_maps->cpu_map[i]; + map = xmap_dereference(dev_maps->cpu_map[i]); if (!map) continue; -- cgit v1.2.3-71-gd317 From 62a8c3a32e4143812ed8e0f3783ef1ea40dc87e4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 24 Nov 2010 19:33:43 +0000 Subject: Staging: sep: handle the rar definition stuff in the header SEP isn't the only driver that may need to handle both cases easily Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/staging/sep/sep_driver.c | 1 + drivers/staging/sep/sep_driver_config.h | 61 --------------------------------- include/linux/rar_register.h | 16 +++++++++ 3 files changed, 17 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/sep/sep_driver.c b/drivers/staging/sep/sep_driver.c index ef36239c7b24..8a1ff861b135 100644 --- a/drivers/staging/sep/sep_driver.c +++ b/drivers/staging/sep/sep_driver.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff --git a/drivers/staging/sep/sep_driver_config.h b/drivers/staging/sep/sep_driver_config.h index 68688cbc2a92..cfda86f2aaff 100644 --- a/drivers/staging/sep/sep_driver_config.h +++ b/drivers/staging/sep/sep_driver_config.h @@ -235,15 +235,6 @@ held by the proccess (struct file) */ /* This stub header is for non Moorestown driver 
only */ -/* - * Constants that specify different kinds of RAR regions that could be - * set up. - */ -static __u32 const RAR_TYPE_VIDEO; /* 0 */ -static __u32 const RAR_TYPE_AUDIO = 1; -static __u32 const RAR_TYPE_IMAGE = 2; -static __u32 const RAR_TYPE_DATA = 3; - /* * @struct RAR_stat * @@ -373,56 +364,4 @@ struct RAR_buffer { #endif /* MEMRAR */ -/* rar_register */ -#ifndef CONFIG_RAR_REGISTER -/* This stub header is for non Moorestown driver only */ - -/* The register_rar function is to used by other device drivers - * to ensure that this driver is ready. As we cannot be sure of - * the compile/execute order of dirvers in ther kernel, it is - * best to give this driver a callback function to call when - * it is ready to give out addresses. The callback function - * would have those steps that continue the initialization of - * a driver that do require a valid RAR address. One of those - * steps would be to call get_rar_address() - * This function return 0 on success an -1 on failure. - */ -#define register_rar(a, b, c) (-ENODEV) - -/* The get_rar_address function is used by other device drivers - * to obtain RAR address information on a RAR. It takes two - * parameter: - * - * int rar_index - * The rar_index is an index to the rar for which you wish to retrieve - * the address information. - * Values can be 0,1, or 2. - * - * struct RAR_address_struct is a pointer to a place to which the function - * can return the address structure for the RAR. - * - * The function returns a 0 upon success or a -1 if there is no RAR - * facility on this system. - */ -#define rar_get_address(a, b, c) (-ENODEV) - -/* The lock_rar function is ued by other device drivers to lock an RAR. - * once an RAR is locked, it stays locked until the next system reboot. - * The function takes one parameter: - * - * int rar_index - * The rar_index is an index to the rar that you want to lock. - * Values can be 0,1, or 2. 
- * - * The function returns a 0 upon success or a -1 if there is no RAR - * facility on this system. - */ -#define rar_lock(a) (-1) - -#else /* using real RAR_REGISTER */ - -#include - -#endif /* CONFIG_RAR_REGISTER */ - #endif /* SEP DRIVER CONFIG */ diff --git a/include/linux/rar_register.h b/include/linux/rar_register.h index ffa805780f85..5c6118189363 100644 --- a/include/linux/rar_register.h +++ b/include/linux/rar_register.h @@ -34,11 +34,27 @@ struct rar_device; +#if defined(CONFIG_RAR_REGISTER) int register_rar(int num, int (*callback)(unsigned long data), unsigned long data); void unregister_rar(int num); int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end); int rar_lock(int rar_index); +#else +extern void unregister_rar(int num) { } +extern int rar_lock(int rar_index) { return -EIO; } + +extern inline int register_rar(int num, + int (*callback)(unsigned long data), unsigned long data) +{ + return -ENODEV; +} + +extern int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end) +{ + return -ENODEV; +} +#endif /* RAR_REGISTER */ #endif /* __KERNEL__ */ #endif /* _RAR_REGISTER_H */ -- cgit v1.2.3-71-gd317 From f7ca38dfe58c20cb1aa2ed9643187e8b194b5bae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Nov 2010 10:02:29 +0100 Subject: nl80211/cfg80211: extend mgmt-tx API for off-channel With p2p, it is sometimes necessary to transmit a frame (typically an action frame) on another channel than the current channel. Enable this through the CMD_FRAME API, and allow it to wait for a response. A new command allows that wait to be aborted. However, allow userspace to specify whether or not it wants to allow off-channel TX, it may actually want to use the same channel only. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 +++++++++++++++++----- include/net/cfg80211.h | 11 +++++++--- net/mac80211/cfg.c | 7 ++++-- net/wireless/core.h | 4 ++-- net/wireless/mlme.c | 9 ++++---- net/wireless/nl80211.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d706bf3badc8..5cfa579df476 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -358,11 +358,16 @@ * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on - * which channel the frame is to be transmitted or was received. This - * channel has to be the current channel (remain-on-channel or the - * operational channel). When called, this operation returns a cookie - * (%NL80211_ATTR_COOKIE) that will be included with the TX status event - * pertaining to the TX request. + * which channel the frame is to be transmitted or was received. If this + * channel is not the current channel (remain-on-channel or the + * operational channel) the device will switch to the given channel and + * transmit the frame, optionally waiting for a response for the time + * specified using %NL80211_ATTR_DURATION. When called, this operation + * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the + * TX status event pertaining to the TX request. + * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this + * command may be used with the corresponding cookie to cancel the wait + * time if it is known that it is no longer necessary. * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. 
%NL80211_ATTR_COOKIE identifies @@ -493,6 +498,8 @@ enum nl80211_commands { NL80211_CMD_SET_CHANNEL, NL80211_CMD_SET_WDS_PEER, + NL80211_CMD_FRAME_WAIT_CANCEL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -828,6 +835,12 @@ enum nl80211_commands { * * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS * + * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be + * transmitted on another channel when the channel given doesn't match + * the current channel. If the current channel doesn't match and this + * flag isn't set, the frame will be rejected. This is also used as an + * nl80211 capability flag. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1002,6 +1015,8 @@ enum nl80211_attrs { NL80211_ATTR_MCAST_RATE, + NL80211_ATTR_OFFCHANNEL_TX_OK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0663945cfa48..49a7c53a48ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1134,7 +1134,9 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @mgmt_tx: Transmit a management frame + * @mgmt_tx: Transmit a management frame. 
+ * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management + * frame on another channel * * @testmode_cmd: run a test mode command * @@ -1291,10 +1293,13 @@ struct cfg80211_ops { u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); + int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479e..aac2d7de828e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1552,9 +1552,9 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1565,6 +1565,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; + if (offchan) + return -EOPNOTSUPP; + /* Check that we are on the requested channel for transmission */ if (chan != local->tmp_channel && chan != local->oper_channel) diff --git a/net/wireless/core.h b/net/wireless/core.h index 6583cca0e2ee..ee80ad8dc655 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -341,9 +341,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct 
cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 6980a0c315b2..d7680f2a4c5b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -864,9 +864,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -946,8 +946,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + channel_type, channel_type_valid, + wait, buf, len, cookie); } bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67ff7e92cb99..960be4e650f0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -163,16 +163,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, - [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, - [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, - [NL80211_ATTR_WIPHY_ANTENNA_TX] = { 
.type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, - [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -677,6 +674,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -698,6 +696,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + /* for now at least assume all drivers have it */ + if (dev->ops->mgmt_tx) + NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); + if (mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; @@ -4244,6 +4246,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr; u64 cookie; struct sk_buff *msg; + unsigned int wait = 0; + bool offchan; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_WIPHY_FREQ]) @@ -4260,6 +4264,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_DURATION]) { + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EINVAL; + wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { channel_type = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); @@ -4271,6 +4281,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) channel_type_valid = true; } + offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chan = rdev_freq_to_chan(rdev, freq, channel_type); if (chan == NULL) @@ -4287,8 +4299,8 @@ static int nl80211_tx_mgmt(struct sk_buff 
*skb, struct genl_info *info) err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, - channel_type_valid, + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); @@ -4307,6 +4319,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u64 cookie; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EOPNOTSUPP; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); +} + static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4879,6 +4916,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .doit = nl80211_tx_mgmt_cancel_wait, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, -- cgit v1.2.3-71-gd317 From 24278d148316d2180be6df40e06db013d8b232b8 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 27 Sep 2010 17:25:23 -0700 Subject: rcu: priority boosting for TINY_PREEMPT_RCU Add priority boosting, but only for TINY_PREEMPT_RCU. This is enabled by the default-off RCU_BOOST kernel parameter. The priority to which to boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel parameter (defaulting to real-time priority 1) and the time to wait before boosting the readers blocking a given grace period is controlled by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +- include/linux/sched.h | 11 ++- init/Kconfig | 39 +++++++++ kernel/rcutiny.c | 66 ++++++--------- kernel/rcutiny_plugin.h | 208 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 280 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6ba..69f91aacdeee 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -81,6 +81,12 @@ extern struct group_info init_groups; */ # define CAP_INIT_BSET CAP_FULL_SET +#ifdef CONFIG_RCU_BOOST +#define INIT_TASK_RCU_BOOST() \ + .rcu_boost_mutex = NULL, +#else +#define INIT_TASK_RCU_BOOST() +#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, @@ -92,7 +98,8 @@ extern struct group_info init_groups; .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() + INIT_TASK_RCU_TREE_PREEMPT() \ + INIT_TASK_RCU_BOOST() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index e18473f0eb78..ed1a9bc52b2f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,9 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 
+#ifdef CONFIG_RCU_BOOST + struct rt_mutex *rcu_boost_mutex; +#endif /* #ifdef CONFIG_RCU_BOOST */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1745,7 +1748,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { @@ -1753,7 +1757,10 @@ static inline void rcu_copy_process(struct task_struct *p) p->rcu_read_unlock_special = 0; #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + p->rcu_boost_mutex = NULL; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index a619a1ac7f4c..48efefcac12a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -450,6 +450,45 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. +config RCU_BOOST + bool "Enable RCU priority boosting" + depends on RT_MUTEXES && TINY_PREEMPT_RCU + default n + help + This option boosts the priority of preempted RCU readers that + block the current preemptible RCU grace period for too long. + This option also prevents heavy loads from blocking RCU + callback invocation for all flavors of RCU. + + Say Y here if you are working with real-time apps or heavy loads + Say N here if you are unsure. + +config RCU_BOOST_PRIO + int "Real-time priority to boost RCU readers to" + range 1 99 + depends on RCU_BOOST + default 1 + help + This option specifies the real-time priority to which preempted + RCU readers are to be boosted. 
If you are working with CPU-bound + real-time applications, you should specify a priority higher then + the highest-priority CPU-bound application. + + Specify the real-time priority, or take the default if unsure. + +config RCU_BOOST_DELAY + int "Milliseconds to delay boosting after RCU grace-period start" + range 0 3000 + depends on RCU_BOOST + default 500 + help + This option specifies the time to wait after the beginning of + a given grace period before priority-boosting preempted RCU + readers blocking that grace period. Note that any RCU reader + blocking an expedited RCU grace period is boosted immediately. + + Accept the default if unsure. + endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 86eef29cdfb2..93d166582cbb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -36,38 +36,16 @@ #include #include -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_cbs_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); -static unsigned long have_rcu_cbs; -static void invoke_rcu_cbs(void); +/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. 
*/ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; +static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. */ +struct rcu_ctrlblk; static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_cbs(void *arg); +static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) * This is a kthread, but it is never stopped, at least not until * the system goes down. */ -static int rcu_cbs(void *arg) +static int rcu_kthread(void *arg) { unsigned long work; + unsigned long morework; unsigned long flags; for (;;) { - wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); + morework = rcu_boost(); local_irq_save(flags); - work = have_rcu_cbs; - have_rcu_cbs = 0; + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; local_irq_restore(flags); if (work) { rcu_process_callbacks(&rcu_sched_ctrlblk); rcu_process_callbacks(&rcu_bh_ctrlblk); rcu_preempt_process_callbacks(); } + schedule_timeout_interruptible(1); /* Leave CPU for others. */ } return 0; /* Not reached, but needed to shut gcc up. */ } /* - * Wake up rcu_cbs() to process callbacks now eligible for invocation. + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. 
*/ -static void invoke_rcu_cbs(void) +static void invoke_rcu_kthread(void) { unsigned long flags; local_irq_save(flags); - have_rcu_cbs = 1; - wake_up(&rcu_cbs_wq); + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); local_irq_restore(flags); } @@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched); */ static int __init rcu_spawn_kthreads(void) { - rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); return 0; } early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 95f9239df512..24f43165f222 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -24,6 +24,29 @@ #include +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is not such task. */ + /* is no such task. 
*/ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ +#ifdef CONFIG_RCU_BOOST + struct list_head *boost_tasks; + /* Pointer to first task that needs to be */ + /* priority-boosted, or NULL if no priority */ + /* boosting is needed. If there is no */ + /* current or expedited grace period, there */ + /* can be no such task. */ +#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ + s8 boosted_this_gp; /* Has boosting already happened? */ + unsigned long boost_time; /* When to start boosting (jiffies) */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void) return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; } +/* + * Advance a ->blkd_tasks-list pointer to the next entry, instead + * returning NULL if at the end of the list. + */ +static struct list_head *rcu_next_node_entry(struct task_struct *t) +{ + struct list_head *np; + + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + return np; +} + +#ifdef CONFIG_RCU_BOOST + +#include "rtmutex_common.h" + +/* + * Carry out RCU priority boosting on the task indicated by ->boost_tasks, + * and advance ->boost_tasks to the next task in the ->blkd_tasks list. + */ +static int rcu_boost(void) +{ + unsigned long flags; + struct rt_mutex mtx; + struct list_head *np; + struct task_struct *t; + + if (rcu_preempt_ctrlblk.boost_tasks == NULL) + return 0; /* Nothing to boost. 
*/ + raw_local_irq_save(flags); + rcu_preempt_ctrlblk.boosted_this_gp++; + t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, + rcu_node_entry); + np = rcu_next_node_entry(t); + rt_mutex_init_proxy_locked(&mtx, t); + t->rcu_boost_mutex = &mtx; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; + raw_local_irq_restore(flags); + rt_mutex_lock(&mtx); + rt_mutex_unlock(&mtx); + return rcu_preempt_ctrlblk.boost_tasks != NULL; +} + +/* + * Check to see if it is now time to start boosting RCU readers blocking + * the current grace period, and, if so, tell the rcu_kthread_task to + * start boosting them. If there is an expedited boost in progress, + * we wait for it to complete. + */ +static void rcu_initiate_boost(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.boosted_this_gp == 0 && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { + rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + invoke_rcu_kthread(); + } +} + +/* + * Initiate boosting for an expedited grace period. + */ +static void rcu_initiate_expedited_boost(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + rcu_preempt_ctrlblk.boosted_this_gp = -1; + invoke_rcu_kthread(); + } + raw_local_irq_restore(flags); +} + +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); + +/* + * Do priority-boost accounting for the start of a new grace period. + */ +static void rcu_preempt_boost_start_gp(void) +{ + rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; + if (rcu_preempt_ctrlblk.boosted_this_gp > 0) + rcu_preempt_ctrlblk.boosted_this_gp = 0; +} + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * If there is no RCU priority boosting, we don't boost. 
+ */ +static int rcu_boost(void) +{ + return 0; +} + +/* + * If there is no RCU priority boosting, we don't initiate boosting. + */ +static void rcu_initiate_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, we don't initiate expedited boosting. + */ +static void rcu_initiate_expedited_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, nothing to do at grace-period start. + */ +static void rcu_preempt_boost_start_gp(void) +{ +} + +#endif /* else #ifdef CONFIG_RCU_BOOST */ + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - /* - * If there is no GP, or if blocked readers are still blocking GP, - * then there is nothing more to do. - */ + /* If there is no GP then there is nothing more to do. */ if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) return; + /* If there are blocked readers, go check up on boosting. */ + if (rcu_preempt_blocked_readers_cgp()) { + rcu_initiate_boost(); + return; + } /* Advance callbacks. */ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; @@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void) /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; + /* Set up for RCU priority boosting. */ + rcu_preempt_boost_start_gp(); + /* If there is no running reader, CPU is done with GP. 
*/ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; + np = rcu_next_node_entry(t); list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; +#ifdef CONFIG_RCU_BOOST + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) + rcu_preempt_ctrlblk.boost_tasks = np; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } +#ifdef CONFIG_RCU_BOOST + /* Unboost self if was boosted. */ + if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; + rt_mutex_unlock(t->rcu_boost_mutex); + t->rcu_boost_mutex = NULL; + } +#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_cbs(); + invoke_rcu_kthread(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) + rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -574,6 +747,15 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +/* + * Because preemptible RCU does not exist, it is never necessary to + * boost preempted RCU readers. 
+ */ +static int rcu_boost(void) +{ + return 0; +} + /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void) } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else /* #ifdef CONFIG_RCU_BOOST */ +#define RCU_BOOST_PRIO 1 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3-71-gd317 From 7b27d5475f86186914e54e4a6bb994e9a985337b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Oct 2010 11:29:05 +0800 Subject: rcu,cleanup: move synchronize_sched_expedited() out of sched.c The first version of synchronize_sched_expedited() used the migration code in the scheduler, and was therefore implemented in kernel/sched.c. However, the more recent version of this code no longer uses the migration code, so this commit moves it to the main RCU source files. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 5 ++++ include/linux/rcutree.h | 1 + kernel/rcutree_plugin.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched.c | 69 ---------------------------------------------- 5 files changed, 77 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7142ee3304ab..49e8e16308e1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,7 +66,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ea025a611fcc..30ebd7c8d874 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -60,6 +60,11 @@ static inline void 
synchronize_rcu_bh_expedited(void) synchronize_sched(); } +static inline void synchronize_sched_expedited(void) +{ + synchronize_sched(); +} + #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0e96833aa73..3a933482734a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,6 +48,7 @@ static inline void exit_rcu(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); +extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 71a4147473f9..21df7f3e7273 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -25,6 +25,7 @@ */ #include +#include /* * Check the RCU kernel configuration parameters and print informative @@ -1014,6 +1015,76 @@ static void __init __rcu_init_preempt(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifndef CONFIG_SMP + +void synchronize_sched_expedited(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#else /* #ifndef CONFIG_SMP */ + +static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); + +static int synchronize_sched_expedited_cpu_stop(void *data) +{ + /* + * There must be a full memory barrier on each affected CPU + * between the time that try_stop_cpus() is called and the + * time that it returns. + * + * In the current initial implementation of cpu_stop, the + * above condition is already met when the control reaches + * this point and the following smp_mb() is not strictly + * necessary. Do smp_mb() anyway for documentation and + * robustness against future implementation changes. + */ + smp_mb(); /* See above comment block. 
*/ + return 0; +} + +/* + * Wait for an rcu-sched grace period to elapse, but use "big hammer" + * approach to force grace period to end quickly. This consumes + * significant time on all CPUs, and is thus not recommended for + * any sort of common-case code. + * + * Note that it is illegal to call this function while holding any + * lock that is acquired by a CPU-hotplug notifier. Failing to + * observe this restriction will result in deadlock. + */ +void synchronize_sched_expedited(void) +{ + int snap, trycount = 0; + + smp_mb(); /* ensure prior mod happens before capturing snap. */ + snap = atomic_read(&synchronize_sched_expedited_count) + 1; + get_online_cpus(); + while (try_stop_cpus(cpu_online_mask, + synchronize_sched_expedited_cpu_stop, + NULL) == -EAGAIN) { + put_online_cpus(); + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_sched(); + return; + } + if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { + smp_mb(); /* ensure test happens before caller kfree */ + return; + } + get_online_cpus(); + } + atomic_inc(&synchronize_sched_expedited_count); + smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. 
*/ + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#endif /* #else #ifndef CONFIG_SMP */ + #if !defined(CONFIG_RCU_FAST_NO_HZ) /* diff --git a/kernel/sched.c b/kernel/sched.c index ae8f75a5ceb4..d1e8889872a1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9131,72 +9131,3 @@ struct cgroup_subsys cpuacct_subsys = { }; #endif /* CONFIG_CGROUP_CPUACCT */ -#ifndef CONFIG_SMP - -void synchronize_sched_expedited(void) -{ - barrier(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#else /* #ifndef CONFIG_SMP */ - -static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); - -static int synchronize_sched_expedited_cpu_stop(void *data) -{ - /* - * There must be a full memory barrier on each affected CPU - * between the time that try_stop_cpus() is called and the - * time that it returns. - * - * In the current initial implementation of cpu_stop, the - * above condition is already met when the control reaches - * this point and the following smp_mb() is not strictly - * necessary. Do smp_mb() anyway for documentation and - * robustness against future implementation changes. - */ - smp_mb(); /* See above comment block. */ - return 0; -} - -/* - * Wait for an rcu-sched grace period to elapse, but use "big hammer" - * approach to force grace period to end quickly. This consumes - * significant time on all CPUs, and is thus not recommended for - * any sort of common-case code. - * - * Note that it is illegal to call this function while holding any - * lock that is acquired by a CPU-hotplug notifier. Failing to - * observe this restriction will result in deadlock. - */ -void synchronize_sched_expedited(void) -{ - int snap, trycount = 0; - - smp_mb(); /* ensure prior mod happens before capturing snap. 
*/ - snap = atomic_read(&synchronize_sched_expedited_count) + 1; - get_online_cpus(); - while (try_stop_cpus(cpu_online_mask, - synchronize_sched_expedited_cpu_stop, - NULL) == -EAGAIN) { - put_online_cpus(); - if (trycount++ < 10) - udelay(trycount * num_online_cpus()); - else { - synchronize_sched(); - return; - } - if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { - smp_mb(); /* ensure test happens before caller kfree */ - return; - } - get_online_cpus(); - } - atomic_inc(&synchronize_sched_expedited_count); - smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#endif /* #else #ifndef CONFIG_SMP */ -- cgit v1.2.3-71-gd317 From 9833c39400c3e6ee19daeded6910df648741611e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 19 Nov 2010 09:29:24 +0100 Subject: ARM: 6485/5: proc/vmcore - allow archs to override vmcore_elf_check_arch() Allow architectures to redefine this macro if needed. This is useful for example in architectures where 64-bit ELF vmcores are not supported. Specifying zero vmcore_elf64_check_arch() allows compiler to optimize away unnecessary parts of parse_crash_elf64_headers(). We also rename the macro to vmcore_elf64_check_arch() to reflect that it is used for 64-bit vmcores only. Signed-off-by: Mika Westerberg Signed-off-by: Russell King --- fs/proc/vmcore.c | 2 +- include/linux/crash_dump.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 2367fb3f70bc..74802bc5ded9 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void) /* Do some basic Verification. 
*/ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || (ehdr.e_type != ET_CORE) || - !vmcore_elf_check_arch(&ehdr) || + !vmcore_elf64_check_arch(&ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64 || ehdr.e_ident[EI_VERSION] != EV_CURRENT || ehdr.e_version != EV_CURRENT || diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0026f267da20..088cd4ace4ef 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -20,7 +20,14 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, #define vmcore_elf_check_arch_cross(x) 0 #endif -#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +/* + * Architecture code can redefine this if there are any special checks + * needed for 64-bit ELF vmcores. In case of 32-bit only architecture, + * this can be set to zero. + */ +#ifndef vmcore_elf64_check_arch +#define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +#endif /* * is_kdump_kernel() checks whether this kernel is booting after a panic of -- cgit v1.2.3-71-gd317 From 5091faa449ee0b7d73bc296a93bca9540fc51d0a Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 30 Nov 2010 14:18:03 +0100 Subject: sched: Add 'autogroup' scheduling feature: automated per session task groups A recurring complaint from CFS users is that parallel kbuild has a negative impact on desktop interactivity. This patch implements an idea from Linus, to automatically create task groups. Currently, only per session autogroups are implemented, but the patch leaves the way open for enhancement. Implementation: each task's signal struct contains an inherited pointer to a refcounted autogroup struct containing a task group pointer, the default for all tasks pointing to the init_task_group. When a task calls setsid(), a new task group is created, the process is moved into the new task group, and a reference to the previous task group is dropped.
Child processes inherit this task group thereafter, and increase its refcount. When the last thread of a process exits, the process's reference is dropped, such that when the last process referencing an autogroup exits, the autogroup is destroyed. At runqueue selection time, IFF a task has no cgroup assignment, its current autogroup is used. Autogroup bandwidth is controllable via setting its nice level through the proc filesystem: cat /proc/<pid>/autogroup Displays the task's group and the group's nice level. echo <nice level> > /proc/<pid>/autogroup Sets the task group's shares to the weight of nice <level> task. Setting nice level is rate limited for !admin users due to the abuse risk of task group locking. The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP=y is selected, but can be disabled via the boot option noautogroup, and can also be turned on/off on the fly via: echo [01] > /proc/sys/kernel/sched_autogroup_enabled ... which will automatically move tasks to/from the root task group. Signed-off-by: Mike Galbraith Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Cc: Markus Trippelsdorf Cc: Mathieu Desnoyers Cc: Paul Turner Cc: Oleg Nesterov [ Removed the task_group_path() debug code, and fixed !EVENTFD build failure.
] Signed-off-by: Ingo Molnar LKML-Reference: <1290281700.28711.9.camel@maggy.simson.net> Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 + fs/proc/base.c | 79 +++++++++++++ include/linux/sched.h | 23 ++++ init/Kconfig | 13 ++ kernel/fork.c | 5 +- kernel/sched.c | 13 +- kernel/sched_autogroup.c | 229 ++++++++++++++++++++++++++++++++++++ kernel/sched_autogroup.h | 32 +++++ kernel/sched_debug.c | 47 +------- kernel/sys.c | 4 +- kernel/sysctl.c | 11 ++ 11 files changed, 409 insertions(+), 49 deletions(-) create mode 100644 kernel/sched_autogroup.c create mode 100644 kernel/sched_autogroup.h (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 92e83e53148f..86820a727b0b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. + noautogroup Disable scheduler automatic task group creation. + nobats [PPC] Do not use BATs for mapping kernel lowmem on "Classic" PPC cores. 
diff --git a/fs/proc/base.c b/fs/proc/base.c index f3d02ca461ec..2fa0ce29b6dc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = { #endif +#ifdef CONFIG_SCHED_AUTOGROUP +/* + * Print out autogroup related information: + */ +static int sched_autogroup_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_autogroup_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_autogroup_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct task_struct *p; + char buffer[PROC_NUMBUF]; + long nice; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + err = strict_strtol(strstrip(buffer), 0, &nice); + if (err) + return -EINVAL; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = nice; + err = proc_sched_autogroup_set_nice(p, &err); + if (err) + count = err; + + put_task_struct(p); + + return count; +} + +static int sched_autogroup_open(struct inode *inode, struct file *filp) +{ + int ret; + + ret = single_open(filp, sched_autogroup_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_pid_sched_autogroup_operations = { + .open = sched_autogroup_open, + .read = seq_read, + .write = sched_autogroup_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_AUTOGROUP */ + static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = { INF("limits", S_IRUGO, proc_pid_limits), #ifdef 
CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), +#endif +#ifdef CONFIG_SCHED_AUTOGROUP + REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK diff --git a/include/linux/sched.h b/include/linux/sched.h index a5b92c70c737..9c2d46da486e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -509,6 +509,8 @@ struct thread_group_cputimer { spinlock_t lock; }; +struct autogroup; + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -576,6 +578,9 @@ struct signal_struct { struct tty_struct *tty; /* NULL if no tty */ +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. @@ -1927,6 +1932,24 @@ int sched_rt_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_compat_yield; +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; + +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +#endif + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void 
rt_mutex_setprio(struct task_struct *p, int prio); diff --git a/init/Kconfig b/init/Kconfig index 88c10468db46..f1bba0a1b051 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -728,6 +728,19 @@ config NET_NS endif # NAMESPACES +config SCHED_AUTOGROUP + bool "Automatic process group scheduling" + select EVENTFD + select CGROUPS + select CGROUP_SCHED + select FAIR_GROUP_SCHED + help + This option optimizes the scheduler for common desktop workloads by + automatically creating and populating task groups. This separation + of workloads isolates aggressive CPU burners (like build jobs) from + desktop applications. Task group autogeneration is currently based + upon task session. + config MM_OWNER bool diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..b6f2475f1e83 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) + if (atomic_dec_and_test(&sig->sigcnt)) { + sched_autogroup_exit(sig); free_signal_struct(sig); + } } void __put_task_struct(struct task_struct *tsk) @@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) posix_cpu_timers_init_group(sig); tty_audit_fork(sig); + sched_autogroup_fork(sig); sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; diff --git a/kernel/sched.c b/kernel/sched.c index 66ef5790d932..b646dad4a40e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,6 +79,7 @@ #include "sched_cpupri.h" #include "workqueue_sched.h" +#include "sched_autogroup.h" #define CREATE_TRACE_POINTS #include @@ -271,6 +272,10 @@ struct task_group { struct task_group *parent; struct list_head siblings; struct list_head children; + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif }; #define root_task_group init_task_group @@ -603,11 +608,14 @@ static inline int 
cpu_of(struct rq *rq) */ static inline struct task_group *task_group(struct task_struct *p) { + struct task_group *tg; struct cgroup_subsys_state *css; css = task_subsys_state_check(p, cpu_cgroup_subsys_id, lockdep_is_held(&task_rq(p)->lock)); - return container_of(css, struct task_group, css); + tg = container_of(css, struct task_group, css); + + return autogroup_task_group(p, tg); } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ @@ -1869,6 +1877,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" +#include "sched_autogroup.c" #include "sched_stoptask.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" @@ -7750,7 +7759,7 @@ void __init sched_init(void) #ifdef CONFIG_CGROUP_SCHED list_add(&init_task_group.list, &task_groups); INIT_LIST_HEAD(&init_task_group.children); - + autogroup_init(&init_task); #endif /* CONFIG_CGROUP_SCHED */ for_each_possible_cpu(i) { diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c new file mode 100644 index 000000000000..57a7ac286a02 --- /dev/null +++ b/kernel/sched_autogroup.c @@ -0,0 +1,229 @@ +#ifdef CONFIG_SCHED_AUTOGROUP + +#include +#include +#include +#include + +unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; +static struct autogroup autogroup_default; +static atomic_t autogroup_seq_nr; + +static void autogroup_init(struct task_struct *init_task) +{ + autogroup_default.tg = &init_task_group; + init_task_group.autogroup = &autogroup_default; + kref_init(&autogroup_default.kref); + init_rwsem(&autogroup_default.lock); + init_task->signal->autogroup = &autogroup_default; +} + +static inline void autogroup_free(struct task_group *tg) +{ + kfree(tg->autogroup); +} + +static inline void autogroup_destroy(struct kref *kref) +{ + struct autogroup *ag = container_of(kref, struct autogroup, kref); + + sched_destroy_group(ag->tg); +} + +static inline void 
autogroup_kref_put(struct autogroup *ag) +{ + kref_put(&ag->kref, autogroup_destroy); +} + +static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) +{ + kref_get(&ag->kref); + return ag; +} + +static inline struct autogroup *autogroup_create(void) +{ + struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); + struct task_group *tg; + + if (!ag) + goto out_fail; + + tg = sched_create_group(&init_task_group); + + if (IS_ERR(tg)) + goto out_free; + + kref_init(&ag->kref); + init_rwsem(&ag->lock); + ag->id = atomic_inc_return(&autogroup_seq_nr); + ag->tg = tg; + tg->autogroup = ag; + + return ag; + +out_free: + kfree(ag); +out_fail: + if (printk_ratelimit()) { + printk(KERN_WARNING "autogroup_create: %s failure.\n", + ag ? "sched_create_group()" : "kmalloc()"); + } + + return autogroup_kref_get(&autogroup_default); +} + +static inline bool +task_wants_autogroup(struct task_struct *p, struct task_group *tg) +{ + if (tg != &root_task_group) + return false; + + if (p->sched_class != &fair_sched_class) + return false; + + /* + * We can only assume the task group can't go away on us if + * autogroup_move_group() can see us on ->thread_group list. 
+ */ + if (p->flags & PF_EXITING) + return false; + + return true; +} + +static inline struct task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg) +{ + int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); + + if (enabled && task_wants_autogroup(p, tg)) + return p->signal->autogroup->tg; + + return tg; +} + +static void +autogroup_move_group(struct task_struct *p, struct autogroup *ag) +{ + struct autogroup *prev; + struct task_struct *t; + unsigned long flags; + + BUG_ON(!lock_task_sighand(p, &flags)); + + prev = p->signal->autogroup; + if (prev == ag) { + unlock_task_sighand(p, &flags); + return; + } + + p->signal->autogroup = autogroup_kref_get(ag); + + t = p; + do { + sched_move_task(t); + } while_each_thread(p, t); + + unlock_task_sighand(p, &flags); + autogroup_kref_put(prev); +} + +/* Allocates GFP_KERNEL, cannot be called under any spinlock */ +void sched_autogroup_create_attach(struct task_struct *p) +{ + struct autogroup *ag = autogroup_create(); + + autogroup_move_group(p, ag); + /* drop extra refrence added by autogroup_create() */ + autogroup_kref_put(ag); +} +EXPORT_SYMBOL(sched_autogroup_create_attach); + +/* Cannot be called under siglock. Currently has no users */ +void sched_autogroup_detach(struct task_struct *p) +{ + autogroup_move_group(p, &autogroup_default); +} +EXPORT_SYMBOL(sched_autogroup_detach); + +void sched_autogroup_fork(struct signal_struct *sig) +{ + struct task_struct *p = current; + + spin_lock_irq(&p->sighand->siglock); + sig->autogroup = autogroup_kref_get(p->signal->autogroup); + spin_unlock_irq(&p->sighand->siglock); +} + +void sched_autogroup_exit(struct signal_struct *sig) +{ + autogroup_kref_put(sig->autogroup); +} + +static int __init setup_autogroup(char *str) +{ + sysctl_sched_autogroup_enabled = 0; + + return 1; +} + +__setup("noautogroup", setup_autogroup); + +#ifdef CONFIG_PROC_FS + +/* Called with siglock held. 
*/ +int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) +{ + static unsigned long next = INITIAL_JIFFIES; + struct autogroup *ag; + int err; + + if (*nice < -20 || *nice > 19) + return -EINVAL; + + err = security_task_setnice(current, *nice); + if (err) + return err; + + if (*nice < 0 && !can_nice(current, *nice)) + return -EPERM; + + /* this is a heavy operation taking global locks.. */ + if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) + return -EAGAIN; + + next = HZ / 10 + jiffies; + ag = autogroup_kref_get(p->signal->autogroup); + + down_write(&ag->lock); + err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); + if (!err) + ag->nice = *nice; + up_write(&ag->lock); + + autogroup_kref_put(ag); + + return err; +} + +void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) +{ + struct autogroup *ag = autogroup_kref_get(p->signal->autogroup); + + down_read(&ag->lock); + seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); + up_read(&ag->lock); + + autogroup_kref_put(ag); +} +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_SCHED_DEBUG +static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) +{ + return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); +} +#endif /* CONFIG_SCHED_DEBUG */ + +#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h new file mode 100644 index 000000000000..5358e241cb20 --- /dev/null +++ b/kernel/sched_autogroup.h @@ -0,0 +1,32 @@ +#ifdef CONFIG_SCHED_AUTOGROUP + +struct autogroup { + struct kref kref; + struct task_group *tg; + struct rw_semaphore lock; + unsigned long id; + int nice; +}; + +static inline struct task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg); + +#else /* !CONFIG_SCHED_AUTOGROUP */ + +static inline void autogroup_init(struct task_struct *init_task) { } +static inline void autogroup_free(struct task_group *tg) { } + +static inline struct 
task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg) +{ + return tg; +} + +#ifdef CONFIG_SCHED_DEBUG +static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) +{ + return 0; +} +#endif + +#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index e95b77414a99..1dfae3d014b5 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec) #define SPLIT_NS(x) nsec_high(x), nsec_low(x) #ifdef CONFIG_FAIR_GROUP_SCHED -static void print_cfs_group_stats(struct seq_file *m, int cpu, - struct task_group *tg) +static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) { struct sched_entity *se = tg->se[cpu]; if (!se) @@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif -#ifdef CONFIG_CGROUP_SCHED - { - char path[64]; - - rcu_read_lock(); - cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); - rcu_read_unlock(); - SEQ_printf(m, " %s", path); - } -#endif SEQ_printf(m, "\n"); } @@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irqrestore(&tasklist_lock, flags); } -#if defined(CONFIG_CGROUP_SCHED) && \ - (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) -static void task_group_path(struct task_group *tg, char *buf, int buflen) -{ - /* may be NULL if the underlying cgroup isn't fully-created yet */ - if (!tg->css.cgroup) { - buf[0] = '\0'; - return; - } - cgroup_path(tg->css.cgroup, buf, buflen); -} -#endif - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) struct sched_entity *last; unsigned long flags; -#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) 
- char path[128]; - struct task_group *tg = cfs_rq->tg; - - task_group_path(tg, path, sizeof(path)); - - SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); -#else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); -#endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -215,7 +182,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", cfs_rq->load_contribution); SEQ_printf(m, " .%-30s: %d\n", "load_tg", - atomic_read(&tg->load_weight)); + atomic_read(&cfs_rq->tg->load_weight)); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); @@ -224,17 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { -#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) - char path[128]; - struct task_group *tg = rt_rq->tg; - - task_group_path(tg, path, sizeof(path)); - - SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); -#else SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); -#endif - #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) diff --git a/kernel/sys.c b/kernel/sys.c index 7f5a0cd296a9..2745dcdb6c6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid) err = session; out: write_unlock_irq(&tasklist_lock); - if (err > 0) + if (err > 0) { proc_sid_connector(group_leader); + sched_autogroup_create_attach(group_leader); + } return err; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a00fdefd24ce..121e4fff03d1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -370,6 +370,17 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_AUTOGROUP + { + .procname = "sched_autogroup_enabled", + .data = &sysctl_sched_autogroup_enabled, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef 
CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- cgit v1.2.3-71-gd317 From c320c7b7d380e630f595de1236d9d085b035d5b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Oct 2010 12:50:11 -0200 Subject: perf events: Precalculate the header space for PERF_SAMPLE_ fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PERF_SAMPLE_{CALLCHAIN,RAW} have variable lengths per sample, but the others can be precalculated, reducing a bit the per sample cost. Acked-by: Peter Zijlstra Cc: Frédéric Weisbecker Cc: Ian Munsie Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 + kernel/perf_event.c | 150 +++++++++++++++++++++++++++------------------ 2 files changed, 93 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cbf04cc1e630..adf6d9931643 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -758,6 +758,8 @@ struct perf_event { u64 shadow_ctx_time; struct perf_event_attr attr; + u16 header_size; + u16 read_size; struct hw_perf_event hw; struct perf_event_context *ctx; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index af1e63f249f3..aede71245e9f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_stat++; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group.
+ */ +static void perf_event__read_size(struct perf_event *event) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + event->read_size = size; +} + +static void perf_event__header_size(struct perf_event *event) +{ + struct perf_sample_data *data; + u64 sample_type = event->attr.sample_type; + u16 size = 0; + + perf_event__read_size(event); + + if (sample_type & PERF_SAMPLE_IP) + size += sizeof(data->ip); + + if (sample_type & PERF_SAMPLE_TID) + size += sizeof(data->tid_entry); + + if (sample_type & PERF_SAMPLE_TIME) + size += sizeof(data->time); + + if (sample_type & PERF_SAMPLE_ADDR) + size += sizeof(data->addr); + + if (sample_type & PERF_SAMPLE_ID) + size += sizeof(data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + size += sizeof(data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + size += sizeof(data->cpu_entry); + + if (sample_type & PERF_SAMPLE_PERIOD) + size += sizeof(data->period); + + if (sample_type & PERF_SAMPLE_READ) + size += event->read_size; + + event->header_size = size; +} + static void perf_group_attach(struct perf_event *event) { - struct perf_event *group_leader = event->group_leader; + struct perf_event *group_leader = event->group_leader, *pos; /* * We can have double attach due to group movement in perf_event_open. 
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; + + perf_event__header_size(group_leader); + + list_for_each_entry(pos, &group_leader->sibling_list, group_entry) + perf_event__header_size(pos); } /* @@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event) if (event->group_leader != event) { list_del_init(&event->group_entry); event->group_leader->nr_siblings--; - return; + goto out; } if (!list_empty(&event->group_entry)) @@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_flags = event->group_flags; } + +out: + perf_event__header_size(event->group_leader); + + list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) + perf_event__header_size(tmp); } static inline int @@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file) return perf_event_release_kernel(event); } -static int perf_event_read_size(struct perf_event *event) -{ - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; - size += sizeof(u64); - } - - size += entry * nr; - - return size; -} - u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) if (event->state == PERF_EVENT_STATE_ERROR) return 0; - if (count < perf_event_read_size(event)) + if (count < event->read_size) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); @@ 
-3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header, data->type = sample_type; header->type = PERF_RECORD_SAMPLE; - header->size = sizeof(*header); + header->size = sizeof(*header) + event->header_size; header->misc = 0; header->misc |= perf_misc_flags(regs); - if (sample_type & PERF_SAMPLE_IP) { + if (sample_type & PERF_SAMPLE_IP) data->ip = perf_instruction_pointer(regs); - header->size += sizeof(data->ip); - } - if (sample_type & PERF_SAMPLE_TID) { /* namespace issues */ data->tid_entry.pid = perf_event_pid(event, current); data->tid_entry.tid = perf_event_tid(event, current); - - header->size += sizeof(data->tid_entry); } - if (sample_type & PERF_SAMPLE_TIME) { + if (sample_type & PERF_SAMPLE_TIME) data->time = perf_clock(); - header->size += sizeof(data->time); - } - - if (sample_type & PERF_SAMPLE_ADDR) - header->size += sizeof(data->addr); - - if (sample_type & PERF_SAMPLE_ID) { + if (sample_type & PERF_SAMPLE_ID) data->id = primary_event_id(event); - header->size += sizeof(data->id); - } - - if (sample_type & PERF_SAMPLE_STREAM_ID) { + if (sample_type & PERF_SAMPLE_STREAM_ID) data->stream_id = event->id; - header->size += sizeof(data->stream_id); - } - if (sample_type & PERF_SAMPLE_CPU) { data->cpu_entry.cpu = raw_smp_processor_id(); data->cpu_entry.reserved = 0; - - header->size += sizeof(data->cpu_entry); } - if (sample_type & PERF_SAMPLE_PERIOD) - header->size += sizeof(data->period); - - if (sample_type & PERF_SAMPLE_READ) - header->size += perf_event_read_size(event); - if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; @@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event, .header = { .type = PERF_RECORD_READ, .misc = 0, - .size = sizeof(read_event) + perf_event_read_size(event), + .size = sizeof(read_event) + event->read_size, }, .pid = perf_event_pid(event, task), .tid = perf_event_tid(event, task), @@ -5714,6 +5741,11 @@ SYSCALL_DEFINE5(perf_event_open, list_add_tail(&event->owner_entry, 
&current->perf_event_list); mutex_unlock(&current->perf_event_mutex); + /* + * Precalculate sample_data sizes + */ + perf_event__header_size(event); + /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction -- cgit v1.2.3-71-gd317 From 498cb95175c29ed96bf32f30df2d11ec1c7f3879 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 30 Nov 2010 14:11:49 -0800 Subject: OMAP: Serial: Define OMAP uart MDR1 reg and remove magic numbers Define MDR1 register serial definitions used in serial and bluetooth drivers. Change magic number to ones defined in serial_reg for omap1/2 serial driver. Remove redefined MDR1 register definitions in omap-serial driver. Signed-off-by: Andrei Emeltchenko Acked-by: G, Manjunath Kondaiah Acked-by: Govindraj.R Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/serial.c | 6 ++++-- arch/arm/mach-omap2/serial.c | 15 +++++++++------ arch/arm/plat-omap/include/plat/omap-serial.h | 3 --- drivers/serial/omap-serial.c | 6 +++--- include/linux/serial_reg.h | 12 ++++++++++++ 5 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index b78d0749f13d..c73d1b77b364 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -52,9 +52,11 @@ static inline void omap_serial_outp(struct plat_serial8250_port *p, int offset, */ static void __init omap_serial_reset(struct plat_serial8250_port *p) { - omap_serial_outp(p, UART_OMAP_MDR1, 0x07); /* disable UART */ + omap_serial_outp(p, UART_OMAP_MDR1, + UART_OMAP_MDR1_DISABLE); /* disable UART */ omap_serial_outp(p, UART_OMAP_SCR, 0x08); /* TX watermark */ - omap_serial_outp(p, UART_OMAP_MDR1, 0x00); /* enable UART */ + omap_serial_outp(p, UART_OMAP_MDR1, + UART_OMAP_MDR1_16X_MODE); /* enable UART */ if (!cpu_is_omap15xx()) { omap_serial_outp(p, UART_OMAP_SYSC, 0x01); diff --git a/arch/arm/mach-omap2/serial.c
b/arch/arm/mach-omap2/serial.c index d17960a1be25..fa9806250b50 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -169,9 +169,9 @@ static inline void serial_write_reg(struct omap_uart_state *uart, int offset, static inline void __init omap_uart_reset(struct omap_uart_state *uart) { - serial_write_reg(uart, UART_OMAP_MDR1, 0x07); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); serial_write_reg(uart, UART_OMAP_SCR, 0x08); - serial_write_reg(uart, UART_OMAP_MDR1, 0x00); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE); } #if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP3) @@ -247,9 +247,10 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) uart->context_valid = 0; if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS) - omap_uart_mdr1_errataset(uart, 0x07, 0xA0); + omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_DISABLE, 0xA0); else - serial_write_reg(uart, UART_OMAP_MDR1, 0x7); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); + serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ efr = serial_read_reg(uart, UART_EFR); serial_write_reg(uart, UART_EFR, UART_EFR_ECB); @@ -268,11 +269,13 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) serial_write_reg(uart, UART_OMAP_SCR, uart->scr); serial_write_reg(uart, UART_OMAP_WER, uart->wer); serial_write_reg(uart, UART_OMAP_SYSC, uart->sysc); + if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS) - omap_uart_mdr1_errataset(uart, 0x00, 0xA1); + omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_16X_MODE, 0xA1); else /* UART 16x mode */ - serial_write_reg(uart, UART_OMAP_MDR1, 0x00); + serial_write_reg(uart, UART_OMAP_MDR1, + UART_OMAP_MDR1_16X_MODE); } #else static inline void omap_uart_save_context(struct omap_uart_state *uart) {} diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h index c8dae02f0704..6a1788014611 100644 --- 
a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/arch/arm/plat-omap/include/plat/omap-serial.h @@ -31,9 +31,6 @@ */ #define OMAP_SERIAL_NAME "ttyO" -#define OMAP_MDR1_DISABLE 0x07 -#define OMAP_MDR1_MODE13X 0x03 -#define OMAP_MDR1_MODE16X 0x00 #define OMAP_MODE13X_SPEED 230400 /* diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c index 14365f72b664..03a96db67de4 100644 --- a/drivers/serial/omap-serial.c +++ b/drivers/serial/omap-serial.c @@ -753,7 +753,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, /* Protocol, Baud Rate, and Interrupt Settings */ - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_DISABLE); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); up->efr = serial_in(up, UART_EFR); @@ -774,9 +774,9 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_out(up, UART_LCR, cval); if (baud > 230400 && baud != 3000000) - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE13X); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_13X_MODE); else - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE16X); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE); /* Hardware Flow Control Configuration */ diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index c7a0ce11cd47..6f3823474e6c 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -341,5 +341,17 @@ #define UART_OMAP_SYSS 0x16 /* System status register */ #define UART_OMAP_WER 0x17 /* Wake-up enable register */ +/* + * These are the definitions for the MDR1 register + */ +#define UART_OMAP_MDR1_16X_MODE 0x00 /* UART 16x mode */ +#define UART_OMAP_MDR1_SIR_MODE 0x01 /* SIR mode */ +#define UART_OMAP_MDR1_16X_ABAUD_MODE 0x02 /* UART 16x auto-baud */ +#define UART_OMAP_MDR1_13X_MODE 0x03 /* UART 13x mode */ +#define UART_OMAP_MDR1_MIR_MODE 0x04 /* MIR mode */ +#define UART_OMAP_MDR1_FIR_MODE 0x05 /* FIR mode */ +#define UART_OMAP_MDR1_CIR_MODE 0x06 /* 
CIR mode */ +#define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3-71-gd317 From 662b083a87a3489f3f19c6e0651c1b99b0de5df0 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 30 Nov 2010 14:11:49 -0800 Subject: omap: Serial: Define register access modes in LCR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to some registers depends on the register access mode. Three different modes are available for OMAP (at least): • Operational mode LCR_REG[7] = 0x0 • Configuration mode A LCR_REG[7] = 0x1 and LCR_REG[7:0] != 0xBF • Configuration mode B LCR_REG[7] = 0x1 and LCR_REG[7:0] = 0xBF Define access modes and remove redefinitions and magic numbers in serial drivers (and later in bluetooth driver). Signed-off-by: Andrei Emeltchenko Acked-by: Govindraj.R Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/serial.c | 12 +++++----- arch/arm/plat-omap/include/plat/omap-serial.h | 9 ------- drivers/serial/8250.c | 26 ++++++++++---------- drivers/serial/omap-serial.c | 34 +++++++++++++-------------- include/linux/serial_reg.h | 7 ++++++ 5 files changed, 43 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index fa9806250b50..9dc077e2d8af 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -219,7 +219,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart) return; lcr = serial_read_reg(uart, UART_LCR); - serial_write_reg(uart, UART_LCR, 0xBF); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); uart->dll = serial_read_reg(uart, UART_DLL); uart->dlh = serial_read_reg(uart, UART_DLM); serial_write_reg(uart, UART_LCR, lcr); @@ -227,7 +227,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart) uart->sysc = serial_read_reg(uart, UART_OMAP_SYSC); uart->scr = serial_read_reg(uart, UART_OMAP_SCR);
uart->wer = serial_read_reg(uart, UART_OMAP_WER); - serial_write_reg(uart, UART_LCR, 0x80); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A); uart->mcr = serial_read_reg(uart, UART_MCR); serial_write_reg(uart, UART_LCR, lcr); @@ -251,19 +251,19 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) else serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); efr = serial_read_reg(uart, UART_EFR); serial_write_reg(uart, UART_EFR, UART_EFR_ECB); serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */ serial_write_reg(uart, UART_IER, 0x0); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); serial_write_reg(uart, UART_DLL, uart->dll); serial_write_reg(uart, UART_DLM, uart->dlh); serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */ serial_write_reg(uart, UART_IER, uart->ier); - serial_write_reg(uart, UART_LCR, 0x80); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A); serial_write_reg(uart, UART_MCR, uart->mcr); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); serial_write_reg(uart, UART_EFR, efr); serial_write_reg(uart, UART_LCR, UART_LCR_WLEN8); serial_write_reg(uart, UART_OMAP_SCR, uart->scr); diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h index 6a1788014611..b3e0bad9b77e 100644 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/arch/arm/plat-omap/include/plat/omap-serial.h @@ -33,15 +33,6 @@ #define OMAP_MODE13X_SPEED 230400 -/* - * LCR = 0XBF: Switch to Configuration Mode B. - * In configuration mode b allow access - * to EFR,DLL,DLH. 
- * Reference OMAP TRM Chapter 17 - * Section: 1.4.3 Mode Selection - */ -#define OMAP_UART_LCR_CONF_MDB 0XBF - /* WER = 0x7F * Enable module level wakeup in WER reg */ diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 4d8e14b7aa93..aaf9907e6014 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -653,13 +653,13 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { if (p->capabilities & UART_CAP_SLEEP) { if (p->capabilities & UART_CAP_EFR) { - serial_outp(p, UART_LCR, 0xBF); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(p, UART_EFR, UART_EFR_ECB); serial_outp(p, UART_LCR, 0); } serial_outp(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { - serial_outp(p, UART_LCR, 0xBF); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(p, UART_EFR, 0); serial_outp(p, UART_LCR, 0); } @@ -752,7 +752,7 @@ static int size_fifo(struct uart_8250_port *up) serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_outp(up, UART_MCR, UART_MCR_LOOP); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(up); serial_dl_write(up, 0x0001); serial_outp(up, UART_LCR, 0x03); @@ -764,7 +764,7 @@ static int size_fifo(struct uart_8250_port *up) serial_inp(up, UART_RX); serial_outp(up, UART_FCR, old_fcr); serial_outp(up, UART_MCR, old_mcr); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_dl_write(up, old_dl); serial_outp(up, UART_LCR, old_lcr); @@ -782,7 +782,7 @@ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) unsigned int id; old_lcr = serial_inp(p, UART_LCR); - serial_outp(p, UART_LCR, UART_LCR_DLAB); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_A); old_dll = serial_inp(p, UART_DLL); old_dlm = serial_inp(p, UART_DLM); @@ -836,7 +836,7 @@ static void autoconfig_has_efr(struct uart_8250_port *up) * 
recommended for new designs). */ up->acr = 0; - serial_out(up, UART_LCR, 0xBF); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, 0x00); id1 = serial_icr_read(up, UART_ID1); @@ -945,7 +945,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. */ - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); if (serial_in(up, UART_EFR) == 0) { serial_outp(up, UART_EFR, 0xA8); if (serial_in(up, UART_EFR) != 0) { @@ -963,7 +963,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) * Maybe it requires 0xbf to be written to the LCR. * (other ST16C650V2 UARTs, TI16C752A, etc) */ - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { DEBUG_AUTOCONF("EFRv2 "); autoconfig_has_efr(up); @@ -1024,7 +1024,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) >> 5; serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status2 = serial_in(up, UART_IIR) >> 5; serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); @@ -1183,7 +1183,7 @@ static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) * We also initialise the EFR (if any) to zero for later. The * EFR occupies the same register location as the FCR and IIR. 
*/ - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, 0); serial_outp(up, UART_LCR, 0); @@ -1952,7 +1952,7 @@ static int serial8250_startup(struct uart_port *port) if (up->port.type == PORT_16C950) { /* Wake up and initialize UART */ up->acr = 0; - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, UART_EFR_ECB); serial_outp(up, UART_IER, 0); serial_outp(up, UART_LCR, 0); @@ -2002,7 +2002,7 @@ static int serial8250_startup(struct uart_port *port) if (up->port.type == PORT_16850) { unsigned char fctr; - serial_outp(up, UART_LCR, 0xbf); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); fctr = serial_inp(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); serial_outp(up, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_RX); @@ -2363,7 +2363,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & CRTSCTS) efr |= UART_EFR_CTS; - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, efr); } diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c index 03a96db67de4..1201eff1831e 100644 --- a/drivers/serial/omap-serial.c +++ b/drivers/serial/omap-serial.c @@ -570,7 +570,7 @@ serial_omap_configure_xonxoff unsigned char efr = 0; up->lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr & ~UART_EFR_ECB); @@ -598,7 +598,7 @@ serial_omap_configure_xonxoff efr |= OMAP_UART_SW_RX; serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); @@ -612,14 +612,14 @@ serial_omap_configure_xonxoff up->mcr |= UART_MCR_XONANY; serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); - serial_out(up, 
UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG); /* Enable special char function UARTi.EFR_REG[5] and * load the new software flow control mode IXON or IXOFF * and restore the UARTi.EFR_REG[4] ENHANCED_EN value. */ serial_out(up, UART_EFR, efr | UART_EFR_SCD); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr & ~UART_MCR_TCRTLR); serial_out(up, UART_LCR, up->lcr); @@ -724,22 +724,22 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, * baud clock is not running * DLL_REG and DLH_REG set to 0. */ - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_DLL, 0); serial_out(up, UART_DLM, 0); serial_out(up, UART_LCR, 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); /* FIFO ENABLE, DMA MODE */ serial_out(up, UART_FCR, up->fcr); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); if (up->use_dma) { serial_out(up, UART_TI752_TLR, 0); @@ -748,27 +748,27 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, } serial_out(up, UART_EFR, up->efr); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr); /* Protocol, Baud Rate, and Interrupt Settings */ serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | 
UART_EFR_ECB); serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, up->ier); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, up->efr); serial_out(up, UART_LCR, cval); @@ -782,18 +782,18 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & CRTSCTS) { efr |= (UART_EFR_CTS | UART_EFR_RTS); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG); serial_out(up, UART_EFR, efr); /* Enable AUTORTS and AUTOCTS */ - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr | UART_MCR_RTS); serial_out(up, UART_LCR, cval); } @@ -815,13 +815,13 @@ serial_omap_pm(struct uart_port *port, unsigned int state, unsigned char efr; dev_dbg(up->port.dev, "serial_omap_pm+%d\n", up->pdev->id); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, efr | UART_EFR_ECB); serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, (state != 0) ? 
UART_IERX_SLEEP : 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); /* Enable module level wake up */ diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 6f3823474e6c..3ecb71a9e505 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -99,6 +99,13 @@ #define UART_LCR_WLEN7 0x02 /* Wordlength: 7 bits */ #define UART_LCR_WLEN8 0x03 /* Wordlength: 8 bits */ +/* + * Access to some registers depends on register access / configuration + * mode. + */ +#define UART_LCR_CONF_MODE_A UART_LCR_DLAB /* Configutation mode A */ +#define UART_LCR_CONF_MODE_B 0xBF /* Configutation mode B */ + #define UART_MCR 4 /* Out: Modem Control Register */ #define UART_MCR_CLKSEL 0x80 /* Divide clock by 4 (TI16C752, EFR[4]=1) */ #define UART_MCR_TCRTLR 0x40 /* Access TCR/TLR (TI16C752, EFR[4]=1) */ -- cgit v1.2.3-71-gd317 From ad9c2b048b605fbc8d50526e330b88abdd631ab2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 30 Nov 2010 11:06:47 +0900 Subject: security: Fix comment of security_key_permission Comment for return value of security_key_permission() has been wrong since it was added in 2.6.15. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index fd4d55fb8845..e7d89b0c1fd8 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1058,8 +1058,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @cred points to the credentials to provide the context against which to * evaluate the security data on the key. * @perm describes the combination of permissions required of this key. - * Return 1 if permission granted, 0 if permission denied and -ve it the - * normal permissions model should be effected. 
+ * Return 0 if permission is granted, -ve error otherwise. * @key_getsecurity: * Get a textual representation of the security context attached to a key * for the purposes of honouring KEYCTL_GETSECURITY. This function -- cgit v1.2.3-71-gd317 From c41ab6a1b9028de33e74101cb0aae13098a56fdb Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 29 Nov 2010 15:47:09 -0500 Subject: flex_array: fix flex_array_put_ptr macro to be valid C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using flex_array_put_ptr() results in a compile error "error: lvalue required as unary ‘&’ operand" fix the casting order to fix this. Signed-off-by: Eric Paris --- include/linux/flex_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 631b77f2ac70..70e4efabe0fb 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -71,7 +71,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr); int flex_array_shrink(struct flex_array *fa); #define flex_array_put_ptr(fa, nr, src, gfp) \ - flex_array_put(fa, nr, &(void *)(src), gfp) + flex_array_put(fa, nr, (void *)&(src), gfp) void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr); -- cgit v1.2.3-71-gd317 From 7fc56f0d9908fe140a01387d59954e3d0a2e7744 Mon Sep 17 00:00:00 2001 From: Luo Andy Date: Tue, 23 Nov 2010 10:41:21 +0800 Subject: usb: gadget: langwell_udc: add usb test mode support This patch adds test mode support for Langwell gadget driver. 
Signed-off-by: Henry Yuan Signed-off-by: Andy Luo Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/langwell_udc.c | 23 +++++++++++++++++++++++ include/linux/usb/ch9.h | 10 ++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c index b8ec954c0692..777972454e3e 100644 --- a/drivers/usb/gadget/langwell_udc.c +++ b/drivers/usb/gadget/langwell_udc.c @@ -2225,6 +2225,7 @@ static void handle_setup_packet(struct langwell_udc *dev, u16 wValue = le16_to_cpu(setup->wValue); u16 wIndex = le16_to_cpu(setup->wIndex); u16 wLength = le16_to_cpu(setup->wLength); + u32 portsc1; dev_vdbg(&dev->pdev->dev, "---> %s()\n", __func__); @@ -2313,6 +2314,28 @@ static void handle_setup_packet(struct langwell_udc *dev, dev->dev_status &= ~(1 << wValue); } break; + case USB_DEVICE_TEST_MODE: + dev_dbg(&dev->pdev->dev, "SETUP: TEST MODE\n"); + if ((wIndex & 0xff) || + (dev->gadget.speed != USB_SPEED_HIGH)) + ep0_stall(dev); + + switch (wIndex >> 8) { + case TEST_J: + case TEST_K: + case TEST_SE0_NAK: + case TEST_PACKET: + case TEST_FORCE_EN: + if (prime_status_phase(dev, EP_DIR_IN)) + ep0_stall(dev); + portsc1 = readl(&dev->op_regs->portsc1); + portsc1 |= (wIndex & 0xf00) << 8; + writel(portsc1, &dev->op_regs->portsc1); + goto end; + default: + rc = -EOPNOTSUPP; + } + break; default: rc = -EOPNOTSUPP; break; diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index f917bbbc8901..ab461948b579 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -123,6 +123,16 @@ #define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ +/* + * Test Mode Selectors + * See USB 2.0 spec Table 9-7 + */ +#define TEST_J 1 +#define TEST_K 2 +#define TEST_SE0_NAK 3 +#define TEST_PACKET 4 +#define TEST_FORCE_EN 5 + /* * New Feature Selectors as added by USB 3.0 * See USB 3.0 spec Table 9-6 -- cgit v1.2.3-71-gd317 
From b029ffafe89cf4b97cf39e0225a5205cbbf9e02f Mon Sep 17 00:00:00 2001 From: Hemanth V Date: Tue, 30 Nov 2010 23:03:54 -0800 Subject: Input: add CMA3000 accelerometer driver Add support for CMA3000 Tri-axis accelerometer, which supports Motion detect, Measurement and Free fall modes. CMA3000 supports both I2C/SPI bus for communication, currently the driver supports I2C based communication. Signed-off-by: Hemanth V Reviewed-by: Jonathan Cameron Reviewed-by: Sergio Aguirre Reviewed-by: Shubhrajyoti Signed-off-by: Dmitry Torokhov --- Documentation/input/cma3000_d0x.txt | 115 ++++++++++ drivers/input/misc/Kconfig | 24 +++ drivers/input/misc/Makefile | 2 + drivers/input/misc/cma3000_d0x.c | 398 +++++++++++++++++++++++++++++++++++ drivers/input/misc/cma3000_d0x.h | 42 ++++ drivers/input/misc/cma3000_d0x_i2c.c | 141 +++++++++++++ include/linux/input/cma3000.h | 59 ++++++ 7 files changed, 781 insertions(+) create mode 100644 Documentation/input/cma3000_d0x.txt create mode 100644 drivers/input/misc/cma3000_d0x.c create mode 100644 drivers/input/misc/cma3000_d0x.h create mode 100644 drivers/input/misc/cma3000_d0x_i2c.c create mode 100644 include/linux/input/cma3000.h (limited to 'include/linux') diff --git a/Documentation/input/cma3000_d0x.txt b/Documentation/input/cma3000_d0x.txt new file mode 100644 index 000000000000..29d088db4afd --- /dev/null +++ b/Documentation/input/cma3000_d0x.txt @@ -0,0 +1,115 @@ +Kernel driver for CMA3000-D0x +============================ + +Supported chips: +* VTI CMA3000-D0x +Datasheet: + CMA3000-D0X Product Family Specification 8281000A.02.pdf + + +Author: Hemanth V + + +Description +----------- +CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and +Free fall modes. + +Motion Detect Mode: Its the low power mode where interrupts are generated only +when motion exceeds the defined thresholds. + +Measurement Mode: This mode is used to read the acceleration data on X,Y,Z +axis and supports 400, 100, 40 Hz sample frequency. 
+ +Free fall Mode: This mode is intended to save system resources. + +Threshold values: Chip supports defining threshold values for above modes +which includes time and g value. Refer product specifications for more details. + +CMA3000 chip supports mutually exclusive I2C and SPI interfaces for +communication, currently the driver supports I2C based communication only. +Initial configuration for bus mode is set in non volatile memory and can later +be modified through bus interface command. + +Driver reports acceleration data through input subsystem. It generates ABS_MISC +event with value 1 when free fall is detected. + +Platform data need to be configured for initial default values. + +Platform Data +------------- +fuzz_x: Noise on X Axis + +fuzz_y: Noise on Y Axis + +fuzz_z: Noise on Z Axis + +g_range: G range in milli g i.e 2000 or 8000 + +mode: Default Operating mode + +mdthr: Motion detect g range threshold value + +mdfftmr: Motion detect and free fall time threshold value + +ffthr: Free fall g range threshold value + +Input Interface +-------------- +Input driver version is 1.0.0 +Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0 +Input device name: "cma3000-accelerometer" +Supported events: + Event type 0 (Sync) + Event type 3 (Absolute) + Event code 0 (X) + Value 47 + Min -8000 + Max 8000 + Fuzz 200 + Event code 1 (Y) + Value -28 + Min -8000 + Max 8000 + Fuzz 200 + Event code 2 (Z) + Value 905 + Min -8000 + Max 8000 + Fuzz 200 + Event code 40 (Misc) + Value 0 + Min 0 + Max 1 + Event type 4 (Misc) + + +Register/Platform parameters Description +---------------------------------------- + +mode: + 0: power down mode + 1: 100 Hz Measurement mode + 2: 400 Hz Measurement mode + 3: 40 Hz Measurement mode + 4: Motion Detect mode (default) + 5: 100 Hz Free fall mode + 6: 40 Hz Free fall mode + 7: Power off mode + +grange: + 2000: 2000 mg or 2G Range + 8000: 8000 mg or 8G Range + +mdthr: + X: X * 71mg (8G Range) + X: X * 18mg (2G Range) + +mdfftmr: + X: (X 
& 0x70) * 100 ms (MDTMR) + (X & 0x0F) * 2.5 ms (FFTMR 400 Hz) + (X & 0x0F) * 10 ms (FFTMR 100 Hz) + +ffthr: + X: (X >> 2) * 18mg (2G Range) + X: (X & 0x0F) * 71 mg (8G Range) diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index b99b8cbde02f..f0d90172a65f 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -448,4 +448,28 @@ config INPUT_ADXL34X_SPI To compile this driver as a module, choose M here: the module will be called adxl34x-spi. +config INPUT_CMA3000 + tristate "VTI CMA3000 Tri-axis accelerometer" + help + Say Y here if you want to use VTI CMA3000_D0x Accelerometer + driver + + This driver currently only supports I2C interface to the + controller. Also select the I2C method. + + If unsure, say N + + To compile this driver as a module, choose M here: the + module will be called cma3000_d0x. + +config INPUT_CMA3000_I2C + tristate "Support I2C bus connection" + depends on INPUT_CMA3000 && I2C + help + Say Y here if you want to use VTI CMA3000_D0x Accelerometer + through I2C interface. + + To compile this driver as a module, choose M here: the + module will be called cma3000_d0x_i2c. 
+ endif diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 1fe1f6c8b737..35bcfe46555e 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -18,6 +18,8 @@ obj-$(CONFIG_INPUT_ATI_REMOTE2) += ati_remote2.o obj-$(CONFIG_INPUT_ATLAS_BTNS) += atlas_btns.o obj-$(CONFIG_INPUT_BFIN_ROTARY) += bfin_rotary.o obj-$(CONFIG_INPUT_CM109) += cm109.o +obj-$(CONFIG_INPUT_CMA3000) += cma3000_d0x.o +obj-$(CONFIG_INPUT_CMA3000_I2C) += cma3000_d0x_i2c.o obj-$(CONFIG_INPUT_COBALT_BTNS) += cobalt_btns.o obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o diff --git a/drivers/input/misc/cma3000_d0x.c b/drivers/input/misc/cma3000_d0x.c new file mode 100644 index 000000000000..1633b6342267 --- /dev/null +++ b/drivers/input/misc/cma3000_d0x.c @@ -0,0 +1,398 @@ +/* + * VTI CMA3000_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "cma3000_d0x.h" + +#define CMA3000_WHOAMI 0x00 +#define CMA3000_REVID 0x01 +#define CMA3000_CTRL 0x02 +#define CMA3000_STATUS 0x03 +#define CMA3000_RSTR 0x04 +#define CMA3000_INTSTATUS 0x05 +#define CMA3000_DOUTX 0x06 +#define CMA3000_DOUTY 0x07 +#define CMA3000_DOUTZ 0x08 +#define CMA3000_MDTHR 0x09 +#define CMA3000_MDFFTMR 0x0A +#define CMA3000_FFTHR 0x0B + +#define CMA3000_RANGE2G (1 << 7) +#define CMA3000_RANGE8G (0 << 7) +#define CMA3000_BUSI2C (0 << 4) +#define CMA3000_MODEMASK (7 << 1) +#define CMA3000_GRANGEMASK (1 << 7) + +#define CMA3000_STATUS_PERR 1 +#define CMA3000_INTSTATUS_FFDET (1 << 2) + +/* Settling time delay in ms */ +#define CMA3000_SETDELAY 30 + +/* Delay for clearing interrupt in us */ +#define CMA3000_INTDELAY 44 + + +/* + * Bit weights in mg for bit 0, other bits need + * multipy factor 2^n. Eight bit is the sign bit. + */ +#define BIT_TO_2G 18 +#define BIT_TO_8G 71 + +struct cma3000_accl_data { + const struct cma3000_bus_ops *bus_ops; + const struct cma3000_platform_data *pdata; + + struct device *dev; + struct input_dev *input_dev; + + int bit_to_mg; + int irq; + + int g_range; + u8 mode; + + struct mutex mutex; + bool opened; + bool suspended; +}; + +#define CMA3000_READ(data, reg, msg) \ + (data->bus_ops->read(data->dev, reg, msg)) +#define CMA3000_SET(data, reg, val, msg) \ + ((data)->bus_ops->write(data->dev, reg, val, msg)) + +/* + * Conversion for each of the eight modes to g, depending + * on G range i.e 2G or 8G. Some modes always operate in + * 8G. 
+ */ + +static int mode_to_mg[8][2] = { + { 0, 0 }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_8G }, + { BIT_TO_8G, BIT_TO_8G }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_2G }, + { 0, 0}, +}; + +static void decode_mg(struct cma3000_accl_data *data, int *datax, + int *datay, int *dataz) +{ + /* Data in 2's complement, convert to mg */ + *datax = ((s8)*datax) * data->bit_to_mg; + *datay = ((s8)*datay) * data->bit_to_mg; + *dataz = ((s8)*dataz) * data->bit_to_mg; +} + +static irqreturn_t cma3000_thread_irq(int irq, void *dev_id) +{ + struct cma3000_accl_data *data = dev_id; + int datax, datay, dataz; + u8 ctrl, mode, range, intr_status; + + intr_status = CMA3000_READ(data, CMA3000_INTSTATUS, "interrupt status"); + if (intr_status < 0) + return IRQ_NONE; + + /* Check if free fall is detected, report immediately */ + if (intr_status & CMA3000_INTSTATUS_FFDET) { + input_report_abs(data->input_dev, ABS_MISC, 1); + input_sync(data->input_dev); + } else { + input_report_abs(data->input_dev, ABS_MISC, 0); + } + + datax = CMA3000_READ(data, CMA3000_DOUTX, "X"); + datay = CMA3000_READ(data, CMA3000_DOUTY, "Y"); + dataz = CMA3000_READ(data, CMA3000_DOUTZ, "Z"); + + ctrl = CMA3000_READ(data, CMA3000_CTRL, "ctrl"); + mode = (ctrl & CMA3000_MODEMASK) >> 1; + range = (ctrl & CMA3000_GRANGEMASK) >> 7; + + data->bit_to_mg = mode_to_mg[mode][range]; + + /* Interrupt not for this device */ + if (data->bit_to_mg == 0) + return IRQ_NONE; + + /* Decode register values to milli g */ + decode_mg(data, &datax, &datay, &dataz); + + input_report_abs(data->input_dev, ABS_X, datax); + input_report_abs(data->input_dev, ABS_Y, datay); + input_report_abs(data->input_dev, ABS_Z, dataz); + input_sync(data->input_dev); + + return IRQ_HANDLED; +} + +static int cma3000_reset(struct cma3000_accl_data *data) +{ + int val; + + /* Reset sequence */ + CMA3000_SET(data, CMA3000_RSTR, 0x02, "Reset"); + CMA3000_SET(data, CMA3000_RSTR, 0x0A, "Reset"); + 
CMA3000_SET(data, CMA3000_RSTR, 0x04, "Reset"); + + /* Settling time delay */ + mdelay(10); + + val = CMA3000_READ(data, CMA3000_STATUS, "Status"); + if (val < 0) { + dev_err(data->dev, "Reset failed\n"); + return val; + } + + if (val & CMA3000_STATUS_PERR) { + dev_err(data->dev, "Parity Error\n"); + return -EIO; + } + + return 0; +} + +static int cma3000_poweron(struct cma3000_accl_data *data) +{ + const struct cma3000_platform_data *pdata = data->pdata; + u8 ctrl = 0; + int ret; + + if (data->g_range == CMARANGE_2G) { + ctrl = (data->mode << 1) | CMA3000_RANGE2G; + } else if (data->g_range == CMARANGE_8G) { + ctrl = (data->mode << 1) | CMA3000_RANGE8G; + } else { + dev_info(data->dev, + "Invalid G range specified, assuming 8G\n"); + ctrl = (data->mode << 1) | CMA3000_RANGE8G; + } + + ctrl |= data->bus_ops->ctrl_mod; + + CMA3000_SET(data, CMA3000_MDTHR, pdata->mdthr, + "Motion Detect Threshold"); + CMA3000_SET(data, CMA3000_MDFFTMR, pdata->mdfftmr, + "Time register"); + CMA3000_SET(data, CMA3000_FFTHR, pdata->ffthr, + "Free fall threshold"); + ret = CMA3000_SET(data, CMA3000_CTRL, ctrl, "Mode setting"); + if (ret < 0) + return -EIO; + + msleep(CMA3000_SETDELAY); + + return 0; +} + +static int cma3000_poweroff(struct cma3000_accl_data *data) +{ + int ret; + + ret = CMA3000_SET(data, CMA3000_CTRL, CMAMODE_POFF, "Mode setting"); + msleep(CMA3000_SETDELAY); + + return ret; +} + +static int cma3000_open(struct input_dev *input_dev) +{ + struct cma3000_accl_data *data = input_get_drvdata(input_dev); + + mutex_lock(&data->mutex); + + if (!data->suspended) + cma3000_poweron(data); + + data->opened = true; + + mutex_unlock(&data->mutex); + + return 0; +} + +static void cma3000_close(struct input_dev *input_dev) +{ + struct cma3000_accl_data *data = input_get_drvdata(input_dev); + + mutex_lock(&data->mutex); + + if (!data->suspended) + cma3000_poweroff(data); + + data->opened = false; + + mutex_unlock(&data->mutex); +} + +void cma3000_suspend(struct cma3000_accl_data *data) 
+{ + mutex_lock(&data->mutex); + + if (!data->suspended && data->opened) + cma3000_poweroff(data); + + data->suspended = true; + + mutex_unlock(&data->mutex); +} +EXPORT_SYMBOL(cma3000_suspend); + + +void cma3000_resume(struct cma3000_accl_data *data) +{ + mutex_lock(&data->mutex); + + if (data->suspended && data->opened) + cma3000_poweron(data); + + data->suspended = false; + + mutex_unlock(&data->mutex); +} +EXPORT_SYMBOL(cma3000_resume); + +struct cma3000_accl_data *cma3000_init(struct device *dev, int irq, + const struct cma3000_bus_ops *bops) +{ + const struct cma3000_platform_data *pdata = dev->platform_data; + struct cma3000_accl_data *data; + struct input_dev *input_dev; + int rev; + int error; + + if (!pdata) { + dev_err(dev, "platform data not found\n"); + error = -EINVAL; + goto err_out; + } + + + /* if no IRQ return error */ + if (irq == 0) { + error = -EINVAL; + goto err_out; + } + + data = kzalloc(sizeof(struct cma3000_accl_data), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!data || !input_dev) { + error = -ENOMEM; + goto err_free_mem; + } + + data->dev = dev; + data->input_dev = input_dev; + data->bus_ops = bops; + data->pdata = pdata; + data->irq = irq; + mutex_init(&data->mutex); + + data->mode = pdata->mode; + if (data->mode < CMAMODE_DEFAULT || data->mode > CMAMODE_POFF) { + data->mode = CMAMODE_MOTDET; + dev_warn(dev, + "Invalid mode specified, assuming Motion Detect\n"); + } + + data->g_range = pdata->g_range; + if (data->g_range != CMARANGE_2G && data->g_range != CMARANGE_8G) { + dev_info(dev, + "Invalid G range specified, assuming 8G\n"); + data->g_range = CMARANGE_8G; + } + + input_dev->name = "cma3000-accelerometer"; + input_dev->id.bustype = bops->bustype; + input_dev->open = cma3000_open; + input_dev->close = cma3000_close; + + __set_bit(EV_ABS, input_dev->evbit); + + input_set_abs_params(input_dev, ABS_X, + -data->g_range, data->g_range, pdata->fuzz_x, 0); + input_set_abs_params(input_dev, ABS_Y, + -data->g_range, 
data->g_range, pdata->fuzz_y, 0); + input_set_abs_params(input_dev, ABS_Z, + -data->g_range, data->g_range, pdata->fuzz_z, 0); + input_set_abs_params(input_dev, ABS_MISC, 0, 1, 0, 0); + + input_set_drvdata(input_dev, data); + + error = cma3000_reset(data); + if (error) + goto err_free_mem; + + rev = CMA3000_READ(data, CMA3000_REVID, "Revid"); + if (rev < 0) { + error = rev; + goto err_free_mem; + } + + pr_info("CMA3000 Accelerometer: Revision %x\n", rev); + + error = request_threaded_irq(irq, NULL, cma3000_thread_irq, + pdata->irqflags | IRQF_ONESHOT, + "cma3000_d0x", data); + if (error) { + dev_err(dev, "request_threaded_irq failed\n"); + goto err_free_mem; + } + + error = input_register_device(data->input_dev); + if (error) { + dev_err(dev, "Unable to register input device\n"); + goto err_free_irq; + } + + return data; + +err_free_irq: + free_irq(irq, data); +err_free_mem: + input_free_device(input_dev); + kfree(data); +err_out: + return ERR_PTR(error); +} +EXPORT_SYMBOL(cma3000_init); + +void cma3000_exit(struct cma3000_accl_data *data) +{ + free_irq(data->irq, data); + input_unregister_device(data->input_dev); + kfree(data); +} +EXPORT_SYMBOL(cma3000_exit); + +MODULE_DESCRIPTION("CMA3000-D0x Accelerometer Driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hemanth V "); diff --git a/drivers/input/misc/cma3000_d0x.h b/drivers/input/misc/cma3000_d0x.h new file mode 100644 index 000000000000..2304ce306e1c --- /dev/null +++ b/drivers/input/misc/cma3000_d0x.h @@ -0,0 +1,42 @@ +/* + * VTI CMA3000_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _INPUT_CMA3000_H +#define _INPUT_CMA3000_H + +#include +#include + +struct device; +struct cma3000_accl_data; + +struct cma3000_bus_ops { + u16 bustype; + u8 ctrl_mod; + int (*read)(struct device *, u8, char *); + int (*write)(struct device *, u8, u8, char *); +}; + +struct cma3000_accl_data *cma3000_init(struct device *dev, int irq, + const struct cma3000_bus_ops *bops); +void cma3000_exit(struct cma3000_accl_data *); +void cma3000_suspend(struct cma3000_accl_data *); +void cma3000_resume(struct cma3000_accl_data *); + +#endif diff --git a/drivers/input/misc/cma3000_d0x_i2c.c b/drivers/input/misc/cma3000_d0x_i2c.c new file mode 100644 index 000000000000..c52d278a7942 --- /dev/null +++ b/drivers/input/misc/cma3000_d0x_i2c.c @@ -0,0 +1,141 @@ +/* + * Implements I2C interface for VTI CMA3000_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see .
+ */ + +#include +#include +#include +#include "cma3000_d0x.h" + +static int cma3000_i2c_set(struct device *dev, + u8 reg, u8 val, char *msg) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, val); + if (ret < 0) + dev_err(&client->dev, + "%s failed (%s, %d)\n", __func__, msg, ret); + return ret; +} + +static int cma3000_i2c_read(struct device *dev, u8 reg, char *msg) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) + dev_err(&client->dev, + "%s failed (%s, %d)\n", __func__, msg, ret); + return ret; +} + +static const struct cma3000_bus_ops cma3000_i2c_bops = { + .bustype = BUS_I2C, +#define CMA3000_BUSI2C (0 << 4) + .ctrl_mod = CMA3000_BUSI2C, + .read = cma3000_i2c_read, + .write = cma3000_i2c_set, +}; + +static int __devinit cma3000_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct cma3000_accl_data *data; + + data = cma3000_init(&client->dev, client->irq, &cma3000_i2c_bops); + if (IS_ERR(data)) + return PTR_ERR(data); + + i2c_set_clientdata(client, data); + + return 0; +} + +static int __devexit cma3000_i2c_remove(struct i2c_client *client) +{ + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_exit(data); + + return 0; +} + +#ifdef CONFIG_PM +static int cma3000_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_suspend(data); + + return 0; +} + +static int cma3000_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_resume(data); + + return 0; +} + +static const struct dev_pm_ops cma3000_i2c_pm_ops = { + .suspend = cma3000_i2c_suspend, + .resume = cma3000_i2c_resume, +}; +#endif + +static const struct i2c_device_id cma3000_i2c_id[] = { + { "cma3000_d01", 0 
}, + { }, +}; + +static struct i2c_driver cma3000_i2c_driver = { + .probe = cma3000_i2c_probe, + .remove = __devexit_p(cma3000_i2c_remove), + .id_table = cma3000_i2c_id, + .driver = { + .name = "cma3000_i2c_accl", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &cma3000_i2c_pm_ops, +#endif + }, +}; + +static int __init cma3000_i2c_init(void) +{ + return i2c_add_driver(&cma3000_i2c_driver); +} + +static void __exit cma3000_i2c_exit(void) +{ + i2c_del_driver(&cma3000_i2c_driver); +} + +module_init(cma3000_i2c_init); +module_exit(cma3000_i2c_exit); + +MODULE_DESCRIPTION("CMA3000-D0x Accelerometer I2C Driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hemanth V "); diff --git a/include/linux/input/cma3000.h b/include/linux/input/cma3000.h new file mode 100644 index 000000000000..cbbaac27d311 --- /dev/null +++ b/include/linux/input/cma3000.h @@ -0,0 +1,59 @@ +/* + * VTI CMA3000_Dxx Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _LINUX_CMA3000_H +#define _LINUX_CMA3000_H + +#define CMAMODE_DEFAULT 0 +#define CMAMODE_MEAS100 1 +#define CMAMODE_MEAS400 2 +#define CMAMODE_MEAS40 3 +#define CMAMODE_MOTDET 4 +#define CMAMODE_FF100 5 +#define CMAMODE_FF400 6 +#define CMAMODE_POFF 7 + +#define CMARANGE_2G 2000 +#define CMARANGE_8G 8000 + +/** + * struct cma3000_platform_data - CMA3000 Platform data + * @fuzz_x: Noise on X Axis + * @fuzz_y: Noise on Y Axis + * @fuzz_z: Noise on Z Axis + * @g_range: G range in milli g i.e 2000 or 8000 + * @mode: Operating mode + * @mdthr: Motion detect threshold value + * @mdfftmr: Motion detect and free fall time value + * @ffthr: Free fall threshold value + */ + +struct cma3000_platform_data { + int fuzz_x; + int fuzz_y; + int fuzz_z; + int g_range; + uint8_t mode; + uint8_t mdthr; + uint8_t mdfftmr; + uint8_t ffthr; + unsigned long irqflags; +}; + +#endif -- cgit v1.2.3-71-gd317 From 8348c259dd6a6019a8fa01b0a3443409480f7b9d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 22 Nov 2010 17:12:15 -0800 Subject: arm/pxa2xx: reorgazine SSP and SPI header files The PXA-SPI driver relies on some files / defines which are arm specific and are within the ARM tree. The CE4100 SoC which is x86 has also the SPI core. This patch moves the ssp and spi files from arm/mach-pxa and plat-pxa to include/linux where the CE4100 can access them.
This move got verified by building the following defconfigs: cm_x2xx_defconfig corgi_defconfig em_x270_defconfig ezx_defconfig imote2_defconfig pxa3xx_defconfig spitz_defconfig zeus_defconfig raumfeld_defconfig magician_defconfig Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dirk Brandewie --- Documentation/spi/pxa2xx | 4 +- arch/arm/mach-pxa/cm-x255.c | 2 +- arch/arm/mach-pxa/cm-x270.c | 2 +- arch/arm/mach-pxa/corgi.c | 2 +- arch/arm/mach-pxa/devices.c | 2 +- arch/arm/mach-pxa/em-x270.c | 2 +- arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/icontrol.c | 2 +- arch/arm/mach-pxa/include/mach/pxa2xx_spi.h | 47 ------- arch/arm/mach-pxa/littleton.c | 2 +- arch/arm/mach-pxa/lubbock.c | 2 +- arch/arm/mach-pxa/pcm027.c | 2 +- arch/arm/mach-pxa/poodle.c | 2 +- arch/arm/mach-pxa/spitz.c | 3 +- arch/arm/mach-pxa/stargate2.c | 2 +- arch/arm/mach-pxa/tosa.c | 2 +- arch/arm/mach-pxa/trizeps4.c | 1 - arch/arm/mach-pxa/z2.c | 2 +- arch/arm/mach-pxa/zeus.c | 2 +- arch/arm/plat-pxa/include/plat/ssp.h | 187 ---------------------------- arch/arm/plat-pxa/ssp.c | 2 +- drivers/spi/pxa2xx_spi.c | 4 +- include/linux/pxa2xx_ssp.h | 187 ++++++++++++++++++++++++++++ include/linux/spi/pxa2xx_spi.h | 49 ++++++++ sound/soc/pxa/pxa-ssp.c | 2 +- 25 files changed, 257 insertions(+), 259 deletions(-) delete mode 100644 arch/arm/mach-pxa/include/mach/pxa2xx_spi.h delete mode 100644 arch/arm/plat-pxa/include/plat/ssp.h create mode 100644 include/linux/pxa2xx_ssp.h create mode 100644 include/linux/spi/pxa2xx_spi.h (limited to 'include/linux') diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index 6bb916d57c95..68a4fe3818a1 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -19,7 +19,7 @@ Declaring PXA2xx Master Controllers ----------------------------------- Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a "platform device". 
The master configuration is passed to the driver via a table -found in arch/arm/mach-pxa/include/mach/pxa2xx_spi.h: +found in include/linux/spi/pxa2xx_spi.h: struct pxa2xx_spi_master { enum pxa_ssp_type ssp_type; @@ -94,7 +94,7 @@ using the "spi_board_info" structure found in "linux/spi/spi.h". See Each slave device attached to the PXA must provide slave specific configuration information via the structure "pxa2xx_spi_chip" found in -"arch/arm/mach-pxa/include/mach/pxa2xx_spi.h". The pxa2xx_spi master controller driver +"include/linux/spi/pxa2xx_spi.h". The pxa2xx_spi master controller driver will uses the configuration whenever the driver communicates with the slave device. All fields are optional. diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c index f1a7703d771b..93f59f877fc6 100644 --- a/arch/arm/mach-pxa/cm-x255.c +++ b/arch/arm/mach-pxa/cm-x255.c @@ -17,13 +17,13 @@ #include #include +#include #include #include #include #include -#include #include "generic.h" diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index a9926bb75922..b88d601a8090 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -19,12 +19,12 @@ #include