From d0959024d8fb6555ba8bfdc6624cc7b7c2e675fd Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Wed, 20 Oct 2010 15:57:30 -0700
Subject: timer_list: Remove alignment padding on 64 bit when
 CONFIG_TIMER_STATS

Reorder struct timer_list to remove 8 bytes of alignment padding on 64
bit builds when CONFIG_TIMER_STATS is selected.

timer_list is widely used across the kernel so many structures will
benefit and shrink in size.

For example, with my config on x86_64
	per_cpu_dm_data shrinks from 136 to 128 bytes
and
	ahci_port_priv shrinks from 1032 to 968 bytes.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 38cf093ef62c..f3dccdb44f95 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -24,9 +24,9 @@ struct timer_list {
 	int slack;
 
 #ifdef CONFIG_TIMER_STATS
+	int start_pid;
 	void *start_site;
 	char start_comm[16];
-	int start_pid;
 #endif
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
-- 
cgit v1.2.3-71-gd317


From aaabe31c25a439b92cc281b14ca18b85bae7e7a6 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 20 Oct 2010 15:57:30 -0700
Subject: timer: Initialize the field slack of timer_list

TIMER_INITIALIZER() should initialize the field slack of timer_list as
__init_timer() does.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index f3dccdb44f95..1794674c1a52 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -54,6 +54,7 @@ extern struct tvec_base boot_tvec_bases;
 		.expires = (_expires),				\
 		.data = (_data),				\
 		.base = &boot_tvec_bases,			\
+		.slack = -1,					\
 		__TIMER_LOCKDEP_MAP_INITIALIZER(		\
 			__FILE__ ":" __stringify(__LINE__))	\
 	}
-- 
cgit v1.2.3-71-gd317


From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Wed, 20 Oct 2010 15:57:33 -0700
Subject: timer: Permit statically-declared work with deferrable timers

Currently, you have to just define a delayed_work uninitialised, and then
initialise it before first use.  That's a tad clumsy.  At risk of playing
mind-games with the compiler, fooling it into doing pointer arithmetic
with compile-time-constants, this lets clients properly initialise delayed
work with deferrable timers statically.

This patch was inspired by the issues which lead Artem Bityutskiy to
commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue
deferrable").

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Acked-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h     | 25 +++++++++++++++++++++++++
 include/linux/workqueue.h |  8 ++++++++
 kernel/timer.c            | 15 +--------------
 3 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 1794674c1a52..cbfb7a355d30 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases;
 #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
 #endif
 
+/*
+ * Note that all tvec_bases are 2 byte aligned and lower bit of
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
+ */
+#define TBASE_DEFERRABLE_FLAG		(0x1)
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {		\
 		.entry = { .prev = TIMER_ENTRY_STATIC },	\
 		.function = (_function),			\
@@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases;
 			__FILE__ ":" __stringify(__LINE__))	\
 	}
 
+#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *)		\
+		  ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))
+
+#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\
+		.entry = { .prev = TIMER_ENTRY_STATIC },	\
+		.function = (_function),			\
+		.expires = (_expires),				\
+		.data = (_data),				\
+		.base = TBASE_MAKE_DEFERRED(&boot_tvec_bases),	\
+		__TIMER_LOCKDEP_MAP_INITIALIZER(		\
+			__FILE__ ":" __stringify(__LINE__))	\
+	}
+
 #define DEFINE_TIMER(_name, _function, _expires, _data)		\
 	struct timer_list _name =				\
 		TIMER_INITIALIZER(_function, _expires, _data)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f11100f96482..88238c15ec3e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -127,12 +127,20 @@ struct execute_work {
 	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 	}
 
+#define __DEFERRED_WORK_INITIALIZER(n, f) {			\
+	.work = __WORK_INITIALIZER((n).work, (f)),		\
+	.timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0),	\
+	}
+
 #define DECLARE_WORK(n, f)					\
 	struct work_struct n = __WORK_INITIALIZER(n, f)
 
 #define DECLARE_DELAYED_WORK(n, f)				\
 	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
 
+#define DECLARE_DEFERRED_WORK(n, f)				\
+	struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
+
 /*
  * initialize a work item's function pointer
  */
diff --git a/kernel/timer.c b/kernel/timer.c
index 97bf05baade7..72853b256ff2 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
-/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB to
- * indicate whether the timer is deferrable.
- *
- * A deferrable timer will work normally when the system is busy, but
- * will not cause a CPU to come out of idle just to service it; instead,
- * the timer will be serviced when the CPU eventually wakes up with a
- * subsequent non-deferrable timer.
- */
-#define TBASE_DEFERRABLE_FLAG		(0x1)
-
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
@@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
-				       TBASE_DEFERRABLE_FLAG));
+	timer->base = TBASE_MAKE_DEFERRED(timer->base);
 }
 
 static inline void
-- 
cgit v1.2.3-71-gd317


From 6f1bc451e6a79470b122a37ee1fc6bbca450f444 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Wed, 20 Oct 2010 15:57:31 -0700
Subject: timer: Make try_to_del_timer_sync() the same on SMP and UP

On UP try_to_del_timer_sync() is mapped to del_timer() which does not
take the running timer callback into account, so it has different
semantics.

Remove the SMP dependency of try_to_del_timer_sync() by using
base->running_timer in the UP case as well.

[ tglx: Removed set_running_timer() inline and tweaked the changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h |  4 ++--
 kernel/timer.c        | 17 +++--------------
 2 files changed, 5 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index cbfb7a355d30..6abd9138beda 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -274,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
 
 extern void add_timer(struct timer_list *timer);
 
+extern int try_to_del_timer_sync(struct timer_list *timer);
+
 #ifdef CONFIG_SMP
-  extern int try_to_del_timer_sync(struct timer_list *timer);
   extern int del_timer_sync(struct timer_list *timer);
 #else
-# define try_to_del_timer_sync(t)	del_timer(t)
 # define del_timer_sync(t)		del_timer(t)
 #endif
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 72853b256ff2..47b86c1e3226 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -330,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-
-static inline void set_running_timer(struct tvec_base *base,
-					struct timer_list *timer)
-{
-#ifdef CONFIG_SMP
-	base->running_timer = timer;
-#endif
-}
-
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	unsigned long expires = timer->expires;
@@ -923,15 +914,12 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
-#ifdef CONFIG_SMP
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer do del
  *
  * This function tries to deactivate a timer. Upon successful (ret >= 0)
  * exit the timer is not queued and the handler is not running on any CPU.
- *
- * It must not be called from interrupt contexts.
  */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
@@ -960,6 +948,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
+#ifdef CONFIG_SMP
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1098,7 +1087,7 @@ static inline void __run_timers(struct tvec_base *base)
 
 			timer_stats_account_timer(timer);
 
-			set_running_timer(base, timer);
+			base->running_timer = timer;
 			detach_timer(timer, 1);
 
 			spin_unlock_irq(&base->lock);
@@ -1106,7 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
 			spin_lock_irq(&base->lock);
 		}
 	}
-	set_running_timer(base, NULL);
+	base->running_timer = NULL;
 	spin_unlock_irq(&base->lock);
 }
 
-- 
cgit v1.2.3-71-gd317


From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 20 Oct 2010 16:01:12 -0700
Subject: sched: Make sched_param argument static in sched_setscheduler()
 callers

Andrew Morton pointed out almost all sched_setscheduler() callers are
using fixed parameters and can be converted to static.  It reduces runtime
memory use a little.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: James Morris <jmorris@namei.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h         | 5 +++--
 kernel/irq/manage.c           | 4 +++-
 kernel/kthread.c              | 2 +-
 kernel/sched.c                | 6 +++---
 kernel/softirq.c              | 4 +++-
 kernel/trace/trace_selftest.c | 2 +-
 kernel/watchdog.c             | 2 +-
 7 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0383601a927c..849c8670583d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1942,9 +1942,10 @@ extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern int sched_setscheduler(struct task_struct *, int,
+			      const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
-				      struct sched_param *);
+				      const struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 644e8d5fa367..850f030fa0c2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -573,7 +573,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
  */
 static int irq_thread(void *data)
 {
-	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+	static struct sched_param param = {
+		.sched_priority = MAX_USER_RT_PRIO/2,
+	};
 	struct irqaction *action = data;
 	struct irq_desc *desc = irq_to_desc(action->irq);
 	int wake, oneshot = desc->status & IRQ_ONESHOT;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786349d1..74cf6f5e7ade 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 	wait_for_completion(&create.done);
 
 	if (!IS_ERR(create.result)) {
-		struct sched_param param = { .sched_priority = 0 };
+		static struct sched_param param = { .sched_priority = 0 };
 		va_list args;
 
 		va_start(args, namefmt);
diff --git a/kernel/sched.c b/kernel/sched.c
index d42992bccdfa..51944e8c38a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4701,7 +4701,7 @@ static bool check_same_owner(struct task_struct *p)
 }
 
 static int __sched_setscheduler(struct task_struct *p, int policy,
-				struct sched_param *param, bool user)
+				const struct sched_param *param, bool user)
 {
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
 	unsigned long flags;
@@ -4856,7 +4856,7 @@ recheck:
  * NOTE that the task may be already dead.
  */
 int sched_setscheduler(struct task_struct *p, int policy,
-		       struct sched_param *param)
+		       const struct sched_param *param)
 {
 	return __sched_setscheduler(p, policy, param, true);
 }
@@ -4874,7 +4874,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
  * but our caller might not have that capability.
  */
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
-			       struct sched_param *param)
+			       const struct sched_param *param)
 {
 	return __sched_setscheduler(p, policy, param, false);
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fc978889b194..081869ed3a9f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -851,7 +851,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 			     cpumask_any(cpu_online_mask));
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN: {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+		static struct sched_param param = {
+			.sched_priority = MAX_RT_PRIO-1
+		};
 
 		p = per_cpu(ksoftirqd, hotcpu);
 		per_cpu(ksoftirqd, hotcpu) = NULL;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 155a415b3209..562c56e048fd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
 static int trace_wakeup_test_thread(void *data)
 {
 	/* Make this a RT thread, doesn't need to be too high */
-	struct sched_param param = { .sched_priority = 5 };
+	static struct sched_param param = { .sched_priority = 5 };
 	struct completion *x = data;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index bafba687a6d8..94ca779aa9c2 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -307,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  */
 static int watchdog(void *unused)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
-- 
cgit v1.2.3-71-gd317


From 4b6ba8aacbb3185703b797286547d0f8f3859b02 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Tue, 26 Oct 2010 15:07:13 -0700
Subject: of/net: Move of_get_mac_address() to a common source file.

There are two identical implementations of of_get_mac_address(), one
each in arch/powerpc/kernel/prom_parse.c and
arch/microblaze/kernel/prom_parse.c.  Move this function to a new
common file of_net.{c,h} and adjust all the callers to include the new
header.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
[grant.likely@secretlab.ca: protect header with #ifdef]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/microblaze/include/asm/prom.h  |  3 ---
 arch/microblaze/kernel/prom_parse.c | 38 -----------------------------
 arch/powerpc/include/asm/prom.h     |  3 ---
 arch/powerpc/kernel/prom_parse.c    | 38 -----------------------------
 arch/powerpc/sysdev/mv64x60_dev.c   |  1 +
 arch/powerpc/sysdev/tsi108_dev.c    |  1 +
 drivers/net/fs_enet/fs_enet-main.c  |  1 +
 drivers/net/gianfar.c               |  1 +
 drivers/net/ucc_geth.c              |  1 +
 drivers/net/xilinx_emaclite.c       |  1 +
 drivers/of/Kconfig                  |  4 ++++
 drivers/of/Makefile                 |  1 +
 drivers/of/of_net.c                 | 48 +++++++++++++++++++++++++++++++++++++
 include/linux/of_net.h              | 15 ++++++++++++
 14 files changed, 74 insertions(+), 82 deletions(-)
 create mode 100644 drivers/of/of_net.c
 create mode 100644 include/linux/of_net.h

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index bdc38312ae4a..2e72af078b05 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -64,9 +64,6 @@ extern void kdump_move_device_tree(void);
 /* CPU OF node matching */
 struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 
-/* Get the MAC address */
-extern const void *of_get_mac_address(struct device_node *np);
-
 /**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 99d9b61cccb5..9ae24f4b882b 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -110,41 +110,3 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 	cells = prop ? *(u32 *)prop : of_n_size_cells(dn);
 	*size = of_read_number(dma_window, cells);
 }
-
-/**
- * Search the device tree for the best MAC address to use.  'mac-address' is
- * checked first, because that is supposed to contain to "most recent" MAC
- * address. If that isn't set, then 'local-mac-address' is checked next,
- * because that is the default address.  If that isn't set, then the obsolete
- * 'address' is checked, just in case we're using an old device tree.
- *
- * Note that the 'address' property is supposed to contain a virtual address of
- * the register set, but some DTS files have redefined that property to be the
- * MAC address.
- *
- * All-zero MAC addresses are rejected, because those could be properties that
- * exist in the device tree, but were not set by U-Boot.  For example, the
- * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
- * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
- * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
- * but is all zeros.
-*/
-const void *of_get_mac_address(struct device_node *np)
-{
-	struct property *pp;
-
-	pp = of_find_property(np, "mac-address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	pp = of_find_property(np, "local-mac-address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	pp = of_find_property(np, "address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_mac_address);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index ae26f2efd089..98264bf0a433 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -63,9 +63,6 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 /* cache lookup */
 struct device_node *of_find_next_cache_node(struct device_node *np);
 
-/* Get the MAC address */
-extern const void *of_get_mac_address(struct device_node *np);
-
 #ifdef CONFIG_NUMA
 extern int of_node_to_nid(struct device_node *device);
 #else
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 88334af038e5..c2b7a07cc3d3 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -117,41 +117,3 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 	cells = prop ? *(u32 *)prop : of_n_size_cells(dn);
 	*size = of_read_number(dma_window, cells);
 }
-
-/**
- * Search the device tree for the best MAC address to use.  'mac-address' is
- * checked first, because that is supposed to contain to "most recent" MAC
- * address. If that isn't set, then 'local-mac-address' is checked next,
- * because that is the default address.  If that isn't set, then the obsolete
- * 'address' is checked, just in case we're using an old device tree.
- *
- * Note that the 'address' property is supposed to contain a virtual address of
- * the register set, but some DTS files have redefined that property to be the
- * MAC address.
- *
- * All-zero MAC addresses are rejected, because those could be properties that
- * exist in the device tree, but were not set by U-Boot.  For example, the
- * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
- * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
- * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
- * but is all zeros.
-*/
-const void *of_get_mac_address(struct device_node *np)
-{
-	struct property *pp;
-
-	pp = of_find_property(np, "mac-address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	pp = of_find_property(np, "local-mac-address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	pp = of_find_property(np, "address", NULL);
-	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
-		return pp->value;
-
-	return NULL;
-}
-EXPORT_SYMBOL(of_get_mac_address);
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 1398bc454999..feaee402e2d6 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -16,6 +16,7 @@
 #include <linux/mv643xx.h>
 #include <linux/platform_device.h>
 #include <linux/of_platform.h>
+#include <linux/of_net.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/prom.h>
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index d4d15aaf18fa..c2d675b6392c 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
+#include <linux/of_net.h>
 #include <asm/tsi108.h>
 
 #include <asm/system.h>
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index d684f187de57..7a1f3d0ffa78 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -40,6 +40,7 @@
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/of_net.h>
 
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 49e4ce1246a7..f860072e2f68 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -95,6 +95,7 @@
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 #include <linux/of.h>
+#include <linux/of_net.h>
 
 #include "gianfar.h"
 #include "fsl_pq_mdio.h"
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index a4c3f5708246..f7e370fd8ddc 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -28,6 +28,7 @@
 #include <linux/phy.h>
 #include <linux/workqueue.h>
 #include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/of_platform.h>
 
 #include <asm/uaccess.h>
diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c
index 14f0955eca68..2a34b22ea26a 100644
--- a/drivers/net/xilinx_emaclite.c
+++ b/drivers/net/xilinx_emaclite.c
@@ -24,6 +24,7 @@
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 #include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/phy.h>
 
 #define DRIVER_NAME "xilinx_emaclite"
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index aa675ebd8eb3..e4b93a0a15d2 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -49,6 +49,10 @@ config OF_I2C
 	help
 	  OpenFirmware I2C accessors
 
+config OF_NET
+	depends on NETDEVICES
+	def_bool y
+
 config OF_SPI
 	def_tristate SPI
 	depends on SPI && !SPARC
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 7888155bea08..3ab21a0a4907 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_OF_IRQ)    += irq.o
 obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
+obj-$(CONFIG_OF_NET)	+= of_net.o
 obj-$(CONFIG_OF_SPI)	+= of_spi.o
 obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
new file mode 100644
index 000000000000..86f334a2769c
--- /dev/null
+++ b/drivers/of/of_net.c
@@ -0,0 +1,48 @@
+/*
+ * OF helpers for network devices.
+ *
+ * This file is released under the GPLv2
+ *
+ * Initially copied out of arch/powerpc/kernel/prom_parse.c
+ */
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/of_net.h>
+
+/**
+ * Search the device tree for the best MAC address to use.  'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address.  If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot.  For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+*/
+const void *of_get_mac_address(struct device_node *np)
+{
+	struct property *pp;
+
+	pp = of_find_property(np, "mac-address", NULL);
+	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
+		return pp->value;
+
+	pp = of_find_property(np, "local-mac-address", NULL);
+	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
+		return pp->value;
+
+	pp = of_find_property(np, "address", NULL);
+	if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value))
+		return pp->value;
+
+	return NULL;
+}
+EXPORT_SYMBOL(of_get_mac_address);
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
new file mode 100644
index 000000000000..e913081fb52a
--- /dev/null
+++ b/include/linux/of_net.h
@@ -0,0 +1,15 @@
+/*
+ * OF helpers for network devices.
+ *
+ * This file is released under the GPLv2
+ */
+
+#ifndef __LINUX_OF_NET_H
+#define __LINUX_OF_NET_H
+
+#ifdef CONFIG_OF_NET
+#include <linux/of.h>
+extern const void *of_get_mac_address(struct device_node *np);
+#endif
+
+#endif /* __LINUX_OF_NET_H */
-- 
cgit v1.2.3-71-gd317


From 4a92379bdfb48680a5e6775dd53a586df7b6b0b1 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Thu, 21 Oct 2010 10:29:19 +0100
Subject: slub tracing: move trace calls out of always inlined functions to
 reduce kernel code size

Having the trace calls defined in the always inlined kmalloc functions
in include/linux/slub_def.h causes a lot of code duplication as the
trace functions get instantiated for each kamalloc call site. This can
simply be removed by pushing the trace calls down into the functions in
slub.c.

On my x86_64 built this patch shrinks the code size of the kernel by
approx 36K and also shrinks the code size of many modules -- too many to
list here ;)

size vmlinux (2.6.36) reports
       text        data     bss     dec     hex filename
    5410611	 743172	 828928	6982711	 6a8c37	vmlinux
    5373738	 744244	 828928	6946910	 6a005e	vmlinux + patch

The resulting kernel has had some testing & kmalloc trace still seems to
work.

This patch
- moves trace_kmalloc out of the inlined kmalloc() and pushes it down
into kmem_cache_alloc_trace() so this it only get instantiated once.

- rename kmem_cache_alloc_notrace()  to kmem_cache_alloc_trace() to
indicate that now is does have tracing. (maybe this would better being
called something like kmalloc_kmem_cache ?)

- adds a new function kmalloc_order() to handle allocation and tracing
of large allocations of page order.

- removes tracing from the inlined kmalloc_large() replacing them with a
call to kmalloc_order();

- move tracing out of inlined kmalloc_node() and pushing it down into
kmem_cache_alloc_node_trace

- rename kmem_cache_alloc_node_notrace() to
kmem_cache_alloc_node_trace()

- removes the include of trace/events/kmem.h from slub_def.h.

v2
- keep kmalloc_order_trace inline when !CONFIG_TRACE

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slub_def.h | 55 +++++++++++++++++++++++-------------------------
 mm/slub.c                | 30 ++++++++++++++++++++------
 2 files changed, 49 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index e4f5ed180b9b..8b6e8ae5d5ca 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,8 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <linux/kmemleak.h>
 
-#include <trace/events/kmem.h>
+#include <linux/kmemleak.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
@@ -216,31 +215,40 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+static __always_inline void *
+kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	kmemleak_alloc(ret, size, 1, flags);
+	return ret;
+}
+
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+extern void *
+kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
 #else
 static __always_inline void *
-kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	return kmem_cache_alloc(s, gfpflags);
 }
+
+static __always_inline void *
+kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+{
+	return kmalloc_order(size, flags, order);
+}
 #endif
 
 static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 {
 	unsigned int order = get_order(size);
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
-
-	kmemleak_alloc(ret, size, 1, flags);
-	trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags);
-
-	return ret;
+	return kmalloc_order_trace(size, flags, order);
 }
 
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
-	void *ret;
-
 	if (__builtin_constant_p(size)) {
 		if (size > SLUB_MAX_SIZE)
 			return kmalloc_large(size, flags);
@@ -251,11 +259,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 			if (!s)
 				return ZERO_SIZE_PTR;
 
-			ret = kmem_cache_alloc_notrace(s, flags);
-
-			trace_kmalloc(_THIS_IP_, ret, size, s->size, flags);
-
-			return ret;
+			return kmem_cache_alloc_trace(s, flags, size);
 		}
 	}
 	return __kmalloc(size, flags);
@@ -266,14 +270,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 					   gfp_t gfpflags,
-					   int node);
+					   int node, size_t size);
 #else
 static __always_inline void *
-kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+kmem_cache_alloc_node_trace(struct kmem_cache *s,
 			      gfp_t gfpflags,
-			      int node)
+			      int node, size_t size)
 {
 	return kmem_cache_alloc_node(s, gfpflags, node);
 }
@@ -281,8 +285,6 @@ kmem_cache_alloc_node_notrace(struct kmem_cache *s,
 
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	void *ret;
-
 	if (__builtin_constant_p(size) &&
 		size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
@@ -290,12 +292,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 		if (!s)
 			return ZERO_SIZE_PTR;
 
-		ret = kmem_cache_alloc_node_notrace(s, flags, node);
-
-		trace_kmalloc_node(_THIS_IP_, ret,
-				   size, s->size, flags, node);
-
-		return ret;
+		return kmem_cache_alloc_node_trace(s, flags, node, size);
 	}
 	return __kmalloc_node(size, flags, node);
 }
diff --git a/mm/slub.c b/mm/slub.c
index 8fd5401bb071..7e657aa19475 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -28,6 +28,8 @@
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
 
+#include <trace/events/kmem.h>
+
 /*
  * Lock order:
  *   1. slab_lock(page)
@@ -1774,11 +1776,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+{
+	void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
+	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	return ret;
+}
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
+
+void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
-	return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
+	void *ret = kmalloc_order(size, flags, order);
+	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	return ret;
 }
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+EXPORT_SYMBOL(kmalloc_order_trace);
 #endif
 
 #ifdef CONFIG_NUMA
@@ -1794,13 +1806,17 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 				    gfp_t gfpflags,
-				    int node)
+				    int node, size_t size)
 {
-	return slab_alloc(s, gfpflags, node, _RET_IP_);
+	void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+	trace_kmalloc_node(_RET_IP_, ret,
+			   size, s->size, gfpflags, node);
+	return ret;
 }
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
 #endif
 #endif
 
-- 
cgit v1.2.3-71-gd317


From fc766e4c4965915ab52a1d1fa3c7a7b3e7bc07f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Oct 2010 03:09:24 +0000
Subject: decnet: RCU conversion and get rid of dev_base_lock

While tracking dev_base_lock users, I found decnet used it in
dnet_select_source(), but for a wrong purpose:

Writers only hold RTNL, not dev_base_lock, so readers must use RCU if
they cannot use RTNL.

Adds an rcu_head in struct dn_ifaddr and handle proper RCU management.

Adds __rcu annotation in dn_route as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 +-
 include/net/dn_dev.h      |  27 ++++++++-----
 include/net/dst.h         |   8 ++--
 net/decnet/af_decnet.c    |   2 +-
 net/decnet/dn_dev.c       | 100 +++++++++++++++++++++++++++-------------------
 net/decnet/dn_fib.c       |   6 ++-
 net/decnet/dn_neigh.c     |   2 +-
 net/decnet/dn_route.c     |  68 +++++++++++++++++--------------
 8 files changed, 127 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8fd2c23a1b9..578debb801f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -951,7 +951,7 @@ struct net_device {
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
 	struct in_device __rcu	*ip_ptr;	/* IPv4 specific data	*/
-	void                    *dn_ptr;        /* DECnet specific data */
+	struct dn_dev __rcu     *dn_ptr;        /* DECnet specific data */
 	struct inet6_dev __rcu	*ip6_ptr;       /* IPv6 specific data */
 	void			*ec_ptr;	/* Econet specific data	*/
 	void			*ax25_ptr;	/* AX.25 specific data */
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
index 0916bbf3bdff..b9e32db03f20 100644
--- a/include/net/dn_dev.h
+++ b/include/net/dn_dev.h
@@ -5,13 +5,14 @@
 struct dn_dev;
 
 struct dn_ifaddr {
-	struct dn_ifaddr *ifa_next;
+	struct dn_ifaddr __rcu *ifa_next;
 	struct dn_dev    *ifa_dev;
 	__le16            ifa_local;
 	__le16            ifa_address;
 	__u8              ifa_flags;
 	__u8              ifa_scope;
 	char              ifa_label[IFNAMSIZ];
+	struct rcu_head   rcu;
 };
 
 #define DN_DEV_S_RU  0 /* Run - working normally   */
@@ -83,7 +84,7 @@ struct dn_dev_parms {
 
 
 struct dn_dev {
-	struct dn_ifaddr *ifa_list;
+	struct dn_ifaddr __rcu *ifa_list;
 	struct net_device *dev;
 	struct dn_dev_parms parms;
 	char use_long;
@@ -171,19 +172,27 @@ extern int unregister_dnaddr_notifier(struct notifier_block *nb);
 
 static inline int dn_dev_islocal(struct net_device *dev, __le16 addr)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
+	int res = 0;
 
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
 	if (dn_db == NULL) {
 		printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n");
-		return 0;
+		goto out;
 	}
 
-	for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next)
-		if ((addr ^ ifa->ifa_local) == 0)
-			return 1;
-
-	return 0;
+	for (ifa = rcu_dereference(dn_db->ifa_list);
+	     ifa != NULL;
+	     ifa = rcu_dereference(ifa->ifa_next))
+		if ((addr ^ ifa->ifa_local) == 0) {
+			res = 1;
+			break;
+		}
+out:
+	rcu_read_unlock();
+	return res;
 }
 
 #endif /* _NET_DN_DEV_H */
diff --git a/include/net/dst.h b/include/net/dst.h
index ffe9cb719c0e..a5bd72646d65 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -94,10 +94,10 @@ struct dst_entry {
 	int			__use;
 	unsigned long		lastuse;
 	union {
-		struct dst_entry *next;
-		struct rtable __rcu *rt_next;
-		struct rt6_info   *rt6_next;
-		struct dn_route  *dn_next;
+		struct dst_entry	*next;
+		struct rtable __rcu	*rt_next;
+		struct rt6_info		*rt6_next;
+		struct dn_route __rcu	*dn_next;
 	};
 };
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..18b8a2cbdf77 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1848,7 +1848,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu)
 {
 	unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER;
 	if (dev) {
-		struct dn_dev *dn_db = dev->dn_ptr;
+		struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 		mtu -= LL_RESERVED_SPACE(dev);
 		if (dn_db->use_long)
 			mtu -= 21;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4c409b46aa35..0ba15633c418 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 	if (table->extra1 == NULL)
 		return -EINVAL;
 
-	dn_db = dev->dn_ptr;
+	dn_db = rcu_dereference_raw(dev->dn_ptr);
 	old = dn_db->parms.forwarding;
 
 	err = proc_dointvec(table, write, buffer, lenp, ppos);
@@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
 	return ifa;
 }
 
-static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa)
+static void dn_dev_free_ifa_rcu(struct rcu_head *head)
 {
-	kfree(ifa);
+	kfree(container_of(head, struct dn_ifaddr, rcu));
 }
 
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy)
+static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
 {
-	struct dn_ifaddr *ifa1 = *ifap;
+	call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu);
+}
+
+static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
+{
+	struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
 	unsigned char mac_addr[6];
 	struct net_device *dev = dn_db->dev;
 
@@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	ASSERT_RTNL();
 
 	/* Check for duplicates */
-	for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+	for (ifa1 = rtnl_dereference(dn_db->ifa_list);
+	     ifa1 != NULL;
+	     ifa1 = rtnl_dereference(ifa1->ifa_next)) {
 		if (ifa1->ifa_local == ifa->ifa_local)
 			return -EEXIST;
 	}
@@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	}
 
 	ifa->ifa_next = dn_db->ifa_list;
-	dn_db->ifa_list = ifa;
+	rcu_assign_pointer(dn_db->ifa_list, ifa);
 
 	dn_ifaddr_notify(RTM_NEWADDR, ifa);
 	blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 
 static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 	int rv;
 
 	if (dn_db == NULL) {
@@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
 	struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
 	struct dn_dev *dn_db;
 	struct net_device *dev;
-	struct dn_ifaddr *ifa = NULL, **ifap = NULL;
+	struct dn_ifaddr *ifa = NULL;
+	struct dn_ifaddr __rcu **ifap = NULL;
 	int ret = 0;
 
 	if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
@@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
 		goto done;
 	}
 
-	if ((dn_db = dev->dn_ptr) != NULL) {
-		for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next)
+	if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
+		for (ifap = &dn_db->ifa_list;
+		     (ifa = rtnl_dereference(*ifap)) != NULL;
+		     ifap = &ifa->ifa_next)
 			if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
 				break;
 	}
@@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
 
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (dev)
-		dn_dev = dev->dn_ptr;
+		dn_dev = rtnl_dereference(dev->dn_ptr);
 
 	return dn_dev;
 }
@@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFA_MAX+1];
 	struct dn_dev *dn_db;
 	struct ifaddrmsg *ifm;
-	struct dn_ifaddr *ifa, **ifap;
+	struct dn_ifaddr *ifa;
+	struct dn_ifaddr __rcu **ifap;
 	int err = -EINVAL;
 
 	if (!net_eq(net, &init_net))
@@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		goto errout;
 
 	err = -EADDRNOTAVAIL;
-	for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) {
+	for (ifap = &dn_db->ifa_list;
+	     (ifa = rtnl_dereference(*ifap)) != NULL;
+	     ifap = &ifa->ifa_next) {
 		if (tb[IFA_LOCAL] &&
 		    nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
 			continue;
@@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
 		return -ENODEV;
 
-	if ((dn_db = dev->dn_ptr) == NULL) {
+	if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
 		dn_db = dn_dev_create(dev, &err);
 		if (!dn_db)
 			return err;
@@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			skip_naddr = 0;
 		}
 
-		if ((dn_db = dev->dn_ptr) == NULL)
+		if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
 			goto cont;
 
-		for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
-		     ifa = ifa->ifa_next, dn_idx++) {
+		for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
+		     ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
 			if (dn_idx < skip_naddr)
 				continue;
 
@@ -773,21 +786,22 @@ done:
 
 static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
 {
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 	int rv = -ENODEV;
 
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
 	if (dn_db == NULL)
 		goto out;
 
-	rtnl_lock();
-	ifa = dn_db->ifa_list;
+	ifa = rcu_dereference(dn_db->ifa_list);
 	if (ifa != NULL) {
 		*addr = ifa->ifa_local;
 		rv = 0;
 	}
-	rtnl_unlock();
 out:
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	struct endnode_hello_message *msg;
 	struct sk_buff *skb = NULL;
 	__le16 *pktlen;
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
 		return;
@@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn
 static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
 	int n;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 	struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
 	struct sk_buff *skb;
 	size_t size;
@@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dn_send_endnode_hello(dev, ifa);
@@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 static int dn_eth_up(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dev_mc_add(dev, dn_rt_all_end_mcast);
@@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev)
 
 static void dn_eth_down(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dev_mc_del(dev, dn_rt_all_end_mcast);
@@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev);
 static void dn_dev_timer_func(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
 	if (dn_db->t3 <= dn_db->parms.t2) {
 		if (dn_db->parms.timer3) {
-			for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+			for (ifa = rcu_dereference(dn_db->ifa_list);
+			     ifa;
+			     ifa = rcu_dereference(ifa->ifa_next)) {
 				if (!(ifa->ifa_flags & IFA_F_SECONDARY))
 					dn_db->parms.timer3(dev, ifa);
 			}
@@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg)
 	} else {
 		dn_db->t3 -= dn_db->parms.t2;
 	}
-
+	rcu_read_unlock();
 	dn_dev_set_timer(dev);
 }
 
 static void dn_dev_set_timer(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.t2 > dn_db->parms.t3)
 		dn_db->parms.t2 = dn_db->parms.t3;
@@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 		return NULL;
 
 	memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
-	smp_wmb();
-	dev->dn_ptr = dn_db;
+
+	rcu_assign_pointer(dev->dn_ptr, dn_db);
 	dn_db->dev = dev;
 	init_timer(&dn_db->timer);
 
@@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 
 	dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
 	if (!dn_db->neigh_parms) {
-		dev->dn_ptr = NULL;
+		rcu_assign_pointer(dev->dn_ptr, NULL);
 		kfree(dn_db);
 		return NULL;
 	}
@@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev)
 	struct dn_ifaddr *ifa;
 	__le16 addr = decnet_address;
 	int maybe_default = 0;
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 
 	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
 		return;
@@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev)
 
 static void dn_dev_delete(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 
 	if (dn_db == NULL)
 		return;
@@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev)
 
 void dn_dev_down(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 	struct dn_ifaddr *ifa;
 
 	if (dn_db == NULL)
 		return;
 
-	while((ifa = dn_db->ifa_list) != NULL) {
+	while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
 		dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
 		dn_dev_free_ifa(ifa);
 	}
@@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev)
 }
 
 static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rcu)
+	__acquires(RCU)
 {
 	int i;
 	struct net_device *dev;
@@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void dn_dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(rcu)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
@@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
 		struct net_device *dev = v;
 		char peer_buf[DN_ASCBUF_LEN];
 		char router_buf[DN_ASCBUF_LEN];
-		struct dn_dev *dn_db = dev->dn_ptr;
+		struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
 
 		seq_printf(seq, "%-8s %1s     %04u %04u   %04lu %04lu"
 				"   %04hu    %03d %02x    %-10s %-7s %-7s\n",
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 4ab96c15166d..0ef0a81bcd72 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
 	/* Scan device list */
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
-		dn_db = dev->dn_ptr;
+		dn_db = rcu_dereference(dev->dn_ptr);
 		if (dn_db == NULL)
 			continue;
-		for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) {
+		for (ifa2 = rcu_dereference(dn_db->ifa_list);
+		     ifa2 != NULL;
+		     ifa2 = rcu_dereference(ifa2->ifa_next)) {
 			if (ifa2->ifa_local == ifa->ifa_local) {
 				found_it = 1;
 				break;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index a085dbcf5c7f..602dade7e9a3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
 		write_lock(&neigh->lock);
 
 		neigh->used = jiffies;
-		dn_db = (struct dn_dev *)neigh->dev->dn_ptr;
+		dn_db = rcu_dereference(neigh->dev->dn_ptr);
 
 		if (!(neigh->nud_state & NUD_PERMANENT)) {
 			neigh->updated = jiffies;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index df0f3e54ff8a..94a9eb1d313e 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -93,7 +93,7 @@
 
 struct dn_rt_hash_bucket
 {
-	struct dn_route *chain;
+	struct dn_route __rcu *chain;
 	spinlock_t lock;
 };
 
@@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt)
 static void dn_dst_check_expire(unsigned long dummy)
 {
 	int i;
-	struct dn_route *rt, **rtp;
+	struct dn_route *rt;
+	struct dn_route __rcu **rtp;
 	unsigned long now = jiffies;
 	unsigned long expire = 120 * HZ;
 
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 		rtp = &dn_rt_hash_table[i].chain;
 
 		spin_lock(&dn_rt_hash_table[i].lock);
-		while((rt=*rtp) != NULL) {
+		while ((rt = rcu_dereference_protected(*rtp,
+						lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
 			if (atomic_read(&rt->dst.__refcnt) ||
 					(now - rt->dst.lastuse) < expire) {
 				rtp = &rt->dst.dn_next;
@@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy)
 
 static int dn_dst_gc(struct dst_ops *ops)
 {
-	struct dn_route *rt, **rtp;
+	struct dn_route *rt;
+	struct dn_route __rcu **rtp;
 	int i;
 	unsigned long now = jiffies;
 	unsigned long expire = 10 * HZ;
 
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 		rtp = &dn_rt_hash_table[i].chain;
 
-		while((rt=*rtp) != NULL) {
+		while ((rt = rcu_dereference_protected(*rtp,
+						lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
 			if (atomic_read(&rt->dst.__refcnt) ||
 					(now - rt->dst.lastuse) < expire) {
 				rtp = &rt->dst.dn_next;
@@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	u32 min_mtu = 230;
 	struct dn_dev *dn = dst->neighbour ?
-			    (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL;
+			    rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL;
 
 	if (dn && dn->use_long == 0)
 		min_mtu -= 6;
@@ -277,13 +281,15 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
 {
-	struct dn_route *rth, **rthp;
+	struct dn_route *rth;
+	struct dn_route __rcu **rthp;
 	unsigned long now = jiffies;
 
 	rthp = &dn_rt_hash_table[hash].chain;
 
 	spin_lock_bh(&dn_rt_hash_table[hash].lock);
-	while((rth = *rthp) != NULL) {
+	while ((rth = rcu_dereference_protected(*rthp,
+						lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
 		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
 			*rthp = rth->dst.dn_next;
@@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy)
 	int i;
 	struct dn_route *rt, *next;
 
-	for(i = 0; i < dn_rt_hash_mask; i++) {
+	for (i = 0; i < dn_rt_hash_mask; i++) {
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 
-		if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL)
+		if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
 			goto nothing_to_declare;
 
-		for(; rt; rt=next) {
-			next = rt->dst.dn_next;
-			rt->dst.dn_next = NULL;
+		for(; rt; rt = next) {
+			next = rcu_dereference_raw(rt->dst.dn_next);
+			RCU_INIT_POINTER(rt->dst.dn_next, NULL);
 			dst_free((struct dst_entry *)rt);
 		}
 
@@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb)
  */
 static int dn_route_rx_packet(struct sk_buff *skb)
 {
-	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dn_skb_cb *cb;
 	int err;
 
 	if ((err = dn_route_input(skb)) == 0)
 		return dst_input(skb);
 
+	cb = DN_SKB_CB(skb);
 	if (decnet_debug_level & 4) {
 		char *devname = skb->dev ? skb->dev->name : "???";
-		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
 		printk(KERN_DEBUG
 			"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
 			(int)cb->rt_flags, devname, skb->len,
@@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 	struct dn_skb_cb *cb;
 	unsigned char flags = 0;
 	__u16 len = le16_to_cpu(*(__le16 *)skb->data);
-	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
 	unsigned char padlen = 0;
 
 	if (!net_eq(dev_net(dev), &init_net))
@@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	struct dst_entry *dst = skb_dst(skb);
-	struct dn_dev *dn_db = dst->dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
 	struct dn_route *rt;
 	struct neighbour *neigh = dst->neighbour;
 	int header_len;
@@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
 static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
 {
 	__le16 saddr = 0;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 	int best_match = 0;
 	int ret;
 
-	read_lock(&dev_base_lock);
-	for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
+	for (ifa = rcu_dereference(dn_db->ifa_list);
+	     ifa != NULL;
+	     ifa = rcu_dereference(ifa->ifa_next)) {
 		if (ifa->ifa_scope > scope)
 			continue;
 		if (!daddr) {
@@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int
 		if (best_match == 0)
 			saddr = ifa->ifa_local;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	return saddr;
 }
@@ -1020,7 +1030,7 @@ source_ok:
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;
-		dn_db = dev_out->dn_ptr;
+		dn_db = rcu_dereference_raw(dev_out->dn_ptr);
 		/* Possible improvement - check all devices for local addr */
 		if (dn_dev_islocal(dev_out, fl.fld_dst)) {
 			dev_put(dev_out);
@@ -1233,7 +1243,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 
 	dev_hold(in_dev);
 
-	if ((dn_db = in_dev->dn_ptr) == NULL)
+	if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
 		goto out;
 
 	/* Zero source addresses are not allowed */
@@ -1677,15 +1687,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 {
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	rt = rt->dst.dn_next;
-	while(!rt) {
+	rt = rcu_dereference_bh(rt->dst.dn_next);
+	while (!rt) {
 		rcu_read_unlock_bh();
 		if (--s->bucket < 0)
 			break;
 		rcu_read_lock_bh();
-		rt = dn_rt_hash_table[s->bucket].chain;
+		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
 	}
-	return rcu_dereference_bh(rt);
+	return rt;
 }
 
 static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
-- 
cgit v1.2.3-71-gd317


From 1da4b1c6a4dfb5a13d7147a27c1ac53fed09befd Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Tue, 9 Nov 2010 11:22:58 +0000
Subject: x86/mrst: Add SFI platform device parsing code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SFI provides a series of tables. These describe the platform devices present
including SPI and I²C devices, as well as various sensors, keypads and other
glue as well as interfaces provided via the SCU IPC mechanism (intel_scu_ipc.c)

This patch is a merge of the core elements and relevant fixes from the
Intel development code by Feng, Alek, myself into a single coherent patch
for upstream submission.

It provides the needed infrastructure to register I2C, SPI and platform devices
described by the tables, as well as handlers for some of the hardware already
supported in kernel. The 0.8 firmware also provides GPIO tables.

Devices are created at boot time or if they are SCU dependant at the point an
SCU is discovered. The existing Linux device mechanisms will then handle the
device binding. At an abstract level this is an SFI to Linux device translator.

Device/platform specific setup/glue is in this file. This is done so that the
drivers for the generic I²C and SPI bus devices remain cross platform as they
should.

(Updated from RFC version to correct the emc1403 name used by the firmware
 and a wrongly used #define)

Signed-off-by: Alek Du <alek.du@linux.intel.com>
LKML-Reference: <20101109112158.20013.6158.stgit@localhost.localdomain>
[Clean ups, removal of 0.7 support]
Signed-off-by: Feng Tang <feng.tang@linux.intel.com>
[Clean ups]
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                     |   2 +
 arch/x86/include/asm/mrst.h          |   4 +
 arch/x86/platform/mrst/mrst.c        | 515 ++++++++++++++++++++++++++++++++++-
 drivers/platform/x86/intel_scu_ipc.c |   5 +
 include/linux/sfi.h                  |   8 +-
 5 files changed, 527 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e8327686d3c5..b306b84fc8c8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -385,6 +385,8 @@ config X86_MRST
 	depends on X86_EXTENDED_PLATFORM
 	depends on X86_IO_APIC
 	select APB_TIMER
+	select I2C
+	select SPI
 	---help---
 	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 	  Internet Device(MID) platform. Moorestown consists of two chips:
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 4a711a684b17..283debd29fc0 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -50,4 +50,8 @@ extern void mrst_early_console_init(void);
 
 extern struct console early_hsu_console;
 extern void hsu_early_console_init(void);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
 #endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 79ae68154e87..cfa1af24edd5 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -9,9 +9,19 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
+
+#define pr_fmt(fmt) "mrst: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 
@@ -23,8 +33,10 @@
 #include <asm/mrst.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/apb_timer.h>
 
+
 /*
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
  * cmdline option x86_mrst_timer can be used to override the configuration
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
 		memcpy(sfi_mtimer_array, pentry, totallen);
 	}
 
-	printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
+	pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
 	pentry = sfi_mtimer_array;
 	for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
-		printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
+		pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
 			" irq = %d\n", totallen, (u32)pentry->phys_addr,
 			pentry->freq_hz, pentry->irq);
 			if (!pentry->irq)
@@ -176,10 +188,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
 		memcpy(sfi_mrtc_array, pentry, totallen);
 	}
 
-	printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
+	pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
 	pentry = sfi_mrtc_array;
 	for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
-		printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
+		pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
 			totallen, (u32)pentry->phys_addr, pentry->irq);
 		mp_irq.type = MP_IOAPIC;
 		mp_irq.irqtype = mp_INT;
@@ -309,3 +321,498 @@ static inline int __init setup_x86_mrst_timer(char *arg)
 	return 0;
 }
 __setup("x86_mrst_timer=", setup_x86_mrst_timer);
+
+/*
+ * Parsing GPIO table first, since the DEVS table will need this table
+ * to map the pin name to the actual pin.
+ */
+static struct sfi_gpio_table_entry *gpio_table;
+static int gpio_num_entry;
+
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_gpio_table_entry *pentry;
+	int num, i;
+
+	if (gpio_table)
+		return 0;
+	sb = (struct sfi_table_simple *)table;
+	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+	pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+	gpio_table = (struct sfi_gpio_table_entry *)
+				kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+	if (!gpio_table)
+		return -1;
+	memcpy(gpio_table, pentry, num * sizeof(*pentry));
+	gpio_num_entry = num;
+
+	pr_debug("GPIO pin info:\n");
+	for (i = 0; i < num; i++, pentry++)
+		pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+		" pin = %d\n", i,
+			pentry->controller_name,
+			pentry->pin_name,
+			pentry->pin_no);
+	return 0;
+}
+
+static int get_gpio_by_name(const char *name)
+{
+	struct sfi_gpio_table_entry *pentry = gpio_table;
+	int i;
+
+	if (!pentry)
+		return -1;
+	for (i = 0; i < gpio_num_entry; i++, pentry++) {
+		if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+			return pentry->pin_no;
+	}
+	return -1;
+}
+
+/*
+ * Here defines the array of devices platform data that IAFW would export
+ * through SFI "DEVS" table, we use name and type to match the device and
+ * its platform data.
+ */
+struct devs_id {
+	char name[SFI_NAME_LEN + 1];
+	u8 type;
+	u8 delay;
+	void *(*get_platform_data)(void *info);
+};
+
+/* the offset for the mapping of global gpio pin to irq */
+#define MRST_IRQ_OFFSET 0x100
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+	static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+	int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+	if (gpio_base == -1)
+		gpio_base = 64;
+	pmic_gpio_pdata.gpio_base = gpio_base;
+	pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
+	pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+	return &pmic_gpio_pdata;
+}
+
+static void __init *max3111_platform_data(void *info)
+{
+	struct spi_board_info *spi_info = info;
+	int intr = get_gpio_by_name("max3111_int");
+
+	if (intr == -1)
+		return NULL;
+	spi_info->irq = intr + MRST_IRQ_OFFSET;
+	return NULL;
+}
+
+/* we have multiple max7315 on the board ... */
+#define MAX7315_NUM 2
+static void __init *max7315_platform_data(void *info)
+{
+	static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+	static int nr;
+	struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+	struct i2c_board_info *i2c_info = info;
+	int gpio_base, intr;
+	char base_pin_name[SFI_NAME_LEN + 1];
+	char intr_pin_name[SFI_NAME_LEN + 1];
+
+	if (nr == MAX7315_NUM) {
+		pr_err("too many max7315s, we only support %d\n",
+				MAX7315_NUM);
+		return NULL;
+	}
+	/* we have several max7315 on the board, we only need load several
+	 * instances of the same pca953x driver to cover them
+	 */
+	strcpy(i2c_info->type, "max7315");
+	if (nr++) {
+		sprintf(base_pin_name, "max7315_%d_base", nr);
+		sprintf(intr_pin_name, "max7315_%d_int", nr);
+	} else {
+		strcpy(base_pin_name, "max7315_base");
+		strcpy(intr_pin_name, "max7315_int");
+	}
+
+	gpio_base = get_gpio_by_name(base_pin_name);
+	intr = get_gpio_by_name(intr_pin_name);
+
+	if (gpio_base == -1)
+		return NULL;
+	max7315->gpio_base = gpio_base;
+	if (intr != -1) {
+		i2c_info->irq = intr + MRST_IRQ_OFFSET;
+		max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
+	} else {
+		i2c_info->irq = -1;
+		max7315->irq_base = -1;
+	}
+	return max7315;
+}
+
+static void __init *emc1403_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("thermal_int");
+	int intr2nd = get_gpio_by_name("thermal_alert");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + MRST_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static void __init *lis331dl_platform_data(void *info)
+{
+	static short intr2nd_pdata;
+	struct i2c_board_info *i2c_info = info;
+	int intr = get_gpio_by_name("accel_int");
+	int intr2nd = get_gpio_by_name("accel_2");
+
+	if (intr == -1 || intr2nd == -1)
+		return NULL;
+
+	i2c_info->irq = intr + MRST_IRQ_OFFSET;
+	intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+	return &intr2nd_pdata;
+}
+
+static const struct devs_id __initconst device_ids[] = {
+	{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+	{"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
+	{"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+	{"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+	{"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
+	{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
+	{},
+};
+
+#define MAX_IPCDEVS	24
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static int ipc_next_dev;
+
+#define MAX_SCU_SPI	24
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static int spi_next_dev;
+
+#define MAX_SCU_I2C	24
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static int i2c_bus[MAX_SCU_I2C];
+static int i2c_next_dev;
+
+static void __init intel_scu_device_register(struct platform_device *pdev)
+{
+	if(ipc_next_dev == MAX_IPCDEVS)
+		pr_err("too many SCU IPC devices");
+	else
+		ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+	struct spi_board_info *new_dev;
+
+	if (spi_next_dev == MAX_SCU_SPI) {
+		pr_err("too many SCU SPI devices");
+		return;
+	}
+
+	new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	if (!new_dev) {
+		pr_err("failed to alloc mem for delayed spi dev %s\n",
+			sdev->modalias);
+		return;
+	}
+	memcpy(new_dev, sdev, sizeof(*sdev));
+
+	spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+						struct i2c_board_info *idev)
+{
+	struct i2c_board_info *new_dev;
+
+	if (i2c_next_dev == MAX_SCU_I2C) {
+		pr_err("too many SCU I2C devices");
+		return;
+	}
+
+	new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+	if (!new_dev) {
+		pr_err("failed to alloc mem for delayed i2c dev %s\n",
+			idev->type);
+		return;
+	}
+	memcpy(new_dev, idev, sizeof(*idev));
+
+	i2c_bus[i2c_next_dev] = bus;
+	i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+	int i;
+
+	for (i = 0; i < ipc_next_dev; i++)
+		platform_device_add(ipc_devs[i]);
+
+	for (i = 0; i < spi_next_dev; i++)
+		spi_register_board_info(spi_devs[i], 1);
+
+	for (i = 0; i < i2c_next_dev; i++) {
+		struct i2c_adapter *adapter;
+		struct i2c_client *client;
+
+		adapter = i2c_get_adapter(i2c_bus[i]);
+		if (adapter) {
+			client = i2c_new_device(adapter, i2c_devs[i]);
+			if (!client)
+				pr_err("can't create i2c device %s\n",
+					i2c_devs[i]->type);
+		} else
+			i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+	}
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+	int i;
+
+	for (i = 0; i < ipc_next_dev; i++)
+		platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+	/* Single threaded */
+	static struct resource __initdata res = {
+		.name = "IRQ",
+		.flags = IORESOURCE_IRQ,
+	};
+	res.start = irq;
+	platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_IPC &&
+			!strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(pdev);
+			break;
+		}
+		dev++;
+	}
+	pdev->dev.platform_data = pdata;
+	intel_scu_device_register(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_SPI &&
+				!strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(spi_info);
+			break;
+		}
+		dev++;
+	}
+	spi_info->platform_data = pdata;
+	if (dev->delay)
+		intel_scu_spi_device_register(spi_info);
+	else
+		spi_register_board_info(spi_info, 1);
+}
+
+static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
+{
+	const struct devs_id *dev = device_ids;
+	void *pdata = NULL;
+
+	while (dev->name[0]) {
+		if (dev->type == SFI_DEV_TYPE_I2C &&
+			!strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
+			pdata = dev->get_platform_data(i2c_info);
+			break;
+		}
+		dev++;
+	}
+	i2c_info->platform_data = pdata;
+
+	if (dev->delay)
+		intel_scu_i2c_device_register(bus, i2c_info);
+	else
+		i2c_register_board_info(bus, i2c_info, 1);
+ }
+
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+	struct sfi_table_simple *sb;
+	struct sfi_device_table_entry *pentry;
+	struct spi_board_info spi_info;
+	struct i2c_board_info i2c_info;
+	struct platform_device *pdev;
+	int num, i, bus;
+	int ioapic;
+	struct io_apic_irq_attr irq_attr;
+
+	sb = (struct sfi_table_simple *)table;
+	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+	pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+	for (i = 0; i < num; i++, pentry++) {
+		if (pentry->irq != (u8)0xff) { /* native RTE case */
+			/* these SPI2 devices are not exposed to system as PCI
+			 * devices, but they have separate RTE entry in IOAPIC
+			 * so we have to enable them one by one here
+			 */
+			ioapic = mp_find_ioapic(pentry->irq);
+			irq_attr.ioapic = ioapic;
+			irq_attr.ioapic_pin = pentry->irq;
+			irq_attr.trigger = 1;
+			irq_attr.polarity = 1;
+			io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+		}
+		switch (pentry->type) {
+		case SFI_DEV_TYPE_IPC:
+			/* ID as IRQ is a hack that will go away */
+			pdev = platform_device_alloc(pentry->name, pentry->irq);
+			if (pdev == NULL) {
+				pr_err("out of memory for SFI platform device '%s'.\n",
+							pentry->name);
+				continue;
+			}
+			install_irq_resource(pdev, pentry->irq);
+			pr_debug("info[%2d]: IPC bus, name = %16.16s, "
+				"irq = 0x%2x\n", i, pentry->name, pentry->irq);
+			sfi_handle_ipc_dev(pdev);
+			break;
+		case SFI_DEV_TYPE_SPI:
+			memset(&spi_info, 0, sizeof(spi_info));
+			strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+			spi_info.irq = pentry->irq;
+			spi_info.bus_num = pentry->host_num;
+			spi_info.chip_select = pentry->addr;
+			spi_info.max_speed_hz = pentry->max_freq;
+			pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
+				"irq = 0x%2x, max_freq = %d, cs = %d\n", i,
+				spi_info.bus_num,
+				spi_info.modalias,
+				spi_info.irq,
+				spi_info.max_speed_hz,
+				spi_info.chip_select);
+			sfi_handle_spi_dev(&spi_info);
+			break;
+		case SFI_DEV_TYPE_I2C:
+			memset(&i2c_info, 0, sizeof(i2c_info));
+			bus = pentry->host_num;
+			strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+			i2c_info.irq = pentry->irq;
+			i2c_info.addr = pentry->addr;
+			pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
+				"irq = 0x%2x, addr = 0x%x\n", i, bus,
+				i2c_info.type,
+				i2c_info.irq,
+				i2c_info.addr);
+			sfi_handle_i2c_dev(bus, &i2c_info);
+			break;
+		case SFI_DEV_TYPE_UART:
+		case SFI_DEV_TYPE_HSI:
+		default:
+			;
+		}
+	}
+	return 0;
+}
+
+static int __init mrst_platform_init(void)
+{
+	sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+	sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+	return 0;
+}
+arch_initcall(mrst_platform_init);
+
+/*
+ * we will search these buttons in SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible
+ * buttons here, we will shrink them if no GPIO found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+	{KEY_POWER,		-1, 1, "power_btn",	EV_KEY, 0, 3000},
+	{KEY_PROG1,		-1, 1, "prog_btn1",	EV_KEY, 0, 20},
+	{KEY_PROG2,		-1, 1, "prog_btn2",	EV_KEY, 0, 20},
+	{SW_LID,		-1, 1, "lid_switch",	EV_SW,  0, 20},
+	{KEY_VOLUMEUP,		-1, 1, "vol_up",	EV_KEY, 0, 20},
+	{KEY_VOLUMEDOWN,	-1, 1, "vol_down",	EV_KEY, 0, 20},
+	{KEY_CAMERA,		-1, 1, "camera_full",	EV_KEY, 0, 20},
+	{KEY_CAMERA_FOCUS,	-1, 1, "camera_half",	EV_KEY, 0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw1",	EV_SW,  0, 20},
+	{SW_KEYPAD_SLIDE,	-1, 1, "MagSw2",	EV_SW,  0, 20},
+};
+
+static struct gpio_keys_platform_data mrst_gpio_keys = {
+	.buttons	= gpio_button,
+	.rep		= 1,
+	.nbuttons	= -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+	.name		= "gpio-keys",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &mrst_gpio_keys,
+	},
+};
+
+/*
+ * Shrink the non-existent buttons, register the gpio button
+ * device if there is some
+ */
+static int __init pb_keys_init(void)
+{
+	struct gpio_keys_button *gb = gpio_button;
+	int i, num, good = 0;
+
+	num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
+	for (i = 0; i < num; i++) {
+		gb[i].gpio = get_gpio_by_name(gb[i].desc);
+		if (gb[i].gpio == -1)
+			continue;
+
+		if (i != good)
+			gb[good] = gb[i];
+		good++;
+	}
+
+	if (good) {
+		mrst_gpio_keys.nbuttons = good;
+		return platform_device_register(&pb_device);
+	}
+	return 0;
+}
+late_initcall(pb_keys_init);
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 41a9e34899ac..ca35b0ce944a 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -26,6 +26,7 @@
 #include <linux/sfi.h>
 #include <asm/mrst.h>
 #include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>
 
 /* IPC defines the following message types */
 #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		iounmap(ipcdev.ipc_base);
 		return -ENOMEM;
 	}
+
+	intel_scu_devices_create();
+
 	return 0;
 }
 
@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev)
 	iounmap(ipcdev.ipc_base);
 	iounmap(ipcdev.i2c_base);
 	ipcdev.pdev = NULL;
+	intel_scu_devices_destroy();
 }
 
 static const struct pci_device_id pci_ids[] = {
diff --git a/include/linux/sfi.h b/include/linux/sfi.h
index 7f770c638e99..fe817918b30e 100644
--- a/include/linux/sfi.h
+++ b/include/linux/sfi.h
@@ -77,6 +77,8 @@
 #define SFI_OEM_ID_SIZE		6
 #define SFI_OEM_TABLE_ID_SIZE	8
 
+#define SFI_NAME_LEN		16
+
 #define SFI_SYST_SEARCH_BEGIN		0x000E0000
 #define SFI_SYST_SEARCH_END		0x000FFFFF
 
@@ -156,13 +158,13 @@ struct sfi_device_table_entry {
 	u16	addr;
 	u8	irq;
 	u32	max_freq;
-	char	name[16];
+	char	name[SFI_NAME_LEN];
 } __packed;
 
 struct sfi_gpio_table_entry {
-	char	controller_name[16];
+	char	controller_name[SFI_NAME_LEN];
 	u16	pin_no;
-	char	pin_name[16];
+	char	pin_name[SFI_NAME_LEN];
 } __packed;
 
 typedef int (*sfi_table_handler) (struct sfi_table_header *table);
-- 
cgit v1.2.3-71-gd317


From 751305d9b2fd3e03eaab7808e976241d85ca4353 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@laptop.org>
Date: Thu, 28 Oct 2010 18:23:01 +0100
Subject: viafb: General power management infrastructure

Multiple devices need S/R hooks (framebuffer, GPIO, camera).
Add infrastructure and convert existing framebuffer code to the new
model.

This patch should create no functional change.
Based on earlier work by Jonathan Corbet.

Signed-off-by: Daniel Drake <dsd@laptop.org>
Acked-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 drivers/video/via/via-core.c | 79 ++++++++++++++++++++++++++++++++++++++++++--
 drivers/video/via/viafbdev.c | 34 +++++++++----------
 drivers/video/via/viafbdev.h |  2 --
 include/linux/via-core.h     | 15 +++++++++
 4 files changed, 107 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/via/via-core.c b/drivers/video/via/via-core.c
index 31e30338e893..42be3d955887 100644
--- a/drivers/video/via/via-core.c
+++ b/drivers/video/via/via-core.c
@@ -15,6 +15,8 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/pm.h>
 
 /*
  * The default port config.
@@ -563,6 +565,78 @@ static void via_teardown_subdevs(void)
 		}
 }
 
+/*
+ * Power management functions
+ */
+#ifdef CONFIG_PM
+static LIST_HEAD(viafb_pm_hooks);
+static DEFINE_MUTEX(viafb_pm_hooks_lock);
+
+void viafb_pm_register(struct viafb_pm_hooks *hooks)
+{
+	INIT_LIST_HEAD(&hooks->list);
+
+	mutex_lock(&viafb_pm_hooks_lock);
+	list_add_tail(&hooks->list, &viafb_pm_hooks);
+	mutex_unlock(&viafb_pm_hooks_lock);
+}
+EXPORT_SYMBOL_GPL(viafb_pm_register);
+
+void viafb_pm_unregister(struct viafb_pm_hooks *hooks)
+{
+	mutex_lock(&viafb_pm_hooks_lock);
+	list_del(&hooks->list);
+	mutex_unlock(&viafb_pm_hooks_lock);
+}
+EXPORT_SYMBOL_GPL(viafb_pm_unregister);
+
+static int via_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct viafb_pm_hooks *hooks;
+
+	if (state.event != PM_EVENT_SUSPEND)
+		return 0;
+	/*
+	 * "I've occasionally hit a few drivers that caused suspend
+	 * failures, and each and every time it was a driver bug, and
+	 * the right thing to do was to just ignore the error and suspend
+	 * anyway - returning an error code and trying to undo the suspend
+	 * is not what anybody ever really wants, even if our model
+	 *_allows_ for it."
+	 * -- Linus Torvalds, Dec. 7, 2009
+	 */
+	mutex_lock(&viafb_pm_hooks_lock);
+	list_for_each_entry_reverse(hooks, &viafb_pm_hooks, list)
+		hooks->suspend(hooks->private);
+	mutex_unlock(&viafb_pm_hooks_lock);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	return 0;
+}
+
+static int via_resume(struct pci_dev *pdev)
+{
+	struct viafb_pm_hooks *hooks;
+
+	/* Get the bus side powered up */
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	if (pci_enable_device(pdev))
+		return 0;
+
+	pci_set_master(pdev);
+
+	/* Now bring back any subdevs */
+	mutex_lock(&viafb_pm_hooks_lock);
+	list_for_each_entry(hooks, &viafb_pm_hooks, list)
+		hooks->resume(hooks->private);
+	mutex_unlock(&viafb_pm_hooks_lock);
+
+	return 0;
+}
+#endif /* CONFIG_PM */
 
 static int __devinit via_pci_probe(struct pci_dev *pdev,
 		const struct pci_device_id *ent)
@@ -572,6 +646,7 @@ static int __devinit via_pci_probe(struct pci_dev *pdev,
 	ret = pci_enable_device(pdev);
 	if (ret)
 		return ret;
+
 	/*
 	 * Global device initialization.
 	 */
@@ -651,8 +726,8 @@ static struct pci_driver via_driver = {
 	.probe		= via_pci_probe,
 	.remove		= __devexit_p(via_pci_remove),
 #ifdef CONFIG_PM
-	.suspend	= viafb_suspend,
-	.resume		= viafb_resume,
+	.suspend	= via_suspend,
+	.resume		= via_resume,
 #endif
 };
 
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index d298cfccd6fc..289edd519527 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -1672,31 +1672,19 @@ static int parse_mode(const char *str, u32 *xres, u32 *yres)
 
 
 #ifdef CONFIG_PM
-int viafb_suspend(struct pci_dev *pdev, pm_message_t state)
+static int viafb_suspend(void *unused)
 {
-	if (state.event == PM_EVENT_SUSPEND) {
-		acquire_console_sem();
-		fb_set_suspend(viafbinfo, 1);
-
-		viafb_sync(viafbinfo);
-
-		pci_save_state(pdev);
-		pci_disable_device(pdev);
-		pci_set_power_state(pdev, pci_choose_state(pdev, state));
-		release_console_sem();
-	}
+	acquire_console_sem();
+	fb_set_suspend(viafbinfo, 1);
+	viafb_sync(viafbinfo);
+	release_console_sem();
 
 	return 0;
 }
 
-int viafb_resume(struct pci_dev *pdev)
+static int viafb_resume(void *unused)
 {
 	acquire_console_sem();
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	if (pci_enable_device(pdev))
-		goto fail;
-	pci_set_master(pdev);
 	if (viaparinfo->shared->vdev->engine_mmio)
 		viafb_reset_engine(viaparinfo);
 	viafb_set_par(viafbinfo);
@@ -1704,11 +1692,15 @@ int viafb_resume(struct pci_dev *pdev)
 		viafb_set_par(viafbinfo1);
 	fb_set_suspend(viafbinfo, 0);
 
-fail:
 	release_console_sem();
 	return 0;
 }
 
+static struct viafb_pm_hooks viafb_fb_pm_hooks = {
+	.suspend = viafb_suspend,
+	.resume = viafb_resume
+};
+
 #endif
 
 
@@ -1899,6 +1891,10 @@ int __devinit via_fb_pci_probe(struct viafb_dev *vdev)
 
 	viafb_init_proc(viaparinfo->shared);
 	viafb_init_dac(IGA2);
+
+#ifdef CONFIG_PM
+	viafb_pm_register(&viafb_fb_pm_hooks);
+#endif
 	return 0;
 
 out_fb_unreg:
diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h
index 4960e3da6645..d66f963e930e 100644
--- a/drivers/video/via/viafbdev.h
+++ b/drivers/video/via/viafbdev.h
@@ -108,6 +108,4 @@ void via_fb_pci_remove(struct pci_dev *pdev);
 /* Temporary */
 int viafb_init(void);
 void viafb_exit(void);
-int viafb_suspend(struct pci_dev *pdev, pm_message_t state);
-int viafb_resume(struct pci_dev *pdev);
 #endif /* __VIAFBDEV_H__ */
diff --git a/include/linux/via-core.h b/include/linux/via-core.h
index 7ffb521e1a7a..a4327a0c8efc 100644
--- a/include/linux/via-core.h
+++ b/include/linux/via-core.h
@@ -59,6 +59,21 @@ struct via_port_cfg {
 	u8			ioport_index;
 };
 
+/*
+ * Allow subdevs to register suspend/resume hooks.
+ */
+#ifdef CONFIG_PM
+struct viafb_pm_hooks {
+	struct list_head list;
+	int (*suspend)(void *private);
+	int (*resume)(void *private);
+	void *private;
+};
+
+void viafb_pm_register(struct viafb_pm_hooks *hooks);
+void viafb_pm_unregister(struct viafb_pm_hooks *hooks);
+#endif /* CONFIG_PM */
+
 /*
  * This is the global viafb "device" containing stuff needed by
  * all subdevs.
-- 
cgit v1.2.3-71-gd317


From 4d17aeb1c5b2375769446d13012a98e6d265ec13 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 21 Sep 2010 19:37:15 +0530
Subject: OMAP: I2C: split device registration and convert OMAP2+ to
 omap_device

Split the OMAP1 and OMAP2+ platform_device build and register code.
Convert the OMAP2+ variant to use omap_device.

This patch was developed in collaboration with Rajendra Nayak
<rnayak@ti.com>.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
---
 arch/arm/plat-omap/i2c.c | 124 +++++++++++++++++++----------------------------
 include/linux/i2c-omap.h |   5 ++
 2 files changed, 54 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index a5ce4f0aad35..a5bff9ce7cbe 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -27,18 +27,18 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/i2c-omap.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
 
 #include <mach/irqs.h>
 #include <plat/mux.h>
 #include <plat/i2c.h>
 #include <plat/omap-pm.h>
+#include <plat/omap_device.h>
 
 #define OMAP_I2C_SIZE		0x3f
 #define OMAP1_I2C_BASE		0xfffb3800
-#define OMAP2_I2C_BASE1		0x48070000
-#define OMAP2_I2C_BASE2		0x48072000
-#define OMAP2_I2C_BASE3		0x48060000
-#define OMAP4_I2C_BASE4		0x48350000
 
 static const char name[] = "i2c_omap";
 
@@ -55,15 +55,6 @@ static const char name[] = "i2c_omap";
 
 static struct resource i2c_resources[][2] = {
 	{ I2C_RESOURCE_BUILDER(0, 0) },
-#if	defined(CONFIG_ARCH_OMAP2PLUS)
-	{ I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE2, 0) },
-#endif
-#if	defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
-	{ I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE3, 0) },
-#endif
-#if	defined(CONFIG_ARCH_OMAP4)
-	{ I2C_RESOURCE_BUILDER(OMAP4_I2C_BASE4, 0) },
-#endif
 };
 
 #define I2C_DEV_BUILDER(bus_id, res, data)		\
@@ -77,18 +68,11 @@ static struct resource i2c_resources[][2] = {
 		},					\
 	}
 
-static struct omap_i2c_bus_platform_data i2c_pdata[ARRAY_SIZE(i2c_resources)];
+#define MAX_OMAP_I2C_HWMOD_NAME_LEN	16
+#define OMAP_I2C_MAX_CONTROLLERS 4
+static struct omap_i2c_bus_platform_data i2c_pdata[OMAP_I2C_MAX_CONTROLLERS];
 static struct platform_device omap_i2c_devices[] = {
 	I2C_DEV_BUILDER(1, i2c_resources[0], &i2c_pdata[0]),
-#if	defined(CONFIG_ARCH_OMAP2PLUS)
-	I2C_DEV_BUILDER(2, i2c_resources[1], &i2c_pdata[1]),
-#endif
-#if	defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
-	I2C_DEV_BUILDER(3, i2c_resources[2], &i2c_pdata[2]),
-#endif
-#if	defined(CONFIG_ARCH_OMAP4)
-	I2C_DEV_BUILDER(4, i2c_resources[3], &i2c_pdata[3]),
-#endif
 };
 
 #define OMAP_I2C_CMDLINE_SETUP	(BIT(31))
@@ -109,35 +93,20 @@ static int __init omap_i2c_nr_ports(void)
 	return ports;
 }
 
-/* Shared between omap2 and 3 */
-static resource_size_t omap2_i2c_irq[3] __initdata = {
-	INT_24XX_I2C1_IRQ,
-	INT_24XX_I2C2_IRQ,
-	INT_34XX_I2C3_IRQ,
-};
-
-static resource_size_t omap4_i2c_irq[4] __initdata = {
-	OMAP44XX_IRQ_I2C1,
-	OMAP44XX_IRQ_I2C2,
-	OMAP44XX_IRQ_I2C3,
-	OMAP44XX_IRQ_I2C4,
-};
-
-static inline int omap1_i2c_add_bus(struct platform_device *pdev, int bus_id)
+static inline int omap1_i2c_add_bus(int bus_id)
 {
-	struct omap_i2c_bus_platform_data *pd;
-	struct resource *res;
-
-	pd = pdev->dev.platform_data;
-	res = pdev->resource;
-	res[0].start = OMAP1_I2C_BASE;
-	res[0].end = res[0].start + OMAP_I2C_SIZE;
-	res[1].start = INT_I2C;
+	struct platform_device *pdev;
+	struct omap_i2c_bus_platform_data *pdata;
+
 	omap1_i2c_mux_pins(bus_id);
 
+	pdev = &omap_i2c_devices[bus_id - 1];
+	pdata = &i2c_pdata[bus_id - 1];
+
 	return platform_device_register(pdev);
 }
 
+
 /*
  * XXX This function is a temporary compatibility wrapper - only
  * needed until the I2C driver can be converted to call
@@ -148,52 +117,57 @@ static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t)
 	omap_pm_set_max_mpu_wakeup_lat(dev, t);
 }
 
-static inline int omap2_i2c_add_bus(struct platform_device *pdev, int bus_id)
-{
-	struct resource *res;
-	resource_size_t *irq;
+static struct omap_device_pm_latency omap_i2c_latency[] = {
+	[0] = {
+		.deactivate_func	= omap_device_idle_hwmods,
+		.activate_func		= omap_device_enable_hwmods,
+		.flags			= OMAP_DEVICE_LATENCY_AUTO_ADJUST,
+	},
+};
 
-	res = pdev->resource;
+static inline int omap2_i2c_add_bus(int bus_id)
+{
+	int l;
+	struct omap_hwmod *oh;
+	struct omap_device *od;
+	char oh_name[MAX_OMAP_I2C_HWMOD_NAME_LEN];
+	struct omap_i2c_bus_platform_data *pdata;
 
-	if (!cpu_is_omap44xx())
-		irq = omap2_i2c_irq;
-	else
-		irq = omap4_i2c_irq;
+	omap2_i2c_mux_pins(bus_id);
 
-	if (bus_id == 1) {
-		res[0].start = OMAP2_I2C_BASE1;
-		res[0].end = res[0].start + OMAP_I2C_SIZE;
+	l = snprintf(oh_name, MAX_OMAP_I2C_HWMOD_NAME_LEN, "i2c%d", bus_id);
+	WARN(l >= MAX_OMAP_I2C_HWMOD_NAME_LEN,
+		"String buffer overflow in I2C%d device setup\n", bus_id);
+	oh = omap_hwmod_lookup(oh_name);
+	if (!oh) {
+			pr_err("Could not look up %s\n", oh_name);
+			return -EEXIST;
 	}
 
-	res[1].start = irq[bus_id - 1];
-	omap2_i2c_mux_pins(bus_id);
-
+	pdata = &i2c_pdata[bus_id - 1];
 	/*
 	 * When waiting for completion of a i2c transfer, we need to
 	 * set a wake up latency constraint for the MPU. This is to
 	 * ensure quick enough wakeup from idle, when transfer
 	 * completes.
+	 * Only omap3 has support for constraints
 	 */
-	if (cpu_is_omap34xx()) {
-		struct omap_i2c_bus_platform_data *pd;
-
-		pd = pdev->dev.platform_data;
-		pd->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat;
-	}
-
-	return platform_device_register(pdev);
+	if (cpu_is_omap34xx())
+		pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat;
+	od = omap_device_build(name, bus_id, oh, pdata,
+			sizeof(struct omap_i2c_bus_platform_data),
+			omap_i2c_latency, ARRAY_SIZE(omap_i2c_latency), 0);
+	WARN(IS_ERR(od), "Could not build omap_device for %s\n", name);
+
+	return PTR_ERR(od);
 }
 
 static int __init omap_i2c_add_bus(int bus_id)
 {
-	struct platform_device *pdev;
-
-	pdev = &omap_i2c_devices[bus_id - 1];
-
 	if (cpu_class_is_omap1())
-		return omap1_i2c_add_bus(pdev, bus_id);
+		return omap1_i2c_add_bus(bus_id);
 	else
-		return omap2_i2c_add_bus(pdev, bus_id);
+		return omap2_i2c_add_bus(bus_id);
 }
 
 /**
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 78ebf507ce56..7472449cbb74 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -1,9 +1,14 @@
 #ifndef __I2C_OMAP_H__
 #define __I2C_OMAP_H__
 
+#include <linux/platform_device.h>
+
 struct omap_i2c_bus_platform_data {
 	u32		clkrate;
 	void		(*set_mpu_wkup_lat)(struct device *dev, long set);
+	int		(*device_enable) (struct platform_device *pdev);
+	int		(*device_shutdown) (struct platform_device *pdev);
+	int		(*device_idle) (struct platform_device *pdev);
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 074e61ec3751da9ab88ee66d3818574556c03489 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Wed, 10 Nov 2010 09:01:31 +1100
Subject: kernel: add roundup() code comment from akpm

Add roundup() code comment from akpm.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/kernel.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b526947bdf48..3f648d204c37 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,6 +58,8 @@ extern const char linux_proc_banner[];
 
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
 #define roundup(x, y) (					\
 {							\
 	const typeof(y) __y = y;			\
-- 
cgit v1.2.3-71-gd317


From f6cd24777513fcc673d432cc29ef59881d3e4df1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 4 Nov 2010 11:13:48 +0100
Subject: irq: Better struct irqaction layout

We currently use kmalloc-96 slab for struct irqaction allocations on
64bit arches.

This is unfortunate because of possible false sharing and two cache
lines accesses.

Move 'name' and 'dir' fields at the end of the structure, and force a
suitable alignement.

Hot path fields now use one cache line on x86_64.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1288865628.2659.69.camel@edumazet-laptop>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 79d0c4f6d071..55e0d4253e49 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
 struct irqaction {
 	irq_handler_t handler;
 	unsigned long flags;
-	const char *name;
 	void *dev_id;
 	struct irqaction *next;
 	int irq;
-	struct proc_dir_entry *dir;
 	irq_handler_t thread_fn;
 	struct task_struct *thread;
 	unsigned long thread_flags;
-};
+	const char *name;
+	struct proc_dir_entry *dir;
+} ____cacheline_internodealigned_in_smp;
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
-- 
cgit v1.2.3-71-gd317


From da1d39e3903bc35be2b5e8d2116fdd5d337244d4 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 9 Nov 2010 17:47:02 +0900
Subject: mmc, sh: Move constants to sh_mmcif.h

This moves some constants from sh_mmcif.c to sh_mmcif.h
so that they can be used in sh_mmcif_boot_init().

It also alters the definition of SOFT_RST_OFF from (0 << 31) to
~SOFT_RST_ON (= ~(1 << 31)). The former seems bogus.  The latter is
consistent with the code in sh_mmcif_boot_init().

Cc: Yusuke Goda <yusuke.goda.sx@renesas.com>
Cc: Magnus Damm <magnus.damm@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/mmc/host/sh_mmcif.c  | 23 -----------------------
 include/linux/mmc/sh_mmcif.h | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 34 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index ddd09840520b..3f492730ec05 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -62,25 +62,6 @@
 /* CE_BLOCK_SET */
 #define BLOCK_SIZE_MASK		0x0000ffff
 
-/* CE_CLK_CTRL */
-#define CLK_ENABLE		(1 << 24) /* 1: output mmc clock */
-#define CLK_CLEAR		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
-#define CLK_SUP_PCLK		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
-#define SRSPTO_256		((1 << 13) | (0 << 12)) /* resp timeout */
-#define SRBSYTO_29		((1 << 11) | (1 << 10) |	\
-				 (1 << 9) | (1 << 8)) /* resp busy timeout */
-#define SRWDTO_29		((1 << 7) | (1 << 6) |		\
-				 (1 << 5) | (1 << 4)) /* read/write timeout */
-#define SCCSTO_29		((1 << 3) | (1 << 2) |		\
-				 (1 << 1) | (1 << 0)) /* ccs timeout */
-
-/* CE_BUF_ACC */
-#define BUF_ACC_DMAWEN		(1 << 25)
-#define BUF_ACC_DMAREN		(1 << 24)
-#define BUF_ACC_BUSW_32		(0 << 17)
-#define BUF_ACC_BUSW_16		(1 << 17)
-#define BUF_ACC_ATYP		(1 << 16)
-
 /* CE_INT */
 #define INT_CCSDE		(1 << 29)
 #define INT_CMD12DRE		(1 << 26)
@@ -165,10 +146,6 @@
 				 STS2_AC12BSYTO | STS2_RSPBSYTO |	\
 				 STS2_AC12RSPTO | STS2_RSPTO)
 
-/* CE_VERSION */
-#define SOFT_RST_ON		(1 << 31)
-#define SOFT_RST_OFF		(0 << 31)
-
 #define CLKDEV_EMMC_DATA	52000000 /* 52MHz */
 #define CLKDEV_MMC_DATA		20000000 /* 20MHz */
 #define CLKDEV_INIT		400000   /* 400 KHz */
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 5c99da1078aa..a6bfa5296495 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -59,6 +59,29 @@ struct sh_mmcif_plat_data {
 #define MMCIF_CE_HOST_STS2	0x0000004C
 #define MMCIF_CE_VERSION	0x0000007C
 
+/* CE_BUF_ACC */
+#define BUF_ACC_DMAWEN		(1 << 25)
+#define BUF_ACC_DMAREN		(1 << 24)
+#define BUF_ACC_BUSW_32		(0 << 17)
+#define BUF_ACC_BUSW_16		(1 << 17)
+#define BUF_ACC_ATYP		(1 << 16)
+
+/* CE_CLK_CTRL */
+#define CLK_ENABLE		(1 << 24) /* 1: output mmc clock */
+#define CLK_CLEAR		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
+#define CLK_SUP_PCLK		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
+#define SRSPTO_256		((1 << 13) | (0 << 12)) /* resp timeout */
+#define SRBSYTO_29		((1 << 11) | (1 << 10) |	\
+				 (1 << 9) | (1 << 8)) /* resp busy timeout */
+#define SRWDTO_29		((1 << 7) | (1 << 6) |		\
+				 (1 << 5) | (1 << 4)) /* read/write timeout */
+#define SCCSTO_29		((1 << 3) | (1 << 2) |		\
+				 (1 << 1) | (1 << 0)) /* ccs timeout */
+
+/* CE_VERSION */
+#define SOFT_RST_ON		(1 << 31)
+#define SOFT_RST_OFF		~SOFT_RST_ON
+
 static inline u32 sh_mmcif_readl(void __iomem *addr, int reg)
 {
 	return readl(addr + reg);
@@ -149,17 +172,23 @@ static inline void sh_mmcif_boot_init(void __iomem *base)
 
 	/* reset */
 	tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION);
-	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | 0x80000000);
-	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & ~0x80000000);
+	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON);
+	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF);
 
 	/* byte swap */
-	sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, 0x00010000);
+	sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP);
 
 	/* Set block size in MMCIF hardware */
 	sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS);
 
-	/* Enable the clock, set it to Bus clock/256 (about 325Khz)*/
-	sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01072fff);
+	/* Enable the clock, set it to Bus clock/256 (about 325Khz).
+	 * It is unclear where 0x70000 comes from or if it is even needed.
+	 * It is there for byte-compatibility with code that is known to
+	 * work.
+	 */
+	sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL,
+			CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 |
+			SCCSTO_29 | 0x70000);
 
 	/* CMD0 */
 	sh_mmcif_boot_cmd(base, 0x00000040, 0);
-- 
cgit v1.2.3-71-gd317


From 5e4f083f78d03e9f8d2e327daccde16976f9bb00 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Sun, 24 Oct 2010 11:50:53 +0800
Subject: hrtimer: Remove stale comment on curr_timer

curr_timer doesn't resident in struct hrtimer_cpu_base anymore.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
LKML-Reference: <1287892253-2587-1-git-send-email-yong.zhang0@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0c1b857d3d..dd9954b79342 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -158,7 +158,6 @@ struct hrtimer_clock_base {
  * @lock:		lock protecting the base and associated clock bases
  *			and timers
  * @clock_base:		array of clock bases for this cpu
- * @curr_timer:		the timer which is executing a callback right now
  * @expires_next:	absolute time of the next event which was scheduled
  *			via clock_set_next_event()
  * @hres_active:	State of high resolution mode
-- 
cgit v1.2.3-71-gd317


From 0eadcc09203349b11ca477ec367079b23d32ab91 Mon Sep 17 00:00:00 2001
From: Tatyana Brokhman <tlinder@codeaurora.org>
Date: Mon, 1 Nov 2010 18:18:24 +0200
Subject: usb: USB3.0 ch11 definitions

Adding hub SuperSpeed usb definitions as defined by ch10 of the USB3.0
spec.

Signed-off-by: Tatyana Brokhman <tlinder@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch11.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/hcd.h  |  4 ++++
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 119194c85d10..10ec0699bea4 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -27,6 +27,13 @@
 #define HUB_GET_TT_STATE	10
 #define HUB_STOP_TT		11
 
+/*
+ * Hub class additional requests defined by USB 3.0 spec
+ * See USB 3.0 spec Table 10-6
+ */
+#define HUB_SET_DEPTH		12
+#define HUB_GET_PORT_ERR_COUNT	13
+
 /*
  * Hub Class feature numbers
  * See USB 2.0 spec Table 11-17
@@ -55,6 +62,20 @@
 #define USB_PORT_FEAT_INDICATOR         22
 #define USB_PORT_FEAT_C_PORT_L1         23
 
+/*
+ * Port feature selectors added by USB 3.0 spec.
+ * See USB 3.0 spec Table 10-7
+ */
+#define USB_PORT_FEAT_LINK_STATE		5
+#define USB_PORT_FEAT_U1_TIMEOUT		23
+#define USB_PORT_FEAT_U2_TIMEOUT		24
+#define USB_PORT_FEAT_C_LINK_STATE		25
+#define USB_PORT_FEAT_C_CONFIG_ERR		26
+#define USB_PORT_FEAT_REMOTE_WAKE_MASK		27
+#define USB_PORT_FEAT_BH_PORT_RESET		28
+#define USB_PORT_FEAT_C_BH_PORT_RESET		29
+#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT	30
+
 /*
  * Hub Status and Hub Change results
  * See USB 2.0 spec Table 11-19 and Table 11-20
@@ -83,6 +104,32 @@ struct usb_port_status {
 /* bits 13 to 15 are reserved */
 #define USB_PORT_STAT_SUPER_SPEED	0x8000	/* Linux-internal */
 
+/*
+ * Additions to wPortStatus bit field from USB 3.0
+ * See USB 3.0 spec Table 10-10
+ */
+#define USB_PORT_STAT_LINK_STATE	0x01e0
+#define USB_SS_PORT_STAT_POWER		0x0200
+#define USB_PORT_STAT_SPEED_5GBPS	0x0000
+/* Valid only if port is enabled */
+
+/*
+ * Definitions for PORT_LINK_STATE values
+ * (bits 5-8) in wPortStatus
+ */
+#define USB_SS_PORT_LS_U0		0x0000
+#define USB_SS_PORT_LS_U1		0x0020
+#define USB_SS_PORT_LS_U2		0x0040
+#define USB_SS_PORT_LS_U3		0x0060
+#define USB_SS_PORT_LS_SS_DISABLED	0x0080
+#define USB_SS_PORT_LS_RX_DETECT	0x00a0
+#define USB_SS_PORT_LS_SS_INACTIVE	0x00c0
+#define USB_SS_PORT_LS_POLLING		0x00e0
+#define USB_SS_PORT_LS_RECOVERY		0x0100
+#define USB_SS_PORT_LS_HOT_RESET	0x0120
+#define USB_SS_PORT_LS_COMP_MOD		0x0140
+#define USB_SS_PORT_LS_LOOPBACK		0x0160
+
 /*
  * wPortChange bit field
  * See USB 2.0 spec Table 11-22
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 0b6e751ea0b1..dd6ee49a0844 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -471,6 +471,10 @@ extern void usb_ep0_reinit(struct usb_device *);
 
 /*-------------------------------------------------------------------------*/
 
+/* class requests from USB 3.0 hub spec, table 10-5 */
+#define SetHubDepth		(0x3000 | HUB_SET_DEPTH)
+#define GetPortErrorCount	(0x8000 | HUB_GET_PORT_ERR_COUNT)
+
 /*
  * Generic bandwidth allocation constants/support
  */
-- 
cgit v1.2.3-71-gd317


From af3b8881f4c9852eefe9c7f1a997b3ecf580561b Mon Sep 17 00:00:00 2001
From: Russ Gorby <richardx.r.gorby@intel.com>
Date: Tue, 26 Oct 2010 14:13:52 +0100
Subject: ifx6x60: SPI protocol driver for Infineon 6x60 modem

Prototype driver for the IFX6x60 series of SPI attached modems by Jim
Stanley and Russ Gorby

Signed-off-by: Russ Gorby <richardx.r.gorby@intel.com>

[Some reworking and a major cleanup]

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/Kconfig        |    6 +
 drivers/serial/Makefile       |    1 +
 drivers/serial/ifx6x60.c      | 1402 +++++++++++++++++++++++++++++++++++++++++
 drivers/serial/ifx6x60.h      |  129 ++++
 include/linux/spi/ifx_modem.h |   14 +
 5 files changed, 1552 insertions(+)
 create mode 100644 drivers/serial/ifx6x60.c
 create mode 100644 drivers/serial/ifx6x60.h
 create mode 100644 include/linux/spi/ifx_modem.h

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index aff9dcd051c6..0b9cc17b380b 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1632,4 +1632,10 @@ config SERIAL_ALTERA_UART_CONSOLE
 	help
 	  Enable a Altera UART port to be the system console.
 
+config SERIAL_IFX6X60
+        tristate "SPI protocol driver for Infineon 6x60 modem (EXPERIMENTAL)"
+	depends on GPIOLIB && EXPERIMENTAL
+	help
+	  Support for the IFX6x60 modem devices on Intel MID platforms.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index c5705765454f..783638b10698 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -89,3 +89,4 @@ obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
 obj-$(CONFIG_SERIAL_MRST_MAX3110)	+= mrst_max3110.o
 obj-$(CONFIG_SERIAL_MFD_HSU)	+= mfd.o
 obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o
+obj-$(CONFIG_SERIAL_IFX6X60)  	+= ifx6x60.o
diff --git a/drivers/serial/ifx6x60.c b/drivers/serial/ifx6x60.c
new file mode 100644
index 000000000000..b9b7e0601961
--- /dev/null
+++ b/drivers/serial/ifx6x60.c
@@ -0,0 +1,1402 @@
+/****************************************************************************
+ *
+ * Driver for the IFX 6x60 spi modem.
+ *
+ * Copyright (C) 2008 Option International
+ * Copyright (C) 2008 Filip Aben <f.aben@option.com>
+ *		      Denis Joseph Barrow <d.barow@option.com>
+ *		      Jan Dumon <j.dumon@option.com>
+ *
+ * Copyright (C) 2009, 2010 Intel Corp
+ * Russ Gorby <richardx.r.gorby@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA
+ *
+ * Driver modified by Intel from Option gtm501l_spi.c
+ *
+ * Notes
+ * o	The driver currently assumes a single device only. If you need to
+ *	change this then look for saved_ifx_dev and add a device lookup
+ * o	The driver is intended to be big-endian safe but has never been
+ *	tested that way (no suitable hardware). There are a couple of FIXME
+ *	notes by areas that may need addressing
+ * o	Some of the GPIO naming/setup assumptions may need revisiting if
+ *	you need to use this driver for another platform.
+ *
+ *****************************************************************************/
+#include <linux/module.h>
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/device.h>
+#include <linux/spi/spi.h>
+#include <linux/tty.h>
+#include <linux/kfifo.h>
+#include <linux/tty_flip.h>
+#include <linux/timer.h>
+#include <linux/serial.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/rfkill.h>
+#include <linux/fs.h>
+#include <linux/ip.h>
+#include <linux/dmapool.h>
+#include <linux/gpio.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/wait.h>
+#include <linux/tty.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/ifx_modem.h>
+
+#include "ifx6x60.h"
+
+#define IFX_SPI_MORE_MASK		0x10
+#define IFX_SPI_MORE_BIT		12	/* bit position in u16 */
+#define IFX_SPI_CTS_BIT			13	/* bit position in u16 */
+#define IFX_SPI_TTY_ID			0
+#define IFX_SPI_TIMEOUT_SEC		2
+#define IFX_SPI_HEADER_0		(-1)
+#define IFX_SPI_HEADER_F		(-2)
+
+/* forward reference */
+static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev);
+
+/* local variables */
+static int spi_b16 = 1;			/* 8 or 16 bit word length */
+static struct tty_driver *tty_drv;
+static struct ifx_spi_device *saved_ifx_dev;
+static struct lock_class_key ifx_spi_key;
+
+/* GPIO/GPE settings */
+
+/**
+ *	mrdy_set_high		-	set MRDY GPIO
+ *	@ifx: device we are controlling
+ *
+ */
+static inline void mrdy_set_high(struct ifx_spi_device *ifx)
+{
+	gpio_set_value(ifx->gpio.mrdy, 1);
+}
+
+/**
+ *	mrdy_set_low		-	clear MRDY GPIO
+ *	@ifx: device we are controlling
+ *
+ */
+static inline void mrdy_set_low(struct ifx_spi_device *ifx)
+{
+	gpio_set_value(ifx->gpio.mrdy, 0);
+}
+
+/**
+ *	ifx_spi_power_state_set
+ *	@ifx_dev: our SPI device
+ *	@val: bits to set
+ *
+ *	Set bit in power status and signal power system if status becomes non-0
+ */
+static void
+ifx_spi_power_state_set(struct ifx_spi_device *ifx_dev, unsigned char val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifx_dev->power_lock, flags);
+
+	/*
+	 * if power status is already non-0, just update, else
+	 * tell power system
+	 */
+	if (!ifx_dev->power_status)
+		pm_runtime_get(&ifx_dev->spi_dev->dev);
+	ifx_dev->power_status |= val;
+
+	spin_unlock_irqrestore(&ifx_dev->power_lock, flags);
+}
+
+/**
+ *	ifx_spi_power_state_clear	-	clear power bit
+ *	@ifx_dev: our SPI device
+ *	@val: bits to clear
+ *
+ *	clear bit in power status and signal power system if status becomes 0
+ */
+static void
+ifx_spi_power_state_clear(struct ifx_spi_device *ifx_dev, unsigned char val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifx_dev->power_lock, flags);
+
+	if (ifx_dev->power_status) {
+		ifx_dev->power_status &= ~val;
+		if (!ifx_dev->power_status)
+			pm_runtime_put(&ifx_dev->spi_dev->dev);
+	}
+
+	spin_unlock_irqrestore(&ifx_dev->power_lock, flags);
+}
+
+/**
+ *	swap_buf
+ *	@buf: our buffer
+ *	@len : number of bytes (not words) in the buffer
+ *	@end: end of buffer
+ *
+ *	Swap the contents of a buffer into big endian format
+ */
+static inline void swap_buf(u16 *buf, int len, void *end)
+{
+	int n;
+
+	len = ((len + 1) >> 1);
+	if ((void *)&buf[len] > end) {
+		pr_err("swap_buf: swap exceeds boundary (%p > %p)!",
+		       &buf[len], end);
+		return;
+	}
+	for (n = 0; n < len; n++) {
+		*buf = cpu_to_be16(*buf);
+		buf++;
+	}
+}
+
+/**
+ *	mrdy_assert		-	assert MRDY line
+ *	@ifx_dev: our SPI device
+ *
+ *	Assert mrdy and set timer to wait for SRDY interrupt, if SRDY is low
+ *	now.
+ *
+ *	FIXME: Can SRDY even go high as we are running this code ?
+ */
+static void mrdy_assert(struct ifx_spi_device *ifx_dev)
+{
+	int val = gpio_get_value(ifx_dev->gpio.srdy);
+	if (!val) {
+		if (!test_and_set_bit(IFX_SPI_STATE_TIMER_PENDING,
+				      &ifx_dev->flags)) {
+			ifx_dev->spi_timer.expires =
+				jiffies + IFX_SPI_TIMEOUT_SEC*HZ;
+			add_timer(&ifx_dev->spi_timer);
+
+		}
+	}
+	ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_DATA_PENDING);
+	mrdy_set_high(ifx_dev);
+}
+
+/**
+ *	ifx_spi_hangup		-	hang up an IFX device
+ *	@ifx_dev: our SPI device
+ *
+ *	Hang up the tty attached to the IFX device if one is currently
+ *	open. If not take no action
+ */
+static void ifx_spi_ttyhangup(struct ifx_spi_device *ifx_dev)
+{
+	struct tty_port *pport = &ifx_dev->tty_port;
+	struct tty_struct *tty = tty_port_tty_get(pport);
+	if (tty) {
+		tty_hangup(tty);
+		tty_kref_put(tty);
+	}
+}
+
+/**
+ *	ifx_spi_timeout		-	SPI timeout
+ *	@arg: our SPI device
+ *
+ *	The SPI has timed out: hang up the tty. Users will then see a hangup
+ *	and error events.
+ */
+static void ifx_spi_timeout(unsigned long arg)
+{
+	struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *)arg;
+
+	dev_warn(&ifx_dev->spi_dev->dev, "*** SPI Timeout ***");
+	ifx_spi_ttyhangup(ifx_dev);
+	mrdy_set_low(ifx_dev);
+	clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags);
+}
+
+/* char/tty operations */
+
+/**
+ *	ifx_spi_tiocmget	-	get modem lines
+ *	@tty: our tty device
+ *	@filp: file handle issuing the request
+ *
+ *	Map the signal state into Linux modem flags and report the value
+ *	in Linux terms
+ */
+static int ifx_spi_tiocmget(struct tty_struct *tty, struct file *filp)
+{
+	unsigned int value;
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+
+	value =
+	(test_bit(IFX_SPI_RTS, &ifx_dev->signal_state) ? TIOCM_RTS : 0) |
+	(test_bit(IFX_SPI_DTR, &ifx_dev->signal_state) ? TIOCM_DTR : 0) |
+	(test_bit(IFX_SPI_CTS, &ifx_dev->signal_state) ? TIOCM_CTS : 0) |
+	(test_bit(IFX_SPI_DSR, &ifx_dev->signal_state) ? TIOCM_DSR : 0) |
+	(test_bit(IFX_SPI_DCD, &ifx_dev->signal_state) ? TIOCM_CAR : 0) |
+	(test_bit(IFX_SPI_RI, &ifx_dev->signal_state) ? TIOCM_RNG : 0);
+	return value;
+}
+
+/**
+ *	ifx_spi_tiocmset	-	set modem bits
+ *	@tty: the tty structure
+ *	@filp: file handle issuing the request
+ *	@set: bits to set
+ *	@clear: bits to clear
+ *
+ *	The IFX6x60 only supports DTR and RTS. Set them accordingly
+ *	and flag that an update to the modem is needed.
+ *
+ *	FIXME: do we need to kick the tranfers when we do this ?
+ */
+static int ifx_spi_tiocmset(struct tty_struct *tty, struct file *filp,
+			    unsigned int set, unsigned int clear)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+
+	if (set & TIOCM_RTS)
+		set_bit(IFX_SPI_RTS, &ifx_dev->signal_state);
+	if (set & TIOCM_DTR)
+		set_bit(IFX_SPI_DTR, &ifx_dev->signal_state);
+	if (clear & TIOCM_RTS)
+		clear_bit(IFX_SPI_RTS, &ifx_dev->signal_state);
+	if (clear & TIOCM_DTR)
+		clear_bit(IFX_SPI_DTR, &ifx_dev->signal_state);
+
+	set_bit(IFX_SPI_UPDATE, &ifx_dev->signal_state);
+	return 0;
+}
+
+/**
+ *	ifx_spi_open	-	called on tty open
+ *	@tty: our tty device
+ *	@filp: file handle being associated with the tty
+ *
+ *	Open the tty interface. We let the tty_port layer do all the work
+ *	for us.
+ *
+ *	FIXME: Remove single device assumption and saved_ifx_dev
+ */
+static int ifx_spi_open(struct tty_struct *tty, struct file *filp)
+{
+	return tty_port_open(&saved_ifx_dev->tty_port, tty, filp);
+}
+
+/**
+ *	ifx_spi_close	-	called when our tty closes
+ *	@tty: the tty being closed
+ *	@filp: the file handle being closed
+ *
+ *	Perform the close of the tty. We use the tty_port layer to do all
+ *	our hard work.
+ */
+static void ifx_spi_close(struct tty_struct *tty, struct file *filp)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+	tty_port_close(&ifx_dev->tty_port, tty, filp);
+	/* FIXME: should we do an ifx_spi_reset here ? */
+}
+
+/**
+ *	ifx_decode_spi_header	-	decode received header
+ *	@buffer: the received data
+ *	@length: decoded length
+ *	@more: decoded more flag
+ *	@received_cts: status of cts we received
+ *
+ *	Note how received_cts is handled -- if header is all F it is left
+ *	the same as it was, if header is all 0 it is set to 0 otherwise it is
+ *	taken from the incoming header.
+ *
+ *	FIXME: endianness
+ */
+static int ifx_spi_decode_spi_header(unsigned char *buffer, int *length,
+			unsigned char *more, unsigned char *received_cts)
+{
+	u16 h1;
+	u16 h2;
+	u16 *in_buffer = (u16 *)buffer;
+
+	h1 = *in_buffer;
+	h2 = *(in_buffer+1);
+
+	if (h1 == 0 && h2 == 0) {
+		*received_cts = 0;
+		return IFX_SPI_HEADER_0;
+	} else if (h1 == 0xffff && h2 == 0xffff) {
+		/* spi_slave_cts remains as it was */
+		return IFX_SPI_HEADER_F;
+	}
+
+	*length = h1 & 0xfff;	/* upper bits of byte are flags */
+	*more = (buffer[1] >> IFX_SPI_MORE_BIT) & 1;
+	*received_cts = (buffer[3] >> IFX_SPI_CTS_BIT) & 1;
+	return 0;
+}
+
+/**
+ *	ifx_setup_spi_header	-	set header fields
+ *	@txbuffer: pointer to start of SPI buffer
+ *	@tx_count: bytes
+ *	@more: indicate if more to follow
+ *
+ *	Format up an SPI header for a transfer
+ *
+ *	FIXME: endianness?
+ */
+static void ifx_spi_setup_spi_header(unsigned char *txbuffer, int tx_count,
+					unsigned char more)
+{
+	*(u16 *)(txbuffer) = tx_count;
+	*(u16 *)(txbuffer+2) = IFX_SPI_PAYLOAD_SIZE;
+	txbuffer[1] |= (more << IFX_SPI_MORE_BIT) & IFX_SPI_MORE_MASK;
+}
+
+/**
+ *	ifx_spi_wakeup_serial	-	SPI space made
+ *	@port_data: our SPI device
+ *
+ *	We have emptied the FIFO enough that we want to get more data
+ *	queued into it. Poke the line discipline via tty_wakeup so that
+ *	it will feed us more bits
+ */
+static void ifx_spi_wakeup_serial(struct ifx_spi_device *ifx_dev)
+{
+	struct tty_struct *tty;
+
+	tty = tty_port_tty_get(&ifx_dev->tty_port);
+	if (!tty)
+		return;
+	tty_wakeup(tty);
+	tty_kref_put(tty);
+}
+
+/**
+ *	ifx_spi_prepare_tx_buffer	-	prepare transmit frame
+ *	@ifx_dev: our SPI device
+ *
+ *	The transmit buffr needs a header and various other bits of
+ *	information followed by as much data as we can pull from the FIFO
+ *	and transfer. This function formats up a suitable buffer in the
+ *	ifx_dev->tx_buffer
+ *
+ *	FIXME: performance - should we wake the tty when the queue is half
+ *			     empty ?
+ */
+static int ifx_spi_prepare_tx_buffer(struct ifx_spi_device *ifx_dev)
+{
+	int temp_count;
+	int queue_length;
+	int tx_count;
+	unsigned char *tx_buffer;
+
+	tx_buffer = ifx_dev->tx_buffer;
+	memset(tx_buffer, 0, IFX_SPI_TRANSFER_SIZE);
+
+	/* make room for required SPI header */
+	tx_buffer += IFX_SPI_HEADER_OVERHEAD;
+	tx_count = IFX_SPI_HEADER_OVERHEAD;
+
+	/* clear to signal no more data if this turns out to be the
+	 * last buffer sent in a sequence */
+	ifx_dev->spi_more = 0;
+
+	/* if modem cts is set, just send empty buffer */
+	if (!ifx_dev->spi_slave_cts) {
+		/* see if there's tx data */
+		queue_length = kfifo_len(&ifx_dev->tx_fifo);
+		if (queue_length != 0) {
+			/* data to mux -- see if there's room for it */
+			temp_count = min(queue_length, IFX_SPI_PAYLOAD_SIZE);
+			temp_count = kfifo_out_locked(&ifx_dev->tx_fifo,
+					tx_buffer, temp_count,
+					&ifx_dev->fifo_lock);
+
+			/* update buffer pointer and data count in message */
+			tx_buffer += temp_count;
+			tx_count += temp_count;
+			if (temp_count == queue_length)
+				/* poke port to get more data */
+				ifx_spi_wakeup_serial(ifx_dev);
+			else /* more data in port, use next SPI message */
+				ifx_dev->spi_more = 1;
+		}
+	}
+	/* have data and info for header -- set up SPI header in buffer */
+	/* spi header needs payload size, not entire buffer size */
+	ifx_spi_setup_spi_header(ifx_dev->tx_buffer,
+					tx_count-IFX_SPI_HEADER_OVERHEAD,
+					ifx_dev->spi_more);
+	/* swap actual data in the buffer */
+	swap_buf((u16 *)(ifx_dev->tx_buffer), tx_count,
+		&ifx_dev->tx_buffer[IFX_SPI_TRANSFER_SIZE]);
+	return tx_count;
+}
+
+/**
+ *	ifx_spi_write		-	line discipline write
+ *	@tty: our tty device
+ *	@buf: pointer to buffer to write (kernel space)
+ *	@count: size of buffer
+ *
+ *	Write the characters we have been given into the FIFO. If the device
+ *	is not active then activate it, when the SRDY line is asserted back
+ *	this will commence I/O
+ */
+static int ifx_spi_write(struct tty_struct *tty, const unsigned char *buf,
+			 int count)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+	unsigned char *tmp_buf = (unsigned char *)buf;
+	int tx_count = kfifo_in_locked(&ifx_dev->tx_fifo, tmp_buf, count,
+				   &ifx_dev->fifo_lock);
+	mrdy_assert(ifx_dev);
+	return tx_count;
+}
+
+/**
+ *	ifx_spi_chars_in_buffer	-	line discipline helper
+ *	@tty: our tty device
+ *
+ *	Report how much data we can accept before we drop bytes. As we use
+ *	a simple FIFO this is nice and easy.
+ */
+static int ifx_spi_write_room(struct tty_struct *tty)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+	return IFX_SPI_FIFO_SIZE - kfifo_len(&ifx_dev->tx_fifo);
+}
+
+/**
+ *	ifx_spi_chars_in_buffer	-	line discipline helper
+ *	@tty: our tty device
+ *
+ *	Report how many characters we have buffered. In our case this is the
+ *	number of bytes sitting in our transmit FIFO.
+ */
+static int ifx_spi_chars_in_buffer(struct tty_struct *tty)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+	return kfifo_len(&ifx_dev->tx_fifo);
+}
+
+/**
+ *	ifx_port_hangup
+ *	@port: our tty port
+ *
+ *	tty port hang up. Called when tty_hangup processing is invoked either
+ *	by loss of carrier, or by software (eg vhangup). Serialized against
+ *	activate/shutdown by the tty layer.
+ */
+static void ifx_spi_hangup(struct tty_struct *tty)
+{
+	struct ifx_spi_device *ifx_dev = tty->driver_data;
+	tty_port_hangup(&ifx_dev->tty_port);
+}
+
+/**
+ *	ifx_port_activate
+ *	@port: our tty port
+ *
+ *	tty port activate method - called for first open. Serialized
+ *	with hangup and shutdown by the tty layer.
+ */
+static int ifx_port_activate(struct tty_port *port, struct tty_struct *tty)
+{
+	struct ifx_spi_device *ifx_dev =
+		container_of(port, struct ifx_spi_device, tty_port);
+
+	/* clear any old data; can't do this in 'close' */
+	kfifo_reset(&ifx_dev->tx_fifo);
+
+	/* put port data into this tty */
+	tty->driver_data = ifx_dev;
+
+	/* allows flip string push from int context */
+	tty->low_latency = 1;
+
+	return 0;
+}
+
+/**
+ *	ifx_port_shutdown
+ *	@port: our tty port
+ *
+ *	tty port shutdown method - called for last port close. Serialized
+ *	with hangup and activate by the tty layer.
+ */
+static void ifx_port_shutdown(struct tty_port *port)
+{
+	struct ifx_spi_device *ifx_dev =
+		container_of(port, struct ifx_spi_device, tty_port);
+
+	mrdy_set_low(ifx_dev);
+	clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags);
+	tasklet_kill(&ifx_dev->io_work_tasklet);
+}
+
+static const struct tty_port_operations ifx_tty_port_ops = {
+	.activate = ifx_port_activate,
+	.shutdown = ifx_port_shutdown,
+};
+
+static const struct tty_operations ifx_spi_serial_ops = {
+	.open = ifx_spi_open,
+	.close = ifx_spi_close,
+	.write = ifx_spi_write,
+	.hangup = ifx_spi_hangup,
+	.write_room = ifx_spi_write_room,
+	.chars_in_buffer = ifx_spi_chars_in_buffer,
+	.tiocmget = ifx_spi_tiocmget,
+	.tiocmset = ifx_spi_tiocmset,
+};
+
+/**
+ *	ifx_spi_insert_fip_string	-	queue received data
+ *	@ifx_ser: our SPI device
+ *	@chars: buffer we have received
+ *	@size: number of chars reeived
+ *
+ *	Queue bytes to the tty assuming the tty side is currently open. If
+ *	not the discard the data.
+ */
+static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev,
+				    unsigned char *chars, size_t size)
+{
+	struct tty_struct *tty = tty_port_tty_get(&ifx_dev->tty_port);
+	if (!tty)
+		return;
+	tty_insert_flip_string(tty, chars, size);
+	tty_flip_buffer_push(tty);
+	tty_kref_put(tty);
+}
+
+/**
+ *	ifx_spi_complete	-	SPI transfer completed
+ *	@ctx: our SPI device
+ *
+ *	An SPI transfer has completed. Process any received data and kick off
+ *	any further transmits we can commence.
+ */
+static void ifx_spi_complete(void *ctx)
+{
+	struct ifx_spi_device *ifx_dev = ctx;
+	struct tty_struct *tty;
+	struct tty_ldisc *ldisc = NULL;
+	int length;
+	int actual_length;
+	unsigned char more;
+	unsigned char cts;
+	int local_write_pending = 0;
+	int queue_length;
+	int srdy;
+	int decode_result;
+
+	mrdy_set_low(ifx_dev);
+
+	if (!ifx_dev->spi_msg.status) {
+		/* check header validity, get comm flags */
+		swap_buf((u16 *)ifx_dev->rx_buffer, IFX_SPI_HEADER_OVERHEAD,
+			&ifx_dev->rx_buffer[IFX_SPI_HEADER_OVERHEAD]);
+		decode_result = ifx_spi_decode_spi_header(ifx_dev->rx_buffer,
+				&length, &more, &cts);
+		if (decode_result == IFX_SPI_HEADER_0) {
+			dev_dbg(&ifx_dev->spi_dev->dev,
+				"ignore input: invalid header 0");
+			ifx_dev->spi_slave_cts = 0;
+			goto complete_exit;
+		} else if (decode_result == IFX_SPI_HEADER_F) {
+			dev_dbg(&ifx_dev->spi_dev->dev,
+				"ignore input: invalid header F");
+			goto complete_exit;
+		}
+
+		ifx_dev->spi_slave_cts = cts;
+
+		actual_length = min((unsigned int)length,
+					ifx_dev->spi_msg.actual_length);
+		swap_buf((u16 *)(ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD),
+			 actual_length,
+			 &ifx_dev->rx_buffer[IFX_SPI_TRANSFER_SIZE]);
+		ifx_spi_insert_flip_string(
+			ifx_dev,
+			ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD,
+			(size_t)actual_length);
+	} else {
+		dev_dbg(&ifx_dev->spi_dev->dev, "SPI transfer error %d",
+		       ifx_dev->spi_msg.status);
+	}
+
+complete_exit:
+	if (ifx_dev->write_pending) {
+		ifx_dev->write_pending = 0;
+		local_write_pending = 1;
+	}
+
+	clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &(ifx_dev->flags));
+
+	queue_length = kfifo_len(&ifx_dev->tx_fifo);
+	srdy = gpio_get_value(ifx_dev->gpio.srdy);
+	if (!srdy)
+		ifx_spi_power_state_clear(ifx_dev, IFX_SPI_POWER_SRDY);
+
+	/* schedule output if there is more to do */
+	if (test_and_clear_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags))
+		tasklet_schedule(&ifx_dev->io_work_tasklet);
+	else {
+		if (more || ifx_dev->spi_more || queue_length > 0 ||
+			local_write_pending) {
+			if (ifx_dev->spi_slave_cts) {
+				if (more)
+					mrdy_assert(ifx_dev);
+			} else
+				mrdy_assert(ifx_dev);
+		} else {
+			/*
+			 * poke line discipline driver if any for more data
+			 * may or may not get more data to write
+			 * for now, say not busy
+			 */
+			ifx_spi_power_state_clear(ifx_dev,
+						  IFX_SPI_POWER_DATA_PENDING);
+			tty = tty_port_tty_get(&ifx_dev->tty_port);
+			if (tty) {
+				ldisc = tty_ldisc_ref(tty);
+				if (ldisc) {
+					ldisc->ops->write_wakeup(tty);
+					tty_ldisc_deref(ldisc);
+				}
+				tty_kref_put(tty);
+			}
+		}
+	}
+}
+
+/**
+ *	ifx_spio_io		-	I/O tasklet
+ *	@data: our SPI device
+ *
+ *	Queue data for transmission if possible and then kick off the
+ *	transfer.
+ */
+static void ifx_spi_io(unsigned long data)
+{
+	int retval;
+	struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *) data;
+
+	if (!test_and_set_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags)) {
+		if (ifx_dev->gpio.unack_srdy_int_nb > 0)
+			ifx_dev->gpio.unack_srdy_int_nb--;
+
+		ifx_spi_prepare_tx_buffer(ifx_dev);
+
+		spi_message_init(&ifx_dev->spi_msg);
+		INIT_LIST_HEAD(&ifx_dev->spi_msg.queue);
+
+		ifx_dev->spi_msg.context = ifx_dev;
+		ifx_dev->spi_msg.complete = ifx_spi_complete;
+
+		/* set up our spi transfer */
+		/* note len is BYTES, not transfers */
+		ifx_dev->spi_xfer.len = IFX_SPI_TRANSFER_SIZE;
+		ifx_dev->spi_xfer.cs_change = 0;
+		ifx_dev->spi_xfer.speed_hz = 12500000;
+		/* ifx_dev->spi_xfer.speed_hz = 390625; */
+		ifx_dev->spi_xfer.bits_per_word = spi_b16 ? 16 : 8;
+
+		ifx_dev->spi_xfer.tx_buf = ifx_dev->tx_buffer;
+		ifx_dev->spi_xfer.rx_buf = ifx_dev->rx_buffer;
+
+		/*
+		 * setup dma pointers
+		 */
+		if (ifx_dev->is_6160) {
+			ifx_dev->spi_msg.is_dma_mapped = 1;
+			ifx_dev->tx_dma = ifx_dev->tx_bus;
+			ifx_dev->rx_dma = ifx_dev->rx_bus;
+			ifx_dev->spi_xfer.tx_dma = ifx_dev->tx_dma;
+			ifx_dev->spi_xfer.rx_dma = ifx_dev->rx_dma;
+		} else {
+			ifx_dev->spi_msg.is_dma_mapped = 0;
+			ifx_dev->tx_dma = (dma_addr_t)0;
+			ifx_dev->rx_dma = (dma_addr_t)0;
+			ifx_dev->spi_xfer.tx_dma = (dma_addr_t)0;
+			ifx_dev->spi_xfer.rx_dma = (dma_addr_t)0;
+		}
+
+		spi_message_add_tail(&ifx_dev->spi_xfer, &ifx_dev->spi_msg);
+
+		/* Assert MRDY. This may have already been done by the write
+		 * routine.
+		 */
+		mrdy_assert(ifx_dev);
+
+		retval = spi_async(ifx_dev->spi_dev, &ifx_dev->spi_msg);
+		if (retval) {
+			clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS,
+				  &ifx_dev->flags);
+			tasklet_schedule(&ifx_dev->io_work_tasklet);
+			return;
+		}
+	} else
+		ifx_dev->write_pending = 1;
+}
+
+/**
+ *	ifx_spi_free_port	-	free up the tty side
+ *	@ifx_dev: IFX device going away
+ *
+ *	Unregister and free up a port when the device goes away
+ */
+static void ifx_spi_free_port(struct ifx_spi_device *ifx_dev)
+{
+	if (ifx_dev->tty_dev)
+		tty_unregister_device(tty_drv, ifx_dev->minor);
+	kfifo_free(&ifx_dev->tx_fifo);
+}
+
+/**
+ *	ifx_spi_create_port	-	create a new port
+ *	@ifx_dev: our spi device
+ *
+ *	Allocate and initialise the tty port that goes with this interface
+ *	and add it to the tty layer so that it can be opened.
+ */
+static int ifx_spi_create_port(struct ifx_spi_device *ifx_dev)
+{
+	int ret = 0;
+	struct tty_port *pport = &ifx_dev->tty_port;
+
+	spin_lock_init(&ifx_dev->fifo_lock);
+	lockdep_set_class_and_subclass(&ifx_dev->fifo_lock,
+		&ifx_spi_key, 0);
+
+	if (kfifo_alloc(&ifx_dev->tx_fifo, IFX_SPI_FIFO_SIZE, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto error_ret;
+	}
+
+	pport->ops = &ifx_tty_port_ops;
+	tty_port_init(pport);
+	ifx_dev->minor = IFX_SPI_TTY_ID;
+	ifx_dev->tty_dev = tty_register_device(tty_drv, ifx_dev->minor,
+					       &ifx_dev->spi_dev->dev);
+	if (IS_ERR(ifx_dev->tty_dev)) {
+		dev_dbg(&ifx_dev->spi_dev->dev,
+			"%s: registering tty device failed", __func__);
+		ret = PTR_ERR(ifx_dev->tty_dev);
+		goto error_ret;
+	}
+	return 0;
+
+error_ret:
+	ifx_spi_free_port(ifx_dev);
+	return ret;
+}
+
+/**
+ *	ifx_spi_handle_srdy		-	handle SRDY
+ *	@ifx_dev: device asserting SRDY
+ *
+ *	Check our device state and see what we need to kick off when SRDY
+ *	is asserted. This usually means killing the timer and firing off the
+ *	I/O processing.
+ */
+static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev)
+{
+	if (test_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags)) {
+		del_timer_sync(&ifx_dev->spi_timer);
+		clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags);
+	}
+
+	ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_SRDY);
+
+	if (!test_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags))
+		tasklet_schedule(&ifx_dev->io_work_tasklet);
+	else
+		set_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags);
+}
+
+/**
+ *	ifx_spi_srdy_interrupt	-	SRDY asserted
+ *	@irq: our IRQ number
+ *	@dev: our ifx device
+ *
+ *	The modem asserted SRDY. Handle the srdy event
+ */
+static irqreturn_t ifx_spi_srdy_interrupt(int irq, void *dev)
+{
+	struct ifx_spi_device *ifx_dev = dev;
+	ifx_dev->gpio.unack_srdy_int_nb++;
+	ifx_spi_handle_srdy(ifx_dev);
+	return IRQ_HANDLED;
+}
+
+/**
+ *	ifx_spi_reset_interrupt	-	Modem has changed reset state
+ *	@irq: interrupt number
+ *	@dev: our device pointer
+ *
+ *	The modem has either entered or left reset state. Check the GPIO
+ *	line to see which.
+ *
+ *	FIXME: review locking on MR_INPROGRESS versus
+ *	parallel unsolicited reset/solicited reset
+ */
+static irqreturn_t ifx_spi_reset_interrupt(int irq, void *dev)
+{
+	struct ifx_spi_device *ifx_dev = dev;
+	int val = gpio_get_value(ifx_dev->gpio.reset_out);
+	int solreset = test_bit(MR_START, &ifx_dev->mdm_reset_state);
+
+	if (val == 0) {
+		/* entered reset */
+		set_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state);
+		if (!solreset) {
+			/* unsolicited reset  */
+			ifx_spi_ttyhangup(ifx_dev);
+		}
+	} else {
+		/* exited reset */
+		clear_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state);
+		if (solreset) {
+			set_bit(MR_COMPLETE, &ifx_dev->mdm_reset_state);
+			wake_up(&ifx_dev->mdm_reset_wait);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+/**
+ *	ifx_spi_free_device - free device
+ *	@ifx_dev: device to free
+ *
+ *	Free the IFX device
+ */
+static void ifx_spi_free_device(struct ifx_spi_device *ifx_dev)
+{
+	ifx_spi_free_port(ifx_dev);
+	dma_free_coherent(&ifx_dev->spi_dev->dev,
+				IFX_SPI_TRANSFER_SIZE,
+				ifx_dev->tx_buffer,
+				ifx_dev->tx_bus);
+	dma_free_coherent(&ifx_dev->spi_dev->dev,
+				IFX_SPI_TRANSFER_SIZE,
+				ifx_dev->rx_buffer,
+				ifx_dev->rx_bus);
+}
+
+/**
+ *	ifx_spi_reset	-	reset modem
+ *	@ifx_dev: modem to reset
+ *
+ *	Perform a reset on the modem
+ */
+static int ifx_spi_reset(struct ifx_spi_device *ifx_dev)
+{
+	int ret;
+	/*
+	 * set up modem power, reset
+	 *
+	 * delays are required on some platforms for the modem
+	 * to reset properly
+	 */
+	set_bit(MR_START, &ifx_dev->mdm_reset_state);
+	gpio_set_value(ifx_dev->gpio.po, 0);
+	gpio_set_value(ifx_dev->gpio.reset, 0);
+	msleep(25);
+	gpio_set_value(ifx_dev->gpio.reset, 1);
+	msleep(1);
+	gpio_set_value(ifx_dev->gpio.po, 1);
+	msleep(1);
+	gpio_set_value(ifx_dev->gpio.po, 0);
+	ret = wait_event_timeout(ifx_dev->mdm_reset_wait,
+				 test_bit(MR_COMPLETE,
+					  &ifx_dev->mdm_reset_state),
+				 IFX_RESET_TIMEOUT);
+	if (!ret)
+		dev_warn(&ifx_dev->spi_dev->dev, "Modem reset timeout: (state:%lx)",
+			 ifx_dev->mdm_reset_state);
+
+	ifx_dev->mdm_reset_state = 0;
+	return ret;
+}
+
+/**
+ *	ifx_spi_spi_probe	-	probe callback
+ *	@spi: our possible matching SPI device
+ *
+ *	Probe for a 6x60 modem on SPI bus. Perform any needed device and
+ *	GPIO setup.
+ *
+ *	FIXME:
+ *	-	Support for multiple devices
+ *	-	Split out MID specific GPIO handling eventually
+ */
+
+static int ifx_spi_spi_probe(struct spi_device *spi)
+{
+	int ret;
+	int srdy;
+	struct ifx_modem_platform_data *pl_data = NULL;
+	struct ifx_spi_device *ifx_dev;
+
+	if (saved_ifx_dev) {
+		dev_dbg(&spi->dev, "ignoring subsequent detection");
+		return -ENODEV;
+	}
+
+	/* initialize structure to hold our device variables */
+	ifx_dev = kzalloc(sizeof(struct ifx_spi_device), GFP_KERNEL);
+	if (!ifx_dev) {
+		dev_err(&spi->dev, "spi device allocation failed");
+		return -ENOMEM;
+	}
+	saved_ifx_dev = ifx_dev;
+	ifx_dev->spi_dev = spi;
+	clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags);
+	spin_lock_init(&ifx_dev->write_lock);
+	spin_lock_init(&ifx_dev->power_lock);
+	ifx_dev->power_status = 0;
+	init_timer(&ifx_dev->spi_timer);
+	ifx_dev->spi_timer.function = ifx_spi_timeout;
+	ifx_dev->spi_timer.data = (unsigned long)ifx_dev;
+	ifx_dev->is_6160 = pl_data->is_6160;
+
+	/* ensure SPI protocol flags are initialized to enable transfer */
+	ifx_dev->spi_more = 0;
+	ifx_dev->spi_slave_cts = 0;
+
+	/*initialize transfer and dma buffers */
+	ifx_dev->tx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev,
+				IFX_SPI_TRANSFER_SIZE,
+				&ifx_dev->tx_bus,
+				GFP_KERNEL);
+	if (!ifx_dev->tx_buffer) {
+		dev_err(&spi->dev, "DMA-TX buffer allocation failed");
+		ret = -ENOMEM;
+		goto error_ret;
+	}
+	ifx_dev->rx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev,
+				IFX_SPI_TRANSFER_SIZE,
+				&ifx_dev->rx_bus,
+				GFP_KERNEL);
+	if (!ifx_dev->rx_buffer) {
+		dev_err(&spi->dev, "DMA-RX buffer allocation failed");
+		ret = -ENOMEM;
+		goto error_ret;
+	}
+
+	/* initialize waitq for modem reset */
+	init_waitqueue_head(&ifx_dev->mdm_reset_wait);
+
+	spi_set_drvdata(spi, ifx_dev);
+	tasklet_init(&ifx_dev->io_work_tasklet, ifx_spi_io,
+						(unsigned long)ifx_dev);
+
+	set_bit(IFX_SPI_STATE_PRESENT, &ifx_dev->flags);
+
+	/* create our tty port */
+	ret = ifx_spi_create_port(ifx_dev);
+	if (ret != 0) {
+		dev_err(&spi->dev, "create default tty port failed");
+		goto error_ret;
+	}
+
+	pl_data = (struct ifx_modem_platform_data *)spi->dev.platform_data;
+	if (pl_data) {
+		ifx_dev->gpio.reset = pl_data->rst_pmu;
+		ifx_dev->gpio.po = pl_data->pwr_on;
+		ifx_dev->gpio.mrdy = pl_data->mrdy;
+		ifx_dev->gpio.srdy = pl_data->srdy;
+		ifx_dev->gpio.reset_out = pl_data->rst_out;
+	} else {
+		dev_err(&spi->dev, "missing platform data!");
+		ret = -ENODEV;
+		goto error_ret;
+	}
+
+	dev_info(&spi->dev, "gpios %d, %d, %d, %d, %d",
+		 ifx_dev->gpio.reset, ifx_dev->gpio.po, ifx_dev->gpio.mrdy,
+		 ifx_dev->gpio.srdy, ifx_dev->gpio.reset_out);
+
+	/* Configure gpios */
+	ret = gpio_request(ifx_dev->gpio.reset, "ifxModem");
+	if (ret < 0) {
+		dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET)",
+			ifx_dev->gpio.reset);
+		goto error_ret;
+	}
+	ret += gpio_direction_output(ifx_dev->gpio.reset, 0);
+	ret += gpio_export(ifx_dev->gpio.reset, 1);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to configure GPIO%d (RESET)",
+			ifx_dev->gpio.reset);
+		ret = -EBUSY;
+		goto error_ret2;
+	}
+
+	ret = gpio_request(ifx_dev->gpio.po, "ifxModem");
+	ret += gpio_direction_output(ifx_dev->gpio.po, 0);
+	ret += gpio_export(ifx_dev->gpio.po, 1);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to configure GPIO%d (ON)",
+			ifx_dev->gpio.po);
+		ret = -EBUSY;
+		goto error_ret3;
+	}
+
+	ret = gpio_request(ifx_dev->gpio.mrdy, "ifxModem");
+	if (ret < 0) {
+		dev_err(&spi->dev, "Unable to allocate GPIO%d (MRDY)",
+			ifx_dev->gpio.mrdy);
+		goto error_ret3;
+	}
+	ret += gpio_export(ifx_dev->gpio.mrdy, 1);
+	ret += gpio_direction_output(ifx_dev->gpio.mrdy, 0);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to configure GPIO%d (MRDY)",
+			ifx_dev->gpio.mrdy);
+		ret = -EBUSY;
+		goto error_ret4;
+	}
+
+	ret = gpio_request(ifx_dev->gpio.srdy, "ifxModem");
+	if (ret < 0) {
+		dev_err(&spi->dev, "Unable to allocate GPIO%d (SRDY)",
+			ifx_dev->gpio.srdy);
+		ret = -EBUSY;
+		goto error_ret4;
+	}
+	ret += gpio_export(ifx_dev->gpio.srdy, 1);
+	ret += gpio_direction_input(ifx_dev->gpio.srdy);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to configure GPIO%d (SRDY)",
+			ifx_dev->gpio.srdy);
+		ret = -EBUSY;
+		goto error_ret5;
+	}
+
+	ret = gpio_request(ifx_dev->gpio.reset_out, "ifxModem");
+	if (ret < 0) {
+		dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET_OUT)",
+			ifx_dev->gpio.reset_out);
+		goto error_ret5;
+	}
+	ret += gpio_export(ifx_dev->gpio.reset_out, 1);
+	ret += gpio_direction_input(ifx_dev->gpio.reset_out);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to configure GPIO%d (RESET_OUT)",
+			ifx_dev->gpio.reset_out);
+		ret = -EBUSY;
+		goto error_ret6;
+	}
+
+	ret = request_irq(gpio_to_irq(ifx_dev->gpio.reset_out),
+			  ifx_spi_reset_interrupt,
+			  IRQF_TRIGGER_RISING|IRQF_TRIGGER_FALLING, DRVNAME,
+		(void *)ifx_dev);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to get irq %x\n",
+			gpio_to_irq(ifx_dev->gpio.reset_out));
+		goto error_ret6;
+	}
+
+	ret = ifx_spi_reset(ifx_dev);
+
+	ret = request_irq(gpio_to_irq(ifx_dev->gpio.srdy),
+			  ifx_spi_srdy_interrupt,
+			  IRQF_TRIGGER_RISING, DRVNAME,
+			  (void *)ifx_dev);
+	if (ret) {
+		dev_err(&spi->dev, "Unable to get irq %x",
+			gpio_to_irq(ifx_dev->gpio.srdy));
+		goto error_ret6;
+	}
+
+	/* set pm runtime power state and register with power system */
+	pm_runtime_set_active(&spi->dev);
+	pm_runtime_enable(&spi->dev);
+
+	/* handle case that modem is already signaling SRDY */
+	/* no outgoing tty open at this point, this just satisfies the
+	 * modem's read and should reset communication properly
+	 */
+	srdy = gpio_get_value(ifx_dev->gpio.srdy);
+
+	if (srdy) {
+		mrdy_assert(ifx_dev);
+		ifx_spi_handle_srdy(ifx_dev);
+	} else
+		mrdy_set_low(ifx_dev);
+	return 0;
+
+error_ret6:
+	gpio_free(ifx_dev->gpio.srdy);
+error_ret5:
+	gpio_free(ifx_dev->gpio.mrdy);
+error_ret4:
+	gpio_free(ifx_dev->gpio.reset);
+error_ret3:
+	gpio_free(ifx_dev->gpio.po);
+error_ret2:
+	gpio_free(ifx_dev->gpio.reset_out);
+error_ret:
+	ifx_spi_free_device(ifx_dev);
+	saved_ifx_dev = NULL;
+	return ret;
+}
+
+/**
+ *	ifx_spi_spi_remove	-	SPI device was removed
+ *	@spi: SPI device
+ *
+ *	FIXME: We should be shutting the device down here not in
+ *	the module unload path.
+ */
+
+static int ifx_spi_spi_remove(struct spi_device *spi)
+{
+	struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi);
+	/* stop activity */
+	tasklet_kill(&ifx_dev->io_work_tasklet);
+	/* free irq */
+	free_irq(gpio_to_irq(ifx_dev->gpio.reset_out), (void *)ifx_dev);
+	free_irq(gpio_to_irq(ifx_dev->gpio.srdy), (void *)ifx_dev);
+
+	gpio_free(ifx_dev->gpio.srdy);
+	gpio_free(ifx_dev->gpio.mrdy);
+	gpio_free(ifx_dev->gpio.reset);
+	gpio_free(ifx_dev->gpio.po);
+	gpio_free(ifx_dev->gpio.reset_out);
+
+	/* free allocations */
+	ifx_spi_free_device(ifx_dev);
+
+	saved_ifx_dev = NULL;
+	return 0;
+}
+
+/**
+ *	ifx_spi_spi_shutdown	-	called on SPI shutdown
+ *	@spi: SPI device
+ *
+ *	No action needs to be taken here
+ */
+
+static void ifx_spi_spi_shutdown(struct spi_device *spi)
+{
+}
+
+/*
+ * various suspends and resumes have nothing to do
+ * no hardware to save state for
+ */
+
+/**
+ *	ifx_spi_spi_suspend	-	suspend SPI on system suspend
+ *	@dev: device being suspended
+ *
+ *	Suspend the SPI side. No action needed on Intel MID platforms, may
+ *	need extending for other systems.
+ */
+static int ifx_spi_spi_suspend(struct spi_device *spi, pm_message_t msg)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_spi_resume	-	resume SPI side on system resume
+ *	@dev: device being suspended
+ *
+ *	Suspend the SPI side. No action needed on Intel MID platforms, may
+ *	need extending for other systems.
+ */
+static int ifx_spi_spi_resume(struct spi_device *spi)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_pm_suspend	-	suspend modem on system suspend
+ *	@dev: device being suspended
+ *
+ *	Suspend the modem. No action needed on Intel MID platforms, may
+ *	need extending for other systems.
+ */
+static int ifx_spi_pm_suspend(struct device *dev)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_pm_resume	-	resume modem on system resume
+ *	@dev: device being suspended
+ *
+ *	Allow the modem to resume. No action needed.
+ *
+ *	FIXME: do we need to reset anything here ?
+ */
+static int ifx_spi_pm_resume(struct device *dev)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_pm_runtime_resume	-	suspend modem
+ *	@dev: device being suspended
+ *
+ *	Allow the modem to resume. No action needed.
+ */
+static int ifx_spi_pm_runtime_resume(struct device *dev)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_pm_runtime_suspend	-	suspend modem
+ *	@dev: device being suspended
+ *
+ *	Allow the modem to suspend and thus suspend to continue up the
+ *	device tree.
+ */
+static int ifx_spi_pm_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+/**
+ *	ifx_spi_pm_runtime_idle		-	check if modem idle
+ *	@dev: our device
+ *
+ *	Check conditions and queue runtime suspend if idle.
+ */
+static int ifx_spi_pm_runtime_idle(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi);
+
+	if (!ifx_dev->power_status)
+		pm_runtime_suspend(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops ifx_spi_pm = {
+	.resume = ifx_spi_pm_resume,
+	.suspend = ifx_spi_pm_suspend,
+	.runtime_resume = ifx_spi_pm_runtime_resume,
+	.runtime_suspend = ifx_spi_pm_runtime_suspend,
+	.runtime_idle = ifx_spi_pm_runtime_idle
+};
+
+static const struct spi_device_id ifx_id_table[] = {
+	{"ifx6160", 0},
+	{"ifx6260", 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, ifx_id_table);
+
+/* spi operations */
+static const struct spi_driver ifx_spi_driver_6160 = {
+	.driver = {
+		.name = "ifx6160",
+		.bus = &spi_bus_type,
+		.pm = &ifx_spi_pm,
+		.owner = THIS_MODULE},
+	.probe = ifx_spi_spi_probe,
+	.shutdown = ifx_spi_spi_shutdown,
+	.remove = __devexit_p(ifx_spi_spi_remove),
+	.suspend = ifx_spi_spi_suspend,
+	.resume = ifx_spi_spi_resume,
+	.id_table = ifx_id_table
+};
+
+/**
+ *	ifx_spi_exit	-	module exit
+ *
+ *	Unload the module.
+ */
+
+static void __exit ifx_spi_exit(void)
+{
+	/* unregister */
+	tty_unregister_driver(tty_drv);
+	spi_unregister_driver((void *)&ifx_spi_driver_6160);
+}
+
+/**
+ *	ifx_spi_init		-	module entry point
+ *
+ *	Initialise the SPI and tty interfaces for the IFX SPI driver
+ *	We need to initialize upper-edge spi driver after the tty
+ *	driver because otherwise the spi probe will race
+ */
+
+static int __init ifx_spi_init(void)
+{
+	int result;
+
+	tty_drv = alloc_tty_driver(1);
+	if (!tty_drv) {
+		pr_err("%s: alloc_tty_driver failed", DRVNAME);
+		return -ENOMEM;
+	}
+
+	tty_drv->magic = TTY_DRIVER_MAGIC;
+	tty_drv->owner = THIS_MODULE;
+	tty_drv->driver_name = DRVNAME;
+	tty_drv->name = TTYNAME;
+	tty_drv->minor_start = IFX_SPI_TTY_ID;
+	tty_drv->num = 1;
+	tty_drv->type = TTY_DRIVER_TYPE_SERIAL;
+	tty_drv->subtype = SERIAL_TYPE_NORMAL;
+	tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
+	tty_drv->init_termios = tty_std_termios;
+
+	tty_set_operations(tty_drv, &ifx_spi_serial_ops);
+
+	result = tty_register_driver(tty_drv);
+	if (result) {
+		pr_err("%s: tty_register_driver failed(%d)",
+			DRVNAME, result);
+		return result;
+	}
+
+	result = spi_register_driver((void *)&ifx_spi_driver_6160);
+	if (result) {
+		pr_err("%s: spi_register_driver failed(%d)",
+			DRVNAME, result);
+		tty_unregister_driver(tty_drv);
+	}
+	return result;
+}
+
+module_init(ifx_spi_init);
+module_exit(ifx_spi_exit);
+
+MODULE_AUTHOR("Intel");
+MODULE_DESCRIPTION("IFX6x60 spi driver");
+MODULE_LICENSE("GPL");
+MODULE_INFO(Version, "0.1-IFX6x60");
diff --git a/drivers/serial/ifx6x60.h b/drivers/serial/ifx6x60.h
new file mode 100644
index 000000000000..deb7b8d977dc
--- /dev/null
+++ b/drivers/serial/ifx6x60.h
@@ -0,0 +1,129 @@
+/****************************************************************************
+ *
+ * Driver for the IFX spi modem.
+ *
+ * Copyright (C) 2009, 2010 Intel Corp
+ * Jim Stanley <jim.stanley@intel.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA
+ *
+ *
+ *
+ *****************************************************************************/
+#ifndef _IFX6X60_H
+#define _IFX6X60_H
+
+#define DRVNAME				"ifx6x60"
+#define TTYNAME				"ttyIFX"
+
+/* #define IFX_THROTTLE_CODE */
+
+#define IFX_SPI_MAX_MINORS		1
+#define IFX_SPI_TRANSFER_SIZE		2048
+#define IFX_SPI_FIFO_SIZE		4096
+
+#define IFX_SPI_HEADER_OVERHEAD		4
+#define IFX_RESET_TIMEOUT		msecs_to_jiffies(50)
+
+/* device flags bitfield definitions */
+#define IFX_SPI_STATE_PRESENT		0
+#define IFX_SPI_STATE_IO_IN_PROGRESS	1
+#define IFX_SPI_STATE_IO_READY		2
+#define IFX_SPI_STATE_TIMER_PENDING	3
+
+/* flow control bitfields */
+#define IFX_SPI_DCD			0
+#define IFX_SPI_CTS			1
+#define IFX_SPI_DSR			2
+#define IFX_SPI_RI			3
+#define IFX_SPI_DTR			4
+#define IFX_SPI_RTS			5
+#define IFX_SPI_TX_FC			6
+#define IFX_SPI_RX_FC			7
+#define IFX_SPI_UPDATE			8
+
+#define IFX_SPI_PAYLOAD_SIZE		(IFX_SPI_TRANSFER_SIZE - \
+						IFX_SPI_HEADER_OVERHEAD)
+
+#define IFX_SPI_IRQ_TYPE		DETECT_EDGE_RISING
+#define IFX_SPI_GPIO_TARGET		0
+#define IFX_SPI_GPIO0			0x105
+
+#define IFX_SPI_STATUS_TIMEOUT		(2000*HZ)
+
+/* values for bits in power status byte */
+#define IFX_SPI_POWER_DATA_PENDING	1
+#define IFX_SPI_POWER_SRDY		2
+
+struct ifx_spi_device {
+	/* Our SPI device */
+	struct spi_device *spi_dev;
+
+	/* Port specific data */
+	struct kfifo tx_fifo;
+	spinlock_t fifo_lock;
+	unsigned long signal_state;
+
+	/* TTY Layer logic */
+	struct tty_port tty_port;
+	struct device *tty_dev;
+	int minor;
+
+	/* Low level I/O work */
+	struct tasklet_struct io_work_tasklet;
+	unsigned long flags;
+	dma_addr_t rx_dma;
+	dma_addr_t tx_dma;
+
+	int is_6160;				/* Modem type */
+
+	spinlock_t write_lock;
+	int write_pending;
+	spinlock_t power_lock;
+	unsigned char power_status;
+
+	unsigned char *rx_buffer;
+	unsigned char *tx_buffer;
+	dma_addr_t rx_bus;
+	dma_addr_t tx_bus;
+	unsigned char spi_more;
+	unsigned char spi_slave_cts;
+
+	struct timer_list spi_timer;
+
+	struct spi_message spi_msg;
+	struct spi_transfer spi_xfer;
+
+	struct {
+		/* gpio lines */
+		unsigned short srdy;		/* slave-ready gpio */
+		unsigned short mrdy;		/* master-ready gpio */
+		unsigned short reset;		/* modem-reset gpio */
+		unsigned short po;		/* modem-on gpio */
+		unsigned short reset_out;	/* modem-in-reset gpio */
+		/* state/stats */
+		int unack_srdy_int_nb;
+	} gpio;
+
+	/* modem reset */
+	unsigned long mdm_reset_state;
+#define MR_START	0
+#define MR_INPROGRESS	1
+#define MR_COMPLETE	2
+	wait_queue_head_t mdm_reset_wait;
+};
+
+#endif /* _IFX6X60_H */
diff --git a/include/linux/spi/ifx_modem.h b/include/linux/spi/ifx_modem.h
new file mode 100644
index 000000000000..a68f3b19d112
--- /dev/null
+++ b/include/linux/spi/ifx_modem.h
@@ -0,0 +1,14 @@
+#ifndef LINUX_IFX_MODEM_H
+#define LINUX_IFX_MODEM_H
+
+struct ifx_modem_platform_data {
+	unsigned short rst_out; /* modem reset out */
+	unsigned short pwr_on;  /* power on */
+	unsigned short rst_pmu; /* reset modem */
+	unsigned short tx_pwr;  /* modem power threshold */
+	unsigned short srdy;    /* SRDY */
+	unsigned short mrdy;    /* MRDY */
+	unsigned short is_6160;	/* Modem type */
+};
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 304e12665a4a7b8b25dfe8c64fa4fd56a04a67ea Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Mon, 8 Nov 2010 20:33:20 +0300
Subject: serial: Add support for UART on VIA VT8500 and compatibles

This adds a driver for the serial ports found in VIA and WonderMedia
Systems-on-Chip. Interrupt-driven FIFO operation is implemented.
The hardware also supports pure register-based operation (which is
slower) and DMA-based FIFO operation. As the FIFOs are only 16 bytes
long, DMA operation is probably not worth the hassle.

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/Kconfig         |  10 +
 drivers/serial/Makefile        |   1 +
 drivers/serial/vt8500_serial.c | 648 +++++++++++++++++++++++++++++++++++++++++
 include/linux/serial_core.h    |   3 +
 4 files changed, 662 insertions(+)
 create mode 100644 drivers/serial/vt8500_serial.c

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 0b9cc17b380b..388e37132cc9 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1381,6 +1381,16 @@ config SERIAL_MSM_CONSOLE
 	depends on SERIAL_MSM=y
 	select SERIAL_CORE_CONSOLE
 
+config SERIAL_VT8500
+	bool "VIA VT8500 on-chip serial port support"
+	depends on ARM && ARCH_VT8500
+	select SERIAL_CORE
+
+config SERIAL_VT8500_CONSOLE
+	bool "VIA VT8500 serial console support"
+	depends on SERIAL_VT8500=y
+	select SERIAL_CORE_CONSOLE
+
 config SERIAL_NETX
 	tristate "NetX serial port support"
 	depends on ARM && ARCH_NETX
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 783638b10698..a5e2264b2a80 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
 obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
 obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o
 obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
+obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o
 obj-$(CONFIG_SERIAL_MRST_MAX3110)	+= mrst_max3110.o
 obj-$(CONFIG_SERIAL_MFD_HSU)	+= mfd.o
 obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o
diff --git a/drivers/serial/vt8500_serial.c b/drivers/serial/vt8500_serial.c
new file mode 100644
index 000000000000..322bf56c0d89
--- /dev/null
+++ b/drivers/serial/vt8500_serial.c
@@ -0,0 +1,648 @@
+/*
+ * drivers/serial/vt8500_serial.c
+ *
+ * Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
+ *
+ * Based on msm_serial.c, which is:
+ * Copyright (C) 2007 Google, Inc.
+ * Author: Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#if defined(CONFIG_SERIAL_VT8500_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+# define SUPPORT_SYSRQ
+#endif
+
+#include <linux/hrtimer.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+/*
+ * UART Register offsets
+ */
+
+#define VT8500_URTDR		0x0000	/* Transmit data */
+#define VT8500_URRDR		0x0004	/* Receive data */
+#define VT8500_URDIV		0x0008	/* Clock/Baud rate divisor */
+#define VT8500_URLCR		0x000C	/* Line control */
+#define VT8500_URICR		0x0010	/* IrDA control */
+#define VT8500_URIER		0x0014	/* Interrupt enable */
+#define VT8500_URISR		0x0018	/* Interrupt status */
+#define VT8500_URUSR		0x001c	/* UART status */
+#define VT8500_URFCR		0x0020	/* FIFO control */
+#define VT8500_URFIDX		0x0024	/* FIFO index */
+#define VT8500_URBKR		0x0028	/* Break signal count */
+#define VT8500_URTOD		0x002c	/* Time out divisor */
+#define VT8500_TXFIFO		0x1000	/* Transmit FIFO (16x8) */
+#define VT8500_RXFIFO		0x1020	/* Receive FIFO (16x10) */
+
+/*
+ * Interrupt enable and status bits
+ */
+
+#define TXDE	(1 << 0)	/* Tx Data empty */
+#define RXDF	(1 << 1)	/* Rx Data full */
+#define TXFAE	(1 << 2)	/* Tx FIFO almost empty */
+#define TXFE	(1 << 3)	/* Tx FIFO empty */
+#define RXFAF	(1 << 4)	/* Rx FIFO almost full */
+#define RXFF	(1 << 5)	/* Rx FIFO full */
+#define TXUDR	(1 << 6)	/* Tx underrun */
+#define RXOVER	(1 << 7)	/* Rx overrun */
+#define PER	(1 << 8)	/* Parity error */
+#define FER	(1 << 9)	/* Frame error */
+#define TCTS	(1 << 10)	/* Toggle of CTS */
+#define RXTOUT	(1 << 11)	/* Rx timeout */
+#define BKDONE	(1 << 12)	/* Break signal done */
+#define ERR	(1 << 13)	/* AHB error response */
+
+#define RX_FIFO_INTS	(RXFAF | RXFF | RXOVER | PER | FER | RXTOUT)
+#define TX_FIFO_INTS	(TXFAE | TXFE | TXUDR)
+
+struct vt8500_port {
+	struct uart_port	uart;
+	char			name[16];
+	struct clk		*clk;
+	unsigned int		ier;
+};
+
+static inline void vt8500_write(struct uart_port *port, unsigned int val,
+			     unsigned int off)
+{
+	writel(val, port->membase + off);
+}
+
+static inline unsigned int vt8500_read(struct uart_port *port, unsigned int off)
+{
+	return readl(port->membase + off);
+}
+
+static void vt8500_stop_tx(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port = container_of(port,
+						       struct vt8500_port,
+						       uart);
+
+	vt8500_port->ier &= ~TX_FIFO_INTS;
+	vt8500_write(port, vt8500_port->ier, VT8500_URIER);
+}
+
+static void vt8500_stop_rx(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port = container_of(port,
+						       struct vt8500_port,
+						       uart);
+
+	vt8500_port->ier &= ~RX_FIFO_INTS;
+	vt8500_write(port, vt8500_port->ier, VT8500_URIER);
+}
+
+static void vt8500_enable_ms(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port = container_of(port,
+						       struct vt8500_port,
+						       uart);
+
+	vt8500_port->ier |= TCTS;
+	vt8500_write(port, vt8500_port->ier, VT8500_URIER);
+}
+
+static void handle_rx(struct uart_port *port)
+{
+	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	if (!tty) {
+		/* Discard data: no tty available */
+		int count = (vt8500_read(port, VT8500_URFIDX) & 0x1f00) >> 8;
+		u16 ch;
+		while (count--)
+			ch = readw(port->membase + VT8500_RXFIFO);
+		return;
+	}
+
+	/*
+	 * Handle overrun
+	 */
+	if ((vt8500_read(port, VT8500_URISR) & RXOVER)) {
+		port->icount.overrun++;
+		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+	}
+
+	/* and now the main RX loop */
+	while (vt8500_read(port, VT8500_URFIDX) & 0x1f00) {
+		unsigned int c;
+		char flag = TTY_NORMAL;
+
+		c = readw(port->membase + VT8500_RXFIFO) & 0x3ff;
+
+		/* Mask conditions we're ignorning. */
+		c &= ~port->read_status_mask;
+
+		if (c & FER) {
+			port->icount.frame++;
+			flag = TTY_FRAME;
+		} else if (c & PER) {
+			port->icount.parity++;
+			flag = TTY_PARITY;
+		}
+		port->icount.rx++;
+
+		if (!uart_handle_sysrq_char(port, c))
+			tty_insert_flip_char(tty, c, flag);
+	}
+
+	tty_flip_buffer_push(tty);
+	tty_kref_put(tty);
+}
+
+static void handle_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+
+	if (port->x_char) {
+		writeb(port->x_char, port->membase + VT8500_TXFIFO);
+		port->icount.tx++;
+		port->x_char = 0;
+	}
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+		vt8500_stop_tx(port);
+		return;
+	}
+
+	while ((vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16) {
+		if (uart_circ_empty(xmit))
+			break;
+
+		writeb(xmit->buf[xmit->tail], port->membase + VT8500_TXFIFO);
+
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(xmit))
+		vt8500_stop_tx(port);
+}
+
+static void vt8500_start_tx(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port = container_of(port,
+						       struct vt8500_port,
+						       uart);
+
+	vt8500_port->ier &= ~TX_FIFO_INTS;
+	vt8500_write(port, vt8500_port->ier, VT8500_URIER);
+	handle_tx(port);
+	vt8500_port->ier |= TX_FIFO_INTS;
+	vt8500_write(port, vt8500_port->ier, VT8500_URIER);
+}
+
+static void handle_delta_cts(struct uart_port *port)
+{
+	port->icount.cts++;
+	wake_up_interruptible(&port->state->port.delta_msr_wait);
+}
+
+static irqreturn_t vt8500_irq(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	unsigned long isr;
+
+	spin_lock(&port->lock);
+	isr = vt8500_read(port, VT8500_URISR);
+
+	/* Acknowledge active status bits */
+	vt8500_write(port, isr, VT8500_URISR);
+
+	if (isr & RX_FIFO_INTS)
+		handle_rx(port);
+	if (isr & TX_FIFO_INTS)
+		handle_tx(port);
+	if (isr & TCTS)
+		handle_delta_cts(port);
+
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int vt8500_tx_empty(struct uart_port *port)
+{
+	return (vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16 ?
+						TIOCSER_TEMT : 0;
+}
+
+static unsigned int vt8500_get_mctrl(struct uart_port *port)
+{
+	unsigned int usr;
+
+	usr = vt8500_read(port, VT8500_URUSR);
+	if (usr & (1 << 4))
+		return TIOCM_CTS;
+	else
+		return 0;
+}
+
+static void vt8500_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static void vt8500_break_ctl(struct uart_port *port, int break_ctl)
+{
+	if (break_ctl)
+		vt8500_write(port, vt8500_read(port, VT8500_URLCR) | (1 << 9),
+			     VT8500_URLCR);
+}
+
+static int vt8500_set_baud_rate(struct uart_port *port, unsigned int baud)
+{
+	unsigned long div;
+	unsigned int loops = 1000;
+
+	div = vt8500_read(port, VT8500_URDIV) & ~(0x3ff);
+
+	if (unlikely((baud < 900) || (baud > 921600)))
+		div |= 7;
+	else
+		div |= (921600 / baud) - 1;
+
+	while ((vt8500_read(port, VT8500_URUSR) & (1 << 5)) && --loops)
+		cpu_relax();
+	vt8500_write(port, div, VT8500_URDIV);
+
+	return baud;
+}
+
+static int vt8500_startup(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port =
+			container_of(port, struct vt8500_port, uart);
+	int ret;
+
+	snprintf(vt8500_port->name, sizeof(vt8500_port->name),
+		 "vt8500_serial%d", port->line);
+
+	ret = request_irq(port->irq, vt8500_irq, IRQF_TRIGGER_HIGH,
+			  vt8500_port->name, port);
+	if (unlikely(ret))
+		return ret;
+
+	vt8500_write(port, 0x03, VT8500_URLCR);	/* enable TX & RX */
+
+	return 0;
+}
+
+static void vt8500_shutdown(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port =
+			container_of(port, struct vt8500_port, uart);
+
+	vt8500_port->ier = 0;
+
+	/* disable interrupts and FIFOs */
+	vt8500_write(&vt8500_port->uart, 0, VT8500_URIER);
+	vt8500_write(&vt8500_port->uart, 0x880, VT8500_URFCR);
+	free_irq(port->irq, port);
+}
+
+static void vt8500_set_termios(struct uart_port *port,
+			       struct ktermios *termios,
+			       struct ktermios *old)
+{
+	struct vt8500_port *vt8500_port =
+			container_of(port, struct vt8500_port, uart);
+	unsigned long flags;
+	unsigned int baud, lcr;
+	unsigned int loops = 1000;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* calculate and set baud rate */
+	baud = uart_get_baud_rate(port, termios, old, 900, 921600);
+	baud = vt8500_set_baud_rate(port, baud);
+	if (tty_termios_baud_rate(termios))
+		tty_termios_encode_baud_rate(termios, baud, baud);
+
+	/* calculate parity */
+	lcr = vt8500_read(&vt8500_port->uart, VT8500_URLCR);
+	lcr &= ~((1 << 5) | (1 << 4));
+	if (termios->c_cflag & PARENB) {
+		lcr |= (1 << 4);
+		termios->c_cflag &= ~CMSPAR;
+		if (termios->c_cflag & PARODD)
+			lcr |= (1 << 5);
+	}
+
+	/* calculate bits per char */
+	lcr &= ~(1 << 2);
+	switch (termios->c_cflag & CSIZE) {
+	case CS7:
+		break;
+	case CS8:
+	default:
+		lcr |= (1 << 2);
+		termios->c_cflag &= ~CSIZE;
+		termios->c_cflag |= CS8;
+		break;
+	}
+
+	/* calculate stop bits */
+	lcr &= ~(1 << 3);
+	if (termios->c_cflag & CSTOPB)
+		lcr |= (1 << 3);
+
+	/* set parity, bits per char, and stop bit */
+	vt8500_write(&vt8500_port->uart, lcr, VT8500_URLCR);
+
+	/* Configure status bits to ignore based on termio flags. */
+	port->read_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->read_status_mask = FER | PER;
+
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	/* Reset FIFOs */
+	vt8500_write(&vt8500_port->uart, 0x88c, VT8500_URFCR);
+	while ((vt8500_read(&vt8500_port->uart, VT8500_URFCR) & 0xc)
+							&& --loops)
+		cpu_relax();
+
+	/* Every possible FIFO-related interrupt */
+	vt8500_port->ier = RX_FIFO_INTS | TX_FIFO_INTS;
+
+	/*
+	 * CTS flow control
+	 */
+	if (UART_ENABLE_MS(&vt8500_port->uart, termios->c_cflag))
+		vt8500_port->ier |= TCTS;
+
+	vt8500_write(&vt8500_port->uart, 0x881, VT8500_URFCR);
+	vt8500_write(&vt8500_port->uart, vt8500_port->ier, VT8500_URIER);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *vt8500_type(struct uart_port *port)
+{
+	struct vt8500_port *vt8500_port =
+			container_of(port, struct vt8500_port, uart);
+	return vt8500_port->name;
+}
+
+static void vt8500_release_port(struct uart_port *port)
+{
+}
+
+static int vt8500_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+static void vt8500_config_port(struct uart_port *port, int flags)
+{
+	port->type = PORT_VT8500;
+}
+
+static int vt8500_verify_port(struct uart_port *port,
+			      struct serial_struct *ser)
+{
+	if (unlikely(ser->type != PORT_UNKNOWN && ser->type != PORT_VT8500))
+		return -EINVAL;
+	if (unlikely(port->irq != ser->irq))
+		return -EINVAL;
+	return 0;
+}
+
+static struct vt8500_port *vt8500_uart_ports[4];
+static struct uart_driver vt8500_uart_driver;
+
+#ifdef CONFIG_SERIAL_VT8500_CONSOLE
+
+static inline void wait_for_xmitr(struct uart_port *port)
+{
+	unsigned int status, tmout = 10000;
+
+	/* Wait up to 10ms for the character(s) to be sent. */
+	do {
+		status = vt8500_read(port, VT8500_URFIDX);
+
+		if (--tmout == 0)
+			break;
+		udelay(1);
+	} while (status & 0x10);
+}
+
+static void vt8500_console_putchar(struct uart_port *port, int c)
+{
+	wait_for_xmitr(port);
+	writeb(c, port->membase + VT8500_TXFIFO);
+}
+
+static void vt8500_console_write(struct console *co, const char *s,
+			      unsigned int count)
+{
+	struct vt8500_port *vt8500_port = vt8500_uart_ports[co->index];
+	unsigned long ier;
+
+	BUG_ON(co->index < 0 || co->index >= vt8500_uart_driver.nr);
+
+	ier = vt8500_read(&vt8500_port->uart, VT8500_URIER);
+	vt8500_write(&vt8500_port->uart, VT8500_URIER, 0);
+
+	uart_console_write(&vt8500_port->uart, s, count,
+			   vt8500_console_putchar);
+
+	/*
+	 *	Finally, wait for transmitter to become empty
+	 *	and switch back to FIFO
+	 */
+	wait_for_xmitr(&vt8500_port->uart);
+	vt8500_write(&vt8500_port->uart, VT8500_URIER, ier);
+}
+
+static int __init vt8500_console_setup(struct console *co, char *options)
+{
+	struct vt8500_port *vt8500_port;
+	int baud = 9600;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (unlikely(co->index >= vt8500_uart_driver.nr || co->index < 0))
+		return -ENXIO;
+
+	vt8500_port = vt8500_uart_ports[co->index];
+
+	if (!vt8500_port)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&vt8500_port->uart,
+				 co, baud, parity, bits, flow);
+}
+
+static struct console vt8500_console = {
+	.name = "ttyWMT",
+	.write = vt8500_console_write,
+	.device = uart_console_device,
+	.setup = vt8500_console_setup,
+	.flags = CON_PRINTBUFFER,
+	.index = -1,
+	.data = &vt8500_uart_driver,
+};
+
+#define VT8500_CONSOLE	(&vt8500_console)
+
+#else
+#define VT8500_CONSOLE	NULL
+#endif
+
+static struct uart_ops vt8500_uart_pops = {
+	.tx_empty	= vt8500_tx_empty,
+	.set_mctrl	= vt8500_set_mctrl,
+	.get_mctrl	= vt8500_get_mctrl,
+	.stop_tx	= vt8500_stop_tx,
+	.start_tx	= vt8500_start_tx,
+	.stop_rx	= vt8500_stop_rx,
+	.enable_ms	= vt8500_enable_ms,
+	.break_ctl	= vt8500_break_ctl,
+	.startup	= vt8500_startup,
+	.shutdown	= vt8500_shutdown,
+	.set_termios	= vt8500_set_termios,
+	.type		= vt8500_type,
+	.release_port	= vt8500_release_port,
+	.request_port	= vt8500_request_port,
+	.config_port	= vt8500_config_port,
+	.verify_port	= vt8500_verify_port,
+};
+
+static struct uart_driver vt8500_uart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= "vt8500_serial",
+	.dev_name	= "ttyWMT",
+	.nr		= 6,
+	.cons		= VT8500_CONSOLE,
+};
+
+static int __init vt8500_serial_probe(struct platform_device *pdev)
+{
+	struct vt8500_port *vt8500_port;
+	struct resource *mmres, *irqres;
+	int ret;
+
+	mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!mmres || !irqres)
+		return -ENODEV;
+
+	vt8500_port = kzalloc(sizeof(struct vt8500_port), GFP_KERNEL);
+	if (!vt8500_port)
+		return -ENOMEM;
+
+	vt8500_port->uart.type = PORT_VT8500;
+	vt8500_port->uart.iotype = UPIO_MEM;
+	vt8500_port->uart.mapbase = mmres->start;
+	vt8500_port->uart.irq = irqres->start;
+	vt8500_port->uart.fifosize = 16;
+	vt8500_port->uart.ops = &vt8500_uart_pops;
+	vt8500_port->uart.line = pdev->id;
+	vt8500_port->uart.dev = &pdev->dev;
+	vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF;
+	vt8500_port->uart.uartclk = 24000000;
+
+	snprintf(vt8500_port->name, sizeof(vt8500_port->name),
+		 "VT8500 UART%d", pdev->id);
+
+	vt8500_port->uart.membase = ioremap(mmres->start,
+					    mmres->end - mmres->start + 1);
+	if (!vt8500_port->uart.membase) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	vt8500_uart_ports[pdev->id] = vt8500_port;
+
+	uart_add_one_port(&vt8500_uart_driver, &vt8500_port->uart);
+
+	platform_set_drvdata(pdev, vt8500_port);
+
+	return 0;
+
+err:
+	kfree(vt8500_port);
+	return ret;
+}
+
+static int __devexit vt8500_serial_remove(struct platform_device *pdev)
+{
+	struct vt8500_port *vt8500_port = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+	uart_remove_one_port(&vt8500_uart_driver, &vt8500_port->uart);
+	kfree(vt8500_port);
+
+	return 0;
+}
+
+static struct platform_driver vt8500_platform_driver = {
+	.probe  = vt8500_serial_probe,
+	.remove = vt8500_serial_remove,
+	.driver = {
+		.name = "vt8500_serial",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init vt8500_serial_init(void)
+{
+	int ret;
+
+	ret = uart_register_driver(&vt8500_uart_driver);
+	if (unlikely(ret))
+		return ret;
+
+	ret = platform_driver_register(&vt8500_platform_driver);
+
+	if (unlikely(ret))
+		uart_unregister_driver(&vt8500_uart_driver);
+
+	return ret;
+}
+
+static void __exit vt8500_serial_exit(void)
+{
+#ifdef CONFIG_SERIAL_VT8500_CONSOLE
+	unregister_console(&vt8500_console);
+#endif
+	platform_driver_unregister(&vt8500_platform_driver);
+	uart_unregister_driver(&vt8500_uart_driver);
+}
+
+module_init(vt8500_serial_init);
+module_exit(vt8500_serial_exit);
+
+MODULE_AUTHOR("Alexey Charkov <alchark@gmail.com>");
+MODULE_DESCRIPTION("Driver for vt8500 serial device");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 212eb4c67797..41603d690433 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -199,6 +199,9 @@
 /* TI OMAP-UART */
 #define PORT_OMAP	96
 
+/* VIA VT8500 SoC */
+#define PORT_VT8500	97
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3-71-gd317


From 1d7138de878d1d4210727c1200193e69596f93b3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 12 Nov 2010 05:46:50 +0000
Subject: igmp: RCU conversion of in_dev->mc_list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

in_dev->mc_list is protected by one rwlock (in_dev->mc_list_lock).

This can easily be converted to a RCU protection.

Writers hold RTNL, so mc_list_lock is removed, not replaced by a
spinlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Cypher Wu <cypher.w@gmail.com>
Cc: Américo Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h       |  12 ++-
 include/linux/inetdevice.h |   5 +-
 include/net/inet_sock.h    |   2 +-
 net/ipv4/igmp.c            | 223 +++++++++++++++++++++------------------------
 4 files changed, 115 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 93fc2449af10..7d164670f264 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -167,10 +167,10 @@ struct ip_sf_socklist {
  */
 
 struct ip_mc_socklist {
-	struct ip_mc_socklist	*next;
+	struct ip_mc_socklist __rcu *next_rcu;
 	struct ip_mreqn		multi;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
-	struct ip_sf_socklist	*sflist;
+	struct ip_sf_socklist __rcu	*sflist;
 	struct rcu_head		rcu;
 };
 
@@ -186,11 +186,14 @@ struct ip_sf_list {
 struct ip_mc_list {
 	struct in_device	*interface;
 	__be32			multiaddr;
+	unsigned int		sfmode;
 	struct ip_sf_list	*sources;
 	struct ip_sf_list	*tomb;
-	unsigned int		sfmode;
 	unsigned long		sfcount[2];
-	struct ip_mc_list	*next;
+	union {
+		struct ip_mc_list *next;
+		struct ip_mc_list __rcu *next_rcu;
+	};
 	struct timer_list	timer;
 	int			users;
 	atomic_t		refcnt;
@@ -201,6 +204,7 @@ struct ip_mc_list {
 	char			loaded;
 	unsigned char		gsquery;	/* check source marks? */
 	unsigned char		crcount;
+	struct rcu_head		rcu;
 };
 
 /* V3 exponential field decoding */
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ccd5b07d678d..380ba6bc5db1 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -52,9 +52,8 @@ struct in_device {
 	atomic_t		refcnt;
 	int			dead;
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
-	rwlock_t		mc_list_lock;
-	struct ip_mc_list	*mc_list;	/* IP multicast filter chain    */
-	int			mc_count;	          /* Number of installed mcasts	*/
+	struct ip_mc_list __rcu	*mc_list;	/* IP multicast filter chain    */
+	int			mc_count;	/* Number of installed mcasts	*/
 	spinlock_t		mc_tomb_lock;
 	struct ip_mc_list	*mc_tomb;
 	unsigned long		mr_v1_seen;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1989cfd7405f..8945f9fb192a 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -141,7 +141,7 @@ struct inet_sock {
 				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
-	struct ip_mc_socklist	*mc_list;
+	struct ip_mc_socklist __rcu	*mc_list;
 	struct {
 		unsigned int		flags;
 		unsigned int		fragsize;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 08d0d81ffc15..6f49d6c087da 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,11 +149,17 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
 static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 			 int sfcount, __be32 *psfsrc, int delta);
 
+
+static void ip_mc_list_reclaim(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_mc_list, rcu));
+}
+
 static void ip_ma_put(struct ip_mc_list *im)
 {
 	if (atomic_dec_and_test(&im->refcnt)) {
 		in_dev_put(im->interface);
-		kfree(im);
+		call_rcu(&im->rcu, ip_mc_list_reclaim);
 	}
 }
 
@@ -163,7 +169,7 @@ static void ip_ma_put(struct ip_mc_list *im)
  *	Timer management
  */
 
-static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
+static void igmp_stop_timer(struct ip_mc_list *im)
 {
 	spin_lock_bh(&im->lock);
 	if (del_timer(&im->timer))
@@ -496,14 +502,24 @@ empty_source:
 	return skb;
 }
 
+#define for_each_pmc_rcu(in_dev, pmc)				\
+	for (pmc = rcu_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next_rcu))
+
+#define for_each_pmc_rtnl(in_dev, pmc)				\
+	for (pmc = rtnl_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rtnl_dereference(pmc->next_rcu))
+
 static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
 {
 	struct sk_buff *skb = NULL;
 	int type;
 
 	if (!pmc) {
-		read_lock(&in_dev->mc_list_lock);
-		for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		rcu_read_lock();
+		for_each_pmc_rcu(in_dev, pmc) {
 			if (pmc->multiaddr == IGMP_ALL_HOSTS)
 				continue;
 			spin_lock_bh(&pmc->lock);
@@ -514,7 +530,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
 			skb = add_grec(skb, pmc, type, 0, 0);
 			spin_unlock_bh(&pmc->lock);
 		}
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 	} else {
 		spin_lock_bh(&pmc->lock);
 		if (pmc->sfcount[MCAST_EXCLUDE])
@@ -556,7 +572,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 	struct sk_buff *skb = NULL;
 	int type, dtype;
 
-	read_lock(&in_dev->mc_list_lock);
+	rcu_read_lock();
 	spin_lock_bh(&in_dev->mc_tomb_lock);
 
 	/* deleted MCA's */
@@ -593,7 +609,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 	spin_unlock_bh(&in_dev->mc_tomb_lock);
 
 	/* change recs */
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(in_dev, pmc) {
 		spin_lock_bh(&pmc->lock);
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
 			type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -616,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 		}
 		spin_unlock_bh(&pmc->lock);
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 
 	if (!skb)
 		return;
@@ -813,14 +829,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
 	if (group == IGMP_ALL_HOSTS)
 		return;
 
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		if (im->multiaddr == group) {
 			igmp_stop_timer(im);
 			break;
 		}
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 
 static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -906,8 +922,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	 * - Use the igmp->igmp_code field as the maximum
 	 *   delay possible
 	 */
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		int changed;
 
 		if (group && group != im->multiaddr)
@@ -925,7 +941,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (changed)
 			igmp_mod_timer(im, max_delay);
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 
 /* called in rcu_read_lock() section */
@@ -1110,8 +1126,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 		kfree(pmc);
 	}
 	/* clear dead sources, too */
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		struct ip_sf_list *psf, *psf_next;
 
 		spin_lock_bh(&pmc->lock);
@@ -1123,7 +1139,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 			kfree(psf);
 		}
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 #endif
 
@@ -1209,7 +1225,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	for (im=in_dev->mc_list; im; im=im->next) {
+	for_each_pmc_rtnl(in_dev, im) {
 		if (im->multiaddr == addr) {
 			im->users++;
 			ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
@@ -1217,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 		}
 	}
 
-	im = kmalloc(sizeof(*im), GFP_KERNEL);
+	im = kzalloc(sizeof(*im), GFP_KERNEL);
 	if (!im)
 		goto out;
 
@@ -1227,26 +1243,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	im->multiaddr = addr;
 	/* initial mode is (EX, empty) */
 	im->sfmode = MCAST_EXCLUDE;
-	im->sfcount[MCAST_INCLUDE] = 0;
 	im->sfcount[MCAST_EXCLUDE] = 1;
-	im->sources = NULL;
-	im->tomb = NULL;
-	im->crcount = 0;
 	atomic_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
-	im->tm_running = 0;
 	setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
 	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
-	im->reporter = 0;
-	im->gsquery = 0;
 #endif
-	im->loaded = 0;
-	write_lock_bh(&in_dev->mc_list_lock);
-	im->next = in_dev->mc_list;
-	in_dev->mc_list = im;
+
+	im->next_rcu = in_dev->mc_list;
 	in_dev->mc_count++;
-	write_unlock_bh(&in_dev->mc_list_lock);
+	rcu_assign_pointer(in_dev->mc_list, im);
+
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
 #endif
@@ -1287,17 +1295,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group);
 
 void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 {
-	struct ip_mc_list *i, **ip;
+	struct ip_mc_list *i;
+	struct ip_mc_list __rcu **ip;
 
 	ASSERT_RTNL();
 
-	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+	for (ip = &in_dev->mc_list;
+	     (i = rtnl_dereference(*ip)) != NULL;
+	     ip = &i->next_rcu) {
 		if (i->multiaddr == addr) {
 			if (--i->users == 0) {
-				write_lock_bh(&in_dev->mc_list_lock);
-				*ip = i->next;
+				*ip = i->next_rcu;
 				in_dev->mc_count--;
-				write_unlock_bh(&in_dev->mc_list_lock);
 				igmp_group_dropped(i);
 
 				if (!in_dev->dead)
@@ -1316,34 +1325,34 @@ EXPORT_SYMBOL(ip_mc_dec_group);
 
 void ip_mc_unmap(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i = in_dev->mc_list; i; i = i->next)
-		igmp_group_dropped(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_dropped(pmc);
 }
 
 void ip_mc_remap(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i = in_dev->mc_list; i; i = i->next)
-		igmp_group_added(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_added(pmc);
 }
 
 /* Device going down */
 
 void ip_mc_down(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i=in_dev->mc_list; i; i=i->next)
-		igmp_group_dropped(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_dropped(pmc);
 
 #ifdef CONFIG_IP_MULTICAST
 	in_dev->mr_ifc_count = 0;
@@ -1374,7 +1383,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
 #endif
 
-	rwlock_init(&in_dev->mc_list_lock);
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
 
@@ -1382,14 +1390,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
 
 void ip_mc_up(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
-	for (i=in_dev->mc_list; i; i=i->next)
-		igmp_group_added(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_added(pmc);
 }
 
 /*
@@ -1405,17 +1413,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	/* Deactivate timers */
 	ip_mc_down(in_dev);
 
-	write_lock_bh(&in_dev->mc_list_lock);
-	while ((i = in_dev->mc_list) != NULL) {
-		in_dev->mc_list = i->next;
+	while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+		in_dev->mc_list = i->next_rcu;
 		in_dev->mc_count--;
-		write_unlock_bh(&in_dev->mc_list_lock);
+
 		igmp_group_dropped(i);
 		ip_ma_put(i);
-
-		write_lock_bh(&in_dev->mc_list_lock);
 	}
-	write_unlock_bh(&in_dev->mc_list_lock);
 }
 
 /* RTNL is locked */
@@ -1513,18 +1517,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 	if (!in_dev)
 		return -ENODEV;
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		if (*pmca == pmc->multiaddr)
 			break;
 	}
 	if (!pmc) {
 		/* MCA not found?? bug */
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 		return -ESRCH;
 	}
 	spin_lock_bh(&pmc->lock);
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 #ifdef CONFIG_IP_MULTICAST
 	sf_markstate(pmc);
 #endif
@@ -1685,18 +1689,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 	if (!in_dev)
 		return -ENODEV;
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		if (*pmca == pmc->multiaddr)
 			break;
 	}
 	if (!pmc) {
 		/* MCA not found?? bug */
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 		return -ESRCH;
 	}
 	spin_lock_bh(&pmc->lock);
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 
 #ifdef CONFIG_IP_MULTICAST
 	sf_markstate(pmc);
@@ -1793,7 +1797,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 
 	err = -EADDRINUSE;
 	ifindex = imr->imr_ifindex;
-	for (i = inet->mc_list; i; i = i->next) {
+	for_each_pmc_rtnl(inet, i) {
 		if (i->multi.imr_multiaddr.s_addr == addr &&
 		    i->multi.imr_ifindex == ifindex)
 			goto done;
@@ -1807,7 +1811,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 		goto done;
 
 	memcpy(&iml->multi, imr, sizeof(*imr));
-	iml->next = inet->mc_list;
+	iml->next_rcu = inet->mc_list;
 	iml->sflist = NULL;
 	iml->sfmode = MCAST_EXCLUDE;
 	rcu_assign_pointer(inet->mc_list, iml);
@@ -1821,17 +1825,14 @@ EXPORT_SYMBOL(ip_mc_join_group);
 
 static void ip_sf_socklist_reclaim(struct rcu_head *rp)
 {
-	struct ip_sf_socklist *psf;
-
-	psf = container_of(rp, struct ip_sf_socklist, rcu);
+	kfree(container_of(rp, struct ip_sf_socklist, rcu));
 	/* sk_omem_alloc should have been decreased by the caller*/
-	kfree(psf);
 }
 
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
-	struct ip_sf_socklist *psf = iml->sflist;
+	struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
 	int err;
 
 	if (psf == NULL) {
@@ -1851,11 +1852,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 
 static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 {
-	struct ip_mc_socklist *iml;
-
-	iml = container_of(rp, struct ip_mc_socklist, rcu);
+	kfree(container_of(rp, struct ip_mc_socklist, rcu));
 	/* sk_omem_alloc should have been decreased by the caller*/
-	kfree(iml);
 }
 
 
@@ -1866,7 +1864,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct ip_mc_socklist *iml, **imlp;
+	struct ip_mc_socklist *iml;
+	struct ip_mc_socklist __rcu **imlp;
 	struct in_device *in_dev;
 	struct net *net = sock_net(sk);
 	__be32 group = imr->imr_multiaddr.s_addr;
@@ -1876,7 +1875,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(net, imr);
 	ifindex = imr->imr_ifindex;
-	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
+	for (imlp = &inet->mc_list;
+	     (iml = rtnl_dereference(*imlp)) != NULL;
+	     imlp = &iml->next_rcu) {
 		if (iml->multi.imr_multiaddr.s_addr != group)
 			continue;
 		if (ifindex) {
@@ -1888,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 
-		rcu_assign_pointer(*imlp, iml->next);
+		*imlp = iml->next_rcu;
 
 		if (in_dev)
 			ip_mc_dec_group(in_dev, group);
@@ -1934,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	}
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if ((pmc->multi.imr_multiaddr.s_addr ==
 		     imr.imr_multiaddr.s_addr) &&
 		    (pmc->multi.imr_ifindex == imr.imr_ifindex))
@@ -1958,7 +1959,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		pmc->sfmode = omode;
 	}
 
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
@@ -2077,7 +2078,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		goto done;
 	}
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
 		    pmc->multi.imr_ifindex == imr.imr_ifindex)
 			break;
@@ -2107,7 +2108,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		(void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
 				     msf->imsf_fmode, 0, NULL, 0);
 	}
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (psl) {
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			psl->sl_count, psl->sl_addr, 0);
@@ -2155,7 +2156,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	}
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
 		    pmc->multi.imr_ifindex == imr.imr_ifindex)
 			break;
@@ -2163,7 +2164,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	if (!pmc)		/* must have a prior join */
 		goto done;
 	msf->imsf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	rtnl_unlock();
 	if (!psl) {
 		len = 0;
@@ -2208,7 +2209,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == addr &&
 		    pmc->multi.imr_ifindex == gsf->gf_interface)
 			break;
@@ -2216,7 +2217,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!pmc)		/* must have a prior join */
 		goto done;
 	gsf->gf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	rtnl_unlock();
 	count = psl ? psl->sl_count : 0;
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2257,7 +2258,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 		goto out;
 
 	rcu_read_lock();
-	for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
+	for_each_pmc_rcu(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
 		    pmc->multi.imr_ifindex == dif)
 			break;
@@ -2265,7 +2266,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	ret = inet->mc_all;
 	if (!pmc)
 		goto unlock;
-	psl = pmc->sflist;
+	psl = rcu_dereference(pmc->sflist);
 	ret = (pmc->sfmode == MCAST_EXCLUDE);
 	if (!psl)
 		goto unlock;
@@ -2300,10 +2301,10 @@ void ip_mc_drop_socket(struct sock *sk)
 		return;
 
 	rtnl_lock();
-	while ((iml = inet->mc_list) != NULL) {
+	while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
 		struct in_device *in_dev;
-		rcu_assign_pointer(inet->mc_list, iml->next);
 
+		inet->mc_list = iml->next_rcu;
 		in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 		if (in_dev != NULL) {
@@ -2323,8 +2324,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 	struct ip_sf_list *psf;
 	int rv = 0;
 
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		if (im->multiaddr == mc_addr)
 			break;
 	}
@@ -2345,7 +2346,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 		} else
 			rv = 1; /* unspecified source; tentatively allow */
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -2371,13 +2372,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 		in_dev = __in_dev_get_rcu(state->dev);
 		if (!in_dev)
 			continue;
-		read_lock(&in_dev->mc_list_lock);
-		im = in_dev->mc_list;
+		im = rcu_dereference(in_dev->mc_list);
 		if (im) {
 			state->in_dev = in_dev;
 			break;
 		}
-		read_unlock(&in_dev->mc_list_lock);
 	}
 	return im;
 }
@@ -2385,11 +2384,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
 {
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-	im = im->next;
-	while (!im) {
-		if (likely(state->in_dev != NULL))
-			read_unlock(&state->in_dev->mc_list_lock);
 
+	im = rcu_dereference(im->next_rcu);
+	while (!im) {
 		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->in_dev = NULL;
@@ -2398,8 +2395,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
 		state->in_dev = __in_dev_get_rcu(state->dev);
 		if (!state->in_dev)
 			continue;
-		read_lock(&state->in_dev->mc_list_lock);
-		im = state->in_dev->mc_list;
+		im = rcu_dereference(state->in_dev->mc_list);
 	}
 	return im;
 }
@@ -2435,10 +2431,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
 	__releases(rcu)
 {
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-	if (likely(state->in_dev != NULL)) {
-		read_unlock(&state->in_dev->mc_list_lock);
-		state->in_dev = NULL;
-	}
+
+	state->in_dev = NULL;
 	state->dev = NULL;
 	rcu_read_unlock();
 }
@@ -2460,7 +2454,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 		querier = "NONE";
 #endif
 
-		if (state->in_dev->mc_list == im) {
+		if (rcu_dereference(state->in_dev->mc_list) == im) {
 			seq_printf(seq, "%d\t%-10s: %5d %7s\n",
 				   state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
 		}
@@ -2519,8 +2513,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 		idev = __in_dev_get_rcu(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
-		read_lock(&idev->mc_list_lock);
-		im = idev->mc_list;
+		im = rcu_dereference(idev->mc_list);
 		if (likely(im != NULL)) {
 			spin_lock_bh(&im->lock);
 			psf = im->sources;
@@ -2531,7 +2524,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 			}
 			spin_unlock_bh(&im->lock);
 		}
-		read_unlock(&idev->mc_list_lock);
 	}
 	return psf;
 }
@@ -2545,9 +2537,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
 		spin_unlock_bh(&state->im->lock);
 		state->im = state->im->next;
 		while (!state->im) {
-			if (likely(state->idev != NULL))
-				read_unlock(&state->idev->mc_list_lock);
-
 			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
@@ -2556,8 +2545,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
 			state->idev = __in_dev_get_rcu(state->dev);
 			if (!state->idev)
 				continue;
-			read_lock(&state->idev->mc_list_lock);
-			state->im = state->idev->mc_list;
+			state->im = rcu_dereference(state->idev->mc_list);
 		}
 		if (!state->im)
 			break;
@@ -2603,10 +2591,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
 		spin_unlock_bh(&state->im->lock);
 		state->im = NULL;
 	}
-	if (likely(state->idev != NULL)) {
-		read_unlock(&state->idev->mc_list_lock);
-		state->idev = NULL;
-	}
+	state->idev = NULL;
 	state->dev = NULL;
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3-71-gd317


From 9fbbdde93231ad7f35c217aa6bbbc7995133f483 Mon Sep 17 00:00:00 2001
From: Erik Gilling <konkers@android.com>
Date: Thu, 11 Nov 2010 15:44:43 +0100
Subject: video: add fb_edid_add_monspecs for parsing extended edid information

Modern monitors/tvs have more extended EDID information blocks which can
contain extra detailed modes.  This adds a fb_edid_add_monspecs function
which drivers can use to parse those additions blocks.

Signed-off-by: Erik Gilling <konkers@android.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/fbmon.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fb.h    |  2 ++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 563a98b88e9b..a0b5a93b72d2 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -973,6 +973,63 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	DPRINTK("========================================\n");
 }
 
+void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
+{
+	unsigned char *block;
+	struct fb_videomode *mode, *m;
+	int num = 0, i, first = 1;
+
+	if (edid == NULL)
+		return;
+
+	if (!edid_checksum(edid))
+		return;
+
+	if (edid[0] != 0x2)
+		return;
+
+	mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL);
+	if (mode == NULL)
+		return;
+
+	block = edid + edid[0x2];
+
+	DPRINTK("  Extended Detailed Timings\n");
+
+	for (i = 0; i < (128 - edid[0x2]) / DETAILED_TIMING_DESCRIPTION_SIZE;
+	     i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) {
+		if (!(block[0] == 0x00 && block[1] == 0x00)) {
+			get_detailed_timing(block, &mode[num]);
+			if (first) {
+			        mode[num].flag |= FB_MODE_IS_FIRST;
+				first = 0;
+			}
+			num++;
+		}
+	}
+
+	/* Yikes, EDID data is totally useless */
+	if (!num) {
+		kfree(mode);
+		return;
+	}
+
+	m = kzalloc((specs->modedb_len + num) *
+		       sizeof(struct fb_videomode), GFP_KERNEL);
+
+	if (!m) {
+		kfree(mode);
+		return;
+	}
+
+	memmove(m, specs->modedb, specs->modedb_len * sizeof(struct fb_videomode));
+	memmove(m + specs->modedb_len, mode, num * sizeof(struct fb_videomode));
+	kfree(mode);
+	kfree(specs->modedb);
+	specs->modedb = m;
+	specs->modedb_len = specs->modedb_len + num;
+}
+
 /*
  * VESA Generalized Timing Formula (GTF)
  */
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 7fca3dc4e475..6f0274d96f0c 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -1092,6 +1092,8 @@ extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var);
 extern const unsigned char *fb_firmware_edid(struct device *device);
 extern void fb_edid_to_monspecs(unsigned char *edid,
 				struct fb_monspecs *specs);
+extern void fb_edid_add_monspecs(unsigned char *edid,
+				 struct fb_monspecs *specs);
 extern void fb_destroy_modedb(struct fb_videomode *modedb);
 extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb);
 extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter);
-- 
cgit v1.2.3-71-gd317


From 0ad83f6882c41df1a7fa387086029e162038c1f2 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 11 Nov 2010 15:45:04 +0100
Subject: fbdev: when parsing E-EDID blocks, also use SVD entries

Add parsing of E-EDID SVD entries. In this first version only a few
CEA/EIA-861E modes are implemented, more can be added as needed.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/fbmon.c  | 37 +++++++++++++++++++++++++++++++++----
 drivers/video/modedb.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fb.h     |  1 +
 3 files changed, 77 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index b25399abcf49..4f57485f8c54 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -983,7 +983,8 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	unsigned char *block;
 	struct fb_videomode *m;
 	int num = 0, i;
-	u8 edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE];
+	u8 svd[64], edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE];
+	u8 pos = 4, svd_n = 0;
 
 	if (!edid)
 		return;
@@ -995,6 +996,21 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	    edid[2] < 4 || edid[2] > 128 - DETAILED_TIMING_DESCRIPTION_SIZE)
 		return;
 
+	DPRINTK("  Short Video Descriptors\n");
+
+	while (pos < edid[2]) {
+		u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7;
+		pr_debug("Data block %u of %u bytes\n", type, len);
+		if (type == 2)
+			for (i = pos; i < pos + len; i++) {
+				u8 idx = edid[pos + i] & 0x7f;
+				svd[svd_n++] = idx;
+				pr_debug("N%sative mode #%d\n",
+					 edid[pos + i] & 0x80 ? "" : "on-n", idx);
+			}
+		pos += len + 1;
+	}
+
 	block = edid + edid[2];
 
 	DPRINTK("  Extended Detailed Timings\n");
@@ -1005,10 +1021,10 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 			edt[num++] = block - edid;
 
 	/* Yikes, EDID data is totally useless */
-	if (!num)
+	if (!(num + svd_n))
 		return;
 
-	m = kzalloc((specs->modedb_len + num) *
+	m = kzalloc((specs->modedb_len + num + svd_n) *
 		       sizeof(struct fb_videomode), GFP_KERNEL);
 
 	if (!m)
@@ -1023,9 +1039,22 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 		pr_debug("Adding %ux%u@%u\n", m[i].xres, m[i].yres, m[i].refresh);
 	}
 
+	for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) {
+		int idx = svd[i - specs->modedb_len - num];
+		if (!idx || idx > 63) {
+			pr_warning("Reserved SVD code %d\n", idx);
+		} else if (idx > ARRAY_SIZE(cea_modes) || !cea_modes[idx].xres) {
+			pr_warning("Unimplemented SVD code %d\n", idx);
+		} else {
+			memcpy(&m[i], cea_modes + idx, sizeof(m[i]));
+			pr_debug("Adding SVD #%d: %ux%u@%u\n", idx,
+				 m[i].xres, m[i].yres, m[i].refresh);
+		}
+	}
+
 	kfree(specs->modedb);
 	specs->modedb = m;
-	specs->modedb_len = specs->modedb_len + num;
+	specs->modedb_len = specs->modedb_len + num + svd_n;
 }
 
 /*
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 0a4dbdc1693a..9a0ae6ca5427 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -278,6 +278,49 @@ static const struct fb_videomode modedb[] = {
 };
 
 #ifdef CONFIG_FB_MODE_HELPERS
+const struct fb_videomode cea_modes[64] = {
+	/* #1: 640x480p@59.94/60Hz */
+	[1] = {
+		NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #3: 720x480p@59.94/60Hz */
+	[3] = {
+		NULL, 60, 720, 480, 37037, 60, 16, 30, 9, 62, 6, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #5: 1920x1080i@59.94/60Hz */
+	[5] = {
+		NULL, 60, 1920, 1080, 13763, 148, 88, 15, 2, 44, 5, 0, FB_VMODE_INTERLACED, 0,
+	},
+	/* #7: 720(1440)x480iH@59.94/60Hz */
+	[7] = {
+		NULL, 60, 1440, 480, 18554/*37108*/, 114, 38, 15, 4, 124, 3, 0, FB_VMODE_INTERLACED, 0,
+	},
+	/* #9: 720(1440)x240pH@59.94/60Hz */
+	[9] = {
+		NULL, 60, 1440, 240, 18554, 114, 38, 16, 4, 124, 3, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #18: 720x576pH@50Hz */
+	[18] = {
+		NULL, 50, 720, 576, 37037, 68, 12, 39, 5, 64, 5, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #19: 1280x720p@50Hz */
+	[19] = {
+		NULL, 50, 1280, 720, 13468, 220, 440, 20, 5, 40, 5, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #20: 1920x1080i@50Hz */
+	[20] = {
+		NULL, 50, 1920, 1080, 13480, 148, 528, 15, 5, 528, 5, 0, FB_VMODE_INTERLACED, 0,
+	},
+	/* #32: 1920x1080p@23.98/24Hz */
+	[32] = {
+		NULL, 24, 1920, 1080, 13468, 148, 638, 36, 4, 44, 5, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+	/* #35: (2880)x480p4x@59.94/60Hz */
+	[35] = {
+		NULL, 50, 2880, 480, 11100, 240, 64, 30, 9, 248, 6, 0, FB_VMODE_NONINTERLACED, 0,
+	},
+};
+
 const struct fb_videomode vesa_modes[] = {
 	/* 0 640x350-85 VESA */
 	{ NULL, 85, 640, 350, 31746,  96, 32, 60, 32, 64, 3,
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 6f0274d96f0c..e154a79b8322 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -1151,6 +1151,7 @@ struct fb_videomode {
 
 extern const char *fb_mode_option;
 extern const struct fb_videomode vesa_modes[];
+extern const struct fb_videomode cea_modes[64];
 
 struct fb_modelist {
 	struct list_head list;
-- 
cgit v1.2.3-71-gd317


From d83447f0944e73d690218d79c07762ffa4ceb9e4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:25:46 +0100
Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism

The problem with Ack Vectors is that
  i) their length is variable and can in principle grow quite large,
 ii) it is hard to predict exactly how large they will be.

Due to the second point it seems not a good idea to reduce the MPS; in
particular when on average there is enough room for the Ack Vector and an
increase in length is momentarily due to some burst loss, after which the
Ack Vector returns to its normal/average length.

The solution taken by this patch is to subtract a minimum-expected Ack Vector
length from the MPS, and to defer any larger Ack Vectors onto a separate
Sync - but only if indeed there is no space left on the skb.

This patch provides the infrastructure to schedule Sync-packets for transporting
(urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since
it does not need to wait for new application data.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |  2 ++
 net/dccp/options.c   | 24 ++++++++++++++++++++----
 net/dccp/output.c    | 15 +++++++++++++++
 3 files changed, 37 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 749f01ccd26e..eed52bcd35d0 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -462,6 +462,7 @@ struct dccp_ackvec;
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
+ * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
  * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
  * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
@@ -503,6 +504,7 @@ struct dccp_sock {
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
 	__u8				dccps_server_timewait:1;
+	__u8				dccps_sync_scheduled:1;
 	struct tasklet_struct		dccps_xmitlet;
 	struct timer_list		dccps_xmit_timer;
 };
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 7743df00f5b1..dabd6ee34d45 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -427,6 +427,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
@@ -435,10 +436,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	const unsigned char *tail, *from;
 	unsigned char *to;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+	if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+		DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
+			  dccp_packet_name(dcb->dccpd_type));
 		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+	}
+	/*
+	 * Since Ack Vectors are variable-length, we can not always predict
+	 * their size. To catch exception cases where the space is running out
+	 * on the skb, a separate Sync is scheduled to carry the Ack Vector.
+	 */
+	if (len > DCCPAV_MIN_OPTLEN &&
+	    len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
+		DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
+			  "MPS=%u ==> reduce payload size?\n", len, skb->len,
+			  dcb->dccpd_opt_len, dp->dccps_mss_cache);
+		dp->dccps_sync_scheduled = 1;
+		return 0;
+	}
+	dcb->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
 	len  = buflen;
@@ -479,7 +495,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
+	if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
 		return -ENOBUFS;
 	return 0;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 45b91853f5ae..d96dd9d362ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk)
 	 * any local drop will eventually be reported via receiver feedback.
 	 */
 	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+	/*
+	 * If the CCID needs to transfer additional header options out-of-band
+	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
+	 * flag to schedule a Sync. The Sync will automatically incorporate all
+	 * currently pending header options, thus clearing the backlog.
+	 */
+	if (dp->dccps_sync_scheduled)
+		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 }
 
 /**
@@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
+	/*
+	 * Clear the flag in case the Sync was scheduled for out-of-band data,
+	 * such as carrying a long Ack Vector.
+	 */
+	dccp_sk(sk)->dccps_sync_scheduled = 0;
+
 	dccp_transmit_skb(sk, skb);
 }
 
-- 
cgit v1.2.3-71-gd317


From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:55 +0000
Subject: offloading: Force software GSO for multiple vlan tags.

We currently use vlan_features to check for TSO support if there is
a vlan tag.  However, it's quite likely that the NIC is not able to
do TSO when there is an arbitrary number of tags.  Therefore if there
is more than one tag (in-band or out-of-band), fall back to software
emulation.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++----
 net/core/dev.c            | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 578debb801f4..6e4cfbc53d4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2239,6 +2239,8 @@ unsigned long netdev_fix_features(unsigned long features, const char *name);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev);
+
 static inline int net_gso_ok(int features, int gso_type)
 {
 	int feature = gso_type << NETIF_F_GSO_SHIFT;
@@ -2254,10 +2256,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	if (skb_is_gso(skb)) {
-		int features = dev->features;
-
-		if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci)
-			features &= dev->vlan_features;
+		int features = netif_get_vlan_features(skb, dev);
 
 		return (!skb_gso_ok(skb, features) ||
 			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/net/core/dev.c b/net/core/dev.c
index 368930a988e3..8b500c3e0297 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+{
+	__be16 protocol = skb->protocol;
+
+	if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	} else if (!skb->vlan_tci)
+		return dev->features;
+
+	if (protocol != htons(ETH_P_8021Q))
+		return dev->features & dev->vlan_features;
+	else
+		return 0;
+}
+
 /*
  * Returns true if either:
  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
-- 
cgit v1.2.3-71-gd317


From 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 20 Oct 2010 10:16:58 -0700
Subject: rfkill: remove dead code

The following code is defined but never used.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 31 -------------------------------
 net/rfkill/core.c      | 14 --------------
 2 files changed, 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 08c32e4f261a..c6c608482cba 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -354,37 +354,6 @@ static inline bool rfkill_blocked(struct rfkill *rfkill)
 }
 #endif /* RFKILL || RFKILL_MODULE */
 
-
-#ifdef CONFIG_RFKILL_LEDS
-/**
- * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED.
- * This function might return a NULL pointer if registering of the
- * LED trigger failed. Use this as "default_trigger" for the LED.
- */
-const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
-
-/**
- * rfkill_set_led_trigger_name -- set the LED trigger name
- * @rfkill: rfkill struct
- * @name: LED trigger name
- *
- * This function sets the LED trigger name of the radio LED
- * trigger that rfkill creates. It is optional, but if called
- * must be called before rfkill_register() to be effective.
- */
-void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name);
-#else
-static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-{
-	return NULL;
-}
-
-static inline void
-rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
-{
-}
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif /* RFKILL_H */
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 04f599089e6d..0198191b756d 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
 	rfkill_led_trigger_event(rfkill);
 }
 
-const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-{
-	return rfkill->led_trigger.name;
-}
-EXPORT_SYMBOL(rfkill_get_led_trigger_name);
-
-void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
-{
-	BUG_ON(!rfkill);
-
-	rfkill->ledtrigname = name;
-}
-EXPORT_SYMBOL(rfkill_set_led_trigger_name);
-
 static int rfkill_led_trigger_register(struct rfkill *rfkill)
 {
 	rfkill->led_trigger.name = rfkill->ledtrigname
-- 
cgit v1.2.3-71-gd317


From ca4ffe8f2848169a8ded0ea8a60b2d81925564c9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:55 -0700
Subject: cfg80211: fix disabling channels based on hints

After a module loads you will have loaded the world roaming regulatory
domain or a custom regulatory domain. Further regulatory hints are
welcomed and should be respected unless the regulatory hint is coming
from a country IE as the IEEE spec allows for a country IE to be a subset
of what is allowed by the local regulatory agencies.

So disable all channels that do not fit a regulatory domain sent
from a unless the hint is from a country IE and the country IE had
no information about the band we are currently processing.

This fixes a few regulatory issues, for example for drivers that depend
on CRDA and had no 5 GHz freqencies allowed were not properly disabling
5 GHz at all, furthermore it also allows users to restrict devices
further as was intended.

If you recieve a country IE upon association we will also disable the
channels that are not allowed if the country IE had at least one
channel on the respective band we are procesing.

This was the original intention behind this design but it was
completely overlooked...

Cc: David Quan <david.quan@atheros.com>
Cc: Jouni Malinen <jouni.malinen@atheros.com>
cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  6 +++++-
 net/wireless/reg.c      | 20 +++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 0edb2566c14c..fb877b5621b7 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1307,7 +1307,11 @@ enum nl80211_bitrate_attr {
  * 	wireless core it thinks its knows the regulatory domain we should be in.
  * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an
  * 	802.11 country information element with regulatory information it
- * 	thinks we should consider.
+ * 	thinks we should consider. cfg80211 only processes the country
+ *	code from the IE, and relies on the regulatory domain information
+ *	structure pased by userspace (CRDA) from our wireless-regdb.
+ *	If a channel is enabled but the country code indicates it should
+ *	be disabled we disable the channel and re-enable it upon disassociation.
  */
 enum nl80211_reg_initiator {
 	NL80211_REGDOM_SET_BY_CORE,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1bc8131a5185..8ab65f2afe70 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -750,8 +750,26 @@ static void handle_channel(struct wiphy *wiphy,
 			  desired_bw_khz,
 			  &reg_rule);
 
-	if (r)
+	if (r) {
+		/*
+		 * We will disable all channels that do not match our
+		 * recieved regulatory rule unless the hint is coming
+		 * from a Country IE and the Country IE had no information
+		 * about a band. The IEEE 802.11 spec allows for an AP
+		 * to send only a subset of the regulatory rules allowed,
+		 * so an AP in the US that only supports 2.4 GHz may only send
+		 * a country IE with information for the 2.4 GHz band
+		 * while 5 GHz is still supported.
+		 */
+		if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+		    r == -ERANGE)
+			return;
+
+		REG_DBG_PRINT("cfg80211: Disabling freq %d MHz\n",
+			      chan->center_freq);
+		chan->flags = IEEE80211_CHAN_DISABLED;
 		return;
+	}
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
-- 
cgit v1.2.3-71-gd317


From c8aea565e8f715d9f10064b1cbfbc15bf75df501 Mon Sep 17 00:00:00 2001
From: Gery Kahn <geryk@ti.com>
Date: Tue, 5 Oct 2010 16:09:05 +0200
Subject: wl1271: ref_clock cosmetic changes

Cosmetic cleanup for ref_clock code while configured by board.

Signed-off-by: Gery Kahn <geryk@ti.com>
Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
---
 drivers/net/wireless/wl12xx/wl1271_boot.c | 10 ++++------
 include/linux/wl12xx.h                    |  8 ++++++++
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/wl12xx/wl1271_boot.c b/drivers/net/wireless/wl12xx/wl1271_boot.c
index b91021242098..5b190728ca55 100644
--- a/drivers/net/wireless/wl12xx/wl1271_boot.c
+++ b/drivers/net/wireless/wl12xx/wl1271_boot.c
@@ -471,20 +471,19 @@ int wl1271_boot(struct wl1271 *wl)
 {
 	int ret = 0;
 	u32 tmp, clk, pause;
-	int ref_clock = wl->ref_clock;
 
 	wl1271_boot_hw_version(wl);
 
-	if (ref_clock == 0 || ref_clock == 2 || ref_clock == 4)
+	if (wl->ref_clock == 0 || wl->ref_clock == 2 || wl->ref_clock == 4)
 		/* ref clk: 19.2/38.4/38.4-XTAL */
 		clk = 0x3;
-	else if (ref_clock == 1 || ref_clock == 3)
+	else if (wl->ref_clock == 1 || wl->ref_clock == 3)
 		/* ref clk: 26/52 */
 		clk = 0x5;
 	else
 		return -EINVAL;
 
-	if (ref_clock != 0) {
+	if (wl->ref_clock != 0) {
 		u16 val;
 		/* Set clock type (open drain) */
 		val = wl1271_top_reg_read(wl, OCP_REG_CLK_TYPE);
@@ -529,8 +528,7 @@ int wl1271_boot(struct wl1271 *wl)
 
 	wl1271_debug(DEBUG_BOOT, "clk2 0x%x", clk);
 
-	/* 2 */
-	clk |= (ref_clock << 1) << 4;
+	clk |= (wl->ref_clock << 1) << 4;
 	wl1271_write32(wl, DRPW_SCRATCH_START, clk);
 
 	wl1271_set_partition(wl, &part_table[PART_WORK]);
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 4f902e1908aa..bebb8efea0a6 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -24,6 +24,14 @@
 #ifndef _LINUX_WL12XX_H
 #define _LINUX_WL12XX_H
 
+/* The board reference clock values */
+enum {
+	WL12XX_REFCLOCK_19 = 0,	/* 19.2 MHz */
+	WL12XX_REFCLOCK_26 = 1,	/* 26 MHz */
+	WL12XX_REFCLOCK_38 = 2,	/* 38.4 MHz */
+	WL12XX_REFCLOCK_54 = 3,	/* 54 MHz */
+};
+
 struct wl12xx_platform_data {
 	void (*set_power)(bool enable);
 	/* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */
-- 
cgit v1.2.3-71-gd317


From 7919a57bc608140aa8614c19eac40c6916fb61d2 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Mon, 30 Aug 2010 19:04:01 +0000
Subject: bitops: Provide generic sign_extend32 function

This patch moves code out from wireless drivers where two different
functions are defined in three code locations for the same purpose and
provides a common function to sign extend a 32-bit value.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath5k/phy.c        |  8 +-------
 drivers/net/wireless/ath/ath9k/ar5008_phy.c | 12 ++++++------
 drivers/net/wireless/ath/ath9k/ar9002_phy.c |  8 ++++----
 drivers/net/wireless/ath/ath9k/ar9003_phy.c | 12 ++++++------
 drivers/net/wireless/ath/ath9k/hw.h         |  6 ------
 drivers/net/wireless/iwlwifi/iwl-4965.c     | 20 ++------------------
 include/linux/bitops.h                      | 11 +++++++++++
 7 files changed, 30 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 219367884e64..6b43f535ff53 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -1102,18 +1102,12 @@ int ath5k_hw_channel(struct ath5k_hw *ah, struct ieee80211_channel *channel)
   PHY calibration
 \*****************/
 
-static int sign_extend(int val, const int nbits)
-{
-	int order = BIT(nbits-1);
-	return (val ^ order) - order;
-}
-
 static s32 ath5k_hw_read_measured_noise_floor(struct ath5k_hw *ah)
 {
 	s32 val;
 
 	val = ath5k_hw_reg_read(ah, AR5K_PHY_NF);
-	return sign_extend(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 9);
+	return sign_extend32(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 8);
 }
 
 void ath5k_hw_init_nfcal_hist(struct ath5k_hw *ah)
diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index 777a602176f2..c83a22cfbe1e 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -1490,25 +1490,25 @@ static void ar5008_hw_do_getnf(struct ath_hw *ah,
 	int16_t nf;
 
 	nf = MS(REG_READ(ah, AR_PHY_CCA), AR_PHY_MINCCA_PWR);
-	nfarray[0] = sign_extend(nf, 9);
+	nfarray[0] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR_PHY_CH1_MINCCA_PWR);
-	nfarray[1] = sign_extend(nf, 9);
+	nfarray[1] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CH2_CCA), AR_PHY_CH2_MINCCA_PWR);
-	nfarray[2] = sign_extend(nf, 9);
+	nfarray[2] = sign_extend32(nf, 8);
 
 	if (!IS_CHAN_HT40(ah->curchan))
 		return;
 
 	nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR);
-	nfarray[3] = sign_extend(nf, 9);
+	nfarray[3] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR_PHY_CH1_EXT_MINCCA_PWR);
-	nfarray[4] = sign_extend(nf, 9);
+	nfarray[4] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CH2_EXT_CCA), AR_PHY_CH2_EXT_MINCCA_PWR);
-	nfarray[5] = sign_extend(nf, 9);
+	nfarray[5] = sign_extend32(nf, 8);
 }
 
 /*
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
index c00cdc67b55b..3fb97fdc1240 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
@@ -473,21 +473,21 @@ static void ar9002_hw_do_getnf(struct ath_hw *ah,
 	int16_t nf;
 
 	nf = MS(REG_READ(ah, AR_PHY_CCA), AR9280_PHY_MINCCA_PWR);
-	nfarray[0] = sign_extend(nf, 9);
+	nfarray[0] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR9280_PHY_EXT_MINCCA_PWR);
 	if (IS_CHAN_HT40(ah->curchan))
-		nfarray[3] = sign_extend(nf, 9);
+		nfarray[3] = sign_extend32(nf, 8);
 
 	if (AR_SREV_9285(ah) || AR_SREV_9271(ah))
 		return;
 
 	nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR9280_PHY_CH1_MINCCA_PWR);
-	nfarray[1] = sign_extend(nf, 9);
+	nfarray[1] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR9280_PHY_CH1_EXT_MINCCA_PWR);
 	if (IS_CHAN_HT40(ah->curchan))
-		nfarray[4] = sign_extend(nf, 9);
+		nfarray[4] = sign_extend32(nf, 8);
 }
 
 static void ar9002_hw_set_nf_limits(struct ath_hw *ah)
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
index 06a9c4cd2f44..44c5454b2ad8 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
@@ -1023,25 +1023,25 @@ static void ar9003_hw_do_getnf(struct ath_hw *ah,
 	int16_t nf;
 
 	nf = MS(REG_READ(ah, AR_PHY_CCA_0), AR_PHY_MINCCA_PWR);
-	nfarray[0] = sign_extend(nf, 9);
+	nfarray[0] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CCA_1), AR_PHY_CH1_MINCCA_PWR);
-	nfarray[1] = sign_extend(nf, 9);
+	nfarray[1] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_CCA_2), AR_PHY_CH2_MINCCA_PWR);
-	nfarray[2] = sign_extend(nf, 9);
+	nfarray[2] = sign_extend32(nf, 8);
 
 	if (!IS_CHAN_HT40(ah->curchan))
 		return;
 
 	nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR);
-	nfarray[3] = sign_extend(nf, 9);
+	nfarray[3] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_1), AR_PHY_CH1_EXT_MINCCA_PWR);
-	nfarray[4] = sign_extend(nf, 9);
+	nfarray[4] = sign_extend32(nf, 8);
 
 	nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_2), AR_PHY_CH2_EXT_MINCCA_PWR);
-	nfarray[5] = sign_extend(nf, 9);
+	nfarray[5] = sign_extend32(nf, 8);
 }
 
 static void ar9003_hw_set_nf_limits(struct ath_hw *ah)
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index e5b72262fd96..f821a28bcda3 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -825,12 +825,6 @@ static inline struct ath_hw_ops *ath9k_hw_ops(struct ath_hw *ah)
 	return &ah->ops;
 }
 
-static inline int sign_extend(int val, const int nbits)
-{
-	int order = BIT(nbits-1);
-	return (val ^ order) - order;
-}
-
 /* Initialization, Detach, Reset */
 const char *ath9k_hw_probe(u16 vendorid, u16 devid);
 void ath9k_hw_deinit(struct ath_hw *ah);
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index cd14843878ae..4748d067eb1d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -1686,22 +1686,6 @@ static void iwl4965_txq_update_byte_cnt_tbl(struct iwl_priv *priv,
 			tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent;
 }
 
-/**
- * sign_extend - Sign extend a value using specified bit as sign-bit
- *
- * Example: sign_extend(9, 3) would return -7 as bit3 of 1001b is 1
- * and bit0..2 is 001b which when sign extended to 1111111111111001b is -7.
- *
- * @param oper value to sign extend
- * @param index 0 based bit index (0<=index<32) to sign bit
- */
-static s32 sign_extend(u32 oper, int index)
-{
-	u8 shift = 31 - index;
-
-	return (s32)(oper << shift) >> shift;
-}
-
 /**
  * iwl4965_hw_get_temperature - return the calibrated temperature (in Kelvin)
  * @statistics: Provides the temperature reading from the uCode
@@ -1739,9 +1723,9 @@ static int iwl4965_hw_get_temperature(struct iwl_priv *priv)
 	 * "initialize" ALIVE response.
 	 */
 	if (!test_bit(STATUS_TEMPERATURE, &priv->status))
-		vt = sign_extend(R4, 23);
+		vt = sign_extend32(R4, 23);
 	else
-		vt = sign_extend(le32_to_cpu(priv->_agn.statistics.
+		vt = sign_extend32(le32_to_cpu(priv->_agn.statistics.
 				 general.common.temperature), 23);
 
 	IWL_DEBUG_TEMP(priv, "Calib values R[1-3]: %d %d %d R4: %d\n", R1, R2, R3, vt);
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 827cc95711ef..2184c6b97aeb 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -109,6 +109,17 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
 	return (word >> shift) | (word << (8 - shift));
 }
 
+/**
+ * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit
+ * @value: value to sign extend
+ * @index: 0 based bit index (0<=index<32) to sign bit
+ */
+static inline __s32 sign_extend32(__u32 value, int index)
+{
+	__u8 shift = 31 - index;
+	return (__s32)(value << shift) >> shift;
+}
+
 static inline unsigned fls_long(unsigned long l)
 {
 	if (sizeof(l) == 4)
-- 
cgit v1.2.3-71-gd317


From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 9 Nov 2010 10:47:38 +0000
Subject: net: Simplify RX queue allocation

This patch move RX queue allocation to alloc_netdev_mq and freeing of
the queues to free_netdev (symmetric to TX queue allocation).  Each
kobject RX queue takes a reference to the queue's device so that the
device can't be freed before all the kobjects have been released-- this
obviates the need for reference counts specific to RX queues.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +--
 net/core/dev.c            | 19 ++++++++++---------
 net/core/net-sysfs.c      |  7 ++-----
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6e4cfbc53d4c..fccb11f879e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -592,8 +592,7 @@ struct netdev_rx_queue {
 	struct rps_map __rcu		*rps_map;
 	struct rps_dev_flow_table __rcu	*rps_flow_table;
 	struct kobject			kobj;
-	struct netdev_rx_queue		*first;
-	atomic_t			count;
+	struct net_device		*dev;
 } ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RPS */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 75490670e0a9..8725d168d1f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 	}
 	dev->_rx = rx;
 
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
 	for (i = 0; i < count; i++)
-		rx[i].first = rx;
+		rx[i].dev = dev;
 #endif
 	return 0;
 }
@@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	ret = netif_alloc_rx_queues(dev);
-	if (ret)
-		goto out;
-
 	netdev_init_queues(dev);
 
 	/* Init, if this function is available */
@@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = queue_count;
 	dev->real_num_rx_queues = queue_count;
+	if (netif_alloc_rx_queues(dev))
+		goto free_pcpu;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
@@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
 free_p:
 	kfree(p);
 	return NULL;
@@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
 
 	kfree(rcu_dereference_raw(dev->ingress_queue));
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f376..3ba526b56fe3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = {
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
-	struct netdev_rx_queue *first = queue->first;
 	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 
@@ -719,8 +718,7 @@ static void rx_queue_release(struct kobject *kobj)
 	if (flow_table)
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
 
-	if (atomic_dec_and_test(&first->count))
-		kfree(first);
+	dev_put(queue->dev);
 }
 
 static struct kobj_type rx_queue_ktype = {
@@ -732,7 +730,6 @@ static struct kobj_type rx_queue_ktype = {
 static int rx_queue_add_kobject(struct net_device *net, int index)
 {
 	struct netdev_rx_queue *queue = net->_rx + index;
-	struct netdev_rx_queue *first = queue->first;
 	struct kobject *kobj = &queue->kobj;
 	int error = 0;
 
@@ -745,7 +742,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
-	atomic_inc(&first->count);
+	dev_hold(queue->dev);
 
 	return error;
 }
-- 
cgit v1.2.3-71-gd317


From c59504ebc5baa628706d10c2d3c7e1f4bc3c2147 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 14 Nov 2010 17:04:57 +0000
Subject: include/linux/if_macvlan.h: Remove unnecessary semicolons

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 8a2fd66a8b5f..ac96a2d76291 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -69,7 +69,7 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 	rx_stats = this_cpu_ptr(vlan->rx_stats);
 	if (likely(success)) {
 		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;;
+		rx_stats->rx_packets++;
 		rx_stats->rx_bytes += len;
 		if (multicast)
 			rx_stats->rx_multicast++;
-- 
cgit v1.2.3-71-gd317


From a386f99025f13b32502fe5dedf223c20d7283826 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 15 Nov 2010 06:38:11 +0000
Subject: bridge: add proper RCU annotation to should_route_hook

Add br_should_route_hook_t typedef, this is the only way we can
get a clean RCU implementation for function pointer.

Move route_hook to location where it is used.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h             |  4 +++-
 net/bridge/br.c                       |  4 ----
 net/bridge/br_input.c                 | 10 +++++++---
 net/bridge/netfilter/ebtable_broute.c |  3 ++-
 4 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 0d241a5c4909..f7e73c338c40 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,7 +102,9 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern int (*br_should_route_hook)(struct sk_buff *skb);
+
+typedef int (*br_should_route_hook_t)(struct sk_buff *skb);
+extern br_should_route_hook_t __rcu *br_should_route_hook;
 
 #endif
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index c8436fa31344..84bbb82599b2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,8 +22,6 @@
 
 #include "br_private.h"
 
-int (*br_should_route_hook)(struct sk_buff *skb);
-
 static const struct stp_proto br_stp_proto = {
 	.rcv	= br_stp_rcv,
 };
@@ -102,8 +100,6 @@ static void __exit br_deinit(void)
 	br_fdb_fini();
 }
 
-EXPORT_SYMBOL(br_should_route_hook);
-
 module_init(br_init)
 module_exit(br_deinit)
 MODULE_LICENSE("GPL");
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 25207a1f182b..6f6d8e1b776f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -21,6 +21,10 @@
 /* Bridge group multicast address 802.1d (pg 51). */
 const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
+/* Hook for brouter */
+br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
+EXPORT_SYMBOL(br_should_route_hook);
+
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
@@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
 	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	int (*rhook)(struct sk_buff *skb);
+	br_should_route_hook_t *rhook;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return skb;
@@ -173,8 +177,8 @@ forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
-		if (rhook != NULL) {
-			if (rhook(skb))
+		if (rhook) {
+			if ((*rhook)(skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ae3f106c3908..1bcaf36ad612 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void)
 	if (ret < 0)
 		return ret;
 	/* see br_input.c */
-	rcu_assign_pointer(br_should_route_hook, ebt_broute);
+	rcu_assign_pointer(br_should_route_hook,
+			   (br_should_route_hook_t *)ebt_broute);
 	return 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From 61391cde9eefac5cfcf6d214aa80c77e58b1626b Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 15 Nov 2010 06:38:12 +0000
Subject: netdev: add rcu annotations to receive handler hook

Suggested by Eric's bridge RCU changes.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fccb11f879e5..b45c1b8b1d19 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -994,8 +994,8 @@ struct net_device {
 	unsigned int		real_num_rx_queues;
 #endif
 
-	rx_handler_func_t	*rx_handler;
-	void			*rx_handler_data;
+	rx_handler_func_t __rcu	*rx_handler;
+	void __rcu		*rx_handler_data;
 
 	struct netdev_queue __rcu *ingress_queue;
 
-- 
cgit v1.2.3-71-gd317


From 8ffab51b3dfc54876f145f15b351c41f3f703195 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 10 Nov 2010 21:14:04 +0000
Subject: macvlan: lockless tx path

macvlan is a stacked device, like tunnels. We should use the lockless
mechanism we are using in tunnels and loopback.

This patch completely removes locking in TX path.

tx stat counters are added into existing percpu stat structure, renamed
from rx_stats to pcpu_stats.

Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue
capability)

Note : rx_errors converted to a 32bit counter, like tx_dropped, since
they dont need 64bit range.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 80 ++++++++++++++++++++--------------------------
 include/linux/if_macvlan.h | 34 ++++++++++++--------
 2 files changed, 55 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0fc9dc7f20db..93f0ba25c808 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -243,18 +243,22 @@ xmit_world:
 netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	unsigned int len = skb->len;
 	int ret;
+	const struct macvlan_dev *vlan = netdev_priv(dev);
 
 	ret = macvlan_queue_xmit(skb, dev);
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct macvlan_pcpu_stats *pcpu_stats;
 
+		pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(vlan->pcpu_stats->tx_dropped);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(macvlan_start_xmit);
@@ -414,14 +418,15 @@ static int macvlan_init(struct net_device *dev)
 	dev->state		= (dev->state & ~MACVLAN_STATE_MASK) |
 				  (lowerdev->state & MACVLAN_STATE_MASK);
 	dev->features 		= lowerdev->features & MACVLAN_FEATURES;
+	dev->features		|= NETIF_F_LLTX;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->iflink		= lowerdev->ifindex;
 	dev->hard_header_len	= lowerdev->hard_header_len;
 
 	macvlan_set_lockdep_class(dev);
 
-	vlan->rx_stats = alloc_percpu(struct macvlan_rx_stats);
-	if (!vlan->rx_stats)
+	vlan->pcpu_stats = alloc_percpu(struct macvlan_pcpu_stats);
+	if (!vlan->pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -431,7 +436,7 @@ static void macvlan_uninit(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	free_percpu(vlan->rx_stats);
+	free_percpu(vlan->pcpu_stats);
 }
 
 static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -439,33 +444,38 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, stats);
-
-	if (vlan->rx_stats) {
-		struct macvlan_rx_stats *p, accum = {0};
-		u64 rx_packets, rx_bytes, rx_multicast;
+	if (vlan->pcpu_stats) {
+		struct macvlan_pcpu_stats *p;
+		u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
+		u32 rx_errors = 0, tx_dropped = 0;
 		unsigned int start;
 		int i;
 
 		for_each_possible_cpu(i) {
-			p = per_cpu_ptr(vlan->rx_stats, i);
+			p = per_cpu_ptr(vlan->pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rx_packets	= p->rx_packets;
 				rx_bytes	= p->rx_bytes;
 				rx_multicast	= p->rx_multicast;
+				tx_packets	= p->tx_packets;
+				tx_bytes	= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets	+= rx_packets;
-			accum.rx_bytes		+= rx_bytes;
-			accum.rx_multicast	+= rx_multicast;
-			/* rx_errors is an ulong, updated without syncp protection */
-			accum.rx_errors		+= p->rx_errors;
+
+			stats->rx_packets	+= rx_packets;
+			stats->rx_bytes		+= rx_bytes;
+			stats->multicast	+= rx_multicast;
+			stats->tx_packets	+= tx_packets;
+			stats->tx_bytes		+= tx_bytes;
+			/* rx_errors & tx_dropped are u32, updated
+			 * without syncp protection.
+			 */
+			rx_errors	+= p->rx_errors;
+			tx_dropped	+= p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->rx_dropped = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_errors	= rx_errors;
+		stats->rx_dropped	= rx_errors;
+		stats->tx_dropped	= tx_dropped;
 	}
 	return stats;
 }
@@ -601,25 +611,6 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
-static int macvlan_get_tx_queues(struct net *net,
-				 struct nlattr *tb[],
-				 unsigned int *num_tx_queues,
-				 unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[],
 			   int (*receive)(struct sk_buff *skb),
@@ -743,7 +734,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops)
 {
 	/* common fields */
 	ops->priv_size		= sizeof(struct macvlan_dev);
-	ops->get_tx_queues	= macvlan_get_tx_queues;
 	ops->validate		= macvlan_validate;
 	ops->maxtype		= IFLA_MACVLAN_MAX;
 	ops->policy		= macvlan_policy;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index ac96a2d76291..e28b2e4959d4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -25,19 +25,25 @@ struct macvlan_port;
 struct macvtap_queue;
 
 /**
- *	struct macvlan_rx_stats - MACVLAN percpu rx stats
+ *	struct macvlan_pcpu_stats - MACVLAN percpu stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
  *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
  *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of errors
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx dropped packets
  */
-struct macvlan_rx_stats {
+struct macvlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
-	unsigned long		rx_errors;
+	u32			rx_errors;
+	u32			tx_dropped;
 };
 
 /*
@@ -52,7 +58,7 @@ struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
-	struct macvlan_rx_stats __percpu *rx_stats;
+	struct macvlan_pcpu_stats __percpu *pcpu_stats;
 	enum macvlan_mode	mode;
 	int (*receive)(struct sk_buff *skb);
 	int (*forward)(struct net_device *dev, struct sk_buff *skb);
@@ -64,18 +70,18 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 				    unsigned int len, bool success,
 				    bool multicast)
 {
-	struct macvlan_rx_stats *rx_stats;
-
-	rx_stats = this_cpu_ptr(vlan->rx_stats);
 	if (likely(success)) {
-		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;
-		rx_stats->rx_bytes += len;
+		struct macvlan_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;
+		pcpu_stats->rx_bytes += len;
 		if (multicast)
-			rx_stats->rx_multicast++;
-		u64_stats_update_end(&rx_stats->syncp);
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
 	} else {
-		rx_stats->rx_errors++;
+		this_cpu_inc(vlan->pcpu_stats->rx_errors);
 	}
 }
 
-- 
cgit v1.2.3-71-gd317


From a75d946f42ae1771424a9582129fc5182ff48a1b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 4 Nov 2010 16:20:20 +0100
Subject: console: move for_each_console to linux/console.h

Move it out of printk.c so that we can use it all over the code. There
are some potential users which will be converted to that macro in next
patches.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/console.h | 6 ++++++
 kernel/printk.c         | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 95cf6f08a59d..875cfb1c8132 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -126,6 +126,12 @@ struct console {
 	struct	 console *next;
 };
 
+/*
+ * for_each_console() allows you to iterate on each console
+ */
+#define for_each_console(con) \
+	for (con = console_drivers; con != NULL; con = con->next)
+
 extern int console_set_on_cmdline;
 
 extern int add_preferred_console(char *name, int idx, char *options);
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee8c377..bf0420a92a1a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,12 +42,6 @@
 
 #include <asm/uaccess.h>
 
-/*
- * for_each_console() allows you to iterate on each console
- */
-#define for_each_console(con) \
-	for (con = console_drivers; con != NULL; con = con->next)
-
 /*
  * Architectures can override it:
  */
-- 
cgit v1.2.3-71-gd317


From 3dfbd044d0d99cad2fe50e4f6c79845703fa0558 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 4 Nov 2010 16:20:23 +0100
Subject: TTY: include termios.h in tty_driver.h

We reference termios and termiox in tty_driver.h, but we do not include
linux/termios.h where these are defined. Add the #include properly.

Otherwise when we include tty_driver.h, we get compile errors.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg KH <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/tty_driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index db2d227694da..09678ed370f8 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -235,6 +235,7 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/cdev.h>
+#include <linux/termios.h>
 
 struct tty_struct;
 struct tty_driver;
-- 
cgit v1.2.3-71-gd317


From e44dcb6c377529805bbaae505d5b333daab69111 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 12 Nov 2010 19:47:47 +0100
Subject: serial: mpc52xx: make printout for type more generic

The printout for the type should be just "5xxx", so 512x users won't
wonder why they have a mpc52xx-type UART.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/mpc52xx_uart.c | 6 +++++-
 include/linux/serial_core.h   | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index c4399e23565a..126ec7f568ec 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -838,7 +838,11 @@ mpc52xx_uart_set_termios(struct uart_port *port, struct ktermios *new,
 static const char *
 mpc52xx_uart_type(struct uart_port *port)
 {
-	return port->type == PORT_MPC52xx ? "MPC52xx PSC" : NULL;
+	/*
+	 * We keep using PORT_MPC52xx for historic reasons although it applies
+	 * for MPC512x, too, but print "MPC5xxx" to not irritate users
+	 */
+	return port->type == PORT_MPC52xx ? "MPC5xxx PSC" : NULL;
 }
 
 static void
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 41603d690433..9ff9b7db293b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -95,7 +95,7 @@
 /* PPC CPM type number */
 #define PORT_CPM        58
 
-/* MPC52xx type numbers */
+/* MPC52xx (and MPC512x) type numbers */
 #define PORT_MPC52xx	59
 
 /* IBM icom */
-- 
cgit v1.2.3-71-gd317


From afe0cbf87500f0585d217deb8c6fd329793a7957 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 10 Nov 2010 12:50:50 +0900
Subject: cfg80211: Add nl80211 antenna configuration

Allow setting of TX and RX antennas configuration via nl80211.

The antenna configuration is defined as a bitmap of allowed antennas to use.
This API can be used to mask out antennas which are not attached or should not
be used for other reasons like regulatory concerns or special setups.

Separate bitmaps are used for RX and TX to allow configuring different antennas
for receiving and transmitting. Each bitmap is 32 bit long, each bit
representing one antenna, starting with antenna 1 at the first bit. If an
antenna bit is set, this means the driver is allowed to use this antenna for RX
or TX respectively; if the bit is not set the hardware is not allowed to use
this antenna.

Using bitmaps has the benefit of allowing for a flexible configuration
interface which can support many different configurations and which can be used
for 802.11n as well as non-802.11n devices. Instead of relying on some hardware
specific assumptions, drivers can use this information to know which antennas
are actually attached to the system and derive their capabilities based on
that.

802.11n devices should enable or disable chains, based on which antennas are
present (If all antennas belonging to a particular chain are disabled, the
entire chain should be disabled). HT capabilities (like STBC, TX Beamforming,
Antenna selection) should be calculated based on the available chains after
applying the antenna masks. Should a 802.11n device have diversity antennas
attached to one of their chains, diversity can be enabled or disabled based on
the antenna information.

Non-802.11n drivers can use the antenna masks to select RX and TX antennas and
to enable or disable antenna diversity.

While covering chainmasks for 802.11n and the standard "legacy" modes "fixed
antenna 1", "fixed antenna 2" and "diversity" this API also allows more rare,
but useful configurations as follows:

1) Send on antenna 1, receive on antenna 2 (or vice versa). This can be used to
have a low gain antenna for TX in order to keep within the regulatory
constraints and a high gain antenna for RX in order to receive weaker signals
("speak softly, but listen harder"). This can be useful for building long-shot
outdoor links. Another usage of this setup is having a low-noise pre-amplifier
on antenna 1 and a power amplifier on the other antenna. This way transmit
noise is mostly kept out of the low noise receive channel.
(This would be bitmaps: tx 1 rx 2).

2) Another similar setup is: Use RX diversity on both antennas, but always send
on antenna 1. Again that would allow us to benefit from a higher gain RX
antenna, while staying within the legal limits.
(This would be: tx 0 rx 3).

3) And finally there can be special experimental setups in research and
development even with pre 802.11n hardware where more than 2 antennas are
available. It's good to keep the API simple, yet flexible.

Signed-off-by: Bruno Randolf <br1@einfach.org>

--
v7:	Made bitmasks 32 bit wide and rebased to latest wireless-testing.
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 25 +++++++++++++++++++++++++
 include/net/cfg80211.h  |  3 +++
 net/wireless/nl80211.c  | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index fb877b5621b7..17c5c8849250 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -804,6 +804,28 @@ enum nl80211_commands {
  * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly
  *	means support for per-station GTKs.
  *
+ * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting.
+ *	This can be used to mask out antennas which are not attached or should
+ *	not be used for transmitting. If an antenna is not selected in this
+ *	bitmap the hardware is not allowed to transmit on this antenna.
+ *
+ *	Each bit represents one antenna, starting with antenna 1 at the first
+ *	bit. Depending on which antennas are selected in the bitmap, 802.11n
+ *	drivers can derive which chainmasks to use (if all antennas belonging to
+ *	a particular chain are disabled this chain should be disabled) and if
+ *	a chain has diversity antennas wether diversity should be used or not.
+ *	HT capabilities (STBC, TX Beamforming, Antenna selection) can be
+ *	derived from the available chains after applying the antenna mask.
+ *	Non-802.11n drivers can derive wether to use diversity or not.
+ *	Drivers may reject configurations or RX/TX mask combinations they cannot
+ *	support by returning -EINVAL.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving.
+ *	This can be used to mask out antennas which are not attached or should
+ *	not be used for receiving. If an antenna is not selected in this bitmap
+ *	the hardware should not be configured to receive on this antenna.
+ *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -973,6 +995,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SUPPORT_IBSS_RSN,
 
+	NL80211_ATTR_WIPHY_ANTENNA_TX,
+	NL80211_ATTR_WIPHY_ANTENNA_RX,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e5702f5ac57c..07425e648a09 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1304,6 +1304,9 @@ struct cfg80211_ops {
 	void	(*mgmt_frame_register)(struct wiphy *wiphy,
 				       struct net_device *dev,
 				       u16 frame_type, bool reg);
+
+	int	(*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant);
+	int	(*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant);
 };
 
 /*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c506241f8637..5e4dda4c0fd3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -166,7 +166,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 
 	[NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
+
 	[NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
+
+	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -526,7 +530,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		    dev->wiphy.rts_threshold);
 	NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
 		    dev->wiphy.coverage_class);
-
 	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
 		   dev->wiphy.max_scan_ssids);
 	NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
@@ -545,6 +548,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
+	if (dev->ops->get_antenna) {
+		u32 tx_ant = 0, rx_ant = 0;
+		int res;
+		res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant);
+		if (!res) {
+			NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant);
+			NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant);
+		}
+	}
+
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
 		goto nla_put_failure;
@@ -1024,6 +1037,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			goto bad_res;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
+	    info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
+		u32 tx_ant, rx_ant;
+		if (!rdev->ops->set_antenna) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]);
+		rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
+
+		result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
+		if (result)
+			goto bad_res;
+	}
+
 	changed = 0;
 
 	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
-- 
cgit v1.2.3-71-gd317


From 885a46d0f7942d76c2f3860acb45f75237d3bb42 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 11 Nov 2010 15:07:22 +0100
Subject: cfg80211: add support for setting the ad-hoc multicast rate

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 4 ++++
 include/net/cfg80211.h  | 2 ++
 net/wireless/nl80211.c  | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 17c5c8849250..037b4e498890 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -826,6 +826,8 @@ enum nl80211_commands {
  *	the hardware should not be configured to receive on this antenna.
  *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
  *
+ * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -998,6 +1000,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_WIPHY_ANTENNA_TX,
 	NL80211_ATTR_WIPHY_ANTENNA_RX,
 
+	NL80211_ATTR_MCAST_RATE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 07425e648a09..8fd9eebd0cc9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -923,6 +923,7 @@ struct cfg80211_disassoc_request {
  * @privacy: this is a protected network, keys will be configured
  *	after joining
  * @basic_rates: bitmap of basic rates to use when creating the IBSS
+ * @mcast_rate: multicast tx rate (in 100 kbps)
  */
 struct cfg80211_ibss_params {
 	u8 *ssid;
@@ -934,6 +935,7 @@ struct cfg80211_ibss_params {
 	u32 basic_rates;
 	bool channel_fixed;
 	bool privacy;
+	int mcast_rate;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5e4dda4c0fd3..605553842226 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -171,6 +171,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 
 	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
+
+	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -3681,6 +3683,9 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 				return -EINVAL;
 		}
 	}
+	if (info->attrs[NL80211_ATTR_MCAST_RATE])
+		ibss.mcast_rate =
+			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]);
 
 	if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev,
-- 
cgit v1.2.3-71-gd317


From 6ddf27cdbc218a412d7e993fdc08e30eec2042ce Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Mon, 15 Nov 2010 15:57:30 -0500
Subject: USB: make usb_mark_last_busy use pm_runtime_mark_last_busy

Since the runtime-PM core already defines a .last_busy field in
device.power, this patch uses it to replace the .last_busy field
defined in usb_device and uses pm_runtime_mark_last_busy to implement
usb_mark_last_busy.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c  | 23 +++++++++++------------
 drivers/usb/core/hcd-pci.c |  1 -
 drivers/usb/core/hcd.c     |  1 -
 drivers/usb/core/hub.c     |  1 -
 drivers/usb/core/message.c |  1 -
 include/linux/usb.h        |  5 ++---
 6 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index eda2d2c25459..0a63e968c683 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -27,7 +27,6 @@
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
 #include <linux/usb/hcd.h>
-#include <linux/pm_runtime.h>
 
 #include "usb.h"
 
@@ -1329,7 +1328,7 @@ int usb_resume(struct device *dev, pm_message_t msg)
 			pm_runtime_disable(dev);
 			pm_runtime_set_active(dev);
 			pm_runtime_enable(dev);
-			udev->last_busy = jiffies;
+			usb_mark_last_busy(udev);
 			do_unbind_rebind(udev, DO_REBIND);
 		}
 	}
@@ -1397,7 +1396,7 @@ void usb_autosuspend_device(struct usb_device *udev)
 {
 	int	status;
 
-	udev->last_busy = jiffies;
+	usb_mark_last_busy(udev);
 	status = pm_runtime_put_sync(&udev->dev);
 	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
 			__func__, atomic_read(&udev->dev.power.usage_count),
@@ -1482,7 +1481,7 @@ void usb_autopm_put_interface(struct usb_interface *intf)
 	struct usb_device	*udev = interface_to_usbdev(intf);
 	int			status;
 
-	udev->last_busy = jiffies;
+	usb_mark_last_busy(udev);
 	atomic_dec(&intf->pm_usage_cnt);
 	status = pm_runtime_put_sync(&intf->dev);
 	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
@@ -1512,8 +1511,8 @@ void usb_autopm_put_interface_async(struct usb_interface *intf)
 	unsigned long		last_busy;
 	int			status = 0;
 
-	last_busy = udev->last_busy;
-	udev->last_busy = jiffies;
+	last_busy = udev->dev.power.last_busy;
+	usb_mark_last_busy(udev);
 	atomic_dec(&intf->pm_usage_cnt);
 	pm_runtime_put_noidle(&intf->dev);
 
@@ -1554,7 +1553,7 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf)
 {
 	struct usb_device	*udev = interface_to_usbdev(intf);
 
-	udev->last_busy = jiffies;
+	usb_mark_last_busy(udev);
 	atomic_dec(&intf->pm_usage_cnt);
 	pm_runtime_put_noidle(&intf->dev);
 }
@@ -1641,7 +1640,7 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf)
 {
 	struct usb_device	*udev = interface_to_usbdev(intf);
 
-	udev->last_busy = jiffies;
+	usb_mark_last_busy(udev);
 	atomic_inc(&intf->pm_usage_cnt);
 	pm_runtime_get_noresume(&intf->dev);
 }
@@ -1697,7 +1696,7 @@ static int autosuspend_check(struct usb_device *udev)
 	 * enough, queue a delayed autosuspend request.
 	 */
 	j = ACCESS_ONCE(jiffies);
-	suspend_time = udev->last_busy + udev->autosuspend_delay;
+	suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay;
 	if (time_before(j, suspend_time)) {
 		pm_schedule_suspend(&udev->dev, jiffies_to_msecs(
 				round_jiffies_up_relative(suspend_time - j)));
@@ -1725,13 +1724,13 @@ static int usb_runtime_suspend(struct device *dev)
 	 * away.
 	 */
 	if (status) {
-		udev->last_busy = jiffies +
+		udev->dev.power.last_busy = jiffies +
 				(udev->autosuspend_delay == 0 ? HZ/2 : 0);
 	}
 
 	/* Prevent the parent from suspending immediately after */
 	else if (udev->parent)
-		udev->parent->last_busy = jiffies;
+		usb_mark_last_busy(udev->parent);
 
 	return status;
 }
@@ -1745,7 +1744,7 @@ static int usb_runtime_resume(struct device *dev)
 	 * and all its interfaces.
 	 */
 	status = usb_resume_both(udev, PMSG_AUTO_RESUME);
-	udev->last_busy = jiffies;
+	usb_mark_last_busy(udev);
 	return status;
 }
 
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 3799573bd385..b55d46070a25 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -19,7 +19,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/pm_runtime.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 61800f77dac8..e70aeaf3dc1a 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -38,7 +38,6 @@
 #include <asm/unaligned.h>
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
-#include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 27115b45edc5..7c2405eccc4b 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -24,7 +24,6 @@
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
-#include <linux/pm_runtime.h>
 
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index f377e49fcb30..832487423826 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -12,7 +12,6 @@
 #include <linux/ctype.h>
 #include <linux/nls.h>
 #include <linux/device.h>
-#include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
 #include <linux/usb/quirks.h>
 #include <linux/usb/hcd.h>	/* for usbcore internals */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 35fe6ab222bb..7d22b3340a7f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -20,6 +20,7 @@
 #include <linux/completion.h>	/* for struct completion */
 #include <linux/sched.h>	/* for current && schedule_timeout */
 #include <linux/mutex.h>	/* for struct mutex */
+#include <linux/pm_runtime.h>	/* for runtime PM */
 
 struct usb_device;
 struct usb_driver;
@@ -407,7 +408,6 @@ struct usb_tt;
  * @quirks: quirks of the whole device
  * @urbnum: number of URBs submitted for the whole device
  * @active_duration: total time device is not suspended
- * @last_busy: time of last use
  * @autosuspend_delay: in jiffies
  * @connect_time: time device was first connected
  * @do_remote_wakeup:  remote wakeup should be enabled
@@ -481,7 +481,6 @@ struct usb_device {
 	unsigned long active_duration;
 
 #ifdef CONFIG_PM
-	unsigned long last_busy;
 	int autosuspend_delay;
 	unsigned long connect_time;
 
@@ -527,7 +526,7 @@ extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
 
 static inline void usb_mark_last_busy(struct usb_device *udev)
 {
-	udev->last_busy = jiffies;
+	pm_runtime_mark_last_busy(&udev->dev);
 }
 
 #else
-- 
cgit v1.2.3-71-gd317


From fcc4a01eb8661226e80632327673f67bf6a5840b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 15 Nov 2010 15:57:51 -0500
Subject: USB: use the runtime-PM autosuspend implementation

This patch (as1428) converts USB over to the new runtime-PM core
autosuspend framework.  One slightly awkward aspect of the conversion
is that USB devices will now have two suspend-delay attributes: the
old power/autosuspend file and the new power/autosuspend_delay_ms
file.  One expresses the delay time in seconds and the other in
milliseconds, but otherwise they do the same thing.  The old attribute
can be deprecated and then removed eventually.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 113 +++++++++++++++++----------------
 drivers/usb/core/driver.c              |  77 ++--------------------
 drivers/usb/core/hub.c                 |   1 +
 drivers/usb/core/quirks.c              |   6 --
 drivers/usb/core/sysfs.c               |  34 ++--------
 drivers/usb/core/usb.c                 |   3 +-
 drivers/usb/core/usb.h                 |   2 -
 include/linux/usb.h                    |   2 -
 8 files changed, 71 insertions(+), 167 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index b29d8e56cf28..c9ffa9ced7ee 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -2,7 +2,7 @@
 
 		 Alan Stern <stern@rowland.harvard.edu>
 
-			    December 11, 2009
+			    October 28, 2010
 
 
@@ -107,9 +107,14 @@ allowed to issue dynamic suspends.
 The user interface for controlling dynamic PM is located in the power/
 subdirectory of each USB device's sysfs directory, that is, in
 /sys/bus/usb/devices/.../power/ where "..." is the device's ID.  The
-relevant attribute files are: wakeup, control, and autosuspend.
-(There may also be a file named "level"; this file was deprecated
-as of the 2.6.35 kernel and replaced by the "control" file.)
+relevant attribute files are: wakeup, control, and
+autosuspend_delay_ms.  (There may also be a file named "level"; this
+file was deprecated as of the 2.6.35 kernel and replaced by the
+"control" file.  In 2.6.38 the "autosuspend" file will be deprecated
+and replaced by the "autosuspend_delay_ms" file.  The only difference
+is that the newer file expresses the delay in milliseconds whereas the
+older file uses seconds.  Confusingly, both files are present in 2.6.37
+but only "autosuspend" works.)
 
 	power/wakeup
 
@@ -140,33 +145,36 @@ as of the 2.6.35 kernel and replaced by the "control" file.)
 		suspended and autoresume was not allowed.  This
 		setting is no longer supported.)
 
-	power/autosuspend
+	power/autosuspend_delay_ms
 
 		This file contains an integer value, which is the
-		number of seconds the device should remain idle before
-		the kernel will autosuspend it (the idle-delay time).
-		The default is 2.  0 means to autosuspend as soon as
-		the device becomes idle, and negative values mean
-		never to autosuspend.  You can write a number to the
-		file to change the autosuspend idle-delay time.
-
-Writing "-1" to power/autosuspend and writing "on" to power/control do
-essentially the same thing -- they both prevent the device from being
-autosuspended.  Yes, this is a redundancy in the API.
+		number of milliseconds the device should remain idle
+		before the kernel will autosuspend it (the idle-delay
+		time).  The default is 2000.  0 means to autosuspend
+		as soon as the device becomes idle, and negative
+		values mean never to autosuspend.  You can write a
+		number to the file to change the autosuspend
+		idle-delay time.
+
+Writing "-1" to power/autosuspend_delay_ms and writing "on" to
+power/control do essentially the same thing -- they both prevent the
+device from being autosuspended.  Yes, this is a redundancy in the
+API.
 
 (In 2.6.21 writing "0" to power/autosuspend would prevent the device
 from being autosuspended; the behavior was changed in 2.6.22.  The
 power/autosuspend attribute did not exist prior to 2.6.21, and the
 power/level attribute did not exist prior to 2.6.22.  power/control
-was added in 2.6.34.)
+was added in 2.6.34, and power/autosuspend_delay_ms was added in
+2.6.37 but did not become functional until 2.6.38.)
 
 
 	Changing the default idle-delay time
 	------------------------------------
 
-The default autosuspend idle-delay time is controlled by a module
-parameter in usbcore.  You can specify the value when usbcore is
-loaded.  For example, to set it to 5 seconds instead of 2 you would
+The default autosuspend idle-delay time (in seconds) is controlled by
+a module parameter in usbcore.  You can specify the value when usbcore
+is loaded.  For example, to set it to 5 seconds instead of 2 you would
 do:
 
 	modprobe usbcore autosuspend=5
@@ -234,25 +242,23 @@ every device.
 
 If a driver knows that its device has proper suspend/resume support,
 it can enable autosuspend all by itself.  For example, the video
-driver for a laptop's webcam might do this, since these devices are
-rarely used and so should normally be autosuspended.
+driver for a laptop's webcam might do this (in recent kernels they
+do), since these devices are rarely used and so should normally be
+autosuspended.
 
 Sometimes it turns out that even when a device does work okay with
-autosuspend there are still problems.  For example, there are
-experimental patches adding autosuspend support to the usbhid driver,
-which manages keyboards and mice, among other things.  Tests with a
-number of keyboards showed that typing on a suspended keyboard, while
-causing the keyboard to do a remote wakeup all right, would
-nonetheless frequently result in lost keystrokes.  Tests with mice
-showed that some of them would issue a remote-wakeup request in
-response to button presses but not to motion, and some in response to
-neither.
+autosuspend there are still problems.  For example, the usbhid driver,
+which manages keyboards and mice, has autosuspend support.  Tests with
+a number of keyboards show that typing on a suspended keyboard, while
+causing the keyboard to do a remote wakeup all right, will nonetheless
+frequently result in lost keystrokes.  Tests with mice show that some
+of them will issue a remote-wakeup request in response to button
+presses but not to motion, and some in response to neither.
 
 The kernel will not prevent you from enabling autosuspend on devices
 that can't handle it.  It is even possible in theory to damage a
-device by suspending it at the wrong time -- for example, suspending a
-USB hard disk might cause it to spin down without parking the heads.
-(Highly unlikely, but possible.)  Take care.
+device by suspending it at the wrong time.  (Highly unlikely, but
+possible.)  Take care.
 
 
 	The driver interface for Power Management
@@ -336,10 +342,6 @@ autosuspend the interface's device.  When the usage counter is = 0
 then the interface is considered to be idle, and the kernel may
 autosuspend the device.
 
-(There is a similar usage counter field in struct usb_device,
-associated with the device itself rather than any of its interfaces.
-This counter is used only by the USB core.)
-
 Drivers need not be concerned about balancing changes to the usage
 counter; the USB core will undo any remaining "get"s when a driver
 is unbound from its interface.  As a corollary, drivers must not call
@@ -409,11 +411,11 @@ during autosuspend.  For example, there's not much point
 autosuspending a keyboard if the user can't cause the keyboard to do a
 remote wakeup by typing on it.  If the driver sets
 intf->needs_remote_wakeup to 1, the kernel won't autosuspend the
-device if remote wakeup isn't available or has been disabled through
-the power/wakeup attribute.  (If the device is already autosuspended,
-though, setting this flag won't cause the kernel to autoresume it.
-Normally a driver would set this flag in its probe method, at which
-time the device is guaranteed not to be autosuspended.)
+device if remote wakeup isn't available.  (If the device is already
+autosuspended, though, setting this flag won't cause the kernel to
+autoresume it.  Normally a driver would set this flag in its probe
+method, at which time the device is guaranteed not to be
+autosuspended.)
 
 If a driver does its I/O asynchronously in interrupt context, it
 should call usb_autopm_get_interface_async() before starting output and
@@ -422,20 +424,19 @@ it receives an input event, it should call
 
 	usb_mark_last_busy(struct usb_device *udev);
 
-in the event handler.  This sets udev->last_busy to the current time.
-udev->last_busy is the field used for idle-delay calculations;
-updating it will cause any pending autosuspend to be moved back.  Most
-of the usb_autopm_* routines will also set the last_busy field to the
-current time.
+in the event handler.  This tells the PM core that the device was just
+busy and therefore the next autosuspend idle-delay expiration should
+be pushed back.  Many of the usb_autopm_* routines also make this call,
+so drivers need to worry only when interrupt-driven input arrives.
 
 Asynchronous operation is always subject to races.  For example, a
-driver may call one of the usb_autopm_*_interface_async() routines at
-a time when the core has just finished deciding the device has been
-idle for long enough but not yet gotten around to calling the driver's
-suspend method.  The suspend method must be responsible for
-synchronizing with the output request routine and the URB completion
-handler; it should cause autosuspends to fail with -EBUSY if the
-driver needs to use the device.
+driver may call the usb_autopm_get_interface_async() routine at a time
+when the core has just finished deciding the device has been idle for
+long enough but not yet gotten around to calling the driver's suspend
+method.  The suspend method must be responsible for synchronizing with
+the I/O request routine and the URB completion handler; it should
+cause autosuspends to fail with -EBUSY if the driver needs to use the
+device.
 
 External suspend calls should never be allowed to fail in this way,
 only autosuspend calls.  The driver can tell them apart by checking
@@ -472,7 +473,9 @@ Firstly, a device may already be autosuspended when a system suspend
 occurs.  Since system suspends are supposed to be as transparent as
 possible, the device should remain suspended following the system
 resume.  But this theory may not work out well in practice; over time
-the kernel's behavior in this regard has changed.
+the kernel's behavior in this regard has changed.  As of 2.6.37 the
+policy is to resume all devices during a system resume and let them
+handle their own runtime suspends afterward.
 
 Secondly, a dynamic power-management event may occur as a system
 suspend is underway.  The window for this is short, since system
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 0a63e968c683..43c25c29ac1f 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1397,32 +1397,7 @@ void usb_autosuspend_device(struct usb_device *udev)
 	int	status;
 
 	usb_mark_last_busy(udev);
-	status = pm_runtime_put_sync(&udev->dev);
-	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
-			__func__, atomic_read(&udev->dev.power.usage_count),
-			status);
-}
-
-/**
- * usb_try_autosuspend_device - attempt an autosuspend of a USB device and its interfaces
- * @udev: the usb_device to autosuspend
- *
- * This routine should be called when a core subsystem thinks @udev may
- * be ready to autosuspend.
- *
- * @udev's usage counter left unchanged.  If it is 0 and all the interfaces
- * are inactive then an autosuspend will be attempted.  The attempt may
- * fail or be delayed.
- *
- * The caller must hold @udev's device lock.
- *
- * This routine can run only in process context.
- */
-void usb_try_autosuspend_device(struct usb_device *udev)
-{
-	int	status;
-
-	status = pm_runtime_idle(&udev->dev);
+	status = pm_runtime_put_sync_autosuspend(&udev->dev);
 	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
 			__func__, atomic_read(&udev->dev.power.usage_count),
 			status);
@@ -1508,32 +1483,11 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
 void usb_autopm_put_interface_async(struct usb_interface *intf)
 {
 	struct usb_device	*udev = interface_to_usbdev(intf);
-	unsigned long		last_busy;
-	int			status = 0;
+	int			status;
 
-	last_busy = udev->dev.power.last_busy;
 	usb_mark_last_busy(udev);
 	atomic_dec(&intf->pm_usage_cnt);
-	pm_runtime_put_noidle(&intf->dev);
-
-	if (udev->dev.power.runtime_auto) {
-		/* Optimization: Don't schedule a delayed autosuspend if
-		 * the timer is already running and the expiration time
-		 * wouldn't change.
-		 *
-		 * We have to use the interface's timer.  Attempts to
-		 * schedule a suspend for the device would fail because
-		 * the interface is still active.
-		 */
-		if (intf->dev.power.timer_expires == 0 ||
-				round_jiffies_up(last_busy) !=
-				round_jiffies_up(jiffies)) {
-			status = pm_schedule_suspend(&intf->dev,
-					jiffies_to_msecs(
-					round_jiffies_up_relative(
-						udev->autosuspend_delay)));
-		}
-	}
+	status = pm_runtime_put(&intf->dev);
 	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
 			__func__, atomic_read(&intf->dev.power.usage_count),
 			status);
@@ -1651,7 +1605,6 @@ static int autosuspend_check(struct usb_device *udev)
 {
 	int			w, i;
 	struct usb_interface	*intf;
-	unsigned long		suspend_time, j;
 
 	/* Fail if autosuspend is disabled, or any interfaces are in use, or
 	 * any interface drivers require remote wakeup but it isn't available.
@@ -1691,17 +1644,6 @@ static int autosuspend_check(struct usb_device *udev)
 		return -EOPNOTSUPP;
 	}
 	udev->do_remote_wakeup = w;
-
-	/* If everything is okay but the device hasn't been idle for long
-	 * enough, queue a delayed autosuspend request.
-	 */
-	j = ACCESS_ONCE(jiffies);
-	suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay;
-	if (time_before(j, suspend_time)) {
-		pm_schedule_suspend(&udev->dev, jiffies_to_msecs(
-				round_jiffies_up_relative(suspend_time - j)));
-		return -EAGAIN;
-	}
 	return 0;
 }
 
@@ -1719,17 +1661,8 @@ static int usb_runtime_suspend(struct device *dev)
 
 	status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
 
-	/* If an interface fails the suspend, adjust the last_busy
-	 * time so that we don't get another suspend attempt right
-	 * away.
-	 */
-	if (status) {
-		udev->dev.power.last_busy = jiffies +
-				(udev->autosuspend_delay == 0 ? HZ/2 : 0);
-	}
-
 	/* Prevent the parent from suspending immediately after */
-	else if (udev->parent)
+	if (status == 0 && udev->parent)
 		usb_mark_last_busy(udev->parent);
 
 	return status;
@@ -1756,7 +1689,7 @@ static int usb_runtime_idle(struct device *dev)
 	 * autosuspend checks.
 	 */
 	if (autosuspend_check(udev) == 0)
-		pm_runtime_suspend(dev);
+		pm_runtime_autosuspend(dev);
 	return 0;
 }
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 7c2405eccc4b..fdb62ca10d86 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1803,6 +1803,7 @@ int usb_new_device(struct usb_device *udev)
 
 	/* Tell the runtime-PM framework the device is active */
 	pm_runtime_set_active(&udev->dev);
+	pm_runtime_use_autosuspend(&udev->dev);
 	pm_runtime_enable(&udev->dev);
 
 	err = usb_enumerate_device(udev);	/* Read descriptors */
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 25719da45e33..e3531da16137 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -124,12 +124,6 @@ void usb_detect_quirks(struct usb_device *udev)
 	 */
 	usb_disable_autosuspend(udev);
 
-	/* Autosuspend can also be disabled if the initial autosuspend_delay
-	 * is negative.
-	 */
-	if (udev->autosuspend_delay < 0)
-		usb_autoresume_device(udev);
-
 #endif
 
 	/* For the present, all devices default to USB-PERSIST enabled */
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 9561e087907d..6781c369ce2d 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -334,44 +334,20 @@ static DEVICE_ATTR(active_duration, S_IRUGO, show_active_duration, NULL);
 static ssize_t
 show_autosuspend(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct usb_device *udev = to_usb_device(dev);
-
-	return sprintf(buf, "%d\n", udev->autosuspend_delay / HZ);
+	return sprintf(buf, "%d\n", dev->power.autosuspend_delay / 1000);
 }
 
 static ssize_t
 set_autosuspend(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
-	struct usb_device *udev = to_usb_device(dev);
-	int value, old_delay;
-	int rc;
+	int value;
 
-	if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/HZ ||
-			value <= - INT_MAX/HZ)
+	if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 ||
+			value <= -INT_MAX/1000)
 		return -EINVAL;
-	value *= HZ;
-
-	usb_lock_device(udev);
-	old_delay = udev->autosuspend_delay;
-	udev->autosuspend_delay = value;
-
-	if (old_delay < 0) {	/* Autosuspend wasn't allowed */
-		if (value >= 0)
-			usb_autosuspend_device(udev);
-	} else {		/* Autosuspend was allowed */
-		if (value < 0) {
-			rc = usb_autoresume_device(udev);
-			if (rc < 0) {
-				count = rc;
-				udev->autosuspend_delay = old_delay;
-			}
-		} else {
-			usb_try_autosuspend_device(udev);
-		}
-	}
 
-	usb_unlock_device(udev);
+	pm_runtime_set_autosuspend_delay(dev, value * 1000);
 	return count;
 }
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index fdd4130fbb7d..079cb57bab4f 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -445,7 +445,8 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 	INIT_LIST_HEAD(&dev->filelist);
 
 #ifdef	CONFIG_PM
-	dev->autosuspend_delay = usb_autosuspend_delay * HZ;
+	pm_runtime_set_autosuspend_delay(&dev->dev,
+			usb_autosuspend_delay * 1000);
 	dev->connect_time = jiffies;
 	dev->active_duration = -jiffies;
 #endif
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index cd882203ad34..b975450f403e 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -75,14 +75,12 @@ static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 #ifdef CONFIG_USB_SUSPEND
 
 extern void usb_autosuspend_device(struct usb_device *udev);
-extern void usb_try_autosuspend_device(struct usb_device *udev);
 extern int usb_autoresume_device(struct usb_device *udev);
 extern int usb_remote_wakeup(struct usb_device *dev);
 
 #else
 
 #define usb_autosuspend_device(udev)		do {} while (0)
-#define usb_try_autosuspend_device(udev)	do {} while (0)
 static inline int usb_autoresume_device(struct usb_device *udev)
 {
 	return 0;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7d22b3340a7f..5ee2223af08c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -408,7 +408,6 @@ struct usb_tt;
  * @quirks: quirks of the whole device
  * @urbnum: number of URBs submitted for the whole device
  * @active_duration: total time device is not suspended
- * @autosuspend_delay: in jiffies
  * @connect_time: time device was first connected
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
@@ -481,7 +480,6 @@ struct usb_device {
 	unsigned long active_duration;
 
 #ifdef CONFIG_PM
-	int autosuspend_delay;
 	unsigned long connect_time;
 
 	unsigned do_remote_wakeup:1;
-- 
cgit v1.2.3-71-gd317


From da6836500414ae734cd9873c2d553db594f831e9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 16 Nov 2010 11:52:38 +0000
Subject: netfilter: allow hooks to pass error code back up the stack

SELinux would like to pass certain fatal errors back up the stack.  This patch
implements the generic netfilter support for this functionality.

Based-on-patch-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 ++
 net/netfilter/core.c      | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 03317c8d4077..1893837b3966 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -33,6 +33,8 @@
 
 #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
 
+#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+
 /* only for userspace compatibility */
 #ifndef __KERNEL__
 /* Generic cache responses from hook functions.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 85dabb86be6f..32fcbe290c04 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -173,9 +173,11 @@ next_hook:
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
-	} else if (verdict == NF_DROP) {
+	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
 		kfree_skb(skb);
-		ret = -EPERM;
+		ret = -(verdict >> NF_VERDICT_BITS);
+		if (ret == 0)
+			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
 		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
-- 
cgit v1.2.3-71-gd317


From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:30:14 +0000
Subject: rtnetlink: Link address family API

Each net_device contains address family specific data such as
per device settings and statistics. We already expose this data
via procfs/sysfs and partially netlink.

The netlink method requires the requester to send one RTM_GETLINK
request for each address family it wishes to receive data of
and then merge this data itself.

This patch implements a new API which combines all address family
specific link data in a new netlink attribute IFLA_AF_SPEC.
IFLA_AF_SPEC contains a sequence of nested attributes, one for each
address family which in turn defines the structure of its own
attribute. Example:

   [IFLA_AF_SPEC] = {
       [AF_INET] = {
           [IFLA_INET_CONF] = ...,
       },
       [AF_INET6] = {
           [IFLA_INET6_FLAGS] = ...,
           [IFLA_INET6_CONF] = ...,
       }
   }

The API also allows for address families to implement a function
which parses the IFLA_AF_SPEC attribute sent by userspace to
implement address family specific link options.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  19 +++++++
 include/net/rtnetlink.h |  31 ++++++++++
 net/core/rtnetlink.c    | 147 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 2fc66dd783ee..443d04a66a79 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -80,6 +80,24 @@ struct rtnl_link_ifmap {
 	__u8	port;
 };
 
+/*
+ * IFLA_AF_SPEC
+ *   Contains nested attributes for address family specific attributes.
+ *   Each address family may create a attribute with the address family
+ *   number as type and create its own attribute structure in it.
+ *
+ *   Example:
+ *   [IFLA_AF_SPEC] = {
+ *       [AF_INET] = {
+ *           [IFLA_INET_CONF] = ...,
+ *       },
+ *       [AF_INET6] = {
+ *           [IFLA_INET6_FLAGS] = ...,
+ *           [IFLA_INET6_CONF] = ...,
+ *       }
+ *   }
+ */
+
 enum {
 	IFLA_UNSPEC,
 	IFLA_ADDRESS,
@@ -116,6 +134,7 @@ enum {
 	IFLA_STATS64,
 	IFLA_VF_PORTS,
 	IFLA_PORT_SELF,
+	IFLA_AF_SPEC,
 	__IFLA_MAX
 };
 
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e013c68bfb00..35be0bbcd7da 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -83,6 +83,37 @@ extern void	__rtnl_link_unregister(struct rtnl_link_ops *ops);
 extern int	rtnl_link_register(struct rtnl_link_ops *ops);
 extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
 
+/**
+ * 	struct rtnl_af_ops - rtnetlink address family operations
+ *
+ *	@list: Used internally
+ * 	@family: Address family
+ * 	@fill_link_af: Function to fill IFLA_AF_SPEC with address family
+ * 		       specific netlink attributes.
+ * 	@get_link_af_size: Function to calculate size of address family specific
+ * 			   netlink attributes exlusive the container attribute.
+ * 	@parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
+ *			net_device accordingly.
+ */
+struct rtnl_af_ops {
+	struct list_head	list;
+	int			family;
+
+	int			(*fill_link_af)(struct sk_buff *skb,
+						const struct net_device *dev);
+	size_t			(*get_link_af_size)(const struct net_device *dev);
+
+	int			(*parse_link_af)(struct net_device *dev,
+						 const struct nlattr *attr);
+};
+
+extern int	__rtnl_af_register(struct rtnl_af_ops *ops);
+extern void	__rtnl_af_unregister(struct rtnl_af_ops *ops);
+
+extern int	rtnl_af_register(struct rtnl_af_ops *ops);
+extern void	rtnl_af_unregister(struct rtnl_af_ops *ops);
+
+
 extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]);
 extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 841c287ef40a..bf69e5871b1a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	return size;
 }
 
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+	const struct rtnl_af_ops *ops;
+
+	list_for_each_entry(ops, &rtnl_af_ops, list) {
+		if (ops->family == family)
+			return ops;
+	}
+
+	return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	list_add_tail(&ops->list, &rtnl_af_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_af_register(ops);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_af_unregister(ops);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+	struct rtnl_af_ops *af_ops;
+	size_t size;
+
+	/* IFLA_AF_SPEC */
+	size = nla_total_size(sizeof(struct nlattr));
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->get_link_af_size) {
+			/* AF_* + nested data */
+			size += nla_total_size(sizeof(struct nlattr)) +
+				af_ops->get_link_af_size(dev);
+		}
+	}
+
+	return size;
+}
+
 static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
 {
 	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(4) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
-	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	struct nlmsghdr *nlh;
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr;
+	struct nlattr *attr, *af_spec;
+	struct rtnl_af_ops *af_ops;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			goto nla_put_failure;
 	}
 
+	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+		goto nla_put_failure;
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->fill_link_af) {
+			struct nlattr *af;
+			int err;
+
+			if (!(af = nla_nest_start(skb, af_ops->family)))
+				goto nla_put_failure;
+
+			err = af_ops->fill_link_af(skb, dev);
+
+			/*
+			 * Caller may return ENODATA to indicate that there
+			 * was no data to be dumped. This is not an error, it
+			 * means we should trim the attribute header and
+			 * continue.
+			 */
+			if (err == -ENODATA)
+				nla_nest_cancel(skb, af);
+			else if (err < 0)
+				goto nla_put_failure;
+
+			nla_nest_end(skb, af);
+		}
+	}
+
+	nla_nest_end(skb, af_spec);
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
+	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			goto errout;
 		modified = 1;
 	}
+
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				continue;
+
+			if (!af_ops->parse_link_af)
+				continue;
+
+			err = af_ops->parse_link_af(dev, af);
+			if (err < 0)
+				goto errout;
+
+			modified = 1;
+		}
+	}
 	err = 0;
 
 errout:
-- 
cgit v1.2.3-71-gd317


From ca7479ebbd9f7621646bf2792cb7143647f035bb Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:31:20 +0000
Subject: inet: Define IPV4_DEVCONF_MAX

Define IPV4_DEVCONF_MAX to get rid of MAX - 1 notation.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 380ba6bc5db1..2b86eaf11773 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -41,10 +41,12 @@ enum
 	__IPV4_DEVCONF_MAX
 };
 
+#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
+
 struct ipv4_devconf {
 	void	*sysctl;
-	int	data[__IPV4_DEVCONF_MAX - 1];
-	DECLARE_BITMAP(state, __IPV4_DEVCONF_MAX - 1);
+	int	data[IPV4_DEVCONF_MAX];
+	DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
 };
 
 struct in_device {
@@ -90,7 +92,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
 
 static inline void ipv4_devconf_setall(struct in_device *in_dev)
 {
-	bitmap_fill(in_dev->cnf.state, __IPV4_DEVCONF_MAX - 1);
+	bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX);
 }
 
 #define IN_DEV_CONF_GET(in_dev, attr) \
-- 
cgit v1.2.3-71-gd317


From 9f0f7272ac9506f4c8c05cc597b7e376b0b9f3e4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:32:48 +0000
Subject: ipv4: AF_INET link address family

Implements the AF_INET link address family exposing the per
device configuration settings via netlink using the attribute
IFLA_INET_CONF.

The format of IFLA_INET_CONF differs depending on the direction
the attribute is sent. The attribute sent by the kernel consists
of a u32 array, basically a 1:1 copy of in_device->cnf.data[].
The attribute expected by the kernel must consist of a sequence
of nested u32 attributes, each representing a change request,
e.g.
	[IFLA_INET_CONF] = {
		[IPV4_DEVCONF_FORWARDING] = 1,
		[IPV4_DEVCONF_NOXFRM] = 0,
	}

libnl userspace API documentation and example available from:
http://www.infradead.org/~tgr/libnl/doc-git/group__link__inet.html

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  8 ++++++
 net/ipv4/devinet.c      | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 443d04a66a79..2e02e4d7b11e 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -147,6 +147,14 @@ enum {
 #define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
 #endif
 
+enum {
+	IFLA_INET_UNSPEC,
+	IFLA_INET_CONF,
+	__IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
 /* ifi_flags.
 
    IFF_* flags.
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc94b0316b78..71afc26c2df8 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1256,6 +1256,72 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
+static size_t inet_get_link_af_size(const struct net_device *dev)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+	if (!in_dev)
+		return 0;
+
+	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
+}
+
+static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *nla;
+	int i;
+
+	if (!in_dev)
+		return -ENODATA;
+
+	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
+		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+
+	return 0;
+}
+
+static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
+	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
+};
+
+static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *a, *tb[IFLA_INET_MAX+1];
+	int err, rem;
+
+	if (!in_dev)
+		return -EOPNOTSUPP;
+
+	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_INET_CONF]) {
+		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
+			int cfgid = nla_type(a);
+
+			if (nla_len(a) < 4)
+				return -EINVAL;
+
+			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
+				return -EINVAL;
+		}
+	}
+
+	if (tb[IFLA_INET_CONF]) {
+		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
+			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_SYSCTL
 
 static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1619,6 +1685,13 @@ static __net_initdata struct pernet_operations devinet_ops = {
 	.exit = devinet_exit_net,
 };
 
+static struct rtnl_af_ops inet_af_ops = {
+	.family		  = AF_INET,
+	.fill_link_af	  = inet_fill_link_af,
+	.get_link_af_size = inet_get_link_af_size,
+	.parse_link_af	  = inet_parse_link_af,
+};
+
 void __init devinet_init(void)
 {
 	register_pernet_subsys(&devinet_ops);
@@ -1626,6 +1699,8 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
+	rtnl_af_register(&inet_af_ops);
+
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
-- 
cgit v1.2.3-71-gd317


From 6b6e39a6a8da7234c538d14c43d3583da8875f9c Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Mon, 15 Nov 2010 23:13:18 +0100
Subject: driver-core: merge private parts of class and bus

As classes and busses are pretty much the same thing, and we want to
merge them together into a 'subsystem' in the future, let us share the
same private data parts to make that merge easier.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    | 62 ++++++++++++++++++++------------------------------
 drivers/base/bus.c     | 13 +++++------
 drivers/base/class.c   | 38 +++++++++++++++----------------
 drivers/base/core.c    | 22 +++++++++---------
 include/linux/device.h |  7 +++---
 5 files changed, 64 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 2ca7f5b7b824..19f49e41ce5d 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -1,31 +1,46 @@
 
 /**
- * struct bus_type_private - structure to hold the private to the driver core portions of the bus_type structure.
+ * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure.
  *
- * @subsys - the struct kset that defines this bus.  This is the main kobject
- * @drivers_kset - the list of drivers associated with this bus
- * @devices_kset - the list of devices associated with this bus
+ * @subsys - the struct kset that defines this subsystem
+ * @devices_kset - the list of devices associated
+ *
+ * @drivers_kset - the list of drivers associated
  * @klist_devices - the klist to iterate over the @devices_kset
  * @klist_drivers - the klist to iterate over the @drivers_kset
  * @bus_notifier - the bus notifier list for anything that cares about things
- * on this bus.
+ *                 on this bus.
  * @bus - pointer back to the struct bus_type that this structure is associated
- * with.
+ *        with.
+ *
+ * @class_interfaces - list of class_interfaces associated
+ * @glue_dirs - "glue" directory to put in-between the parent device to
+ *              avoid namespace conflicts
+ * @class_mutex - mutex to protect the children, devices, and interfaces lists.
+ * @class - pointer back to the struct class that this structure is associated
+ *          with.
  *
  * This structure is the one that is the actual kobject allowing struct
- * bus_type to be statically allocated safely.  Nothing outside of the driver
- * core should ever touch these fields.
+ * bus_type/class to be statically allocated safely.  Nothing outside of the
+ * driver core should ever touch these fields.
  */
-struct bus_type_private {
+struct subsys_private {
 	struct kset subsys;
-	struct kset *drivers_kset;
 	struct kset *devices_kset;
+
+	struct kset *drivers_kset;
 	struct klist klist_devices;
 	struct klist klist_drivers;
 	struct blocking_notifier_head bus_notifier;
 	unsigned int drivers_autoprobe:1;
 	struct bus_type *bus;
+
+	struct list_head class_interfaces;
+	struct kset glue_dirs;
+	struct mutex class_mutex;
+	struct class *class;
 };
+#define to_subsys_private(obj) container_of(obj, struct subsys_private, subsys.kobj)
 
 struct driver_private {
 	struct kobject kobj;
@@ -36,33 +51,6 @@ struct driver_private {
 };
 #define to_driver(obj) container_of(obj, struct driver_private, kobj)
 
-
-/**
- * struct class_private - structure to hold the private to the driver core portions of the class structure.
- *
- * @class_subsys - the struct kset that defines this class.  This is the main kobject
- * @class_devices - list of devices associated with this class
- * @class_interfaces - list of class_interfaces associated with this class
- * @class_dirs - "glue" directory for virtual devices associated with this class
- * @class_mutex - mutex to protect the children, devices, and interfaces lists.
- * @class - pointer back to the struct class that this structure is associated
- * with.
- *
- * This structure is the one that is the actual kobject allowing struct
- * class to be statically allocated safely.  Nothing outside of the driver
- * core should ever touch these fields.
- */
-struct class_private {
-	struct kset class_subsys;
-	struct klist class_devices;
-	struct list_head class_interfaces;
-	struct kset class_dirs;
-	struct mutex class_mutex;
-	struct class *class;
-};
-#define to_class(obj)	\
-	container_of(obj, struct class_private, class_subsys.kobj)
-
 /**
  * struct device_private - structure to hold the private to the driver core portions of the device structure.
  *
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 33c270a64db7..e243bd49764b 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -20,7 +20,6 @@
 #include "power/power.h"
 
 #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr)
-#define to_bus(obj) container_of(obj, struct bus_type_private, subsys.kobj)
 
 /*
  * sysfs bindings for drivers
@@ -96,11 +95,11 @@ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr,
 			     char *buf)
 {
 	struct bus_attribute *bus_attr = to_bus_attr(attr);
-	struct bus_type_private *bus_priv = to_bus(kobj);
+	struct subsys_private *subsys_priv = to_subsys_private(kobj);
 	ssize_t ret = 0;
 
 	if (bus_attr->show)
-		ret = bus_attr->show(bus_priv->bus, buf);
+		ret = bus_attr->show(subsys_priv->bus, buf);
 	return ret;
 }
 
@@ -108,11 +107,11 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
 			      const char *buf, size_t count)
 {
 	struct bus_attribute *bus_attr = to_bus_attr(attr);
-	struct bus_type_private *bus_priv = to_bus(kobj);
+	struct subsys_private *subsys_priv = to_subsys_private(kobj);
 	ssize_t ret = 0;
 
 	if (bus_attr->store)
-		ret = bus_attr->store(bus_priv->bus, buf, count);
+		ret = bus_attr->store(subsys_priv->bus, buf, count);
 	return ret;
 }
 
@@ -858,9 +857,9 @@ static BUS_ATTR(uevent, S_IWUSR, NULL, bus_uevent_store);
 int bus_register(struct bus_type *bus)
 {
 	int retval;
-	struct bus_type_private *priv;
+	struct subsys_private *priv;
 
-	priv = kzalloc(sizeof(struct bus_type_private), GFP_KERNEL);
+	priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 7975a52bdf5b..4f1df2e8fd74 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -27,7 +27,7 @@ static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr,
 			       char *buf)
 {
 	struct class_attribute *class_attr = to_class_attr(attr);
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	ssize_t ret = -EIO;
 
 	if (class_attr->show)
@@ -39,7 +39,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 				const char *buf, size_t count)
 {
 	struct class_attribute *class_attr = to_class_attr(attr);
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	ssize_t ret = -EIO;
 
 	if (class_attr->store)
@@ -49,7 +49,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 
 static void class_release(struct kobject *kobj)
 {
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	struct class *class = cp->class;
 
 	pr_debug("class '%s': release.\n", class->name);
@@ -65,7 +65,7 @@ static void class_release(struct kobject *kobj)
 
 static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj)
 {
-	struct class_private *cp = to_class(kobj);
+	struct subsys_private *cp = to_subsys_private(kobj);
 	struct class *class = cp->class;
 
 	return class->ns_type;
@@ -82,7 +82,7 @@ static struct kobj_type class_ktype = {
 	.child_ns_type	= class_child_ns_type,
 };
 
-/* Hotplug events for classes go to the class class_subsys */
+/* Hotplug events for classes go to the class subsys */
 static struct kset *class_kset;
 
 
@@ -90,7 +90,7 @@ int class_create_file(struct class *cls, const struct class_attribute *attr)
 {
 	int error;
 	if (cls)
-		error = sysfs_create_file(&cls->p->class_subsys.kobj,
+		error = sysfs_create_file(&cls->p->subsys.kobj,
 					  &attr->attr);
 	else
 		error = -EINVAL;
@@ -100,20 +100,20 @@ int class_create_file(struct class *cls, const struct class_attribute *attr)
 void class_remove_file(struct class *cls, const struct class_attribute *attr)
 {
 	if (cls)
-		sysfs_remove_file(&cls->p->class_subsys.kobj, &attr->attr);
+		sysfs_remove_file(&cls->p->subsys.kobj, &attr->attr);
 }
 
 static struct class *class_get(struct class *cls)
 {
 	if (cls)
-		kset_get(&cls->p->class_subsys);
+		kset_get(&cls->p->subsys);
 	return cls;
 }
 
 static void class_put(struct class *cls)
 {
 	if (cls)
-		kset_put(&cls->p->class_subsys);
+		kset_put(&cls->p->subsys);
 }
 
 static int add_class_attrs(struct class *cls)
@@ -162,7 +162,7 @@ static void klist_class_dev_put(struct klist_node *n)
 
 int __class_register(struct class *cls, struct lock_class_key *key)
 {
-	struct class_private *cp;
+	struct subsys_private *cp;
 	int error;
 
 	pr_debug("device class '%s': registering\n", cls->name);
@@ -170,11 +170,11 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
-	klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
+	klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put);
 	INIT_LIST_HEAD(&cp->class_interfaces);
-	kset_init(&cp->class_dirs);
+	kset_init(&cp->glue_dirs);
 	__mutex_init(&cp->class_mutex, "struct class mutex", key);
-	error = kobject_set_name(&cp->class_subsys.kobj, "%s", cls->name);
+	error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name);
 	if (error) {
 		kfree(cp);
 		return error;
@@ -187,15 +187,15 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 #if defined(CONFIG_BLOCK)
 	/* let the block class directory show up in the root of sysfs */
 	if (!sysfs_deprecated || cls != &block_class)
-		cp->class_subsys.kobj.kset = class_kset;
+		cp->subsys.kobj.kset = class_kset;
 #else
-	cp->class_subsys.kobj.kset = class_kset;
+	cp->subsys.kobj.kset = class_kset;
 #endif
-	cp->class_subsys.kobj.ktype = &class_ktype;
+	cp->subsys.kobj.ktype = &class_ktype;
 	cp->class = cls;
 	cls->p = cp;
 
-	error = kset_register(&cp->class_subsys);
+	error = kset_register(&cp->subsys);
 	if (error) {
 		kfree(cp);
 		return error;
@@ -210,7 +210,7 @@ void class_unregister(struct class *cls)
 {
 	pr_debug("device class '%s': unregistering\n", cls->name);
 	remove_class_attrs(cls);
-	kset_unregister(&cls->p->class_subsys);
+	kset_unregister(&cls->p->subsys);
 }
 
 static void class_create_release(struct class *cls)
@@ -295,7 +295,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
 
 	if (start)
 		start_knode = &start->knode_class;
-	klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
+	klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode);
 	iter->type = type;
 }
 EXPORT_SYMBOL_GPL(class_dev_iter_init);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6ed645411c40..46ff6c251932 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -610,7 +610,7 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
 	dir->class = class;
 	kobject_init(&dir->kobj, &class_dir_ktype);
 
-	dir->kobj.kset = &class->p->class_dirs;
+	dir->kobj.kset = &class->p->glue_dirs;
 
 	retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
 	if (retval < 0) {
@@ -635,7 +635,7 @@ static struct kobject *get_device_parent(struct device *dev,
 		if (sysfs_deprecated && dev->class == &block_class) {
 			if (parent && parent->class == &block_class)
 				return &parent->kobj;
-			return &block_class.p->class_subsys.kobj;
+			return &block_class.p->subsys.kobj;
 		}
 #endif
 
@@ -654,13 +654,13 @@ static struct kobject *get_device_parent(struct device *dev,
 		mutex_lock(&gdp_mutex);
 
 		/* find our class-directory at the parent and reference it */
-		spin_lock(&dev->class->p->class_dirs.list_lock);
-		list_for_each_entry(k, &dev->class->p->class_dirs.list, entry)
+		spin_lock(&dev->class->p->glue_dirs.list_lock);
+		list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry)
 			if (k->parent == parent_kobj) {
 				kobj = kobject_get(k);
 				break;
 			}
-		spin_unlock(&dev->class->p->class_dirs.list_lock);
+		spin_unlock(&dev->class->p->glue_dirs.list_lock);
 		if (kobj) {
 			mutex_unlock(&gdp_mutex);
 			return kobj;
@@ -682,7 +682,7 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 {
 	/* see if we live in a "glue" directory */
 	if (!glue_dir || !dev->class ||
-	    glue_dir->kset != &dev->class->p->class_dirs)
+	    glue_dir->kset != &dev->class->p->glue_dirs)
 		return;
 
 	kobject_put(glue_dir);
@@ -709,7 +709,7 @@ static int device_add_class_symlinks(struct device *dev)
 		return 0;
 
 	error = sysfs_create_link(&dev->kobj,
-				  &dev->class->p->class_subsys.kobj,
+				  &dev->class->p->subsys.kobj,
 				  "subsystem");
 	if (error)
 		goto out;
@@ -728,7 +728,7 @@ static int device_add_class_symlinks(struct device *dev)
 #endif
 
 	/* link in the class directory pointing to the device */
-	error = sysfs_create_link(&dev->class->p->class_subsys.kobj,
+	error = sysfs_create_link(&dev->class->p->subsys.kobj,
 				  &dev->kobj, dev_name(dev));
 	if (error)
 		goto out_device;
@@ -756,7 +756,7 @@ static void device_remove_class_symlinks(struct device *dev)
 	if (sysfs_deprecated && dev->class == &block_class)
 		return;
 #endif
-	sysfs_delete_link(&dev->class->p->class_subsys.kobj, &dev->kobj, dev_name(dev));
+	sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev));
 }
 
 /**
@@ -947,7 +947,7 @@ int device_add(struct device *dev)
 		mutex_lock(&dev->class->p->class_mutex);
 		/* tie the class to the device */
 		klist_add_tail(&dev->knode_class,
-			       &dev->class->p->class_devices);
+			       &dev->class->p->klist_devices);
 
 		/* notify any interfaces that the device is here */
 		list_for_each_entry(class_intf,
@@ -1535,7 +1535,7 @@ int device_rename(struct device *dev, const char *new_name)
 	}
 
 	if (dev->class) {
-		error = sysfs_rename_link(&dev->class->p->class_subsys.kobj,
+		error = sysfs_rename_link(&dev->class->p->subsys.kobj,
 			&dev->kobj, old_device_name, new_name);
 		if (error)
 			goto out;
diff --git a/include/linux/device.h b/include/linux/device.h
index dd4895313468..1e2d335ab683 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -30,9 +30,8 @@ struct device_private;
 struct device_driver;
 struct driver_private;
 struct class;
-struct class_private;
+struct subsys_private;
 struct bus_type;
-struct bus_type_private;
 struct device_node;
 
 struct bus_attribute {
@@ -65,7 +64,7 @@ struct bus_type {
 
 	const struct dev_pm_ops *pm;
 
-	struct bus_type_private *p;
+	struct subsys_private *p;
 };
 
 extern int __must_check bus_register(struct bus_type *bus);
@@ -213,7 +212,7 @@ struct class {
 
 	const struct dev_pm_ops *pm;
 
-	struct class_private *p;
+	struct subsys_private *p;
 };
 
 struct class_dev_iter {
-- 
cgit v1.2.3-71-gd317


From b2c0710c464ede15e1fc52fb1e7ee9ba54cea186 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 9 Sep 2010 13:40:39 -0700
Subject: rcu: move TINY_RCU from softirq to kthread

If RCU priority boosting is to be meaningful, callback invocation must
be boosted in addition to preempted RCU readers.  Otherwise, in presence
of CPU real-time threads, the grace period ends, but the callbacks don't
get invoked.  If the callbacks don't get invoked, the associated memory
doesn't get freed, so the system is still subject to OOM.

But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit
moves the callback invocations to a kthread, which can be boosted easily.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  1 -
 include/linux/rcutiny.h  |  8 ++----
 include/linux/rcutree.h  |  1 +
 kernel/rcutiny.c         | 71 +++++++++++++++++++++++++++++++++++++++---------
 kernel/rcutiny_plugin.h  | 15 +++++-----
 5 files changed, 70 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03cda7bed985..7142ee3304ab 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -118,7 +118,6 @@ static inline int rcu_preempt_depth(void)
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
 /* Internal to kernel */
-extern void rcu_init(void);
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 13877cb93a60..ea025a611fcc 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,7 +27,9 @@
 
 #include <linux/cache.h>
 
-#define rcu_init_sched()	do { } while (0)
+static inline void rcu_init(void)
+{
+}
 
 #ifdef CONFIG_TINY_RCU
 
@@ -125,16 +127,12 @@ static inline void rcu_cpu_stall_reset(void)
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 extern int rcu_scheduler_active __read_mostly;
 extern void rcu_scheduler_starting(void);
-
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
 static inline void rcu_scheduler_starting(void)
 {
 }
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 95518e628794..c0e96833aa73 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
+extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern void rcu_cpu_stall_reset(void);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d806735342ac..86eef29cdfb2 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,8 +59,15 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_cbs_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq);
+static unsigned long have_rcu_cbs;
+static void invoke_rcu_cbs(void);
+
 /* Forward declarations for rcutiny_plugin.h. */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static int rcu_cbs(void *arg);
 static void __call_rcu(struct rcu_head *head,
 		       void (*func)(struct rcu_head *rcu),
 		       struct rcu_ctrlblk *rcp);
@@ -123,7 +130,7 @@ void rcu_sched_qs(int cpu)
 {
 	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
 	    rcu_qsctr_help(&rcu_bh_ctrlblk))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cbs();
 }
 
 /*
@@ -132,7 +139,7 @@ void rcu_sched_qs(int cpu)
 void rcu_bh_qs(int cpu)
 {
 	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cbs();
 }
 
 /*
@@ -152,10 +159,10 @@ void rcu_check_callbacks(int cpu, int user)
 }
 
 /*
- * Helper function for rcu_process_callbacks() that operates on the
- * specified rcu_ctrlkblk structure.
+ * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
+ * whose grace period has elapsed.
  */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
 	struct rcu_head *next, *list;
 	unsigned long flags;
@@ -180,19 +187,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
+		local_bh_disable();
 		list->func(list);
+		local_bh_enable();
 		list = next;
 	}
 }
 
 /*
- * Invoke any callbacks whose grace period has completed.
+ * This kthread invokes RCU callbacks whose grace periods have
+ * elapsed.  It is awakened as needed, and takes the place of the
+ * RCU_SOFTIRQ that was used previously for this purpose.
+ * This is a kthread, but it is never stopped, at least not until
+ * the system goes down.
+ */
+static int rcu_cbs(void *arg)
+{
+	unsigned long work;
+	unsigned long flags;
+
+	for (;;) {
+		wait_event(rcu_cbs_wq, have_rcu_cbs != 0);
+		local_irq_save(flags);
+		work = have_rcu_cbs;
+		have_rcu_cbs = 0;
+		local_irq_restore(flags);
+		if (work) {
+			rcu_process_callbacks(&rcu_sched_ctrlblk);
+			rcu_process_callbacks(&rcu_bh_ctrlblk);
+			rcu_preempt_process_callbacks();
+		}
+	}
+
+	return 0;  /* Not reached, but needed to shut gcc up. */
+}
+
+/*
+ * Wake up rcu_cbs() to process callbacks now eligible for invocation.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static void invoke_rcu_cbs(void)
 {
-	__rcu_process_callbacks(&rcu_sched_ctrlblk);
-	__rcu_process_callbacks(&rcu_bh_ctrlblk);
-	rcu_preempt_process_callbacks();
+	unsigned long flags;
+
+	local_irq_save(flags);
+	have_rcu_cbs = 1;
+	wake_up(&rcu_cbs_wq);
+	local_irq_restore(flags);
 }
 
 /*
@@ -282,7 +322,12 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-void __init rcu_init(void)
+/*
+ * Spawn the kthread that invokes RCU callbacks.
+ */
+static int __init rcu_spawn_kthreads(void)
 {
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+	rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs");
+	return 0;
 }
+early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 6ceca4f745ff..95f9239df512 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -22,6 +22,8 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/kthread.h>
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -164,9 +166,9 @@ static void rcu_preempt_cpu_qs(void)
 	if (!rcu_preempt_blocked_readers_any())
 		rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
 
-	/* If there are done callbacks, make RCU_SOFTIRQ process them. */
+	/* If there are done callbacks, cause them to be invoked. */
 	if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cbs();
 }
 
 /*
@@ -374,7 +376,7 @@ static void rcu_preempt_check_callbacks(void)
 		rcu_preempt_cpu_qs();
 	if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
 	    rcu_preempt_ctrlblk.rcb.donetail)
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cbs();
 	if (rcu_preempt_gp_in_progress() &&
 	    rcu_cpu_blocking_cur_gp() &&
 	    rcu_preempt_running_reader())
@@ -383,7 +385,7 @@ static void rcu_preempt_check_callbacks(void)
 
 /*
  * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
- * update, so this is invoked from __rcu_process_callbacks() to
+ * update, so this is invoked from rcu_process_callbacks() to
  * handle that case.  Of course, it is invoked for all flavors of
  * RCU, but RCU callbacks can appear only on one of the lists, and
  * neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +402,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  */
 static void rcu_preempt_process_callbacks(void)
 {
-	__rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+	rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
 }
 
 /*
@@ -599,14 +601,13 @@ static void rcu_preempt_process_callbacks(void)
 #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 #include <linux/kernel_stat.h>
 
 /*
  * During boot, we forgive RCU lockdep issues.  After this function is
  * invoked, we start taking RCU lockdep issues seriously.
  */
-void rcu_scheduler_starting(void)
+void __init rcu_scheduler_starting(void)
 {
 	WARN_ON(nr_context_switches() > 0);
 	rcu_scheduler_active = 1;
-- 
cgit v1.2.3-71-gd317


From 5f2b0ba4d94b3ac23cbc4b7f675d98eb677a760a Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Nov 2010 11:22:23 -0500
Subject: x86, nmi_watchdog: Remove the old nmi_watchdog

Now that we have a new nmi_watchdog that is more generic and
sits on top of the perf subsystem, we really do not need the old
nmi_watchdog any more.

In addition, the old nmi_watchdog doesn't really work if you are
using the default clocksource, hpet.  The old nmi_watchdog code
relied on local apic interrupts to determine if the cpu is still
alive.  With hpet as the clocksource, these interrupts don't
increment any more and the old nmi_watchdog triggers false
postives.

This piece removes the old nmi_watchdog code and stubs out any
variables and functions calls.  The stubs are the same ones used
by the new nmi_watchdog code, so it should be well tested.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: fweisbec@gmail.com
Cc: gorcunov@openvz.org
LKML-Reference: <1289578944-28564-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/nmi.h    |   4 -
 arch/x86/kernel/apic/Makefile |   5 +-
 arch/x86/kernel/apic/hw_nmi.c |   6 +-
 arch/x86/kernel/apic/nmi.c    | 567 ------------------------------------------
 arch/x86/kernel/traps.c       |   9 -
 include/linux/nmi.h           |   6 +-
 kernel/sysctl.c               |  16 --
 7 files changed, 5 insertions(+), 608 deletions(-)
 delete mode 100644 arch/x86/kernel/apic/nmi.c

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..33292ec848ca 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,9 +17,6 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
@@ -30,7 +27,6 @@ extern void setup_apic_nmi_watchdog(void *);
 extern void stop_apic_nmi_watchdog(void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
 extern void cpu_nmi_set_wd_enabled(void);
 
 extern atomic_t nmi_active;
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..b68b17460016 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -20,12 +20,14 @@
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef arch_trigger_all_cpu_backtrace
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
@@ -95,8 +97,6 @@ early_initcall(register_trigger_all_cpu_backtrace);
 #if defined(CONFIG_X86_LOCAL_APIC)
 unsigned int nmi_watchdog = NMI_NONE;
 EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
 #endif
 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-	return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-	return atomic_read(&mce_entry) > 0;
-#endif
-	return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-	return per_cpu(irq_stat, cpu).apic_timer_irqs +
-		per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-	local_irq_enable_in_hardirq();
-	/*
-	 * Intentionally don't use cpu_relax here. This is
-	 * to make sure that the performance counter really ticks,
-	 * even if there is a simulator or similar that catches the
-	 * pause instruction. On a real HT machine this is fine because
-	 * all other CPUs are busy with "useless" delay loops and don't
-	 * care if they get somewhat less cycles.
-	 */
-	while (endflag == 0)
-		mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-	printk(KERN_CONT "\n");
-
-	printk(KERN_WARNING
-		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-	printk(KERN_WARNING
-		"Please report this to bugzilla.kernel.org,\n");
-	printk(KERN_WARNING
-		"and attach the output of the 'dmesg' command.\n");
-
-	per_cpu(wd_enabled, cpu) = 0;
-	atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-	unsigned int *prev_nmi_count;
-	int cpu;
-
-	if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-		return 0;
-
-	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-	if (!prev_nmi_count)
-		goto error;
-
-	printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-	for_each_possible_cpu(cpu)
-		prev_nmi_count[cpu] = get_nmi_count(cpu);
-	local_irq_enable();
-	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-	for_each_online_cpu(cpu) {
-		if (!per_cpu(wd_enabled, cpu))
-			continue;
-		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-			report_broken_nmi(cpu, prev_nmi_count);
-	}
-	endflag = 1;
-	if (!atomic_read(&nmi_active)) {
-		kfree(prev_nmi_count);
-		atomic_set(&nmi_active, -1);
-		goto error;
-	}
-	printk("OK.\n");
-
-	/*
-	 * now that we know it works we can reduce NMI frequency to
-	 * something more reasonable; makes a difference in some configs
-	 */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		nmi_hz = lapic_adjust_nmi_hz(1);
-
-	kfree(prev_nmi_count);
-	return 0;
-error:
-	if (nmi_watchdog == NMI_IO_APIC) {
-		if (!timer_through_8259)
-			legacy_pic->mask(0);
-		on_each_cpu(__acpi_nmi_disable, NULL, 1);
-	}
-
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-	return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-	unsigned int nmi;
-
-	if (!strncmp(str, "panic", 5)) {
-		panic_on_timeout = 1;
-		str = strchr(str, ',');
-		if (!str)
-			return 1;
-		++str;
-	}
-
-	if (!strncmp(str, "lapic", 5))
-		nmi_watchdog = NMI_LOCAL_APIC;
-	else if (!strncmp(str, "ioapic", 6))
-		nmi_watchdog = NMI_IO_APIC;
-	else {
-		get_option(&str, &nmi);
-		if (nmi >= NMI_INVALID)
-			return 0;
-		nmi_watchdog = nmi;
-	}
-
-	return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-	/* only CPU0 goes here, other CPUs should be offline */
-	nmi_pm_active = atomic_read(&nmi_active);
-	stop_apic_nmi_watchdog(NULL);
-	BUG_ON(atomic_read(&nmi_active) != 0);
-	return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-	/* only CPU0 goes here, other CPUs should be offline */
-	if (nmi_pm_active > 0) {
-		setup_apic_nmi_watchdog(NULL);
-		touch_nmi_watchdog();
-	}
-	return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-	.name		= "lapic_nmi",
-	.resume		= lapic_nmi_resume,
-	.suspend	= lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-	.id	= 0,
-	.cls	= &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-	int error;
-
-	/*
-	 * should really be a BUG_ON but b/c this is an
-	 * init call, it just doesn't work.  -dcz
-	 */
-	if (nmi_watchdog != NMI_LOCAL_APIC)
-		return 0;
-
-	if (atomic_read(&nmi_active) < 0)
-		return 0;
-
-	error = sysdev_class_register(&nmi_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic_nmi);
-	return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif	/* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-	__get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-	if (__get_cpu_var(wd_enabled))
-		return;
-
-	/* cheap hack to support suspend/resume */
-	/* if cpu0 is not active neither should the other cpus */
-	if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-		return;
-
-	switch (nmi_watchdog) {
-	case NMI_LOCAL_APIC:
-		if (lapic_watchdog_init(nmi_hz) < 0) {
-			__get_cpu_var(wd_enabled) = 0;
-			return;
-		}
-		/* FALL THROUGH */
-	case NMI_IO_APIC:
-		__get_cpu_var(wd_enabled) = 1;
-		atomic_inc(&nmi_active);
-	}
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-	/* only support LOCAL and IO APICs for now */
-	if (!nmi_watchdog_active())
-		return;
-	if (__get_cpu_var(wd_enabled) == 0)
-		return;
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		lapic_watchdog_stop();
-	else
-		__acpi_nmi_disable(NULL);
-	__get_cpu_var(wd_enabled) = 0;
-	atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-	if (nmi_watchdog_active()) {
-		unsigned cpu;
-
-		/*
-		 * Tell other CPUs to reset their alert counters. We cannot
-		 * do it ourselves because the alert count increase is not
-		 * atomic.
-		 */
-		for_each_present_cpu(cpu) {
-			if (per_cpu(nmi_touch, cpu) != 1)
-				per_cpu(nmi_touch, cpu) = 1;
-		}
-	}
-
-	/*
-	 * Tickle the softlockup detector too:
-	 */
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-	/*
-	 * Since current_thread_info()-> is always on the stack, and we
-	 * always switch the stack NMI-atomically, it's safe to use
-	 * smp_processor_id().
-	 */
-	unsigned int sum;
-	int touched = 0;
-	int cpu = smp_processor_id();
-	int rc = 0;
-
-	sum = get_timer_irqs(cpu);
-
-	if (__get_cpu_var(nmi_touch)) {
-		__get_cpu_var(nmi_touch) = 0;
-		touched = 1;
-	}
-
-	/* We can be called before check_nmi_watchdog, hence NULL check. */
-	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
-		raw_spin_lock(&lock);
-		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-		show_regs(regs);
-		dump_stack();
-		raw_spin_unlock(&lock);
-		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
-		rc = 1;
-	}
-
-	/* Could check oops_in_progress here too, but it's safer not to */
-	if (mce_in_progress())
-		touched = 1;
-
-	/* if the none of the timers isn't firing, this cpu isn't doing much */
-	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-		/*
-		 * Ayiee, looks like this CPU is stuck ...
-		 * wait a few IRQs (5 seconds) before doing the oops ...
-		 */
-		__this_cpu_inc(alert_counter);
-		if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
-			/*
-			 * die_nmi will return ONLY if NOTIFY_STOP happens..
-			 */
-			die_nmi("BUG: NMI Watchdog detected LOCKUP",
-				regs, panic_on_timeout);
-	} else {
-		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(alert_counter, 0);
-	}
-
-	/* see if the nmi watchdog went off */
-	if (!__get_cpu_var(wd_enabled))
-		return rc;
-	switch (nmi_watchdog) {
-	case NMI_LOCAL_APIC:
-		rc |= lapic_wd_event(nmi_hz);
-		break;
-	case NMI_IO_APIC:
-		/*
-		 * don't know how to accurately check for this.
-		 * just assume it was a watchdog timer interrupt
-		 * This matches the old behaviour.
-		 */
-		rc = 1;
-		break;
-	}
-	return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-	__get_cpu_var(wd_enabled) = 1;
-	atomic_inc(&nmi_active);
-	__acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-	on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-	touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-	unsigned char reason = get_nmi_reason();
-	char buf[64];
-
-	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(buf, regs, 1); /* Always panic here */
-	return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int old_state;
-
-	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-	old_state = nmi_watchdog_enabled;
-	proc_dointvec(table, write, buffer, length, ppos);
-	if (!!old_state == !!nmi_watchdog_enabled)
-		return 0;
-
-	if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-		printk(KERN_WARNING
-			"NMI watchdog is permanently disabled\n");
-		return -EIO;
-	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC) {
-		if (nmi_watchdog_enabled)
-			enable_lapic_nmi_watchdog();
-		else
-			disable_lapic_nmi_watchdog();
-	} else if (nmi_watchdog == NMI_IO_APIC) {
-		if (nmi_watchdog_enabled)
-			enable_ioapic_nmi_watchdog();
-		else
-			disable_ioapic_nmi_watchdog();
-	} else {
-		printk(KERN_WARNING
-			"NMI watchdog doesn't know what hardware to touch\n");
-		return -EIO;
-	}
-	return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
-	int i;
-
-	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
-	printk(KERN_INFO "sending NMI to all CPUs:\n");
-	apic->send_IPI_all(NMI_VECTOR);
-
-	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
-	for (i = 0; i < 10 * 1000; i++) {
-		if (cpumask_empty(to_cpumask(backtrace_mask)))
-			break;
-		mdelay(1);
-	}
-}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..db30d9cb9dd6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -398,15 +398,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 							== NOTIFY_STOP)
 			return;
 
-#ifndef CONFIG_LOCKUP_DETECTOR
-		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
-		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
 			unknown_nmi_error(reason, regs);
 #else
 		unknown_nmi_error(reason, regs);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5eee134..0cb3e5c246d0 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -16,10 +16,7 @@
  */
 #ifdef ARCH_HAS_NMI_WATCHDOG
 #include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-extern void acpi_nmi_disable(void);
-extern void acpi_nmi_enable(void);
-#else
+#endif
 #ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
@@ -30,7 +27,6 @@ extern void touch_nmi_watchdog(void);
 #endif
 static inline void acpi_nmi_disable(void) { }
 static inline void acpi_nmi_enable(void) { }
-#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b65bf634035e..ce33e2a2afea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -746,22 +746,6 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
-	{
-		.procname       = "unknown_nmi_panic",
-		.data           = &unknown_nmi_panic,
-		.maxlen         = sizeof (int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec,
-	},
-	{
-		.procname       = "nmi_watchdog",
-		.data           = &nmi_watchdog_enabled,
-		.maxlen         = sizeof (int),
-		.mode           = 0644,
-		.proc_handler   = proc_nmi_enabled,
-	},
-#endif
 #if defined(CONFIG_X86)
 	{
 		.procname	= "panic_on_unrecovered_nmi",
-- 
cgit v1.2.3-71-gd317


From 072b198a4ad48bd722ec6d203d65422a4698eae7 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Fri, 12 Nov 2010 11:22:24 -0500
Subject: x86, nmi_watchdog: Remove all stub function calls from old
 nmi_watchdog

Now that the bulk of the old nmi_watchdog is gone, remove all
the stub variables and hooks associated with it.

This touches lots of files mainly because of how the io_apic
nmi_watchdog was implemented.  Now that the io_apic nmi_watchdog
is forever gone, remove all its fingers.

Most of this code was not being exercised by virtue of
nmi_watchdog != NMI_IO_APIC, so there shouldn't be anything to
risky here.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: fweisbec@gmail.com
Cc: gorcunov@openvz.org
LKML-Reference: <1289578944-28564-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/nmi.h             |  47 ---
 arch/x86/include/asm/smpboot_hooks.h   |   1 -
 arch/x86/include/asm/timer.h           |   6 -
 arch/x86/kernel/apic/apic.c            |  15 +-
 arch/x86/kernel/apic/hw_nmi.c          |  10 -
 arch/x86/kernel/apic/io_apic.c         |  46 ---
 arch/x86/kernel/cpu/perf_event.c       |   9 -
 arch/x86/kernel/cpu/perfctr-watchdog.c | 642 ---------------------------------
 arch/x86/kernel/smpboot.c              |  11 -
 arch/x86/kernel/time.c                 |  18 -
 arch/x86/kernel/traps.c                |   2 -
 arch/x86/oprofile/nmi_timer_int.c      |   3 -
 drivers/acpi/acpica/nsinit.c           |   2 -
 drivers/watchdog/hpwdt.c               |   7 +-
 include/linux/nmi.h                    |   2 -
 15 files changed, 2 insertions(+), 819 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 33292ec848ca..3545838cddeb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,35 +7,13 @@
 
 #ifdef ARCH_HAS_NMI_WATCHDOG
 
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
-
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE	0
-#define NMI_IO_APIC	1
-#define NMI_LOCAL_APIC	2
-#define NMI_INVALID	3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
@@ -43,33 +21,8 @@ extern int unknown_nmi_panic;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-	if (nmi_watchdog == NMI_IO_APIC)
-		nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-	/*
-	 * actually it should be:
-	 * 	return (nmi_watchdog == NMI_LOCAL_APIC ||
-	 * 		nmi_watchdog == NMI_IO_APIC)
-	 * but since they are power of two we could use a
-	 * cheaper way --cvg
-	 */
-	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
 #endif
 }
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..e9e2a93783f9 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going.  Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 }
 
@@ -1750,17 +1744,10 @@ int __init APIC_init_uniprocessor(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index b68b17460016..3e25afe9a62a 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -94,14 +94,4 @@ early_initcall(register_trigger_all_cpu_backtrace);
 #endif
 
 /* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
 int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..e4a040c28de1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2643,24 +2642,6 @@ static void lapic_register_intr(int irq)
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2766,15 +2747,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2822,10 +2794,6 @@ static inline void __init check_timer(void)
 				unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
@@ -2851,11 +2819,6 @@ static inline void __init check_timer(void)
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2867,15 +2830,6 @@ static inline void __init check_timer(void)
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..1f129a14e3a2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
 {
 	int i;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		disable_lapic_nmi_watchdog();
-
 	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
 	for (i--; i >= 0; i--)
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
-
 	return false;
 }
 
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
 }
 
 #else
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..14d45928c282 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -22,26 +22,6 @@
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-	unsigned int cccr_msr;
-	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
-	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-	int (*reserve)(void);
-	void (*unreserve)(void);
-	int (*setup)(unsigned nmi_hz);
-	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-	void (*stop)(void);
-	unsigned perfctr;
-	unsigned evntsel;
-	u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
 	clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
-	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-	if (atomic_read(&nmi_active) <= 0)
-		return;
-
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
-	if (wd_ops)
-		wd_ops->unreserve();
-
-	BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
-	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-	/* are we already enabled */
-	if (atomic_read(&nmi_active) != 0)
-		return;
-
-	/* are we lapic aware */
-	if (!wd_ops)
-		return;
-	if (!wd_ops->reserve()) {
-		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
-		return;
-	}
-
-	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
-	touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
-	u64 counter_val;
-	unsigned int retval = hz;
-
-	/*
-	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
-	 * are writable, with higher bits sign extending from bit 31.
-	 * So, we can only program the counter with 31 bit values and
-	 * 32nd bit should be 1, for 33.. to be 1.
-	 * Find the appropriate nmi_hz
-	 */
-	counter_val = (u64)cpu_khz * 1000;
-	do_div(counter_val, retval);
-	if (counter_val > 0x7fffffffULL) {
-		u64 count = (u64)cpu_khz * 1000;
-		do_div(count, 0x7fffffffUL);
-		retval = count + 1;
-	}
-	return retval;
-}
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
-				const char *descr, unsigned nmi_hz)
-{
-	u64 count = (u64)cpu_khz * 1000;
-
-	do_div(count, nmi_hz);
-	if (descr)
-		pr_debug("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
-				const char *descr, unsigned nmi_hz)
-{
-	u64 count = (u64)cpu_khz * 1000;
-
-	do_div(count, nmi_hz);
-	if (descr)
-		pr_debug("setting %s to -0x%08Lx\n", descr, count);
-	wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	wrmsrl(perfctr_msr, 0UL);
-
-	evntsel = K7_EVNTSEL_INT
-		| K7_EVNTSEL_OS
-		| K7_EVNTSEL_USR
-		| K7_NMI_EVENT;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
-	/* initialize the wd struct before enabling */
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-
-	return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
-	if (!reserve_perfctr_nmi(wd_ops->perfctr))
-		return 0;
-
-	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
-		release_perfctr_nmi(wd_ops->perfctr);
-		return 0;
-	}
-	return 1;
-}
-
-static void single_msr_unreserve(void)
-{
-	release_evntsel_nmi(wd_ops->evntsel);
-	release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	/* start the cycle over again */
-	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_k7_watchdog,
-	.rearm		= single_msr_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_K7_PERFCTR0,
-	.evntsel	= MSR_K7_EVNTSEL0,
-	.checkbit	= 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	/* KVM doesn't implement this MSR */
-	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
-		return 0;
-
-	evntsel = P6_EVNTSEL_INT
-		| P6_EVNTSEL_OS
-		| P6_EVNTSEL_USR
-		| P6_NMI_EVENT;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
-	/* initialize the wd struct before enabling */
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-
-	return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	/*
-	 * P6 based Pentium M need to re-unmask
-	 * the apic vector but it doesn't hurt
-	 * other P6 variant.
-	 * ArchPerfom/Core Duo also needs this
-	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-	/* P6/ARCH_PERFMON has 32 bit counter write */
-	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_p6_watchdog,
-	.rearm		= p6_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_P6_PERFCTR0,
-	.evntsel	= MSR_P6_EVNTSEL0,
-	.checkbit	= 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
-#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
-#define P4_ESCR_OS		(1 << 3)
-#define P4_ESCR_USR		(1 << 2)
-#define P4_CCCR_OVF_PMI0	(1 << 26)
-#define P4_CCCR_OVF_PMI1	(1 << 27)
-#define P4_CCCR_THRESHOLD(N)	((N) << 20)
-#define P4_CCCR_COMPLEMENT	(1 << 19)
-#define P4_CCCR_COMPARE		(1 << 18)
-#define P4_CCCR_REQUIRED	(3 << 16)
-#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
-#define P4_CCCR_ENABLE		(1 << 12)
-#define P4_CCCR_OVF 		(1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
-	MSR_P4_BPU_CCCR0,
-	MSR_P4_BPU_CCCR1,
-	MSR_P4_BPU_CCCR2,
-	MSR_P4_BPU_CCCR3,
-	MSR_P4_MS_CCCR0,
-	MSR_P4_MS_CCCR1,
-	MSR_P4_MS_CCCR2,
-	MSR_P4_MS_CCCR3,
-	MSR_P4_FLAME_CCCR0,
-	MSR_P4_FLAME_CCCR1,
-	MSR_P4_FLAME_CCCR2,
-	MSR_P4_FLAME_CCCR3,
-	MSR_P4_IQ_CCCR0,
-	MSR_P4_IQ_CCCR1,
-	MSR_P4_IQ_CCCR2,
-	MSR_P4_IQ_CCCR3,
-	MSR_P4_IQ_CCCR4,
-	MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
-	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
-	unsigned int evntsel, cccr_val;
-	unsigned int misc_enable, dummy;
-	unsigned int ht_num;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
-	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
-		return 0;
-
-#ifdef CONFIG_SMP
-	/* detect which hyperthread we are on */
-	if (smp_num_siblings == 2) {
-		unsigned int ebx, apicid;
-
-		ebx = cpuid_ebx(1);
-		apicid = (ebx >> 24) & 0xff;
-		ht_num = apicid & 1;
-	} else
-#endif
-		ht_num = 0;
-
-	/*
-	 * performance counters are shared resources
-	 * assign each hyperthread its own set
-	 * (re-use the ESCR0 register, seems safe
-	 * and keeps the cccr_val the same)
-	 */
-	if (!ht_num) {
-		/* logical cpu 0 */
-		perfctr_msr = MSR_P4_IQ_PERFCTR0;
-		evntsel_msr = MSR_P4_CRU_ESCR0;
-		cccr_msr = MSR_P4_IQ_CCCR0;
-		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
-		/*
-		 * If we're on the kdump kernel or other situation, we may
-		 * still have other performance counter registers set to
-		 * interrupt and they'll keep interrupting forever because
-		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
-		 * pending interrupts and disable all the registers here,
-		 * before reenabling the NMI delivery. Refer to p4_rearm()
-		 * about the P4_CCCR_OVF quirk.
-		 */
-		if (reset_devices) {
-			unsigned int low, high;
-			int i;
-
-			for (i = 0; i < P4_CONTROLS; i++) {
-				rdmsr(p4_controls[i], low, high);
-				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
-				wrmsr(p4_controls[i], low, high);
-			}
-		}
-	} else {
-		/* logical cpu 1 */
-		perfctr_msr = MSR_P4_IQ_PERFCTR1;
-		evntsel_msr = MSR_P4_CRU_ESCR0;
-		cccr_msr = MSR_P4_IQ_CCCR1;
-
-		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
-		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
-			cccr_val = P4_CCCR_OVF_PMI0;
-		else
-			cccr_val = P4_CCCR_OVF_PMI1;
-		cccr_val |= P4_CCCR_ESCR_SELECT(4);
-	}
-
-	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-		| P4_ESCR_OS
-		| P4_ESCR_USR;
-
-	cccr_val |= P4_CCCR_THRESHOLD(15)
-		 | P4_CCCR_COMPLEMENT
-		 | P4_CCCR_COMPARE
-		 | P4_CCCR_REQUIRED;
-
-	wrmsr(evntsel_msr, evntsel, 0);
-	wrmsr(cccr_msr, cccr_val, 0);
-	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = cccr_msr;
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	cccr_val |= P4_CCCR_ENABLE;
-	wrmsr(cccr_msr, cccr_val, 0);
-	return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	wrmsr(wd->cccr_msr, 0, 0);
-	wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
-	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
-		return 0;
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
-		goto fail1;
-#endif
-	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
-		goto fail2;
-	/* RED-PEN why is ESCR1 not reserved here? */
-	return 1;
- fail2:
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1)
-		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
-	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-	return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
-	if (smp_num_siblings > 1)
-		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
-	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
-	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-	unsigned dummy;
-	/*
-	 * P4 quirks:
-	 * - An overflown perfctr will assert its interrupt
-	 *   until the OVF flag in its CCCR is cleared.
-	 * - LVTPC is masked on interrupt and must be
-	 *   unmasked by the LVTPC handler.
-	 */
-	rdmsrl(wd->cccr_msr, dummy);
-	dummy &= ~P4_CCCR_OVF;
-	wrmsrl(wd->cccr_msr, dummy);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	/* start the cycle over again */
-	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
-	.reserve	= p4_reserve,
-	.unreserve	= p4_unreserve,
-	.setup		= setup_p4_watchdog,
-	.rearm		= p4_rearm,
-	.stop		= stop_p4_watchdog,
-	/* RED-PEN this is wrong for the other sibling */
-	.perfctr	= MSR_P4_BPU_PERFCTR0,
-	.evntsel	= MSR_P4_BSU_ESCR0,
-	.checkbit	= 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
-	unsigned int ebx;
-	union cpuid10_eax eax;
-	unsigned int unused;
-	unsigned int perfctr_msr, evntsel_msr;
-	unsigned int evntsel;
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
-	 */
-	cpuid(10, &(eax.full), &ebx, &unused, &unused);
-	if ((eax.split.mask_length <
-			(ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
-	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		return 0;
-
-	perfctr_msr = wd_ops->perfctr;
-	evntsel_msr = wd_ops->evntsel;
-
-	wrmsrl(perfctr_msr, 0UL);
-
-	evntsel = ARCH_PERFMON_EVENTSEL_INT
-		| ARCH_PERFMON_EVENTSEL_OS
-		| ARCH_PERFMON_EVENTSEL_USR
-		| ARCH_PERFMON_NMI_EVENT_SEL
-		| ARCH_PERFMON_NMI_EVENT_UMASK;
-
-	/* setup the timer */
-	wrmsr(evntsel_msr, evntsel, 0);
-	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
-
-	/* ok, everything is initialized, announce that we're set */
-	cpu_nmi_set_wd_enabled();
-
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
-	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
-	return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
-	.reserve	= single_msr_reserve,
-	.unreserve	= single_msr_unreserve,
-	.setup		= setup_intel_arch_watchdog,
-	.rearm		= p6_rearm,
-	.stop		= single_msr_stop_watchdog,
-	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
-	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
-};
-
-static void probe_nmi_watchdog(void)
-{
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 == 6 ||
-		    (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
-			wd_ops = &k7_wd_ops;
-		return;
-	case X86_VENDOR_INTEL:
-		/* Work around where perfctr1 doesn't have a working enable
-		 * bit as described in the following errata:
-		 * AE49 Core Duo and Intel Core Solo 65 nm
-		 * AN49 Intel Pentium Dual-Core
-		 * AF49 Dual-Core Intel Xeon Processor LV
-		 */
-		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
-		    ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
-		     boot_cpu_data.x86_mask == 4))) {
-			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
-			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
-		}
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			wd_ops = &intel_arch_wd_ops;
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 13)
-				return;
-
-			wd_ops = &p6_wd_ops;
-			break;
-		case 15:
-			wd_ops = &p4_wd_ops;
-			break;
-		default:
-			return;
-		}
-		break;
-	}
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
-	if (!wd_ops) {
-		probe_nmi_watchdog();
-		if (!wd_ops) {
-			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
-			return -1;
-		}
-
-		if (!wd_ops->reserve()) {
-			printk(KERN_ERR
-				"NMI watchdog: cannot reserve perfctrs\n");
-			return -1;
-		}
-	}
-
-	if (!(wd_ops->setup(nmi_hz))) {
-		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
-		       raw_smp_processor_id());
-		return -1;
-	}
-
-	return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
-	if (wd_ops)
-		wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
-	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
-		hz = adjust_for_32bit_ctr(hz);
-	return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
-	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-	u64 ctr;
-
-	rdmsrl(wd->perfctr_msr, ctr);
-	if (ctr & wd_ops->checkbit) /* perfctr still running? */
-		return 0;
-
-	wd_ops->rearm(wd, nmi_hz);
-	return 1;
-}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..f0a0624eea55 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -316,12 +316,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		legacy_pic->mask(0);
-		enable_NMI_through_LVT0();
-		legacy_pic->unmask(0);
-	}
-
 	/* This must be done before setting cpu_online_mask */
 	set_cpu_sibling_map(raw_smp_processor_id());
 	wmb();
@@ -1061,8 +1055,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		smpboot_clear_io_apic();
 
-		localise_nmi_watchdog();
-
 		connect_bsp_APIC();
 		setup_local_APIC();
 		end_local_APIC_setup();
@@ -1196,7 +1188,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
 #endif
-	check_nmi_watchdog();
 	mtrr_aps_init();
 }
 
@@ -1341,8 +1332,6 @@ int native_cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 
 	cpu_disable_common();
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
 #ifdef CONFIG_X86_64
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 	/* Keep nmi watchdog up to date */
 	inc_irq_stat(irq0_irqs);
 
-	/* Optimized out for !IO_APIC and x86_64 */
-	if (timer_ack) {
-		/*
-		 * Subtle, when I/O APICs are used we have to ack timer IRQ
-		 * manually to deassert NMI lines for the watchdog if run
-		 * on an 82489DX-based system.
-		 */
-		raw_spin_lock(&i8259A_lock);
-		outb(0x0c, PIC_MASTER_OCW3);
-		/* Ack the IRQ; AEOI will end it automatically. */
-		inb(PIC_MASTER_POLL);
-		raw_spin_unlock(&i8259A_lock);
-	}
-
 	global_clock_event->event_handler(global_clock_event);
 
 	/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index db30d9cb9dd6..f02c179c2552 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -437,14 +437,12 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 void stop_nmi(void)
 {
-	acpi_nmi_disable();
 	ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
 	ignore_nmis--;
-	acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..0636dd93cef8 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@ static void timer_stop(void)
 
 int __init op_nmi_timer_init(struct oprofile_operations *ops)
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
-		return -ENODEV;
-
 	ops->start = timer_start;
 	ops->stop = timer_stop;
 	ops->cpu_type = "timer";
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 660a2728908d..0cac7ec0d2ec 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
 	 * as possible (without an NMI being received in the middle of
 	 * this) - so disable NMIs and initialize the device:
 	 */
-	acpi_nmi_disable();
 	status = acpi_ns_evaluate(info);
-	acpi_nmi_enable();
 
 	if (ACPI_SUCCESS(status)) {
 		walk_info->num_INI++;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3d77116e4634..c19f4a20794a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -649,12 +649,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 	 * If nmi_watchdog is turned off then we can turn on
 	 * our nmi decoding capability.
 	 */
-	if (!nmi_watchdog_active())
-		hpwdt_nmi_decoding = 1;
-	else
-		dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-			"functionality you must reboot with nmi_watchdog=0 "
-			"and load the hpwdt driver with priority=1.\n");
+	hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 0cb3e5c246d0..1c451e6ecc17 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -25,8 +25,6 @@ static inline void touch_nmi_watchdog(void)
 #else
 extern void touch_nmi_watchdog(void);
 #endif
-static inline void acpi_nmi_disable(void) { }
-static inline void acpi_nmi_enable(void) { }
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
-- 
cgit v1.2.3-71-gd317


From 48c5ccae88dcd989d9de507e8510313c6cbd352b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 13 Nov 2010 19:32:29 +0100
Subject: sched: Simplify cpu-hot-unplug task migration

While discussing the need for sched_idle_next(), Oleg remarked that
since try_to_wake_up() ensures sleeping tasks will end up running on a
sane cpu, we can do away with migrate_live_tasks().

If we then extend the existing hack of migrating current from
CPU_DYING to migrating the full rq worth of tasks from CPU_DYING, the
need for the sched_idle_next() abomination disappears as well, since
idle will be the only possible thread left after the migration thread
stops.

This greatly simplifies the hot-unplug task migration path, as can be
seen from the resulting code reduction (and about half the new lines
are comments).

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1289851597.2109.547.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   3 -
 kernel/cpu.c          |  16 ++--
 kernel/sched.c        | 206 +++++++++++++++-----------------------------------
 3 files changed, 67 insertions(+), 158 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cd70cf91fde..29d953abb5ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1871,14 +1871,11 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
 #endif
 
-extern void sched_idle_next(void);
-
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
 extern void wake_up_idle_cpu(int cpu);
 #else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f6e726f18491..8615aa65d927 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
-	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -208,11 +207,6 @@ static int __ref take_cpu_down(void *_param)
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
 
-	if (task_cpu(param->caller) == cpu)
-		move_task_off_dead_cpu(cpu, param->caller);
-	/* Force idle task to run as soon as we yield: it should
-	   immediately notice cpu is offline and die quickly. */
-	sched_idle_next();
 	return 0;
 }
 
@@ -223,7 +217,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
-		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -253,9 +246,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	}
 	BUG_ON(cpu_online(cpu));
 
-	/* Wait for it to sleep (leaving idle task). */
-	while (!idle_cpu(cpu))
-		yield();
+	/*
+	 * The migration_call() CPU_DYING callback will have removed all
+	 * runnable tasks from the cpu, there's only the idle task left now
+	 * that the migration thread is done doing the stop_machine thing.
+	 */
+	BUG_ON(!idle_cpu(cpu));
 
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
diff --git a/kernel/sched.c b/kernel/sched.c
index 41f18695b730..b0d5f1b24a39 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2366,18 +2366,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		return dest_cpu;
 
 	/* No more Mr. Nice Guy. */
-	if (unlikely(dest_cpu >= nr_cpu_ids)) {
-		dest_cpu = cpuset_cpus_allowed_fallback(p);
-		/*
-		 * Don't tell them about moving exiting tasks or
-		 * kernel threads (both mm NULL), since they never
-		 * leave kernel.
-		 */
-		if (p->mm && printk_ratelimit()) {
-			printk(KERN_INFO "process %d (%s) no "
-			       "longer affine to cpu%d\n",
-			       task_pid_nr(p), p->comm, cpu);
-		}
+	dest_cpu = cpuset_cpus_allowed_fallback(p);
+	/*
+	 * Don't tell them about moving exiting tasks or
+	 * kernel threads (both mm NULL), since they never
+	 * leave kernel.
+	 */
+	if (p->mm && printk_ratelimit()) {
+		printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+				task_pid_nr(p), p->comm, cpu);
 	}
 
 	return dest_cpu;
@@ -5712,29 +5709,20 @@ static int migration_cpu_stop(void *data)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
 /*
- * Figure out where task on dead CPU should go, use force if necessary.
+ * Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
  */
-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void idle_task_exit(void)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
-	int needs_cpu, uninitialized_var(dest_cpu);
-	unsigned long flags;
+	struct mm_struct *mm = current->active_mm;
 
-	local_irq_save(flags);
+	BUG_ON(cpu_online(smp_processor_id()));
 
-	raw_spin_lock(&rq->lock);
-	needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
-	if (needs_cpu)
-		dest_cpu = select_fallback_rq(dead_cpu, p);
-	raw_spin_unlock(&rq->lock);
-	/*
-	 * It can only fail if we race with set_cpus_allowed(),
-	 * in the racer should migrate the task anyway.
-	 */
-	if (needs_cpu)
-		__migrate_task(p, dead_cpu, dest_cpu);
-	local_irq_restore(flags);
+	if (mm != &init_mm)
+		switch_mm(mm, &init_mm, current);
+	mmdrop(mm);
 }
 
 /*
@@ -5747,128 +5735,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
 	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-	unsigned long flags;
 
-	local_irq_save(flags);
-	double_rq_lock(rq_src, rq_dest);
 	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
 	rq_src->nr_uninterruptible = 0;
-	double_rq_unlock(rq_src, rq_dest);
-	local_irq_restore(flags);
-}
-
-/* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
-{
-	struct task_struct *p, *t;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread(t, p) {
-		if (p == current)
-			continue;
-
-		if (task_cpu(p) == src_cpu)
-			move_task_off_dead_cpu(src_cpu, p);
-	} while_each_thread(t, p);
-
-	read_unlock(&tasklist_lock);
 }
 
 /*
- * Schedules idle task to be the next runnable task on current CPU.
- * It does so by boosting its priority to highest possible.
- * Used by CPU offline code.
+ * remove the tasks which were accounted by rq from calc_load_tasks.
  */
-void sched_idle_next(void)
+static void calc_global_load_remove(struct rq *rq)
 {
-	int this_cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(this_cpu);
-	struct task_struct *p = rq->idle;
-	unsigned long flags;
-
-	/* cpu has to be offline */
-	BUG_ON(cpu_online(this_cpu));
-
-	/*
-	 * Strictly not necessary since rest of the CPUs are stopped by now
-	 * and interrupts disabled on the current cpu.
-	 */
-	raw_spin_lock_irqsave(&rq->lock, flags);
-
-	__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-
-	activate_task(rq, p, 0);
-
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+	rq->calc_load_active = 0;
 }
 
 /*
- * Ensures that the idle task is using init_mm right before its cpu goes
- * offline.
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
+ * try_to_wake_up()->select_task_rq().
+ *
+ * Called with rq->lock held even though we'er in stop_machine() and
+ * there's no concurrency possible, we hold the required locks anyway
+ * because of lock validation efforts.
  */
-void idle_task_exit(void)
-{
-	struct mm_struct *mm = current->active_mm;
-
-	BUG_ON(cpu_online(smp_processor_id()));
-
-	if (mm != &init_mm)
-		switch_mm(mm, &init_mm, current);
-	mmdrop(mm);
-}
-
-/* called under rq->lock with disabled interrupts */
-static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
+static void migrate_tasks(unsigned int dead_cpu)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
-
-	/* Must be exiting, otherwise would be on tasklist. */
-	BUG_ON(!p->exit_state);
-
-	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->state == TASK_DEAD);
-
-	get_task_struct(p);
+	struct task_struct *next, *stop = rq->stop;
+	int dest_cpu;
 
 	/*
-	 * Drop lock around migration; if someone else moves it,
-	 * that's OK. No task can be added to this CPU, so iteration is
-	 * fine.
+	 * Fudge the rq selection such that the below task selection loop
+	 * doesn't get stuck on the currently eligible stop task.
+	 *
+	 * We're currently inside stop_machine() and the rq is either stuck
+	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
+	 * either way we should never end up calling schedule() until we're
+	 * done here.
 	 */
-	raw_spin_unlock_irq(&rq->lock);
-	move_task_off_dead_cpu(dead_cpu, p);
-	raw_spin_lock_irq(&rq->lock);
-
-	put_task_struct(p);
-}
-
-/* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
-{
-	struct rq *rq = cpu_rq(dead_cpu);
-	struct task_struct *next;
+	rq->stop = NULL;
 
 	for ( ; ; ) {
-		if (!rq->nr_running)
+		/*
+		 * There's this thread running, bail when that's the only
+		 * remaining thread.
+		 */
+		if (rq->nr_running == 1)
 			break;
+
 		next = pick_next_task(rq);
-		if (!next)
-			break;
+		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
-		migrate_dead(dead_cpu, next);
 
+		/* Find suitable destination for @next, with force if needed. */
+		dest_cpu = select_fallback_rq(dead_cpu, next);
+		raw_spin_unlock(&rq->lock);
+
+		__migrate_task(next, dead_cpu, dest_cpu);
+
+		raw_spin_lock(&rq->lock);
 	}
-}
 
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
- */
-static void calc_global_load_remove(struct rq *rq)
-{
-	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-	rq->calc_load_active = 0;
+	rq->stop = stop;
 }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -6078,15 +6007,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		rq->calc_load_update = calc_load_update;
 		break;
 
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
@@ -6098,30 +6025,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		migrate_live_tasks(cpu);
-		/* Idle task back to normal (off runqueue, low prio) */
-		raw_spin_lock_irq(&rq->lock);
-		deactivate_task(rq, rq->idle, 0);
-		__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
-		rq->idle->sched_class = &idle_sched_class;
-		migrate_dead_tasks(cpu);
-		raw_spin_unlock_irq(&rq->lock);
-		migrate_nr_uninterruptible(rq);
-		BUG_ON(rq->nr_running != 0);
-		calc_global_load_remove(rq);
-		break;
-
 	case CPU_DYING:
-	case CPU_DYING_FROZEN:
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
+		migrate_tasks(cpu);
+		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+		migrate_nr_uninterruptible(rq);
+		calc_global_load_remove(rq);
 		break;
 #endif
 	}
-- 
cgit v1.2.3-71-gd317


From 2069dd75c7d0f49355939e5586daf5a9ab216db7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 15 Nov 2010 15:47:00 -0800
Subject: sched: Rewrite tg_shares_up)

By tracking a per-cpu load-avg for each cfs_rq and folding it into a
global task_group load on each tick we can rework tg_shares_up to be
strictly per-cpu.

This should improve cpu-cgroup performance for smp systems
significantly.

[ Paul: changed to use queueing cfs_rq + bug fixes ]

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101115234937.580480400@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |   2 -
 kernel/sched.c          | 173 ++++++++++++------------------------------------
 kernel/sched_debug.c    |  15 +++--
 kernel/sched_fair.c     | 164 +++++++++++++++++++++++++++++----------------
 kernel/sched_features.h |   2 -
 kernel/sysctl.c         |  19 ------
 6 files changed, 162 insertions(+), 213 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29d953abb5ad..8abb8aa59664 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
diff --git a/kernel/sched.c b/kernel/sched.c
index b0d5f1b24a39..e2f1a3024a99 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -253,6 +253,8 @@ struct task_group {
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 	unsigned long shares;
+
+	atomic_t load_weight;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -359,15 +361,11 @@ struct cfs_rq {
 	 */
 	unsigned long h_load;
 
-	/*
-	 * this cpu's part of tg->shares
-	 */
-	unsigned long shares;
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp;
 
-	/*
-	 * load.weight at the time we set shares
-	 */
-	unsigned long rq_weight;
+	unsigned long load_contribution;
 #endif
 #endif
 };
@@ -806,20 +804,6 @@ late_initcall(sched_init_debug);
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * ratelimit for updating the group shares.
- * default: 0.25ms
- */
-unsigned int sysctl_sched_shares_ratelimit = 250000;
-unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
-
-/*
- * Inject some fuzzyness into changing the per-cpu group shares
- * this avoids remote rq-locks at the expense of fairness.
- * default: 4
- */
-unsigned int sysctl_sched_shares_thresh = 4;
-
 /*
  * period over which we average the RT time consumption, measured
  * in ms.
@@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 	lw->inv_weight = 0;
 }
 
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long __percpu *update_shares_data;
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void update_group_shares_cpu(struct task_group *tg, int cpu,
-				    unsigned long sd_shares,
-				    unsigned long sd_rq_weight,
-				    unsigned long *usd_rq_weight)
-{
-	unsigned long shares, rq_weight;
-	int boost = 0;
-
-	rq_weight = usd_rq_weight[cpu];
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	/*
-	 *             \Sum_j shares_j * rq_weight_i
-	 * shares_i =  -----------------------------
-	 *                  \Sum_j rq_weight_j
-	 */
-	shares = (sd_shares * rq_weight) / sd_rq_weight;
-	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-
-	if (abs(shares - tg->se[cpu]->load.weight) >
-			sysctl_sched_shares_thresh) {
-		struct rq *rq = cpu_rq(cpu);
-		unsigned long flags;
-
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		__set_se_shares(tg->se[cpu], shares);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-}
+static void update_cfs_load(struct cfs_rq *cfs_rq);
+static void update_cfs_shares(struct cfs_rq *cfs_rq);
 
 /*
- * Re-compute the task group their per cpu shares over the given domain.
- * This needs to be done in a bottom-up fashion because the rq weight of a
- * parent group depends on the shares of its child groups.
+ * update tg->load_weight by folding this cpu's load_avg
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
-	unsigned long *usd_rq_weight;
-	struct sched_domain *sd = data;
+	long load_avg;
+	struct cfs_rq *cfs_rq;
 	unsigned long flags;
-	int i;
+	int cpu = (long)data;
+	struct rq *rq;
 
-	if (!tg->se[0])
+	if (!tg->se[cpu])
 		return 0;
 
-	local_irq_save(flags);
-	usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
-
-	for_each_cpu(i, sched_domain_span(sd)) {
-		weight = tg->cfs_rq[i]->load.weight;
-		usd_rq_weight[i] = weight;
-
-		rq_weight += weight;
-		/*
-		 * If there are currently no tasks on the cpu pretend there
-		 * is one of average load so that when a new task gets to
-		 * run here it will not get delayed by group starvation.
-		 */
-		if (!weight)
-			weight = NICE_0_LOAD;
+	rq = cpu_rq(cpu);
+	cfs_rq = tg->cfs_rq[cpu];
 
-		sum_weight += weight;
-		shares += tg->cfs_rq[i]->shares;
-	}
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	if (!rq_weight)
-		rq_weight = sum_weight;
+	update_rq_clock(rq);
+	update_cfs_load(cfs_rq);
 
-	if ((!shares && rq_weight) || shares > tg->shares)
-		shares = tg->shares;
+	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+	load_avg -= cfs_rq->load_contribution;
 
-	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
-		shares = tg->shares;
+	atomic_add(load_avg, &tg->load_weight);
+	cfs_rq->load_contribution += load_avg;
 
-	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
+	/*
+	 * We need to update shares after updating tg->load_weight in
+	 * order to adjust the weight of groups with long running tasks.
+	 */
+	update_cfs_shares(cfs_rq);
 
-	local_irq_restore(flags);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	return 0;
 }
@@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data)
 		load = cpu_rq(cpu)->load.weight;
 	} else {
 		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->cfs_rq[cpu]->shares;
+		load *= tg->se[cpu]->load.weight;
 		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
 	}
 
@@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data)
 	return 0;
 }
 
-static void update_shares(struct sched_domain *sd)
+static void update_shares(long cpu)
 {
-	s64 elapsed;
-	u64 now;
-
 	if (root_task_group_empty())
 		return;
 
-	now = local_clock();
-	elapsed = now - sd->last_update;
+	/*
+	 * XXX: replace with an on-demand list
+	 */
 
-	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
-		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, sd);
-	}
+	walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
 }
 
 static void update_h_load(long cpu)
@@ -1699,7 +1631,7 @@ static void update_h_load(long cpu)
 
 #else
 
-static inline void update_shares(struct sched_domain *sd)
+static inline void update_shares(int cpu)
 {
 }
 
@@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-#ifdef CONFIG_SMP
-	cfs_rq->shares = shares;
-#endif
-}
-#endif
-
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
@@ -5551,7 +5474,6 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_min_granularity);
 	SET_SYSCTL(sched_latency);
 	SET_SYSCTL(sched_wakeup_granularity);
-	SET_SYSCTL(sched_shares_ratelimit);
 #undef SET_SYSCTL
 }
 
@@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 		se->cfs_rq = parent->my_q;
 
 	se->my_q = cfs_rq;
-	se->load.weight = tg->shares;
-	se->load.inv_weight = 0;
+	update_load_set(&se->load, tg->shares);
 	se->parent = parent;
 }
 #endif
@@ -7881,10 +7802,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
-					    __alignof__(unsigned long));
-#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
 	if (on_rq)
 		dequeue_entity(cfs_rq, se, 0);
 
-	se->load.weight = shares;
-	se->load.inv_weight = 0;
+	update_load_set(&se->load, shares);
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
@@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 		/*
 		 * force a rebalance
 		 */
-		cfs_rq_set_shares(tg->cfs_rq[i], 0);
 		set_se_shares(tg->se[i], shares);
 	}
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 2e1b0d17dd9b..e6590e7312e8 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	spread0 = min_vruntime - rq0_min_vruntime;
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
 			SPLIT_NS(spread0));
-	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
-	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
-
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
+	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
+	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
-	SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
+			SPLIT_NS(cfs_rq->load_avg));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
+			SPLIT_NS(cfs_rq->load_period));
+	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
+			cfs_rq->load_contribution);
+	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
+			atomic_read(&tg->load_weight));
 #endif
+
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..d86544b4151c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 	WRT_SYSCTL(sched_min_granularity);
 	WRT_SYSCTL(sched_latency);
 	WRT_SYSCTL(sched_wakeup_granularity);
-	WRT_SYSCTL(sched_shares_ratelimit);
 #undef WRT_SYSCTL
 
 	return 0;
@@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		list_add(&se->group_node, &cfs_rq->tasks);
 	}
 	cfs_rq->nr_running++;
-	se->on_rq = 1;
 }
 
 static void
@@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		list_del_init(&se->group_node);
 	}
 	cfs_rq->nr_running--;
-	se->on_rq = 0;
 }
 
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+static void update_cfs_load(struct cfs_rq *cfs_rq)
+{
+	u64 period = sched_avg_period();
+	u64 now, delta;
+
+	if (!cfs_rq)
+		return;
+
+	now = rq_of(cfs_rq)->clock;
+	delta = now - cfs_rq->load_stamp;
+
+	cfs_rq->load_stamp = now;
+	cfs_rq->load_period += delta;
+	cfs_rq->load_avg += delta * cfs_rq->load.weight;
+
+	while (cfs_rq->load_period > period) {
+		/*
+		 * Inline assembly required to prevent the compiler
+		 * optimising this loop into a divmod call.
+		 * See __iter_div_u64_rem() for another example of this.
+		 */
+		asm("" : "+rm" (cfs_rq->load_period));
+		cfs_rq->load_period /= 2;
+		cfs_rq->load_avg /= 2;
+	}
+}
+
+static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			    unsigned long weight)
+{
+	if (se->on_rq)
+		account_entity_dequeue(cfs_rq, se);
+
+	update_load_set(&se->load, weight);
+
+	if (se->on_rq)
+		account_entity_enqueue(cfs_rq, se);
+}
+
+static void update_cfs_shares(struct cfs_rq *cfs_rq)
+{
+	struct task_group *tg;
+	struct sched_entity *se;
+	long load_weight, load, shares;
+
+	if (!cfs_rq)
+		return;
+
+	tg = cfs_rq->tg;
+	se = tg->se[cpu_of(rq_of(cfs_rq))];
+	if (!se)
+		return;
+
+	load = cfs_rq->load.weight;
+
+	load_weight = atomic_read(&tg->load_weight);
+	load_weight -= cfs_rq->load_contribution;
+	load_weight += load;
+
+	shares = (tg->shares * load);
+	if (load_weight)
+		shares /= load_weight;
+
+	if (shares < MIN_SHARES)
+		shares = MIN_SHARES;
+	if (shares > tg->shares)
+		shares = tg->shares;
+
+	reweight_entity(cfs_rq_of(se), se, shares);
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+static inline void update_cfs_load(struct cfs_rq *cfs_rq)
+{
+}
+
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
+{
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+	update_cfs_load(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
+	update_cfs_shares(cfs_rq);
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
@@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	check_spread(cfs_rq, se);
 	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
+	se->on_rq = 1;
 }
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
+	se->on_rq = 0;
+	update_cfs_load(cfs_rq);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+	update_cfs_shares(cfs_rq);
 
 	/*
 	 * Normalize the entity after updating the min_vruntime because the
@@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		flags = ENQUEUE_WAKEUP;
 	}
 
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+		update_cfs_load(cfs_rq);
+		update_cfs_shares(cfs_rq);
+	}
+
 	hrtick_update(rq);
 }
 
@@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
+
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight)
 			break;
 		flags |= DEQUEUE_SLEEP;
 	}
 
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+		update_cfs_load(cfs_rq);
+		update_cfs_shares(cfs_rq);
+	}
+
 	hrtick_update(rq);
 }
 
@@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
- *
- * The problem is that perfectly aligning the shares is rather expensive, hence
- * we try to avoid doing that too often - see update_shares(), which ratelimits
- * this change.
- *
- * We compensate this by not only taking the current delta into account, but
- * also considering the delta between when the shares were last adjusted and
- * now.
- *
- * We still saw a performance dip, some tracing learned us that between
- * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
- * significantly. Therefore try to bias the error in direction of failing
- * the affine wakeup.
- *
  */
-static long effective_load(struct task_group *tg, int cpu,
-		long wl, long wg)
+static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
 
 	if (!tg->parent)
 		return wl;
 
-	/*
-	 * By not taking the decrease of shares on the other cpu into
-	 * account our error leans towards reducing the affine wakeups.
-	 */
-	if (!wl && sched_feat(ASYM_EFF_LOAD))
-		return wl;
-
 	for_each_sched_entity(se) {
 		long S, rw, s, a, b;
-		long more_w;
-
-		/*
-		 * Instead of using this increment, also add the difference
-		 * between when the shares were last updated and now.
-		 */
-		more_w = se->my_q->load.weight - se->my_q->rq_weight;
-		wl += more_w;
-		wg += more_w;
 
 		S = se->my_q->tg->shares;
-		s = se->my_q->shares;
-		rw = se->my_q->rq_weight;
+		s = se->load.weight;
+		rw = se->my_q->load.weight;
 
 		a = S*(rw + wl);
 		b = S*rw + s*wg;
@@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 			sd = tmp;
 	}
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (sched_feat(LB_SHARES_UPDATE)) {
-		/*
-		 * Pick the largest domain to update shares over
-		 */
-		tmp = sd;
-		if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
-			tmp = affine_sd;
-
-		if (tmp) {
-			raw_spin_unlock(&rq->lock);
-			update_shares(tmp);
-			raw_spin_lock(&rq->lock);
-		}
-	}
-#endif
-
 	if (affine_sd) {
 		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
 			return select_idle_sibling(p, cpu);
@@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	update_shares(sd);
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
 				   cpus, balance);
 
@@ -3156,8 +3206,6 @@ out_one_pinned:
 	else
 		ld_moved = 0;
 out:
-	if (ld_moved)
-		update_shares(sd);
 	return ld_moved;
 }
 
@@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	int update_next_balance = 0;
 	int need_serialize;
 
+	update_shares(cpu);
+
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 185f920ec1a2..68e69acc29b9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
 SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(LB_BIAS, 1)
-SCHED_FEAT(LB_SHARES_UPDATE, 1)
-SCHED_FEAT(ASYM_EFF_LOAD, 1)
 
 /*
  * Spin-wait on mutex acquisition when the mutex owner is running on
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b65bf634035e..3132b25193db 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-static int min_sched_shares_ratelimit = 100000; /* 100 usec */
-static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
 #endif
 
 #ifdef CONFIG_COMPACTION
@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.procname	= "sched_shares_ratelimit",
-		.data		= &sysctl_sched_shares_ratelimit,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_shares_ratelimit,
-		.extra2		= &max_sched_shares_ratelimit,
-	},
 	{
 		.procname	= "sched_tunable_scaling",
 		.data		= &sysctl_sched_tunable_scaling,
@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_sched_tunable_scaling,
 		.extra2		= &max_sched_tunable_scaling,
 	},
-	{
-		.procname	= "sched_shares_thresh",
-		.data		= &sysctl_sched_shares_thresh,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-	},
 	{
 		.procname	= "sched_migration_cost",
 		.data		= &sysctl_sched_migration_cost,
-- 
cgit v1.2.3-71-gd317


From a7a4f8a752ec734b2eab904fc863d5dc873de338 Mon Sep 17 00:00:00 2001
From: Paul Turner <pjt@google.com>
Date: Mon, 15 Nov 2010 15:47:06 -0800
Subject: sched: Add sysctl_sched_shares_window

Introduce a new sysctl for the shares window and disambiguate it from
sched_time_avg.

A 10ms window appears to be a good compromise between accuracy and performance.

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101115234938.112173964@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 +
 kernel/sched_fair.c   | 9 ++++++++-
 kernel/sysctl.c       | 7 +++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8abb8aa59664..840f1277492f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1900,6 +1900,7 @@ extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b320753aa6c9..6c84439ce987 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+/*
+ * The exponential sliding  window over which load is averaged for shares
+ * distribution.
+ * (default: 10msec)
+ */
+unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
+
 static const struct sched_class fair_sched_class;
 
 /**************************************************************
@@ -688,7 +695,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
 static void update_cfs_load(struct cfs_rq *cfs_rq)
 {
-	u64 period = sched_avg_period();
+	u64 period = sysctl_sched_shares_window;
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3132b25193db..9b520d74f052 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -332,6 +332,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "sched_shares_window",
+		.data		= &sysctl_sched_shares_window,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "timer_migration",
 		.data		= &sysctl_timer_migration,
-- 
cgit v1.2.3-71-gd317


From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001
From: matthieu castet <castet.matthieu@free.fr>
Date: Tue, 16 Nov 2010 22:35:16 +0100
Subject: x86: Add RO/NX protection for loadable kernel modules

This patch is a logical extension of the protection provided by
CONFIG_DEBUG_RODATA to LKMs. The protection is provided by
splitting module_core and module_init into three logical parts
each and setting appropriate page access permissions for each
individual section:

 1. Code: RO+X
 2. RO data: RO+NX
 3. RW data: RW+NX

In order to achieve proper protection, layout_sections() have
been modified to align each of the three parts mentioned above
onto page boundary. Next, the corresponding page access
permissions are set right before successful exit from
load_module(). Further, free_module() and sys_init_module have
been modified to set module_core and module_init as RW+NX right
before calling module_free().

By default, the original section layout and access flags are
preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y,
the patch will page-align each group of sections to ensure that
each page contains only one type of content and will enforce
RO/NX for each group of pages.

  -v1: Initial proof-of-concept patch.
  -v2: The patch have been re-written to reduce the number of #ifdefs
       and to make it architecture-agnostic. Code formatting has also
       been corrected.
  -v3: Opportunistic RO/NX protection is now unconditional. Section
       page-alignment is enabled when CONFIG_DEBUG_RODATA=y.
  -v4: Removed most macros and improved coding style.
  -v5: Changed page-alignment and RO/NX section size calculation
  -v6: Fixed comments. Restricted RO/NX enforcement to x86 only
  -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added
       calls to set_all_modules_text_rw() and set_all_modules_text_ro()
       in ftrace
  -v8: updated for compatibility with linux 2.6.33-rc5
  -v9: coding style fixes
 -v10: more coding style fixes
 -v11: minor adjustments for -tip
 -v12: minor adjustments for v2.6.35-rc2-tip
 -v13: minor adjustments for v2.6.37-rc1-tip

Signed-off-by: Siarhei Liakh <sliakh.lkml@gmail.com>
Signed-off-by: Xuxian Jiang <jiang@cs.ncsu.edu>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Dave Jones <davej@redhat.com>
Cc: Kees Cook <kees.cook@canonical.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <4CE2F914.9070106@free.fr>
[ minor cleanliness edits, -v14: build failure fix ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug   |  11 +++
 arch/x86/kernel/ftrace.c |   3 +
 include/linux/module.h   |  11 ++-
 kernel/module.c          | 171 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 193 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b59ee765414e..45143bbcfe5e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
 	  feature as well as for the change_page_attr() infrastructure.
 	  If in doubt, say "N"
 
+config DEBUG_SET_MODULE_RONX
+	bool "Set loadable kernel module data as NX and text as RO"
+	depends on MODULES
+	---help---
+	  This option helps catch unintended modifications to loadable
+	  kernel module's text and read-only data. It also prevents execution
+	  of module data. Such protection may interfere with run-time code
+	  patching and dynamic kernel tracing - and they might also protect
+	  against certain classes of kernel exploits.
+	  If in doubt, say "N".
+
 config DEBUG_NX_TEST
 	tristate "Testcase for the NX non-executable stack feature"
 	depends on DEBUG_KERNEL && m
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f14d2d..298448656b60 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>
 
 #include <trace/syscall.h>
 
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
 int ftrace_arch_code_modify_prepare(void)
 {
 	set_kernel_text_rw();
+	set_all_modules_text_rw();
 	modifying_code = 1;
 	return 0;
 }
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
 int ftrace_arch_code_modify_post_process(void)
 {
 	modifying_code = 0;
+	set_all_modules_text_ro();
 	set_kernel_text_ro();
 	return 0;
 }
diff --git a/include/linux/module.h b/include/linux/module.h
index b29e7458b966..ddaa689d71bd 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -308,6 +308,9 @@ struct module
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
+	/* Size of RO sections of the module (text+rodata) */
+	unsigned int init_ro_size, core_ro_size;
+
 	/* Arch-specific module values */
 	struct mod_arch_specific arch;
 
@@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
 {
 	return 0;
 }
-
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
@@ -687,6 +689,13 @@ extern int module_sysfs_initialized;
 
 #define __MODULE_STRING(x) __stringify(x)
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+extern void set_all_modules_text_rw(void);
+extern void set_all_modules_text_ro(void);
+#else
+static inline void set_all_modules_text_rw(void) { }
+static inline void set_all_modules_text_ro(void) { }
+#endif
 
 #ifdef CONFIG_GENERIC_BUG
 void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..ba421e6b4ada 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,6 +56,7 @@
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
 #include <linux/jump_label.h>
+#include <linux/pfn.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
@@ -70,6 +71,26 @@
 #define ARCH_SHF_SMALL 0
 #endif
 
+/*
+ * Modules' sections will be aligned on page boundaries
+ * to ensure complete separation of code and data, but
+ * only when CONFIG_DEBUG_SET_MODULE_RONX=y
+ */
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+# define debug_align(X) ALIGN(X, PAGE_SIZE)
+#else
+# define debug_align(X) (X)
+#endif
+
+/*
+ * Given BASE and SIZE this macro calculates the number of pages the
+ * memory regions occupies
+ */
+#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ?		\
+		(PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) -	\
+			 PFN_DOWN((unsigned long)BASE) + 1)	\
+		: (0UL))
+
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
@@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod)
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+/*
+ * LKM RO/NX protection: protect module's text/ro-data
+ * from modification and any data from execution.
+ */
+void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
+{
+	unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
+	unsigned long end_pfn = PFN_DOWN((unsigned long)end);
+
+	if (end_pfn > begin_pfn)
+		set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+}
+
+static void set_section_ro_nx(void *base,
+			unsigned long text_size,
+			unsigned long ro_size,
+			unsigned long total_size)
+{
+	/* begin and end PFNs of the current subsection */
+	unsigned long begin_pfn;
+	unsigned long end_pfn;
+
+	/*
+	 * Set RO for module text and RO-data:
+	 * - Always protect first page.
+	 * - Do not protect last partial page.
+	 */
+	if (ro_size > 0)
+		set_page_attributes(base, base + ro_size, set_memory_ro);
+
+	/*
+	 * Set NX permissions for module data:
+	 * - Do not protect first partial page.
+	 * - Always protect last page.
+	 */
+	if (total_size > text_size) {
+		begin_pfn = PFN_UP((unsigned long)base + text_size);
+		end_pfn = PFN_UP((unsigned long)base + total_size);
+		if (end_pfn > begin_pfn)
+			set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+	}
+}
+
+/* Setting memory back to RW+NX before releasing it */
+void unset_section_ro_nx(struct module *mod, void *module_region)
+{
+	unsigned long total_pages;
+
+	if (mod->module_core == module_region) {
+		/* Set core as NX+RW */
+		total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
+		set_memory_nx((unsigned long)mod->module_core, total_pages);
+		set_memory_rw((unsigned long)mod->module_core, total_pages);
+
+	} else if (mod->module_init == module_region) {
+		/* Set init as NX+RW */
+		total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
+		set_memory_nx((unsigned long)mod->module_init, total_pages);
+		set_memory_rw((unsigned long)mod->module_init, total_pages);
+	}
+}
+
+/* Iterate through all modules and set each module's text as RW */
+void set_all_modules_text_rw()
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if ((mod->module_core) && (mod->core_text_size)) {
+			set_page_attributes(mod->module_core,
+						mod->module_core + mod->core_text_size,
+						set_memory_rw);
+		}
+		if ((mod->module_init) && (mod->init_text_size)) {
+			set_page_attributes(mod->module_init,
+						mod->module_init + mod->init_text_size,
+						set_memory_rw);
+		}
+	}
+	mutex_unlock(&module_mutex);
+}
+
+/* Iterate through all modules and set each module's text as RO */
+void set_all_modules_text_ro()
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if ((mod->module_core) && (mod->core_text_size)) {
+			set_page_attributes(mod->module_core,
+						mod->module_core + mod->core_text_size,
+						set_memory_ro);
+		}
+		if ((mod->module_init) && (mod->init_text_size)) {
+			set_page_attributes(mod->module_init,
+						mod->module_init + mod->init_text_size,
+						set_memory_ro);
+		}
+	}
+	mutex_unlock(&module_mutex);
+}
+#else
+static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
+static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
+#endif
+
 /* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
@@ -1566,6 +1696,7 @@ static void free_module(struct module *mod)
 	destroy_params(mod->kp, mod->num_kp);
 
 	/* This may be NULL, but that's OK */
+	unset_section_ro_nx(mod, mod->module_init);
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
 	percpu_modfree(mod);
@@ -1574,6 +1705,7 @@ static void free_module(struct module *mod)
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
 	/* Finally, free the core (containing the module structure) */
+	unset_section_ro_nx(mod, mod->module_core);
 	module_free(mod, mod->module_core);
 
 #ifdef CONFIG_MPU
@@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
 			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
 			DEBUGP("\t%s\n", name);
 		}
-		if (m == 0)
+		switch (m) {
+		case 0: /* executable */
+			mod->core_size = debug_align(mod->core_size);
 			mod->core_text_size = mod->core_size;
+			break;
+		case 1: /* RO: text and ro-data */
+			mod->core_size = debug_align(mod->core_size);
+			mod->core_ro_size = mod->core_size;
+			break;
+		case 3: /* whole core */
+			mod->core_size = debug_align(mod->core_size);
+			break;
+		}
 	}
 
 	DEBUGP("Init section allocation order:\n");
@@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
 					 | INIT_OFFSET_MASK);
 			DEBUGP("\t%s\n", sname);
 		}
-		if (m == 0)
+		switch (m) {
+		case 0: /* executable */
+			mod->init_size = debug_align(mod->init_size);
 			mod->init_text_size = mod->init_size;
+			break;
+		case 1: /* RO: text and ro-data */
+			mod->init_size = debug_align(mod->init_size);
+			mod->init_ro_size = mod->init_size;
+			break;
+		case 3: /* whole init */
+			mod->init_size = debug_align(mod->init_size);
+			break;
+		}
 	}
 }
 
@@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod,
 	kfree(info.strmap);
 	free_copy(&info);
 
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+				mod->core_text_size,
+				mod->core_ro_size,
+				mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+				mod->init_text_size,
+				mod->init_ro_size,
+				mod->init_size);
+
 	/* Done! */
 	trace_module_load(mod);
 	return mod;
@@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
+	unset_section_ro_nx(mod, mod->module_init);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
-- 
cgit v1.2.3-71-gd317


From 9c0729dc8062bed96189bd14ac6d4920f3958743 Mon Sep 17 00:00:00 2001
From: Soeren Sandmann Pedersen <sandmann@redhat.com>
Date: Fri, 5 Nov 2010 05:59:39 -0400
Subject: x86: Eliminate bp argument from the stack tracing routines

The various stack tracing routines take a 'bp' argument in which the
caller is supposed to provide the base pointer to use, or 0 if doesn't
have one. Since bp is garbage whenever CONFIG_FRAME_POINTER is not
defined, this means all callers in principle should either always pass
0, or be conditional on CONFIG_FRAME_POINTER.

However, there are only really three use cases for stack tracing:

(a) Trace the current task, including IRQ stack if any
(b) Trace the current task, but skip IRQ stack
(c) Trace some other task

In all cases, if CONFIG_FRAME_POINTER is not defined, bp should just
be 0.  If it _is_ defined, then

- in case (a) bp should be gotten directly from the CPU's register, so
  the caller should pass NULL for regs,

- in case (b) the caller should should pass the IRQ registers to
  dump_trace(),

- in case (c) bp should be gotten from the top of the task's stack, so
  the caller should pass NULL for regs.

Hence, the bp argument is not necessary because the combination of
task and regs is sufficient to determine an appropriate value for bp.

This patch introduces a new inline function stack_frame(task, regs)
that computes the desired bp. This function is then called from the
two versions of dump_stack().

Signed-off-by: Soren Sandmann <ssp@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@infradead.org>,
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>,
LKML-Reference: <m3oc9rop28.fsf@dhcp-100-3-82.bos.redhat.com>>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/include/asm/kdebug.h     |  2 +-
 arch/x86/include/asm/stacktrace.h | 33 ++++++++++++++++++++++++++++++---
 arch/x86/kernel/cpu/perf_event.c  |  2 +-
 arch/x86/kernel/dumpstack.c       | 12 ++++++------
 arch/x86/kernel/dumpstack_32.c    | 25 +++++++------------------
 arch/x86/kernel/dumpstack_64.c    | 24 +++++++-----------------
 arch/x86/kernel/process.c         |  3 +--
 arch/x86/kernel/stacktrace.c      |  8 ++++----
 arch/x86/mm/kmemcheck/error.c     |  2 +-
 arch/x86/oprofile/backtrace.c     |  2 +-
 include/linux/stacktrace.h        |  4 +++-
 11 files changed, 62 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
+		       unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+		unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	unsigned long bp;
+
+	if (regs)
+		return regs->bp;
+
+	if (task == current) {
+		/* Grab bp right from our regs */
+		get_bp(bp);
+		return bp;
+	}
+
+	/* bp is the last reg pushed by switch_to */
+	return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl);
+		   unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *sp, unsigned long bp, char *log_lvl);
+		   unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..461a85dcaba4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1666,7 +1666,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	perf_callchain_store(entry, regs->ip);
 
-	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+	dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
+		unsigned long *stack, char *log_lvl)
 {
 	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
+		unsigned long *stack)
 {
-	show_trace_log_lvl(task, regs, stack, bp, "");
+	show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	show_stack_log_lvl(task, NULL, sp, 0, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@ void dump_stack(void)
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(NULL, NULL, &stack, bp);
+	show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+		struct pt_regs *regs, unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
 	int graph = 0;
+	unsigned long bp;
 
 	if (!task)
 		task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			stack = (unsigned long *)task->thread.sp;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp) {
-		if (task == current) {
-			/* Grab bp right from our regs */
-			get_bp(bp);
-		} else {
-			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.sp;
-		}
-	}
-#endif
-
+	bp = stack_frame(task, regs);
 	for (;;) {
 		struct thread_info *context;
 
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+		   unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		touch_nmi_watchdog();
 	}
 	printk(KERN_CONT "\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
 		u8 *ip;
 
 		printk(KERN_EMERG "Stack:\n");
-		show_stack_log_lvl(NULL, regs, &regs->sp,
-				0, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+		struct pt_regs *regs, unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	unsigned used = 0;
 	struct thread_info *tinfo;
 	int graph = 0;
+	unsigned long bp;
 
 	if (!task)
 		task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			stack = (unsigned long *)task->thread.sp;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp) {
-		if (task == current) {
-			/* Grab bp right from our regs */
-			get_bp(bp);
-		} else {
-			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.sp;
-		}
-	}
-#endif
-
+	bp = stack_frame(task, regs);
 	/*
 	 * Print function call entries in all stacks, starting at the
 	 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+		   unsigned long *sp, char *log_lvl)
 {
 	unsigned long *irq_stack_end;
 	unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	preempt_enable();
 
 	printk(KERN_CONT "\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
 
 		printk(KERN_EMERG "Stack:\n");
 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-				regs->bp, KERN_EMERG);
+				   KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..96ed1aac543a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
 	show_registers(regs);
-	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-		   regs->bp);
+	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+	dump_trace(current, NULL, NULL, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
 {
-	dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+	dump_trace(current, regs, NULL, &save_stack_ops, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+	dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
 	e->trace.entries = e->trace_entries;
 	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
 	e->trace.skip = 0;
-	save_stack_trace_bp(&e->trace, regs->bp);
+	save_stack_trace_regs(&e->trace, regs);
 
 	/* Round address down to nearest 16 bytes */
 	shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 	if (!user_mode_vm(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
-			dump_trace(NULL, regs, (unsigned long *)stack, 0,
+			dump_trace(NULL, regs, (unsigned long *)stack,
 				   &backtrace_ops, &depth);
 		return;
 	}
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 51efbef38fb0..25310f1d7f37 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -2,6 +2,7 @@
 #define __LINUX_STACKTRACE_H
 
 struct task_struct;
+struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
 struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
+extern void save_stack_trace_regs(struct stack_trace *trace,
+				  struct pt_regs *regs);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
-- 
cgit v1.2.3-71-gd317


From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 18 Nov 2010 01:39:17 +0100
Subject: tracing: New flag to allow non privileged users to use a trace event

This adds a new trace event internal flag that allows them to be
used in perf by non privileged users in case of task bound tracing.

This is desired for syscalls tracepoint because they don't leak
global system informations, like some other tracepoints.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 include/linux/ftrace_event.h    |  2 ++
 kernel/perf_event.c             |  9 ---------
 kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8beabb958f61..312dce7e0d52 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -154,12 +154,14 @@ enum {
 	TRACE_EVENT_FL_ENABLED_BIT,
 	TRACE_EVENT_FL_FILTERED_BIT,
 	TRACE_EVENT_FL_RECORDED_CMD_BIT,
+	TRACE_EVENT_FL_CAP_ANY_BIT,
 };
 
 enum {
 	TRACE_EVENT_FL_ENABLED		= (1 << TRACE_EVENT_FL_ENABLED_BIT),
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
 	TRACE_EVENT_FL_RECORDED_CMD	= (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
+	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 };
 
 struct ftrace_event_call {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..ee1e903f983c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4747,15 +4747,6 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-			perf_paranoid_tracepoint_raw() &&
-			!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	err = perf_trace_init(event);
 	if (err)
 		return err;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059ca670e..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
 /* Count the events in use (per event id, not per instance) */
 static int	total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+				 struct perf_event *p_event)
+{
+	/* No tracing, just counting, so no obvious leak */
+	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+		return 0;
+
+	/* Some events are ok to be traced by non-root users... */
+	if (p_event->attach_state == PERF_ATTACH_TASK) {
+		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+			return 0;
+	}
+
+	/*
+	 * ...otherwise raw tracepoint data can be a severe data leak,
+	 * only allow root to have these.
+	 */
+	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
 	struct hlist_head __percpu *list;
-	int ret = -ENOMEM;
+	int ret;
 	int cpu;
 
+	ret = perf_trace_event_perm(tp_event, p_event);
+	if (ret)
+		return ret;
+
 	p_event->tp_event = tp_event;
 	if (tp_event->perf_refcount++ > 0)
 		return 0;
 
+	ret = -ENOMEM;
+
 	list = alloc_percpu(struct hlist_head);
 	if (!list)
 		goto fail;
-- 
cgit v1.2.3-71-gd317


From 1ed0c5971159974185653170543a764cc061c857 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 18 Nov 2010 01:46:57 +0100
Subject: tracing: New macro to set up initial event flags value

This introduces the new TRACE_EVENT_FLAGS() macro in order
to set up initial event flags value.

This macro must simply follow the definition of a trace event
and take the event name and the flag value as parameters:

TRACE_EVENT(my_event, .....
....
);

TRACE_EVENT_FLAGS(my_event, 1)

This will set up 1 as the initial my_event->flags value.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 include/linux/tracepoint.h |  4 ++++
 include/trace/ftrace.h     | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a4a90b6726ce..5a6074fcd81d 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -234,6 +234,8 @@ do_trace:								\
 				PARAMS(void *__data, proto),		\
 				PARAMS(__data, args))
 
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -354,4 +356,6 @@ do_trace:								\
 		assign, print, reg, unreg)			\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* ifdef TRACE_EVENT (see note above) */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a9377c0083ad..6f540123d43e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -82,6 +82,15 @@
 	TRACE_EVENT(name, PARAMS(proto), PARAMS(args),			\
 		PARAMS(tstruct), PARAMS(assign), PARAMS(print))		\
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(name, value)					\
+	static int __init trace_init_flags_##name(void)			\
+	{								\
+		event_##name.flags = value;				\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_flags_##name);
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
@@ -129,6 +138,9 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
-- 
cgit v1.2.3-71-gd317


From 53cf810b1934f08a68e131aeeb16267a778f43df Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 18 Nov 2010 02:11:42 +0100
Subject: tracing: Allow syscall trace events for non privileged users

As for the raw syscalls events, individual syscall events won't
leak system wide information on task bound tracing. Allow non
privileged users to use them in such workflow.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 include/linux/ftrace_event.h | 8 ++++++++
 include/linux/syscalls.h     | 6 ++++--
 include/trace/ftrace.h       | 7 +------
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 312dce7e0d52..725bf6bd39f7 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -198,6 +198,14 @@ struct ftrace_event_call {
 #endif
 };
 
+#define __TRACE_EVENT_FLAGS(name, value)				\
+	static int __init trace_init_flags_##name(void)			\
+	{								\
+		event_##name.flags = value;				\
+		return 0;						\
+	}								\
+	early_initcall(trace_init_flags_##name);
+
 #define PERF_MAX_TRACE_SIZE	2048
 
 #define MAX_FILTER_PRED		32
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cacc27a0e285..13b9731d30cf 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -137,7 +137,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_enter,	\
 		.event.funcs            = &enter_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-	}
+	};								\
+	__TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static struct syscall_metadata					\
@@ -152,7 +153,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.class			= &event_class_syscall_exit,	\
 		.event.funcs		= &exit_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-	}
+	};								\
+	__TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)				\
 	SYSCALL_TRACE_ENTER_EVENT(sname);			\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 6f540123d43e..e718a917d897 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -84,12 +84,7 @@
 
 #undef TRACE_EVENT_FLAGS
 #define TRACE_EVENT_FLAGS(name, value)					\
-	static int __init trace_init_flags_##name(void)			\
-	{								\
-		event_##name.flags = value;				\
-		return 0;						\
-	}								\
-	early_initcall(trace_init_flags_##name);
+	__TRACE_EVENT_FLAGS(name, value)
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-- 
cgit v1.2.3-71-gd317


From 423478cde453eebdfcfebf4b8d378d8f5d49b853 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 18 Nov 2010 02:21:26 +0100
Subject: tracing: Remove useless syscall ftrace_event_call declaration

It is defined right after, which makes the declaration completely
useless.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 include/linux/syscalls.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 13b9731d30cf..18cd0684fc4e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
-	static struct ftrace_event_call					\
-	__attribute__((__aligned__(4))) event_enter_##sname;		\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
@@ -143,8 +141,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
-	static struct ftrace_event_call					\
-	__attribute__((__aligned__(4))) event_exit_##sname;		\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
-- 
cgit v1.2.3-71-gd317


From 866f3b25a2eb60d7529c227a0ecd80c3aba443fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Nov 2010 09:33:19 -0800
Subject: bonding: IGMP handling cleanup

Instead of iterating in_dev->mc_list from bonding driver, its better
to call a helper function provided by igmp.c
Details of implementation (locking) are private to igmp code.

ip_mc_rejoin_group(struct ip_mc_list *im) becomes
ip_mc_rejoin_groups(struct in_device *in_dev);

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  8 ++------
 include/linux/igmp.h            |  2 +-
 net/ipv4/igmp.c                 | 32 +++++++++++++++++++-------------
 3 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 518844852f06..e588b2e1c3b3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -873,15 +873,11 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
-	struct ip_mc_list *im;
 
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
-	if (in_dev) {
-		for (im = in_dev->mc_list; im; im = im->next)
-			ip_mc_rejoin_group(im);
-	}
-
+	if (in_dev)
+			ip_mc_rejoin_groups(in_dev);
 	rcu_read_unlock();
 }
 
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7d164670f264..c4987f265109 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -238,7 +238,7 @@ extern void ip_mc_unmap(struct in_device *);
 extern void ip_mc_remap(struct in_device *);
 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
-extern void ip_mc_rejoin_group(struct ip_mc_list *im);
+extern void ip_mc_rejoin_groups(struct in_device *in_dev);
 
 #endif
 #endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index afb1e82a59f9..50f6bc1a002a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1267,26 +1267,32 @@ EXPORT_SYMBOL(ip_mc_inc_group);
 
 /*
  *	Resend IGMP JOIN report; used for bonding.
+ *	Called with rcu_read_lock()
  */
-void ip_mc_rejoin_group(struct ip_mc_list *im)
+void ip_mc_rejoin_groups(struct in_device *in_dev)
 {
 #ifdef CONFIG_IP_MULTICAST
-	struct in_device *in_dev = im->interface;
+	struct ip_mc_list *im;
+	int type;
 
-	if (im->multiaddr == IGMP_ALL_HOSTS)
-		return;
+	for_each_pmc_rcu(in_dev, im) {
+		if (im->multiaddr == IGMP_ALL_HOSTS)
+			continue;
 
-	/* a failover is happening and switches
-	 * must be notified immediately */
-	if (IGMP_V1_SEEN(in_dev))
-		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
-	else if (IGMP_V2_SEEN(in_dev))
-		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
-	else
-		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
+		/* a failover is happening and switches
+		 * must be notified immediately
+		 */
+		if (IGMP_V1_SEEN(in_dev))
+			type = IGMP_HOST_MEMBERSHIP_REPORT;
+		else if (IGMP_V2_SEEN(in_dev))
+			type = IGMPV2_HOST_MEMBERSHIP_REPORT;
+		else
+			type = IGMPV3_HOST_MEMBERSHIP_REPORT;
+		igmp_send_report(in_dev, im, type);
+	}
 #endif
 }
-EXPORT_SYMBOL(ip_mc_rejoin_group);
+EXPORT_SYMBOL(ip_mc_rejoin_groups);
 
 /*
  *	A socket has left a multicast group on device dev
-- 
cgit v1.2.3-71-gd317


From 4c3710afbc333c33100739dec10662b4ee64e219 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 16 Nov 2010 20:28:24 +0000
Subject: net: move definitions of BPF_S_* to net/core/filter.c

BPF_S_* are used internally, should not be exposed to the others.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 48 ------------------------------------------------
 net/core/filter.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 69b43dbea6c6..151f5d703b7e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,54 +91,6 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
-enum {
-	BPF_S_RET_K = 0,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-};
-
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 03dc0710194f..15a545d39cd3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -38,6 +38,54 @@
 #include <asm/unaligned.h>
 #include <linux/filter.h>
 
+enum {
+	BPF_S_RET_K = 0,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+};
+
 /* No hurry in this branch */
 static void *__load_pointer(struct sk_buff *skb, int k)
 {
-- 
cgit v1.2.3-71-gd317


From c5485a7e7569ab32eea240c850198519e2a765ef Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Tue, 16 Nov 2010 10:58:37 +0900
Subject: lib: Add generic exponentially weighted moving average (EWMA)
 function

This adds generic functions for calculating Exponentially Weighted Moving
Averages (EWMA). This implementation makes use of a structure which keeps the
EWMA parameters and a scaled up internal representation to reduce rounding
errors.

The original idea for this implementation came from the rt2x00 driver
(rt2x00link.c). I would like to use it in several places in the mac80211 and
ath5k code and I hope it can be useful in many other places in the kernel code.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/average.h | 32 +++++++++++++++++++++++++++
 lib/Kconfig             |  3 +++
 lib/Makefile            |  2 ++
 lib/average.c           | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)
 create mode 100644 include/linux/average.h
 create mode 100644 lib/average.c

(limited to 'include/linux')

diff --git a/include/linux/average.h b/include/linux/average.h
new file mode 100644
index 000000000000..7706e40f95fa
--- /dev/null
+++ b/include/linux/average.h
@@ -0,0 +1,32 @@
+#ifndef _LINUX_AVERAGE_H
+#define _LINUX_AVERAGE_H
+
+#include <linux/kernel.h>
+
+/* Exponentially weighted moving average (EWMA) */
+
+/* For more documentation see lib/average.c */
+
+struct ewma {
+	unsigned long internal;
+	unsigned long factor;
+	unsigned long weight;
+};
+
+extern void ewma_init(struct ewma *avg, unsigned long factor,
+		      unsigned long weight);
+
+extern struct ewma *ewma_add(struct ewma *avg, unsigned long val);
+
+/**
+ * ewma_read() - Get average value
+ * @avg: Average structure
+ *
+ * Returns the average value held in @avg.
+ */
+static inline unsigned long ewma_read(const struct ewma *avg)
+{
+	return DIV_ROUND_CLOSEST(avg->internal, avg->factor);
+}
+
+#endif /* _LINUX_AVERAGE_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index fa9bf2c06199..3116aa631af6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -210,4 +210,7 @@ config GENERIC_ATOMIC64
 config LRU_CACHE
 	tristate
 
+config AVERAGE
+	bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763b8212..76d3b8514903 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,6 +106,8 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
 obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
 
+obj-$(CONFIG_AVERAGE) += average.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 000000000000..f1d1b4660c42
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,57 @@
+/*
+ * lib/average.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/average.h>
+#include <linux/bug.h>
+
+/**
+ * DOC: Exponentially Weighted Moving Average (EWMA)
+ *
+ * These are generic functions for calculating Exponentially Weighted Moving
+ * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
+ * up internal representation of the average value to prevent rounding errors.
+ * The factor for scaling up and the exponential weight (or decay rate) have to
+ * be specified thru the init fuction. The structure should not be accessed
+ * directly but only thru the helper functions.
+ */
+
+/**
+ * ewma_init() - Initialize EWMA parameters
+ * @avg: Average structure
+ * @factor: Factor to use for the scaled up internal value. The maximum value
+ *	of averages can be ULONG_MAX/(factor*weight).
+ * @weight: Exponential weight, or decay rate. This defines how fast the
+ *	influence of older values decreases. Has to be bigger than 1.
+ *
+ * Initialize the EWMA parameters for a given struct ewma @avg.
+ */
+void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
+{
+	WARN_ON(weight <= 1 || factor == 0);
+	avg->internal = 0;
+	avg->weight = weight;
+	avg->factor = factor;
+}
+EXPORT_SYMBOL(ewma_init);
+
+/**
+ * ewma_add() - Exponentially weighted moving average (EWMA)
+ * @avg: Average structure
+ * @val: Current value
+ *
+ * Add a sample to the average.
+ */
+struct ewma *ewma_add(struct ewma *avg, unsigned long val)
+{
+	avg->internal = avg->internal  ?
+		(((avg->internal * (avg->weight - 1)) +
+			(val * avg->factor)) / avg->weight) :
+		(val * avg->factor);
+	return avg;
+}
+EXPORT_SYMBOL(ewma_add);
-- 
cgit v1.2.3-71-gd317


From 86107fd170bc379869250eb7e1bd393a3a70e8ae Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Tue, 16 Nov 2010 10:58:48 +0900
Subject: nl80211/mac80211: Report signal average

Extend nl80211 to report an exponential weighted moving average (EWMA) of the
signal value. Since the signal value usually fluctuates between different
packets, an average can be more useful than the value of the last packet.

This uses the recently added generic EWMA library function.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 4 ++++
 net/mac80211/Kconfig    | 1 +
 net/mac80211/cfg.c      | 3 ++-
 net/mac80211/rx.c       | 1 +
 net/mac80211/sta_info.c | 2 ++
 net/mac80211/sta_info.h | 3 +++
 net/wireless/nl80211.c  | 3 +++
 8 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 037b4e498890..1ce3775e9e26 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1161,6 +1161,7 @@ enum nl80211_rate_info {
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
  * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
@@ -1178,6 +1179,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_PLID,
 	NL80211_STA_INFO_PLINK_STATE,
 	NL80211_STA_INFO_SIGNAL,
+	NL80211_STA_INFO_SIGNAL_AVG,
 	NL80211_STA_INFO_TX_BITRATE,
 	NL80211_STA_INFO_RX_PACKETS,
 	NL80211_STA_INFO_TX_PACKETS,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8fd9eebd0cc9..69e2364889f1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -424,6 +424,7 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
+ * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -439,6 +440,7 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
+	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -485,6 +487,7 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
+ * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -505,6 +508,7 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
+	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec88..798d9b9462e2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,6 +6,7 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
+	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c544074479e..92c9cf6a7d1c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
 		sinfo->signal = (s8)sta->last_signal;
+		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d2fcd22ab06d..9dd60a74181f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1156,6 +1156,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
+	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index eff58571fd7e..f43fca8907f7 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
+	ewma_init(&sta->avg_signal, 1000, 8);
+
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9265acadef32..84062e2c782c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
+#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -224,6 +225,7 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
+ * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -291,6 +293,7 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
+	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 605553842226..d06a40d17002 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1872,6 +1872,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
+	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
+		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
+			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3-71-gd317


From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 12 Nov 2010 22:32:11 -0500
Subject: tracing/events: Show real number in array fields

Currently we have in something like the sched_switch event:

  field:char prev_comm[TASK_COMM_LEN];	offset:12;	size:16;	signed:1;

When a userspace tool such as perf tries to parse this, the
TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro
simply uses a #len to show the string of the length. When the length is
an enum, we get a string that means nothing for tools.

By adding a static buffer and a mutex to protect it, we can store the
string into that buffer with snprintf and show the actual number.
Now we get:

  field:char prev_comm[16];       offset:12;      size:16;        signed:1;

Something much more useful.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  4 ++++
 include/trace/ftrace.h       | 14 ++++++++++----
 kernel/trace/trace_events.c  |  6 ++++++
 kernel/trace/trace_export.c  | 14 ++++++++++----
 4 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 725bf6bd39f7..47e3997f7b5c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -225,6 +225,10 @@ enum {
 	FILTER_PTR_STRING,
 };
 
+#define EVENT_STORAGE_SIZE 128
+extern struct mutex event_storage_mutex;
+extern char event_storage[EVENT_STORAGE_SIZE];
+
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index e718a917d897..e16610c208c9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -296,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 
 #undef __array
 #define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+	do {								\
+		mutex_lock(&event_storage_mutex);			\
+		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\
+		snprintf(event_storage, sizeof(event_storage),		\
+			 "%s[%d]", #type, len);				\
+		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
 				 is_signed_type(type), FILTER_OTHER);	\
-	if (ret)							\
-		return ret;
+		mutex_unlock(&event_storage_mutex);			\
+		if (ret)						\
+			return ret;					\
+	} while (0);
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)				       \
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0725eeab1937..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44deaac25..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void)	\
 
 #undef __array
 #define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+	do {								\
+		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\
+		mutex_lock(&event_storage_mutex);			\
+		snprintf(event_storage, sizeof(event_storage),		\
+			 "%s[%d]", #type, len);				\
+		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
 				 is_signed_type(type), FILTER_OTHER);	\
-	if (ret)							\
-		return ret;
+		mutex_unlock(&event_storage_mutex);			\
+		if (ret)						\
+			return ret;					\
+	} while (0);
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)			\
-- 
cgit v1.2.3-71-gd317


From 93aaae2e01e57483256b7da05c9a7ebd65ad4686 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Nov 2010 09:49:59 -0800
Subject: filter: optimize sk_run_filter

Remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.

As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we dont need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove it from sk_run_filter() params.

On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)

Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.

This helps compiler to use cpu registers to hold fentry and A
accumulator.

On x86_32, this saves 401 bytes, and more important, sk_run_filter()
runs much faster because less register pressure (One less conditional
branch per BPF instruction)

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   2948       0       0    2948     b84 net/core/filter.o
   3349       0       0    3349     d15 net/core/filter_pre.o

on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   5173       0       0    5173    1435 net/core/filter.o
   5224       0       0    5224    1468 net/core/filter_pre.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c | 14 +++----
 drivers/net/ppp_generic.c   | 12 ++----
 include/linux/filter.h      |  2 +-
 net/core/filter.c           | 93 +++++++++++++++++++++++----------------------
 net/core/timestamping.c     |  2 +-
 net/packet/af_packet.c      |  2 +-
 6 files changed, 61 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 97c5cc2997f5..9e8162c80bb0 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 	}
 
 	if (is->pass_filter
-	    && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) {
+	    && sk_run_filter(skb, is->pass_filter) == 0) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
 		kfree_skb(skb);
 		return;
 	}
 	if (!(is->active_filter
-	      && sk_run_filter(skb, is->active_filter,
-	                       is->active_len) == 0)) {
+	      && sk_run_filter(skb, is->active_filter) == 0)) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
 		lp->huptimer = 0;
@@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if (ipt->pass_filter
-	    && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) {
+	    && sk_run_filter(skb, ipt->pass_filter) == 0) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
 		kfree_skb(skb);
 		goto unlock;
 	}
 	if (!(ipt->active_filter
-	      && sk_run_filter(skb, ipt->active_filter,
-		               ipt->active_len) == 0)) {
+	      && sk_run_filter(skb, ipt->active_filter) == 0)) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
 		lp->huptimer = 0;
@@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
 	}
 	
 	drop |= is->pass_filter
-	        && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0;
+	        && sk_run_filter(skb, is->pass_filter) == 0;
 	drop |= is->active_filter
-	        && sk_run_filter(skb, is->active_filter, is->active_len) == 0;
+	        && sk_run_filter(skb, is->active_filter) == 0;
 	
 	skb_push(skb, IPPP_MAX_HEADER - 4);
 	return drop;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 09cf56d0416a..0c91598ae280 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		   a four-byte PPP header on each packet */
 		*skb_push(skb, 2) = 1;
 		if (ppp->pass_filter &&
-		    sk_run_filter(skb, ppp->pass_filter,
-				  ppp->pass_len) == 0) {
+		    sk_run_filter(skb, ppp->pass_filter) == 0) {
 			if (ppp->debug & 1)
 				printk(KERN_DEBUG "PPP: outbound frame not passed\n");
 			kfree_skb(skb);
@@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		}
 		/* if this packet passes the active filter, record the time */
 		if (!(ppp->active_filter &&
-		      sk_run_filter(skb, ppp->active_filter,
-				    ppp->active_len) == 0))
+		      sk_run_filter(skb, ppp->active_filter) == 0))
 			ppp->last_xmit = jiffies;
 		skb_pull(skb, 2);
 #else
@@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
 			*skb_push(skb, 2) = 0;
 			if (ppp->pass_filter &&
-			    sk_run_filter(skb, ppp->pass_filter,
-					  ppp->pass_len) == 0) {
+			    sk_run_filter(skb, ppp->pass_filter) == 0) {
 				if (ppp->debug & 1)
 					printk(KERN_DEBUG "PPP: inbound frame "
 					       "not passed\n");
@@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 				return;
 			}
 			if (!(ppp->active_filter &&
-			      sk_run_filter(skb, ppp->active_filter,
-					    ppp->active_len) == 0))
+			      sk_run_filter(skb, ppp->active_filter) == 0))
 				ppp->last_recv = jiffies;
 			__skb_pull(skb, 2);
 		} else
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d703b7e..447a775878fb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
 extern unsigned int sk_run_filter(struct sk_buff *skb,
-				  struct sock_filter *filter, int flen);
+				  const struct sock_filter *filter);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d39cd3..9e77b3c816c5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	rcu_read_lock_bh();
 	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
  *	sk_run_filter - run a filter on a socket
  *	@skb: buffer to run the filter on
  *	@filter: filter to apply
- *	@flen: length of filter
  *
  * Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
 {
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
-	int pc;
 
 	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
-	for (pc = 0; pc < flen; pc++) {
-		const struct sock_filter *fentry = &filter[pc];
-		u32 f_k = fentry->k;
+	for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define	K (fentry->k)
+#else
+		const u32 K = fentry->k;
+#endif
 
 		switch (fentry->code) {
 		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
 		case BPF_S_ALU_ADD_K:
-			A += f_k;
+			A += K;
 			continue;
 		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
 		case BPF_S_ALU_SUB_K:
-			A -= f_k;
+			A -= K;
 			continue;
 		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
 		case BPF_S_ALU_MUL_K:
-			A *= f_k;
+			A *= K;
 			continue;
 		case BPF_S_ALU_DIV_X:
 			if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= f_k;
+			A /= K;
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
 		case BPF_S_ALU_AND_K:
-			A &= f_k;
+			A &= K;
 			continue;
 		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
 		case BPF_S_ALU_OR_K:
-			A |= f_k;
+			A |= K;
 			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
 		case BPF_S_ALU_LSH_K:
-			A <<= f_k;
+			A <<= K;
 			continue;
 		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
 		case BPF_S_ALU_RSH_K:
-			A >>= f_k;
+			A >>= K;
 			continue;
 		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
 		case BPF_S_JMP_JA:
-			pc += f_k;
+			fentry += K;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > f_k) ? fentry->jt : fentry->jf;
+			fentry += (A > K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= f_k) ? fentry->jt : fentry->jf;
+			fentry += (A >= K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == f_k) ? fentry->jt : fentry->jf;
+			fentry += (A == K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & f_k) ? fentry->jt : fentry->jf;
+			fentry += (A & K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
-			pc += (A > X) ? fentry->jt : fentry->jf;
+			fentry += (A > X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_X:
-			pc += (A >= X) ? fentry->jt : fentry->jf;
+			fentry += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_X:
-			pc += (A == X) ? fentry->jt : fentry->jf;
+			fentry += (A == X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_X:
-			pc += (A & X) ? fentry->jt : fentry->jf;
+			fentry += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = f_k;
+			k = K;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = f_k;
+			k = K;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = f_k;
+			k = K;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, f_k, 1, &tmp);
+			ptr = load_pointer(skb, K, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = f_k;
+			A = K;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = f_k;
+			X = K;
 			continue;
 		case BPF_S_LD_MEM:
-			A = (memvalid & (1UL << f_k)) ?
-				mem[f_k] : 0;
+			A = (memvalid & (1UL << K)) ?
+				mem[K] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = (memvalid & (1UL << f_k)) ?
-				mem[f_k] : 0;
+			X = (memvalid & (1UL << K)) ?
+				mem[K] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -328,16 +331,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return f_k;
+			return K;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			memvalid |= 1UL << f_k;
-			mem[f_k] = A;
+			memvalid |= 1UL << K;
+			mem[K] = A;
 			continue;
 		case BPF_S_STX:
-			memvalid |= 1UL << f_k;
-			mem[f_k] = X;
+			memvalid |= 1UL << K;
+			mem[K] = X;
 			continue;
 		default:
 			WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..dac7ed687f60 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
 	if (likely(skb->dev &&
 		   skb->dev->phydev &&
 		   skb->dev->phydev->drv))
-		return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+		return sk_run_filter(skb, ptp_filter);
 	else
 		return PTP_CLASS_NONE;
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 20964560a0ed..b6372dd128d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 	rcu_read_lock_bh();
 	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter != NULL)
-		res = sk_run_filter(skb, filter->insns, filter->len);
+		res = sk_run_filter(skb, filter->insns);
 	rcu_read_unlock_bh();
 
 	return res;
-- 
cgit v1.2.3-71-gd317


From ecf7ace9a8450303a987aa8364e53860cd50e554 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 16 Nov 2010 15:21:07 +0100
Subject: kref: Add a kref_sub function

Makes it possible to optimize batched multiple unrefs.
Initial user will be drivers/gpu/ttm which accumulates unrefs to be
processed outside of atomic code.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/kref.h |  2 ++
 lib/kref.c           | 30 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 6cc38fc07ab7..d4a62ab2ee5e 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -24,5 +24,7 @@ struct kref {
 void kref_init(struct kref *kref);
 void kref_get(struct kref *kref);
 int kref_put(struct kref *kref, void (*release) (struct kref *kref));
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release) (struct kref *kref));
 
 #endif /* _KREF_H_ */
diff --git a/lib/kref.c b/lib/kref.c
index d3d227a08a4b..3efb882b11db 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
+
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of recounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref from remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
+
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
+EXPORT_SYMBOL(kref_sub);
-- 
cgit v1.2.3-71-gd317


From eb06acdc85585f28864261f28659157848762ee4 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Thu, 28 Oct 2010 13:10:50 +0000
Subject: macvlan: Introduce 'passthru' mode to takeover the underlying device

With the current default 'vepa' mode, a KVM guest using virtio with
macvtap backend has the following limitations.
- cannot change/add a mac address on the guest virtio-net
- cannot create a vlan device on the guest virtio-net
- cannot enable promiscuous mode on guest virtio-net

To address these limitations, this patch introduces a new mode called
'passthru' when creating a macvlan device which allows takeover of the
underlying device and passing it to a guest using virtio with macvtap
backend.

Only one macvlan device is allowed in passthru mode and it inherits
the mac address from the underlying device and sets it in promiscuous
mode to receive and forward all the packets.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>

-------------------------------------------------------------------------
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c   | 33 ++++++++++++++++++++++++++++++++-
 include/linux/if_link.h |  1 +
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 93f0ba25c808..6ed577b065df 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -38,6 +38,7 @@ struct macvlan_port {
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct rcu_head		rcu;
+	bool 			passthru;
 };
 
 #define macvlan_port_get_rcu(dev) \
@@ -169,6 +170,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 			macvlan_broadcast(skb, port, NULL,
 					  MACVLAN_MODE_PRIVATE |
 					  MACVLAN_MODE_VEPA    |
+					  MACVLAN_MODE_PASSTHRU|
 					  MACVLAN_MODE_BRIDGE);
 		else if (src->mode == MACVLAN_MODE_VEPA)
 			/* flood to everyone except source */
@@ -185,7 +187,10 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 		return skb;
 	}
 
-	vlan = macvlan_hash_lookup(port, eth->h_dest);
+	if (port->passthru)
+		vlan = list_first_entry(&port->vlans, struct macvlan_dev, list);
+	else
+		vlan = macvlan_hash_lookup(port, eth->h_dest);
 	if (vlan == NULL)
 		return skb;
 
@@ -288,6 +293,11 @@ static int macvlan_open(struct net_device *dev)
 	struct net_device *lowerdev = vlan->lowerdev;
 	int err;
 
+	if (vlan->port->passthru) {
+		dev_set_promiscuity(lowerdev, 1);
+		goto hash_add;
+	}
+
 	err = -EBUSY;
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
@@ -300,6 +310,8 @@ static int macvlan_open(struct net_device *dev)
 		if (err < 0)
 			goto del_unicast;
 	}
+
+hash_add:
 	macvlan_hash_add(vlan);
 	return 0;
 
@@ -314,12 +326,18 @@ static int macvlan_stop(struct net_device *dev)
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
 
+	if (vlan->port->passthru) {
+		dev_set_promiscuity(lowerdev, -1);
+		goto hash_del;
+	}
+
 	dev_mc_unsync(lowerdev, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(lowerdev, -1);
 
 	dev_uc_del(lowerdev, dev->dev_addr);
 
+hash_del:
 	macvlan_hash_del(vlan);
 	return 0;
 }
@@ -559,6 +577,7 @@ static int macvlan_port_create(struct net_device *dev)
 	if (port == NULL)
 		return -ENOMEM;
 
+	port->passthru = false;
 	port->dev = dev;
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
@@ -603,6 +622,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		case MACVLAN_MODE_PRIVATE:
 		case MACVLAN_MODE_VEPA:
 		case MACVLAN_MODE_BRIDGE:
+		case MACVLAN_MODE_PASSTHRU:
 			break;
 		default:
 			return -EINVAL;
@@ -652,6 +672,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	}
 	port = macvlan_port_get(lowerdev);
 
+	/* Only 1 macvlan device can be created in passthru mode */
+	if (port->passthru)
+		return -EINVAL;
+
 	vlan->lowerdev = lowerdev;
 	vlan->dev      = dev;
 	vlan->port     = port;
@@ -662,6 +686,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	if (data && data[IFLA_MACVLAN_MODE])
 		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
 
+	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+		if (!list_empty(&port->vlans))
+			return -EINVAL;
+		port->passthru = true;
+		memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN);
+	}
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_port;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 2e02e4d7b11e..6485d2a89bec 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -259,6 +259,7 @@ enum macvlan_mode {
 	MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
 	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
+	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
 };
 
 /* SR-IOV virtual function management section */
-- 
cgit v1.2.3-71-gd317


From 23ed992a5ebe6964ebe312b54142fbc5e8185cdc Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 5 Nov 2010 18:04:52 +0100
Subject: drm/i915|intel-gtt: consolidate intel-gtt.h headers

... and a few other defines.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c    |  5 -----
 drivers/gpu/drm/i915/i915_gem.c |  1 -
 include/drm/intel-gtt.h         | 12 ++++++++++++
 include/linux/intel-gtt.h       | 20 --------------------
 4 files changed, 12 insertions(+), 26 deletions(-)
 delete mode 100644 include/linux/intel-gtt.h

(limited to 'include/linux')

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 72267c801637..291ac5113576 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -24,7 +24,6 @@
 #include <asm/smp.h>
 #include "agp.h"
 #include "intel-agp.h"
-#include <linux/intel-gtt.h>
 #include <drm/intel-gtt.h>
 
 /*
@@ -39,10 +38,6 @@
 #define USE_PCI_DMA_API 0
 #endif
 
-#define AGP_DCACHE_MEMORY	1
-#define AGP_PHYS_MEMORY		2
-#define INTEL_AGP_CACHED_MEMORY 3
-
 struct intel_gtt_driver {
 	unsigned int gen : 8;
 	unsigned int is_g33 : 1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf05ac414b1f..68492357658c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -34,7 +34,6 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
-#include <linux/intel-gtt.h>
 
 struct change_domains {
 	uint32_t invalidate_domains;
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 020f8aab7e5b..9f91cbe35157 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,5 +13,17 @@ const struct intel_gtt {
 	unsigned int gtt_mappable_entries;
 } *intel_gtt_get(void);
 
+
+/* Special gtt memory types */
+#define AGP_DCACHE_MEMORY	1
+#define AGP_PHYS_MEMORY		2
+
+/* New caching attributes for gen6/sandybridge */
+#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
+#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
+
+/* flag for GFDT type */
+#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
+
 #endif
 
diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h
deleted file mode 100644
index 1d19ab2afa39..000000000000
--- a/include/linux/intel-gtt.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Common Intel AGPGART and GTT definitions.
- */
-#ifndef _INTEL_GTT_H
-#define _INTEL_GTT_H
-
-#include <linux/agp_backend.h>
-
-/* This is for Intel only GTT controls.
- *
- * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only
- */
-
-#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
-#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
-
-/* flag for GFDT type */
-#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
-
-#endif
-- 
cgit v1.2.3-71-gd317


From f050a8abbda0efcd597c6b1825e3b9ce4d613383 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 5 Nov 2010 18:40:56 +0100
Subject: agp: kill agp_flush_chipset and corresponding ioctl

The intel drm calls the chipset functions now directly. Userspace
never called the corresponding ioctl, hence it can be killed, too.

Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/agp.h          | 1 -
 drivers/char/agp/compat_ioctl.c | 1 -
 drivers/char/agp/compat_ioctl.h | 1 -
 drivers/char/agp/frontend.c     | 8 --------
 drivers/char/agp/generic.c      | 7 -------
 drivers/char/agp/intel-gtt.c    | 6 ------
 include/linux/agp_backend.h     | 1 -
 7 files changed, 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 5259065f3c79..3e67ddde9e16 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -120,7 +120,6 @@ struct agp_bridge_driver {
 	void (*agp_destroy_page)(struct page *, int flags);
 	void (*agp_destroy_pages)(struct agp_memory *);
 	int (*agp_type_to_mask_type) (struct agp_bridge_data *, int);
-	void (*chipset_flush)(struct agp_bridge_data *);
 };
 
 struct agp_bridge_data {
diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c
index 9d2c97a69cdd..a48e05b31593 100644
--- a/drivers/char/agp/compat_ioctl.c
+++ b/drivers/char/agp/compat_ioctl.c
@@ -276,7 +276,6 @@ long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 
 	case AGPIOC_CHIPSET_FLUSH32:
-		ret_val = agpioc_chipset_flush_wrap(curr_priv);
 		break;
 	}
 
diff --git a/drivers/char/agp/compat_ioctl.h b/drivers/char/agp/compat_ioctl.h
index 0c9678ac0371..f30e0fd97963 100644
--- a/drivers/char/agp/compat_ioctl.h
+++ b/drivers/char/agp/compat_ioctl.h
@@ -102,6 +102,5 @@ void agp_free_memory_wrap(struct agp_memory *memory);
 struct agp_memory *agp_allocate_memory_wrap(size_t pg_count, u32 type);
 struct agp_memory *agp_find_mem_by_key(int key);
 struct agp_client *agp_find_client_by_pid(pid_t id);
-int agpioc_chipset_flush_wrap(struct agp_file_private *priv);
 
 #endif /* _AGP_COMPAT_H */
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 3cb4539a98b2..2e044338753c 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -957,13 +957,6 @@ static int agpioc_unbind_wrap(struct agp_file_private *priv, void __user *arg)
 	return agp_unbind_memory(memory);
 }
 
-int agpioc_chipset_flush_wrap(struct agp_file_private *priv)
-{
-	DBG("");
-	agp_flush_chipset(agp_bridge);
-	return 0;
-}
-
 static long agp_ioctl(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
@@ -1039,7 +1032,6 @@ static long agp_ioctl(struct file *file,
 		break;
 	       
 	case AGPIOC_CHIPSET_FLUSH:
-		ret_val = agpioc_chipset_flush_wrap(curr_priv);
 		break;
 	}
 
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 4956f1c8f9d5..78bc8de0f234 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -81,13 +81,6 @@ static int agp_get_key(void)
 	return -1;
 }
 
-void agp_flush_chipset(struct agp_bridge_data *bridge)
-{
-	if (bridge->driver->chipset_flush)
-		bridge->driver->chipset_flush(bridge);
-}
-EXPORT_SYMBOL(agp_flush_chipset);
-
 /*
  * Use kmalloc if possible for the page list. Otherwise fall back to
  * vmalloc. This speeds things up and also saves memory for small AGP
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 8e2e208c925b..1603e4f8ae73 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -984,11 +984,6 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem,
 	return 0;
 }
 
-static void intel_fake_agp_chipset_flush(struct agp_bridge_data *bridge)
-{
-	intel_private.driver->chipset_flush();
-}
-
 static struct agp_memory *intel_fake_agp_alloc_by_type(size_t pg_count,
 						       int type)
 {
@@ -1222,7 +1217,6 @@ static const struct agp_bridge_driver intel_fake_agp_driver = {
 	.agp_alloc_pages        = agp_generic_alloc_pages,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_destroy_pages      = agp_generic_destroy_pages,
-	.chipset_flush		= intel_fake_agp_chipset_flush,
 };
 
 static const struct intel_gtt_driver i81x_gtt_driver = {
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index 09ea4a1e9505..a479b4885d25 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -106,6 +106,5 @@ extern int agp_rebind_memory(void);
 extern void agp_enable(struct agp_bridge_data *, u32);
 extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *);
 extern void agp_backend_release(struct agp_bridge_data *);
-extern void agp_flush_chipset(struct agp_bridge_data *);
 
 #endif				/* _AGP_BACKEND_H */
-- 
cgit v1.2.3-71-gd317


From cb16b67b5cb33b7d6732e0c416d29d933eea13ce Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 5 Nov 2010 22:27:10 +0100
Subject: agp: kill agp_rebind_memory

Its only user, intel-gtt.c is now gone.

Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/generic.c  | 20 --------------------
 include/linux/agp_backend.h |  1 -
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 78bc8de0f234..012cba0d6d96 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -480,26 +480,6 @@ int agp_unbind_memory(struct agp_memory *curr)
 }
 EXPORT_SYMBOL(agp_unbind_memory);
 
-/**
- *	agp_rebind_emmory  -  Rewrite the entire GATT, useful on resume
- */
-int agp_rebind_memory(void)
-{
-	struct agp_memory *curr;
-	int ret_val = 0;
-
-	spin_lock(&agp_bridge->mapped_lock);
-	list_for_each_entry(curr, &agp_bridge->mapped_list, mapped_list) {
-		ret_val = curr->bridge->driver->insert_memory(curr,
-							      curr->pg_start,
-							      curr->type);
-		if (ret_val != 0)
-			break;
-	}
-	spin_unlock(&agp_bridge->mapped_lock);
-	return ret_val;
-}
-EXPORT_SYMBOL(agp_rebind_memory);
 
 /* End - Routines for handling swapping of agp_memory into the GATT */
 
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index a479b4885d25..eaf6cd75a1b1 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -102,7 +102,6 @@ extern struct agp_memory *agp_allocate_memory(struct agp_bridge_data *, size_t,
 extern int agp_copy_info(struct agp_bridge_data *, struct agp_kern_info *);
 extern int agp_bind_memory(struct agp_memory *, off_t);
 extern int agp_unbind_memory(struct agp_memory *);
-extern int agp_rebind_memory(void);
 extern void agp_enable(struct agp_bridge_data *, u32);
 extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *);
 extern void agp_backend_release(struct agp_bridge_data *);
-- 
cgit v1.2.3-71-gd317


From 293bb1c41b728d4aa248fe8a0acd2b9066ff5c34 Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Wed, 24 Nov 2010 02:38:05 +0000
Subject: stmmac: add init/exit callback in plat_stmmacenet_data struct

This patch adds in the plat_stmmacenet_data
the init and exit callbacks that can be used
for invoking specific platform functions.
For example, on ST targets, these call the
PAD manager functions to set PIO lines and
syscfg registers.
The patch removes the stmmac_claim_resource
only used on STM Kernels as well.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/stmmac/stmmac.h      | 22 ----------------------
 drivers/net/stmmac/stmmac_main.c | 18 +++++++++++++-----
 include/linux/stmmac.h           |  6 +++---
 3 files changed, 16 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h
index 31575670d862..8ae76501eb74 100644
--- a/drivers/net/stmmac/stmmac.h
+++ b/drivers/net/stmmac/stmmac.h
@@ -87,28 +87,6 @@ struct stmmac_priv {
 	struct plat_stmmacenet_data *plat;
 };
 
-#ifdef CONFIG_STM_DRIVERS
-#include <linux/stm/pad.h>
-static inline int stmmac_claim_resource(struct platform_device *pdev)
-{
-	int ret = 0;
-	struct plat_stmmacenet_data *plat_dat = pdev->dev.platform_data;
-
-	/* Pad routing setup */
-	if (IS_ERR(devm_stm_pad_claim(&pdev->dev, plat_dat->pad_config,
-			dev_name(&pdev->dev)))) {
-		printk(KERN_ERR "%s: Failed to request pads!\n", __func__);
-		ret = -ENODEV;
-	}
-	return ret;
-}
-#else
-static inline int stmmac_claim_resource(struct platform_device *pdev)
-{
-	return 0;
-}
-#endif
-
 extern int stmmac_mdio_unregister(struct net_device *ndev);
 extern int stmmac_mdio_register(struct net_device *ndev);
 extern void stmmac_set_ethtool_ops(struct net_device *netdev);
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 29ba28660fa9..b806cd3515b4 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1643,7 +1643,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	struct resource *res;
 	void __iomem *addr = NULL;
 	struct net_device *ndev = NULL;
-	struct stmmac_priv *priv;
+	struct stmmac_priv *priv = NULL;
 	struct plat_stmmacenet_data *plat_dat;
 
 	pr_info("STMMAC driver:\n\tplatform registration... ");
@@ -1708,10 +1708,12 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	/* Set the I/O base addr */
 	ndev->base_addr = (unsigned long)addr;
 
-	/* Verify embedded resource for the platform */
-	ret = stmmac_claim_resource(pdev);
-	if (ret < 0)
-		goto out;
+	/* Custom initialisation */
+	if (priv->plat->init) {
+		ret = priv->plat->init(pdev);
+		if (unlikely(ret))
+			goto out;
+	}
 
 	/* MAC HW revice detection */
 	ret = stmmac_mac_device_setup(ndev);
@@ -1745,6 +1747,9 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 
 out:
 	if (ret < 0) {
+		if (priv->plat->exit)
+			priv->plat->exit(pdev);
+
 		platform_set_drvdata(pdev, NULL);
 		release_mem_region(res->start, resource_size(res));
 		if (addr != NULL)
@@ -1778,6 +1783,9 @@ static int stmmac_dvr_remove(struct platform_device *pdev)
 
 	stmmac_mdio_unregister(ndev);
 
+	if (priv->plat->exit)
+		priv->plat->exit(pdev);
+
 	platform_set_drvdata(pdev, NULL);
 	unregister_netdev(ndev);
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index d66c61774d95..e10352915698 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -40,9 +40,9 @@ struct plat_stmmacenet_data {
 	int pmt;
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	void (*bus_setup)(void __iomem *ioaddr);
-#ifdef CONFIG_STM_DRIVERS
-	struct stm_pad_config *pad_config;
-#endif
+	int (*init)(struct platform_device *pdev);
+	void (*exit)(struct platform_device *pdev);
+	void *custom_cfg;
 	void *bsp_priv;
 };
 
-- 
cgit v1.2.3-71-gd317


From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 23 Nov 2010 13:12:15 +0000
Subject: ipv6: mcast: RCU conversion

ipv6_sk_mc_lock rwlock becomes a spinlock.

readers (inet6_mc_check()) now takes rcu_read_lock() instead of read
lock. Writers dont need to disable BH anymore.

struct ipv6_mc_socklist objects are reclaimed after one RCU grace
period.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h   |  2 +-
 include/net/if_inet6.h |  3 +-
 net/ipv6/mcast.c       | 75 +++++++++++++++++++++++++++++---------------------
 3 files changed, 47 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8e429d0e0405..0c997767429a 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -364,7 +364,7 @@ struct ipv6_pinfo {
 
 	__u32			dst_cookie;
 
-	struct ipv6_mc_socklist	*ipv6_mc_list;
+	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
 	struct ipv6_fl_socklist *ipv6_fl_list;
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index f95ff8d9aa47..04977eefb0ee 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -89,10 +89,11 @@ struct ip6_sf_socklist {
 struct ipv6_mc_socklist {
 	struct in6_addr		addr;
 	int			ifindex;
-	struct ipv6_mc_socklist *next;
+	struct ipv6_mc_socklist __rcu *next;
 	rwlock_t		sflock;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip6_sf_socklist	*sflist;
+	struct rcu_head		rcu;
 };
 
 struct ip6_sf_list {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9c5074528a71..49f986d626a0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 /* Big mc list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
@@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
  *	socket join on multicast group
  */
 
+#define for_each_pmc_rcu(np, pmc)				\
+	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next))
+
 int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
@@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
-	for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			read_unlock_bh(&ipv6_sk_mc_lock);
+			rcu_read_unlock();
 			return -EADDRINUSE;
 		}
 	}
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
 
@@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return err;
 	}
 
-	write_lock_bh(&ipv6_sk_mc_lock);
+	spin_lock(&ipv6_sk_mc_lock);
 	mc_lst->next = np->ipv6_mc_list;
-	np->ipv6_mc_list = mc_lst;
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	rcu_read_unlock();
 
 	return 0;
 }
 
+static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ipv6_mc_socklist, rcu));
+}
 /*
  *	socket leave on multicast group
  */
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6_mc_socklist *mc_lst, **lnk;
+	struct ipv6_mc_socklist *mc_lst;
+	struct ipv6_mc_socklist __rcu **lnk;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+	spin_lock(&ipv6_sk_mc_lock);
+	for (lnk = &np->ipv6_mc_list;
+	     (mc_lst = rcu_dereference_protected(*lnk,
+			lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
 			struct net_device *dev;
 
 			*lnk = mc_lst->next;
-			write_unlock_bh(&ipv6_sk_mc_lock);
+			spin_unlock(&ipv6_sk_mc_lock);
 
 			rcu_read_lock();
 			dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			rcu_read_unlock();
-			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+			call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
 			return 0;
 		}
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	return -EADDRNOTAVAIL;
 }
@@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 	struct ipv6_mc_socklist *mc_lst;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	while ((mc_lst = np->ipv6_mc_list) != NULL) {
+	spin_lock(&ipv6_sk_mc_lock);
+	while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+				lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
-		write_unlock_bh(&ipv6_sk_mc_lock);
+		spin_unlock(&ipv6_sk_mc_lock);
 
 		rcu_read_lock();
 		dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 		} else
 			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 		rcu_read_unlock();
-		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
-		write_lock_bh(&ipv6_sk_mc_lock);
+		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+		call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+
+		spin_lock(&ipv6_sk_mc_lock);
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 }
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 	err = -EADDRNOTAVAIL;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 done:
 	if (pmclocked)
 		write_unlock(&pmc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	dev = idev->dev;
 
 	err = 0;
-	read_lock(&ipv6_sk_mc_lock);
 
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
 	}
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	write_unlock(&pmc->sflock);
 	err = 0;
 done:
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	 * so reading the list is safe.
 	 */
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
@@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	struct ip6_sf_socklist *psl;
 	int rv = 1;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc) {
 		if (ipv6_addr_equal(&mc->addr, mc_addr))
 			break;
 	}
 	if (!mc) {
-		read_unlock(&ipv6_sk_mc_lock);
+		rcu_read_unlock();
 		return 1;
 	}
 	read_lock(&mc->sflock);
@@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 			rv = 0;
 	}
 	read_unlock(&mc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	return rv;
 }
-- 
cgit v1.2.3-71-gd317


From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:29 +0000
Subject: xps: Improvements in TX queue selection

In dev_pick_tx, don't do work in calculating queue
index or setting
the index in the sock unless the device has more than one queue.  This
allows the sock to be set only with a queue index of a multi-queue
device which is desirable if device are stacked like in a tunnel.

We also allow the mapping of a socket to queue to be changed.  To
maintain in order packet transmission a flag (ooo_okay) has been
added to the sk_buff structure.  If a transport layer sets this flag
on a packet, the transmit queue can be changed for the socket.
Presumably, the transport would set this if there was no possbility
of creating OOO packets (for instance, there are no packets in flight
for the socket).  This patch includes the modification in TCP output
for setting this flag.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 ++-
 net/core/dev.c         | 18 +++++++++++-------
 net/ipv4/tcp_output.c  |  5 ++++-
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898de61c..19f37a6ee6c4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -386,9 +386,10 @@ struct sk_buff {
 #else
 	__u8			deliver_no_wcard:1;
 #endif
+	__u8			ooo_okay:1;
 	kmemcheck_bitfield_end(flags2);
 
-	/* 0/14 bit hole */
+	/* 0/13 bit hole */
 
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
diff --git a/net/core/dev.c b/net/core/dev.c
index 381b8e280162..7b17674a29ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_select_queue) {
+	if (dev->real_num_tx_queues == 1)
+		queue_index = 0;
+	else if (ops->ndo_select_queue) {
 		queue_index = ops->ndo_select_queue(dev, skb);
 		queue_index = dev_cap_txqueue(dev, queue_index);
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
-			queue_index = 0;
-			if (dev->real_num_tx_queues > 1)
-				queue_index = skb_tx_hash(dev, skb);
+		if (queue_index < 0 || skb->ooo_okay ||
+		    queue_index >= dev->real_num_tx_queues) {
+			int old_index = queue_index;
 
-			if (sk) {
-				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+			queue_index = skb_tx_hash(dev, skb);
+
+			if (queue_index != old_index && sk) {
+				struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
 				if (dst && skb_dst(skb) == dst)
 					sk_tx_queue_set(sk, queue_index);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bb8f547fc7d2..5f29b2e20e23 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -822,8 +822,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
+	if (tcp_packets_in_flight(tp) == 0) {
 		tcp_ca_event(sk, CA_EVENT_TX_START);
+		skb->ooo_okay = 1;
+	} else
+		skb->ooo_okay = 0;
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
-- 
cgit v1.2.3-71-gd317


From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:27 +0000
Subject: xps: Transmit Packet Steering

This patch implements transmit packet steering (XPS) for multiqueue
devices.  XPS selects a transmit queue during packet transmission based
on configuration.  This is done by mapping the CPU transmitting the
packet to a queue.  This is the transmit side analogue to RPS-- where
RPS is selecting a CPU based on receive queue, XPS selects a queue
based on the CPU (previously there was an XPS patch from Eric
Dumazet, but that might more appropriately be called transmit completion
steering).

Each transmit queue can be associated with a number of CPUs which will
use the queue to send packets.  This is configured as a CPU mask on a
per queue basis in:

/sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

The mappings are stored per device in an inverted data structure that
maps CPUs to queues.  In the netdevice structure this is an array of
num_possible_cpu structures where each structure holds and array of
queue_indexes for queues which that CPU can use.

The benefits of XPS are improved locality in the per queue data
structures.  Also, transmit completions are more likely to be done
nearer to the sending thread, so this should promote locality back
to the socket on free (e.g. UDP).  The benefits of XPS are dependent on
cache hierarchy, application load, and other factors.  XPS would
nominally be configured so that a queue would only be shared by CPUs
which are sharing a cache, the degenerative configuration woud be that
each CPU has it's own queue.

Below are some benchmark results which show the potential benfit of
this patch.  The netperf test has 500 instances of netperf TCP_RR test
with 1 byte req. and resp.

bnx2x on 16 core AMD
   XPS (16 queues, 1 TX queue per CPU)  1234K at 100% CPU
   No XPS (16 queues)                   996K at 100% CPU

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  30 ++++
 net/core/dev.c            |  53 ++++++-
 net/core/net-sysfs.c      | 369 +++++++++++++++++++++++++++++++++++++++++++++-
 net/core/net-sysfs.h      |   3 +
 4 files changed, 447 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b45c1b8b1d19..badf9285fe0d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -503,6 +503,10 @@ struct netdev_queue {
 	struct Qdisc		*qdisc;
 	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
+#ifdef CONFIG_RPS
+	struct kobject		kobj;
+#endif
+
 /*
  * write mostly part
  */
@@ -529,6 +533,30 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
+/*
+ * This structure holds an XPS map which can be of variable length.  The
+ * map is an array of queues.
+ */
+struct xps_map {
+	unsigned int len;
+	unsigned int alloc_len;
+	struct rcu_head rcu;
+	u16 queues[0];
+};
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
+    / sizeof(u16))
+
+/*
+ * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ */
+struct xps_dev_maps {
+	struct rcu_head rcu;
+	struct xps_map *cpu_map[0];
+};
+#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
+    (nr_cpu_ids * sizeof(struct xps_map *)))
+
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
  * tail pointer for that CPU's input queue at the time of last enqueue.
@@ -1016,6 +1044,8 @@ struct net_device {
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 
+	struct xps_dev_maps	*xps_maps;
+
 	/* These may be needed for future network-power-down code. */
 
 	/*
diff --git a/net/core/dev.c b/net/core/dev.c
index 7b17674a29ec..c852f0038a08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
+	int rc;
+
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
 	if (dev->reg_state == NETREG_REGISTERED) {
 		ASSERT_RTNL();
 
+		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+						  txq);
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 	return queue_index;
 }
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
@@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 		    queue_index >= dev->real_num_tx_queues) {
 			int old_index = queue_index;
 
-			queue_index = skb_tx_hash(dev, skb);
+			queue_index = get_xps_queue(dev, skb);
+			if (queue_index < 0)
+				queue_index = skb_tx_hash(dev, skb);
 
 			if (queue_index != old_index && sk) {
 				struct dst_entry *dst =
@@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
+	int i;
 
 	BUG_ON(count < 1);
 
@@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 		return -ENOMEM;
 	}
 	dev->_tx = tx;
+
+	for (i = 0; i < count; i++)
+		tx[i].dev = dev;
+
 	return 0;
 }
 
@@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue,
 				  void *_unused)
 {
-	queue->dev = dev;
-
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7abeb7ceaa4c..68dbbfdee274 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -772,18 +772,377 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 	return error;
 }
 
-static int rx_queue_register_kobjects(struct net_device *net)
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr,		\
+    struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+				       struct attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+	.show = netdev_queue_attr_show,
+	.store = netdev_queue_attr_store,
+};
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
+	struct net_device *dev = queue->dev;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		if (queue == &dev->_tx[i])
+			break;
+
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+			    struct netdev_queue_attribute *attribute, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	cpumask_var_t mask;
+	unsigned long index;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			struct xps_map *map =
+			    rcu_dereference(dev_maps->cpu_map[i]);
+			if (map) {
+				int j;
+				for (j = 0; j < map->len; j++) {
+					if (map->queues[j] == index) {
+						cpumask_set_cpu(i, mask);
+						break;
+					}
+				}
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void xps_map_release(struct rcu_head *rcu)
+{
+	struct xps_map *map = container_of(rcu, struct xps_map, rcu);
+
+	kfree(map);
+}
+
+static void xps_dev_maps_release(struct rcu_head *rcu)
+{
+	struct xps_dev_maps *dev_maps =
+	    container_of(rcu, struct xps_dev_maps, rcu);
+
+	kfree(dev_maps);
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+		      struct netdev_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	cpumask_var_t mask;
+	int err, i, cpu, pos, map_len, alloc_len, need_set;
+	unsigned long index;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	new_dev_maps = kzalloc(max_t(unsigned,
+	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+	if (!new_dev_maps) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = dev->xps_maps;
+
+	for_each_possible_cpu(cpu) {
+		new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
+
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
+
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc(XPS_MAP_SIZE(alloc_len),
+				    GFP_KERNEL);
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		new_dev_maps->cpu_map[cpu] = new_map;
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
+		if (map && new_dev_maps->cpu_map[cpu] != map)
+			call_rcu(&map->rcu, xps_map_release);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty)
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	else {
+		kfree(new_dev_maps);
+		rcu_assign_pointer(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+
+	mutex_unlock(&xps_map_mutex);
+
+	free_cpumask_var(mask);
+	return len;
+
+error:
+	mutex_unlock(&xps_map_mutex);
+
+	if (new_dev_maps)
+		for_each_possible_cpu(i)
+			kfree(new_dev_maps->cpu_map[i]);
+	kfree(new_dev_maps);
+	free_cpumask_var(mask);
+	return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+	&xps_cpus_attribute.attr,
+	NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	unsigned long index;
+	int i, pos, nonempty = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = dev->xps_maps;
+
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			map  = dev_maps->cpu_map[i];
+			if (!map)
+				continue;
+
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+
+			if (pos < map->len) {
+				if (map->len > 1)
+					map->queues[pos] =
+					    map->queues[--map->len];
+				else {
+					RCU_INIT_POINTER(dev_maps->cpu_map[i],
+					    NULL);
+					call_rcu(&map->rcu, xps_map_release);
+					map = NULL;
+				}
+			}
+			if (map)
+				nonempty = 1;
+		}
+
+		if (!nonempty) {
+			RCU_INIT_POINTER(dev->xps_maps, NULL);
+			call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+		}
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+	.sysfs_ops = &netdev_queue_sysfs_ops,
+	.release = netdev_queue_release,
+	.default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_queue *queue = net->_tx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+	    "tx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+	dev_hold(queue->dev);
+
+	return error;
+}
+
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+	int i;
+	int error = 0;
+
+	for (i = old_num; i < new_num; i++) {
+		error = netdev_queue_add_kobject(net, i);
+		if (error) {
+			new_num = old_num;
+			break;
+		}
+	}
+
+	while (--i >= new_num)
+		kobject_put(&net->_tx[i].kobj);
+
+	return error;
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+	int error = 0, txq = 0, rxq = 0;
+
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
-	return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+
+	error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+	if (error)
+		goto error;
+	rxq = net->real_num_rx_queues;
+
+	error = netdev_queue_update_kobjects(net, 0,
+					     net->real_num_tx_queues);
+	if (error)
+		goto error;
+	txq = net->real_num_tx_queues;
+
+	return 0;
+
+error:
+	netdev_queue_update_kobjects(net, txq, 0);
+	net_rx_queue_update_kobjects(net, rxq, 0);
+	return error;
 }
 
-static void rx_queue_remove_kobjects(struct net_device *net)
+static void remove_queue_kobjects(struct net_device *net)
 {
 	net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
+	netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0);
 	kset_unregister(net->queues_kset);
 }
 #endif /* CONFIG_RPS */
@@ -886,7 +1245,7 @@ void netdev_unregister_kobject(struct net_device * net)
 	kobject_get(&dev->kobj);
 
 #ifdef CONFIG_RPS
-	rx_queue_remove_kobjects(net);
+	remove_queue_kobjects(net);
 #endif
 
 	device_del(dev);
@@ -927,7 +1286,7 @@ int netdev_register_kobject(struct net_device *net)
 		return error;
 
 #ifdef CONFIG_RPS
-	error = rx_queue_register_kobjects(net);
+	error = register_queue_kobjects(net);
 	if (error) {
 		device_del(dev);
 		return error;
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e1571548d..25ec2ee57df7 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -6,6 +6,9 @@ int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
 #ifdef CONFIG_RPS
 int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+int netdev_queue_update_kobjects(struct net_device *net,
+				 int old_num, int new_num);
+
 #endif
 
 #endif
-- 
cgit v1.2.3-71-gd317


From ccb14354017272ddac002e859a2711610b6af174 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 24 Nov 2010 16:18:36 -0500
Subject: Revert "nl80211/mac80211: Report signal average"

This reverts commit 86107fd170bc379869250eb7e1bd393a3a70e8ae.

This patch inadvertantly changed the userland ABI.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 --
 include/net/cfg80211.h  | 4 ----
 net/mac80211/Kconfig    | 1 -
 net/mac80211/cfg.c      | 3 +--
 net/mac80211/rx.c       | 1 -
 net/mac80211/sta_info.c | 2 --
 net/mac80211/sta_info.h | 3 ---
 net/wireless/nl80211.c  | 3 ---
 8 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1ce3775e9e26..037b4e498890 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1161,7 +1161,6 @@ enum nl80211_rate_info {
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
- * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
  * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
@@ -1179,7 +1178,6 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_PLID,
 	NL80211_STA_INFO_PLINK_STATE,
 	NL80211_STA_INFO_SIGNAL,
-	NL80211_STA_INFO_SIGNAL_AVG,
 	NL80211_STA_INFO_TX_BITRATE,
 	NL80211_STA_INFO_RX_PACKETS,
 	NL80211_STA_INFO_TX_PACKETS,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 69e2364889f1..8fd9eebd0cc9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -424,7 +424,6 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
- * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -440,7 +439,6 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
-	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -487,7 +485,6 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
- * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -508,7 +505,6 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
-	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 798d9b9462e2..4d6f8653ec88 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,7 +6,6 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
-	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 92c9cf6a7d1c..0c544074479e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -343,9 +343,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
+		sinfo->filled |= STATION_INFO_SIGNAL;
 		sinfo->signal = (s8)sta->last_signal;
-		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9dd60a74181f..d2fcd22ab06d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1156,7 +1156,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
-	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f43fca8907f7..eff58571fd7e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,8 +244,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
-	ewma_init(&sta->avg_signal, 1000, 8);
-
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 84062e2c782c..9265acadef32 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,7 +13,6 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
-#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -225,7 +224,6 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
- * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -293,7 +291,6 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
-	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d06a40d17002..605553842226 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1872,9 +1872,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
-	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
-		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
-			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3-71-gd317


From c063dbf52b998b852122dff07a8b8dd430b38437 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 24 Nov 2010 08:10:05 +0100
Subject: cfg80211: allow using CQM event to notify packet loss

This adds the ability for drivers to use CQM events
to notify about packet loss for specific stations
(which could be the AP for the managed mode case).
Since the threshold might be determined by the
driver (it isn't passed in right now) it will be
passed out of the driver to userspace in the event.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  3 +++
 include/net/cfg80211.h  | 12 ++++++++++++
 net/wireless/mlme.c     | 12 ++++++++++++
 net/wireless/nl80211.c  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  4 ++++
 5 files changed, 76 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 037b4e498890..d706bf3badc8 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1819,6 +1819,8 @@ enum nl80211_ps_state {
  *	the minimum amount the RSSI level must change after an event before a
  *	new event may be issued (to reduce effects of RSSI oscillation).
  * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many
+ *	consecutive packets were not acknowledged by the peer
  * @__NL80211_ATTR_CQM_AFTER_LAST: internal
  * @NL80211_ATTR_CQM_MAX: highest key attribute
  */
@@ -1827,6 +1829,7 @@ enum nl80211_attr_cqm {
 	NL80211_ATTR_CQM_RSSI_THOLD,
 	NL80211_ATTR_CQM_RSSI_HYST,
 	NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+	NL80211_ATTR_CQM_PKT_LOSS_EVENT,
 
 	/* keep last */
 	__NL80211_ATTR_CQM_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dd4c43f512e2..0663945cfa48 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2601,6 +2601,18 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 			      enum nl80211_cqm_rssi_threshold_event rssi_event,
 			      gfp_t gfp);
 
+/**
+ * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer
+ * @dev: network device
+ * @peer: peer's MAC address
+ * @num_packets: how many packets were lost -- should be a fixed threshold
+ *	but probably no less than maybe 50, or maybe a throughput dependent
+ *	threshold (to account for temporary interference)
+ * @gfp: context flags
+ */
+void cfg80211_cqm_pktloss_notify(struct net_device *dev,
+				 const u8 *peer, u32 num_packets, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 26838d903b9a..6980a0c315b2 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
 }
 EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
+
+void cfg80211_cqm_pktloss_notify(struct net_device *dev,
+				 const u8 *peer, u32 num_packets, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	/* Indicate roaming trigger event to user space */
+	nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
+}
+EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8734efa663d1..67ff7e92cb99 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5715,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void
+nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *peer,
+				u32 num_packets, gfp_t gfp)
+{
+	struct sk_buff *msg;
+	struct nlattr *pinfoattr;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer);
+
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
+	if (!pinfoattr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets);
+
+	nla_nest_end(msg, pinfoattr);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 30d2f939150d..16c2f7190768 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev,
 			     enum nl80211_cqm_rssi_threshold_event rssi_event,
 			     gfp_t gfp);
+void
+nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *peer,
+				u32 num_packets, gfp_t gfp);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3-71-gd317


From a782d688e9c6f9ca9a7a9a28e8e2876969ddef53 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 24 Nov 2010 10:05:22 +0000
Subject: mmc: sh_mmcif: add DMA support

The MMCIF controller on sh-mobile platforms can use the DMA controller for data
transfers. Interface to the SH dmaengine driver to enable DMA. We also have to
lower the maximum number of segments to match with the number od DMA
descriptors on SuperH, this doesn't significantly affect driver's PIO
performance.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/mmc/host/Kconfig     |   6 ++
 drivers/mmc/host/sh_mmcif.c  | 246 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mmc/sh_mmcif.h |  15 ++-
 3 files changed, 258 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index d618e8673996..859e352d0b5f 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -466,6 +466,12 @@ config MMC_SH_MMCIF
 
 	  This driver supports MMCIF in sh7724/sh7757/sh7372.
 
+config SH_MMCIF_DMA
+	bool "Use DMA for MMCIF"
+	depends on MMC_SH_MMCIF
+	help
+	  Use SH dma-engine driver for data transfer
+
 config MMC_JZ4740
 	tristate "JZ4740 SD/Multimedia Card Interface support"
 	depends on MACH_JZ4740
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index b2f261cdaec1..d09a2b38eeeb 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -20,12 +20,14 @@
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sh_mmcif.h>
+#include <linux/pagemap.h>
 #include <linux/platform_device.h>
 
 #define DRIVER_NAME	"sh_mmcif"
@@ -162,8 +164,13 @@ struct sh_mmcif_host {
 	long timeout;
 	void __iomem *addr;
 	struct completion intr_wait;
-};
 
+	/* DMA support */
+	struct dma_chan		*chan_rx;
+	struct dma_chan		*chan_tx;
+	struct completion	dma_complete;
+	unsigned int            dma_sglen;
+};
 
 static inline void sh_mmcif_bitset(struct sh_mmcif_host *host,
 					unsigned int reg, u32 val)
@@ -177,6 +184,208 @@ static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host,
 	writel(~val & readl(host->addr + reg), host->addr + reg);
 }
 
+#ifdef CONFIG_SH_MMCIF_DMA
+static void mmcif_dma_complete(void *arg)
+{
+	struct sh_mmcif_host *host = arg;
+	dev_dbg(&host->pd->dev, "Command completed\n");
+
+	if (WARN(!host->data, "%s: NULL data in DMA completion!\n",
+		 dev_name(&host->pd->dev)))
+		return;
+
+	if (host->data->flags & MMC_DATA_READ)
+		dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen,
+			     DMA_FROM_DEVICE);
+	else
+		dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen,
+			     DMA_TO_DEVICE);
+
+	complete(&host->dma_complete);
+}
+
+static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
+{
+	struct scatterlist *sg = host->data->sg;
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan = host->chan_rx;
+	dma_cookie_t cookie = -EINVAL;
+	int ret;
+
+	ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_FROM_DEVICE);
+	if (ret > 0) {
+		host->dma_sglen = ret;
+		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+			DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = mmcif_dma_complete;
+		desc->callback_param = host;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
+		} else {
+			sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN);
+			chan->device->device_issue_pending(chan);
+		}
+	}
+	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
+		__func__, host->data->sg_len, ret, cookie);
+
+	if (!desc) {
+		/* DMA failed, fall back to PIO */
+		if (ret >= 0)
+			ret = -EIO;
+		host->chan_rx = NULL;
+		host->dma_sglen = 0;
+		dma_release_channel(chan);
+		/* Free the Tx channel too */
+		chan = host->chan_tx;
+		if (chan) {
+			host->chan_tx = NULL;
+			dma_release_channel(chan);
+		}
+		dev_warn(&host->pd->dev,
+			 "DMA failed: %d, falling back to PIO\n", ret);
+		sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	}
+
+	dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
+		desc, cookie, host->data->sg_len);
+}
+
+static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
+{
+	struct scatterlist *sg = host->data->sg;
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan = host->chan_tx;
+	dma_cookie_t cookie = -EINVAL;
+	int ret;
+
+	ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_TO_DEVICE);
+	if (ret > 0) {
+		host->dma_sglen = ret;
+		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+			DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = mmcif_dma_complete;
+		desc->callback_param = host;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
+		} else {
+			sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAWEN);
+			chan->device->device_issue_pending(chan);
+		}
+	}
+	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
+		__func__, host->data->sg_len, ret, cookie);
+
+	if (!desc) {
+		/* DMA failed, fall back to PIO */
+		if (ret >= 0)
+			ret = -EIO;
+		host->chan_tx = NULL;
+		host->dma_sglen = 0;
+		dma_release_channel(chan);
+		/* Free the Rx channel too */
+		chan = host->chan_rx;
+		if (chan) {
+			host->chan_rx = NULL;
+			dma_release_channel(chan);
+		}
+		dev_warn(&host->pd->dev,
+			 "DMA failed: %d, falling back to PIO\n", ret);
+		sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	}
+
+	dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d\n", __func__,
+		desc, cookie);
+}
+
+static bool sh_mmcif_filter(struct dma_chan *chan, void *arg)
+{
+	dev_dbg(chan->device->dev, "%s: slave data %p\n", __func__, arg);
+	chan->private = arg;
+	return true;
+}
+
+static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
+				 struct sh_mmcif_plat_data *pdata)
+{
+	host->dma_sglen = 0;
+
+	/* We can only either use DMA for both Tx and Rx or not use it at all */
+	if (pdata->dma) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		host->chan_tx = dma_request_channel(mask, sh_mmcif_filter,
+						    &pdata->dma->chan_priv_tx);
+		dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__,
+			host->chan_tx);
+
+		if (!host->chan_tx)
+			return;
+
+		host->chan_rx = dma_request_channel(mask, sh_mmcif_filter,
+						    &pdata->dma->chan_priv_rx);
+		dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__,
+			host->chan_rx);
+
+		if (!host->chan_rx) {
+			dma_release_channel(host->chan_tx);
+			host->chan_tx = NULL;
+			return;
+		}
+
+		init_completion(&host->dma_complete);
+	}
+}
+
+static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
+{
+	sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	/* Descriptors are freed automatically */
+	if (host->chan_tx) {
+		struct dma_chan *chan = host->chan_tx;
+		host->chan_tx = NULL;
+		dma_release_channel(chan);
+	}
+	if (host->chan_rx) {
+		struct dma_chan *chan = host->chan_rx;
+		host->chan_rx = NULL;
+		dma_release_channel(chan);
+	}
+
+	host->dma_sglen = 0;
+}
+#else
+static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
+{
+}
+
+static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
+{
+}
+
+static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
+				 struct sh_mmcif_plat_data *pdata)
+{
+	/* host->chan_tx, host->chan_tx and host->dma_sglen are all zero */
+}
+
+static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
+{
+}
+#endif
 
 static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk)
 {
@@ -564,7 +773,20 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
 	}
 	sh_mmcif_get_response(host, cmd);
 	if (host->data) {
-		ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
+		if (!host->dma_sglen) {
+			ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
+		} else {
+			long time =
+				wait_for_completion_interruptible_timeout(&host->dma_complete,
+									  host->timeout);
+			if (!time)
+				ret = -ETIMEDOUT;
+			else if (time < 0)
+				ret = time;
+			sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC,
+					BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+			host->dma_sglen = 0;
+		}
 		if (ret < 0)
 			mrq->data->bytes_xfered = 0;
 		else
@@ -622,6 +844,15 @@ static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		break;
 	}
 	host->data = mrq->data;
+	if (mrq->data) {
+		if (mrq->data->flags & MMC_DATA_READ) {
+			if (host->chan_rx)
+				sh_mmcif_start_dma_rx(host);
+		} else {
+			if (host->chan_tx)
+				sh_mmcif_start_dma_tx(host);
+		}
+	}
 	sh_mmcif_start_cmd(host, mrq, mrq->cmd);
 	host->data = NULL;
 
@@ -806,14 +1037,18 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 	mmc->caps = MMC_CAP_MMC_HIGHSPEED;
 	if (pd->caps)
 		mmc->caps |= pd->caps;
-	mmc->max_segs = 128;
+	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
-	mmc->max_blk_count = 65535;
-	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+	mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+	mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
 	mmc->max_seg_size = mmc->max_req_size;
 
 	sh_mmcif_sync_reset(host);
 	platform_set_drvdata(pdev, host);
+
+	/* See if we also get DMA */
+	sh_mmcif_request_dma(host, pd);
+
 	mmc_add_host(mmc);
 
 	ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host);
@@ -852,6 +1087,7 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev)
 	int irq[2];
 
 	mmc_remove_host(host->mmc);
+	sh_mmcif_release_dma(host);
 
 	if (host->addr)
 		iounmap(host->addr);
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index a6bfa5296495..f216a8879b58 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -14,8 +14,9 @@
 #ifndef __SH_MMCIF_H__
 #define __SH_MMCIF_H__
 
-#include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/sh_dma.h>
 
 /*
  * MMCIF : CE_CLK_CTRL [19:16]
@@ -31,13 +32,19 @@
  * 1111 : Peripheral clock (sup_pclk set '1')
  */
 
+struct sh_mmcif_dma {
+	struct sh_dmae_slave chan_priv_tx;
+	struct sh_dmae_slave chan_priv_rx;
+};
+
 struct sh_mmcif_plat_data {
 	void (*set_pwr)(struct platform_device *pdev, int state);
 	void (*down_pwr)(struct platform_device *pdev);
 	int (*get_cd)(struct platform_device *pdef);
-	u8	sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
-	unsigned long caps;
-	u32	ocr;
+	struct sh_mmcif_dma	*dma;
+	u8			sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
+	unsigned long		caps;
+	u32			ocr;
 };
 
 #define MMCIF_CE_CMD_SET	0x00000000
-- 
cgit v1.2.3-71-gd317


From 6d803ba736abb5e122dede70a4720e4843dd6df4 Mon Sep 17 00:00:00 2001
From: Jean-Christop PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Wed, 17 Nov 2010 10:04:33 +0100
Subject: ARM: 6483/1: arm & sh: factorised duplicated clkdev.c

factorise some generic infrastructure to assist looking up struct clks
for the ARM & SH architecture.

as the code is identical at 99%

put the arch specific code for allocation as example in asm/clkdev.h

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig                              |  42 +++---
 arch/arm/common/Kconfig                       |   4 -
 arch/arm/common/Makefile                      |   1 -
 arch/arm/common/clkdev.c                      | 179 --------------------------
 arch/arm/include/asm/clkdev.h                 |  22 +---
 arch/arm/mach-bcmring/clock.c                 |   3 +-
 arch/arm/mach-bcmring/core.c                  |   2 +-
 arch/arm/mach-davinci/clock.h                 |   2 +-
 arch/arm/mach-ep93xx/clock.c                  |   2 +-
 arch/arm/mach-imx/clock-imx1.c                |   3 +-
 arch/arm/mach-imx/clock-imx21.c               |   2 +-
 arch/arm/mach-imx/clock-imx27.c               |   2 +-
 arch/arm/mach-integrator/core.c               |   3 +-
 arch/arm/mach-integrator/impd1.c              |   3 +-
 arch/arm/mach-integrator/integrator_cp.c      |   3 +-
 arch/arm/mach-lpc32xx/clock.c                 |   3 +-
 arch/arm/mach-mmp/clock.h                     |   2 +-
 arch/arm/mach-mx25/clock.c                    |   3 +-
 arch/arm/mach-mx3/clock-imx31.c               |   2 +-
 arch/arm/mach-mx3/clock-imx35.c               |   3 +-
 arch/arm/mach-mx5/clock-mx51.c                |   2 +-
 arch/arm/mach-mxc91231/clock.c                |   2 +-
 arch/arm/mach-nomadik/clock.c                 |   2 +-
 arch/arm/mach-nuc93x/clock.h                  |   2 +-
 arch/arm/mach-omap1/clock.c                   |   2 +-
 arch/arm/mach-omap2/dpll3xxx.c                |   2 +-
 arch/arm/mach-pnx4008/clock.c                 |   3 +-
 arch/arm/mach-pxa/clock.c                     |   2 +-
 arch/arm/mach-pxa/clock.h                     |   2 +-
 arch/arm/mach-realview/core.c                 |   3 +-
 arch/arm/mach-shmobile/Kconfig                |   6 +-
 arch/arm/mach-shmobile/clock-sh7367.c         |   2 +-
 arch/arm/mach-shmobile/clock-sh7372.c         |   2 +-
 arch/arm/mach-shmobile/clock-sh7377.c         |   2 +-
 arch/arm/mach-tcc8k/clock.c                   |   3 +-
 arch/arm/mach-tegra/clock.c                   |   2 +-
 arch/arm/mach-tegra/clock.h                   |   2 +-
 arch/arm/mach-tegra/tegra2_clocks.c           |   3 +-
 arch/arm/mach-u300/clock.c                    |   2 +-
 arch/arm/mach-ux500/clock.c                   |   3 +-
 arch/arm/mach-versatile/core.c                |   3 +-
 arch/arm/mach-vexpress/ct-ca9x4.c             |   3 +-
 arch/arm/mach-vexpress/v2m.c                  |   3 +-
 arch/arm/mach-w90x900/clock.h                 |   2 +-
 arch/arm/plat-omap/Kconfig                    |   4 +-
 arch/arm/plat-omap/include/plat/clkdev_omap.h |   2 +-
 arch/arm/plat-spear/include/plat/clock.h      |   2 +-
 arch/arm/plat-stmp3xxx/clock.c                |   2 +-
 arch/sh/Kconfig                               |   2 +-
 arch/sh/boards/mach-highlander/setup.c        |   2 +-
 arch/sh/include/asm/clkdev.h                  |  38 +++---
 arch/sh/kernel/Makefile                       |   2 +-
 arch/sh/kernel/clkdev.c                       | 171 ------------------------
 arch/sh/kernel/cpu/clock-cpg.c                |   2 +-
 arch/sh/kernel/cpu/clock.c                    |  16 ---
 arch/sh/kernel/cpu/sh4/clock-sh4-202.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7343.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7366.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7722.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7723.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7724.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7757.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7763.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7780.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7785.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-sh7786.c        |   2 +-
 arch/sh/kernel/cpu/sh4a/clock-shx3.c          |   2 +-
 drivers/Kconfig                               |   2 +
 drivers/Makefile                              |   2 +
 drivers/clk/Kconfig                           |   4 +
 drivers/clk/Makefile                          |   2 +
 drivers/clk/clkdev.c                          | 176 +++++++++++++++++++++++++
 include/linux/clkdev.h                        |  36 ++++++
 73 files changed, 328 insertions(+), 507 deletions(-)
 delete mode 100644 arch/arm/common/clkdev.c
 delete mode 100644 arch/sh/kernel/clkdev.c
 create mode 100644 drivers/clk/Kconfig
 create mode 100644 drivers/clk/Makefile
 create mode 100644 drivers/clk/clkdev.c
 create mode 100644 include/linux/clkdev.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a19a5266d5fc..0e51342b3c02 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -221,7 +221,7 @@ config ARCH_INTEGRATOR
 	bool "ARM Ltd. Integrator family"
 	select ARM_AMBA
 	select ARCH_HAS_CPUFREQ
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select PLAT_VERSATILE
@@ -231,7 +231,7 @@ config ARCH_INTEGRATOR
 config ARCH_REALVIEW
 	bool "ARM Ltd. RealView family"
 	select ARM_AMBA
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -245,7 +245,7 @@ config ARCH_VERSATILE
 	bool "ARM Ltd. Versatile family"
 	select ARM_AMBA
 	select ARM_VIC
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ICST
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -259,7 +259,7 @@ config ARCH_VEXPRESS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_AMBA
 	select ARM_TIMER_SP804
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
 	select ICST
@@ -280,7 +280,7 @@ config ARCH_BCMRING
 	depends on MMU
 	select CPU_V6
 	select ARM_AMBA
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	help
@@ -327,7 +327,7 @@ config ARCH_EP93XX
 	select CPU_ARM920T
 	select ARM_AMBA
 	select ARM_VIC
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_USES_GETTIMEOFFSET
@@ -347,14 +347,14 @@ config ARCH_MXC
 	bool "Freescale MXC/iMX-based"
 	select GENERIC_CLOCKEVENTS
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  Support for Freescale MXC/iMX-based family of processors
 
 config ARCH_STMP3XXX
 	bool "Freescale STMP3xxx"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
 	select USB_ARCH_HAS_EHCI
@@ -472,7 +472,7 @@ config ARCH_LPC32XX
 	select HAVE_IDE
 	select ARM_AMBA
 	select USB_ARCH_HAS_OHCI
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	help
@@ -506,7 +506,7 @@ config ARCH_MMP
 	bool "Marvell PXA168/910/MMP2"
 	depends on MMU
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
 	select PLAT_PXA
@@ -539,7 +539,7 @@ config ARCH_W90X900
 	bool "Nuvoton W90X900 CPU"
 	select CPU_ARM926T
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	help
 	  Support for Nuvoton (Winbond logic dept.) ARM9 processor,
@@ -553,7 +553,7 @@ config ARCH_W90X900
 config ARCH_NUC93X
 	bool "Nuvoton NUC93X CPU"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  Support for Nuvoton (Winbond logic dept.) NUC93X MCU,The NUC93X is a
 	  low-power and high performance MPEG-4/JPEG multimedia controller chip.
@@ -564,7 +564,7 @@ config ARCH_TEGRA
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_HAS_BARRIERS if CACHE_L2X0
 	select ARCH_HAS_CPUFREQ
 	help
@@ -574,7 +574,7 @@ config ARCH_TEGRA
 config ARCH_PNX4008
 	bool "Philips Nexperia PNX4008 Mobile"
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_USES_GETTIMEOFFSET
 	help
 	  This enables support for Philips PNX4008 mobile platform.
@@ -584,7 +584,7 @@ config ARCH_PXA
 	depends on MMU
 	select ARCH_MTD_XIP
 	select ARCH_HAS_CPUFREQ
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
@@ -761,7 +761,7 @@ config ARCH_TCC_926
 	bool "Telechips TCC ARM926-based systems"
 	select CPU_ARM926T
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	help
 	  Support for Telechips TCC ARM926-based systems.
@@ -785,7 +785,7 @@ config ARCH_U300
 	select ARM_AMBA
 	select ARM_VIC
 	select GENERIC_CLOCKEVENTS
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_GPIO
 	help
 	  Support for ST-Ericsson U300 series mobile platforms.
@@ -795,7 +795,7 @@ config ARCH_U8500
 	select CPU_V7
 	select ARM_AMBA
 	select GENERIC_CLOCKEVENTS
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select ARCH_REQUIRE_GPIOLIB
 	help
 	  Support for ST-Ericsson's Ux500 architecture
@@ -805,7 +805,7 @@ config ARCH_NOMADIK
 	select ARM_AMBA
 	select ARM_VIC
 	select CPU_ARM926T
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -817,7 +817,7 @@ config ARCH_DAVINCI
 	select ARCH_REQUIRE_GPIOLIB
 	select ZONE_DMA
 	select HAVE_IDE
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_ALLOCATOR
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
@@ -837,7 +837,7 @@ config PLAT_SPEAR
 	bool "ST SPEAr"
 	select ARM_AMBA
 	select ARCH_REQUIRE_GPIOLIB
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select GENERIC_CLOCKEVENTS
 	select HAVE_CLK
 	help
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 0a34c8186924..778655f0257a 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -37,7 +37,3 @@ config SHARP_PARAM
 
 config SHARP_SCOOP
 	bool
-
-config COMMON_CLKDEV
-	bool
-	select HAVE_CLK
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index e6e8664a9413..799e140274f1 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -16,4 +16,3 @@ obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
-obj-$(CONFIG_COMMON_CLKDEV)	+= clkdev.o
diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c
deleted file mode 100644
index e2b2bb66e094..000000000000
--- a/arch/arm/common/clkdev.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  arch/arm/common/clkdev.c
- *
- *  Copyright (C) 2008 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Helper for the clk API to assist looking up a struct clk.
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/clk.h>
-#include <linux/slab.h>
-
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-/*
- * Find the correct struct clk for the device and connection ID.
- * We do slightly fuzzy matching here:
- *  An entry with a NULL ID is assumed to be a wildcard.
- *  If an entry has a device ID, it must match
- *  If an entry has a connection ID, it must match
- * Then we take the most specific entry - with the following
- * order of precedence: dev+con > dev only > con only.
- */
-static struct clk *clk_find(const char *dev_id, const char *con_id)
-{
-	struct clk_lookup *p;
-	struct clk *clk = NULL;
-	int match, best = 0;
-
-	list_for_each_entry(p, &clocks, node) {
-		match = 0;
-		if (p->dev_id) {
-			if (!dev_id || strcmp(p->dev_id, dev_id))
-				continue;
-			match += 2;
-		}
-		if (p->con_id) {
-			if (!con_id || strcmp(p->con_id, con_id))
-				continue;
-			match += 1;
-		}
-
-		if (match > best) {
-			clk = p->clk;
-			if (match != 3)
-				best = match;
-			else
-				break;
-		}
-	}
-	return clk;
-}
-
-struct clk *clk_get_sys(const char *dev_id, const char *con_id)
-{
-	struct clk *clk;
-
-	mutex_lock(&clocks_mutex);
-	clk = clk_find(dev_id, con_id);
-	if (clk && !__clk_get(clk))
-		clk = NULL;
-	mutex_unlock(&clocks_mutex);
-
-	return clk ? clk : ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL(clk_get_sys);
-
-struct clk *clk_get(struct device *dev, const char *con_id)
-{
-	const char *dev_id = dev ? dev_name(dev) : NULL;
-
-	return clk_get_sys(dev_id, con_id);
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-	__clk_put(clk);
-}
-EXPORT_SYMBOL(clk_put);
-
-void clkdev_add(struct clk_lookup *cl)
-{
-	mutex_lock(&clocks_mutex);
-	list_add_tail(&cl->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-}
-EXPORT_SYMBOL(clkdev_add);
-
-void __init clkdev_add_table(struct clk_lookup *cl, size_t num)
-{
-	mutex_lock(&clocks_mutex);
-	while (num--) {
-		list_add_tail(&cl->node, &clocks);
-		cl++;
-	}
-	mutex_unlock(&clocks_mutex);
-}
-
-#define MAX_DEV_ID	20
-#define MAX_CON_ID	16
-
-struct clk_lookup_alloc {
-	struct clk_lookup cl;
-	char	dev_id[MAX_DEV_ID];
-	char	con_id[MAX_CON_ID];
-};
-
-struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...)
-{
-	struct clk_lookup_alloc *cla;
-
-	cla = kzalloc(sizeof(*cla), GFP_KERNEL);
-	if (!cla)
-		return NULL;
-
-	cla->cl.clk = clk;
-	if (con_id) {
-		strlcpy(cla->con_id, con_id, sizeof(cla->con_id));
-		cla->cl.con_id = cla->con_id;
-	}
-
-	if (dev_fmt) {
-		va_list ap;
-
-		va_start(ap, dev_fmt);
-		vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap);
-		cla->cl.dev_id = cla->dev_id;
-		va_end(ap);
-	}
-
-	return &cla->cl;
-}
-EXPORT_SYMBOL(clkdev_alloc);
-
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-	struct device *dev)
-{
-	struct clk *r = clk_get(dev, id);
-	struct clk_lookup *l;
-
-	if (IS_ERR(r))
-		return PTR_ERR(r);
-
-	l = clkdev_alloc(r, alias, alias_dev_name);
-	clk_put(r);
-	if (!l)
-		return -ENODEV;
-	clkdev_add(l);
-	return 0;
-}
-EXPORT_SYMBOL(clk_add_alias);
-
-/*
- * clkdev_drop - remove a clock dynamically allocated
- */
-void clkdev_drop(struct clk_lookup *cl)
-{
-	mutex_lock(&clocks_mutex);
-	list_del(&cl->node);
-	mutex_unlock(&clocks_mutex);
-	kfree(cl);
-}
-EXPORT_SYMBOL(clkdev_drop);
diff --git a/arch/arm/include/asm/clkdev.h b/arch/arm/include/asm/clkdev.h
index b56c1389b6fa..765d33222369 100644
--- a/arch/arm/include/asm/clkdev.h
+++ b/arch/arm/include/asm/clkdev.h
@@ -12,23 +12,13 @@
 #ifndef __ASM_CLKDEV_H
 #define __ASM_CLKDEV_H
 
-struct clk;
-struct device;
+#include <linux/slab.h>
 
-struct clk_lookup {
-	struct list_head	node;
-	const char		*dev_id;
-	const char		*con_id;
-	struct clk		*clk;
-};
+#include <mach/clkdev.h>
 
-struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...);
-
-void clkdev_add(struct clk_lookup *cl);
-void clkdev_drop(struct clk_lookup *cl);
-
-void clkdev_add_table(struct clk_lookup *, size_t);
-int clk_add_alias(const char *, const char *, char *, struct device *);
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
 
 #endif
diff --git a/arch/arm/mach-bcmring/clock.c b/arch/arm/mach-bcmring/clock.c
index 14bafc38f2dc..ad237a42d265 100644
--- a/arch/arm/mach-bcmring/clock.c
+++ b/arch/arm/mach-bcmring/clock.c
@@ -21,13 +21,12 @@
 #include <linux/string.h>
 #include <linux/clk.h>
 #include <linux/spinlock.h>
+#include <linux/clkdev.h>
 #include <mach/csp/hw_cfg.h>
 #include <mach/csp/chipcHw_def.h>
 #include <mach/csp/chipcHw_reg.h>
 #include <mach/csp/chipcHw_inline.h>
 
-#include <asm/clkdev.h>
-
 #include "clock.h"
 
 #define clk_is_primary(x)       ((x)->type & CLK_TYPE_PRIMARY)
diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c
index d3f959e92b2d..ed96ef400474 100644
--- a/arch/arm/mach-bcmring/core.c
+++ b/arch/arm/mach-bcmring/core.c
@@ -30,10 +30,10 @@
 #include <linux/amba/bus.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/clkdev.h>
 
 #include <mach/csp/mm_addr.h>
 #include <mach/hardware.h>
-#include <asm/clkdev.h>
 #include <linux/io.h>
 #include <asm/irq.h>
 #include <asm/hardware/arm_timer.h>
diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h
index 11099980b58b..0dd22031ec62 100644
--- a/arch/arm/mach-davinci/clock.h
+++ b/arch/arm/mach-davinci/clock.h
@@ -68,7 +68,7 @@
 #ifndef __ASSEMBLER__
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #define PLLSTAT_GOSTAT	BIT(0)
 #define PLLCMD_GOSET	BIT(0)
diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index ef06c66a6f16..ca4de7105097 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -19,10 +19,10 @@
 #include <linux/string.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 
diff --git a/arch/arm/mach-imx/clock-imx1.c b/arch/arm/mach-imx/clock-imx1.c
index daca30b2d5b1..3938a563b280 100644
--- a/arch/arm/mach-imx/clock-imx1.c
+++ b/arch/arm/mach-imx/clock-imx1.c
@@ -22,8 +22,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-imx/clock-imx21.c b/arch/arm/mach-imx/clock-imx21.c
index cf15ea516a72..d7056559715a 100644
--- a/arch/arm/mach-imx/clock-imx21.c
+++ b/arch/arm/mach-imx/clock-imx21.c
@@ -21,11 +21,11 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #define IO_ADDR_CCM(off)	(MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off)))
diff --git a/arch/arm/mach-imx/clock-imx27.c b/arch/arm/mach-imx/clock-imx27.c
index 98a25bada783..ca1017b9028d 100644
--- a/arch/arm/mach-imx/clock-imx27.c
+++ b/arch/arm/mach-imx/clock-imx27.c
@@ -21,8 +21,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/clock.h>
diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 8f4fb6d638f7..b8e884b450da 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -21,9 +21,8 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/serial.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index fd684bf205e5..5db574f8ae3f 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -22,9 +22,8 @@
 #include <linux/amba/clcd.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <asm/hardware/icst.h>
 #include <mach/lm.h>
 #include <mach/impd1.h>
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 6258c90d020c..9403d2fa13a3 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -21,9 +21,8 @@
 #include <linux/amba/mmci.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include <asm/irq.h>
diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c
index 32d63796430a..da0e6498110a 100644
--- a/arch/arm/mach-lpc32xx/clock.c
+++ b/arch/arm/mach-lpc32xx/clock.c
@@ -90,10 +90,9 @@
 #include <linux/clk.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
-#include <asm/clkdev.h>
-#include <mach/clkdev.h>
 #include <mach/platform.h>
 #include "clock.h"
 #include "common.h"
diff --git a/arch/arm/mach-mmp/clock.h b/arch/arm/mach-mmp/clock.h
index 016ae94691c0..9b027d7491f5 100644
--- a/arch/arm/mach-mmp/clock.h
+++ b/arch/arm/mach-mmp/clock.h
@@ -6,7 +6,7 @@
  *  published by the Free Software Foundation.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index 9e4a5578c2fb..00dcb08019e9 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -21,8 +21,7 @@
 #include <linux/list.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c
index 109e98f323e0..1cd8b40b7676 100644
--- a/arch/arm/mach-mx3/clock-imx31.c
+++ b/arch/arm/mach-mx3/clock-imx31.c
@@ -23,8 +23,8 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/clock.h>
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 61e4a318980a..819dd809615a 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -21,8 +21,7 @@
 #include <linux/list.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c
index 8ac36d882927..5975edb47de8 100644
--- a/arch/arm/mach-mx5/clock-mx51.c
+++ b/arch/arm/mach-mx5/clock-mx51.c
@@ -14,8 +14,8 @@
 #include <linux/delay.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/div64.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-mxc91231/clock.c b/arch/arm/mach-mxc91231/clock.c
index 5c85075d8a56..9fab505f1eb1 100644
--- a/arch/arm/mach-mxc91231/clock.c
+++ b/arch/arm/mach-mxc91231/clock.c
@@ -2,12 +2,12 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/hardware.h>
 #include <mach/common.h>
 
-#include <asm/clkdev.h>
 #include <asm/bug.h>
 #include <asm/div64.h>
 
diff --git a/arch/arm/mach-nomadik/clock.c b/arch/arm/mach-nomadik/clock.c
index 89f793adf776..48a59f24e10c 100644
--- a/arch/arm/mach-nomadik/clock.c
+++ b/arch/arm/mach-nomadik/clock.c
@@ -7,7 +7,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/clk.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include "clock.h"
 
 /*
diff --git a/arch/arm/mach-nuc93x/clock.h b/arch/arm/mach-nuc93x/clock.h
index 18e51be4816f..4de1f1da9dc5 100644
--- a/arch/arm/mach-nuc93x/clock.h
+++ b/arch/arm/mach-nuc93x/clock.h
@@ -10,7 +10,7 @@
  * the Free Software Foundation; either version 2 of the License.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 void nuc93x_clk_enable(struct clk *clk, int enable);
 void clks_register(struct clk_lookup *clks, size_t num);
diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index b8c7fb9d7921..84ef70476b51 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -17,9 +17,9 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <asm/mach-types.h>
-#include <asm/clkdev.h>
 
 #include <plat/cpu.h>
 #include <plat/usb.h>
diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index ed8d330522f1..ebb888f59365 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -26,10 +26,10 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/bitops.h>
+#include <linux/clkdev.h>
 
 #include <plat/cpu.h>
 #include <plat/clock.h>
-#include <asm/clkdev.h>
 
 #include "clock.h"
 #include "prm.h"
diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c
index 9d1975fa4d9f..a4a3819c96cb 100644
--- a/arch/arm/mach-pnx4008/clock.c
+++ b/arch/arm/mach-pnx4008/clock.c
@@ -21,8 +21,7 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/hardware.h>
 #include <mach/clock.h>
diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c
index abba0089a2ae..4e4a84be96ba 100644
--- a/arch/arm/mach-pxa/clock.c
+++ b/arch/arm/mach-pxa/clock.c
@@ -11,8 +11,8 @@
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <mach/pxa2xx-regs.h>
 #include <mach/hardware.h>
 
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index d8488742b807..12cc0e87e6c4 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -1,4 +1,4 @@
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 07c08151dfe6..3d915b1ccdb5 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -30,8 +30,8 @@
 #include <linux/ata_platform.h>
 #include <linux/amba/mmci.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/system.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
@@ -47,7 +47,6 @@
 
 #include <asm/hardware/gic.h>
 
-#include <mach/clkdev.h>
 #include <mach/platform.h>
 #include <mach/irqs.h>
 #include <plat/timer-sp.h>
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 54b479c35ee0..f8f06e9fec35 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -6,7 +6,7 @@ config ARCH_SH7367
 	bool "SH-Mobile G3 (SH7367)"
 	select CPU_V6
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
@@ -14,7 +14,7 @@ config ARCH_SH7377
 	bool "SH-Mobile G4 (SH7377)"
 	select CPU_V7
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
@@ -22,7 +22,7 @@ config ARCH_SH7372
 	bool "SH-Mobile AP4 (SH7372)"
 	select CPU_V7
 	select HAVE_CLK
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	select SH_CLK_CPG
 	select GENERIC_CLOCKEVENTS
 
diff --git a/arch/arm/mach-shmobile/clock-sh7367.c b/arch/arm/mach-shmobile/clock-sh7367.c
index 9f78729098f2..6b186aefcbd6 100644
--- a/arch/arm/mach-shmobile/clock-sh7367.c
+++ b/arch/arm/mach-shmobile/clock-sh7367.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7367 registers */
 #define RTFRQCR    0xe6150000
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 8565aefa21fd..445112adba46 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7372 registers */
 #define FRQCRA		0xe6150000
diff --git a/arch/arm/mach-shmobile/clock-sh7377.c b/arch/arm/mach-shmobile/clock-sh7377.c
index f91395aeb9ab..95942466e63f 100644
--- a/arch/arm/mach-shmobile/clock-sh7377.c
+++ b/arch/arm/mach-shmobile/clock-sh7377.c
@@ -20,8 +20,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/sh_clk.h>
+#include <linux/clkdev.h>
 #include <mach/common.h>
-#include <asm/clkdev.h>
 
 /* SH7377 registers */
 #define RTFRQCR    0xe6150000
diff --git a/arch/arm/mach-tcc8k/clock.c b/arch/arm/mach-tcc8k/clock.c
index ba32a15127ab..3970a9cdce26 100644
--- a/arch/arm/mach-tcc8k/clock.c
+++ b/arch/arm/mach-tcc8k/clock.c
@@ -12,8 +12,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/clock.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
index ae19f95585be..77948e0f4909 100644
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/regulator/consumer.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include "clock.h"
 #include "board.h"
diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h
index 94fd859770f1..083a4cfc6cf0 100644
--- a/arch/arm/mach-tegra/clock.h
+++ b/arch/arm/mach-tegra/clock.h
@@ -21,7 +21,7 @@
 #define __MACH_TEGRA_CLOCK_H
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #define DIV_BUS			(1 << 0)
 #define DIV_U71			(1 << 1)
diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c
index ae3b308e22a4..f0dae6d8ba52 100644
--- a/arch/arm/mach-tegra/tegra2_clocks.c
+++ b/arch/arm/mach-tegra/tegra2_clocks.c
@@ -24,8 +24,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/hrtimer.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <mach/iomap.h>
 
diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c
index 7458fc6df5c6..fabcc49abe80 100644
--- a/arch/arm/mach-u300/clock.c
+++ b/arch/arm/mach-u300/clock.c
@@ -25,8 +25,8 @@
 #include <linux/timer.h>
 #include <linux/io.h>
 #include <linux/seq_file.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/syscon.h>
 
diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c
index 1675047daf20..531de5c63641 100644
--- a/arch/arm/mach-ux500/clock.c
+++ b/arch/arm/mach-ux500/clock.c
@@ -13,8 +13,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 #include <plat/mtu.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index e38acb0f89c8..8c1ca1d63538 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -31,8 +31,8 @@
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 #include <asm/leds.h>
@@ -46,7 +46,6 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <asm/mach/map.h>
-#include <mach/clkdev.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include <plat/timer-sp.h>
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index c2e405a9e025..26a02eb57571 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -8,8 +8,8 @@
 #include <linux/platform_device.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/pgtable.h>
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -18,7 +18,6 @@
 #include <asm/pmu.h>
 #include <asm/smp_twd.h>
 
-#include <mach/clkdev.h>
 #include <mach/ct-ca9x4.h>
 
 #include <plat/timer-sp.h>
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 7eaa232180a5..d374a78986e8 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -11,15 +11,14 @@
 #include <linux/spinlock.h>
 #include <linux/sysdev.h>
 #include <linux/usb/isp1760.h>
+#include <linux/clkdev.h>
 
-#include <asm/clkdev.h>
 #include <asm/sizes.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_timer.h>
 
-#include <mach/clkdev.h>
 #include <mach/motherboard.h>
 
 #include <plat/timer-sp.h>
diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h
index c56ddab3d912..b88a1b16b2e9 100644
--- a/arch/arm/mach-w90x900/clock.h
+++ b/arch/arm/mach-w90x900/clock.h
@@ -10,7 +10,7 @@
  * the Free Software Foundation; either version 2 of the License.
  */
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 void nuc900_clk_enable(struct clk *clk, int enable);
 void nuc900_subclk_enable(struct clk *clk, int enable);
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 92c5bb7909f5..c9408434a855 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -11,13 +11,13 @@ choice
 
 config ARCH_OMAP1
 	bool "TI OMAP1"
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  "Systems based on omap7xx, omap15xx or omap16xx"
 
 config ARCH_OMAP2PLUS
 	bool "TI OMAP2/3/4"
-	select COMMON_CLKDEV
+	select CLKDEV_LOOKUP
 	help
 	  "Systems based on OMAP2, OMAP3 or OMAP4"
 
diff --git a/arch/arm/plat-omap/include/plat/clkdev_omap.h b/arch/arm/plat-omap/include/plat/clkdev_omap.h
index bb937f3fabed..4b2028ab4d2b 100644
--- a/arch/arm/plat-omap/include/plat/clkdev_omap.h
+++ b/arch/arm/plat-omap/include/plat/clkdev_omap.h
@@ -8,7 +8,7 @@
 #ifndef __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H
 #define __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H
 
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 
 struct omap_clk {
 	u16				cpu;
diff --git a/arch/arm/plat-spear/include/plat/clock.h b/arch/arm/plat-spear/include/plat/clock.h
index 298bafc0a52f..2572260f990f 100644
--- a/arch/arm/plat-spear/include/plat/clock.h
+++ b/arch/arm/plat-spear/include/plat/clock.h
@@ -15,7 +15,7 @@
 #define __PLAT_CLOCK_H
 
 #include <linux/list.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <linux/types.h>
 
 /* clk structure flags */
diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c
index e593a2a801c6..2e712e17ce72 100644
--- a/arch/arm/plat-stmp3xxx/clock.c
+++ b/arch/arm/plat-stmp3xxx/clock.c
@@ -25,9 +25,9 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/clkdev.h>
 
 #include <asm/mach-types.h>
-#include <asm/clkdev.h>
 #include <mach/platform.h>
 #include <mach/regs-clkctrl.h>
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5c075f562eba..cfc510608039 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,7 +1,7 @@
 config SUPERH
 	def_bool y
 	select EMBEDDED
-	select HAVE_CLK
+	select CLKDEV_LOOKUP
 	select HAVE_IDE if HAS_IOPORT
 	select HAVE_MEMBLOCK
 	select HAVE_OPROFILE
diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c
index a5ecfbacaf36..87618c91d178 100644
--- a/arch/sh/boards/mach-highlander/setup.c
+++ b/arch/sh/boards/mach-highlander/setup.c
@@ -24,10 +24,10 @@
 #include <linux/interrupt.h>
 #include <linux/usb/r8a66597.h>
 #include <linux/usb/m66592.h>
+#include <linux/clkdev.h>
 #include <net/ax88796.h>
 #include <asm/machvec.h>
 #include <mach/highlander.h>
-#include <asm/clkdev.h>
 #include <asm/clock.h>
 #include <asm/heartbeat.h>
 #include <asm/io.h>
diff --git a/arch/sh/include/asm/clkdev.h b/arch/sh/include/asm/clkdev.h
index 5645f358128b..6ba91868201c 100644
--- a/arch/sh/include/asm/clkdev.h
+++ b/arch/sh/include/asm/clkdev.h
@@ -1,9 +1,5 @@
 /*
- *  arch/sh/include/asm/clkdev.h
- *
- * Cloned from arch/arm/include/asm/clkdev.h:
- *
- *  Copyright (C) 2008 Russell King.
+ *  Copyright (C) 2010 Paul Mundt <lethal@linux-sh.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,25 +7,25 @@
  *
  * Helper for the clk API to assist looking up a struct clk.
  */
-#ifndef __ASM_CLKDEV_H
-#define __ASM_CLKDEV_H
 
-struct clk;
+#ifndef __CLKDEV__H_
+#define __CLKDEV__H_
 
-struct clk_lookup {
-	struct list_head	node;
-	const char		*dev_id;
-	const char		*con_id;
-	struct clk		*clk;
-};
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 
-struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...);
+#include <asm/clock.h>
 
-void clkdev_add(struct clk_lookup *cl);
-void clkdev_drop(struct clk_lookup *cl);
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	if (!slab_is_available())
+		return alloc_bootmem_low_pages(size);
+	else
+		return kzalloc(size, GFP_KERNEL);
+}
 
-void clkdev_add_table(struct clk_lookup *, size_t);
-int clk_add_alias(const char *, const char *, char *, struct device *);
+#define __clk_put(clk)
+#define __clk_get(clk) ({ 1; })
 
-#endif
+#endif /* __CLKDEV_H__ */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 8eed6a485446..cf6522179523 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -11,7 +11,7 @@ endif
 
 CFLAGS_REMOVE_return_address.o = -pg
 
-obj-y	:= clkdev.o debugtraps.o dma-nommu.o dumpstack.o 		\
+obj-y	:= debugtraps.o dma-nommu.o dumpstack.o 		\
 	   idle.o io.o irq.o irq_$(BITS).o kdebugfs.o			\
 	   machvec.o nmi_debug.o process.o				\
 	   process_$(BITS).o ptrace.o ptrace_$(BITS).o			\
diff --git a/arch/sh/kernel/clkdev.c b/arch/sh/kernel/clkdev.c
deleted file mode 100644
index 1f800ef4a735..000000000000
--- a/arch/sh/kernel/clkdev.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * arch/sh/kernel/clkdev.c
- *
- * Cloned from arch/arm/common/clkdev.c:
- *
- *  Copyright (C) 2008 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Helper for the clk API to assist looking up a struct clk.
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/clk.h>
-#include <linux/slab.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <asm/clock.h>
-#include <asm/clkdev.h>
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-/*
- * Find the correct struct clk for the device and connection ID.
- * We do slightly fuzzy matching here:
- *  An entry with a NULL ID is assumed to be a wildcard.
- *  If an entry has a device ID, it must match
- *  If an entry has a connection ID, it must match
- * Then we take the most specific entry - with the following
- * order of precedence: dev+con > dev only > con only.
- */
-static struct clk *clk_find(const char *dev_id, const char *con_id)
-{
-	struct clk_lookup *p;
-	struct clk *clk = NULL;
-	int match, best = 0;
-
-	list_for_each_entry(p, &clocks, node) {
-		match = 0;
-		if (p->dev_id) {
-			if (!dev_id || strcmp(p->dev_id, dev_id))
-				continue;
-			match += 2;
-		}
-		if (p->con_id) {
-			if (!con_id || strcmp(p->con_id, con_id))
-				continue;
-			match += 1;
-		}
-		if (match == 0)
-			continue;
-
-		if (match > best) {
-			clk = p->clk;
-			best = match;
-		}
-	}
-	return clk;
-}
-
-struct clk *clk_get_sys(const char *dev_id, const char *con_id)
-{
-	struct clk *clk;
-
-	mutex_lock(&clocks_mutex);
-	clk = clk_find(dev_id, con_id);
-	mutex_unlock(&clocks_mutex);
-
-	return clk ? clk : ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL(clk_get_sys);
-
-void clkdev_add(struct clk_lookup *cl)
-{
-	mutex_lock(&clocks_mutex);
-	list_add_tail(&cl->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-}
-EXPORT_SYMBOL(clkdev_add);
-
-void __init clkdev_add_table(struct clk_lookup *cl, size_t num)
-{
-	mutex_lock(&clocks_mutex);
-	while (num--) {
-		list_add_tail(&cl->node, &clocks);
-		cl++;
-	}
-	mutex_unlock(&clocks_mutex);
-}
-
-#define MAX_DEV_ID	20
-#define MAX_CON_ID	16
-
-struct clk_lookup_alloc {
-	struct clk_lookup cl;
-	char	dev_id[MAX_DEV_ID];
-	char	con_id[MAX_CON_ID];
-};
-
-struct clk_lookup * __init_refok
-clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...)
-{
-	struct clk_lookup_alloc *cla;
-
-	if (!slab_is_available())
-		cla = alloc_bootmem_low_pages(sizeof(*cla));
-	else
-		cla = kzalloc(sizeof(*cla), GFP_KERNEL);
-
-	if (!cla)
-		return NULL;
-
-	cla->cl.clk = clk;
-	if (con_id) {
-		strlcpy(cla->con_id, con_id, sizeof(cla->con_id));
-		cla->cl.con_id = cla->con_id;
-	}
-
-	if (dev_fmt) {
-		va_list ap;
-
-		va_start(ap, dev_fmt);
-		vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap);
-		cla->cl.dev_id = cla->dev_id;
-		va_end(ap);
-	}
-
-	return &cla->cl;
-}
-EXPORT_SYMBOL(clkdev_alloc);
-
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-	struct device *dev)
-{
-	struct clk *r = clk_get(dev, id);
-	struct clk_lookup *l;
-
-	if (IS_ERR(r))
-		return PTR_ERR(r);
-
-	l = clkdev_alloc(r, alias, alias_dev_name);
-	clk_put(r);
-	if (!l)
-		return -ENODEV;
-	clkdev_add(l);
-	return 0;
-}
-EXPORT_SYMBOL(clk_add_alias);
-
-/*
- * clkdev_drop - remove a clock dynamically allocated
- */
-void clkdev_drop(struct clk_lookup *cl)
-{
-	struct clk_lookup_alloc *cla = container_of(cl, struct clk_lookup_alloc, cl);
-
-	mutex_lock(&clocks_mutex);
-	list_del(&cl->node);
-	mutex_unlock(&clocks_mutex);
-	kfree(cla);
-}
-EXPORT_SYMBOL(clkdev_drop);
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index e2f63d68da51..dd0e0f211359 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -2,7 +2,7 @@
 #include <linux/compiler.h>
 #include <linux/slab.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 
 static struct clk master_clk = {
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 50f887dda565..4187cf4fe185 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -48,20 +48,4 @@ int __init clk_init(void)
 	return ret;
 }
 
-/*
- * Returns a clock. Note that we first try to use device id on the bus
- * and clock name. If this fails, we try to use clock name only.
- */
-struct clk *clk_get(struct device *dev, const char *con_id)
-{
-	const char *dev_id = dev ? dev_name(dev) : NULL;
-
-	return clk_get_sys(dev_id, con_id);
-}
-EXPORT_SYMBOL_GPL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL_GPL(clk_put);
 
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index 4eabc68cd753..6c1492b8431a 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
index 71291ae201b9..93c646072c1b 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 
 /* SH7343 registers */
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
index 7ce5bbcd4084..049dc0628ccc 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 
 /* SH7366 registers */
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 2030f3d9fac7..9d23a36f0647 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/hwblk.h>
 #include <cpu/sh7722.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
index d3938f0d3702..55493cd5bd8f 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/clk.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/hwblk.h>
 #include <cpu/sh7723.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index 2d9700c6b53a..527936bb3ce0 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/clk.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/hwblk.h>
 #include <cpu/sh7724.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c
index ce39a2ae8c6c..e073e3eb4c3d 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 1f1df48008cd..599630fc4d3b 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 #include <asm/io.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 62d706350060..8894926479a6 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 #include <asm/io.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index c3e458aaa2b7..2d960247f3eb 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -14,7 +14,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/cpufreq.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 #include <cpu/sh7785.h>
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index 597c9fbe49c6..42e403be9076 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index 4f70df6b6169..1afdb93b8ccb 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <asm/clkdev.h>
+#include <linux/clkdev.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 
diff --git a/drivers/Kconfig b/drivers/Kconfig
index a2b902f4d437..3d93b3a3d630 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -111,4 +111,6 @@ source "drivers/xen/Kconfig"
 source "drivers/staging/Kconfig"
 
 source "drivers/platform/Kconfig"
+
+source "drivers/clk/Kconfig"
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 14cf9077bb2b..4af7d5b124ce 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -114,3 +114,5 @@ obj-$(CONFIG_VLYNQ)		+= vlynq/
 obj-$(CONFIG_STAGING)		+= staging/
 obj-y				+= platform/
 obj-y				+= ieee802154/
+#common clk code
+obj-y				+= clk/
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
new file mode 100644
index 000000000000..4168c8896e16
--- /dev/null
+++ b/drivers/clk/Kconfig
@@ -0,0 +1,4 @@
+
+config CLKDEV_LOOKUP
+	bool
+	select HAVE_CLK
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
new file mode 100644
index 000000000000..07613fa172c9
--- /dev/null
+++ b/drivers/clk/Makefile
@@ -0,0 +1,2 @@
+
+obj-$(CONFIG_CLKDEV_LOOKUP)	+= clkdev.o
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
new file mode 100644
index 000000000000..0fc0a79852de
--- /dev/null
+++ b/drivers/clk/clkdev.c
@@ -0,0 +1,176 @@
+/*
+ * drivers/clk/clkdev.c
+ *
+ *  Copyright (C) 2008 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Helper for the clk API to assist looking up a struct clk.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/mutex.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+
+static LIST_HEAD(clocks);
+static DEFINE_MUTEX(clocks_mutex);
+
+/*
+ * Find the correct struct clk for the device and connection ID.
+ * We do slightly fuzzy matching here:
+ *  An entry with a NULL ID is assumed to be a wildcard.
+ *  If an entry has a device ID, it must match
+ *  If an entry has a connection ID, it must match
+ * Then we take the most specific entry - with the following
+ * order of precedence: dev+con > dev only > con only.
+ */
+static struct clk *clk_find(const char *dev_id, const char *con_id)
+{
+	struct clk_lookup *p;
+	struct clk *clk = NULL;
+	int match, best = 0;
+
+	list_for_each_entry(p, &clocks, node) {
+		match = 0;
+		if (p->dev_id) {
+			if (!dev_id || strcmp(p->dev_id, dev_id))
+				continue;
+			match += 2;
+		}
+		if (p->con_id) {
+			if (!con_id || strcmp(p->con_id, con_id))
+				continue;
+			match += 1;
+		}
+
+		if (match > best) {
+			clk = p->clk;
+			if (match != 3)
+				best = match;
+			else
+				break;
+		}
+	}
+	return clk;
+}
+
+struct clk *clk_get_sys(const char *dev_id, const char *con_id)
+{
+	struct clk *clk;
+
+	mutex_lock(&clocks_mutex);
+	clk = clk_find(dev_id, con_id);
+	if (clk && !__clk_get(clk))
+		clk = NULL;
+	mutex_unlock(&clocks_mutex);
+
+	return clk ? clk : ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get_sys);
+
+struct clk *clk_get(struct device *dev, const char *con_id)
+{
+	const char *dev_id = dev ? dev_name(dev) : NULL;
+
+	return clk_get_sys(dev_id, con_id);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+	__clk_put(clk);
+}
+EXPORT_SYMBOL(clk_put);
+
+void clkdev_add(struct clk_lookup *cl)
+{
+	mutex_lock(&clocks_mutex);
+	list_add_tail(&cl->node, &clocks);
+	mutex_unlock(&clocks_mutex);
+}
+EXPORT_SYMBOL(clkdev_add);
+
+void __init clkdev_add_table(struct clk_lookup *cl, size_t num)
+{
+	mutex_lock(&clocks_mutex);
+	while (num--) {
+		list_add_tail(&cl->node, &clocks);
+		cl++;
+	}
+	mutex_unlock(&clocks_mutex);
+}
+
+#define MAX_DEV_ID	20
+#define MAX_CON_ID	16
+
+struct clk_lookup_alloc {
+	struct clk_lookup cl;
+	char	dev_id[MAX_DEV_ID];
+	char	con_id[MAX_CON_ID];
+};
+
+struct clk_lookup * __init_refok
+clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...)
+{
+	struct clk_lookup_alloc *cla;
+
+	cla = __clkdev_alloc(sizeof(*cla));
+	if (!cla)
+		return NULL;
+
+	cla->cl.clk = clk;
+	if (con_id) {
+		strlcpy(cla->con_id, con_id, sizeof(cla->con_id));
+		cla->cl.con_id = cla->con_id;
+	}
+
+	if (dev_fmt) {
+		va_list ap;
+
+		va_start(ap, dev_fmt);
+		vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap);
+		cla->cl.dev_id = cla->dev_id;
+		va_end(ap);
+	}
+
+	return &cla->cl;
+}
+EXPORT_SYMBOL(clkdev_alloc);
+
+int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
+	struct device *dev)
+{
+	struct clk *r = clk_get(dev, id);
+	struct clk_lookup *l;
+
+	if (IS_ERR(r))
+		return PTR_ERR(r);
+
+	l = clkdev_alloc(r, alias, alias_dev_name);
+	clk_put(r);
+	if (!l)
+		return -ENODEV;
+	clkdev_add(l);
+	return 0;
+}
+EXPORT_SYMBOL(clk_add_alias);
+
+/*
+ * clkdev_drop - remove a clock dynamically allocated
+ */
+void clkdev_drop(struct clk_lookup *cl)
+{
+	mutex_lock(&clocks_mutex);
+	list_del(&cl->node);
+	mutex_unlock(&clocks_mutex);
+	kfree(cl);
+}
+EXPORT_SYMBOL(clkdev_drop);
diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
new file mode 100644
index 000000000000..457bcb0a310a
--- /dev/null
+++ b/include/linux/clkdev.h
@@ -0,0 +1,36 @@
+/*
+ *  include/linux/clkdev.h
+ *
+ *  Copyright (C) 2008 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Helper for the clk API to assist looking up a struct clk.
+ */
+#ifndef __CLKDEV_H
+#define __CLKDEV_H
+
+#include <asm/clkdev.h>
+
+struct clk;
+struct device;
+
+struct clk_lookup {
+	struct list_head	node;
+	const char		*dev_id;
+	const char		*con_id;
+	struct clk		*clk;
+};
+
+struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
+	const char *dev_fmt, ...);
+
+void clkdev_add(struct clk_lookup *cl);
+void clkdev_drop(struct clk_lookup *cl);
+
+void clkdev_add_table(struct clk_lookup *, size_t);
+int clk_add_alias(const char *, const char *, char *, struct device *);
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 65500fa94aaeb3475e39c0c5180f188014164ca4 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 4 Nov 2010 13:06:59 +0100
Subject: ARM: 6467/1: amba: optional PrimeCell core voltage switch

On some contemporary sub-micron SoCs, peripherals on the chip have
power domain switches, i.e. the voltage to the core may be turned
off to conserve power. In the Ux500 we have this for out PrimeCell
derivates.

This patch makes it possible to specify an (optional) regulator to
handle the voltage domain switch on AMBA PrimeCells, modeled very
similar to how block clocks are handled.

Additional amba_vcore_[enable|disable] calls are supplied to make
it possible introduce optional powering off of the core voltage.
Using this will require code to spool/unspool any core HW state.

Cc: Rabin Vincent <rabin.vincent@stericsson.com>
Cc: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Cc: Jonas Aaberg <jonas.aberg@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/amba/bus.c       | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/amba/bus.h |  8 ++++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 2737b9752205..e7df019d29d4 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -147,6 +147,39 @@ static void amba_put_disable_pclk(struct amba_device *pcdev)
 	clk_put(pclk);
 }
 
+static int amba_get_enable_vcore(struct amba_device *pcdev)
+{
+	struct regulator *vcore = regulator_get(&pcdev->dev, "vcore");
+	int ret;
+
+	pcdev->vcore = vcore;
+
+	if (IS_ERR(vcore)) {
+		/* It is OK not to supply a vcore regulator */
+		if (PTR_ERR(vcore) == -ENODEV)
+			return 0;
+		return PTR_ERR(vcore);
+	}
+
+	ret = regulator_enable(vcore);
+	if (ret) {
+		regulator_put(vcore);
+		pcdev->vcore = ERR_PTR(-ENODEV);
+	}
+
+	return ret;
+}
+
+static void amba_put_disable_vcore(struct amba_device *pcdev)
+{
+	struct regulator *vcore = pcdev->vcore;
+
+	if (!IS_ERR(vcore)) {
+		regulator_disable(vcore);
+		regulator_put(vcore);
+	}
+}
+
 /*
  * These are the device model conversion veneers; they convert the
  * device model structures to our more specific structures.
@@ -159,6 +192,10 @@ static int amba_probe(struct device *dev)
 	int ret;
 
 	do {
+		ret = amba_get_enable_vcore(pcdev);
+		if (ret)
+			break;
+
 		ret = amba_get_enable_pclk(pcdev);
 		if (ret)
 			break;
@@ -168,6 +205,7 @@ static int amba_probe(struct device *dev)
 			break;
 
 		amba_put_disable_pclk(pcdev);
+		amba_put_disable_vcore(pcdev);
 	} while (0);
 
 	return ret;
@@ -180,6 +218,7 @@ static int amba_remove(struct device *dev)
 	int ret = drv->remove(pcdev);
 
 	amba_put_disable_pclk(pcdev);
+	amba_put_disable_vcore(pcdev);
 
 	return ret;
 }
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c6454cca0447..9e7f259346e1 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -18,6 +18,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/resource.h>
+#include <linux/regulator/consumer.h>
 
 #define AMBA_NR_IRQS	2
 #define AMBA_CID	0xb105f00d
@@ -28,6 +29,7 @@ struct amba_device {
 	struct device		dev;
 	struct resource		res;
 	struct clk		*pclk;
+	struct regulator	*vcore;
 	u64			dma_mask;
 	unsigned int		periphid;
 	unsigned int		irq[AMBA_NR_IRQS];
@@ -71,6 +73,12 @@ void amba_release_regions(struct amba_device *);
 #define amba_pclk_disable(d)	\
 	do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
 
+#define amba_vcore_enable(d)	\
+	(IS_ERR((d)->vcore) ? 0 : regulator_enable((d)->vcore))
+
+#define amba_vcore_disable(d)	\
+	do { if (!IS_ERR((d)->vcore)) regulator_disable((d)->vcore); } while (0)
+
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
-- 
cgit v1.2.3-71-gd317


From 335d7afbfb71faac833734a94240c1e07cf0ead8 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 22 Nov 2010 15:47:36 +0100
Subject: mutexes, sched: Introduce arch_mutex_cpu_relax()

The spinning mutex implementation uses cpu_relax() in busy loops as a
compiler barrier. Depending on the architecture, cpu_relax() may do more
than needed in this specific mutex spin loops. On System z we also give
up the time slice of the virtual cpu in cpu_relax(), which prevents
effective spinning on the mutex.

This patch replaces cpu_relax() in the spinning mutex code with
arch_mutex_cpu_relax(), which can be defined by each architecture that
selects HAVE_ARCH_MUTEX_CPU_RELAX. The default is still cpu_relax(), so
this patch should not affect other architectures than System z for now.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1290437256.7455.4.camel@thinkpad>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                  | 3 +++
 arch/s390/Kconfig             | 1 +
 arch/s390/include/asm/mutex.h | 2 ++
 include/linux/mutex.h         | 4 ++++
 kernel/mutex.c                | 2 +-
 kernel/sched.c                | 3 ++-
 6 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8bf0fa652eb6..f78c2be4242b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
+config HAVE_ARCH_MUTEX_CPU_RELAX
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e0b98e71ff47..6c6d7b339aae 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,6 +99,7 @@ config S390
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_GET_USER_PAGES_FAST
+	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 458c1f7fbc18..688271f5f2e4 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,3 +7,5 @@
  */
 
 #include <asm-generic/mutex-dec.h>
+
+#define arch_mutex_cpu_relax()	barrier()
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f363bc8fdc74..94b48bd40dd7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
+#define arch_mutex_cpu_relax()	cpu_relax()
+#endif
+
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 200407c1502f..a5889fb28ecf 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * memory barriers as we'll eventually observe the right
 		 * values at the cost of a few extra spins.
 		 */
-		cpu_relax();
+		arch_mutex_cpu_relax();
 	}
 #endif
 	spin_lock_mutex(&lock->wait_lock, flags);
diff --git a/kernel/sched.c b/kernel/sched.c
index 3e8a7db951a6..abe7aec55763 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,6 +75,7 @@
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
@@ -3888,7 +3889,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
 		if (task_thread_info(rq->curr) != owner || need_resched())
 			return 0;
 
-		cpu_relax();
+		arch_mutex_cpu_relax();
 	}
 
 	return 1;
-- 
cgit v1.2.3-71-gd317


From 6c7e550f13f8ad82efb6a5653ae628c2543c1768 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <fbuihuu@gmail.com>
Date: Tue, 23 Nov 2010 16:21:43 +0100
Subject: perf: Introduce is_sampling_event()

and use it when appropriate.

Signed-off-by: Franck Bui-Huu <fbuihuu@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1290525705-6265-1-git-send-email-fbuihuu@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |  2 +-
 include/linux/perf_event.h       |  5 +++++
 kernel/perf_event.c              | 10 +++++-----
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 7c1a4c35fd41..c01dfec635db 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -442,7 +442,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
 
-	if (!hwc->sample_period) {
+	if (!is_sampling_event(event)) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index de2c41758e29..cbf04cc1e630 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -969,6 +969,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+static inline bool is_sampling_event(struct perf_event *event)
+{
+	return event->attr.sample_period != 0;
+}
+
 /*
  * Return 1 for a software event, 0 for a hardware event
  */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 43f757ccf831..880698488c91 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2514,7 +2514,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!event->attr.sample_period)
+	if (!is_sampling_event(event))
 		return -EINVAL;
 
 	if (copy_from_user(&value, arg, sizeof(value)))
@@ -4385,7 +4385,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 	if (!regs)
 		return;
 
-	if (!hwc->sample_period)
+	if (!is_sampling_event(event))
 		return;
 
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4548,7 +4548,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
@@ -4920,7 +4920,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
 
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		s64 period = local64_read(&hwc->period_left);
 
 		if (period) {
@@ -4941,7 +4941,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
-- 
cgit v1.2.3-71-gd317


From 004417a6d468e24399e383645c068b498eed84ad Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Nov 2010 18:38:29 +0100
Subject: perf, arch: Cleanup perf-pmu init vs lockup-detector

The perf hardware pmu got initialized at various points in the boot,
some before early_initcall() some after (notably arch_initcall).

The problem is that the NMI lockup detector is ran from early_initcall()
and expects the hardware pmu to be present.

Sanitize this by moving all architecture hardware pmu implementations to
initialize at early_initcall() and move the lockup detector to an explicit
initcall right after that.

Cc: paulus <paulus@samba.org>
Cc: davem <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1290707759.2145.119.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/include/asm/perf_event.h  |  6 ------
 arch/alpha/kernel/irq_alpha.c        |  2 --
 arch/alpha/kernel/perf_event.c       |  9 ++++++---
 arch/arm/kernel/perf_event.c         |  2 +-
 arch/mips/kernel/perf_event_mipsxx.c |  2 +-
 arch/powerpc/kernel/e500-pmu.c       |  2 +-
 arch/powerpc/kernel/mpc7450-pmu.c    |  2 +-
 arch/powerpc/kernel/power4-pmu.c     |  2 +-
 arch/powerpc/kernel/power5+-pmu.c    |  2 +-
 arch/powerpc/kernel/power5-pmu.c     |  2 +-
 arch/powerpc/kernel/power6-pmu.c     |  2 +-
 arch/powerpc/kernel/power7-pmu.c     |  2 +-
 arch/powerpc/kernel/ppc970-pmu.c     |  2 +-
 arch/sh/kernel/cpu/sh4/perf_event.c  |  2 +-
 arch/sh/kernel/cpu/sh4a/perf_event.c |  2 +-
 arch/sparc/include/asm/perf_event.h  |  4 ----
 arch/sparc/kernel/nmi.c              |  2 --
 arch/sparc/kernel/perf_event.c       |  7 +++++--
 arch/x86/include/asm/perf_event.h    |  2 --
 arch/x86/kernel/cpu/common.c         |  1 -
 arch/x86/kernel/cpu/perf_event.c     | 11 +++++++----
 include/linux/sched.h                |  4 ++++
 init/main.c                          |  1 +
 kernel/watchdog.c                    |  7 +++----
 24 files changed, 38 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca818f6..5996e7a6757e 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void)    { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb88e89..4c8bb374eb0a 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@ init_IRQ(void)
 	wrent(entInt, 0);
 
 	alpha_mv.init_irq();
-
-	init_hw_perf_events();
 }
 
 /*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc49683fb69..3283059b6e85 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_cpu()) {
 		pr_cont("No support for your CPU.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported CPU type!\n");
@@ -882,5 +883,7 @@ void __init init_hw_perf_events(void)
 	alpha_pmu = &ev67_pmu;
 
 	perf_pmu_register(&pmu);
-}
 
+	return 0;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..d45f70e5f2ee 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3038,7 +3038,7 @@ init_hw_perf_events(void)
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc07565..183e0d226669 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0d8943..b150b510510f 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
 	return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d72028f317..2cc5e0301d0b 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
 	return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cdda635..ead8b3c2649e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
 	return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de527d411..eca0ac595cb6 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
 	return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a729a9dd..d5ff0f64a5e6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
 	return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607bda8fb..31603927e376 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
 	return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7f6b40..593740fcb799 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
 	return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685de4df..9a6e093858fe 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
 	return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4bb71fe..748955df018d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
 	return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 580276525731..17e6bebfede0 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
 	return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1786da..4d3dbe3703e9 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)		\
 do {							\
 	unsigned long _pstate, _asi, _pil, _i7, _fp;	\
@@ -26,8 +24,6 @@ do {							\
 	(regs)->u_regs[UREG_I6] = _fp;			\
 	(regs)->u_regs[UREG_I7] = _i7;			\
 } while (0)
-#else
-static inline void init_hw_perf_events(void)	{ }
 #endif
 
 #endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba74c89..300f810142f5 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@ int __init nmi_init(void)
 			atomic_set(&nmi_active, -1);
 		}
 	}
-	if (!err)
-		init_hw_perf_events();
 
 	return err;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb55a2ae..75c5b1263970 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
 	return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_pmu()) {
 		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
 	perf_pmu_register(&pmu);
 	register_die_notifier(&perf_event_nmi_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_event);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c01dfec635db..817d2b195e8e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1353,7 +1353,7 @@ static void __init pmu_check_apic(void)
 	pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	struct event_constraint *c;
 	int err;
@@ -1368,11 +1368,11 @@ void __init init_hw_perf_events(void)
 		err = amd_pmu_init();
 		break;
 	default:
-		return;
+		return 0;
 	}
 	if (err != 0) {
 		pr_cont("no PMU driver, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pmu_check_apic();
@@ -1380,7 +1380,7 @@ void __init init_hw_perf_events(void)
 	/* sanity check that the hardware exists or is emulated */
 	if (!check_hw_exists()) {
 		pr_cont("Broken PMU hardware detected, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -1431,7 +1431,10 @@ void __init init_hw_perf_events(void)
 
 	perf_pmu_register(&pmu);
 	perf_cpu_notifier(x86_pmu_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c79e921a68b..d2e63d1e725c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
+void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
 {
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
+static inline void lockup_detector_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
diff --git a/init/main.c b/init/main.c
index 8646401f7a0e..261ad7b3fe0b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -882,6 +882,7 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	lockup_detector_init();
 
 	smp_init();
 	sched_init_smp();
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..cad4e42060a9 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -547,13 +547,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
 
 	if (no_watchdog)
-		return 0;
+		return;
 
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	WARN_ON(notifier_to_errno(err));
@@ -561,6 +561,5 @@ static int __init spawn_watchdog_task(void)
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 
-	return 0;
+	return;
 }
-early_initcall(spawn_watchdog_task);
-- 
cgit v1.2.3-71-gd317


From 5a0d2268d259886f0c87131639d19eb4a67b4532 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 23 Nov 2010 10:42:02 +0000
Subject: net: add netif_tx_queue_frozen_or_stopped

When testing struct netdev_queue state against FROZEN bit, we also test
XOFF bit. We can test both bits at once and save some cycles.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++--
 net/core/netpoll.c        | 3 +--
 net/core/pktgen.c         | 2 +-
 net/sched/sch_generic.c   | 8 +++-----
 net/sched/sch_teql.c      | 3 +--
 5 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index badf9285fe0d..7c6ae2f4b9ab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -493,6 +493,8 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t {
 	__QUEUE_STATE_XOFF,
 	__QUEUE_STATE_FROZEN,
+#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF)		| \
+				    (1 << __QUEUE_STATE_FROZEN))
 };
 
 struct netdev_queue {
@@ -1629,9 +1631,9 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
-static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue)
+static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
 {
-	return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state);
+	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
 }
 
 /**
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af0..ee38acb6d463 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq) ||
+		if (netif_tx_queue_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2e57830cbeb2..2953b2abc971 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3527,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
+	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5dbb3cd96e59..7f0bd8952646 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) &&
-		    !netif_tx_queue_frozen(txq)) {
+		if (!netif_tx_queue_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+	if (!netif_tx_queue_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq)))
+	if (ret && netif_tx_queue_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 401af9596709..106479a7c94a 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -309,8 +309,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_stopped(slave_txq) &&
-				    !netif_tx_queue_frozen(slave_txq) &&
+				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
-- 
cgit v1.2.3-71-gd317


From 85beb5869a4f6abb52a7cf8e01de6fa57e9ee47d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 24 Nov 2010 16:23:34 -0500
Subject: tracing/slab: Move kmalloc tracepoint out of inline code

The tracepoint for kmalloc is in the slab inlined code which causes
every instance of kmalloc to have the tracepoint.

This patch moves the tracepoint out of the inline code to the
slab C file, which removes a large number of inlined trace
points.

  objdump -dr vmlinux.slab| grep 'jmpq.*<trace_kmalloc' |wc -l
213
  objdump -dr vmlinux.slab.patched| grep 'jmpq.*<trace_kmalloc' |wc -l
1

This also has a nice impact on size.

   text	   data	    bss	    dec	    hex	filename
7023060	2121564	2482432	11627056	 b16a30	vmlinux.slab
6970579	2109772	2482432	11562783	 b06f1f	vmlinux.slab.patched

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab_def.h | 33 +++++++++++++--------------------
 mm/slab.c                | 38 +++++++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 791a502f6906..83203ae9390b 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -138,11 +138,12 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern void *kmem_cache_alloc_trace(size_t size,
+				    struct kmem_cache *cachep, gfp_t flags);
 extern size_t slab_buffer_size(struct kmem_cache *cachep);
 #else
 static __always_inline void *
-kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
 {
 	return kmem_cache_alloc(cachep, flags);
 }
@@ -179,10 +180,7 @@ found:
 #endif
 			cachep = malloc_sizes[i].cs_cachep;
 
-		ret = kmem_cache_alloc_notrace(cachep, flags);
-
-		trace_kmalloc(_THIS_IP_, ret,
-			      size, slab_buffer_size(cachep), flags);
+		ret = kmem_cache_alloc_trace(size, cachep, flags);
 
 		return ret;
 	}
@@ -194,14 +192,16 @@ extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
 extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
-					   gfp_t flags,
-					   int nodeid);
+extern void *kmem_cache_alloc_node_trace(size_t size,
+					 struct kmem_cache *cachep,
+					 gfp_t flags,
+					 int nodeid);
 #else
 static __always_inline void *
-kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
-			      gfp_t flags,
-			      int nodeid)
+kmem_cache_alloc_node_trace(size_t size,
+			    struct kmem_cache *cachep,
+			    gfp_t flags,
+			    int nodeid)
 {
 	return kmem_cache_alloc_node(cachep, flags, nodeid);
 }
@@ -210,7 +210,6 @@ kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *cachep;
-	void *ret;
 
 	if (__builtin_constant_p(size)) {
 		int i = 0;
@@ -234,13 +233,7 @@ found:
 #endif
 			cachep = malloc_sizes[i].cs_cachep;
 
-		ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
-		trace_kmalloc_node(_THIS_IP_, ret,
-				   size, slab_buffer_size(cachep),
-				   flags, node);
-
-		return ret;
+		return kmem_cache_alloc_node_trace(size, cachep, flags, node);
 	}
 	return __kmalloc_node(size, flags, node);
 }
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..dfcc8885d7d5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3653,11 +3653,18 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+void *
+kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
 {
-	return __cache_alloc(cachep, flags, __builtin_return_address(0));
+	void *ret;
+
+	ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+	trace_kmalloc(_RET_IP_, ret,
+		      size, slab_buffer_size(cachep), flags);
+	return ret;
 }
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #endif
 
 /**
@@ -3705,31 +3712,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
-				    gfp_t flags,
-				    int nodeid)
+void *kmem_cache_alloc_node_trace(size_t size,
+				  struct kmem_cache *cachep,
+				  gfp_t flags,
+				  int nodeid)
 {
-	return __cache_alloc_node(cachep, flags, nodeid,
+	void *ret;
+
+	ret = __cache_alloc_node(cachep, flags, nodeid,
 				  __builtin_return_address(0));
+	trace_kmalloc_node(_RET_IP_, ret,
+			   size, slab_buffer_size(cachep),
+			   flags, nodeid);
+	return ret;
 }
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
 #endif
 
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
-	void *ret;
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
-	trace_kmalloc_node((unsigned long) caller, ret,
-			   size, cachep->buffer_size, flags, node);
-
-	return ret;
+	return kmem_cache_alloc_node_trace(size, cachep, flags, node);
 }
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
-- 
cgit v1.2.3-71-gd317


From ce6ada35bdf710d16582cc4869c26722547e6f11 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serge@hallyn.com>
Date: Thu, 25 Nov 2010 17:11:32 +0000
Subject: security: Define CAP_SYSLOG

Privileged syslog operations currently require CAP_SYS_ADMIN.  Split
this off into a new CAP_SYSLOG privilege which we can sanely take away
from a container through the capability bounding set.

With this patch, an lxc container can be prevented from messing with
the host's syslog (i.e. dmesg -c).

Changelog: mar 12 2010: add selinux capability2:cap_syslog perm
Changelog: nov 22 2010:
	. port to new kernel
	. add a WARN_ONCE if userspace isn't using CAP_SYSLOG

Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Acked-By: Kees Cook <kees.cook@canonical.com>
Cc: James Morris <jmorris@namei.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: "Christopher J. PeBenito" <cpebenito@tresys.com>
Cc: Eric Paris <eparis@parisplace.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h          | 7 +++++--
 kernel/printk.c                     | 8 +++++++-
 security/selinux/include/classmap.h | 2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 90012b9ddbf3..fb16a3699b99 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -246,7 +246,6 @@ struct cpu_vfs_cap_data {
 /* Allow configuration of the secure attention key */
 /* Allow administration of the random device */
 /* Allow examination and configuration of disk quotas */
-/* Allow configuring the kernel's syslog (printk behaviour) */
 /* Allow setting the domainname */
 /* Allow setting the hostname */
 /* Allow calling bdflush() */
@@ -352,7 +351,11 @@ struct cpu_vfs_cap_data {
 
 #define CAP_MAC_ADMIN        33
 
-#define CAP_LAST_CAP         CAP_MAC_ADMIN
+/* Allow configuring the kernel's syslog (printk behaviour) */
+
+#define CAP_SYSLOG           34
+
+#define CAP_LAST_CAP         CAP_SYSLOG
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 9a2264fc42ca..0712380737b3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -283,8 +283,14 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			return -EPERM;
 		if ((type != SYSLOG_ACTION_READ_ALL &&
 		     type != SYSLOG_ACTION_SIZE_BUFFER) &&
-		    !capable(CAP_SYS_ADMIN))
+		    !capable(CAP_SYSLOG)) {
+			/* remove after 2.6.38 */
+			if (capable(CAP_SYS_ADMIN))
+				WARN_ONCE(1, "Attempt to access syslog with "
+				  "CAP_SYS_ADMIN but no CAP_SYSLOG "
+				  "(deprecated and denied).\n");
 			return -EPERM;
+		}
 	}
 
 	error = security_syslog(type);
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 8858d2b2d4b6..7ed3663332ec 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -142,7 +142,7 @@ struct security_class_mapping secclass_map[] = {
 	    "node_bind", "name_connect", NULL } },
 	{ "memprotect", { "mmap_zero", NULL } },
 	{ "peer", { "recv", NULL } },
-	{ "capability2", { "mac_override", "mac_admin", NULL } },
+	{ "capability2", { "mac_override", "mac_admin", "syslog", NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket",
 	  { COMMON_SOCK_PERMS, NULL } },
-- 
cgit v1.2.3-71-gd317


From dc88e46029486ed475c71fe1bb696d39511ac8fe Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 23 Nov 2010 17:50:31 -0500
Subject: lib: hex2bin converts ascii hexadecimal string to binary

Similar to the kgdb_hex2mem() code, hex2bin converts a string
to binary using the hex_to_bin() library call.

Changelog:
- Replace parameter names with src/dst (based on David Howell's comment)
- Add 'const' where needed (based on David Howell's comment)
- Replace int with size_t (based on David Howell's comment)

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge E. Hallyn <serge@hallyn.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/kernel.h |  1 +
 lib/hexdump.c          | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a35b4f7332f0..d0fbc043de60 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -265,6 +265,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 }
 
 extern int hex_to_bin(char ch);
+extern void hex2bin(u8 *dst, const char *src, size_t count);
 
 /*
  * General tracing related utility functions - trace_printk(),
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 5d7a4802c562..b66b2bd67952 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -33,6 +33,22 @@ int hex_to_bin(char ch)
 }
 EXPORT_SYMBOL(hex_to_bin);
 
+/**
+ * hex2bin - convert an ascii hexadecimal string to its binary representation
+ * @dst: binary result
+ * @src: ascii hexadecimal string
+ * @count: result length
+ */
+void hex2bin(u8 *dst, const char *src, size_t count)
+{
+	while (count--) {
+		*dst = hex_to_bin(*src++) << 4;
+		*dst += hex_to_bin(*src++);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(hex2bin);
+
 /**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
-- 
cgit v1.2.3-71-gd317


From c749ba912e87ccebd674ae24b97462176c63732e Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 23 Nov 2010 18:54:16 -0500
Subject: key: add tpm_send command

Add internal kernel tpm_send() command used to seal/unseal keys.

Changelog:
- replaced module_put in tpm_send() with new tpm_chip_put() wrapper
  (suggested by David Howells)
- Make tpm_send() cmd argument a 'void *' (suggested by David Howells)

Signed-off-by: David Safford <safford@watson.ibm.com>
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/char/tpm/tpm.c | 16 ++++++++++++++++
 include/linux/tpm.h    |  4 ++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 26c09f3b4a74..068bac858b4a 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -780,6 +780,22 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
 }
 EXPORT_SYMBOL_GPL(tpm_pcr_extend);
 
+int tpm_send(u32 chip_num, void *cmd, size_t buflen)
+{
+	struct tpm_chip *chip;
+	int rc;
+
+	chip = tpm_chip_find_get(chip_num);
+	if (chip == NULL)
+		return -ENODEV;
+
+	rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
+
+	tpm_chip_put(chip);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_send);
+
 ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
 		      char *buf)
 {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index ac5d1c1285d9..fdc718abf83b 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -31,6 +31,7 @@
 
 extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf);
 extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash);
+extern int tpm_send(u32 chip_num, void *cmd, size_t buflen);
 #else
 static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) {
 	return -ENODEV;
@@ -38,5 +39,8 @@ static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) {
 static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) {
 	return -ENODEV;
 }
+static inline int tpm_send(u32 chip_num, void *cmd, size_t buflen) {
+	return -ENODEV;
+}
 #endif
 #endif
-- 
cgit v1.2.3-71-gd317


From d00a1c72f7f4661212299e6cb132dfa58030bcdb Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 23 Nov 2010 17:50:34 -0500
Subject: keys: add new trusted key-type

Define a new kernel key-type called 'trusted'.  Trusted keys are random
number symmetric keys, generated and RSA-sealed by the TPM.  The TPM
only unseals the keys, if the boot PCRs and other criteria match.
Userspace can only ever see encrypted blobs.

Based on suggestions by Jason Gunthorpe, several new options have been
added to support additional usages.

The new options are:
migratable=  designates that the key may/may not ever be updated
             (resealed under a new key, new pcrinfo or new auth.)

pcrlock=n    extends the designated PCR 'n' with a random value,
             so that a key sealed to that PCR may not be unsealed
             again until after a reboot.

keyhandle=   specifies the sealing/unsealing key handle.

keyauth=     specifies the sealing/unsealing key auth.

blobauth=    specifies the sealed data auth.

Implementation of a kernel reserved locality for trusted keys will be
investigated for a possible future extension.

Changelog:
- Updated and added examples to Documentation/keys-trusted-encrypted.txt
- Moved generic TPM constants to include/linux/tpm_command.h
  (David Howell's suggestion.)
- trusted_defined.c: replaced kzalloc with kmalloc, added pcrlock failure
  error handling, added const qualifiers where appropriate.
- moved to late_initcall
- updated from hash to shash (suggestion by David Howells)
- reduced worst stack usage (tpm_seal) from 530 to 312 bytes
- moved documentation to Documentation directory (suggestion by David Howells)
- all the other code cleanups suggested by David Howells
- Add pcrlock CAP_SYS_ADMIN dependency (based on comment by Jason Gunthorpe)
- New options: migratable, pcrlock, keyhandle, keyauth, blobauth (based on
  discussions with Jason Gunthorpe)
- Free payload on failure to create key(reported/fixed by Roberto Sassu)
- Updated Kconfig and other descriptions (based on Serge Hallyn's suggestion)
- Replaced kzalloc() with kmalloc() (reported by Serge Hallyn)

Signed-off-by: David Safford <safford@watson.ibm.com>
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 Documentation/keys-trusted-encrypted.txt |  145 ++++
 include/keys/trusted-type.h              |   31 +
 include/linux/tpm_command.h              |   28 +
 security/Kconfig                         |   15 +
 security/keys/Makefile                   |    1 +
 security/keys/trusted_defined.c          | 1151 ++++++++++++++++++++++++++++++
 security/keys/trusted_defined.h          |  134 ++++
 7 files changed, 1505 insertions(+)
 create mode 100644 Documentation/keys-trusted-encrypted.txt
 create mode 100644 include/keys/trusted-type.h
 create mode 100644 include/linux/tpm_command.h
 create mode 100644 security/keys/trusted_defined.c
 create mode 100644 security/keys/trusted_defined.h

(limited to 'include/linux')

diff --git a/Documentation/keys-trusted-encrypted.txt b/Documentation/keys-trusted-encrypted.txt
new file mode 100644
index 000000000000..8fb79bc1ac4b
--- /dev/null
+++ b/Documentation/keys-trusted-encrypted.txt
@@ -0,0 +1,145 @@
+			Trusted and Encrypted Keys
+
+Trusted and Encrypted Keys are two new key types added to the existing kernel
+key ring service.  Both of these new types are variable length symmetic keys,
+and in both cases all keys are created in the kernel, and user space sees,
+stores, and loads only encrypted blobs.  Trusted Keys require the availability
+of a Trusted Platform Module (TPM) chip for greater security, while Encrypted
+Keys can be used on any system.  All user level blobs, are displayed and loaded
+in hex ascii for convenience, and are integrity verified.
+
+Trusted Keys use a TPM both to generate and to seal the keys.  Keys are sealed
+under a 2048 bit RSA key in the TPM, and optionally sealed to specified PCR
+(integrity measurement) values, and only unsealed by the TPM, if PCRs and blob
+integrity verifications match.  A loaded Trusted Key can be updated with new
+(future) PCR values, so keys are easily migrated to new pcr values, such as
+when the kernel and initramfs are updated.  The same key can have many saved
+blobs under different PCR values, so multiple boots are easily supported.
+
+By default, trusted keys are sealed under the SRK, which has the default
+authorization value (20 zeros).  This can be set at takeownership time with the
+trouser's utility: "tpm_takeownership -u -z".
+
+Usage:
+    keyctl add trusted name "new keylen [options]" ring
+    keyctl add trusted name "load hex_blob [pcrlock=pcrnum]" ring
+    keyctl update key "update [options]"
+    keyctl print keyid
+
+    options:
+       keyhandle= ascii hex value of sealing key default 0x40000000 (SRK)
+       keyauth=	  ascii hex auth for sealing key default 0x00...i
+		  (40 ascii zeros)
+       blobauth=  ascii hex auth for sealed data default 0x00...
+		  (40 ascii zeros)
+       blobauth=  ascii hex auth for sealed data default 0x00...
+		  (40 ascii zeros)
+       pcrinfo=	  ascii hex of PCR_INFO or PCR_INFO_LONG (no default)
+       pcrlock=	  pcr number to be extended to "lock" blob
+       migratable= 0|1 indicating permission to reseal to new PCR values,
+                   default 1 (resealing allowed)
+
+"keyctl print" returns an ascii hex copy of the sealed key, which is in standard
+TPM_STORED_DATA format.  The key length for new keys are always in bytes.
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits), the upper limit is to fit
+within the 2048 bit SRK (RSA) keylength, with all necessary structure/padding.
+
+Encrypted keys do not depend on a TPM, and are faster, as they use AES for
+encryption/decryption.  New keys are created from kernel generated random
+numbers, and are encrypted/decrypted using a specified 'master' key.  The
+'master' key can either be a trusted-key or user-key type.  The main
+disadvantage of encrypted keys is that if they are not rooted in a trusted key,
+they are only as secure as the user key encrypting them.  The master user key
+should therefore be loaded in as secure a way as possible, preferably early in
+boot.
+
+Usage:
+  keyctl add encrypted name "new key-type:master-key-name keylen" ring
+  keyctl add encrypted name "load hex_blob" ring
+  keyctl update keyid "update key-type:master-key-name"
+
+where 'key-type' is either 'trusted' or 'user'.
+
+Examples of trusted and encrypted key usage:
+
+Create and save a trusted key named "kmk" of length 32 bytes:
+
+    $ keyctl add trusted kmk "new 32" @u
+    440502848
+
+    $ keyctl show
+    Session Keyring
+           -3 --alswrv    500   500  keyring: _ses
+     97833714 --alswrv    500    -1   \_ keyring: _uid.500
+    440502848 --alswrv    500   500       \_ trusted: kmk
+
+    $ keyctl print 440502848
+    0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915
+    3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b
+    27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722
+    a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec
+    d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d
+    dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0
+    f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b
+    e4a8aea2b607ec96931e6f4d4fe563ba
+
+    $ keyctl pipe 440502848 > kmk.blob
+
+Load a trusted key from the saved blob:
+
+    $ keyctl add trusted kmk "load `cat kmk.blob`" @u
+    268728824
+
+    $ keyctl print 268728824
+    0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915
+    3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b
+    27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722
+    a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec
+    d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d
+    dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0
+    f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b
+    e4a8aea2b607ec96931e6f4d4fe563ba
+
+Reseal a trusted key under new pcr values:
+
+    $ keyctl update 268728824 "update pcrinfo=`cat pcr.blob`"
+    $ keyctl print 268728824
+    010100000000002c0002800093c35a09b70fff26e7a98ae786c641e678ec6ffb6b46d805
+    77c8a6377aed9d3219c6dfec4b23ffe3000001005d37d472ac8a44023fbb3d18583a4f73
+    d3a076c0858f6f1dcaa39ea0f119911ff03f5406df4f7f27f41da8d7194f45c9f4e00f2e
+    df449f266253aa3f52e55c53de147773e00f0f9aca86c64d94c95382265968c354c5eab4
+    9638c5ae99c89de1e0997242edfb0b501744e11ff9762dfd951cffd93227cc513384e7e6
+    e782c29435c7ec2edafaa2f4c1fe6e7a781b59549ff5296371b42133777dcc5b8b971610
+    94bc67ede19e43ddb9dc2baacad374a36feaf0314d700af0a65c164b7082401740e489c9
+    7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef
+    df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8
+
+Create and save an encrypted key "evm" using the above trusted key "kmk":
+
+    $ keyctl add encrypted evm "new trusted:kmk 32" @u
+    159771175
+
+    $ keyctl print 159771175
+    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
+    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
+    5972dcb82ab2dde83376d82b2e3c09ffc
+
+    $ keyctl pipe 159771175 > evm.blob
+
+Load an encrypted key "evm" from saved blob:
+
+    $ keyctl add encrypted evm "load `cat evm.blob`" @u
+    831684262
+
+    $ keyctl print 831684262
+    trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
+    be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
+    5972dcb82ab2dde83376d82b2e3c09ffc
+
+
+The initial consumer of trusted keys is EVM, which at boot time needs a high
+quality symmetric key for HMAC protection of file metadata.  The use of a
+trusted key provides strong guarantees that the EVM key has not been
+compromised by a user level problem, and when sealed to specific boot PCR
+values, protects against boot and offline attacks.  Other uses for trusted and
+encrypted keys, such as for disk and file encryption are anticipated.
diff --git a/include/keys/trusted-type.h b/include/keys/trusted-type.h
new file mode 100644
index 000000000000..56f82e5c9975
--- /dev/null
+++ b/include/keys/trusted-type.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ * Author: David Safford <safford@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#ifndef _KEYS_TRUSTED_TYPE_H
+#define _KEYS_TRUSTED_TYPE_H
+
+#include <linux/key.h>
+#include <linux/rcupdate.h>
+
+#define MIN_KEY_SIZE			32
+#define MAX_KEY_SIZE			128
+#define MAX_BLOB_SIZE			320
+
+struct trusted_key_payload {
+	struct rcu_head rcu;
+	unsigned int key_len;
+	unsigned int blob_len;
+	unsigned char migratable;
+	unsigned char key[MAX_KEY_SIZE + 1];
+	unsigned char blob[MAX_BLOB_SIZE];
+};
+
+extern struct key_type key_type_trusted;
+
+#endif /* _KEYS_TRUSTED_TYPE_H */
diff --git a/include/linux/tpm_command.h b/include/linux/tpm_command.h
new file mode 100644
index 000000000000..727512e249b5
--- /dev/null
+++ b/include/linux/tpm_command.h
@@ -0,0 +1,28 @@
+#ifndef __LINUX_TPM_COMMAND_H__
+#define __LINUX_TPM_COMMAND_H__
+
+/*
+ * TPM Command constants from specifications at
+ * http://www.trustedcomputinggroup.org
+ */
+
+/* Command TAGS */
+#define TPM_TAG_RQU_COMMAND             193
+#define TPM_TAG_RQU_AUTH1_COMMAND       194
+#define TPM_TAG_RQU_AUTH2_COMMAND       195
+#define TPM_TAG_RSP_COMMAND             196
+#define TPM_TAG_RSP_AUTH1_COMMAND       197
+#define TPM_TAG_RSP_AUTH2_COMMAND       198
+
+/* Command Ordinals */
+#define TPM_ORD_GETRANDOM               70
+#define TPM_ORD_OSAP                    11
+#define TPM_ORD_OIAP                    10
+#define TPM_ORD_SEAL                    23
+#define TPM_ORD_UNSEAL                  24
+
+/* Other constants */
+#define SRKHANDLE                       0x40000000
+#define TPM_NONCE_SIZE                  20
+
+#endif
diff --git a/security/Kconfig b/security/Kconfig
index e80da955e687..24b8f9b491b8 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -21,6 +21,21 @@ config KEYS
 
 	  If you are unsure as to whether this is required, answer N.
 
+config TRUSTED_KEYS
+	tristate "TRUSTED KEYS"
+	depends on KEYS && TCG_TPM
+	select CRYPTO
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	help
+	  This option provides support for creating, sealing, and unsealing
+	  keys in the kernel. Trusted keys are random number symmetric keys,
+	  generated and RSA-sealed by the TPM. The TPM only unseals the keys,
+	  if the boot PCRs and other criteria match.  Userspace will only ever
+	  see encrypted blobs.
+
+	  If you are unsure as to whether this is required, answer N.
+
 config KEYS_DEBUG_PROC_KEYS
 	bool "Enable the /proc/keys file by which keys may be viewed"
 	depends on KEYS
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 74d5447d7df7..fcb107020b4a 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -13,6 +13,7 @@ obj-y := \
 	request_key_auth.o \
 	user_defined.o
 
+obj-$(CONFIG_TRUSTED_KEYS) += trusted_defined.o
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/security/keys/trusted_defined.c b/security/keys/trusted_defined.c
new file mode 100644
index 000000000000..1bec72e7596d
--- /dev/null
+++ b/security/keys/trusted_defined.c
@@ -0,0 +1,1151 @@
+/*
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * Author:
+ * David Safford <safford@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * See Documentation/keys-trusted-encrypted.txt
+ */
+
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/string.h>
+#include <keys/user-type.h>
+#include <keys/trusted-type.h>
+#include <linux/key-type.h>
+#include <linux/rcupdate.h>
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <linux/capability.h>
+#include <linux/tpm.h>
+#include <linux/tpm_command.h>
+
+#include "trusted_defined.h"
+
+static const char hmac_alg[] = "hmac(sha1)";
+static const char hash_alg[] = "sha1";
+
+struct sdesc {
+	struct shash_desc shash;
+	char ctx[];
+};
+
+static struct crypto_shash *hashalg;
+static struct crypto_shash *hmacalg;
+
+static struct sdesc *init_sdesc(struct crypto_shash *alg)
+{
+	struct sdesc *sdesc;
+	int size;
+
+	size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
+	sdesc = kmalloc(size, GFP_KERNEL);
+	if (!sdesc)
+		return ERR_PTR(-ENOMEM);
+	sdesc->shash.tfm = alg;
+	sdesc->shash.flags = 0x0;
+	return sdesc;
+}
+
+static int TSS_sha1(const unsigned char *data, const unsigned int datalen,
+		    unsigned char *digest)
+{
+	struct sdesc *sdesc;
+	int ret;
+
+	sdesc = init_sdesc(hashalg);
+	if (IS_ERR(sdesc)) {
+		pr_info("trusted_key: can't alloc %s\n", hash_alg);
+		return PTR_ERR(sdesc);
+	}
+
+	ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest);
+	kfree(sdesc);
+	return ret;
+}
+
+static int TSS_rawhmac(unsigned char *digest, const unsigned char *key,
+		       const unsigned int keylen, ...)
+{
+	struct sdesc *sdesc;
+	va_list argp;
+	unsigned int dlen;
+	unsigned char *data;
+	int ret;
+
+	sdesc = init_sdesc(hmacalg);
+	if (IS_ERR(sdesc)) {
+		pr_info("trusted_key: can't alloc %s\n", hmac_alg);
+		return PTR_ERR(sdesc);
+	}
+
+	ret = crypto_shash_setkey(hmacalg, key, keylen);
+	if (ret < 0)
+		goto out;
+	ret = crypto_shash_init(&sdesc->shash);
+	if (ret < 0)
+		goto out;
+
+	va_start(argp, keylen);
+	for (;;) {
+		dlen = va_arg(argp, unsigned int);
+		if (dlen == 0)
+			break;
+		data = va_arg(argp, unsigned char *);
+		if (data == NULL)
+			return -EINVAL;
+		ret = crypto_shash_update(&sdesc->shash, data, dlen);
+		if (ret < 0)
+			goto out;
+	}
+	va_end(argp);
+	ret = crypto_shash_final(&sdesc->shash, digest);
+out:
+	kfree(sdesc);
+	return ret;
+}
+
+/*
+ * calculate authorization info fields to send to TPM
+ */
+static uint32_t TSS_authhmac(unsigned char *digest, const unsigned char *key,
+			     const unsigned int keylen, unsigned char *h1,
+			     unsigned char *h2, unsigned char h3, ...)
+{
+	unsigned char paramdigest[SHA1_DIGEST_SIZE];
+	struct sdesc *sdesc;
+	unsigned int dlen;
+	unsigned char *data;
+	unsigned char c;
+	int ret;
+	va_list argp;
+
+	sdesc = init_sdesc(hashalg);
+	if (IS_ERR(sdesc)) {
+		pr_info("trusted_key: can't alloc %s\n", hash_alg);
+		return PTR_ERR(sdesc);
+	}
+
+	c = h3;
+	ret = crypto_shash_init(&sdesc->shash);
+	if (ret < 0)
+		goto out;
+	va_start(argp, h3);
+	for (;;) {
+		dlen = va_arg(argp, unsigned int);
+		if (dlen == 0)
+			break;
+		data = va_arg(argp, unsigned char *);
+		ret = crypto_shash_update(&sdesc->shash, data, dlen);
+		if (ret < 0)
+			goto out;
+	}
+	va_end(argp);
+	ret = crypto_shash_final(&sdesc->shash, paramdigest);
+	if (!ret)
+		TSS_rawhmac(digest, key, keylen, SHA1_DIGEST_SIZE,
+			    paramdigest, TPM_NONCE_SIZE, h1,
+			    TPM_NONCE_SIZE, h2, 1, &c, 0, 0);
+out:
+	kfree(sdesc);
+	return ret;
+}
+
+/*
+ * verify the AUTH1_COMMAND (Seal) result from TPM
+ */
+static uint32_t TSS_checkhmac1(unsigned char *buffer,
+			       const uint32_t command,
+			       const unsigned char *ononce,
+			       const unsigned char *key,
+			       const unsigned int keylen, ...)
+{
+	uint32_t bufsize;
+	uint16_t tag;
+	uint32_t ordinal;
+	uint32_t result;
+	unsigned char *enonce;
+	unsigned char *continueflag;
+	unsigned char *authdata;
+	unsigned char testhmac[SHA1_DIGEST_SIZE];
+	unsigned char paramdigest[SHA1_DIGEST_SIZE];
+	struct sdesc *sdesc;
+	unsigned int dlen;
+	unsigned int dpos;
+	va_list argp;
+	int ret;
+
+	bufsize = LOAD32(buffer, TPM_SIZE_OFFSET);
+	tag = LOAD16(buffer, 0);
+	ordinal = command;
+	result = LOAD32N(buffer, TPM_RETURN_OFFSET);
+	if (tag == TPM_TAG_RSP_COMMAND)
+		return 0;
+	if (tag != TPM_TAG_RSP_AUTH1_COMMAND)
+		return -EINVAL;
+	authdata = buffer + bufsize - SHA1_DIGEST_SIZE;
+	continueflag = authdata - 1;
+	enonce = continueflag - TPM_NONCE_SIZE;
+
+	sdesc = init_sdesc(hashalg);
+	if (IS_ERR(sdesc)) {
+		pr_info("trusted_key: can't alloc %s\n", hash_alg);
+		return PTR_ERR(sdesc);
+	}
+	ret = crypto_shash_init(&sdesc->shash);
+	if (ret < 0)
+		goto out;
+	ret = crypto_shash_update(&sdesc->shash, (const u8 *)&result,
+				  sizeof result);
+	if (ret < 0)
+		goto out;
+	ret = crypto_shash_update(&sdesc->shash, (const u8 *)&ordinal,
+				  sizeof ordinal);
+	if (ret < 0)
+		goto out;
+	va_start(argp, keylen);
+	for (;;) {
+		dlen = va_arg(argp, unsigned int);
+		if (dlen == 0)
+			break;
+		dpos = va_arg(argp, unsigned int);
+		ret = crypto_shash_update(&sdesc->shash, buffer + dpos, dlen);
+		if (ret < 0)
+			goto out;
+	}
+	va_end(argp);
+	ret = crypto_shash_final(&sdesc->shash, paramdigest);
+	if (ret < 0)
+		goto out;
+	ret = TSS_rawhmac(testhmac, key, keylen, SHA1_DIGEST_SIZE, paramdigest,
+			  TPM_NONCE_SIZE, enonce, TPM_NONCE_SIZE, ononce,
+			  1, continueflag, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (memcmp(testhmac, authdata, SHA1_DIGEST_SIZE))
+		ret = -EINVAL;
+out:
+	kfree(sdesc);
+	return ret;
+}
+
+/*
+ * verify the AUTH2_COMMAND (unseal) result from TPM
+ */
+static uint32_t TSS_checkhmac2(unsigned char *buffer,
+			       const uint32_t command,
+			       const unsigned char *ononce,
+			       const unsigned char *key1,
+			       const unsigned int keylen1,
+			       const unsigned char *key2,
+			       const unsigned int keylen2, ...)
+{
+	uint32_t bufsize;
+	uint16_t tag;
+	uint32_t ordinal;
+	uint32_t result;
+	unsigned char *enonce1;
+	unsigned char *continueflag1;
+	unsigned char *authdata1;
+	unsigned char *enonce2;
+	unsigned char *continueflag2;
+	unsigned char *authdata2;
+	unsigned char testhmac1[SHA1_DIGEST_SIZE];
+	unsigned char testhmac2[SHA1_DIGEST_SIZE];
+	unsigned char paramdigest[SHA1_DIGEST_SIZE];
+	struct sdesc *sdesc;
+	unsigned int dlen;
+	unsigned int dpos;
+	va_list argp;
+	int ret;
+
+	bufsize = LOAD32(buffer, TPM_SIZE_OFFSET);
+	tag = LOAD16(buffer, 0);
+	ordinal = command;
+	result = LOAD32N(buffer, TPM_RETURN_OFFSET);
+
+	if (tag == TPM_TAG_RSP_COMMAND)
+		return 0;
+	if (tag != TPM_TAG_RSP_AUTH2_COMMAND)
+		return -EINVAL;
+	authdata1 = buffer + bufsize - (SHA1_DIGEST_SIZE + 1
+			+ SHA1_DIGEST_SIZE + SHA1_DIGEST_SIZE);
+	authdata2 = buffer + bufsize - (SHA1_DIGEST_SIZE);
+	continueflag1 = authdata1 - 1;
+	continueflag2 = authdata2 - 1;
+	enonce1 = continueflag1 - TPM_NONCE_SIZE;
+	enonce2 = continueflag2 - TPM_NONCE_SIZE;
+
+	sdesc = init_sdesc(hashalg);
+	if (IS_ERR(sdesc)) {
+		pr_info("trusted_key: can't alloc %s\n", hash_alg);
+		return PTR_ERR(sdesc);
+	}
+	ret = crypto_shash_init(&sdesc->shash);
+	if (ret < 0)
+		goto out;
+	ret = crypto_shash_update(&sdesc->shash, (const u8 *)&result,
+				  sizeof result);
+	if (ret < 0)
+		goto out;
+	ret = crypto_shash_update(&sdesc->shash, (const u8 *)&ordinal,
+				  sizeof ordinal);
+	if (ret < 0)
+		goto out;
+
+	va_start(argp, keylen2);
+	for (;;) {
+		dlen = va_arg(argp, unsigned int);
+		if (dlen == 0)
+			break;
+		dpos = va_arg(argp, unsigned int);
+		ret = crypto_shash_update(&sdesc->shash, buffer + dpos, dlen);
+		if (ret < 0)
+			goto out;
+	}
+	ret = crypto_shash_final(&sdesc->shash, paramdigest);
+	if (ret < 0)
+		goto out;
+
+	ret = TSS_rawhmac(testhmac1, key1, keylen1, SHA1_DIGEST_SIZE,
+			  paramdigest, TPM_NONCE_SIZE, enonce1,
+			  TPM_NONCE_SIZE, ononce, 1, continueflag1, 0, 0);
+	if (memcmp(testhmac1, authdata1, SHA1_DIGEST_SIZE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	ret = TSS_rawhmac(testhmac2, key2, keylen2, SHA1_DIGEST_SIZE,
+			  paramdigest, TPM_NONCE_SIZE, enonce2,
+			  TPM_NONCE_SIZE, ononce, 1, continueflag2, 0, 0);
+	if (memcmp(testhmac2, authdata2, SHA1_DIGEST_SIZE))
+		ret = -EINVAL;
+out:
+	kfree(sdesc);
+	return ret;
+}
+
+/*
+ * For key specific tpm requests, we will generate and send our
+ * own TPM command packets using the drivers send function.
+ */
+static int trusted_tpm_send(const u32 chip_num, unsigned char *cmd,
+			    size_t buflen)
+{
+	int rc;
+
+	dump_tpm_buf(cmd);
+	rc = tpm_send(chip_num, cmd, buflen);
+	dump_tpm_buf(cmd);
+	if (rc > 0)
+		/* Can't return positive return codes values to keyctl */
+		rc = -EPERM;
+	return rc;
+}
+
+/*
+ * get a random value from TPM
+ */
+static int tpm_get_random(struct tpm_buf *tb, unsigned char *buf, uint32_t len)
+{
+	int ret;
+
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_COMMAND);
+	store32(tb, TPM_GETRANDOM_SIZE);
+	store32(tb, TPM_ORD_GETRANDOM);
+	store32(tb, len);
+	ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, sizeof tb->data);
+	memcpy(buf, tb->data + TPM_GETRANDOM_SIZE, len);
+
+	return ret;
+}
+
+static int my_get_random(unsigned char *buf, int len)
+{
+	struct tpm_buf *tb;
+	int ret;
+
+	tb = kzalloc(sizeof *tb, GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+	ret = tpm_get_random(tb, buf, len);
+
+	kfree(tb);
+	return ret;
+}
+
+/*
+ * Lock a trusted key, by extending a selected PCR.
+ *
+ * Prevents a trusted key that is sealed to PCRs from being accessed.
+ * This uses the tpm driver's extend function.
+ */
+static int pcrlock(const int pcrnum)
+{
+	unsigned char hash[SHA1_DIGEST_SIZE];
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	my_get_random(hash, SHA1_DIGEST_SIZE);
+	return tpm_pcr_extend(TPM_ANY_NUM, pcrnum, hash) ? -EINVAL : 0;
+}
+
+/*
+ * Create an object specific authorisation protocol (OSAP) session
+ */
+static int osap(struct tpm_buf *tb, struct osapsess *s,
+		const unsigned char *key, const uint16_t type,
+		const uint32_t handle)
+{
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char ononce[TPM_NONCE_SIZE];
+	int ret;
+
+	ret = tpm_get_random(tb, ononce, TPM_NONCE_SIZE);
+	if (ret < 0)
+		return ret;
+
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_COMMAND);
+	store32(tb, TPM_OSAP_SIZE);
+	store32(tb, TPM_ORD_OSAP);
+	store16(tb, type);
+	store32(tb, handle);
+	storebytes(tb, ononce, TPM_NONCE_SIZE);
+
+	ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE);
+	if (ret < 0)
+		return ret;
+
+	s->handle = LOAD32(tb->data, TPM_DATA_OFFSET);
+	memcpy(s->enonce, &(tb->data[TPM_DATA_OFFSET + sizeof(uint32_t)]),
+	       TPM_NONCE_SIZE);
+	memcpy(enonce, &(tb->data[TPM_DATA_OFFSET + sizeof(uint32_t) +
+				  TPM_NONCE_SIZE]), TPM_NONCE_SIZE);
+	ret = TSS_rawhmac(s->secret, key, SHA1_DIGEST_SIZE, TPM_NONCE_SIZE,
+			  enonce, TPM_NONCE_SIZE, ononce, 0, 0);
+	return ret;
+}
+
+/*
+ * Create an object independent authorisation protocol (oiap) session
+ */
+static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
+{
+	int ret;
+
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_COMMAND);
+	store32(tb, TPM_OIAP_SIZE);
+	store32(tb, TPM_ORD_OIAP);
+	ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE);
+	if (ret < 0)
+		return ret;
+
+	*handle = LOAD32(tb->data, TPM_DATA_OFFSET);
+	memcpy(nonce, &tb->data[TPM_DATA_OFFSET + sizeof(uint32_t)],
+	       TPM_NONCE_SIZE);
+	return ret;
+}
+
+struct tpm_digests {
+	unsigned char encauth[SHA1_DIGEST_SIZE];
+	unsigned char pubauth[SHA1_DIGEST_SIZE];
+	unsigned char xorwork[SHA1_DIGEST_SIZE * 2];
+	unsigned char xorhash[SHA1_DIGEST_SIZE];
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+};
+
+/*
+ * Have the TPM seal(encrypt) the trusted key, possibly based on
+ * Platform Configuration Registers (PCRs). AUTH1 for sealing key.
+ */
+static int tpm_seal(struct tpm_buf *tb, const uint16_t keytype,
+		    const uint32_t keyhandle, const unsigned char *keyauth,
+		    const unsigned char *data, const uint32_t datalen,
+		    unsigned char *blob, uint32_t *bloblen,
+		    const unsigned char *blobauth,
+		    const unsigned char *pcrinfo, const uint32_t pcrinfosize)
+{
+	struct osapsess sess;
+	struct tpm_digests *td;
+	unsigned char cont;
+	uint32_t ordinal;
+	uint32_t pcrsize;
+	uint32_t datsize;
+	int sealinfosize;
+	int encdatasize;
+	int storedsize;
+	int ret;
+	int i;
+
+	/* alloc some work space for all the hashes */
+	td = kmalloc(sizeof *td, GFP_KERNEL);
+	if (!td)
+		return -ENOMEM;
+
+	/* get session for sealing key */
+	ret = osap(tb, &sess, keyauth, keytype, keyhandle);
+	if (ret < 0)
+		return ret;
+	dump_sess(&sess);
+
+	/* calculate encrypted authorization value */
+	memcpy(td->xorwork, sess.secret, SHA1_DIGEST_SIZE);
+	memcpy(td->xorwork + SHA1_DIGEST_SIZE, sess.enonce, SHA1_DIGEST_SIZE);
+	ret = TSS_sha1(td->xorwork, SHA1_DIGEST_SIZE * 2, td->xorhash);
+	if (ret < 0)
+		return ret;
+
+	ret = tpm_get_random(tb, td->nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0)
+		return ret;
+	ordinal = htonl(TPM_ORD_SEAL);
+	datsize = htonl(datalen);
+	pcrsize = htonl(pcrinfosize);
+	cont = 0;
+
+	/* encrypt data authorization key */
+	for (i = 0; i < SHA1_DIGEST_SIZE; ++i)
+		td->encauth[i] = td->xorhash[i] ^ blobauth[i];
+
+	/* calculate authorization HMAC value */
+	if (pcrinfosize == 0) {
+		/* no pcr info specified */
+		TSS_authhmac(td->pubauth, sess.secret, SHA1_DIGEST_SIZE,
+			     sess.enonce, td->nonceodd, cont, sizeof(uint32_t),
+			     &ordinal, SHA1_DIGEST_SIZE, td->encauth,
+			     sizeof(uint32_t), &pcrsize, sizeof(uint32_t),
+			     &datsize, datalen, data, 0, 0);
+	} else {
+		/* pcr info specified */
+		TSS_authhmac(td->pubauth, sess.secret, SHA1_DIGEST_SIZE,
+			     sess.enonce, td->nonceodd, cont, sizeof(uint32_t),
+			     &ordinal, SHA1_DIGEST_SIZE, td->encauth,
+			     sizeof(uint32_t), &pcrsize, pcrinfosize,
+			     pcrinfo, sizeof(uint32_t), &datsize, datalen,
+			     data, 0, 0);
+	}
+
+	/* build and send the TPM request packet */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_SEAL_SIZE + pcrinfosize + datalen);
+	store32(tb, TPM_ORD_SEAL);
+	store32(tb, keyhandle);
+	storebytes(tb, td->encauth, SHA1_DIGEST_SIZE);
+	store32(tb, pcrinfosize);
+	storebytes(tb, pcrinfo, pcrinfosize);
+	store32(tb, datalen);
+	storebytes(tb, data, datalen);
+	store32(tb, sess.handle);
+	storebytes(tb, td->nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, td->pubauth, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE);
+	if (ret < 0)
+		return ret;
+
+	/* calculate the size of the returned Blob */
+	sealinfosize = LOAD32(tb->data, TPM_DATA_OFFSET + sizeof(uint32_t));
+	encdatasize = LOAD32(tb->data, TPM_DATA_OFFSET + sizeof(uint32_t) +
+			     sizeof(uint32_t) + sealinfosize);
+	storedsize = sizeof(uint32_t) + sizeof(uint32_t) + sealinfosize +
+	    sizeof(uint32_t) + encdatasize;
+
+	/* check the HMAC in the response */
+	ret = TSS_checkhmac1(tb->data, ordinal, td->nonceodd, sess.secret,
+			     SHA1_DIGEST_SIZE, storedsize, TPM_DATA_OFFSET, 0,
+			     0);
+
+	/* copy the returned blob to caller */
+	memcpy(blob, tb->data + TPM_DATA_OFFSET, storedsize);
+	*bloblen = storedsize;
+	return ret;
+}
+
+/*
+ * use the AUTH2_COMMAND form of unseal, to authorize both key and blob
+ */
+static int tpm_unseal(struct tpm_buf *tb,
+		      const uint32_t keyhandle, const unsigned char *keyauth,
+		      const unsigned char *blob, const int bloblen,
+		      const unsigned char *blobauth,
+		      unsigned char *data, unsigned int *datalen)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce1[TPM_NONCE_SIZE];
+	unsigned char enonce2[TPM_NONCE_SIZE];
+	unsigned char authdata1[SHA1_DIGEST_SIZE];
+	unsigned char authdata2[SHA1_DIGEST_SIZE];
+	uint32_t authhandle1 = 0;
+	uint32_t authhandle2 = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal;
+	uint32_t keyhndl;
+	int ret;
+
+	/* sessions for unsealing key and data */
+	ret = oiap(tb, &authhandle1, enonce1);
+	if (ret < 0) {
+		pr_info("trusted_key: oiap failed (%d)\n", ret);
+		return ret;
+	}
+	ret = oiap(tb, &authhandle2, enonce2);
+	if (ret < 0) {
+		pr_info("trusted_key: oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	ordinal = htonl(TPM_ORD_UNSEAL);
+	keyhndl = htonl(SRKHANDLE);
+	ret = tpm_get_random(tb, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("trusted_key: tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+	TSS_authhmac(authdata1, keyauth, TPM_NONCE_SIZE,
+		     enonce1, nonceodd, cont, sizeof(uint32_t),
+		     &ordinal, bloblen, blob, 0, 0);
+	TSS_authhmac(authdata2, blobauth, TPM_NONCE_SIZE,
+		     enonce2, nonceodd, cont, sizeof(uint32_t),
+		     &ordinal, bloblen, blob, 0, 0);
+
+	/* build and send TPM request packet */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH2_COMMAND);
+	store32(tb, TPM_UNSEAL_SIZE + bloblen);
+	store32(tb, TPM_ORD_UNSEAL);
+	store32(tb, keyhandle);
+	storebytes(tb, blob, bloblen);
+	store32(tb, authhandle1);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata1, SHA1_DIGEST_SIZE);
+	store32(tb, authhandle2);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata2, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(TPM_ANY_NUM, tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("trusted_key: authhmac failed (%d)\n", ret);
+		return ret;
+	}
+
+	*datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+	ret = TSS_checkhmac2(tb->data, ordinal, nonceodd,
+			     keyauth, SHA1_DIGEST_SIZE,
+			     blobauth, SHA1_DIGEST_SIZE,
+			     sizeof(uint32_t), TPM_DATA_OFFSET,
+			     *datalen, TPM_DATA_OFFSET + sizeof(uint32_t), 0,
+			     0);
+	if (ret < 0)
+		pr_info("trusted_key: TSS_checkhmac2 failed (%d)\n", ret);
+	memcpy(data, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), *datalen);
+	return ret;
+}
+
+/*
+ * Have the TPM seal(encrypt) the symmetric key
+ */
+static int key_seal(struct trusted_key_payload *p,
+		    struct trusted_key_options *o)
+{
+	struct tpm_buf *tb;
+	int ret;
+
+	tb = kzalloc(sizeof *tb, GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+
+	/* include migratable flag at end of sealed key */
+	p->key[p->key_len] = p->migratable;
+
+	ret = tpm_seal(tb, o->keytype, o->keyhandle, o->keyauth,
+		       p->key, p->key_len + 1, p->blob, &p->blob_len,
+		       o->blobauth, o->pcrinfo, o->pcrinfo_len);
+	if (ret < 0)
+		pr_info("trusted_key: srkseal failed (%d)\n", ret);
+
+	kfree(tb);
+	return ret;
+}
+
+/*
+ * Have the TPM unseal(decrypt) the symmetric key
+ */
+static int key_unseal(struct trusted_key_payload *p,
+		      struct trusted_key_options *o)
+{
+	struct tpm_buf *tb;
+	int ret;
+
+	tb = kzalloc(sizeof *tb, GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+
+	ret = tpm_unseal(tb, o->keyhandle, o->keyauth, p->blob, p->blob_len,
+			 o->blobauth, p->key, &p->key_len);
+	/* pull migratable flag out of sealed key */
+	p->migratable = p->key[--p->key_len];
+
+	if (ret < 0)
+		pr_info("trusted_key: srkunseal failed (%d)\n", ret);
+
+	kfree(tb);
+	return ret;
+}
+
+enum {
+	Opt_err = -1,
+	Opt_new, Opt_load, Opt_update,
+	Opt_keyhandle, Opt_keyauth, Opt_blobauth,
+	Opt_pcrinfo, Opt_pcrlock, Opt_migratable
+};
+
+static const match_table_t key_tokens = {
+	{Opt_new, "new"},
+	{Opt_load, "load"},
+	{Opt_update, "update"},
+	{Opt_keyhandle, "keyhandle=%s"},
+	{Opt_keyauth, "keyauth=%s"},
+	{Opt_blobauth, "blobauth=%s"},
+	{Opt_pcrinfo, "pcrinfo=%s"},
+	{Opt_pcrlock, "pcrlock=%s"},
+	{Opt_migratable, "migratable=%s"},
+	{Opt_err, NULL}
+};
+
+/* can have zero or more token= options */
+static int getoptions(char *c, struct trusted_key_payload *pay,
+		      struct trusted_key_options *opt)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *p = c;
+	int token;
+	int res;
+	unsigned long handle;
+	unsigned long lock;
+
+	while ((p = strsep(&c, " \t"))) {
+		if (*p == '\0' || *p == ' ' || *p == '\t')
+			continue;
+		token = match_token(p, key_tokens, args);
+
+		switch (token) {
+		case Opt_pcrinfo:
+			opt->pcrinfo_len = strlen(args[0].from) / 2;
+			if (opt->pcrinfo_len > MAX_PCRINFO_SIZE)
+				return -EINVAL;
+			hex2bin(opt->pcrinfo, args[0].from, opt->pcrinfo_len);
+			break;
+		case Opt_keyhandle:
+			res = strict_strtoul(args[0].from, 16, &handle);
+			if (res < 0)
+				return -EINVAL;
+			opt->keytype = SEAL_keytype;
+			opt->keyhandle = handle;
+			break;
+		case Opt_keyauth:
+			if (strlen(args[0].from) != 2 * SHA1_DIGEST_SIZE)
+				return -EINVAL;
+			hex2bin(opt->keyauth, args[0].from, SHA1_DIGEST_SIZE);
+			break;
+		case Opt_blobauth:
+			if (strlen(args[0].from) != 2 * SHA1_DIGEST_SIZE)
+				return -EINVAL;
+			hex2bin(opt->blobauth, args[0].from, SHA1_DIGEST_SIZE);
+			break;
+		case Opt_migratable:
+			if (*args[0].from == '0')
+				pay->migratable = 0;
+			else
+				return -EINVAL;
+			break;
+		case Opt_pcrlock:
+			res = strict_strtoul(args[0].from, 10, &lock);
+			if (res < 0)
+				return -EINVAL;
+			opt->pcrlock = lock;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/*
+ * datablob_parse - parse the keyctl data and fill in the
+ * 		    payload and options structures
+ *
+ * On success returns 0, otherwise -EINVAL.
+ */
+static int datablob_parse(char *datablob, struct trusted_key_payload *p,
+			  struct trusted_key_options *o)
+{
+	substring_t args[MAX_OPT_ARGS];
+	long keylen;
+	int ret = -EINVAL;
+	int key_cmd;
+	char *c;
+
+	/* main command */
+	c = strsep(&datablob, " \t");
+	if (!c)
+		return -EINVAL;
+	key_cmd = match_token(c, key_tokens, args);
+	switch (key_cmd) {
+	case Opt_new:
+		/* first argument is key size */
+		c = strsep(&datablob, " \t");
+		if (!c)
+			return -EINVAL;
+		ret = strict_strtol(c, 10, &keylen);
+		if (ret < 0 || keylen < MIN_KEY_SIZE || keylen > MAX_KEY_SIZE)
+			return -EINVAL;
+		p->key_len = keylen;
+		ret = getoptions(datablob, p, o);
+		if (ret < 0)
+			return ret;
+		ret = Opt_new;
+		break;
+	case Opt_load:
+		/* first argument is sealed blob */
+		c = strsep(&datablob, " \t");
+		if (!c)
+			return -EINVAL;
+		p->blob_len = strlen(c) / 2;
+		if (p->blob_len > MAX_BLOB_SIZE)
+			return -EINVAL;
+		hex2bin(p->blob, c, p->blob_len);
+		ret = getoptions(datablob, p, o);
+		if (ret < 0)
+			return ret;
+		ret = Opt_load;
+		break;
+	case Opt_update:
+		/* all arguments are options */
+		ret = getoptions(datablob, p, o);
+		if (ret < 0)
+			return ret;
+		ret = Opt_update;
+		break;
+	case Opt_err:
+		return -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static struct trusted_key_options *trusted_options_alloc(void)
+{
+	struct trusted_key_options *options;
+
+	options = kzalloc(sizeof *options, GFP_KERNEL);
+	if (!options)
+		return options;
+
+	/* set any non-zero defaults */
+	options->keytype = SRK_keytype;
+	options->keyhandle = SRKHANDLE;
+	return options;
+}
+
+static struct trusted_key_payload *trusted_payload_alloc(struct key *key)
+{
+	struct trusted_key_payload *p = NULL;
+	int ret;
+
+	ret = key_payload_reserve(key, sizeof *p);
+	if (ret < 0)
+		return p;
+	p = kzalloc(sizeof *p, GFP_KERNEL);
+
+	/* migratable by default */
+	p->migratable = 1;
+	return p;
+}
+
+/*
+ * trusted_instantiate - create a new trusted key
+ *
+ * Unseal an existing trusted blob or, for a new key, get a
+ * random key, then seal and create a trusted key-type key,
+ * adding it to the specified keyring.
+ *
+ * On success, return 0. Otherwise return errno.
+ */
+static int trusted_instantiate(struct key *key, const void *data,
+			       const size_t datalen)
+{
+	struct trusted_key_payload *payload = NULL;
+	struct trusted_key_options *options = NULL;
+	char *datablob;
+	int ret = 0;
+	int key_cmd;
+
+	if (datalen <= 0 || datalen > 32767 || !data)
+		return -EINVAL;
+
+	datablob = kmalloc(datalen + 1, GFP_KERNEL);
+	if (!datablob)
+		return -ENOMEM;
+	memcpy(datablob, data, datalen);
+	datablob[datalen] = '\0';
+
+	options = trusted_options_alloc();
+	if (!options) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	payload = trusted_payload_alloc(key);
+	if (!payload) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	key_cmd = datablob_parse(datablob, payload, options);
+	if (key_cmd < 0) {
+		ret = key_cmd;
+		goto out;
+	}
+
+	dump_payload(payload);
+	dump_options(options);
+
+	switch (key_cmd) {
+	case Opt_load:
+		ret = key_unseal(payload, options);
+		dump_payload(payload);
+		dump_options(options);
+		if (ret < 0)
+			pr_info("trusted_key: key_unseal failed (%d)\n", ret);
+		break;
+	case Opt_new:
+		ret = my_get_random(payload->key, payload->key_len);
+		if (ret < 0) {
+			pr_info("trusted_key: key_create failed (%d)\n", ret);
+			goto out;
+		}
+		ret = key_seal(payload, options);
+		if (ret < 0)
+			pr_info("trusted_key: key_seal failed (%d)\n", ret);
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!ret && options->pcrlock)
+		ret = pcrlock(options->pcrlock);
+out:
+	kfree(datablob);
+	kfree(options);
+	if (!ret)
+		rcu_assign_pointer(key->payload.data, payload);
+	else
+		kfree(payload);
+	return ret;
+}
+
+static void trusted_rcu_free(struct rcu_head *rcu)
+{
+	struct trusted_key_payload *p;
+
+	p = container_of(rcu, struct trusted_key_payload, rcu);
+	memset(p->key, 0, p->key_len);
+	kfree(p);
+}
+
+/*
+ * trusted_update - reseal an existing key with new PCR values
+ */
+static int trusted_update(struct key *key, const void *data,
+			  const size_t datalen)
+{
+	struct trusted_key_payload *p = key->payload.data;
+	struct trusted_key_payload *new_p;
+	struct trusted_key_options *new_o;
+	char *datablob;
+	int ret = 0;
+
+	if (!p->migratable)
+		return -EPERM;
+	if (datalen <= 0 || datalen > 32767 || !data)
+		return -EINVAL;
+
+	datablob = kmalloc(datalen + 1, GFP_KERNEL);
+	if (!datablob)
+		return -ENOMEM;
+	new_o = trusted_options_alloc();
+	if (!new_o) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	new_p = trusted_payload_alloc(key);
+	if (!new_p) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(datablob, data, datalen);
+	datablob[datalen] = '\0';
+	ret = datablob_parse(datablob, new_p, new_o);
+	if (ret != Opt_update) {
+		ret = -EINVAL;
+		goto out;
+	}
+	/* copy old key values, and reseal with new pcrs */
+	new_p->migratable = p->migratable;
+	new_p->key_len = p->key_len;
+	memcpy(new_p->key, p->key, p->key_len);
+	dump_payload(p);
+	dump_payload(new_p);
+
+	ret = key_seal(new_p, new_o);
+	if (ret < 0) {
+		pr_info("trusted_key: key_seal failed (%d)\n", ret);
+		kfree(new_p);
+		goto out;
+	}
+	if (new_o->pcrlock) {
+		ret = pcrlock(new_o->pcrlock);
+		if (ret < 0) {
+			pr_info("trusted_key: pcrlock failed (%d)\n", ret);
+			kfree(new_p);
+			goto out;
+		}
+	}
+	rcu_assign_pointer(key->payload.data, new_p);
+	call_rcu(&p->rcu, trusted_rcu_free);
+out:
+	kfree(datablob);
+	kfree(new_o);
+	return ret;
+}
+
+/*
+ * trusted_read - copy the sealed blob data to userspace in hex.
+ * On success, return to userspace the trusted key datablob size.
+ */
+static long trusted_read(const struct key *key, char __user *buffer,
+			 size_t buflen)
+{
+	struct trusted_key_payload *p;
+	char *ascii_buf;
+	char *bufp;
+	int i;
+
+	p = rcu_dereference_protected(key->payload.data,
+			rwsem_is_locked(&((struct key *)key)->sem));
+	if (!p)
+		return -EINVAL;
+	if (!buffer || buflen <= 0)
+		return 2 * p->blob_len;
+	ascii_buf = kmalloc(2 * p->blob_len, GFP_KERNEL);
+	if (!ascii_buf)
+		return -ENOMEM;
+
+	bufp = ascii_buf;
+	for (i = 0; i < p->blob_len; i++)
+		bufp = pack_hex_byte(bufp, p->blob[i]);
+	if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) {
+		kfree(ascii_buf);
+		return -EFAULT;
+	}
+	kfree(ascii_buf);
+	return 2 * p->blob_len;
+}
+
+/*
+ * trusted_destroy - before freeing the key, clear the decrypted data
+ */
+static void trusted_destroy(struct key *key)
+{
+	struct trusted_key_payload *p = key->payload.data;
+
+	if (!p)
+		return;
+	memset(p->key, 0, p->key_len);
+	kfree(key->payload.data);
+}
+
+struct key_type key_type_trusted = {
+	.name = "trusted",
+	.instantiate = trusted_instantiate,
+	.update = trusted_update,
+	.match = user_match,
+	.destroy = trusted_destroy,
+	.describe = user_describe,
+	.read = trusted_read,
+};
+
+EXPORT_SYMBOL_GPL(key_type_trusted);
+
+static void trusted_shash_release(void)
+{
+	if (hashalg)
+		crypto_free_shash(hashalg);
+	if (hmacalg)
+		crypto_free_shash(hmacalg);
+}
+
+static int __init trusted_shash_alloc(void)
+{
+	int ret;
+
+	hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hmacalg)) {
+		pr_info("trusted_key: could not allocate crypto %s\n",
+			hmac_alg);
+		return PTR_ERR(hmacalg);
+	}
+
+	hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hashalg)) {
+		pr_info("trusted_key: could not allocate crypto %s\n",
+			hash_alg);
+		ret = PTR_ERR(hashalg);
+		goto hashalg_fail;
+	}
+
+	return 0;
+
+hashalg_fail:
+	crypto_free_shash(hmacalg);
+	return ret;
+}
+
+static int __init init_trusted(void)
+{
+	int ret;
+
+	ret = trusted_shash_alloc();
+	if (ret < 0)
+		return ret;
+	ret = register_key_type(&key_type_trusted);
+	if (ret < 0)
+		trusted_shash_release();
+	return ret;
+}
+
+static void __exit cleanup_trusted(void)
+{
+	trusted_shash_release();
+	unregister_key_type(&key_type_trusted);
+}
+
+late_initcall(init_trusted);
+module_exit(cleanup_trusted);
+
+MODULE_LICENSE("GPL");
diff --git a/security/keys/trusted_defined.h b/security/keys/trusted_defined.h
new file mode 100644
index 000000000000..3249fbd2b653
--- /dev/null
+++ b/security/keys/trusted_defined.h
@@ -0,0 +1,134 @@
+#ifndef __TRUSTED_KEY_H
+#define __TRUSTED_KEY_H
+
+/* implementation specific TPM constants */
+#define MAX_PCRINFO_SIZE		64
+#define MAX_BUF_SIZE			512
+#define TPM_GETRANDOM_SIZE		14
+#define TPM_OSAP_SIZE			36
+#define TPM_OIAP_SIZE			10
+#define TPM_SEAL_SIZE			87
+#define TPM_UNSEAL_SIZE			104
+#define TPM_SIZE_OFFSET			2
+#define TPM_RETURN_OFFSET		6
+#define TPM_DATA_OFFSET			10
+
+#define LOAD32(buffer, offset)	(ntohl(*(uint32_t *)&buffer[offset]))
+#define LOAD32N(buffer, offset)	(*(uint32_t *)&buffer[offset])
+#define LOAD16(buffer, offset)	(ntohs(*(uint16_t *)&buffer[offset]))
+
+struct tpm_buf {
+	int len;
+	unsigned char data[MAX_BUF_SIZE];
+};
+
+#define INIT_BUF(tb) (tb->len = 0)
+
+struct osapsess {
+	uint32_t handle;
+	unsigned char secret[SHA1_DIGEST_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+};
+
+/* discrete values, but have to store in uint16_t for TPM use */
+enum {
+	SEAL_keytype = 1,
+	SRK_keytype = 4
+};
+
+struct trusted_key_options {
+	uint16_t keytype;
+	uint32_t keyhandle;
+	unsigned char keyauth[SHA1_DIGEST_SIZE];
+	unsigned char blobauth[SHA1_DIGEST_SIZE];
+	uint32_t pcrinfo_len;
+	unsigned char pcrinfo[MAX_PCRINFO_SIZE];
+	int pcrlock;
+};
+
+#define TPM_DEBUG 0
+
+#if TPM_DEBUG
+static inline void dump_options(struct trusted_key_options *o)
+{
+	pr_info("trusted_key: sealing key type %d\n", o->keytype);
+	pr_info("trusted_key: sealing key handle %0X\n", o->keyhandle);
+	pr_info("trusted_key: pcrlock %d\n", o->pcrlock);
+	pr_info("trusted_key: pcrinfo %d\n", o->pcrinfo_len);
+	print_hex_dump(KERN_INFO, "pcrinfo ", DUMP_PREFIX_NONE,
+		       16, 1, o->pcrinfo, o->pcrinfo_len, 0);
+}
+
+static inline void dump_payload(struct trusted_key_payload *p)
+{
+	pr_info("trusted_key: key_len %d\n", p->key_len);
+	print_hex_dump(KERN_INFO, "key ", DUMP_PREFIX_NONE,
+		       16, 1, p->key, p->key_len, 0);
+	pr_info("trusted_key: bloblen %d\n", p->blob_len);
+	print_hex_dump(KERN_INFO, "blob ", DUMP_PREFIX_NONE,
+		       16, 1, p->blob, p->blob_len, 0);
+	pr_info("trusted_key: migratable %d\n", p->migratable);
+}
+
+static inline void dump_sess(struct osapsess *s)
+{
+	print_hex_dump(KERN_INFO, "trusted-key: handle ", DUMP_PREFIX_NONE,
+		       16, 1, &s->handle, 4, 0);
+	pr_info("trusted-key: secret:\n");
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
+		       16, 1, &s->secret, SHA1_DIGEST_SIZE, 0);
+	pr_info("trusted-key: enonce:\n");
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
+		       16, 1, &s->enonce, SHA1_DIGEST_SIZE, 0);
+}
+
+static inline void dump_tpm_buf(unsigned char *buf)
+{
+	int len;
+
+	pr_info("\ntrusted-key: tpm buffer\n");
+	len = LOAD32(buf, TPM_SIZE_OFFSET);
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, len, 0);
+}
+#else
+static inline void dump_options(struct trusted_key_options *o)
+{
+}
+
+static inline void dump_payload(struct trusted_key_payload *p)
+{
+}
+
+static inline void dump_sess(struct osapsess *s)
+{
+}
+
+static inline void dump_tpm_buf(unsigned char *buf)
+{
+}
+#endif
+
+static inline void store8(struct tpm_buf *buf, const unsigned char value)
+{
+	buf->data[buf->len++] = value;
+}
+
+static inline void store16(struct tpm_buf *buf, const uint16_t value)
+{
+	*(uint16_t *) & buf->data[buf->len] = htons(value);
+	buf->len += sizeof value;
+}
+
+static inline void store32(struct tpm_buf *buf, const uint32_t value)
+{
+	*(uint32_t *) & buf->data[buf->len] = htonl(value);
+	buf->len += sizeof value;
+}
+
+static inline void storebytes(struct tpm_buf *buf, const unsigned char *in,
+			      const int len)
+{
+	memcpy(buf->data + buf->len, in, len);
+	buf->len += len;
+}
+#endif
-- 
cgit v1.2.3-71-gd317


From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 26 Nov 2010 08:36:09 +0000
Subject: xps: Add CONFIG_XPS

This patch adds XPS_CONFIG option to enable and disable XPS.  This is
done in the same manner as RPS_CONFIG.  This is also fixes build
failure in XPS code when SMP is not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 52 +++++++++++++++++++++++++----------------------
 net/Kconfig               |  5 +++++
 net/core/dev.c            |  9 +++++---
 net/core/net-sysfs.c      | 47 ++++++++++++++++++++++++++++++------------
 net/core/net-sysfs.h      |  3 ---
 5 files changed, 73 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c6ae2f4b9ab..9ae4544f0cf0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,30 +535,6 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
-/*
- * This structure holds an XPS map which can be of variable length.  The
- * map is an array of queues.
- */
-struct xps_map {
-	unsigned int len;
-	unsigned int alloc_len;
-	struct rcu_head rcu;
-	u16 queues[0];
-};
-#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
-#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
-    / sizeof(u16))
-
-/*
- * This structure holds all XPS maps for device.  Maps are indexed by CPU.
- */
-struct xps_dev_maps {
-	struct rcu_head rcu;
-	struct xps_map *cpu_map[0];
-};
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
-    (nr_cpu_ids * sizeof(struct xps_map *)))
-
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
  * tail pointer for that CPU's input queue at the time of last enqueue.
@@ -626,6 +602,32 @@ struct netdev_rx_queue {
 } ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RPS */
 
+#ifdef CONFIG_XPS
+/*
+ * This structure holds an XPS map which can be of variable length.  The
+ * map is an array of queues.
+ */
+struct xps_map {
+	unsigned int len;
+	unsigned int alloc_len;
+	struct rcu_head rcu;
+	u16 queues[0];
+};
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
+    / sizeof(u16))
+
+/*
+ * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ */
+struct xps_dev_maps {
+	struct rcu_head rcu;
+	struct xps_map *cpu_map[0];
+};
+#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
+    (nr_cpu_ids * sizeof(struct xps_map *)))
+#endif /* CONFIG_XPS */
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1046,7 +1048,9 @@ struct net_device {
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 
+#ifdef CONFIG_XPS
 	struct xps_dev_maps	*xps_maps;
+#endif
 
 	/* These may be needed for future network-power-down code. */
 
diff --git a/net/Kconfig b/net/Kconfig
index 55fd82e9ffd9..126c2af0fc1f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -220,6 +220,11 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config XPS
+	boolean
+	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	default y
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/core/dev.c b/net/core/dev.c
index c852f0038a08..3259d2c323a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
 						  txq);
+		if (rc)
+			return rc;
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
 	struct xps_map *map;
 	int queue_index = -1;
@@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+#ifdef CONFIG_RPS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
-#ifdef CONFIG_RPS
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
 
@@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	for (i = 0; i < count; i++)
 		rx[i].dev = dev;
-#endif
 	return 0;
 }
+#endif
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 68dbbfdee274..99c11294623f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -751,10 +751,12 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
+#endif /* CONFIG_RPS */
 
 int
 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
+#ifdef CONFIG_RPS
 	int i;
 	int error = 0;
 
@@ -770,8 +772,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		kobject_put(&net->_rx[i].kobj);
 
 	return error;
+#else
+	return 0;
+#endif
 }
 
+#ifdef CONFIG_XPS
 /*
  * netdev_queue sysfs structures and functions.
  */
@@ -1090,10 +1096,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
+#endif /* CONFIG_XPS */
 
 int
 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
+#ifdef CONFIG_XPS
 	int i;
 	int error = 0;
 
@@ -1109,27 +1117,36 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		kobject_put(&net->_tx[i].kobj);
 
 	return error;
+#else
+	return 0;
+#endif
 }
 
 static int register_queue_kobjects(struct net_device *net)
 {
-	int error = 0, txq = 0, rxq = 0;
+	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
 
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
+#endif
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
 
-	error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+	error = net_rx_queue_update_kobjects(net, 0, real_rx);
 	if (error)
 		goto error;
-	rxq = net->real_num_rx_queues;
+	rxq = real_rx;
 
-	error = netdev_queue_update_kobjects(net, 0,
-					     net->real_num_tx_queues);
+	error = netdev_queue_update_kobjects(net, 0, real_tx);
 	if (error)
 		goto error;
-	txq = net->real_num_tx_queues;
+	txq = real_tx;
 
 	return 0;
 
@@ -1141,11 +1158,19 @@ error:
 
 static void remove_queue_kobjects(struct net_device *net)
 {
-	net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
-	netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0);
+	int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+
+	net_rx_queue_update_kobjects(net, real_rx, 0);
+	netdev_queue_update_kobjects(net, real_tx, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	kset_unregister(net->queues_kset);
+#endif
 }
-#endif /* CONFIG_RPS */
 
 static const void *net_current_ns(void)
 {
@@ -1244,9 +1269,7 @@ void netdev_unregister_kobject(struct net_device * net)
 
 	kobject_get(&dev->kobj);
 
-#ifdef CONFIG_RPS
 	remove_queue_kobjects(net);
-#endif
 
 	device_del(dev);
 }
@@ -1285,13 +1308,11 @@ int netdev_register_kobject(struct net_device *net)
 	if (error)
 		return error;
 
-#ifdef CONFIG_RPS
 	error = register_queue_kobjects(net);
 	if (error) {
 		device_del(dev);
 		return error;
 	}
-#endif
 
 	return error;
 }
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 25ec2ee57df7..bd7751ec1c4d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,11 +4,8 @@
 int netdev_kobject_init(void);
 int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
-#ifdef CONFIG_RPS
 int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
 int netdev_queue_update_kobjects(struct net_device *net,
 				 int old_num, int new_num);
 
 #endif
-
-#endif
-- 
cgit v1.2.3-71-gd317


From 1ae0affedce1d3e401991fbe7f2674753f0a7641 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Fri, 26 Nov 2010 23:02:58 +0000
Subject: mmc, sh: Correct value for reset

This resolves a regression that I introduced in
"mmc, sh: Move constants to sh_mmcif.h". Having
examined the manual and tested the code on an AP4EVB board
it seems that the correct sequence is.

1) Write 1 to bit 31 and zeros to all other bits
2) Write zero to all bits

Cc: Yusuke Goda <yusuke.goda.sx@renesas.com>
Cc: Magnus Damm <magnus.damm@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/mmc/sh_mmcif.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index a6bfa5296495..342ec1a38684 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -80,7 +80,7 @@ struct sh_mmcif_plat_data {
 
 /* CE_VERSION */
 #define SOFT_RST_ON		(1 << 31)
-#define SOFT_RST_OFF		~SOFT_RST_ON
+#define SOFT_RST_OFF		0
 
 static inline u32 sh_mmcif_readl(void __iomem *addr, int reg)
 {
@@ -168,12 +168,9 @@ static inline int sh_mmcif_boot_do_read(void __iomem *base,
 
 static inline void sh_mmcif_boot_init(void __iomem *base)
 {
-	unsigned long tmp;
-
 	/* reset */
-	tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION);
-	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON);
-	sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF);
+	sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_ON);
+	sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_OFF);
 
 	/* byte swap */
 	sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP);
-- 
cgit v1.2.3-71-gd317


From 22efa0fee32d9e7f6f6fbc396a872b5708d86048 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Sat, 27 Nov 2010 00:11:55 +0000
Subject: sh, mmc: Use defines when setting CE_CLK_CTRL

The 16-19th bits of CE_CLK_CTRL set the
MMC clock frequency.

Cc: Yusuke Goda <yusuke.goda.sx@renesas.com>
Cc: Magnus Damm <magnus.damm@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/mmc/sh_mmcif.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 342ec1a38684..ffabf8c0a531 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -70,6 +70,9 @@ struct sh_mmcif_plat_data {
 #define CLK_ENABLE		(1 << 24) /* 1: output mmc clock */
 #define CLK_CLEAR		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
 #define CLK_SUP_PCLK		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
+#define CLKDIV_4		(1<<16) /* mmc clock frequency.
+					 * n: bus clock/(2^(n+1)) */
+#define CLKDIV_256		(7<<16) /* mmc clock frequency. (see above) */
 #define SRSPTO_256		((1 << 13) | (0 << 12)) /* resp timeout */
 #define SRBSYTO_29		((1 << 11) | (1 << 10) |	\
 				 (1 << 9) | (1 << 8)) /* resp busy timeout */
@@ -178,14 +181,10 @@ static inline void sh_mmcif_boot_init(void __iomem *base)
 	/* Set block size in MMCIF hardware */
 	sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS);
 
-	/* Enable the clock, set it to Bus clock/256 (about 325Khz).
-	 * It is unclear where 0x70000 comes from or if it is even needed.
-	 * It is there for byte-compatibility with code that is known to
-	 * work.
-	 */
+	/* Enable the clock, set it to Bus clock/256 (about 325Khz). */
 	sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL,
-			CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 |
-			SCCSTO_29 | 0x70000);
+			CLK_ENABLE | CLKDIV_256 | SRSPTO_256 |
+			SRBSYTO_29 | SRWDTO_29 | SCCSTO_29);
 
 	/* CMD0 */
 	sh_mmcif_boot_cmd(base, 0x00000040, 0);
@@ -210,7 +209,9 @@ static inline void sh_mmcif_boot_slurp(void __iomem *base,
 	unsigned long tmp;
 
 	/* In data transfer mode: Set clock to Bus clock/4 (about 20Mhz) */
-	sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01012fff);
+	sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL,
+			CLK_ENABLE | CLKDIV_4 | SRSPTO_256 |
+			SRBSYTO_29 | SRWDTO_29 | SCCSTO_29);
 
 	/* CMD9 - Get CSD */
 	sh_mmcif_boot_cmd(base, 0x09806000, 0x00010000);
-- 
cgit v1.2.3-71-gd317


From a41778694806ac1ccd4b1dafed1abef8d5ba98ac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 28 Nov 2010 21:43:02 +0000
Subject: xps: add __rcu annotations

Avoid sparse warnings : add __rcu annotations and use
rcu_dereference_protected() where necessary.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/net-sysfs.c      | 24 +++++++++++++++---------
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ae4544f0cf0..4b0c7f3aa32b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -622,7 +622,7 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map *cpu_map[0];
+	struct xps_map __rcu *cpu_map[0];
 };
 #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
     (nr_cpu_ids * sizeof(struct xps_map *)))
@@ -1049,7 +1049,7 @@ struct net_device {
 	spinlock_t		tx_global_lock;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps	*xps_maps;
+	struct xps_dev_maps __rcu *xps_maps;
 #endif
 
 	/* These may be needed for future network-power-down code. */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 35ef42fa0cf3..f85cee3d869e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -899,6 +899,8 @@ static void xps_dev_maps_release(struct rcu_head *rcu)
 }
 
 static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
@@ -935,11 +937,12 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 
 	mutex_lock(&xps_map_mutex);
 
-	dev_maps = dev->xps_maps;
+	dev_maps = xmap_dereference(dev->xps_maps);
 
 	for_each_possible_cpu(cpu) {
-		new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
-
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
 		if (map) {
 			for (pos = 0; pos < map->len; pos++)
 				if (map->queues[pos] == index)
@@ -975,13 +978,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 			else
 				new_map = NULL;
 		}
-		new_dev_maps->cpu_map[cpu] = new_map;
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
 	}
 
 	/* Cleanup old maps */
 	for_each_possible_cpu(cpu) {
-		map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
-		if (map && new_dev_maps->cpu_map[cpu] != map)
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
 			call_rcu(&map->rcu, xps_map_release);
 		if (new_dev_maps->cpu_map[cpu])
 			nonempty = 1;
@@ -1007,7 +1011,9 @@ error:
 
 	if (new_dev_maps)
 		for_each_possible_cpu(i)
-			kfree(new_dev_maps->cpu_map[i]);
+			kfree(rcu_dereference_protected(
+				new_dev_maps->cpu_map[i],
+				1));
 	kfree(new_dev_maps);
 	free_cpumask_var(mask);
 	return -ENOMEM;
@@ -1033,11 +1039,11 @@ static void netdev_queue_release(struct kobject *kobj)
 	index = get_netdev_queue_index(queue);
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = dev->xps_maps;
+	dev_maps = xmap_dereference(dev->xps_maps);
 
 	if (dev_maps) {
 		for_each_possible_cpu(i) {
-			map  = dev_maps->cpu_map[i];
+			map = xmap_dereference(dev_maps->cpu_map[i]);
 			if (!map)
 				continue;
 
-- 
cgit v1.2.3-71-gd317


From 62a8c3a32e4143812ed8e0f3783ef1ea40dc87e4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Wed, 24 Nov 2010 19:33:43 +0000
Subject: Staging: sep: handle the rar definition stuff in the header

SEP isn't the only driver that may need to handle both cases easily

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/sep/sep_driver.c        |  1 +
 drivers/staging/sep/sep_driver_config.h | 61 ---------------------------------
 include/linux/rar_register.h            | 16 +++++++++
 3 files changed, 17 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/sep/sep_driver.c b/drivers/staging/sep/sep_driver.c
index ef36239c7b24..8a1ff861b135 100644
--- a/drivers/staging/sep/sep_driver.c
+++ b/drivers/staging/sep/sep_driver.c
@@ -53,6 +53,7 @@
 #include <asm/cacheflush.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
+#include <linux/rar_register.h>
 
 #include <linux/netlink.h>
 #include <linux/connector.h>
diff --git a/drivers/staging/sep/sep_driver_config.h b/drivers/staging/sep/sep_driver_config.h
index 68688cbc2a92..cfda86f2aaff 100644
--- a/drivers/staging/sep/sep_driver_config.h
+++ b/drivers/staging/sep/sep_driver_config.h
@@ -235,15 +235,6 @@ held by the proccess (struct file) */
 
 /* This stub header is for non Moorestown driver only */
 
-/*
- * Constants that specify different kinds of RAR regions that could be
- * set up.
- */
-static __u32 const RAR_TYPE_VIDEO;  /* 0 */
-static __u32 const RAR_TYPE_AUDIO = 1;
-static __u32 const RAR_TYPE_IMAGE = 2;
-static __u32 const RAR_TYPE_DATA  = 3;
-
 /*
  * @struct RAR_stat
  *
@@ -373,56 +364,4 @@ struct RAR_buffer {
 
 #endif  /* MEMRAR */
 
-/* rar_register */
-#ifndef CONFIG_RAR_REGISTER
-/* This stub header is for non Moorestown driver only */
-
-/* The register_rar function is to used by other device drivers
- * to ensure that this driver is ready. As we cannot be sure of
- * the compile/execute order of dirvers in ther kernel, it is
- * best to give this driver a callback function to call when
- * it is ready to give out addresses. The callback function
- * would have those steps that continue the initialization of
- * a driver that do require a valid RAR address. One of those
- * steps would be to call get_rar_address()
- * This function return 0 on success an -1 on failure.
- */
-#define register_rar(a, b, c) (-ENODEV)
-
-/* The get_rar_address function is used by other device drivers
- * to obtain RAR address information on a RAR. It takes two
- * parameter:
- *
- * int rar_index
- * The rar_index is an index to the rar for which you wish to retrieve
- * the address information.
- * Values can be 0,1, or 2.
- *
- * struct RAR_address_struct is a pointer to a place to which the function
- * can return the address structure for the RAR.
- *
- * The function returns a 0 upon success or a -1 if there is no RAR
- * facility on this system.
- */
-#define rar_get_address(a, b, c) (-ENODEV)
-
-/* The lock_rar function is ued by other device drivers to lock an RAR.
- * once an RAR is locked, it stays locked until the next system reboot.
- * The function takes one parameter:
- *
- * int rar_index
- * The rar_index is an index to the rar that you want to lock.
- * Values can be 0,1, or 2.
- *
- * The function returns a 0 upon success or a -1 if there is no RAR
- * facility on this system.
- */
-#define rar_lock(a) (-1)
-
-#else /* using real RAR_REGISTER */
-
-#include <linux/rar_register.h>
-
-#endif  /* CONFIG_RAR_REGISTER */
-
 #endif /* SEP DRIVER CONFIG */
diff --git a/include/linux/rar_register.h b/include/linux/rar_register.h
index ffa805780f85..5c6118189363 100644
--- a/include/linux/rar_register.h
+++ b/include/linux/rar_register.h
@@ -34,11 +34,27 @@
 
 struct rar_device;
 
+#if defined(CONFIG_RAR_REGISTER)
 int register_rar(int num,
 		int (*callback)(unsigned long data), unsigned long data);
 void unregister_rar(int num);
 int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end);
 int rar_lock(int rar_index);
+#else
+extern void unregister_rar(int num)  { }
+extern int rar_lock(int rar_index) { return -EIO; }
+
+extern inline int register_rar(int num,
+		int (*callback)(unsigned long data), unsigned long data)
+{
+	return -ENODEV;
+}
+
+extern int rar_get_address(int rar_index, dma_addr_t *start, dma_addr_t *end)
+{
+	return -ENODEV;
+}
+#endif	/* RAR_REGISTER */
 
 #endif  /* __KERNEL__ */
 #endif  /* _RAR_REGISTER_H */
-- 
cgit v1.2.3-71-gd317


From f7ca38dfe58c20cb1aa2ed9643187e8b194b5bae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 25 Nov 2010 10:02:29 +0100
Subject: nl80211/cfg80211: extend mgmt-tx API for off-channel

With p2p, it is sometimes necessary to transmit
a frame (typically an action frame) on another
channel than the current channel. Enable this
through the CMD_FRAME API, and allow it to wait
for a response. A new command allows that wait
to be aborted.

However, allow userspace to specify whether or
not it wants to allow off-channel TX, it may
actually want to use the same channel only.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 25 +++++++++++++++++-----
 include/net/cfg80211.h  | 11 +++++++---
 net/mac80211/cfg.c      |  7 ++++--
 net/wireless/core.h     |  4 ++--
 net/wireless/mlme.c     |  9 ++++----
 net/wireless/nl80211.c  | 57 +++++++++++++++++++++++++++++++++++++++++++------
 6 files changed, 91 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index d706bf3badc8..5cfa579df476 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -358,11 +358,16 @@
  *	user space application). %NL80211_ATTR_FRAME is used to specify the
  *	frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and
  *	optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on
- *	which channel the frame is to be transmitted or was received. This
- *	channel has to be the current channel (remain-on-channel or the
- *	operational channel). When called, this operation returns a cookie
- *	(%NL80211_ATTR_COOKIE) that will be included with the TX status event
- *	pertaining to the TX request.
+ *	which channel the frame is to be transmitted or was received. If this
+ *	channel is not the current channel (remain-on-channel or the
+ *	operational channel) the device will switch to the given channel and
+ *	transmit the frame, optionally waiting for a response for the time
+ *	specified using %NL80211_ATTR_DURATION. When called, this operation
+ *	returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the
+ *	TX status event pertaining to the TX request.
+ * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
+ *	command may be used with the corresponding cookie to cancel the wait
+ *	time if it is known that it is no longer necessary.
  * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility.
  * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame
  *	transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies
@@ -493,6 +498,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_CHANNEL,
 	NL80211_CMD_SET_WDS_PEER,
 
+	NL80211_CMD_FRAME_WAIT_CANCEL,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -828,6 +835,12 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
  *
+ * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
+ *	transmitted on another channel when the channel given doesn't match
+ *	the current channel. If the current channel doesn't match and this
+ *	flag isn't set, the frame will be rejected. This is also used as an
+ *	nl80211 capability flag.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1002,6 +1015,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MCAST_RATE,
 
+	NL80211_ATTR_OFFCHANNEL_TX_OK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0663945cfa48..49a7c53a48ca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1134,7 +1134,9 @@ struct cfg80211_pmksa {
  * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation.
  *	This allows the operation to be terminated prior to timeout based on
  *	the duration value.
- * @mgmt_tx: Transmit a management frame
+ * @mgmt_tx: Transmit a management frame.
+ * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management
+ *	frame on another channel
  *
  * @testmode_cmd: run a test mode command
  *
@@ -1291,10 +1293,13 @@ struct cfg80211_ops {
 					    u64 cookie);
 
 	int	(*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie);
+	int	(*mgmt_tx_cancel_wait)(struct wiphy *wiphy,
+				       struct net_device *dev,
+				       u64 cookie);
 
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c544074479e..aac2d7de828e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1552,9 +1552,9 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 }
 
 static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
-			     struct ieee80211_channel *chan,
+			     struct ieee80211_channel *chan, bool offchan,
 			     enum nl80211_channel_type channel_type,
-			     bool channel_type_valid,
+			     bool channel_type_valid, unsigned int wait,
 			     const u8 *buf, size_t len, u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1565,6 +1565,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
 		    IEEE80211_TX_CTL_REQ_TX_STATUS;
 
+	if (offchan)
+		return -EOPNOTSUPP;
+
 	/* Check that we are on the requested channel for transmission */
 	if (chan != local->tmp_channel &&
 	    chan != local->oper_channel)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6583cca0e2ee..ee80ad8dc655 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -341,9 +341,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie);
 
 /* SME */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 6980a0c315b2..d7680f2a4c5b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -864,9 +864,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
 
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -946,8 +946,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 		return -EINVAL;
 
 	/* Transmit the Action frame as requested by user space */
-	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
-				  channel_type_valid, buf, len, cookie);
+	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan,
+				  channel_type, channel_type_valid,
+				  wait, buf, len, cookie);
 }
 
 bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 67ff7e92cb99..960be4e650f0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -163,16 +163,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
-
 	[NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
-
 	[NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
-
 	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
-
 	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
+	[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -677,6 +674,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(remain_on_channel, REMAIN_ON_CHANNEL);
 	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
 	CMD(mgmt_tx, FRAME);
+	CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -698,6 +696,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	nla_nest_end(msg, nl_cmds);
 
+	/* for now at least assume all drivers have it */
+	if (dev->ops->mgmt_tx)
+		NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK);
+
 	if (mgmt_stypes) {
 		u16 stypes;
 		struct nlattr *nl_ftypes, *nl_ifs;
@@ -4244,6 +4246,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	void *hdr;
 	u64 cookie;
 	struct sk_buff *msg;
+	unsigned int wait = 0;
+	bool offchan;
 
 	if (!info->attrs[NL80211_ATTR_FRAME] ||
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -4260,6 +4264,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EOPNOTSUPP;
 
+	if (info->attrs[NL80211_ATTR_DURATION]) {
+		if (!rdev->ops->mgmt_tx_cancel_wait)
+			return -EINVAL;
+		wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]);
+	}
+
 	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
 		channel_type = nla_get_u32(
 			info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
@@ -4271,6 +4281,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		channel_type_valid = true;
 	}
 
+	offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK];
+
 	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
 	chan = rdev_freq_to_chan(rdev, freq, channel_type);
 	if (chan == NULL)
@@ -4287,8 +4299,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		err = PTR_ERR(hdr);
 		goto free_msg;
 	}
-	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
-				    channel_type_valid,
+	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type,
+				    channel_type_valid, wait,
 				    nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				    nla_len(info->attrs[NL80211_ATTR_FRAME]),
 				    &cookie);
@@ -4307,6 +4319,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	u64 cookie;
+
+	if (!info->attrs[NL80211_ATTR_COOKIE])
+		return -EINVAL;
+
+	if (!rdev->ops->mgmt_tx_cancel_wait)
+		return -EOPNOTSUPP;
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
+
+	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+	return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie);
+}
+
 static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -4879,6 +4916,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
+		.doit = nl80211_tx_mgmt_cancel_wait,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 	{
 		.cmd = NL80211_CMD_SET_POWER_SAVE,
 		.doit = nl80211_set_power_save,
-- 
cgit v1.2.3-71-gd317


From 24278d148316d2180be6df40e06db013d8b232b8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 27 Sep 2010 17:25:23 -0700
Subject: rcu: priority boosting for TINY_PREEMPT_RCU

Add priority boosting, but only for TINY_PREEMPT_RCU.  This is enabled
by the default-off RCU_BOOST kernel parameter.  The priority to which to
boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel
parameter (defaulting to real-time priority 1) and the time to wait
before boosting the readers blocking a given grace period is controlled
by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds).

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/init_task.h |   9 +-
 include/linux/sched.h     |  11 ++-
 init/Kconfig              |  39 +++++++++
 kernel/rcutiny.c          |  66 ++++++---------
 kernel/rcutiny_plugin.h   | 208 +++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 280 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2fea6c8ef6ba..69f91aacdeee 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -81,6 +81,12 @@ extern struct group_info init_groups;
  */
 # define CAP_INIT_BSET  CAP_FULL_SET
 
+#ifdef CONFIG_RCU_BOOST
+#define INIT_TASK_RCU_BOOST()						\
+	.rcu_boost_mutex = NULL,
+#else
+#define INIT_TASK_RCU_BOOST()
+#endif
 #ifdef CONFIG_TREE_PREEMPT_RCU
 #define INIT_TASK_RCU_TREE_PREEMPT()					\
 	.rcu_blocked_node = NULL,
@@ -92,7 +98,8 @@ extern struct group_info init_groups;
 	.rcu_read_lock_nesting = 0,					\
 	.rcu_read_unlock_special = 0,					\
 	.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),		\
-	INIT_TASK_RCU_TREE_PREEMPT()
+	INIT_TASK_RCU_TREE_PREEMPT()					\
+	INIT_TASK_RCU_BOOST()
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e18473f0eb78..ed1a9bc52b2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1210,6 +1210,9 @@ struct task_struct {
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+	struct rt_mutex *rcu_boost_mutex;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
@@ -1745,7 +1748,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
@@ -1753,7 +1757,10 @@ static inline void rcu_copy_process(struct task_struct *p)
 	p->rcu_read_unlock_special = 0;
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	p->rcu_blocked_node = NULL;
-#endif
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+	p->rcu_boost_mutex = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 	INIT_LIST_HEAD(&p->rcu_node_entry);
 }
 
diff --git a/init/Kconfig b/init/Kconfig
index a619a1ac7f4c..48efefcac12a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -450,6 +450,45 @@ config TREE_RCU_TRACE
 	  TREE_PREEMPT_RCU implementations, permitting Makefile to
 	  trivially select kernel/rcutree_trace.c.
 
+config RCU_BOOST
+	bool "Enable RCU priority boosting"
+	depends on RT_MUTEXES && TINY_PREEMPT_RCU
+	default n
+	help
+	  This option boosts the priority of preempted RCU readers that
+	  block the current preemptible RCU grace period for too long.
+	  This option also prevents heavy loads from blocking RCU
+	  callback invocation for all flavors of RCU.
+
+	  Say Y here if you are working with real-time apps or heavy loads
+	  Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+	int "Real-time priority to boost RCU readers to"
+	range 1 99
+	depends on RCU_BOOST
+	default 1
+	help
+	  This option specifies the real-time priority to which preempted
+	  RCU readers are to be boosted.  If you are working with CPU-bound
+	  real-time applications, you should specify a priority higher then
+	  the highest-priority CPU-bound application.
+
+	  Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+	int "Milliseconds to delay boosting after RCU grace-period start"
+	range 0 3000
+	depends on RCU_BOOST
+	default 500
+	help
+	  This option specifies the time to wait after the beginning of
+	  a given grace period before priority-boosting preempted RCU
+	  readers blocking that grace period.  Note that any RCU reader
+	  blocking an expedited RCU grace period is boosted immediately.
+
+	  Accept the default if unsure.
+
 endmenu # "RCU Subsystem"
 
 config IKCONFIG
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 86eef29cdfb2..93d166582cbb 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -36,38 +36,16 @@
 #include <linux/time.h>
 #include <linux/cpu.h>
 
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
-	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
-	struct rcu_head **curtail;	/* ->next pointer of last CB. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-	.donetail	= &rcu_sched_ctrlblk.rcucblist,
-	.curtail	= &rcu_sched_ctrlblk.rcucblist,
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.donetail	= &rcu_bh_ctrlblk.rcucblist,
-	.curtail	= &rcu_bh_ctrlblk.rcucblist,
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */
-static struct task_struct *rcu_cbs_task;
-static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq);
-static unsigned long have_rcu_cbs;
-static void invoke_rcu_cbs(void);
+/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_kthread_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
+static unsigned long have_rcu_kthread_work;
+static void invoke_rcu_kthread(void);
 
 /* Forward declarations for rcutiny_plugin.h. */
+struct rcu_ctrlblk;
 static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
-static int rcu_cbs(void *arg);
+static int rcu_kthread(void *arg);
 static void __call_rcu(struct rcu_head *head,
 		       void (*func)(struct rcu_head *rcu),
 		       struct rcu_ctrlblk *rcp);
@@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu)
 {
 	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
 	    rcu_qsctr_help(&rcu_bh_ctrlblk))
-		invoke_rcu_cbs();
+		invoke_rcu_kthread();
 }
 
 /*
@@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu)
 void rcu_bh_qs(int cpu)
 {
 	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
-		invoke_rcu_cbs();
+		invoke_rcu_kthread();
 }
 
 /*
@@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
  * This is a kthread, but it is never stopped, at least not until
  * the system goes down.
  */
-static int rcu_cbs(void *arg)
+static int rcu_kthread(void *arg)
 {
 	unsigned long work;
+	unsigned long morework;
 	unsigned long flags;
 
 	for (;;) {
-		wait_event(rcu_cbs_wq, have_rcu_cbs != 0);
+		wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+		morework = rcu_boost();
 		local_irq_save(flags);
-		work = have_rcu_cbs;
-		have_rcu_cbs = 0;
+		work = have_rcu_kthread_work;
+		have_rcu_kthread_work = morework;
 		local_irq_restore(flags);
 		if (work) {
 			rcu_process_callbacks(&rcu_sched_ctrlblk);
 			rcu_process_callbacks(&rcu_bh_ctrlblk);
 			rcu_preempt_process_callbacks();
 		}
+		schedule_timeout_interruptible(1); /* Leave CPU for others. */
 	}
 
 	return 0;  /* Not reached, but needed to shut gcc up. */
 }
 
 /*
- * Wake up rcu_cbs() to process callbacks now eligible for invocation.
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
  */
-static void invoke_rcu_cbs(void)
+static void invoke_rcu_kthread(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	have_rcu_cbs = 1;
-	wake_up(&rcu_cbs_wq);
+	have_rcu_kthread_work = 1;
+	wake_up(&rcu_kthread_wq);
 	local_irq_restore(flags);
 }
 
@@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched);
  */
 static int __init rcu_spawn_kthreads(void)
 {
-	rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs");
+	struct sched_param sp;
+
+	rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
+	sp.sched_priority = RCU_BOOST_PRIO;
+	sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 95f9239df512..24f43165f222 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -24,6 +24,29 @@
 
 #include <linux/kthread.h>
 
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
+	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
+	struct rcu_head **curtail;	/* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+	.donetail	= &rcu_sched_ctrlblk.rcucblist,
+	.curtail	= &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.donetail	= &rcu_bh_ctrlblk.rcucblist,
+	.curtail	= &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk {
 	struct list_head *gp_tasks;
 				/* Pointer to the first task blocking the */
 				/*  current grace period, or NULL if there */
-				/*  is not such task. */
+				/*  is no such task. */
 	struct list_head *exp_tasks;
 				/* Pointer to first task blocking the */
 				/*  current expedited grace period, or NULL */
 				/*  if there is no such task.  If there */
 				/*  is no current expedited grace period, */
 				/*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+	struct list_head *boost_tasks;
+				/* Pointer to first task that needs to be */
+				/*  priority-boosted, or NULL if no priority */
+				/*  boosting is needed.  If there is no */
+				/*  current or expedited grace period, there */
+				/*  can be no such task. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
 	u8 gpnum;		/* Current grace period. */
 	u8 gpcpu;		/* Last grace period blocked by the CPU. */
 	u8 completed;		/* Last grace period completed. */
 				/*  If all three are equal, RCU is idle. */
+	s8 boosted_this_gp;	/* Has boosting already happened? */
+	unsigned long boost_time; /* When to start boosting (jiffies) */
 };
 
 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void)
 	return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
 }
 
+/*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t)
+{
+	struct list_head *np;
+
+	np = t->rcu_node_entry.next;
+	if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+		np = NULL;
+	return np;
+}
+
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
+ * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
+ */
+static int rcu_boost(void)
+{
+	unsigned long flags;
+	struct rt_mutex mtx;
+	struct list_head *np;
+	struct task_struct *t;
+
+	if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+		return 0;  /* Nothing to boost. */
+	raw_local_irq_save(flags);
+	rcu_preempt_ctrlblk.boosted_this_gp++;
+	t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
+			 rcu_node_entry);
+	np = rcu_next_node_entry(t);
+	rt_mutex_init_proxy_locked(&mtx, t);
+	t->rcu_boost_mutex = &mtx;
+	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	raw_local_irq_restore(flags);
+	rt_mutex_lock(&mtx);
+	rt_mutex_unlock(&mtx);
+	return rcu_preempt_ctrlblk.boost_tasks != NULL;
+}
+
+/*
+ * Check to see if it is now time to start boosting RCU readers blocking
+ * the current grace period, and, if so, tell the rcu_kthread_task to
+ * start boosting them.  If there is an expedited boost in progress,
+ * we wait for it to complete.
+ */
+static void rcu_initiate_boost(void)
+{
+	if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+	    rcu_preempt_ctrlblk.boost_tasks == NULL &&
+	    rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
+	    ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
+		rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+		invoke_rcu_kthread();
+	}
+}
+
+/*
+ * Initiate boosting for an expedited grace period.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
+		rcu_preempt_ctrlblk.boost_tasks =
+			rcu_preempt_ctrlblk.blkd_tasks.next;
+		rcu_preempt_ctrlblk.boosted_this_gp = -1;
+		invoke_rcu_kthread();
+	}
+	raw_local_irq_restore(flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+	rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+	if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
+		rcu_preempt_ctrlblk.boosted_this_gp = 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * If there is no RCU priority boosting, we don't boost.
+ */
+static int rcu_boost(void)
+{
+	return 0;
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate boosting.
+ */
+static void rcu_initiate_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate expedited boosting.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, nothing to do at grace-period start.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+}
+
+#endif /* else #ifdef CONFIG_RCU_BOOST */
+
 /*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
@@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void)
 	rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
 	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 
-	/*
-	 * If there is no GP, or if blocked readers are still blocking GP,
-	 * then there is nothing more to do.
-	 */
+	/* If there is no GP then there is nothing more to do.  */
 	if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
 		return;
+	/* If there are blocked readers, go check up on boosting. */
+	if (rcu_preempt_blocked_readers_cgp()) {
+		rcu_initiate_boost();
+		return;
+	}
 
 	/* Advance callbacks. */
 	rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
@@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void)
 
 	/* If there are done callbacks, cause them to be invoked. */
 	if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-		invoke_rcu_cbs();
+		invoke_rcu_kthread();
 }
 
 /*
@@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void)
 			rcu_preempt_ctrlblk.gp_tasks =
 				rcu_preempt_ctrlblk.blkd_tasks.next;
 
+		/* Set up for RCU priority boosting. */
+		rcu_preempt_boost_start_gp();
+
 		/* If there is no running reader, CPU is done with GP. */
 		if (!rcu_preempt_running_reader())
 			rcu_preempt_cpu_qs();
@@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		 */
 		empty = !rcu_preempt_blocked_readers_cgp();
 		empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
-		np = t->rcu_node_entry.next;
-		if (np == &rcu_preempt_ctrlblk.blkd_tasks)
-			np = NULL;
+		np = rcu_next_node_entry(t);
 		list_del(&t->rcu_node_entry);
 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
 			rcu_preempt_ctrlblk.gp_tasks = np;
 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
 			rcu_preempt_ctrlblk.exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
+			rcu_preempt_ctrlblk.boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 		INIT_LIST_HEAD(&t->rcu_node_entry);
 
 		/*
@@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
 			rcu_report_exp_done();
 	}
+#ifdef CONFIG_RCU_BOOST
+	/* Unboost self if was boosted. */
+	if (special & RCU_READ_UNLOCK_BOOSTED) {
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+		rt_mutex_unlock(t->rcu_boost_mutex);
+		t->rcu_boost_mutex = NULL;
+	}
+#endif /* #ifdef CONFIG_RCU_BOOST */
 	local_irq_restore(flags);
 }
 
@@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void)
 		rcu_preempt_cpu_qs();
 	if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
 	    rcu_preempt_ctrlblk.rcb.donetail)
-		invoke_rcu_cbs();
+		invoke_rcu_kthread();
 	if (rcu_preempt_gp_in_progress() &&
 	    rcu_cpu_blocking_cur_gp() &&
 	    rcu_preempt_running_reader())
@@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void)
 
 	/* Wait for tail of ->blkd_tasks list to drain. */
 	if (rcu_preempted_readers_exp())
+		rcu_initiate_expedited_boost();
 		wait_event(sync_rcu_preempt_exp_wq,
 			   !rcu_preempted_readers_exp());
 
@@ -574,6 +747,15 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 
+/*
+ * Because preemptible RCU does not exist, it is never necessary to
+ * boost preempted RCU readers.
+ */
+static int rcu_boost(void)
+{
+	return 0;
+}
+
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void)
 }
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define RCU_BOOST_PRIO 1
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
-- 
cgit v1.2.3-71-gd317


From 7b27d5475f86186914e54e4a6bb994e9a985337b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 21 Oct 2010 11:29:05 +0800
Subject: rcu,cleanup: move synchronize_sched_expedited() out of sched.c

The first version of synchronize_sched_expedited() used the migration
code in the scheduler, and was therefore implemented in kernel/sched.c.
However, the more recent version of this code no longer uses the
migration code, so this commit moves it to the main RCU source files.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  1 -
 include/linux/rcutiny.h  |  5 ++++
 include/linux/rcutree.h  |  1 +
 kernel/rcutree_plugin.h  | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c           | 69 ----------------------------------------------
 5 files changed, 77 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7142ee3304ab..49e8e16308e1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -66,7 +66,6 @@ extern void call_rcu_sched(struct rcu_head *head,
 extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
-extern void synchronize_sched_expedited(void);
 extern int sched_expedited_torture_stats(char *page);
 
 static inline void __rcu_read_lock_bh(void)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ea025a611fcc..30ebd7c8d874 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -60,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void)
 	synchronize_sched();
 }
 
+static inline void synchronize_sched_expedited(void)
+{
+	synchronize_sched();
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c0e96833aa73..3a933482734a 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -48,6 +48,7 @@ static inline void exit_rcu(void)
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 extern void synchronize_rcu_bh(void);
+extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
 static inline void synchronize_rcu_bh_expedited(void)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 71a4147473f9..21df7f3e7273 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 /*
  * Check the RCU kernel configuration parameters and print informative
@@ -1014,6 +1015,76 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifndef CONFIG_SMP
+
+void synchronize_sched_expedited(void)
+{
+	cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+	/*
+	 * There must be a full memory barrier on each affected CPU
+	 * between the time that try_stop_cpus() is called and the
+	 * time that it returns.
+	 *
+	 * In the current initial implementation of cpu_stop, the
+	 * above condition is already met when the control reaches
+	 * this point and the following smp_mb() is not strictly
+	 * necessary.  Do smp_mb() anyway for documentation and
+	 * robustness against future implementation changes.
+	 */
+	smp_mb(); /* See above comment block. */
+	return 0;
+}
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+ * approach to force grace period to end quickly.  This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier.  Failing to
+ * observe this restriction will result in deadlock.
+ */
+void synchronize_sched_expedited(void)
+{
+	int snap, trycount = 0;
+
+	smp_mb();  /* ensure prior mod happens before capturing snap. */
+	snap = atomic_read(&synchronize_sched_expedited_count) + 1;
+	get_online_cpus();
+	while (try_stop_cpus(cpu_online_mask,
+			     synchronize_sched_expedited_cpu_stop,
+			     NULL) == -EAGAIN) {
+		put_online_cpus();
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_sched();
+			return;
+		}
+		if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			return;
+		}
+		get_online_cpus();
+	}
+	atomic_inc(&synchronize_sched_expedited_count);
+	smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */
+
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index ae8f75a5ceb4..d1e8889872a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,72 +9131,3 @@ struct cgroup_subsys cpuacct_subsys = {
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
 
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-	barrier();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-	/*
-	 * There must be a full memory barrier on each affected CPU
-	 * between the time that try_stop_cpus() is called and the
-	 * time that it returns.
-	 *
-	 * In the current initial implementation of cpu_stop, the
-	 * above condition is already met when the control reaches
-	 * this point and the following smp_mb() is not strictly
-	 * necessary.  Do smp_mb() anyway for documentation and
-	 * robustness against future implementation changes.
-	 */
-	smp_mb(); /* See above comment block. */
-	return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly.  This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier.  Failing to
- * observe this restriction will result in deadlock.
- */
-void synchronize_sched_expedited(void)
-{
-	int snap, trycount = 0;
-
-	smp_mb();  /* ensure prior mod happens before capturing snap. */
-	snap = atomic_read(&synchronize_sched_expedited_count) + 1;
-	get_online_cpus();
-	while (try_stop_cpus(cpu_online_mask,
-			     synchronize_sched_expedited_cpu_stop,
-			     NULL) == -EAGAIN) {
-		put_online_cpus();
-		if (trycount++ < 10)
-			udelay(trycount * num_online_cpus());
-		else {
-			synchronize_sched();
-			return;
-		}
-		if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
-			smp_mb(); /* ensure test happens before caller kfree */
-			return;
-		}
-		get_online_cpus();
-	}
-	atomic_inc(&synchronize_sched_expedited_count);
-	smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
-	put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */
-- 
cgit v1.2.3-71-gd317


From 9833c39400c3e6ee19daeded6910df648741611e Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@iki.fi>
Date: Fri, 19 Nov 2010 09:29:24 +0100
Subject: ARM: 6485/5: proc/vmcore - allow archs to override
 vmcore_elf_check_arch()

Allow architectures to redefine this macro if needed. This is useful for
example in architectures where 64-bit ELF vmcores are not supported.
Specifying zero vmcore_elf64_check_arch() allows compiler to optimize
away unnecessary parts of parse_crash_elf64_headers().

We also rename the macro to vmcore_elf64_check_arch() to reflect that it
is used for 64-bit vmcores only.

Signed-off-by: Mika Westerberg <mika.westerberg@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 fs/proc/vmcore.c           | 2 +-
 include/linux/crash_dump.h | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2367fb3f70bc..74802bc5ded9 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void)
 	/* Do some basic Verification. */
 	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
 		(ehdr.e_type != ET_CORE) ||
-		!vmcore_elf_check_arch(&ehdr) ||
+		!vmcore_elf64_check_arch(&ehdr) ||
 		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
 		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
 		ehdr.e_version != EV_CURRENT ||
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0026f267da20..088cd4ace4ef 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -20,7 +20,14 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 #define vmcore_elf_check_arch_cross(x) 0
 #endif
 
-#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
+/*
+ * Architecture code can redefine this if there are any special checks
+ * needed for 64-bit ELF vmcores. In case of 32-bit only architecture,
+ * this can be set to zero.
+ */
+#ifndef vmcore_elf64_check_arch
+#define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
+#endif
 
 /*
  * is_kdump_kernel() checks whether this kernel is booting after a panic of
-- 
cgit v1.2.3-71-gd317


From 5091faa449ee0b7d73bc296a93bca9540fc51d0a Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 30 Nov 2010 14:18:03 +0100
Subject: sched: Add 'autogroup' scheduling feature: automated per session task
 groups

A recurring complaint from CFS users is that parallel kbuild has
a negative impact on desktop interactivity.  This patch
implements an idea from Linus, to automatically create task
groups.  Currently, only per session autogroups are implemented,
but the patch leaves the way open for enhancement.

Implementation: each task's signal struct contains an inherited
pointer to a refcounted autogroup struct containing a task group
pointer, the default for all tasks pointing to the
init_task_group.  When a task calls setsid(), a new task group
is created, the process is moved into the new task group, and a
reference to the preveious task group is dropped.  Child
processes inherit this task group thereafter, and increase it's
refcount.  When the last thread of a process exits, the
process's reference is dropped, such that when the last process
referencing an autogroup exits, the autogroup is destroyed.

At runqueue selection time, IFF a task has no cgroup assignment,
its current autogroup is used.

Autogroup bandwidth is controllable via setting it's nice level
through the proc filesystem:

  cat /proc/<pid>/autogroup

Displays the task's group and the group's nice level.

  echo <nice level> > /proc/<pid>/autogroup

Sets the task group's shares to the weight of nice <level> task.
Setting nice level is rate limited for !admin users due to the
abuse risk of task group locking.

The feature is enabled from boot by default if
CONFIG_SCHED_AUTOGROUP=y is selected, but can be disabled via
the boot option noautogroup, and can also be turned on/off on
the fly via:

  echo [01] > /proc/sys/kernel/sched_autogroup_enabled

... which will automatically move tasks to/from the root task group.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul Turner <pjt@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
[ Removed the task_group_path() debug code, and fixed !EVENTFD build failure. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <1290281700.28711.9.camel@maggy.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |   2 +
 fs/proc/base.c                      |  79 +++++++++++++
 include/linux/sched.h               |  23 ++++
 init/Kconfig                        |  13 ++
 kernel/fork.c                       |   5 +-
 kernel/sched.c                      |  13 +-
 kernel/sched_autogroup.c            | 229 ++++++++++++++++++++++++++++++++++++
 kernel/sched_autogroup.h            |  32 +++++
 kernel/sched_debug.c                |  47 +-------
 kernel/sys.c                        |   4 +-
 kernel/sysctl.c                     |  11 ++
 11 files changed, 409 insertions(+), 49 deletions(-)
 create mode 100644 kernel/sched_autogroup.c
 create mode 100644 kernel/sched_autogroup.h

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e83e53148f..86820a727b0b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.
 
+	noautogroup	Disable scheduler automatic task group creation.
+
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
 			on "Classic" PPC cores.
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f3d02ca461ec..2fa0ce29b6dc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
 
 #endif
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+/*
+ * Print out autogroup related information:
+ */
+static int sched_autogroup_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct task_struct *p;
+
+	p = get_proc_task(inode);
+	if (!p)
+		return -ESRCH;
+	proc_sched_autogroup_show_task(p, m);
+
+	put_task_struct(p);
+
+	return 0;
+}
+
+static ssize_t
+sched_autogroup_write(struct file *file, const char __user *buf,
+	    size_t count, loff_t *offset)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct task_struct *p;
+	char buffer[PROC_NUMBUF];
+	long nice;
+	int err;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+
+	err = strict_strtol(strstrip(buffer), 0, &nice);
+	if (err)
+		return -EINVAL;
+
+	p = get_proc_task(inode);
+	if (!p)
+		return -ESRCH;
+
+	err = nice;
+	err = proc_sched_autogroup_set_nice(p, &err);
+	if (err)
+		count = err;
+
+	put_task_struct(p);
+
+	return count;
+}
+
+static int sched_autogroup_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+
+	ret = single_open(filp, sched_autogroup_show, NULL);
+	if (!ret) {
+		struct seq_file *m = filp->private_data;
+
+		m->private = inode;
+	}
+	return ret;
+}
+
+static const struct file_operations proc_pid_sched_autogroup_operations = {
+	.open		= sched_autogroup_open,
+	.read		= seq_read,
+	.write		= sched_autogroup_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
+
 static ssize_t comm_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *offset)
 {
@@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("limits",	  S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
+#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+	REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
 #endif
 	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5b92c70c737..9c2d46da486e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -509,6 +509,8 @@ struct thread_group_cputimer {
 	spinlock_t lock;
 };
 
+struct autogroup;
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -576,6 +578,9 @@ struct signal_struct {
 
 	struct tty_struct *tty; /* NULL if no tty */
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
 	/*
 	 * Cumulative resource counters for dead threads in the group,
 	 * and for reaped dead child processes forked by this group.
@@ -1927,6 +1932,24 @@ int sched_rt_handler(struct ctl_table *table, int write,
 
 extern unsigned int sysctl_sched_compat_yield;
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#ifdef CONFIG_PROC_FS
+extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+#endif
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/init/Kconfig b/init/Kconfig
index 88c10468db46..f1bba0a1b051 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -728,6 +728,19 @@ config NET_NS
 
 endif # NAMESPACES
 
+config SCHED_AUTOGROUP
+	bool "Automatic process group scheduling"
+	select EVENTFD
+	select CGROUPS
+	select CGROUP_SCHED
+	select FAIR_GROUP_SCHED
+	help
+	  This option optimizes the scheduler for common desktop workloads by
+	  automatically creating and populating task groups.  This separation
+	  of workloads isolates aggressive CPU burners (like build jobs) from
+	  desktop applications.  Task group autogeneration is currently based
+	  upon task session.
+
 config MM_OWNER
 	bool
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..b6f2475f1e83 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
 
 static inline void put_signal_struct(struct signal_struct *sig)
 {
-	if (atomic_dec_and_test(&sig->sigcnt))
+	if (atomic_dec_and_test(&sig->sigcnt)) {
+		sched_autogroup_exit(sig);
 		free_signal_struct(sig);
+	}
 }
 
 void __put_task_struct(struct task_struct *tsk)
@@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	posix_cpu_timers_init_group(sig);
 
 	tty_audit_fork(sig);
+	sched_autogroup_fork(sig);
 
 	sig->oom_adj = current->signal->oom_adj;
 	sig->oom_score_adj = current->signal->oom_score_adj;
diff --git a/kernel/sched.c b/kernel/sched.c
index 66ef5790d932..b646dad4a40e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -79,6 +79,7 @@
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -271,6 +272,10 @@ struct task_group {
 	struct task_group *parent;
 	struct list_head siblings;
 	struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
 };
 
 #define root_task_group init_task_group
@@ -603,11 +608,14 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
-	return container_of(css, struct task_group, css);
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -1869,6 +1877,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #include "sched_stoptask.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -7750,7 +7759,7 @@ void __init sched_init(void)
 #ifdef CONFIG_CGROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 	INIT_LIST_HEAD(&init_task_group.children);
-
+	autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
 	for_each_possible_cpu(i) {
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644
index 000000000000..57a7ac286a02
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,229 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/utsname.h>
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+static struct autogroup autogroup_default;
+static atomic_t autogroup_seq_nr;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+	autogroup_default.tg = &init_task_group;
+	init_task_group.autogroup = &autogroup_default;
+	kref_init(&autogroup_default.kref);
+	init_rwsem(&autogroup_default.lock);
+	init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_free(struct task_group *tg)
+{
+	kfree(tg->autogroup);
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+	struct autogroup *ag = container_of(kref, struct autogroup, kref);
+
+	sched_destroy_group(ag->tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+	kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+	kref_get(&ag->kref);
+	return ag;
+}
+
+static inline struct autogroup *autogroup_create(void)
+{
+	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
+	struct task_group *tg;
+
+	if (!ag)
+		goto out_fail;
+
+	tg = sched_create_group(&init_task_group);
+
+	if (IS_ERR(tg))
+		goto out_free;
+
+	kref_init(&ag->kref);
+	init_rwsem(&ag->lock);
+	ag->id = atomic_inc_return(&autogroup_seq_nr);
+	ag->tg = tg;
+	tg->autogroup = ag;
+
+	return ag;
+
+out_free:
+	kfree(ag);
+out_fail:
+	if (printk_ratelimit()) {
+		printk(KERN_WARNING "autogroup_create: %s failure.\n",
+			ag ? "sched_create_group()" : "kmalloc()");
+	}
+
+	return autogroup_kref_get(&autogroup_default);
+}
+
+static inline bool
+task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+{
+	if (tg != &root_task_group)
+		return false;
+
+	if (p->sched_class != &fair_sched_class)
+		return false;
+
+	/*
+	 * We can only assume the task group can't go away on us if
+	 * autogroup_move_group() can see us on ->thread_group list.
+	 */
+	if (p->flags & PF_EXITING)
+		return false;
+
+	return true;
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (enabled && task_wants_autogroup(p, tg))
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+	struct autogroup *prev;
+	struct task_struct *t;
+	unsigned long flags;
+
+	BUG_ON(!lock_task_sighand(p, &flags));
+
+	prev = p->signal->autogroup;
+	if (prev == ag) {
+		unlock_task_sighand(p, &flags);
+		return;
+	}
+
+	p->signal->autogroup = autogroup_kref_get(ag);
+
+	t = p;
+	do {
+		sched_move_task(t);
+	} while_each_thread(p, t);
+
+	unlock_task_sighand(p, &flags);
+	autogroup_kref_put(prev);
+}
+
+/* Allocates GFP_KERNEL, cannot be called under any spinlock */
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+	struct autogroup *ag = autogroup_create();
+
+	autogroup_move_group(p, ag);
+	/* drop extra refrence added by autogroup_create() */
+	autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* Cannot be called under siglock.  Currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+	autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+	struct task_struct *p = current;
+
+	spin_lock_irq(&p->sighand->siglock);
+	sig->autogroup = autogroup_kref_get(p->signal->autogroup);
+	spin_unlock_irq(&p->sighand->siglock);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+	autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+	sysctl_sched_autogroup_enabled = 0;
+
+	return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+
+#ifdef CONFIG_PROC_FS
+
+/* Called with siglock held. */
+int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+{
+	static unsigned long next = INITIAL_JIFFIES;
+	struct autogroup *ag;
+	int err;
+
+	if (*nice < -20 || *nice > 19)
+		return -EINVAL;
+
+	err = security_task_setnice(current, *nice);
+	if (err)
+		return err;
+
+	if (*nice < 0 && !can_nice(current, *nice))
+		return -EPERM;
+
+	/* this is a heavy operation taking global locks.. */
+	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
+		return -EAGAIN;
+
+	next = HZ / 10 + jiffies;
+	ag = autogroup_kref_get(p->signal->autogroup);
+
+	down_write(&ag->lock);
+	err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+	if (!err)
+		ag->nice = *nice;
+	up_write(&ag->lock);
+
+	autogroup_kref_put(ag);
+
+	return err;
+}
+
+void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
+{
+	struct autogroup *ag = autogroup_kref_get(p->signal->autogroup);
+
+	down_read(&ag->lock);
+	seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
+	up_read(&ag->lock);
+
+	autogroup_kref_put(ag);
+}
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
+}
+#endif /* CONFIG_SCHED_DEBUG */
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644
index 000000000000..5358e241cb20
--- /dev/null
+++ b/kernel/sched_autogroup.h
@@ -0,0 +1,32 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+struct autogroup {
+	struct kref		kref;
+	struct task_group	*tg;
+	struct rw_semaphore	lock;
+	unsigned long		id;
+	int			nice;
+};
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) {  }
+static inline void autogroup_free(struct task_group *tg) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	return tg;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+	return 0;
+}
+#endif
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index e95b77414a99..1dfae3d014b5 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec)
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void print_cfs_group_stats(struct seq_file *m, int cpu,
-		struct task_group *tg)
+static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 {
 	struct sched_entity *se = tg->se[cpu];
 	if (!se)
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
 
-#ifdef CONFIG_CGROUP_SCHED
-	{
-		char path[64];
-
-		rcu_read_lock();
-		cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
-		rcu_read_unlock();
-		SEQ_printf(m, " %s", path);
-	}
-#endif
 	SEQ_printf(m, "\n");
 }
 
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
-#if defined(CONFIG_CGROUP_SCHED) && \
-	(defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
-static void task_group_path(struct task_group *tg, char *buf, int buflen)
-{
-	/* may be NULL if the underlying cgroup isn't fully-created yet */
-	if (!tg->css.cgroup) {
-		buf[0] = '\0';
-		return;
-	}
-	cgroup_path(tg->css.cgroup, buf, buflen);
-}
-#endif
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
 	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	struct sched_entity *last;
 	unsigned long flags;
 
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
-	char path[128];
-	struct task_group *tg = cfs_rq->tg;
-
-	task_group_path(tg, path, sizeof(path));
-
-	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
-#else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
-#endif
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -215,7 +182,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
 			cfs_rq->load_contribution);
 	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
-			atomic_read(&tg->load_weight));
+			atomic_read(&cfs_rq->tg->load_weight));
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
@@ -224,17 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
-	char path[128];
-	struct task_group *tg = rt_rq->tg;
-
-	task_group_path(tg, path, sizeof(path));
-
-	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
-#else
 	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
-#endif
-
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
diff --git a/kernel/sys.c b/kernel/sys.c
index 7f5a0cd296a9..2745dcdb6c6c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
 	err = session;
 out:
 	write_unlock_irq(&tasklist_lock);
-	if (err > 0)
+	if (err > 0) {
 		proc_sid_connector(group_leader);
+		sched_autogroup_create_attach(group_leader);
+	}
 	return err;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a00fdefd24ce..121e4fff03d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -370,6 +370,17 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_AUTOGROUP
+	{
+		.procname	= "sched_autogroup_enabled",
+		.data		= &sysctl_sched_autogroup_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
-- 
cgit v1.2.3-71-gd317


From c320c7b7d380e630f595de1236d9d085b035d5b4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 20 Oct 2010 12:50:11 -0200
Subject: perf events: Precalculate the header space for PERF_SAMPLE_ fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PERF_SAMPLE_{CALLCHAIN,RAW} have variable lenghts per sample, but the others
can be precalculated, reducing a bit the per sample cost.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h |   2 +
 kernel/perf_event.c        | 150 +++++++++++++++++++++++++++------------------
 2 files changed, 93 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cbf04cc1e630..adf6d9931643 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -758,6 +758,8 @@ struct perf_event {
 	u64				shadow_ctx_time;
 
 	struct perf_event_attr		attr;
+	u16				header_size;
+	u16				read_size;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index af1e63f249f3..aede71245e9f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	perf_event__read_size(event);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		size += sizeof(data->ip);
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(data->time);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(data->cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-	struct perf_event *group_leader = event->group_leader;
+	struct perf_event *group_leader = event->group_leader, *pos;
 
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+
+	perf_event__header_size(group_leader);
+
+	list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+		perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (event->group_leader != event) {
 		list_del_init(&event->group_entry);
 		event->group_leader->nr_siblings--;
-		return;
+		goto out;
 	}
 
 	if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
 	}
+
+out:
+	perf_event__header_size(event->group_leader);
+
+	list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+		perf_event__header_size(tmp);
 }
 
 static inline int
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		return 0;
 
-	if (count < perf_event_read_size(event))
+	if (count < event->read_size)
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
 	data->type = sample_type;
 
 	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header);
+	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP) {
+	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
 
-		header->size += sizeof(data->ip);
-	}
-
 	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
 		data->tid_entry.pid = perf_event_pid(event, current);
 		data->tid_entry.tid = perf_event_tid(event, current);
-
-		header->size += sizeof(data->tid_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_TIME) {
+	if (sample_type & PERF_SAMPLE_TIME)
 		data->time = perf_clock();
 
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
+	if (sample_type & PERF_SAMPLE_ID)
 		data->id = primary_event_id(event);
 
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
 		data->stream_id = event->id;
 
-		header->size += sizeof(data->stream_id);
-	}
-
 	if (sample_type & PERF_SAMPLE_CPU) {
 		data->cpu_entry.cpu		= raw_smp_processor_id();
 		data->cpu_entry.reserved	= 0;
-
-		header->size += sizeof(data->cpu_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_event_read_size(event);
-
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
 		.header = {
 			.type = PERF_RECORD_READ,
 			.misc = 0,
-			.size = sizeof(read_event) + perf_event_read_size(event),
+			.size = sizeof(read_event) + event->read_size,
 		},
 		.pid = perf_event_pid(event, task),
 		.tid = perf_event_tid(event, task),
@@ -5714,6 +5741,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
 
+	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(event);
+
 	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
-- 
cgit v1.2.3-71-gd317


From 498cb95175c29ed96bf32f30df2d11ec1c7f3879 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Tue, 30 Nov 2010 14:11:49 -0800
Subject: OMAP: Serial: Define OMAP uart MDR1 reg and remove magic numbers

Define MDR1 register serial definitions used in serial and
bluetooth drivers.
Change magic number to ones defined in serial_reg for omap1/2
serial driver.
Remove redefined MDR1 register definitions in omap-serial driver.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: G, Manjunath Kondaiah <manjugk@ti.com>
Acked-by: Govindraj.R <govindraj.raja@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/serial.c                  |  6 ++++--
 arch/arm/mach-omap2/serial.c                  | 15 +++++++++------
 arch/arm/plat-omap/include/plat/omap-serial.h |  3 ---
 drivers/serial/omap-serial.c                  |  6 +++---
 include/linux/serial_reg.h                    | 12 ++++++++++++
 5 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c
index b78d0749f13d..c73d1b77b364 100644
--- a/arch/arm/mach-omap1/serial.c
+++ b/arch/arm/mach-omap1/serial.c
@@ -52,9 +52,11 @@ static inline void omap_serial_outp(struct plat_serial8250_port *p, int offset,
  */
 static void __init omap_serial_reset(struct plat_serial8250_port *p)
 {
-	omap_serial_outp(p, UART_OMAP_MDR1, 0x07);	/* disable UART */
+	omap_serial_outp(p, UART_OMAP_MDR1,
+			UART_OMAP_MDR1_DISABLE);	/* disable UART */
 	omap_serial_outp(p, UART_OMAP_SCR, 0x08);	/* TX watermark */
-	omap_serial_outp(p, UART_OMAP_MDR1, 0x00);	/* enable UART */
+	omap_serial_outp(p, UART_OMAP_MDR1,
+			UART_OMAP_MDR1_16X_MODE);	/* enable UART */
 
 	if (!cpu_is_omap15xx()) {
 		omap_serial_outp(p, UART_OMAP_SYSC, 0x01);
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index d17960a1be25..fa9806250b50 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -169,9 +169,9 @@ static inline void serial_write_reg(struct omap_uart_state *uart, int offset,
 
 static inline void __init omap_uart_reset(struct omap_uart_state *uart)
 {
-	serial_write_reg(uart, UART_OMAP_MDR1, 0x07);
+	serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE);
 	serial_write_reg(uart, UART_OMAP_SCR, 0x08);
-	serial_write_reg(uart, UART_OMAP_MDR1, 0x00);
+	serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE);
 }
 
 #if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP3)
@@ -247,9 +247,10 @@ static void omap_uart_restore_context(struct omap_uart_state *uart)
 	uart->context_valid = 0;
 
 	if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS)
-		omap_uart_mdr1_errataset(uart, 0x07, 0xA0);
+		omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_DISABLE, 0xA0);
 	else
-		serial_write_reg(uart, UART_OMAP_MDR1, 0x7);
+		serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE);
+
 	serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */
 	efr = serial_read_reg(uart, UART_EFR);
 	serial_write_reg(uart, UART_EFR, UART_EFR_ECB);
@@ -268,11 +269,13 @@ static void omap_uart_restore_context(struct omap_uart_state *uart)
 	serial_write_reg(uart, UART_OMAP_SCR, uart->scr);
 	serial_write_reg(uart, UART_OMAP_WER, uart->wer);
 	serial_write_reg(uart, UART_OMAP_SYSC, uart->sysc);
+
 	if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS)
-		omap_uart_mdr1_errataset(uart, 0x00, 0xA1);
+		omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_16X_MODE, 0xA1);
 	else
 		/* UART 16x mode */
-		serial_write_reg(uart, UART_OMAP_MDR1, 0x00);
+		serial_write_reg(uart, UART_OMAP_MDR1,
+				UART_OMAP_MDR1_16X_MODE);
 }
 #else
 static inline void omap_uart_save_context(struct omap_uart_state *uart) {}
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
index c8dae02f0704..6a1788014611 100644
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ b/arch/arm/plat-omap/include/plat/omap-serial.h
@@ -31,9 +31,6 @@
  */
 #define OMAP_SERIAL_NAME	"ttyO"
 
-#define OMAP_MDR1_DISABLE	0x07
-#define OMAP_MDR1_MODE13X	0x03
-#define OMAP_MDR1_MODE16X	0x00
 #define OMAP_MODE13X_SPEED	230400
 
 /*
diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c
index 14365f72b664..03a96db67de4 100644
--- a/drivers/serial/omap-serial.c
+++ b/drivers/serial/omap-serial.c
@@ -753,7 +753,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	/* Protocol, Baud Rate, and Interrupt Settings */
 
-	serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_DISABLE);
+	serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE);
 	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
 
 	up->efr = serial_in(up, UART_EFR);
@@ -774,9 +774,9 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 	serial_out(up, UART_LCR, cval);
 
 	if (baud > 230400 && baud != 3000000)
-		serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE13X);
+		serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_13X_MODE);
 	else
-		serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE16X);
+		serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE);
 
 	/* Hardware Flow Control Configuration */
 
diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index c7a0ce11cd47..6f3823474e6c 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -341,5 +341,17 @@
 #define UART_OMAP_SYSS		0x16	/* System status register */
 #define UART_OMAP_WER		0x17	/* Wake-up enable register */
 
+/*
+ * These are the definitions for the MDR1 register
+ */
+#define UART_OMAP_MDR1_16X_MODE		0x00	/* UART 16x mode */
+#define UART_OMAP_MDR1_SIR_MODE		0x01	/* SIR mode */
+#define UART_OMAP_MDR1_16X_ABAUD_MODE	0x02	/* UART 16x auto-baud */
+#define UART_OMAP_MDR1_13X_MODE		0x03	/* UART 13x mode */
+#define UART_OMAP_MDR1_MIR_MODE		0x04	/* MIR mode */
+#define UART_OMAP_MDR1_FIR_MODE		0x05	/* FIR mode */
+#define UART_OMAP_MDR1_CIR_MODE		0x06	/* CIR mode */
+#define UART_OMAP_MDR1_DISABLE		0x07	/* Disable (default state) */
+
 #endif /* _LINUX_SERIAL_REG_H */
 
-- 
cgit v1.2.3-71-gd317


From 662b083a87a3489f3f19c6e0651c1b99b0de5df0 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Tue, 30 Nov 2010 14:11:49 -0800
Subject: omap: Serial: Define register access modes in LCR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Access to some registers depends on register access mode
Three different modes are available for OMAP (at least)
• Operational mode     LCR_REG[7] = 0x0
• Configuration mode A LCR_REG[7] = 0x1 and LCR_REG[7:0]! = 0xBF
• Configuration mode B LCR_REG[7] = 0x1 and LCR_REG[7:0]  = 0xBF

Define access modes and remove redefinitions and magic numbers
in serial drivers (and later in bluetooth driver).

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: Govindraj.R <govindraj.raja@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/serial.c                  | 12 +++++-----
 arch/arm/plat-omap/include/plat/omap-serial.h |  9 -------
 drivers/serial/8250.c                         | 26 ++++++++++----------
 drivers/serial/omap-serial.c                  | 34 +++++++++++++--------------
 include/linux/serial_reg.h                    |  7 ++++++
 5 files changed, 43 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index fa9806250b50..9dc077e2d8af 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -219,7 +219,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart)
 		return;
 
 	lcr = serial_read_reg(uart, UART_LCR);
-	serial_write_reg(uart, UART_LCR, 0xBF);
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B);
 	uart->dll = serial_read_reg(uart, UART_DLL);
 	uart->dlh = serial_read_reg(uart, UART_DLM);
 	serial_write_reg(uart, UART_LCR, lcr);
@@ -227,7 +227,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart)
 	uart->sysc = serial_read_reg(uart, UART_OMAP_SYSC);
 	uart->scr = serial_read_reg(uart, UART_OMAP_SCR);
 	uart->wer = serial_read_reg(uart, UART_OMAP_WER);
-	serial_write_reg(uart, UART_LCR, 0x80);
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A);
 	uart->mcr = serial_read_reg(uart, UART_MCR);
 	serial_write_reg(uart, UART_LCR, lcr);
 
@@ -251,19 +251,19 @@ static void omap_uart_restore_context(struct omap_uart_state *uart)
 	else
 		serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE);
 
-	serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B);
 	efr = serial_read_reg(uart, UART_EFR);
 	serial_write_reg(uart, UART_EFR, UART_EFR_ECB);
 	serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */
 	serial_write_reg(uart, UART_IER, 0x0);
-	serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_write_reg(uart, UART_DLL, uart->dll);
 	serial_write_reg(uart, UART_DLM, uart->dlh);
 	serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */
 	serial_write_reg(uart, UART_IER, uart->ier);
-	serial_write_reg(uart, UART_LCR, 0x80);
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A);
 	serial_write_reg(uart, UART_MCR, uart->mcr);
-	serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */
+	serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_write_reg(uart, UART_EFR, efr);
 	serial_write_reg(uart, UART_LCR, UART_LCR_WLEN8);
 	serial_write_reg(uart, UART_OMAP_SCR, uart->scr);
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
index 6a1788014611..b3e0bad9b77e 100644
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ b/arch/arm/plat-omap/include/plat/omap-serial.h
@@ -33,15 +33,6 @@
 
 #define OMAP_MODE13X_SPEED	230400
 
-/*
- * LCR = 0XBF: Switch to Configuration Mode B.
- * In configuration mode b allow access
- * to EFR,DLL,DLH.
- * Reference OMAP TRM Chapter 17
- * Section: 1.4.3 Mode Selection
- */
-#define OMAP_UART_LCR_CONF_MDB	0XBF
-
 /* WER = 0x7F
  * Enable module level wakeup in WER reg
  */
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 4d8e14b7aa93..aaf9907e6014 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -653,13 +653,13 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 {
 	if (p->capabilities & UART_CAP_SLEEP) {
 		if (p->capabilities & UART_CAP_EFR) {
-			serial_outp(p, UART_LCR, 0xBF);
+			serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B);
 			serial_outp(p, UART_EFR, UART_EFR_ECB);
 			serial_outp(p, UART_LCR, 0);
 		}
 		serial_outp(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
 		if (p->capabilities & UART_CAP_EFR) {
-			serial_outp(p, UART_LCR, 0xBF);
+			serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B);
 			serial_outp(p, UART_EFR, 0);
 			serial_outp(p, UART_LCR, 0);
 		}
@@ -752,7 +752,7 @@ static int size_fifo(struct uart_8250_port *up)
 	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO |
 		    UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
 	serial_outp(up, UART_MCR, UART_MCR_LOOP);
-	serial_outp(up, UART_LCR, UART_LCR_DLAB);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	old_dl = serial_dl_read(up);
 	serial_dl_write(up, 0x0001);
 	serial_outp(up, UART_LCR, 0x03);
@@ -764,7 +764,7 @@ static int size_fifo(struct uart_8250_port *up)
 		serial_inp(up, UART_RX);
 	serial_outp(up, UART_FCR, old_fcr);
 	serial_outp(up, UART_MCR, old_mcr);
-	serial_outp(up, UART_LCR, UART_LCR_DLAB);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	serial_dl_write(up, old_dl);
 	serial_outp(up, UART_LCR, old_lcr);
 
@@ -782,7 +782,7 @@ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p)
 	unsigned int id;
 
 	old_lcr = serial_inp(p, UART_LCR);
-	serial_outp(p, UART_LCR, UART_LCR_DLAB);
+	serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_A);
 
 	old_dll = serial_inp(p, UART_DLL);
 	old_dlm = serial_inp(p, UART_DLM);
@@ -836,7 +836,7 @@ static void autoconfig_has_efr(struct uart_8250_port *up)
 	 * recommended for new designs).
 	 */
 	up->acr = 0;
-	serial_out(up, UART_LCR, 0xBF);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_out(up, UART_EFR, UART_EFR_ECB);
 	serial_out(up, UART_LCR, 0x00);
 	id1 = serial_icr_read(up, UART_ID1);
@@ -945,7 +945,7 @@ static void autoconfig_16550a(struct uart_8250_port *up)
 	 * Check for presence of the EFR when DLAB is set.
 	 * Only ST16C650V1 UARTs pass this test.
 	 */
-	serial_outp(up, UART_LCR, UART_LCR_DLAB);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	if (serial_in(up, UART_EFR) == 0) {
 		serial_outp(up, UART_EFR, 0xA8);
 		if (serial_in(up, UART_EFR) != 0) {
@@ -963,7 +963,7 @@ static void autoconfig_16550a(struct uart_8250_port *up)
 	 * Maybe it requires 0xbf to be written to the LCR.
 	 * (other ST16C650V2 UARTs, TI16C752A, etc)
 	 */
-	serial_outp(up, UART_LCR, 0xBF);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) {
 		DEBUG_AUTOCONF("EFRv2 ");
 		autoconfig_has_efr(up);
@@ -1024,7 +1024,7 @@ static void autoconfig_16550a(struct uart_8250_port *up)
 	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE);
 	status1 = serial_in(up, UART_IIR) >> 5;
 	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO);
-	serial_outp(up, UART_LCR, UART_LCR_DLAB);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE);
 	status2 = serial_in(up, UART_IIR) >> 5;
 	serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO);
@@ -1183,7 +1183,7 @@ static void autoconfig(struct uart_8250_port *up, unsigned int probeflags)
 	 * We also initialise the EFR (if any) to zero for later.  The
 	 * EFR occupies the same register location as the FCR and IIR.
 	 */
-	serial_outp(up, UART_LCR, 0xBF);
+	serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_outp(up, UART_EFR, 0);
 	serial_outp(up, UART_LCR, 0);
 
@@ -1952,7 +1952,7 @@ static int serial8250_startup(struct uart_port *port)
 	if (up->port.type == PORT_16C950) {
 		/* Wake up and initialize UART */
 		up->acr = 0;
-		serial_outp(up, UART_LCR, 0xBF);
+		serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B);
 		serial_outp(up, UART_EFR, UART_EFR_ECB);
 		serial_outp(up, UART_IER, 0);
 		serial_outp(up, UART_LCR, 0);
@@ -2002,7 +2002,7 @@ static int serial8250_startup(struct uart_port *port)
 	if (up->port.type == PORT_16850) {
 		unsigned char fctr;
 
-		serial_outp(up, UART_LCR, 0xbf);
+		serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 		fctr = serial_inp(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX);
 		serial_outp(up, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_RX);
@@ -2363,7 +2363,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 		if (termios->c_cflag & CRTSCTS)
 			efr |= UART_EFR_CTS;
 
-		serial_outp(up, UART_LCR, 0xBF);
+		serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B);
 		serial_outp(up, UART_EFR, efr);
 	}
 
diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c
index 03a96db67de4..1201eff1831e 100644
--- a/drivers/serial/omap-serial.c
+++ b/drivers/serial/omap-serial.c
@@ -570,7 +570,7 @@ serial_omap_configure_xonxoff
 	unsigned char efr = 0;
 
 	up->lcr = serial_in(up, UART_LCR);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	up->efr = serial_in(up, UART_EFR);
 	serial_out(up, UART_EFR, up->efr & ~UART_EFR_ECB);
 
@@ -598,7 +598,7 @@ serial_omap_configure_xonxoff
 		efr |= OMAP_UART_SW_RX;
 
 	serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
-	serial_out(up, UART_LCR, UART_LCR_DLAB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 
 	up->mcr = serial_in(up, UART_MCR);
 
@@ -612,14 +612,14 @@ serial_omap_configure_xonxoff
 		up->mcr |= UART_MCR_XONANY;
 
 	serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG);
 	/* Enable special char function UARTi.EFR_REG[5] and
 	 * load the new software flow control mode IXON or IXOFF
 	 * and restore the UARTi.EFR_REG[4] ENHANCED_EN value.
 	 */
 	serial_out(up, UART_EFR, efr | UART_EFR_SCD);
-	serial_out(up, UART_LCR, UART_LCR_DLAB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 
 	serial_out(up, UART_MCR, up->mcr & ~UART_MCR_TCRTLR);
 	serial_out(up, UART_LCR, up->lcr);
@@ -724,22 +724,22 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * baud clock is not running
 	 * DLL_REG and DLH_REG set to 0.
 	 */
-	serial_out(up, UART_LCR, UART_LCR_DLAB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	serial_out(up, UART_DLL, 0);
 	serial_out(up, UART_DLM, 0);
 	serial_out(up, UART_LCR, 0);
 
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 	up->efr = serial_in(up, UART_EFR);
 	serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
 
-	serial_out(up, UART_LCR, UART_LCR_DLAB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	up->mcr = serial_in(up, UART_MCR);
 	serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR);
 	/* FIFO ENABLE, DMA MODE */
 	serial_out(up, UART_FCR, up->fcr);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 	if (up->use_dma) {
 		serial_out(up, UART_TI752_TLR, 0);
@@ -748,27 +748,27 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 	}
 
 	serial_out(up, UART_EFR, up->efr);
-	serial_out(up, UART_LCR, UART_LCR_DLAB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 	serial_out(up, UART_MCR, up->mcr);
 
 	/* Protocol, Baud Rate, and Interrupt Settings */
 
 	serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 	up->efr = serial_in(up, UART_EFR);
 	serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
 
 	serial_out(up, UART_LCR, 0);
 	serial_out(up, UART_IER, 0);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 	serial_out(up, UART_DLL, quot & 0xff);          /* LS of divisor */
 	serial_out(up, UART_DLM, quot >> 8);            /* MS of divisor */
 
 	serial_out(up, UART_LCR, 0);
 	serial_out(up, UART_IER, up->ier);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 
 	serial_out(up, UART_EFR, up->efr);
 	serial_out(up, UART_LCR, cval);
@@ -782,18 +782,18 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	if (termios->c_cflag & CRTSCTS) {
 		efr |= (UART_EFR_CTS | UART_EFR_RTS);
-		serial_out(up, UART_LCR, UART_LCR_DLAB);
+		serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 
 		up->mcr = serial_in(up, UART_MCR);
 		serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR);
 
-		serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+		serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 		up->efr = serial_in(up, UART_EFR);
 		serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
 
 		serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG);
 		serial_out(up, UART_EFR, efr); /* Enable AUTORTS and AUTOCTS */
-		serial_out(up, UART_LCR, UART_LCR_DLAB);
+		serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
 		serial_out(up, UART_MCR, up->mcr | UART_MCR_RTS);
 		serial_out(up, UART_LCR, cval);
 	}
@@ -815,13 +815,13 @@ serial_omap_pm(struct uart_port *port, unsigned int state,
 	unsigned char efr;
 
 	dev_dbg(up->port.dev, "serial_omap_pm+%d\n", up->pdev->id);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	efr = serial_in(up, UART_EFR);
 	serial_out(up, UART_EFR, efr | UART_EFR_ECB);
 	serial_out(up, UART_LCR, 0);
 
 	serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0);
-	serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
 	serial_out(up, UART_EFR, efr);
 	serial_out(up, UART_LCR, 0);
 	/* Enable module level wake up */
diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index 6f3823474e6c..3ecb71a9e505 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -99,6 +99,13 @@
 #define UART_LCR_WLEN7		0x02 /* Wordlength: 7 bits */
 #define UART_LCR_WLEN8		0x03 /* Wordlength: 8 bits */
 
+/*
+ * Access to some registers depends on register access / configuration
+ * mode.
+ */
+#define UART_LCR_CONF_MODE_A	UART_LCR_DLAB	/* Configutation mode A */
+#define UART_LCR_CONF_MODE_B	0xBF		/* Configutation mode B */
+
 #define UART_MCR	4	/* Out: Modem Control Register */
 #define UART_MCR_CLKSEL		0x80 /* Divide clock by 4 (TI16C752, EFR[4]=1) */
 #define UART_MCR_TCRTLR		0x40 /* Access TCR/TLR (TI16C752, EFR[4]=1) */
-- 
cgit v1.2.3-71-gd317


From ad9c2b048b605fbc8d50526e330b88abdd631ab2 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 30 Nov 2010 11:06:47 +0900
Subject: security: Fix comment of security_key_permission

Comment for return value of security_key_permission() has been wrong
since it was added in 2.6.15.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index fd4d55fb8845..e7d89b0c1fd8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1058,8 +1058,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@cred points to the credentials to provide the context against which to
  *	evaluate the security data on the key.
  *	@perm describes the combination of permissions required of this key.
- *	Return 1 if permission granted, 0 if permission denied and -ve it the
- *	normal permissions model should be effected.
+ *	Return 0 if permission is granted, -ve error otherwise.
  * @key_getsecurity:
  *	Get a textual representation of the security context attached to a key
  *	for the purposes of honouring KEYCTL_GETSECURITY.  This function
-- 
cgit v1.2.3-71-gd317


From c41ab6a1b9028de33e74101cb0aae13098a56fdb Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 29 Nov 2010 15:47:09 -0500
Subject: flex_array: fix flex_array_put_ptr macro to be valid C
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using flex_array_put_ptr() results in a compile error "error: lvalue
required as unary ‘&’ operand"  fix the casting order to fix this.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/flex_array.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 631b77f2ac70..70e4efabe0fb 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -71,7 +71,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
 int flex_array_shrink(struct flex_array *fa);
 
 #define flex_array_put_ptr(fa, nr, src, gfp) \
-	flex_array_put(fa, nr, &(void *)(src), gfp)
+	flex_array_put(fa, nr, (void *)&(src), gfp)
 
 void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr);
 
-- 
cgit v1.2.3-71-gd317


From 7fc56f0d9908fe140a01387d59954e3d0a2e7744 Mon Sep 17 00:00:00 2001
From: Luo Andy <yifei.luo@intel.com>
Date: Tue, 23 Nov 2010 10:41:21 +0800
Subject: usb: gadget: langwell_udc: add usb test mode support

This patch adds test mode support for Langwell gadget driver.

Signed-off-by: Henry Yuan <hang.yuan@intel.com>
Signed-off-by: Andy Luo <yifei.luo@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/langwell_udc.c | 23 +++++++++++++++++++++++
 include/linux/usb/ch9.h           | 10 ++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c
index b8ec954c0692..777972454e3e 100644
--- a/drivers/usb/gadget/langwell_udc.c
+++ b/drivers/usb/gadget/langwell_udc.c
@@ -2225,6 +2225,7 @@ static void handle_setup_packet(struct langwell_udc *dev,
 	u16	wValue = le16_to_cpu(setup->wValue);
 	u16	wIndex = le16_to_cpu(setup->wIndex);
 	u16	wLength = le16_to_cpu(setup->wLength);
+	u32	portsc1;
 
 	dev_vdbg(&dev->pdev->dev, "---> %s()\n", __func__);
 
@@ -2313,6 +2314,28 @@ static void handle_setup_packet(struct langwell_udc *dev,
 					dev->dev_status &= ~(1 << wValue);
 				}
 				break;
+			case USB_DEVICE_TEST_MODE:
+				dev_dbg(&dev->pdev->dev, "SETUP: TEST MODE\n");
+				if ((wIndex & 0xff) ||
+					(dev->gadget.speed != USB_SPEED_HIGH))
+					ep0_stall(dev);
+
+				switch (wIndex >> 8) {
+				case TEST_J:
+				case TEST_K:
+				case TEST_SE0_NAK:
+				case TEST_PACKET:
+				case TEST_FORCE_EN:
+					if (prime_status_phase(dev, EP_DIR_IN))
+						ep0_stall(dev);
+					portsc1 = readl(&dev->op_regs->portsc1);
+					portsc1 |= (wIndex & 0xf00) << 8;
+					writel(portsc1, &dev->op_regs->portsc1);
+					goto end;
+				default:
+					rc = -EOPNOTSUPP;
+				}
+				break;
 			default:
 				rc = -EOPNOTSUPP;
 				break;
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index f917bbbc8901..ab461948b579 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -123,6 +123,16 @@
 #define USB_DEVICE_A_ALT_HNP_SUPPORT	5	/* (otg) other RH port does */
 #define USB_DEVICE_DEBUG_MODE		6	/* (special devices only) */
 
+/*
+ * Test Mode Selectors
+ * See USB 2.0 spec Table 9-7
+ */
+#define	TEST_J		1
+#define	TEST_K		2
+#define	TEST_SE0_NAK	3
+#define	TEST_PACKET	4
+#define	TEST_FORCE_EN	5
+
 /*
  * New Feature Selectors as added by USB 3.0
  * See USB 3.0 spec Table 9-6
-- 
cgit v1.2.3-71-gd317


From b029ffafe89cf4b97cf39e0225a5205cbbf9e02f Mon Sep 17 00:00:00 2001
From: Hemanth V <hemanthv@ti.com>
Date: Tue, 30 Nov 2010 23:03:54 -0800
Subject: Input: add CMA3000 accelerometer driver

Add support for CMA3000 Tri-axis accelerometer, which supports Motion
detect, Measurement and Free fall modes. CMA3000 supports both I2C/SPI
bus for communication, currently the driver supports I2C based
communication.

Signed-off-by: Hemanth V <hemanthv@ti.com>
Reviewed-by: Jonathan Cameron <jic23@cam.ac.uk>
Reviewed-by: Sergio Aguirre <saaguirre@ti.com>
Reviewed-by: Shubhrajyoti <Shubhrajyoti@ti.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/input/cma3000_d0x.txt  | 115 ++++++++++
 drivers/input/misc/Kconfig           |  24 +++
 drivers/input/misc/Makefile          |   2 +
 drivers/input/misc/cma3000_d0x.c     | 398 +++++++++++++++++++++++++++++++++++
 drivers/input/misc/cma3000_d0x.h     |  42 ++++
 drivers/input/misc/cma3000_d0x_i2c.c | 141 +++++++++++++
 include/linux/input/cma3000.h        |  59 ++++++
 7 files changed, 781 insertions(+)
 create mode 100644 Documentation/input/cma3000_d0x.txt
 create mode 100644 drivers/input/misc/cma3000_d0x.c
 create mode 100644 drivers/input/misc/cma3000_d0x.h
 create mode 100644 drivers/input/misc/cma3000_d0x_i2c.c
 create mode 100644 include/linux/input/cma3000.h

(limited to 'include/linux')

diff --git a/Documentation/input/cma3000_d0x.txt b/Documentation/input/cma3000_d0x.txt
new file mode 100644
index 000000000000..29d088db4afd
--- /dev/null
+++ b/Documentation/input/cma3000_d0x.txt
@@ -0,0 +1,115 @@
+Kernel driver for CMA3000-D0x
+============================
+
+Supported chips:
+* VTI CMA3000-D0x
+Datasheet:
+  CMA3000-D0X Product Family Specification 8281000A.02.pdf
+  <http://www.vti.fi/en/>
+
+Author: Hemanth V <hemanthv@ti.com>
+
+
+Description
+-----------
+CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and
+Free fall modes.
+
+Motion Detect Mode: Its the low power mode where interrupts are generated only
+when motion exceeds the defined thresholds.
+
+Measurement Mode: This mode is used to read the acceleration data on X,Y,Z
+axis and supports 400, 100, 40 Hz sample frequency.
+
+Free fall Mode: This mode is intended to save system resources.
+
+Threshold values: Chip supports defining threshold values for above modes
+which includes time and g value. Refer product specifications for more details.
+
+CMA3000 chip supports mutually exclusive I2C and SPI interfaces for
+communication, currently the driver supports I2C based communication only.
+Initial configuration for bus mode is set in non volatile memory and can later
+be modified through bus interface command.
+
+Driver reports acceleration data through input subsystem. It generates ABS_MISC
+event with value 1 when free fall is detected.
+
+Platform data need to be configured for initial default values.
+
+Platform Data
+-------------
+fuzz_x: Noise on X Axis
+
+fuzz_y: Noise on Y Axis
+
+fuzz_z: Noise on Z Axis
+
+g_range: G range in milli g i.e 2000 or 8000
+
+mode: Default Operating mode
+
+mdthr: Motion detect g range threshold value
+
+mdfftmr: Motion detect and free fall time threshold value
+
+ffthr: Free fall g range threshold value
+
+Input Interface
+--------------
+Input driver version is 1.0.0
+Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0
+Input device name: "cma3000-accelerometer"
+Supported events:
+  Event type 0 (Sync)
+  Event type 3 (Absolute)
+    Event code 0 (X)
+      Value     47
+      Min    -8000
+      Max     8000
+      Fuzz     200
+    Event code 1 (Y)
+      Value    -28
+      Min    -8000
+      Max     8000
+      Fuzz     200
+    Event code 2 (Z)
+      Value    905
+      Min    -8000
+      Max     8000
+      Fuzz     200
+    Event code 40 (Misc)
+      Value      0
+      Min        0
+      Max        1
+  Event type 4 (Misc)
+
+
+Register/Platform parameters Description
+----------------------------------------
+
+mode:
+	0: power down mode
+	1: 100 Hz Measurement mode
+	2: 400 Hz Measurement mode
+	3: 40 Hz Measurement mode
+	4: Motion Detect mode (default)
+	5: 100 Hz Free fall mode
+	6: 40 Hz Free fall mode
+	7: Power off mode
+
+grange:
+	2000: 2000 mg or 2G Range
+	8000: 8000 mg or 8G Range
+
+mdthr:
+	X: X * 71mg (8G Range)
+	X: X * 18mg (2G Range)
+
+mdfftmr:
+	X: (X & 0x70) * 100 ms (MDTMR)
+	   (X & 0x0F) * 2.5 ms (FFTMR 400 Hz)
+	   (X & 0x0F) * 10 ms  (FFTMR 100 Hz)
+
+ffthr:
+       X: (X >> 2) * 18mg (2G Range)
+       X: (X & 0x0F) * 71 mg (8G Range)
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index b99b8cbde02f..f0d90172a65f 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -448,4 +448,28 @@ config INPUT_ADXL34X_SPI
 	  To compile this driver as a module, choose M here: the
 	  module will be called adxl34x-spi.
 
+config INPUT_CMA3000
+	tristate "VTI CMA3000 Tri-axis accelerometer"
+	help
+	  Say Y here if you want to use VTI CMA3000_D0x Accelerometer
+	  driver
+
+	  This driver currently only supports I2C interface to the
+	  controller. Also select the I2C method.
+
+	  If unsure, say N
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cma3000_d0x.
+
+config INPUT_CMA3000_I2C
+	tristate "Support I2C bus connection"
+	depends on INPUT_CMA3000 && I2C
+	help
+	  Say Y here if you want to use VTI CMA3000_D0x Accelerometer
+	  through I2C interface.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cma3000_d0x_i2c.
+
 endif
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 1fe1f6c8b737..35bcfe46555e 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -18,6 +18,8 @@ obj-$(CONFIG_INPUT_ATI_REMOTE2)		+= ati_remote2.o
 obj-$(CONFIG_INPUT_ATLAS_BTNS)		+= atlas_btns.o
 obj-$(CONFIG_INPUT_BFIN_ROTARY)		+= bfin_rotary.o
 obj-$(CONFIG_INPUT_CM109)		+= cm109.o
+obj-$(CONFIG_INPUT_CMA3000)		+= cma3000_d0x.o
+obj-$(CONFIG_INPUT_CMA3000_I2C)		+= cma3000_d0x_i2c.o
 obj-$(CONFIG_INPUT_COBALT_BTNS)		+= cobalt_btns.o
 obj-$(CONFIG_INPUT_DM355EVM)		+= dm355evm_keys.o
 obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
diff --git a/drivers/input/misc/cma3000_d0x.c b/drivers/input/misc/cma3000_d0x.c
new file mode 100644
index 000000000000..1633b6342267
--- /dev/null
+++ b/drivers/input/misc/cma3000_d0x.c
@@ -0,0 +1,398 @@
+/*
+ * VTI CMA3000_D0x Accelerometer driver
+ *
+ * Copyright (C) 2010 Texas Instruments
+ * Author: Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/input/cma3000.h>
+
+#include "cma3000_d0x.h"
+
+#define CMA3000_WHOAMI      0x00
+#define CMA3000_REVID       0x01
+#define CMA3000_CTRL        0x02
+#define CMA3000_STATUS      0x03
+#define CMA3000_RSTR        0x04
+#define CMA3000_INTSTATUS   0x05
+#define CMA3000_DOUTX       0x06
+#define CMA3000_DOUTY       0x07
+#define CMA3000_DOUTZ       0x08
+#define CMA3000_MDTHR       0x09
+#define CMA3000_MDFFTMR     0x0A
+#define CMA3000_FFTHR       0x0B
+
+#define CMA3000_RANGE2G    (1 << 7)
+#define CMA3000_RANGE8G    (0 << 7)
+#define CMA3000_BUSI2C     (0 << 4)
+#define CMA3000_MODEMASK   (7 << 1)
+#define CMA3000_GRANGEMASK (1 << 7)
+
+#define CMA3000_STATUS_PERR    1
+#define CMA3000_INTSTATUS_FFDET (1 << 2)
+
+/* Settling time delay in ms */
+#define CMA3000_SETDELAY    30
+
+/* Delay for clearing interrupt in us */
+#define CMA3000_INTDELAY    44
+
+
+/*
+ * Bit weights in mg for bit 0, other bits need
+ * multipy factor 2^n. Eight bit is the sign bit.
+ */
+#define BIT_TO_2G  18
+#define BIT_TO_8G  71
+
+struct cma3000_accl_data {
+	const struct cma3000_bus_ops *bus_ops;
+	const struct cma3000_platform_data *pdata;
+
+	struct device *dev;
+	struct input_dev *input_dev;
+
+	int bit_to_mg;
+	int irq;
+
+	int g_range;
+	u8 mode;
+
+	struct mutex mutex;
+	bool opened;
+	bool suspended;
+};
+
+#define CMA3000_READ(data, reg, msg) \
+	(data->bus_ops->read(data->dev, reg, msg))
+#define CMA3000_SET(data, reg, val, msg) \
+	((data)->bus_ops->write(data->dev, reg, val, msg))
+
+/*
+ * Conversion for each of the eight modes to g, depending
+ * on G range i.e 2G or 8G. Some modes always operate in
+ * 8G.
+ */
+
+static int mode_to_mg[8][2] = {
+	{ 0, 0 },
+	{ BIT_TO_8G, BIT_TO_2G },
+	{ BIT_TO_8G, BIT_TO_2G },
+	{ BIT_TO_8G, BIT_TO_8G },
+	{ BIT_TO_8G, BIT_TO_8G },
+	{ BIT_TO_8G, BIT_TO_2G },
+	{ BIT_TO_8G, BIT_TO_2G },
+	{ 0, 0},
+};
+
+static void decode_mg(struct cma3000_accl_data *data, int *datax,
+				int *datay, int *dataz)
+{
+	/* Data in 2's complement, convert to mg */
+	*datax = ((s8)*datax) * data->bit_to_mg;
+	*datay = ((s8)*datay) * data->bit_to_mg;
+	*dataz = ((s8)*dataz) * data->bit_to_mg;
+}
+
+static irqreturn_t cma3000_thread_irq(int irq, void *dev_id)
+{
+	struct cma3000_accl_data *data = dev_id;
+	int datax, datay, dataz;
+	u8 ctrl, mode, range, intr_status;
+
+	intr_status = CMA3000_READ(data, CMA3000_INTSTATUS, "interrupt status");
+	if (intr_status < 0)
+		return IRQ_NONE;
+
+	/* Check if free fall is detected, report immediately */
+	if (intr_status & CMA3000_INTSTATUS_FFDET) {
+		input_report_abs(data->input_dev, ABS_MISC, 1);
+		input_sync(data->input_dev);
+	} else {
+		input_report_abs(data->input_dev, ABS_MISC, 0);
+	}
+
+	datax = CMA3000_READ(data, CMA3000_DOUTX, "X");
+	datay = CMA3000_READ(data, CMA3000_DOUTY, "Y");
+	dataz = CMA3000_READ(data, CMA3000_DOUTZ, "Z");
+
+	ctrl = CMA3000_READ(data, CMA3000_CTRL, "ctrl");
+	mode = (ctrl & CMA3000_MODEMASK) >> 1;
+	range = (ctrl & CMA3000_GRANGEMASK) >> 7;
+
+	data->bit_to_mg = mode_to_mg[mode][range];
+
+	/* Interrupt not for this device */
+	if (data->bit_to_mg == 0)
+		return IRQ_NONE;
+
+	/* Decode register values to milli g */
+	decode_mg(data, &datax, &datay, &dataz);
+
+	input_report_abs(data->input_dev, ABS_X, datax);
+	input_report_abs(data->input_dev, ABS_Y, datay);
+	input_report_abs(data->input_dev, ABS_Z, dataz);
+	input_sync(data->input_dev);
+
+	return IRQ_HANDLED;
+}
+
+static int cma3000_reset(struct cma3000_accl_data *data)
+{
+	int val;
+
+	/* Reset sequence */
+	CMA3000_SET(data, CMA3000_RSTR, 0x02, "Reset");
+	CMA3000_SET(data, CMA3000_RSTR, 0x0A, "Reset");
+	CMA3000_SET(data, CMA3000_RSTR, 0x04, "Reset");
+
+	/* Settling time delay */
+	mdelay(10);
+
+	val = CMA3000_READ(data, CMA3000_STATUS, "Status");
+	if (val < 0) {
+		dev_err(data->dev, "Reset failed\n");
+		return val;
+	}
+
+	if (val & CMA3000_STATUS_PERR) {
+		dev_err(data->dev, "Parity Error\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int cma3000_poweron(struct cma3000_accl_data *data)
+{
+	const struct cma3000_platform_data *pdata = data->pdata;
+	u8 ctrl = 0;
+	int ret;
+
+	if (data->g_range == CMARANGE_2G) {
+		ctrl = (data->mode << 1) | CMA3000_RANGE2G;
+	} else if (data->g_range == CMARANGE_8G) {
+		ctrl = (data->mode << 1) | CMA3000_RANGE8G;
+	} else {
+		dev_info(data->dev,
+			 "Invalid G range specified, assuming 8G\n");
+		ctrl = (data->mode << 1) | CMA3000_RANGE8G;
+	}
+
+	ctrl |= data->bus_ops->ctrl_mod;
+
+	CMA3000_SET(data, CMA3000_MDTHR, pdata->mdthr,
+		    "Motion Detect Threshold");
+	CMA3000_SET(data, CMA3000_MDFFTMR, pdata->mdfftmr,
+		    "Time register");
+	CMA3000_SET(data, CMA3000_FFTHR, pdata->ffthr,
+		    "Free fall threshold");
+	ret = CMA3000_SET(data, CMA3000_CTRL, ctrl, "Mode setting");
+	if (ret < 0)
+		return -EIO;
+
+	msleep(CMA3000_SETDELAY);
+
+	return 0;
+}
+
+static int cma3000_poweroff(struct cma3000_accl_data *data)
+{
+	int ret;
+
+	ret = CMA3000_SET(data, CMA3000_CTRL, CMAMODE_POFF, "Mode setting");
+	msleep(CMA3000_SETDELAY);
+
+	return ret;
+}
+
+static int cma3000_open(struct input_dev *input_dev)
+{
+	struct cma3000_accl_data *data = input_get_drvdata(input_dev);
+
+	mutex_lock(&data->mutex);
+
+	if (!data->suspended)
+		cma3000_poweron(data);
+
+	data->opened = true;
+
+	mutex_unlock(&data->mutex);
+
+	return 0;
+}
+
+static void cma3000_close(struct input_dev *input_dev)
+{
+	struct cma3000_accl_data *data = input_get_drvdata(input_dev);
+
+	mutex_lock(&data->mutex);
+
+	if (!data->suspended)
+		cma3000_poweroff(data);
+
+	data->opened = false;
+
+	mutex_unlock(&data->mutex);
+}
+
+void cma3000_suspend(struct cma3000_accl_data *data)
+{
+	mutex_lock(&data->mutex);
+
+	if (!data->suspended && data->opened)
+		cma3000_poweroff(data);
+
+	data->suspended = true;
+
+	mutex_unlock(&data->mutex);
+}
+EXPORT_SYMBOL(cma3000_suspend);
+
+
+void cma3000_resume(struct cma3000_accl_data *data)
+{
+	mutex_lock(&data->mutex);
+
+	if (data->suspended && data->opened)
+		cma3000_poweron(data);
+
+	data->suspended = false;
+
+	mutex_unlock(&data->mutex);
+}
+EXPORT_SYMBOL(cma3000_resume);
+
+struct cma3000_accl_data *cma3000_init(struct device *dev, int irq,
+				       const struct cma3000_bus_ops *bops)
+{
+	const struct cma3000_platform_data *pdata = dev->platform_data;
+	struct cma3000_accl_data *data;
+	struct input_dev *input_dev;
+	int rev;
+	int error;
+
+	if (!pdata) {
+		dev_err(dev, "platform data not found\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+
+	/* if no IRQ return error */
+	if (irq == 0) {
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	data = kzalloc(sizeof(struct cma3000_accl_data), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!data || !input_dev) {
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	data->dev = dev;
+	data->input_dev = input_dev;
+	data->bus_ops = bops;
+	data->pdata = pdata;
+	data->irq = irq;
+	mutex_init(&data->mutex);
+
+	data->mode = pdata->mode;
+	if (data->mode < CMAMODE_DEFAULT || data->mode > CMAMODE_POFF) {
+		data->mode = CMAMODE_MOTDET;
+		dev_warn(dev,
+			 "Invalid mode specified, assuming Motion Detect\n");
+	}
+
+	data->g_range = pdata->g_range;
+	if (data->g_range != CMARANGE_2G && data->g_range != CMARANGE_8G) {
+		dev_info(dev,
+			 "Invalid G range specified, assuming 8G\n");
+		data->g_range = CMARANGE_8G;
+	}
+
+	input_dev->name = "cma3000-accelerometer";
+	input_dev->id.bustype = bops->bustype;
+	input_dev->open = cma3000_open;
+	input_dev->close = cma3000_close;
+
+	 __set_bit(EV_ABS, input_dev->evbit);
+
+	input_set_abs_params(input_dev, ABS_X,
+			-data->g_range, data->g_range, pdata->fuzz_x, 0);
+	input_set_abs_params(input_dev, ABS_Y,
+			-data->g_range, data->g_range, pdata->fuzz_y, 0);
+	input_set_abs_params(input_dev, ABS_Z,
+			-data->g_range, data->g_range, pdata->fuzz_z, 0);
+	input_set_abs_params(input_dev, ABS_MISC, 0, 1, 0, 0);
+
+	input_set_drvdata(input_dev, data);
+
+	error = cma3000_reset(data);
+	if (error)
+		goto err_free_mem;
+
+	rev = CMA3000_READ(data, CMA3000_REVID, "Revid");
+	if (rev < 0) {
+		error = rev;
+		goto err_free_mem;
+	}
+
+	pr_info("CMA3000 Accelerometer: Revision %x\n", rev);
+
+	error = request_threaded_irq(irq, NULL, cma3000_thread_irq,
+				     pdata->irqflags | IRQF_ONESHOT,
+				     "cma3000_d0x", data);
+	if (error) {
+		dev_err(dev, "request_threaded_irq failed\n");
+		goto err_free_mem;
+	}
+
+	error = input_register_device(data->input_dev);
+	if (error) {
+		dev_err(dev, "Unable to register input device\n");
+		goto err_free_irq;
+	}
+
+	return data;
+
+err_free_irq:
+	free_irq(irq, data);
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(data);
+err_out:
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL(cma3000_init);
+
+void cma3000_exit(struct cma3000_accl_data *data)
+{
+	free_irq(data->irq, data);
+	input_unregister_device(data->input_dev);
+	kfree(data);
+}
+EXPORT_SYMBOL(cma3000_exit);
+
+MODULE_DESCRIPTION("CMA3000-D0x Accelerometer Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>");
diff --git a/drivers/input/misc/cma3000_d0x.h b/drivers/input/misc/cma3000_d0x.h
new file mode 100644
index 000000000000..2304ce306e1c
--- /dev/null
+++ b/drivers/input/misc/cma3000_d0x.h
@@ -0,0 +1,42 @@
+/*
+ * VTI CMA3000_D0x Accelerometer driver
+ *
+ * Copyright (C) 2010 Texas Instruments
+ * Author: Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _INPUT_CMA3000_H
+#define _INPUT_CMA3000_H
+
+#include <linux/types.h>
+#include <linux/input.h>
+
+struct device;
+struct cma3000_accl_data;
+
+struct cma3000_bus_ops {
+	u16 bustype;
+	u8 ctrl_mod;
+	int (*read)(struct device *, u8, char *);
+	int (*write)(struct device *, u8, u8, char *);
+};
+
+struct cma3000_accl_data *cma3000_init(struct device *dev, int irq,
+					const struct cma3000_bus_ops *bops);
+void cma3000_exit(struct cma3000_accl_data *);
+void cma3000_suspend(struct cma3000_accl_data *);
+void cma3000_resume(struct cma3000_accl_data *);
+
+#endif
diff --git a/drivers/input/misc/cma3000_d0x_i2c.c b/drivers/input/misc/cma3000_d0x_i2c.c
new file mode 100644
index 000000000000..c52d278a7942
--- /dev/null
+++ b/drivers/input/misc/cma3000_d0x_i2c.c
@@ -0,0 +1,141 @@
+/*
+ * Implements I2C interface for VTI CMA300_D0x Accelerometer driver
+ *
+ * Copyright (C) 2010 Texas Instruments
+ * Author: Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/input/cma3000.h>
+#include "cma3000_d0x.h"
+
+static int cma3000_i2c_set(struct device *dev,
+			   u8 reg, u8 val, char *msg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, val);
+	if (ret < 0)
+		dev_err(&client->dev,
+			"%s failed (%s, %d)\n", __func__, msg, ret);
+	return ret;
+}
+
+static int cma3000_i2c_read(struct device *dev, u8 reg, char *msg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	if (ret < 0)
+		dev_err(&client->dev,
+			"%s failed (%s, %d)\n", __func__, msg, ret);
+	return ret;
+}
+
+static const struct cma3000_bus_ops cma3000_i2c_bops = {
+	.bustype	= BUS_I2C,
+#define CMA3000_BUSI2C     (0 << 4)
+	.ctrl_mod	= CMA3000_BUSI2C,
+	.read		= cma3000_i2c_read,
+	.write		= cma3000_i2c_set,
+};
+
+static int __devinit cma3000_i2c_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct cma3000_accl_data *data;
+
+	data = cma3000_init(&client->dev, client->irq, &cma3000_i2c_bops);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	i2c_set_clientdata(client, data);
+
+	return 0;
+}
+
+static int __devexit cma3000_i2c_remove(struct i2c_client *client)
+{
+	struct cma3000_accl_data *data = i2c_get_clientdata(client);
+
+	cma3000_exit(data);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int cma3000_i2c_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cma3000_accl_data *data = i2c_get_clientdata(client);
+
+	cma3000_suspend(data);
+
+	return 0;
+}
+
+static int cma3000_i2c_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cma3000_accl_data *data = i2c_get_clientdata(client);
+
+	cma3000_resume(data);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cma3000_i2c_pm_ops = {
+	.suspend	= cma3000_i2c_suspend,
+	.resume		= cma3000_i2c_resume,
+};
+#endif
+
+static const struct i2c_device_id cma3000_i2c_id[] = {
+	{ "cma3000_d01", 0 },
+	{ },
+};
+
+static struct i2c_driver cma3000_i2c_driver = {
+	.probe		= cma3000_i2c_probe,
+	.remove		= __devexit_p(cma3000_i2c_remove),
+	.id_table	= cma3000_i2c_id,
+	.driver = {
+		.name	= "cma3000_i2c_accl",
+		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &cma3000_i2c_pm_ops,
+#endif
+	},
+};
+
+static int __init cma3000_i2c_init(void)
+{
+	return i2c_add_driver(&cma3000_i2c_driver);
+}
+
+static void __exit cma3000_i2c_exit(void)
+{
+	i2c_del_driver(&cma3000_i2c_driver);
+}
+
+module_init(cma3000_i2c_init);
+module_exit(cma3000_i2c_exit);
+
+MODULE_DESCRIPTION("CMA3000-D0x Accelerometer I2C Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>");
diff --git a/include/linux/input/cma3000.h b/include/linux/input/cma3000.h
new file mode 100644
index 000000000000..cbbaac27d311
--- /dev/null
+++ b/include/linux/input/cma3000.h
@@ -0,0 +1,59 @@
+/*
+ * VTI CMA3000_Dxx Accelerometer driver
+ *
+ * Copyright (C) 2010 Texas Instruments
+ * Author: Hemanth V <hemanthv@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LINUX_CMA3000_H
+#define _LINUX_CMA3000_H
+
+#define CMAMODE_DEFAULT    0
+#define CMAMODE_MEAS100    1
+#define CMAMODE_MEAS400    2
+#define CMAMODE_MEAS40     3
+#define CMAMODE_MOTDET     4
+#define CMAMODE_FF100      5
+#define CMAMODE_FF400      6
+#define CMAMODE_POFF       7
+
+#define CMARANGE_2G   2000
+#define CMARANGE_8G   8000
+
+/**
+ * struct cma3000_i2c_platform_data - CMA3000 Platform data
+ * @fuzz_x: Noise on X Axis
+ * @fuzz_y: Noise on Y Axis
+ * @fuzz_z: Noise on Z Axis
+ * @g_range: G range in milli g i.e 2000 or 8000
+ * @mode: Operating mode
+ * @mdthr: Motion detect threshold value
+ * @mdfftmr: Motion detect and free fall time value
+ * @ffthr: Free fall threshold value
+ */
+
+struct cma3000_platform_data {
+	int fuzz_x;
+	int fuzz_y;
+	int fuzz_z;
+	int g_range;
+	uint8_t mode;
+	uint8_t mdthr;
+	uint8_t mdfftmr;
+	uint8_t ffthr;
+	unsigned long irqflags;
+};
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 8348c259dd6a6019a8fa01b0a3443409480f7b9d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 22 Nov 2010 17:12:15 -0800
Subject: arm/pxa2xx: reorgazine SSP and SPI header files

The PXA-SPI driver relies on some files / defines which are arm specific
and are within the ARM tree. The CE4100 SoC which is x86 has also the
SPI core.
This patch moves the ssp and spi files from arm/mach-pxa and plat-pxa to
include/linux where the CE4100 can access them.

This move got verified by building the following defconfigs:
   cm_x2xx_defconfig corgi_defconfig em_x270_defconfig ezx_defconfig
   imote2_defconfig pxa3xx_defconfig spitz_defconfig zeus_defconfig
   raumfeld_defconfig magician_defconfig

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dirk Brandewie <dirk.brandewie@gmail.com>
---
 Documentation/spi/pxa2xx                    |   4 +-
 arch/arm/mach-pxa/cm-x255.c                 |   2 +-
 arch/arm/mach-pxa/cm-x270.c                 |   2 +-
 arch/arm/mach-pxa/corgi.c                   |   2 +-
 arch/arm/mach-pxa/devices.c                 |   2 +-
 arch/arm/mach-pxa/em-x270.c                 |   2 +-
 arch/arm/mach-pxa/hx4700.c                  |   2 +-
 arch/arm/mach-pxa/icontrol.c                |   2 +-
 arch/arm/mach-pxa/include/mach/pxa2xx_spi.h |  47 -------
 arch/arm/mach-pxa/littleton.c               |   2 +-
 arch/arm/mach-pxa/lubbock.c                 |   2 +-
 arch/arm/mach-pxa/pcm027.c                  |   2 +-
 arch/arm/mach-pxa/poodle.c                  |   2 +-
 arch/arm/mach-pxa/spitz.c                   |   3 +-
 arch/arm/mach-pxa/stargate2.c               |   2 +-
 arch/arm/mach-pxa/tosa.c                    |   2 +-
 arch/arm/mach-pxa/trizeps4.c                |   1 -
 arch/arm/mach-pxa/z2.c                      |   2 +-
 arch/arm/mach-pxa/zeus.c                    |   2 +-
 arch/arm/plat-pxa/include/plat/ssp.h        | 187 ----------------------------
 arch/arm/plat-pxa/ssp.c                     |   2 +-
 drivers/spi/pxa2xx_spi.c                    |   4 +-
 include/linux/pxa2xx_ssp.h                  | 187 ++++++++++++++++++++++++++++
 include/linux/spi/pxa2xx_spi.h              |  49 ++++++++
 sound/soc/pxa/pxa-ssp.c                     |   2 +-
 25 files changed, 257 insertions(+), 259 deletions(-)
 delete mode 100644 arch/arm/mach-pxa/include/mach/pxa2xx_spi.h
 delete mode 100644 arch/arm/plat-pxa/include/plat/ssp.h
 create mode 100644 include/linux/pxa2xx_ssp.h
 create mode 100644 include/linux/spi/pxa2xx_spi.h

(limited to 'include/linux')

diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index 6bb916d57c95..68a4fe3818a1 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -19,7 +19,7 @@ Declaring PXA2xx Master Controllers
 -----------------------------------
 Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
 "platform device".  The master configuration is passed to the driver via a table
-found in arch/arm/mach-pxa/include/mach/pxa2xx_spi.h:
+found in include/linux/spi/pxa2xx_spi.h:
 
 struct pxa2xx_spi_master {
 	enum pxa_ssp_type ssp_type;
@@ -94,7 +94,7 @@ using the "spi_board_info" structure found in "linux/spi/spi.h". See
 
 Each slave device attached to the PXA must provide slave specific configuration
 information via the structure "pxa2xx_spi_chip" found in
-"arch/arm/mach-pxa/include/mach/pxa2xx_spi.h".  The pxa2xx_spi master controller driver
+"include/linux/spi/pxa2xx_spi.h".  The pxa2xx_spi master controller driver
 will uses the configuration whenever the driver communicates with the slave
 device. All fields are optional.
 
diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c
index f1a7703d771b..93f59f877fc6 100644
--- a/arch/arm/mach-pxa/cm-x255.c
+++ b/arch/arm/mach-pxa/cm-x255.c
@@ -17,13 +17,13 @@
 #include <linux/mtd/nand-gpio.h>
 
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
 #include <asm/mach/map.h>
 
 #include <mach/pxa25x.h>
-#include <mach/pxa2xx_spi.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index a9926bb75922..b88d601a8090 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -19,12 +19,12 @@
 #include <video/mbxfb.h>
 
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/libertas_spi.h>
 
 #include <mach/pxa27x.h>
 #include <mach/ohci.h>
 #include <mach/mmc.h>
-#include <mach/pxa2xx_spi.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 821229acabe6..3b8dcac2b8fb 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -28,6 +28,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <linux/spi/corgi_lcd.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/mtd/sharpsl.h>
 #include <linux/input/matrix_keypad.h>
 #include <video/w100fb.h>
@@ -48,7 +49,6 @@
 #include <mach/irda.h>
 #include <mach/mmc.h>
 #include <mach/udc.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/corgi.h>
 #include <mach/sharpsl_pm.h>
 
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index aaa1166df964..c4f9c715be73 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/spi/pxa2xx_spi.h>
 
 #include <asm/pmu.h>
 #include <mach/udc.h>
@@ -12,7 +13,6 @@
 #include <mach/irda.h>
 #include <mach/ohci.h>
 #include <plat/pxa27x_keypad.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/camera.h>
 #include <mach/audio.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index ed0dbfdb22ed..b20b944c337c 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -26,6 +26,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/tdo24m.h>
 #include <linux/spi/libertas_spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/power_supply.h>
 #include <linux/apm-emulation.h>
 #include <linux/i2c.h>
@@ -46,7 +47,6 @@
 #include <plat/pxa27x_keypad.h>
 #include <plat/i2c.h>
 #include <mach/camera.h>
-#include <mach/pxa2xx_spi.h>
 
 #include "generic.h"
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 76d93a25bab6..f09526f52152 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -33,6 +33,7 @@
 #include <linux/regulator/max1586.h>
 #include <linux/spi/ads7846.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/usb/gpio_vbus.h>
 
 #include <mach/hardware.h>
@@ -43,7 +44,6 @@
 #include <mach/hx4700.h>
 #include <plat/i2c.h>
 #include <mach/irda.h>
-#include <mach/pxa2xx_spi.h>
 
 #include <video/platform_lcd.h>
 #include <video/w100fb.h>
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index d51ee3d25e70..3ab6fd369584 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -24,7 +24,7 @@
 #include <mach/mxm8x10.h>
 
 #include <linux/spi/spi.h>
-#include <mach/pxa2xx_spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/can/platform/mcp251x.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-pxa/include/mach/pxa2xx_spi.h b/arch/arm/mach-pxa/include/mach/pxa2xx_spi.h
deleted file mode 100644
index b87cecd9bbdc..000000000000
--- a/arch/arm/mach-pxa/include/mach/pxa2xx_spi.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef PXA2XX_SPI_H_
-#define PXA2XX_SPI_H_
-
-#define PXA2XX_CS_ASSERT (0x01)
-#define PXA2XX_CS_DEASSERT (0x02)
-
-/* device.platform_data for SSP controller devices */
-struct pxa2xx_spi_master {
-	u32 clock_enable;
-	u16 num_chipselect;
-	u8 enable_dma;
-};
-
-/* spi_board_info.controller_data for SPI slave devices,
- * copied to spi_device.platform_data ... mostly for dma tuning
- */
-struct pxa2xx_spi_chip {
-	u8 tx_threshold;
-	u8 rx_threshold;
-	u8 dma_burst_size;
-	u32 timeout;
-	u8 enable_loopback;
-	int gpio_cs;
-	void (*cs_control)(u32 command);
-};
-
-extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
-
-#endif /*PXA2XX_SPI_H_*/
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 41aa89e35772..8051925d457c 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/gpio.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/smc91x.h>
 #include <linux/i2c.h>
 #include <linux/leds.h>
@@ -42,7 +43,6 @@
 #include <mach/pxa300.h>
 #include <mach/pxafb.h>
 #include <mach/mmc.h>
-#include <mach/pxa2xx_spi.h>
 #include <plat/pxa27x_keypad.h>
 #include <mach/littleton.h>
 #include <plat/i2c.h>
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 1499493cd070..4a3fe450c20f 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -25,7 +25,7 @@
 
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
-#include <mach/pxa2xx_spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 
 #include <asm/setup.h>
 #include <asm/memory.h>
diff --git a/arch/arm/mach-pxa/pcm027.c b/arch/arm/mach-pxa/pcm027.c
index c77e8f30a439..a5596f9b17bc 100644
--- a/arch/arm/mach-pxa/pcm027.c
+++ b/arch/arm/mach-pxa/pcm027.c
@@ -25,12 +25,12 @@
 #include <linux/mtd/physmap.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/max7301.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/leds.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <mach/pxa27x.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/pcm027.h>
 #include "generic.h"
 
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 93a191c889df..c05eee326234 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -25,6 +25,7 @@
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/mtd/sharpsl.h>
 
 #include <mach/hardware.h>
@@ -43,7 +44,6 @@
 #include <mach/irda.h>
 #include <mach/poodle.h>
 #include <mach/pxafb.h>
-#include <mach/pxa2xx_spi.h>
 #include <plat/i2c.h>
 
 #include <asm/hardware/scoop.h>
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index f736119f1ebf..4fd5572f6995 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -23,7 +23,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <linux/spi/corgi_lcd.h>
-#include <linux/mtd/physmap.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/mtd/sharpsl.h>
 #include <linux/input/matrix_keypad.h>
 #include <linux/regulator/machine.h>
@@ -41,7 +41,6 @@
 #include <mach/mmc.h>
 #include <mach/ohci.h>
 #include <mach/pxafb.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/spitz.h>
 #include <mach/sharpsl_pm.h>
 
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 738adc1773fd..325f6ac6b591 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -46,10 +46,10 @@
 #include <plat/i2c.h>
 #include <mach/mmc.h>
 #include <mach/udc.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/pxa27x-udc.h>
 
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/mfd/da903x.h>
 #include <linux/sht15.h>
 
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 0ee1df49606d..7b5765dacbd9 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -32,6 +32,7 @@
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/input/matrix_keypad.h>
 
 #include <asm/setup.h>
@@ -44,7 +45,6 @@
 #include <mach/mmc.h>
 #include <mach/udc.h>
 #include <mach/tosa_bt.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/audio.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 565d062f51d5..bdb02a0ae1b7 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -40,7 +40,6 @@
 #include <asm/mach/flash.h>
 
 #include <mach/pxa27x.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/trizeps4.h>
 #include <mach/audio.h>
 #include <mach/pxafb.h>
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index fefde9848d82..1b8b71b3c719 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -20,6 +20,7 @@
 #include <linux/z2_battery.h>
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/libertas_spi.h>
 #include <linux/spi/lms283gf05.h>
 #include <linux/power_supply.h>
@@ -38,7 +39,6 @@
 #include <mach/pxafb.h>
 #include <mach/mmc.h>
 #include <plat/pxa27x_keypad.h>
-#include <mach/pxa2xx_spi.h>
 
 #include <plat/i2c.h>
 
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index dea46a2d089b..f5c9f3032c31 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -20,6 +20,7 @@
 #include <linux/dm9000.h>
 #include <linux/mmc/host.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
@@ -41,7 +42,6 @@
 #include <mach/pxa27x-udc.h>
 #include <mach/udc.h>
 #include <mach/pxafb.h>
-#include <mach/pxa2xx_spi.h>
 #include <mach/mfp-pxa27x.h>
 #include <mach/pm.h>
 #include <mach/audio.h>
diff --git a/arch/arm/plat-pxa/include/plat/ssp.h b/arch/arm/plat-pxa/include/plat/ssp.h
deleted file mode 100644
index 21c12ca80738..000000000000
--- a/arch/arm/plat-pxa/include/plat/ssp.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- *  ssp.h
- *
- *  Copyright (C) 2003 Russell King, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This driver supports the following PXA CPU/SSP ports:-
- *
- *       PXA250     SSP
- *       PXA255     SSP, NSSP
- *       PXA26x     SSP, NSSP, ASSP
- *       PXA27x     SSP1, SSP2, SSP3
- *       PXA3xx     SSP1, SSP2, SSP3, SSP4
- */
-
-#ifndef __ASM_ARCH_SSP_H
-#define __ASM_ARCH_SSP_H
-
-#include <linux/list.h>
-#include <linux/io.h>
-
-/*
- * SSP Serial Port Registers
- * PXA250, PXA255, PXA26x and PXA27x SSP controllers are all slightly different.
- * PXA255, PXA26x and PXA27x have extra ports, registers and bits.
- */
-
-#define SSCR0		(0x00)  /* SSP Control Register 0 */
-#define SSCR1		(0x04)  /* SSP Control Register 1 */
-#define SSSR		(0x08)  /* SSP Status Register */
-#define SSITR		(0x0C)  /* SSP Interrupt Test Register */
-#define SSDR		(0x10)  /* SSP Data Write/Data Read Register */
-
-#define SSTO		(0x28)  /* SSP Time Out Register */
-#define SSPSP		(0x2C)  /* SSP Programmable Serial Protocol */
-#define SSTSA		(0x30)  /* SSP Tx Timeslot Active */
-#define SSRSA		(0x34)  /* SSP Rx Timeslot Active */
-#define SSTSS		(0x38)  /* SSP Timeslot Status */
-#define SSACD		(0x3C)  /* SSP Audio Clock Divider */
-#define SSACDD		(0x40)	/* SSP Audio Clock Dither Divider */
-
-/* Common PXA2xx bits first */
-#define SSCR0_DSS	(0x0000000f)	/* Data Size Select (mask) */
-#define SSCR0_DataSize(x)  ((x) - 1)	/* Data Size Select [4..16] */
-#define SSCR0_FRF	(0x00000030)	/* FRame Format (mask) */
-#define SSCR0_Motorola	(0x0 << 4)	/* Motorola's Serial Peripheral Interface (SPI) */
-#define SSCR0_TI	(0x1 << 4)	/* Texas Instruments' Synchronous Serial Protocol (SSP) */
-#define SSCR0_National	(0x2 << 4)	/* National Microwire */
-#define SSCR0_ECS	(1 << 6)	/* External clock select */
-#define SSCR0_SSE	(1 << 7)	/* Synchronous Serial Port Enable */
-#define SSCR0_SCR(x)	((x) << 8)	/* Serial Clock Rate (mask) */
-
-/* PXA27x, PXA3xx */
-#define SSCR0_EDSS	(1 << 20)	/* Extended data size select */
-#define SSCR0_NCS	(1 << 21)	/* Network clock select */
-#define SSCR0_RIM	(1 << 22)	/* Receive FIFO overrrun interrupt mask */
-#define SSCR0_TUM	(1 << 23)	/* Transmit FIFO underrun interrupt mask */
-#define SSCR0_FRDC	(0x07000000)	/* Frame rate divider control (mask) */
-#define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24)	/* Time slots per frame [1..8] */
-#define SSCR0_FPCKE	(1 << 29)	/* FIFO packing enable */
-#define SSCR0_ACS	(1 << 30)	/* Audio clock select */
-#define SSCR0_MOD	(1 << 31)	/* Mode (normal or network) */
-
-
-#define SSCR1_RIE	(1 << 0)	/* Receive FIFO Interrupt Enable */
-#define SSCR1_TIE	(1 << 1)	/* Transmit FIFO Interrupt Enable */
-#define SSCR1_LBM	(1 << 2)	/* Loop-Back Mode */
-#define SSCR1_SPO	(1 << 3)	/* Motorola SPI SSPSCLK polarity setting */
-#define SSCR1_SPH	(1 << 4)	/* Motorola SPI SSPSCLK phase setting */
-#define SSCR1_MWDS	(1 << 5)	/* Microwire Transmit Data Size */
-#define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
-#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */
-#define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
-#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
-
-#define SSSR_TNF	(1 << 2)	/* Transmit FIFO Not Full */
-#define SSSR_RNE	(1 << 3)	/* Receive FIFO Not Empty */
-#define SSSR_BSY	(1 << 4)	/* SSP Busy */
-#define SSSR_TFS	(1 << 5)	/* Transmit FIFO Service Request */
-#define SSSR_RFS	(1 << 6)	/* Receive FIFO Service Request */
-#define SSSR_ROR	(1 << 7)	/* Receive FIFO Overrun */
-#define SSSR_TFL_MASK   (0xf << 8)	/* Transmit FIFO Level mask */
-#define SSSR_RFL_MASK   (0xf << 12)	/* Receive FIFO Level mask */
-
-/* extra bits in PXA255, PXA26x and PXA27x SSP ports */
-#define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
-#define SSCR0_PSP		(3 << 4)	/* PSP - Programmable Serial Protocol */
-#define SSCR1_TTELP		(1 << 31)	/* TXD Tristate Enable Last Phase */
-#define SSCR1_TTE		(1 << 30)	/* TXD Tristate Enable */
-#define SSCR1_EBCEI		(1 << 29)	/* Enable Bit Count Error interrupt */
-#define SSCR1_SCFR		(1 << 28)	/* Slave Clock free Running */
-#define SSCR1_ECRA		(1 << 27)	/* Enable Clock Request A */
-#define SSCR1_ECRB		(1 << 26)	/* Enable Clock request B */
-#define SSCR1_SCLKDIR		(1 << 25)	/* Serial Bit Rate Clock Direction */
-#define SSCR1_SFRMDIR		(1 << 24)	/* Frame Direction */
-#define SSCR1_RWOT		(1 << 23)	/* Receive Without Transmit */
-#define SSCR1_TRAIL		(1 << 22)	/* Trailing Byte */
-#define SSCR1_TSRE		(1 << 21)	/* Transmit Service Request Enable */
-#define SSCR1_RSRE		(1 << 20)	/* Receive Service Request Enable */
-#define SSCR1_TINTE		(1 << 19)	/* Receiver Time-out Interrupt enable */
-#define SSCR1_PINTE		(1 << 18)	/* Peripheral Trailing Byte Interupt Enable */
-#define SSCR1_IFS		(1 << 16)	/* Invert Frame Signal */
-#define SSCR1_STRF		(1 << 15)	/* Select FIFO or EFWR */
-#define SSCR1_EFWR		(1 << 14)	/* Enable FIFO Write/Read */
-
-#define SSSR_BCE		(1 << 23)	/* Bit Count Error */
-#define SSSR_CSS		(1 << 22)	/* Clock Synchronisation Status */
-#define SSSR_TUR		(1 << 21)	/* Transmit FIFO Under Run */
-#define SSSR_EOC		(1 << 20)	/* End Of Chain */
-#define SSSR_TINT		(1 << 19)	/* Receiver Time-out Interrupt */
-#define SSSR_PINT		(1 << 18)	/* Peripheral Trailing Byte Interrupt */
-
-
-#define SSPSP_SCMODE(x)		((x) << 0)	/* Serial Bit Rate Clock Mode */
-#define SSPSP_SFRMP		(1 << 2)	/* Serial Frame Polarity */
-#define SSPSP_ETDS		(1 << 3)	/* End of Transfer data State */
-#define SSPSP_STRTDLY(x)	((x) << 4)	/* Start Delay */
-#define SSPSP_DMYSTRT(x)	((x) << 7)	/* Dummy Start */
-#define SSPSP_SFRMDLY(x)	((x) << 9)	/* Serial Frame Delay */
-#define SSPSP_SFRMWDTH(x)	((x) << 16)	/* Serial Frame Width */
-#define SSPSP_DMYSTOP(x)	((x) << 23)	/* Dummy Stop */
-#define SSPSP_FSRT		(1 << 25)	/* Frame Sync Relative Timing */
-
-/* PXA3xx */
-#define SSPSP_EDMYSTRT(x)	((x) << 26)     /* Extended Dummy Start */
-#define SSPSP_EDMYSTOP(x)	((x) << 28)     /* Extended Dummy Stop */
-#define SSPSP_TIMING_MASK	(0x7f8001f0)
-
-#define SSACD_SCDB		(1 << 3)	/* SSPSYSCLK Divider Bypass */
-#define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
-#define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
-#define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
-
-enum pxa_ssp_type {
-	SSP_UNDEFINED = 0,
-	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
-	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
-	PXA27x_SSP,
-	PXA168_SSP,
-};
-
-struct ssp_device {
-	struct platform_device *pdev;
-	struct list_head	node;
-
-	struct clk	*clk;
-	void __iomem	*mmio_base;
-	unsigned long	phys_base;
-
-	const char	*label;
-	int		port_id;
-	int		type;
-	int		use_count;
-	int		irq;
-	int		drcmr_rx;
-	int		drcmr_tx;
-};
-
-/**
- * pxa_ssp_write_reg - Write to a SSP register
- *
- * @dev: SSP device to access
- * @reg: Register to write to
- * @val: Value to be written.
- */
-static inline void pxa_ssp_write_reg(struct ssp_device *dev, u32 reg, u32 val)
-{
-	__raw_writel(val, dev->mmio_base + reg);
-}
-
-/**
- * pxa_ssp_read_reg - Read from a SSP register
- *
- * @dev: SSP device to access
- * @reg: Register to read from
- */
-static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
-{
-	return __raw_readl(dev->mmio_base + reg);
-}
-
-struct ssp_device *pxa_ssp_request(int port, const char *label);
-void pxa_ssp_free(struct ssp_device *);
-#endif /* __ASM_ARCH_SSP_H */
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index c6357e554aba..58b79809d20c 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -28,11 +28,11 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/io.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
-#include <plat/ssp.h>
 
 static DEFINE_MUTEX(ssp_lock);
 static LIST_HEAD(ssp_list);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 55083445aae6..98d9c8b09182 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -23,6 +23,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/spi/pxa2xx_spi.h>
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
@@ -35,9 +36,6 @@
 #include <asm/irq.h>
 #include <asm/delay.h>
 
-#include <mach/dma.h>
-#include <plat/ssp.h>
-#include <mach/pxa2xx_spi.h>
 
 MODULE_AUTHOR("Stephen Street");
 MODULE_DESCRIPTION("PXA2xx SSP SPI Controller");
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
new file mode 100644
index 000000000000..84465d4a51b7
--- /dev/null
+++ b/include/linux/pxa2xx_ssp.h
@@ -0,0 +1,187 @@
+/*
+ *  pxa2xx_ssp.h
+ *
+ *  Copyright (C) 2003 Russell King, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver supports the following PXA CPU/SSP ports:-
+ *
+ *       PXA250     SSP
+ *       PXA255     SSP, NSSP
+ *       PXA26x     SSP, NSSP, ASSP
+ *       PXA27x     SSP1, SSP2, SSP3
+ *       PXA3xx     SSP1, SSP2, SSP3, SSP4
+ */
+
+#ifndef __LINUX_SSP_H
+#define __LINUX_SSP_H
+
+#include <linux/list.h>
+#include <linux/io.h>
+
+/*
+ * SSP Serial Port Registers
+ * PXA250, PXA255, PXA26x and PXA27x SSP controllers are all slightly different.
+ * PXA255, PXA26x and PXA27x have extra ports, registers and bits.
+ */
+
+#define SSCR0		(0x00)  /* SSP Control Register 0 */
+#define SSCR1		(0x04)  /* SSP Control Register 1 */
+#define SSSR		(0x08)  /* SSP Status Register */
+#define SSITR		(0x0C)  /* SSP Interrupt Test Register */
+#define SSDR		(0x10)  /* SSP Data Write/Data Read Register */
+
+#define SSTO		(0x28)  /* SSP Time Out Register */
+#define SSPSP		(0x2C)  /* SSP Programmable Serial Protocol */
+#define SSTSA		(0x30)  /* SSP Tx Timeslot Active */
+#define SSRSA		(0x34)  /* SSP Rx Timeslot Active */
+#define SSTSS		(0x38)  /* SSP Timeslot Status */
+#define SSACD		(0x3C)  /* SSP Audio Clock Divider */
+#define SSACDD		(0x40)	/* SSP Audio Clock Dither Divider */
+
+/* Common PXA2xx bits first */
+#define SSCR0_DSS	(0x0000000f)	/* Data Size Select (mask) */
+#define SSCR0_DataSize(x)  ((x) - 1)	/* Data Size Select [4..16] */
+#define SSCR0_FRF	(0x00000030)	/* FRame Format (mask) */
+#define SSCR0_Motorola	(0x0 << 4)	/* Motorola's Serial Peripheral Interface (SPI) */
+#define SSCR0_TI	(0x1 << 4)	/* Texas Instruments' Synchronous Serial Protocol (SSP) */
+#define SSCR0_National	(0x2 << 4)	/* National Microwire */
+#define SSCR0_ECS	(1 << 6)	/* External clock select */
+#define SSCR0_SSE	(1 << 7)	/* Synchronous Serial Port Enable */
+#define SSCR0_SCR(x)	((x) << 8)	/* Serial Clock Rate (mask) */
+
+/* PXA27x, PXA3xx */
+#define SSCR0_EDSS	(1 << 20)	/* Extended data size select */
+#define SSCR0_NCS	(1 << 21)	/* Network clock select */
+#define SSCR0_RIM	(1 << 22)	/* Receive FIFO overrrun interrupt mask */
+#define SSCR0_TUM	(1 << 23)	/* Transmit FIFO underrun interrupt mask */
+#define SSCR0_FRDC	(0x07000000)	/* Frame rate divider control (mask) */
+#define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24)	/* Time slots per frame [1..8] */
+#define SSCR0_FPCKE	(1 << 29)	/* FIFO packing enable */
+#define SSCR0_ACS	(1 << 30)	/* Audio clock select */
+#define SSCR0_MOD	(1 << 31)	/* Mode (normal or network) */
+
+
+#define SSCR1_RIE	(1 << 0)	/* Receive FIFO Interrupt Enable */
+#define SSCR1_TIE	(1 << 1)	/* Transmit FIFO Interrupt Enable */
+#define SSCR1_LBM	(1 << 2)	/* Loop-Back Mode */
+#define SSCR1_SPO	(1 << 3)	/* Motorola SPI SSPSCLK polarity setting */
+#define SSCR1_SPH	(1 << 4)	/* Motorola SPI SSPSCLK phase setting */
+#define SSCR1_MWDS	(1 << 5)	/* Microwire Transmit Data Size */
+#define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
+#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */
+#define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
+#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
+
+#define SSSR_TNF	(1 << 2)	/* Transmit FIFO Not Full */
+#define SSSR_RNE	(1 << 3)	/* Receive FIFO Not Empty */
+#define SSSR_BSY	(1 << 4)	/* SSP Busy */
+#define SSSR_TFS	(1 << 5)	/* Transmit FIFO Service Request */
+#define SSSR_RFS	(1 << 6)	/* Receive FIFO Service Request */
+#define SSSR_ROR	(1 << 7)	/* Receive FIFO Overrun */
+#define SSSR_TFL_MASK   (0xf << 8)	/* Transmit FIFO Level mask */
+#define SSSR_RFL_MASK   (0xf << 12)	/* Receive FIFO Level mask */
+
+/* extra bits in PXA255, PXA26x and PXA27x SSP ports */
+#define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
+#define SSCR0_PSP		(3 << 4)	/* PSP - Programmable Serial Protocol */
+#define SSCR1_TTELP		(1 << 31)	/* TXD Tristate Enable Last Phase */
+#define SSCR1_TTE		(1 << 30)	/* TXD Tristate Enable */
+#define SSCR1_EBCEI		(1 << 29)	/* Enable Bit Count Error interrupt */
+#define SSCR1_SCFR		(1 << 28)	/* Slave Clock free Running */
+#define SSCR1_ECRA		(1 << 27)	/* Enable Clock Request A */
+#define SSCR1_ECRB		(1 << 26)	/* Enable Clock request B */
+#define SSCR1_SCLKDIR		(1 << 25)	/* Serial Bit Rate Clock Direction */
+#define SSCR1_SFRMDIR		(1 << 24)	/* Frame Direction */
+#define SSCR1_RWOT		(1 << 23)	/* Receive Without Transmit */
+#define SSCR1_TRAIL		(1 << 22)	/* Trailing Byte */
+#define SSCR1_TSRE		(1 << 21)	/* Transmit Service Request Enable */
+#define SSCR1_RSRE		(1 << 20)	/* Receive Service Request Enable */
+#define SSCR1_TINTE		(1 << 19)	/* Receiver Time-out Interrupt enable */
+#define SSCR1_PINTE		(1 << 18)	/* Peripheral Trailing Byte Interupt Enable */
+#define SSCR1_IFS		(1 << 16)	/* Invert Frame Signal */
+#define SSCR1_STRF		(1 << 15)	/* Select FIFO or EFWR */
+#define SSCR1_EFWR		(1 << 14)	/* Enable FIFO Write/Read */
+
+#define SSSR_BCE		(1 << 23)	/* Bit Count Error */
+#define SSSR_CSS		(1 << 22)	/* Clock Synchronisation Status */
+#define SSSR_TUR		(1 << 21)	/* Transmit FIFO Under Run */
+#define SSSR_EOC		(1 << 20)	/* End Of Chain */
+#define SSSR_TINT		(1 << 19)	/* Receiver Time-out Interrupt */
+#define SSSR_PINT		(1 << 18)	/* Peripheral Trailing Byte Interrupt */
+
+
+#define SSPSP_SCMODE(x)		((x) << 0)	/* Serial Bit Rate Clock Mode */
+#define SSPSP_SFRMP		(1 << 2)	/* Serial Frame Polarity */
+#define SSPSP_ETDS		(1 << 3)	/* End of Transfer data State */
+#define SSPSP_STRTDLY(x)	((x) << 4)	/* Start Delay */
+#define SSPSP_DMYSTRT(x)	((x) << 7)	/* Dummy Start */
+#define SSPSP_SFRMDLY(x)	((x) << 9)	/* Serial Frame Delay */
+#define SSPSP_SFRMWDTH(x)	((x) << 16)	/* Serial Frame Width */
+#define SSPSP_DMYSTOP(x)	((x) << 23)	/* Dummy Stop */
+#define SSPSP_FSRT		(1 << 25)	/* Frame Sync Relative Timing */
+
+/* PXA3xx */
+#define SSPSP_EDMYSTRT(x)	((x) << 26)     /* Extended Dummy Start */
+#define SSPSP_EDMYSTOP(x)	((x) << 28)     /* Extended Dummy Stop */
+#define SSPSP_TIMING_MASK	(0x7f8001f0)
+
+#define SSACD_SCDB		(1 << 3)	/* SSPSYSCLK Divider Bypass */
+#define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
+#define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
+#define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
+
+enum pxa_ssp_type {
+	SSP_UNDEFINED = 0,
+	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
+	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
+	PXA27x_SSP,
+	PXA168_SSP,
+};
+
+struct ssp_device {
+	struct platform_device *pdev;
+	struct list_head	node;
+
+	struct clk	*clk;
+	void __iomem	*mmio_base;
+	unsigned long	phys_base;
+
+	const char	*label;
+	int		port_id;
+	int		type;
+	int		use_count;
+	int		irq;
+	int		drcmr_rx;
+	int		drcmr_tx;
+};
+
+/**
+ * pxa_ssp_write_reg - Write to a SSP register
+ *
+ * @dev: SSP device to access
+ * @reg: Register to write to
+ * @val: Value to be written.
+ */
+static inline void pxa_ssp_write_reg(struct ssp_device *dev, u32 reg, u32 val)
+{
+	__raw_writel(val, dev->mmio_base + reg);
+}
+
+/**
+ * pxa_ssp_read_reg - Read from a SSP register
+ *
+ * @dev: SSP device to access
+ * @reg: Register to read from
+ */
+static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
+{
+	return __raw_readl(dev->mmio_base + reg);
+}
+
+struct ssp_device *pxa_ssp_request(int port, const char *label);
+void pxa_ssp_free(struct ssp_device *);
+#endif
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
new file mode 100644
index 000000000000..471ed6889113
--- /dev/null
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __linux_pxa2xx_spi_h
+#define __linux_pxa2xx_spi_h
+
+#include <linux/pxa2xx_ssp.h>
+#include <mach/dma.h>
+
+#define PXA2XX_CS_ASSERT (0x01)
+#define PXA2XX_CS_DEASSERT (0x02)
+
+/* device.platform_data for SSP controller devices */
+struct pxa2xx_spi_master {
+	u32 clock_enable;
+	u16 num_chipselect;
+	u8 enable_dma;
+};
+
+/* spi_board_info.controller_data for SPI slave devices,
+ * copied to spi_device.platform_data ... mostly for dma tuning
+ */
+struct pxa2xx_spi_chip {
+	u8 tx_threshold;
+	u8 rx_threshold;
+	u8 dma_burst_size;
+	u32 timeout;
+	u8 enable_loopback;
+	int gpio_cs;
+	void (*cs_control)(u32 command);
+};
+
+extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
+
+#endif
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index b439eee462cb..8ad93ee2e92b 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/pxa2xx_ssp.h>
 
 #include <asm/irq.h>
 
@@ -33,7 +34,6 @@
 #include <mach/hardware.h>
 #include <mach/dma.h>
 #include <mach/audio.h>
-#include <plat/ssp.h>
 
 #include "../../arm/pxa2xx-pcm.h"
 #include "pxa-ssp.h"
-- 
cgit v1.2.3-71-gd317


From d6ea3df0d470fb9260db93883f97764cf9f0e562 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 24 Nov 2010 10:17:14 +0100
Subject: spi/pxa2xx: Add CE4100 support

Sodaville's SPI controller is very much the same as in PXA25x. The
difference:
- The RX/TX FIFO is only 4 words deep instead of 16
- No DMA support
- The SPI controller offers a CS functionality

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dirk Brandewie <dirk.brandewie@gmail.com>
---
 drivers/spi/Kconfig            |  13 ++-
 drivers/spi/Makefile           |   1 +
 drivers/spi/pxa2xx_spi.c       |   1 -
 drivers/spi/pxa2xx_spi_pci.c   | 201 +++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/pxa2xx_spi.h | 105 ++++++++++++++++++++-
 5 files changed, 314 insertions(+), 7 deletions(-)
 create mode 100644 drivers/spi/pxa2xx_spi_pci.c

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 78f9fd02c1b2..537759011d1d 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -267,12 +267,15 @@ config SPI_PPC4xx
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
-	depends on ARCH_PXA && EXPERIMENTAL
-	select PXA_SSP
+	depends on (ARCH_PXA || (X86_32 && PCI)) && EXPERIMENTAL
+	select PXA_SSP if ARCH_PXA
 	help
-	  This enables using a PXA2xx SSP port as a SPI master controller.
-	  The driver can be configured to use any SSP port and additional
-	  documentation can be found a Documentation/spi/pxa2xx.
+	  This enables using a PXA2xx or Sodaville SSP port as a SPI master
+	  controller. The driver can be configured to use any SSP port and
+	  additional documentation can be found a Documentation/spi/pxa2xx.
+
+config SPI_PXA2XX_PCI
+	def_bool SPI_PXA2XX && X86_32 && PCI
 
 config SPI_S3C24XX
 	tristate "Samsung S3C24XX series SPI"
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 8bc1a5abac1f..bdc4c400fd71 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
+obj-$(CONFIG_SPI_PXA2XX_PCI)		+= pxa2xx_spi_pci.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
 obj-$(CONFIG_SPI_OMAP24XX)		+= omap2_mcspi.o
 obj-$(CONFIG_SPI_OMAP_100K)		+= omap_spi_100k.o
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 98d9c8b09182..ed212c2583ab 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -28,7 +28,6 @@
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
-#include <linux/clk.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
 
diff --git a/drivers/spi/pxa2xx_spi_pci.c b/drivers/spi/pxa2xx_spi_pci.c
new file mode 100644
index 000000000000..351d8a375b57
--- /dev/null
+++ b/drivers/spi/pxa2xx_spi_pci.c
@@ -0,0 +1,201 @@
+/*
+ * CE4100's SPI device is more or less the same one as found on PXA
+ *
+ */
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+struct awesome_struct {
+	struct ssp_device ssp;
+	struct platform_device spi_pdev;
+	struct pxa2xx_spi_master spi_pdata;
+};
+
+static DEFINE_MUTEX(ssp_lock);
+static LIST_HEAD(ssp_list);
+
+struct ssp_device *pxa_ssp_request(int port, const char *label)
+{
+	struct ssp_device *ssp = NULL;
+
+	mutex_lock(&ssp_lock);
+
+	list_for_each_entry(ssp, &ssp_list, node) {
+		if (ssp->port_id == port && ssp->use_count == 0) {
+			ssp->use_count++;
+			ssp->label = label;
+			break;
+		}
+	}
+
+	mutex_unlock(&ssp_lock);
+
+	if (&ssp->node == &ssp_list)
+		return NULL;
+
+	return ssp;
+}
+EXPORT_SYMBOL_GPL(pxa_ssp_request);
+
+void pxa_ssp_free(struct ssp_device *ssp)
+{
+	mutex_lock(&ssp_lock);
+	if (ssp->use_count) {
+		ssp->use_count--;
+		ssp->label = NULL;
+	} else
+		dev_err(&ssp->pdev->dev, "device already free\n");
+	mutex_unlock(&ssp_lock);
+}
+EXPORT_SYMBOL_GPL(pxa_ssp_free);
+
+static void plat_dev_release(struct device *dev)
+{
+	struct awesome_struct *as = container_of(dev,
+			struct awesome_struct, spi_pdev.dev);
+
+	of_device_node_put(&as->spi_pdev.dev);
+}
+
+static int __devinit ce4100_spi_probe(struct pci_dev *dev,
+		const struct pci_device_id *ent)
+{
+	int ret;
+	resource_size_t phys_beg;
+	resource_size_t phys_len;
+	struct awesome_struct *spi_info;
+	struct platform_device *pdev;
+	struct pxa2xx_spi_master *spi_pdata;
+	struct ssp_device *ssp;
+
+	ret = pci_enable_device(dev);
+	if (ret)
+		return ret;
+
+	phys_beg = pci_resource_start(dev, 0);
+	phys_len = pci_resource_len(dev, 0);
+
+	if (!request_mem_region(phys_beg, phys_len,
+				"CE4100 SPI")) {
+		dev_err(&dev->dev, "Can't request register space.\n");
+		ret = -EBUSY;
+		return ret;
+	}
+
+	spi_info = kzalloc(sizeof(*spi_info), GFP_KERNEL);
+	if (!spi_info) {
+		ret = -ENOMEM;
+		goto err_kz;
+	}
+	ssp = &spi_info->ssp;
+	pdev = &spi_info->spi_pdev;
+	spi_pdata =  &spi_info->spi_pdata;
+
+	pdev->name = "pxa2xx-spi";
+	pdev->id = dev->devfn;
+	pdev->dev.parent = &dev->dev;
+	pdev->dev.platform_data = &spi_info->spi_pdata;
+
+#ifdef CONFIG_OF
+	pdev->dev.of_node = dev->dev.of_node;
+#endif
+	pdev->dev.release = plat_dev_release;
+
+	spi_pdata->num_chipselect = dev->devfn;
+
+	ssp->phys_base = pci_resource_start(dev, 0);
+	ssp->mmio_base = ioremap(phys_beg, phys_len);
+	if (!ssp->mmio_base) {
+		dev_err(&pdev->dev, "failed to ioremap() registers\n");
+		ret = -EIO;
+		goto err_remap;
+	}
+	ssp->irq = dev->irq;
+	ssp->port_id = pdev->id;
+	ssp->type = PXA25x_SSP;
+
+	mutex_lock(&ssp_lock);
+	list_add(&ssp->node, &ssp_list);
+	mutex_unlock(&ssp_lock);
+
+	pci_set_drvdata(dev, spi_info);
+
+	ret = platform_device_register(pdev);
+	if (ret)
+		goto err_dev_add;
+
+	return ret;
+
+err_dev_add:
+	pci_set_drvdata(dev, NULL);
+	mutex_lock(&ssp_lock);
+	list_del(&ssp->node);
+	mutex_unlock(&ssp_lock);
+	iounmap(ssp->mmio_base);
+
+err_remap:
+	kfree(spi_info);
+
+err_kz:
+	release_mem_region(phys_beg, phys_len);
+
+	return ret;
+}
+
+static void __devexit ce4100_spi_remove(struct pci_dev *dev)
+{
+	struct awesome_struct *spi_info;
+	struct platform_device *pdev;
+	struct ssp_device *ssp;
+
+	spi_info = pci_get_drvdata(dev);
+
+	ssp = &spi_info->ssp;
+	pdev = &spi_info->spi_pdev;
+
+	platform_device_unregister(pdev);
+
+	iounmap(ssp->mmio_base);
+	release_mem_region(pci_resource_start(dev, 0),
+			pci_resource_len(dev, 0));
+
+	mutex_lock(&ssp_lock);
+	list_del(&ssp->node);
+	mutex_unlock(&ssp_lock);
+
+	pci_set_drvdata(dev, NULL);
+	pci_disable_device(dev);
+	kfree(spi_info);
+}
+
+static struct pci_device_id ce4100_spi_devices[] __devinitdata = {
+
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2e6a) },
+	{ },
+};
+MODULE_DEVICE_TABLE(pci, ce4100_spi_devices);
+
+static struct pci_driver ce4100_spi_driver = {
+	.name           = "ce4100_spi",
+	.id_table       = ce4100_spi_devices,
+	.probe          = ce4100_spi_probe,
+	.remove         = __devexit_p(ce4100_spi_remove),
+};
+
+static int __init ce4100_spi_init(void)
+{
+	return pci_register_driver(&ce4100_spi_driver);
+}
+module_init(ce4100_spi_init);
+
+static void __exit ce4100_spi_exit(void)
+{
+	pci_unregister_driver(&ce4100_spi_driver);
+}
+module_exit(ce4100_spi_exit);
+
+MODULE_DESCRIPTION("CE4100 PCI-SPI glue code for PXA's driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 471ed6889113..d3e1075f7b60 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -19,7 +19,6 @@
 #define __linux_pxa2xx_spi_h
 
 #include <linux/pxa2xx_ssp.h>
-#include <mach/dma.h>
 
 #define PXA2XX_CS_ASSERT (0x01)
 #define PXA2XX_CS_DEASSERT (0x02)
@@ -44,6 +43,110 @@ struct pxa2xx_spi_chip {
 	void (*cs_control)(u32 command);
 };
 
+#ifdef CONFIG_ARCH_PXA
+
+#include <linux/clk.h>
+#include <mach/dma.h>
+
 extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
 
+#else
+/*
+ * This is the implemtation for CE4100 on x86. ARM defines them in mach/ or
+ * plat/ include path.
+ * The CE4100 does not provide DMA support. This bits are here to let the driver
+ * compile and will never be used. Maybe we get DMA support at a later point in
+ * time.
+ */
+
+#define DCSR(n)         (n)
+#define DSADR(n)        (n)
+#define DTADR(n)        (n)
+#define DCMD(n)         (n)
+#define DRCMR(n)        (n)
+
+#define DCSR_RUN	(1 << 31)	/* Run Bit */
+#define DCSR_NODESC	(1 << 30)	/* No-Descriptor Fetch */
+#define DCSR_STOPIRQEN	(1 << 29)	/* Stop Interrupt Enable */
+#define DCSR_REQPEND	(1 << 8)	/* Request Pending (read-only) */
+#define DCSR_STOPSTATE	(1 << 3)	/* Stop State (read-only) */
+#define DCSR_ENDINTR	(1 << 2)	/* End Interrupt */
+#define DCSR_STARTINTR	(1 << 1)	/* Start Interrupt */
+#define DCSR_BUSERR	(1 << 0)	/* Bus Error Interrupt */
+
+#define DCSR_EORIRQEN	(1 << 28)	/* End of Receive Interrupt Enable */
+#define DCSR_EORJMPEN	(1 << 27)	/* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN	(1 << 26)	/* STOP on an EOR */
+#define DCSR_SETCMPST	(1 << 25)	/* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST	(1 << 24)	/* Clear Descriptor Compare Status */
+#define DCSR_CMPST	(1 << 10)	/* The Descriptor Compare Status */
+#define DCSR_EORINTR	(1 << 9)	/* The end of Receive */
+
+#define DRCMR_MAPVLD	(1 << 7)	/* Map Valid */
+#define DRCMR_CHLNUM	0x1f		/* mask for Channel Number */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor */
+#define DDADR_STOP	(1 << 0)	/* Stop */
+
+#define DCMD_INCSRCADDR	(1 << 31)	/* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR	(1 << 30)	/* Target Address Increment Setting. */
+#define DCMD_FLOWSRC	(1 << 29)	/* Flow Control by the source. */
+#define DCMD_FLOWTRG	(1 << 28)	/* Flow Control by the target. */
+#define DCMD_STARTIRQEN	(1 << 22)	/* Start Interrupt Enable */
+#define DCMD_ENDIRQEN	(1 << 21)	/* End Interrupt Enable */
+#define DCMD_ENDIAN	(1 << 18)	/* Device Endian-ness. */
+#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
+#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
+#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
+#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
+
+/*
+ * Descriptor structure for PXA's DMA engine
+ * Note: this structure must always be aligned to a 16-byte boundary.
+ */
+
+typedef enum {
+	DMA_PRIO_HIGH = 0,
+	DMA_PRIO_MEDIUM = 1,
+	DMA_PRIO_LOW = 2
+} pxa_dma_prio;
+
+/*
+ * DMA registration
+ */
+
+static inline int pxa_request_dma(char *name,
+		pxa_dma_prio prio,
+		void (*irq_handler)(int, void *),
+		void *data)
+{
+	return -ENODEV;
+}
+
+static inline void pxa_free_dma(int dma_ch)
+{
+}
+
+/*
+ * The CE4100 does not have the clk framework implemented and SPI clock can
+ * not be switched on/off or the divider changed.
+ */
+static inline void clk_disable(struct clk *clk)
+{
+}
+
+static inline int clk_enable(struct clk *clk)
+{
+	return 0;
+}
+
+static inline unsigned long clk_get_rate(struct clk *clk)
+{
+	return 3686400;
+}
+
+#endif
 #endif
-- 
cgit v1.2.3-71-gd317


From d0777f2c3eda180e3fc549e0efbe741014f17689 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 22 Nov 2010 17:12:16 -0800
Subject: spi/pxa2xx: Consider CE4100's FIFO depth

For PXA the default threshold is FIFO_DEPTH / 2. Adjust this value for
CE4100.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dirk Brandewie <dirk.brandewie@gmail.com>
---
 drivers/spi/pxa2xx_spi.c   |  2 --
 include/linux/pxa2xx_ssp.h | 32 ++++++++++++++++++++++++++------
 2 files changed, 26 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index ed212c2583ab..81cfbbc58e94 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -43,8 +43,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 
 #define MAX_BUSES 3
 
-#define RX_THRESH_DFLT 	8
-#define TX_THRESH_DFLT 	8
 #define TIMOUT_DFLT		1000
 
 #define DMA_INT_MASK		(DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR)
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 84465d4a51b7..c3aa334cbb9b 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -71,10 +71,6 @@
 #define SSCR1_SPO	(1 << 3)	/* Motorola SPI SSPSCLK polarity setting */
 #define SSCR1_SPH	(1 << 4)	/* Motorola SPI SSPSCLK phase setting */
 #define SSCR1_MWDS	(1 << 5)	/* Microwire Transmit Data Size */
-#define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
-#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */
-#define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
-#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
 
 #define SSSR_TNF	(1 << 2)	/* Transmit FIFO Not Full */
 #define SSSR_RNE	(1 << 3)	/* Receive FIFO Not Empty */
@@ -82,8 +78,32 @@
 #define SSSR_TFS	(1 << 5)	/* Transmit FIFO Service Request */
 #define SSSR_RFS	(1 << 6)	/* Receive FIFO Service Request */
 #define SSSR_ROR	(1 << 7)	/* Receive FIFO Overrun */
-#define SSSR_TFL_MASK   (0xf << 8)	/* Transmit FIFO Level mask */
-#define SSSR_RFL_MASK   (0xf << 12)	/* Receive FIFO Level mask */
+
+#ifdef CONFIG_ARCH_PXA
+#define RX_THRESH_DFLT	8
+#define TX_THRESH_DFLT	8
+
+#define SSSR_TFL_MASK	(0xf << 8)	/* Transmit FIFO Level mask */
+#define SSSR_RFL_MASK	(0xf << 12)	/* Receive FIFO Level mask */
+
+#define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
+#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */
+#define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
+#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
+
+#else
+
+#define RX_THRESH_DFLT	2
+#define TX_THRESH_DFLT	2
+
+#define SSSR_TFL_MASK	(0x3 << 8)	/* Transmit FIFO Level mask */
+#define SSSR_RFL_MASK	(0x3 << 12)	/* Receive FIFO Level mask */
+
+#define SSCR1_TFT	(0x000000c0)	/* Transmit FIFO Threshold (mask) */
+#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..4] */
+#define SSCR1_RFT	(0x00000c00)	/* Receive FIFO Threshold (mask) */
+#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */
+#endif
 
 /* extra bits in PXA255, PXA26x and PXA27x SSP ports */
 #define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
-- 
cgit v1.2.3-71-gd317


From f2cd2d3e9b3ef960612e362f0ad129d735452df2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 29 Nov 2010 08:14:37 +0000
Subject: net sched: use xps information for qdisc NUMA affinity

Allocate qdisc memory according to NUMA properties of cpus included in
xps map.

To be effective, qdisc should be (re)setup after changes
of /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

I added a numa_node field in struct netdev_queue, containing NUMA node
if all cpus included in xps_cpus share same node, else -1.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 +++++++++++++++++++-
 net/core/dev.c            |  5 +++--
 net/core/net-sysfs.c      | 12 +++++++++++-
 net/sched/sch_generic.c   |  4 +++-
 4 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4b0c7f3aa32b..a9ac5dc26e3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -508,7 +508,9 @@ struct netdev_queue {
 #ifdef CONFIG_RPS
 	struct kobject		kobj;
 #endif
-
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	int			numa_node;
+#endif
 /*
  * write mostly part
  */
@@ -523,6 +525,22 @@ struct netdev_queue {
 	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
+{
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	return q->numa_node;
+#else
+	return -1;
+#endif
+}
+
+static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node)
+{
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	q->numa_node = node;
+#endif
+}
+
 #ifdef CONFIG_RPS
 /*
  * This structure holds an RPS map which can be of variable length.  The
diff --git a/net/core/dev.c b/net/core/dev.c
index 3259d2c323a6..cd2437495428 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5125,9 +5125,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
+		netdev_queue_numa_node_write(&tx[i], -1);
 		tx[i].dev = dev;
-
+	}
 	return 0;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f85cee3d869e..85e8b5326dd6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -913,6 +913,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	struct xps_map *map, *new_map;
 	struct xps_dev_maps *dev_maps, *new_dev_maps;
 	int nonempty = 0;
+	int numa_node = -2;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -953,7 +954,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 			pos = map_len = alloc_len = 0;
 
 		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
-
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node == -2)
+				numa_node = cpu_to_node(cpu);
+			else if (numa_node != cpu_to_node(cpu))
+				numa_node = -1;
+		}
+#endif
 		if (need_set && pos >= map_len) {
 			/* Need to add queue to this CPU's map */
 			if (map_len >= alloc_len) {
@@ -1001,6 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	if (dev_maps)
 		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
 
+	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
+
 	mutex_unlock(&xps_map_mutex);
 
 	free_cpumask_var(mask);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7f0bd8952646..0918834ee4a1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -553,7 +553,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	size = QDISC_ALIGN(sizeof(*sch));
 	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc_node(size, GFP_KERNEL,
+			 netdev_queue_numa_node_read(dev_queue));
+
 	if (!p)
 		goto errout;
 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
-- 
cgit v1.2.3-71-gd317


From 676dac4b1bee0469d6932f698aeb77e8489f5861 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Thu, 2 Dec 2010 06:43:39 -0800
Subject: This patch adds a new security attribute to Smack called SMACK64EXEC.
 It defines label that is used while task is running.

Exception: in smack_task_wait() child task is checked
for write access to parent task using label inherited
from the task that forked it.

Fixed issues from previous submit:
- SMACK64EXEC was not read when SMACK64 was not set.
- inode security blob was not updated after setting
  SMACK64EXEC
- inode security blob was not updated when removing
  SMACK64EXEC
---
 include/linux/xattr.h         |   2 +
 security/smack/smack.h        |  30 +++++++
 security/smack/smack_access.c |   4 +-
 security/smack/smack_lsm.c    | 192 +++++++++++++++++++++++++++++++-----------
 security/smack/smackfs.c      |   4 +-
 5 files changed, 178 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index f1e5bde4b35a..351c7901d74a 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -40,9 +40,11 @@
 #define XATTR_SMACK_SUFFIX "SMACK64"
 #define XATTR_SMACK_IPIN "SMACK64IPIN"
 #define XATTR_SMACK_IPOUT "SMACK64IPOUT"
+#define XATTR_SMACK_EXEC "SMACK64EXEC"
 #define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
 #define XATTR_NAME_SMACKIPIN	XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
 #define XATTR_NAME_SMACKIPOUT	XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
+#define XATTR_NAME_SMACKEXEC	XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
 
 #define XATTR_CAPS_SUFFIX "capability"
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 43ae747a5aa4..a2e2cdfab4ef 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -51,10 +51,16 @@ struct socket_smack {
  */
 struct inode_smack {
 	char		*smk_inode;	/* label of the fso */
+	char		*smk_task;	/* label of the task */
 	struct mutex	smk_lock;	/* initialization lock */
 	int		smk_flags;	/* smack inode flags */
 };
 
+struct task_smack {
+	char		*smk_task;	/* label used for access control */
+	char		*smk_forked;	/* label when forked */
+};
+
 #define	SMK_INODE_INSTANT	0x01	/* inode is instantiated */
 
 /*
@@ -242,6 +248,30 @@ static inline char *smk_of_inode(const struct inode *isp)
 	return sip->smk_inode;
 }
 
+/*
+ * Present a pointer to the smack label in an task blob.
+ */
+static inline char *smk_of_task(const struct task_smack *tsp)
+{
+	return tsp->smk_task;
+}
+
+/*
+ * Present a pointer to the forked smack label in an task blob.
+ */
+static inline char *smk_of_forked(const struct task_smack *tsp)
+{
+	return tsp->smk_forked;
+}
+
+/*
+ * Present a pointer to the smack label in the curren task blob.
+ */
+static inline char *smk_of_current(void)
+{
+	return smk_of_task(current_security());
+}
+
 /*
  * logging functions
  */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index f4fac64c4da8..42becbc1ce33 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -185,7 +185,7 @@ out_audit:
 int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
 {
 	int rc;
-	char *sp = current_security();
+	char *sp = smk_of_current();
 
 	rc = smk_access(sp, obj_label, mode, NULL);
 	if (rc == 0)
@@ -196,7 +196,7 @@ int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
 	 * only one that gets privilege and current does not
 	 * have that label.
 	 */
-	if (smack_onlycap != NULL && smack_onlycap != current->cred->security)
+	if (smack_onlycap != NULL && smack_onlycap != sp)
 		goto out_audit;
 
 	if (capable(CAP_MAC_OVERRIDE))
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 04a98c361a65..7e19afe0e738 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -43,7 +43,7 @@
  * Returns a pointer to the master list entry for the Smack label
  * or NULL if there was no label to fetch.
  */
-static char *smk_fetch(struct inode *ip, struct dentry *dp)
+static char *smk_fetch(const char *name, struct inode *ip, struct dentry *dp)
 {
 	int rc;
 	char in[SMK_LABELLEN];
@@ -51,7 +51,7 @@ static char *smk_fetch(struct inode *ip, struct dentry *dp)
 	if (ip->i_op->getxattr == NULL)
 		return NULL;
 
-	rc = ip->i_op->getxattr(dp, XATTR_NAME_SMACK, in, SMK_LABELLEN);
+	rc = ip->i_op->getxattr(dp, name, in, SMK_LABELLEN);
 	if (rc < 0)
 		return NULL;
 
@@ -103,8 +103,8 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
 	if (rc != 0)
 		return rc;
 
-	sp = current_security();
-	tsp = task_security(ctp);
+	sp = smk_of_current();
+	tsp = smk_of_task(task_security(ctp));
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, ctp);
 
@@ -138,8 +138,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, ptp);
 
-	sp = current_security();
-	tsp = task_security(ptp);
+	sp = smk_of_current();
+	tsp = smk_of_task(task_security(ptp));
 	/* we won't log here, because rc can be overriden */
 	rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
 	if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
@@ -160,7 +160,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 static int smack_syslog(int typefrom_file)
 {
 	int rc = 0;
-	char *sp = current_security();
+	char *sp = smk_of_current();
 
 	if (capable(CAP_MAC_OVERRIDE))
 		return 0;
@@ -390,6 +390,40 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags)
 	return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
 }
 
+/*
+ * BPRM hooks
+ */
+
+static int smack_bprm_set_creds(struct linux_binprm *bprm)
+{
+	struct task_smack *tsp = bprm->cred->security;
+	struct inode_smack *isp;
+	struct dentry *dp;
+	int rc;
+
+	rc = cap_bprm_set_creds(bprm);
+	if (rc != 0)
+		return rc;
+
+	if (bprm->cred_prepared)
+		return 0;
+
+	if (bprm->file == NULL || bprm->file->f_dentry == NULL)
+		return 0;
+
+	dp = bprm->file->f_dentry;
+
+	if (dp->d_inode == NULL)
+		return 0;
+
+	isp = dp->d_inode->i_security;
+
+	if (isp->smk_task != NULL)
+		tsp->smk_task = isp->smk_task;
+
+	return 0;
+}
+
 /*
  * Inode hooks
  */
@@ -402,7 +436,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags)
  */
 static int smack_inode_alloc_security(struct inode *inode)
 {
-	inode->i_security = new_inode_smack(current_security());
+	inode->i_security = new_inode_smack(smk_of_current());
 	if (inode->i_security == NULL)
 		return -ENOMEM;
 	return 0;
@@ -664,7 +698,8 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
-	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0) {
+	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
+	    strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
 		if (!capable(CAP_MAC_ADMIN))
 			rc = -EPERM;
 		/*
@@ -704,9 +739,10 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
 	char *nsp;
 
 	/*
-	 * Not SMACK
+	 * Not SMACK or SMACKEXEC
 	 */
-	if (strcmp(name, XATTR_NAME_SMACK))
+	if (strcmp(name, XATTR_NAME_SMACK) &&
+	    strcmp(name, XATTR_NAME_SMACKEXEC))
 		return;
 
 	isp = dentry->d_inode->i_security;
@@ -716,10 +752,18 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
 	 * assignment.
 	 */
 	nsp = smk_import(value, size);
-	if (nsp != NULL)
-		isp->smk_inode = nsp;
-	else
-		isp->smk_inode = smack_known_invalid.smk_known;
+
+	if (strcmp(name, XATTR_NAME_SMACK) == 0) {
+		if (nsp != NULL)
+			isp->smk_inode = nsp;
+		else
+			isp->smk_inode = smack_known_invalid.smk_known;
+	} else {
+		if (nsp != NULL)
+			isp->smk_task = nsp;
+		else
+			isp->smk_task = smack_known_invalid.smk_known;
+	}
 
 	return;
 }
@@ -752,12 +796,14 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
  */
 static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 {
+	struct inode_smack *isp;
 	struct smk_audit_info ad;
 	int rc = 0;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
-	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0) {
+	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
+	    strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
 		if (!capable(CAP_MAC_ADMIN))
 			rc = -EPERM;
 	} else
@@ -768,6 +814,11 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 	if (rc == 0)
 		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
+	if (rc == 0) {
+		isp = dentry->d_inode->i_security;
+		isp->smk_task = NULL;
+	}
+
 	return rc;
 }
 
@@ -895,7 +946,7 @@ static int smack_file_permission(struct file *file, int mask)
  */
 static int smack_file_alloc_security(struct file *file)
 {
-	file->f_security = current_security();
+	file->f_security = smk_of_current();
 	return 0;
 }
 
@@ -1005,7 +1056,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
  */
 static int smack_file_set_fowner(struct file *file)
 {
-	file->f_security = current_security();
+	file->f_security = smk_of_current();
 	return 0;
 }
 
@@ -1025,7 +1076,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 {
 	struct file *file;
 	int rc;
-	char *tsp = tsk->cred->security;
+	char *tsp = smk_of_task(tsk->cred->security);
 	struct smk_audit_info ad;
 
 	/*
@@ -1082,7 +1133,9 @@ static int smack_file_receive(struct file *file)
  */
 static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
 {
-	cred->security = NULL;
+	cred->security = kzalloc(sizeof(struct task_smack), gfp);
+	if (cred->security == NULL)
+		return -ENOMEM;
 	return 0;
 }
 
@@ -1097,7 +1150,7 @@ static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
  */
 static void smack_cred_free(struct cred *cred)
 {
-	cred->security = NULL;
+	kfree(cred->security);
 }
 
 /**
@@ -1111,7 +1164,16 @@ static void smack_cred_free(struct cred *cred)
 static int smack_cred_prepare(struct cred *new, const struct cred *old,
 			      gfp_t gfp)
 {
-	new->security = old->security;
+	struct task_smack *old_tsp = old->security;
+	struct task_smack *new_tsp;
+
+	new_tsp = kzalloc(sizeof(struct task_smack), gfp);
+	if (new_tsp == NULL)
+		return -ENOMEM;
+
+	new_tsp->smk_task = old_tsp->smk_task;
+	new_tsp->smk_forked = old_tsp->smk_task;
+	new->security = new_tsp;
 	return 0;
 }
 
@@ -1124,7 +1186,11 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old,
  */
 static void smack_cred_transfer(struct cred *new, const struct cred *old)
 {
-	new->security = old->security;
+	struct task_smack *old_tsp = old->security;
+	struct task_smack *new_tsp = new->security;
+
+	new_tsp->smk_task = old_tsp->smk_task;
+	new_tsp->smk_forked = old_tsp->smk_task;
 }
 
 /**
@@ -1136,12 +1202,13 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
  */
 static int smack_kernel_act_as(struct cred *new, u32 secid)
 {
+	struct task_smack *new_tsp = new->security;
 	char *smack = smack_from_secid(secid);
 
 	if (smack == NULL)
 		return -EINVAL;
 
-	new->security = smack;
+	new_tsp->smk_task = smack;
 	return 0;
 }
 
@@ -1157,8 +1224,10 @@ static int smack_kernel_create_files_as(struct cred *new,
 					struct inode *inode)
 {
 	struct inode_smack *isp = inode->i_security;
+	struct task_smack *tsp = new->security;
 
-	new->security = isp->smk_inode;
+	tsp->smk_forked = isp->smk_inode;
+	tsp->smk_task = isp->smk_inode;
 	return 0;
 }
 
@@ -1175,7 +1244,7 @@ static int smk_curacc_on_task(struct task_struct *p, int access)
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, p);
-	return smk_curacc(task_security(p), access, &ad);
+	return smk_curacc(smk_of_task(task_security(p)), access, &ad);
 }
 
 /**
@@ -1221,7 +1290,7 @@ static int smack_task_getsid(struct task_struct *p)
  */
 static void smack_task_getsecid(struct task_struct *p, u32 *secid)
 {
-	*secid = smack_to_secid(task_security(p));
+	*secid = smack_to_secid(smk_of_task(task_security(p)));
 }
 
 /**
@@ -1333,14 +1402,15 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 	 * can write the receiver.
 	 */
 	if (secid == 0)
-		return smk_curacc(task_security(p), MAY_WRITE, &ad);
+		return smk_curacc(smk_of_task(task_security(p)), MAY_WRITE,
+				  &ad);
 	/*
 	 * If the secid isn't 0 we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	return smk_access(smack_from_secid(secid), task_security(p),
-			  MAY_WRITE, &ad);
+	return smk_access(smack_from_secid(secid),
+			  smk_of_task(task_security(p)), MAY_WRITE, &ad);
 }
 
 /**
@@ -1352,12 +1422,12 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 static int smack_task_wait(struct task_struct *p)
 {
 	struct smk_audit_info ad;
-	char *sp = current_security();
-	char *tsp = task_security(p);
+	char *sp = smk_of_current();
+	char *tsp = smk_of_forked(task_security(p));
 	int rc;
 
 	/* we don't log here, we can be overriden */
-	rc = smk_access(sp, tsp, MAY_WRITE, NULL);
+	rc = smk_access(tsp, sp, MAY_WRITE, NULL);
 	if (rc == 0)
 		goto out_log;
 
@@ -1378,7 +1448,7 @@ static int smack_task_wait(struct task_struct *p)
  out_log:
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, p);
-	smack_log(sp, tsp, MAY_WRITE, rc, &ad);
+	smack_log(tsp, sp, MAY_WRITE, rc, &ad);
 	return rc;
 }
 
@@ -1392,7 +1462,7 @@ static int smack_task_wait(struct task_struct *p)
 static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
 {
 	struct inode_smack *isp = inode->i_security;
-	isp->smk_inode = task_security(p);
+	isp->smk_inode = smk_of_task(task_security(p));
 }
 
 /*
@@ -1411,7 +1481,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  */
 static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
 {
-	char *csp = current_security();
+	char *csp = smk_of_current();
 	struct socket_smack *ssp;
 
 	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
@@ -1752,7 +1822,7 @@ static int smack_flags_to_may(int flags)
  */
 static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 {
-	msg->security = current_security();
+	msg->security = smk_of_current();
 	return 0;
 }
 
@@ -1788,7 +1858,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
 {
 	struct kern_ipc_perm *isp = &shp->shm_perm;
 
-	isp->security = current_security();
+	isp->security = smk_of_current();
 	return 0;
 }
 
@@ -1911,7 +1981,7 @@ static int smack_sem_alloc_security(struct sem_array *sma)
 {
 	struct kern_ipc_perm *isp = &sma->sem_perm;
 
-	isp->security = current_security();
+	isp->security = smk_of_current();
 	return 0;
 }
 
@@ -2029,7 +2099,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
 {
 	struct kern_ipc_perm *kisp = &msq->q_perm;
 
-	kisp->security = current_security();
+	kisp->security = smk_of_current();
 	return 0;
 }
 
@@ -2201,7 +2271,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	struct super_block *sbp;
 	struct superblock_smack *sbsp;
 	struct inode_smack *isp;
-	char *csp = current_security();
+	char *csp = smk_of_current();
 	char *fetched;
 	char *final;
 	struct dentry *dp;
@@ -2321,9 +2391,12 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 		 * Get the dentry for xattr.
 		 */
 		dp = dget(opt_dentry);
-		fetched = smk_fetch(inode, dp);
+		fetched = smk_fetch(XATTR_NAME_SMACK, inode, dp);
 		if (fetched != NULL)
 			final = fetched;
+		isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode,
+					  dp);
+
 		dput(dp);
 		break;
 	}
@@ -2358,7 +2431,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
 	if (strcmp(name, "current") != 0)
 		return -EINVAL;
 
-	cp = kstrdup(task_security(p), GFP_KERNEL);
+	cp = kstrdup(smk_of_task(task_security(p)), GFP_KERNEL);
 	if (cp == NULL)
 		return -ENOMEM;
 
@@ -2382,6 +2455,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
 static int smack_setprocattr(struct task_struct *p, char *name,
 			     void *value, size_t size)
 {
+	struct task_smack *tsp;
 	struct cred *new;
 	char *newsmack;
 
@@ -2414,7 +2488,13 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 	new = prepare_creds();
 	if (new == NULL)
 		return -ENOMEM;
-	new->security = newsmack;
+	tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL);
+	if (tsp == NULL) {
+		kfree(new);
+		return -ENOMEM;
+	}
+	tsp->smk_task = newsmack;
+	new->security = tsp;
 	commit_creds(new);
 	return size;
 }
@@ -2715,7 +2795,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent)
 		return;
 
 	ssp = sk->sk_security;
-	ssp->smk_in = ssp->smk_out = current_security();
+	ssp->smk_in = ssp->smk_out = smk_of_current();
 	/* cssp->smk_packet is already set in smack_inet_csk_clone() */
 }
 
@@ -2836,7 +2916,7 @@ static void smack_inet_csk_clone(struct sock *sk,
 static int smack_key_alloc(struct key *key, const struct cred *cred,
 			   unsigned long flags)
 {
-	key->security = cred->security;
+	key->security = smk_of_task(cred->security);
 	return 0;
 }
 
@@ -2865,6 +2945,7 @@ static int smack_key_permission(key_ref_t key_ref,
 {
 	struct key *keyp;
 	struct smk_audit_info ad;
+	char *tsp = smk_of_task(cred->security);
 
 	keyp = key_ref_to_ptr(key_ref);
 	if (keyp == NULL)
@@ -2878,14 +2959,14 @@ static int smack_key_permission(key_ref_t key_ref,
 	/*
 	 * This should not occur
 	 */
-	if (cred->security == NULL)
+	if (tsp == NULL)
 		return -EACCES;
 #ifdef CONFIG_AUDIT
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY);
 	ad.a.u.key_struct.key = keyp->serial;
 	ad.a.u.key_struct.key_desc = keyp->description;
 #endif
-	return smk_access(cred->security, keyp->security,
+	return smk_access(tsp, keyp->security,
 				 MAY_READWRITE, &ad);
 }
 #endif /* CONFIG_KEYS */
@@ -3087,6 +3168,8 @@ struct security_operations smack_ops = {
 	.sb_mount = 			smack_sb_mount,
 	.sb_umount = 			smack_sb_umount,
 
+	.bprm_set_creds =		smack_bprm_set_creds,
+
 	.inode_alloc_security = 	smack_inode_alloc_security,
 	.inode_free_security = 		smack_inode_free_security,
 	.inode_init_security = 		smack_inode_init_security,
@@ -3223,9 +3306,16 @@ static __init void init_smack_know_list(void)
 static __init int smack_init(void)
 {
 	struct cred *cred;
+	struct task_smack *tsp;
 
-	if (!security_module_enable(&smack_ops))
+	tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL);
+	if (tsp == NULL)
+		return -ENOMEM;
+
+	if (!security_module_enable(&smack_ops)) {
+		kfree(tsp);
 		return 0;
+	}
 
 	printk(KERN_INFO "Smack:  Initializing.\n");
 
@@ -3233,7 +3323,9 @@ static __init int smack_init(void)
 	 * Set the security state for the initial task.
 	 */
 	cred = (struct cred *) current->cred;
-	cred->security = &smack_known_floor.smk_known;
+	tsp->smk_forked = smack_known_floor.smk_known;
+	tsp->smk_task = smack_known_floor.smk_known;
+	cred->security = tsp;
 
 	/* initialize the smack_know_list */
 	init_smack_know_list();
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index dc1fd6239f24..01a0be93d8d0 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -121,7 +121,7 @@ static void smk_netlabel_audit_set(struct netlbl_audit *nap)
 {
 	nap->loginuid = audit_get_loginuid(current);
 	nap->sessionid = audit_get_sessionid(current);
-	nap->secid = smack_to_secid(current_security());
+	nap->secid = smack_to_secid(smk_of_current());
 }
 
 /*
@@ -1160,7 +1160,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	char in[SMK_LABELLEN];
-	char *sp = current->cred->security;
+	char *sp = smk_of_task(current->cred->security);
 
 	if (!capable(CAP_MAC_ADMIN))
 		return -EPERM;
-- 
cgit v1.2.3-71-gd317


From 2a8626a9e2d86d114a2d9f813a1acebf9d53dd10 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 22 Nov 2010 17:12:17 -0800
Subject: spi/pxa2xx: Add chipselect support for Sodaville

The SPI core on Sodaville supports chip selects. Its configuration
moved into the SSSR register at bit 0 and 1. Thus Sodaville can be hooked
up with up to 4 devices.
This patch ensures that the bits which are otherwiese reserved are only
touched on Sodaville and not on any other PXAs. Also it makes sure that
the status register does not lose the CS information while clearing the
ROR bit.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dirk Brandewie <dirk.brandewie@gmail.com>
---
 drivers/spi/pxa2xx_spi.c   | 93 +++++++++++++++++++++++++++++++++-------------
 include/linux/pxa2xx_ssp.h |  2 +
 2 files changed, 70 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 81cfbbc58e94..a54685bb7e53 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -163,7 +163,10 @@ struct chip_data {
 	u8 enable_dma;
 	u8 bits_per_word;
 	u32 speed_hz;
-	int gpio_cs;
+	union {
+		int gpio_cs;
+		unsigned int frm;
+	};
 	int gpio_cs_inverted;
 	int (*write)(struct driver_data *drv_data);
 	int (*read)(struct driver_data *drv_data);
@@ -176,6 +179,11 @@ static void cs_assert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
 
+	if (drv_data->ssp_type == CE4100_SSP) {
+		write_SSSR(drv_data->cur_chip->frm, drv_data->ioaddr);
+		return;
+	}
+
 	if (chip->cs_control) {
 		chip->cs_control(PXA2XX_CS_ASSERT);
 		return;
@@ -189,6 +197,9 @@ static void cs_deassert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
 
+	if (drv_data->ssp_type == CE4100_SSP)
+		return;
+
 	if (chip->cs_control) {
 		chip->cs_control(PXA2XX_CS_DEASSERT);
 		return;
@@ -198,6 +209,25 @@ static void cs_deassert(struct driver_data *drv_data)
 		gpio_set_value(chip->gpio_cs, !chip->gpio_cs_inverted);
 }
 
+static void write_SSSR_CS(struct driver_data *drv_data, u32 val)
+{
+	void __iomem *reg = drv_data->ioaddr;
+
+	if (drv_data->ssp_type == CE4100_SSP)
+		val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK;
+
+	write_SSSR(val, reg);
+}
+
+static int pxa25x_ssp_comp(struct driver_data *drv_data)
+{
+	if (drv_data->ssp_type == PXA25x_SSP)
+		return 1;
+	if (drv_data->ssp_type == CE4100_SSP)
+		return 1;
+	return 0;
+}
+
 static int flush(struct driver_data *drv_data)
 {
 	unsigned long limit = loops_per_jiffy << 1;
@@ -209,7 +239,7 @@ static int flush(struct driver_data *drv_data)
 			read_SSDR(reg);
 		}
 	} while ((read_SSSR(reg) & SSSR_BSY) && --limit);
-	write_SSSR(SSSR_ROR, reg);
+	write_SSSR_CS(drv_data, SSSR_ROR);
 
 	return limit;
 }
@@ -502,9 +532,9 @@ static void dma_error_stop(struct driver_data *drv_data, const char *msg)
 	/* Stop and reset */
 	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
 	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-	write_SSSR(drv_data->clear_sr, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	if (drv_data->ssp_type != PXA25x_SSP)
+	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, reg);
 	flush(drv_data);
 	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
@@ -524,7 +554,7 @@ static void dma_transfer_complete(struct driver_data *drv_data)
 
 	/* Clear and disable interrupts on SSP and DMA channels*/
 	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	write_SSSR(drv_data->clear_sr, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
 	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
 
@@ -617,7 +647,7 @@ static irqreturn_t dma_transfer(struct driver_data *drv_data)
 
 		/* Clear and disable timeout interrupt, do the rest in
 		 * dma_transfer_complete */
-		if (drv_data->ssp_type != PXA25x_SSP)
+		if (!pxa25x_ssp_comp(drv_data))
 			write_SSTO(0, reg);
 
 		/* finish this transfer, start the next */
@@ -635,9 +665,9 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg)
 	void __iomem *reg = drv_data->ioaddr;
 
 	/* Stop and reset SSP */
-	write_SSSR(drv_data->clear_sr, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg);
-	if (drv_data->ssp_type != PXA25x_SSP)
+	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, reg);
 	flush(drv_data);
 	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
@@ -653,9 +683,9 @@ static void int_transfer_complete(struct driver_data *drv_data)
 	void __iomem *reg = drv_data->ioaddr;
 
 	/* Stop SSP */
-	write_SSSR(drv_data->clear_sr, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg);
-	if (drv_data->ssp_type != PXA25x_SSP)
+	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, reg);
 
 	/* Update total byte transfered return count actual bytes read */
@@ -711,7 +741,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 	if (drv_data->tx == drv_data->tx_end) {
 		write_SSCR1(read_SSCR1(reg) & ~SSCR1_TIE, reg);
 		/* PXA25x_SSP has no timeout, read trailing bytes */
-		if (drv_data->ssp_type == PXA25x_SSP) {
+		if (pxa25x_ssp_comp(drv_data)) {
 			if (!wait_ssp_rx_stall(reg))
 			{
 				int_error_stop(drv_data, "interrupt_transfer: "
@@ -754,9 +784,9 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 
 		write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
 		write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg);
-		if (drv_data->ssp_type != PXA25x_SSP)
+		if (!pxa25x_ssp_comp(drv_data))
 			write_SSTO(0, reg);
-		write_SSSR(drv_data->clear_sr, reg);
+		write_SSSR_CS(drv_data, drv_data->clear_sr);
 
 		dev_err(&drv_data->pdev->dev, "bad message state "
 			"in interrupt handler\n");
@@ -869,7 +899,7 @@ static unsigned int ssp_get_clk_div(struct ssp_device *ssp, int rate)
 {
 	unsigned long ssp_clk = clk_get_rate(ssp->clk);
 
-	if (ssp->type == PXA25x_SSP)
+	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
 		return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
 	else
 		return ((ssp_clk / rate - 1) & 0xfff) << 8;
@@ -1095,7 +1125,7 @@ static void pump_transfers(unsigned long data)
 
 		/* Clear status  */
 		cr1 = chip->cr1 | chip->threshold | drv_data->int_cr1;
-		write_SSSR(drv_data->clear_sr, reg);
+		write_SSSR_CS(drv_data, drv_data->clear_sr);
 	}
 
 	/* see if we need to reload the config registers */
@@ -1105,7 +1135,7 @@ static void pump_transfers(unsigned long data)
 
 		/* stop the SSP, and update the other bits */
 		write_SSCR0(cr0 & ~SSCR0_SSE, reg);
-		if (drv_data->ssp_type != PXA25x_SSP)
+		if (!pxa25x_ssp_comp(drv_data))
 			write_SSTO(chip->timeout, reg);
 		/* first set CR1 without interrupt and service enables */
 		write_SSCR1(cr1 & SSCR1_CHANGE_MASK, reg);
@@ -1113,7 +1143,7 @@ static void pump_transfers(unsigned long data)
 		write_SSCR0(cr0, reg);
 
 	} else {
-		if (drv_data->ssp_type != PXA25x_SSP)
+		if (!pxa25x_ssp_comp(drv_data))
 			write_SSTO(chip->timeout, reg);
 	}
 
@@ -1240,14 +1270,13 @@ static int setup(struct spi_device *spi)
 	uint tx_thres = TX_THRESH_DFLT;
 	uint rx_thres = RX_THRESH_DFLT;
 
-	if (drv_data->ssp_type != PXA25x_SSP
+	if (!pxa25x_ssp_comp(drv_data)
 		&& (spi->bits_per_word < 4 || spi->bits_per_word > 32)) {
 		dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d "
 				"b/w not 4-32 for type non-PXA25x_SSP\n",
 				drv_data->ssp_type, spi->bits_per_word);
 		return -EINVAL;
-	}
-	else if (drv_data->ssp_type == PXA25x_SSP
+	} else if (pxa25x_ssp_comp(drv_data)
 			&& (spi->bits_per_word < 4
 				|| spi->bits_per_word > 16)) {
 		dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d "
@@ -1266,7 +1295,17 @@ static int setup(struct spi_device *spi)
 			return -ENOMEM;
 		}
 
-		chip->gpio_cs = -1;
+		if (drv_data->ssp_type == CE4100_SSP) {
+			if (spi->chip_select > 4) {
+				dev_err(&spi->dev, "failed setup: "
+				"cs number must not be > 4.\n");
+				kfree(chip);
+				return -EINVAL;
+			}
+
+			chip->frm = spi->chip_select;
+		} else
+			chip->gpio_cs = -1;
 		chip->enable_dma = 0;
 		chip->timeout = TIMOUT_DFLT;
 		chip->dma_burst_size = drv_data->master_info->enable_dma ?
@@ -1322,7 +1361,7 @@ static int setup(struct spi_device *spi)
 			| (((spi->mode & SPI_CPOL) != 0) ? SSCR1_SPO : 0);
 
 	/* NOTE:  PXA25x_SSP _could_ use external clocking ... */
-	if (drv_data->ssp_type != PXA25x_SSP)
+	if (!pxa25x_ssp_comp(drv_data))
 		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
 			clk_get_rate(ssp->clk)
 				/ (1 + ((chip->cr0 & SSCR0_SCR(0xfff)) >> 8)),
@@ -1357,17 +1396,21 @@ static int setup(struct spi_device *spi)
 
 	spi_set_ctldata(spi, chip);
 
+	if (drv_data->ssp_type == CE4100_SSP)
+		return 0;
+
 	return setup_cs(spi, chip, chip_info);
 }
 
 static void cleanup(struct spi_device *spi)
 {
 	struct chip_data *chip = spi_get_ctldata(spi);
+	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
 
 	if (!chip)
 		return;
 
-	if (gpio_is_valid(chip->gpio_cs))
+	if (drv_data->ssp_type != CE4100_SSP && gpio_is_valid(chip->gpio_cs))
 		gpio_free(chip->gpio_cs);
 
 	kfree(chip);
@@ -1507,7 +1550,7 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
 
 	drv_data->ioaddr = ssp->mmio_base;
 	drv_data->ssdr_physical = ssp->phys_base + SSDR;
-	if (ssp->type == PXA25x_SSP) {
+	if (pxa25x_ssp_comp(drv_data)) {
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE;
 		drv_data->dma_cr1 = 0;
 		drv_data->clear_sr = SSSR_ROR;
@@ -1569,7 +1612,7 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
 			| SSCR0_Motorola
 			| SSCR0_DataSize(8),
 			drv_data->ioaddr);
-	if (drv_data->ssp_type != PXA25x_SSP)
+	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, drv_data->ioaddr);
 	write_SSPSP(0, drv_data->ioaddr);
 
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index c3aa334cbb9b..2f691e4e6222 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -72,6 +72,7 @@
 #define SSCR1_SPH	(1 << 4)	/* Motorola SPI SSPSCLK phase setting */
 #define SSCR1_MWDS	(1 << 5)	/* Microwire Transmit Data Size */
 
+#define SSSR_ALT_FRM_MASK	3	/* Masks the SFRM signal number */
 #define SSSR_TNF	(1 << 2)	/* Transmit FIFO Not Full */
 #define SSSR_RNE	(1 << 3)	/* Receive FIFO Not Empty */
 #define SSSR_BSY	(1 << 4)	/* SSP Busy */
@@ -160,6 +161,7 @@ enum pxa_ssp_type {
 	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
 	PXA27x_SSP,
 	PXA168_SSP,
+	CE4100_SSP,
 };
 
 struct ssp_device {
-- 
cgit v1.2.3-71-gd317


From 172c69a47675dc1ca9c7243c031d8d77701bccc0 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 28 Nov 2010 10:39:35 +0100
Subject: ssb: extract indexes for power tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ssb/ssb.h      |  4 ++++
 include/linux/ssb/ssb_regs.h | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index f52966305e05..158449e55044 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -406,6 +406,46 @@ static void sprom_extract_r123(struct ssb_sprom *out, const u16 *in)
 	out->antenna_gain.ghz5.a3 = gain;
 }
 
+/* Revs 4 5 and 8 have partially shared layout */
+static void sprom_extract_r458(struct ssb_sprom *out, const u16 *in)
+{
+	SPEX(txpid2g[0], SSB_SPROM4_TXPID2G01,
+	     SSB_SPROM4_TXPID2G0, SSB_SPROM4_TXPID2G0_SHIFT);
+	SPEX(txpid2g[1], SSB_SPROM4_TXPID2G01,
+	     SSB_SPROM4_TXPID2G1, SSB_SPROM4_TXPID2G1_SHIFT);
+	SPEX(txpid2g[2], SSB_SPROM4_TXPID2G23,
+	     SSB_SPROM4_TXPID2G2, SSB_SPROM4_TXPID2G2_SHIFT);
+	SPEX(txpid2g[3], SSB_SPROM4_TXPID2G23,
+	     SSB_SPROM4_TXPID2G3, SSB_SPROM4_TXPID2G3_SHIFT);
+
+	SPEX(txpid5gl[0], SSB_SPROM4_TXPID5GL01,
+	     SSB_SPROM4_TXPID5GL0, SSB_SPROM4_TXPID5GL0_SHIFT);
+	SPEX(txpid5gl[1], SSB_SPROM4_TXPID5GL01,
+	     SSB_SPROM4_TXPID5GL1, SSB_SPROM4_TXPID5GL1_SHIFT);
+	SPEX(txpid5gl[2], SSB_SPROM4_TXPID5GL23,
+	     SSB_SPROM4_TXPID5GL2, SSB_SPROM4_TXPID5GL2_SHIFT);
+	SPEX(txpid5gl[3], SSB_SPROM4_TXPID5GL23,
+	     SSB_SPROM4_TXPID5GL3, SSB_SPROM4_TXPID5GL3_SHIFT);
+
+	SPEX(txpid5g[0], SSB_SPROM4_TXPID5G01,
+	     SSB_SPROM4_TXPID5G0, SSB_SPROM4_TXPID5G0_SHIFT);
+	SPEX(txpid5g[1], SSB_SPROM4_TXPID5G01,
+	     SSB_SPROM4_TXPID5G1, SSB_SPROM4_TXPID5G1_SHIFT);
+	SPEX(txpid5g[2], SSB_SPROM4_TXPID5G23,
+	     SSB_SPROM4_TXPID5G2, SSB_SPROM4_TXPID5G2_SHIFT);
+	SPEX(txpid5g[3], SSB_SPROM4_TXPID5G23,
+	     SSB_SPROM4_TXPID5G3, SSB_SPROM4_TXPID5G3_SHIFT);
+
+	SPEX(txpid5gh[0], SSB_SPROM4_TXPID5GH01,
+	     SSB_SPROM4_TXPID5GH0, SSB_SPROM4_TXPID5GH0_SHIFT);
+	SPEX(txpid5gh[1], SSB_SPROM4_TXPID5GH01,
+	     SSB_SPROM4_TXPID5GH1, SSB_SPROM4_TXPID5GH1_SHIFT);
+	SPEX(txpid5gh[2], SSB_SPROM4_TXPID5GH23,
+	     SSB_SPROM4_TXPID5GH2, SSB_SPROM4_TXPID5GH2_SHIFT);
+	SPEX(txpid5gh[3], SSB_SPROM4_TXPID5GH23,
+	     SSB_SPROM4_TXPID5GH3, SSB_SPROM4_TXPID5GH3_SHIFT);
+}
+
 static void sprom_extract_r45(struct ssb_sprom *out, const u16 *in)
 {
 	int i;
@@ -471,6 +511,8 @@ static void sprom_extract_r45(struct ssb_sprom *out, const u16 *in)
 	memcpy(&out->antenna_gain.ghz5, &out->antenna_gain.ghz24,
 	       sizeof(out->antenna_gain.ghz5));
 
+	sprom_extract_r458(out, in);
+
 	/* TODO - get remaining rev 4 stuff needed */
 }
 
@@ -561,6 +603,8 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in)
 	memcpy(&out->antenna_gain.ghz5, &out->antenna_gain.ghz24,
 	       sizeof(out->antenna_gain.ghz5));
 
+	sprom_extract_r458(out, in);
+
 	/* TODO - get remaining rev 8 stuff needed */
 }
 
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 623b704fdc42..9659eff52ca2 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -55,6 +55,10 @@ struct ssb_sprom {
 	u8 tri5gl;		/* 5.2GHz TX isolation */
 	u8 tri5g;		/* 5.3GHz TX isolation */
 	u8 tri5gh;		/* 5.8GHz TX isolation */
+	u8 txpid2g[4];		/* 2GHz TX power index */
+	u8 txpid5gl[4];		/* 4.9 - 5.1GHz TX power index */
+	u8 txpid5g[4];		/* 5.1 - 5.5GHz TX power index */
+	u8 txpid5gh[4];		/* 5.5 - ...GHz TX power index */
 	u8 rxpo2g;		/* 2GHz RX power offset */
 	u8 rxpo5g;		/* 5GHz RX power offset */
 	u8 rssisav2g;		/* 2GHz RSSI params */
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 11daf9c140e7..489f7b6d61c5 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -299,6 +299,46 @@
 #define  SSB_SPROM4_AGAIN2_SHIFT	0
 #define  SSB_SPROM4_AGAIN3		0xFF00	/* Antenna 3 */
 #define  SSB_SPROM4_AGAIN3_SHIFT	8
+#define SSB_SPROM4_TXPID2G01		0x0062 	/* TX Power Index 2GHz */
+#define  SSB_SPROM4_TXPID2G0		0x00FF
+#define  SSB_SPROM4_TXPID2G0_SHIFT	0
+#define  SSB_SPROM4_TXPID2G1		0xFF00
+#define  SSB_SPROM4_TXPID2G1_SHIFT	8
+#define SSB_SPROM4_TXPID2G23		0x0064 	/* TX Power Index 2GHz */
+#define  SSB_SPROM4_TXPID2G2		0x00FF
+#define  SSB_SPROM4_TXPID2G2_SHIFT	0
+#define  SSB_SPROM4_TXPID2G3		0xFF00
+#define  SSB_SPROM4_TXPID2G3_SHIFT	8
+#define SSB_SPROM4_TXPID5G01		0x0066 	/* TX Power Index 5GHz middle subband */
+#define  SSB_SPROM4_TXPID5G0		0x00FF
+#define  SSB_SPROM4_TXPID5G0_SHIFT	0
+#define  SSB_SPROM4_TXPID5G1		0xFF00
+#define  SSB_SPROM4_TXPID5G1_SHIFT	8
+#define SSB_SPROM4_TXPID5G23		0x0068 	/* TX Power Index 5GHz middle subband */
+#define  SSB_SPROM4_TXPID5G2		0x00FF
+#define  SSB_SPROM4_TXPID5G2_SHIFT	0
+#define  SSB_SPROM4_TXPID5G3		0xFF00
+#define  SSB_SPROM4_TXPID5G3_SHIFT	8
+#define SSB_SPROM4_TXPID5GL01		0x006A 	/* TX Power Index 5GHz low subband */
+#define  SSB_SPROM4_TXPID5GL0		0x00FF
+#define  SSB_SPROM4_TXPID5GL0_SHIFT	0
+#define  SSB_SPROM4_TXPID5GL1		0xFF00
+#define  SSB_SPROM4_TXPID5GL1_SHIFT	8
+#define SSB_SPROM4_TXPID5GL23		0x006C 	/* TX Power Index 5GHz low subband */
+#define  SSB_SPROM4_TXPID5GL2		0x00FF
+#define  SSB_SPROM4_TXPID5GL2_SHIFT	0
+#define  SSB_SPROM4_TXPID5GL3		0xFF00
+#define  SSB_SPROM4_TXPID5GL3_SHIFT	8
+#define SSB_SPROM4_TXPID5GH01		0x006E 	/* TX Power Index 5GHz high subband */
+#define  SSB_SPROM4_TXPID5GH0		0x00FF
+#define  SSB_SPROM4_TXPID5GH0_SHIFT	0
+#define  SSB_SPROM4_TXPID5GH1		0xFF00
+#define  SSB_SPROM4_TXPID5GH1_SHIFT	8
+#define SSB_SPROM4_TXPID5GH23		0x0070 	/* TX Power Index 5GHz high subband */
+#define  SSB_SPROM4_TXPID5GH2		0x00FF
+#define  SSB_SPROM4_TXPID5GH2_SHIFT	0
+#define  SSB_SPROM4_TXPID5GH3		0xFF00
+#define  SSB_SPROM4_TXPID5GH3_SHIFT	8
 #define SSB_SPROM4_MAXP_BG		0x0080  /* Max Power BG in path 1 */
 #define  SSB_SPROM4_MAXP_BG_MASK	0x00FF  /* Mask for Max Power BG */
 #define  SSB_SPROM4_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
-- 
cgit v1.2.3-71-gd317


From 87de5ac782761a3ebf806e434e8c9cc205a87274 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 20 Sep 2010 17:42:46 -0700
Subject: timers: Introduce timerlist infrastructure.

The timerlist infrastructure is a thin layer over the rbtree
code that implements a simple list of timers sorted by an
expires value, and a getnext function that provides a pointer
to the earliest timer.

This infrastructure allows drivers and other kernel infrastructure
to easily implement timers without duplicating code.

Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML Reference: <1290136329-18291-2-git-send-email-john.stultz@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
CC: Alessandro Zummo <a.zummo@towertech.it>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Richard Cochran <richardcochran@gmail.com>
---
 include/linux/timerlist.h |  37 +++++++++++++++
 lib/Makefile              |   2 +-
 lib/timerlist.c           | 118 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/timerlist.h
 create mode 100644 lib/timerlist.c

(limited to 'include/linux')

diff --git a/include/linux/timerlist.h b/include/linux/timerlist.h
new file mode 100644
index 000000000000..c46b28ae6e4d
--- /dev/null
+++ b/include/linux/timerlist.h
@@ -0,0 +1,37 @@
+#ifndef _LINUX_TIMERLIST_H
+#define _LINUX_TIMERLIST_H
+
+#include <linux/rbtree.h>
+#include <linux/ktime.h>
+
+
+struct timerlist_node {
+	struct rb_node node;
+	ktime_t expires;
+};
+
+struct timerlist_head {
+	struct rb_root head;
+	struct timerlist_node *next;
+};
+
+
+extern void timerlist_add(struct timerlist_head *head,
+				struct timerlist_node *node);
+extern void timerlist_del(struct timerlist_head *head,
+				struct timerlist_node *node);
+extern struct timerlist_node *timerlist_getnext(struct timerlist_head *head);
+extern struct timerlist_node *timerlist_iterate_next(
+						struct timerlist_node *node);
+
+static inline void timerlist_init(struct timerlist_node *node)
+{
+	RB_CLEAR_NODE(&node->node);
+}
+
+static inline void timerlist_init_head(struct timerlist_head *head)
+{
+	head->head = RB_ROOT;
+	head->next = NULL;
+}
+#endif /* _LINUX_TIMERLIST_H */
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763b8212..8b475cfb8195 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o \
+	 rbtree.o radix-tree.o dump_stack.o timerlist.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
diff --git a/lib/timerlist.c b/lib/timerlist.c
new file mode 100644
index 000000000000..9101b42fd13d
--- /dev/null
+++ b/lib/timerlist.c
@@ -0,0 +1,118 @@
+/*
+ *  Generic Timer-list
+ *
+ *  Manages a simple list of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this libary code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/timerlist.h>
+#include <linux/rbtree.h>
+
+/**
+ * timerlist_add - Adds timer to timerlist.
+ *
+ * @head: head of timerlist
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerlist, sorted by the
+ * node's expires value.
+ */
+void timerlist_add(struct timerlist_head *head, struct timerlist_node *node)
+{
+	struct rb_node **p = &head->head.rb_node;
+	struct rb_node *parent = NULL;
+	struct timerlist_node  *ptr;
+
+	/* Make sure we don't add nodes that are already added */
+	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+	while (*p) {
+		parent = *p;
+		ptr = rb_entry(parent, struct timerlist_node, node);
+		if (node->expires.tv64 < ptr->expires.tv64)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->node, parent, p);
+	rb_insert_color(&node->node, &head->head);
+
+	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+		head->next = node;
+}
+
+/**
+ * timerlist_del - Removes a timer from the timerlist.
+ *
+ * @head: head of timerlist
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerlist.
+ */
+void timerlist_del(struct timerlist_head *head, struct timerlist_node *node)
+{
+	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+	/* update next pointer */
+	if (head->next == node) {
+		struct rb_node *rbn = rb_next(&node->node);
+
+		head->next = rbn ?
+			rb_entry(rbn, struct timerlist_node, node) : NULL;
+	}
+	rb_erase(&node->node, &head->head);
+	RB_CLEAR_NODE(&node->node);
+}
+
+
+/**
+ * timerlist_getnext - Returns the timer with the earlies expiration time
+ *
+ * @head: head of timerlist
+ *
+ * Returns a pointer to the timer node that has the
+ * earliest expiration time.
+ */
+struct timerlist_node *timerlist_getnext(struct timerlist_head *head)
+{
+	return head->next;
+}
+
+
+/**
+ * timerlist_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerlist_node *timerlist_iterate_next(struct timerlist_node *node)
+{
+	struct rb_node *next;
+
+	if (!node)
+		return NULL;
+	next = rb_next(&node->node);
+	if (!next)
+		return NULL;
+	return container_of(next, struct timerlist_node, node);
+}
-- 
cgit v1.2.3-71-gd317


From 287050d390264402e11bea8b811859e42e8faa29 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 2 Dec 2010 16:46:18 -0500
Subject: tracing: Add TRACE_EVENT_CONDITIONAL()

There are instances in the kernel that we only want to trace
a tracepoint when a certain condition is set. But we do not
want to test for that condition in the core kernel.
If we test for that condition before calling the tracepoin, then
we will be performing that test even when tracing is not enabled.
This is 99.99% of the time.

We currently can just filter out on that condition, but that happens
after we write to the trace buffer. We just wasted time writing to
the ring buffer for an event we never cared about.

This patch adds:

   TRACE_EVENT_CONDITION() and DEFINE_EVENT_CONDITION()

These have a new TP_CONDITION() argument that comes right after
the TP_ARGS().  This condition can use the parameters of TP_ARGS()
in the TRACE_EVENT() to determine if the tracepoint should be traced
or not. The TP_CONDITION() will be placed in a if (cond) trace;

For example, for the tracepoint sched_wakeup, it is useless to
trace a wakeup event where the caller never actually wakes
anything up (where success == 0). So adding:

	TP_CONDITION(success),

which uses the "success" parameter of the wakeup tracepoint
will have it only trace when we have successfully woken up a
task.

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   | 29 +++++++++++++++++++++++------
 include/trace/define_trace.h | 15 +++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 5a6074fcd81d..d3e4f87e95c0 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)	args
+#define TP_CONDITION(args...)	args
 
 #ifdef CONFIG_TRACEPOINTS
 
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args)					\
+#define __DO_TRACE(tp, proto, args, cond)				\
 	do {								\
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
 									\
+		if (!(cond))						\
+			return;						\
 		rcu_read_lock_sched_notrace();				\
 		it_func_ptr = rcu_dereference_sched((tp)->funcs);	\
 		if (it_func_ptr) {					\
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)	\
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)	\
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 do_trace:								\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
-				TP_ARGS(data_args));			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond));			\
 	}								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
@@ -186,7 +190,7 @@ do_trace:								\
 	EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)	\
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)	\
 	static inline void trace_##name(proto)				\
 	{ }								\
 	static inline int						\
@@ -227,13 +231,18 @@ do_trace:								\
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)					\
-		__DECLARE_TRACE(name, void, , void *__data, __data)
+		__DECLARE_TRACE(name, void, , 1, void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)				\
-		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),	\
+		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,	\
 				PARAMS(void *__data, proto),		\
 				PARAMS(__data, args))
 
+#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+			PARAMS(void *__data, proto),			\
+			PARAMS(__data, args))
+
 #define TRACE_EVENT_FLAGS(event, flag)
 
 #endif /* DECLARE_TRACE */
@@ -349,12 +358,20 @@ do_trace:								\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_CONDITION(template, name, proto,		\
+			       args, cond)			\
+	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+				PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
 		assign, print, reg, unreg)			\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
+			      struct, assign, print)		\
+	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
+				PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT_FLAGS(event, flag)
 
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 1dfab5401511..b0b4eb24d592 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,6 +26,15 @@
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
 	DEFINE_TRACE(name)
 
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+	TRACE_EVENT(name,						\
+		PARAMS(proto),						\
+		PARAMS(args),						\
+		PARAMS(tstruct),					\
+		PARAMS(assign),						\
+		PARAMS(print))
+
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,		\
 		assign, print, reg, unreg)			\
@@ -39,6 +48,10 @@
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 	DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
@@ -75,9 +88,11 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
 #undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
 #undef TRACE_HEADER_MULTI_READ
 #undef DECLARE_TRACE
 
-- 
cgit v1.2.3-71-gd317


From 6844c09d849aeb00e8ddfe9525e8567a531c22d0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 3 Dec 2010 16:36:35 -0200
Subject: perf events: Separate the routines handling the PERF_SAMPLE_ identity
 fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those will be made available in sample like events like MMAP, EXEC, etc in a
followup patch. So precalculate the extra id header space and have a separate
routine to fill them up.

V2: Thomas noticed that the id header needs to be precalculated at
inherit_events too:

LKML-Reference: <alpine.LFD.2.00.1012031245220.2653@localhost6.localdomain6>

Tested-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1291318772-30880-2-git-send-email-acme@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h |   1 +
 kernel/perf_event.c        | 129 ++++++++++++++++++++++++++-------------------
 2 files changed, 76 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index adf6d9931643..b9950b1620d8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -759,6 +759,7 @@ struct perf_event {
 
 	struct perf_event_attr		attr;
 	u16				header_size;
+	u16				id_header_size;
 	u16				read_size;
 	struct hw_perf_event		hw;
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7961b27aceea..a04799769566 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -133,6 +133,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
 	}
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -351,15 +373,30 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_IP)
 		size += sizeof(data->ip);
 
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
 	if (sample_type & PERF_SAMPLE_TID)
 		size += sizeof(data->tid_entry);
 
 	if (sample_type & PERF_SAMPLE_TIME)
 		size += sizeof(data->time);
 
-	if (sample_type & PERF_SAMPLE_ADDR)
-		size += sizeof(data->addr);
-
 	if (sample_type & PERF_SAMPLE_ID)
 		size += sizeof(data->id);
 
@@ -369,13 +406,7 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_CPU)
 		size += sizeof(data->cpu_entry);
 
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		size += event->read_size;
-
-	event->header_size = size;
+	event->id_header_size = size;
 }
 
 static void perf_group_attach(struct perf_event *event)
@@ -3357,6 +3388,36 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
 	} while (len);
 }
 
+static void perf_event_header__init_id(struct perf_event_header *header,
+				       struct perf_sample_data *data,
+				       struct perf_event *event)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	data->type = sample_type;
+	header->size += event->id_header_size;
+
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		data->tid_entry.pid = perf_event_pid(event, current);
+		data->tid_entry.tid = perf_event_tid(event, current);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		data->time = perf_clock();
+
+	if (sample_type & PERF_SAMPLE_ID)
+		data->id = primary_event_id(event);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		data->stream_id = event->id;
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		data->cpu_entry.cpu	 = raw_smp_processor_id();
+		data->cpu_entry.reserved = 0;
+	}
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size,
 		      int nmi, int sample)
@@ -3459,28 +3520,6 @@ void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
 				 struct perf_event *event,
 				 u64 enabled, u64 running)
@@ -3655,37 +3694,17 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
 	u64 sample_type = event->attr.sample_type;
 
-	data->type = sample_type;
-
 	header->type = PERF_RECORD_SAMPLE;
 	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
+	perf_event_header__init_id(header, data, event);
+
 	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
 
-	if (sample_type & PERF_SAMPLE_TID) {
-		/* namespace issues */
-		data->tid_entry.pid = perf_event_pid(event, current);
-		data->tid_entry.tid = perf_event_tid(event, current);
-	}
-
-	if (sample_type & PERF_SAMPLE_TIME)
-		data->time = perf_clock();
-
-	if (sample_type & PERF_SAMPLE_ID)
-		data->id = primary_event_id(event);
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID)
-		data->stream_id = event->id;
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		data->cpu_entry.cpu		= raw_smp_processor_id();
-		data->cpu_entry.reserved	= 0;
-	}
-
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
@@ -5745,6 +5764,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	 * Precalculate sample_data sizes
 	 */
 	perf_event__header_size(event);
+	perf_event__id_header_size(event);
 
 	/*
 	 * Drop the reference on the group_event after placing the
@@ -6102,6 +6122,7 @@ inherit_event(struct perf_event *parent_event,
 	 * Precalculate sample_data sizes
 	 */
 	perf_event__header_size(child_event);
+	perf_event__id_header_size(child_event);
 
 	/*
 	 * Link it up in the child's context:
-- 
cgit v1.2.3-71-gd317


From c980d1091810df13f21aabbce545fd98f545bbf7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 4 Dec 2010 23:02:20 -0200
Subject: perf events: Make sample_type identity fields available in all
 PERF_RECORD_ events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If perf_event_attr.sample_id_all is set it will add the PERF_SAMPLE_ identity
info:

TID, TIME, ID, CPU, STREAM_ID

As a trailer, so that older perf tools can process new files, just ignoring the
extra payload.

With this its possible to do further analysis on problems in the event stream,
like detecting reordering of MMAP and FORK events, etc.

V2: Fixup header size in comm, mmap and task processing, as we have to take into
account different sample_types for each matching event, noticed by Thomas Gleixner.

Thomas also noticed a problem in v2 where if we didn't had space in the buffer we
wouldn't restore the header size.

Tested-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h |  12 ++++-
 kernel/perf_event.c        | 108 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 102 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b9950b1620d8..2814ead4adb8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@ struct perf_event_attr {
 				 */
 				precise_ip     :  2, /* skid constraint       */
 				mmap_data      :  1, /* non-exec mmap data    */
+				sample_id_all  :  1, /* sample_type all events */
 
-				__reserved_1   : 46;
+				__reserved_1   : 45;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
 enum perf_event_type {
 
 	/*
+	 * If perf_event_attr.sample_id_all is set then all event types will
+	 * have the sample_type selected fields related to where/when
+	 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
+	 * described in PERF_RECORD_SAMPLE below, it will be stashed just after
+	 * the perf_event_header and the fields already present for the existing
+	 * fields, i.e. at the end of the payload. That way a newer perf.data
+	 * file will be supported by older perf tools, with these new optional
+	 * fields being ignored.
+	 *
 	 * The MMAP events record the PROT_EXEC mappings so that we can
 	 * correlate userspace IPs to code. They have the following structure:
 	 *
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a04799769566..77ad22c00b9d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3388,9 +3388,9 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
 	} while (len);
 }
 
-static void perf_event_header__init_id(struct perf_event_header *header,
-				       struct perf_sample_data *data,
-				       struct perf_event *event)
+static void __perf_event_header__init_id(struct perf_event_header *header,
+					 struct perf_sample_data *data,
+					 struct perf_event *event)
 {
 	u64 sample_type = event->attr.sample_type;
 
@@ -3418,6 +3418,43 @@ static void perf_event_header__init_id(struct perf_event_header *header,
 	}
 }
 
+static void perf_event_header__init_id(struct perf_event_header *header,
+				       struct perf_sample_data *data,
+				       struct perf_event *event)
+{
+	if (event->attr.sample_id_all)
+		__perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+					   struct perf_sample_data *data)
+{
+	u64 sample_type = data->type;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(handle, data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(handle, data->time);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(handle, data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		perf_output_put(handle, data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+					 struct perf_output_handle *handle,
+					 struct perf_sample_data *sample)
+{
+	if (event->attr.sample_id_all)
+		__perf_event__output_id_sample(handle, sample);
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size,
 		      int nmi, int sample)
@@ -3425,6 +3462,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	struct perf_buffer *buffer;
 	unsigned long tail, offset, head;
 	int have_lost;
+	struct perf_sample_data sample_data;
 	struct {
 		struct perf_event_header header;
 		u64			 id;
@@ -3451,8 +3489,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 		goto out;
 
 	have_lost = local_read(&buffer->lost);
-	if (have_lost)
-		size += sizeof(lost_event);
+	if (have_lost) {
+		lost_event.header.size = sizeof(lost_event);
+		perf_event_header__init_id(&lost_event.header, &sample_data,
+					   event);
+		size += lost_event.header.size;
+	}
 
 	perf_output_get_handle(handle);
 
@@ -3483,11 +3525,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
-		lost_event.header.size = sizeof(lost_event);
 		lost_event.id          = event->id;
 		lost_event.lost        = local_xchg(&buffer->lost, 0);
 
 		perf_output_put(handle, lost_event);
+		perf_event__output_id_sample(event, handle, &sample_data);
 	}
 
 	return 0;
@@ -3700,7 +3742,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	perf_event_header__init_id(header, data, event);
+	__perf_event_header__init_id(header, data, event);
 
 	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
@@ -3768,6 +3810,7 @@ perf_event_read_event(struct perf_event *event,
 			struct task_struct *task)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	struct perf_read_event read_event = {
 		.header = {
 			.type = PERF_RECORD_READ,
@@ -3779,12 +3822,14 @@ perf_event_read_event(struct perf_event *event,
 	};
 	int ret;
 
+	perf_event_header__init_id(&read_event.header, &sample, event);
 	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, read_event);
 	perf_output_read(&handle, event);
+	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
 }
@@ -3814,14 +3859,16 @@ static void perf_event_task_output(struct perf_event *event,
 				     struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data	sample;
 	struct task_struct *task = task_event->task;
-	int size, ret;
+	int ret, size = task_event->event_id.header.size;
 
-	size  = task_event->event_id.header.size;
-	ret = perf_output_begin(&handle, event, size, 0, 0);
+	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+	ret = perf_output_begin(&handle, event,
+				task_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3831,7 +3878,11 @@ static void perf_event_task_output(struct perf_event *event,
 
 	perf_output_put(&handle, task_event->event_id);
 
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3944,11 +3995,16 @@ static void perf_event_comm_output(struct perf_event *event,
 				     struct perf_comm_event *comm_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = comm_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
+
+	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				comm_event->event_id.header.size, 0, 0);
 
 	if (ret)
-		return;
+		goto out;
 
 	comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
 	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3956,7 +4012,12 @@ static void perf_event_comm_output(struct perf_event *event,
 	perf_output_put(&handle, comm_event->event_id);
 	perf_output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -4001,7 +4062,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	comm_event->comm_size = size;
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4080,11 +4140,15 @@ static void perf_event_mmap_output(struct perf_event *event,
 				     struct perf_mmap_event *mmap_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = mmap_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
 
+	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				mmap_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	mmap_event->event_id.pid = perf_event_pid(event, current);
 	mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4092,7 +4156,12 @@ static void perf_event_mmap_output(struct perf_event *event,
 	perf_output_put(&handle, mmap_event->event_id);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4245,6 +4314,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int ret;
 
 	struct {
@@ -4266,11 +4336,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 	if (enable)
 		throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-	ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+	perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event,
+				throttle_event.header.size, 1, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, throttle_event);
+	perf_event__output_id_sample(event, &handle, &sample);
 	perf_output_end(&handle);
 }
 
-- 
cgit v1.2.3-71-gd317


From cd7ebe2298ff1c3112232878678ce5fe6be8a15b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 3 Dec 2010 18:54:28 +0900
Subject: kprobes: Use text_poke_smp_batch for optimizing

Use text_poke_smp_batch() in optimization path for reducing
the number of stop_machine() issues. If the number of optimizing
probes is more than MAX_OPTIMIZE_PROBES(=256), kprobes optimizes
first MAX_OPTIMIZE_PROBES probes and kicks optimizer for
remaining probes.

Changes in v5:
- Use kick_kprobe_optimizer() instead of directly calling
  schedule_delayed_work().
- Rescheduling optimizer outside of kprobe mutex lock.

Changes in v2:
- Allocate code buffer and parameters in arch_init_kprobes()
  instead of using static arraies.
- Merge previous max optimization limit patch into this patch.
  So, this patch introduces upper limit of optimization at
  once.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: 2nddept-manager@sdl.hitachi.co.jp
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20101203095428.2961.8994.stgit@ltc236.sdl.hitachi.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 69 +++++++++++++++++++++++++++++++++++++++++------
 include/linux/kprobes.h   |  2 +-
 kernel/kprobes.c          | 17 +++++-------
 3 files changed, 69 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index da51dc8e77cb..25a8af76feb5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1405,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 	return 0;
 }
 
-/* Replace a breakpoint (int3) with a relative jump.  */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+	u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+					    u8 *insn_buf,
+					    struct optimized_kprobe *op)
 {
-	unsigned char jmp_code[RELATIVEJUMP_SIZE];
 	s32 rel = (s32)((long)op->optinsn.insn -
 			((long)op->kp.addr + RELATIVEJUMP_SIZE));
 
@@ -1416,16 +1422,39 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
 	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
 	       RELATIVE_ADDR_SIZE);
 
-	jmp_code[0] = RELATIVEJUMP_OPCODE;
-	*(s32 *)(&jmp_code[1]) = rel;
+	insn_buf[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&insn_buf[1]) = rel;
+
+	tprm->addr = op->kp.addr;
+	tprm->opcode = insn_buf;
+	tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+	struct optimized_kprobe *op, *tmp;
+	int c = 0;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		WARN_ON(kprobe_disabled(&op->kp));
+		/* Setup param */
+		setup_optimize_kprobe(&jump_poke_params[c],
+				      jump_poke_bufs[c].buf, op);
+		list_del_init(&op->list);
+		if (++c >= MAX_OPTIMIZE_PROBES)
+			break;
+	}
 
 	/*
 	 * text_poke_smp doesn't support NMI/MCE code modifying.
 	 * However, since kprobes itself also doesn't support NMI/MCE
 	 * code probing, it's not a problem.
 	 */
-	text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
-	return 0;
+	text_poke_smp_batch(jump_poke_params, c);
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
@@ -1457,11 +1486,35 @@ static int  __kprobes setup_detour_execution(struct kprobe *p,
 	}
 	return 0;
 }
+
+static int __kprobes init_poke_params(void)
+{
+	/* Allocate code buffer and parameter array */
+	jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+				 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+	if (!jump_poke_bufs)
+		return -ENOMEM;
+
+	jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+				   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+	if (!jump_poke_params) {
+		kfree(jump_poke_bufs);
+		jump_poke_bufs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+#else	/* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+	return 0;
+}
 #endif
 
 int __init arch_init_kprobes(void)
 {
-	return 0;
+	return init_poke_params();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e0070d..fe157ba6aa0e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,7 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
 extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 134754d18bb4..531e10164836 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -480,8 +480,6 @@ static DECLARE_COMPLETION(optimizer_comp);
  */
 static __kprobes void do_optimize_kprobes(void)
 {
-	struct optimized_kprobe *op, *tmp;
-
 	/* Optimization never be done when disarmed */
 	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
 	    list_empty(&optimizing_list))
@@ -499,12 +497,7 @@ static __kprobes void do_optimize_kprobes(void)
 	 */
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
-		WARN_ON(kprobe_disabled(&op->kp));
-		if (arch_optimize_kprobe(op) < 0)
-			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-		list_del_init(&op->list);
-	}
+	arch_optimize_kprobes(&optimizing_list);
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
 }
@@ -598,8 +591,12 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	mutex_unlock(&kprobe_mutex);
 	mutex_unlock(&module_mutex);
 
-	/* Wake up all waiters */
-	complete_all(&optimizer_comp);
+	/* Step 5: Kick optimizer again if needed */
+	if (!list_empty(&optimizing_list))
+		kick_kprobe_optimizer();
+	else
+		/* Wake up all waiters */
+		complete_all(&optimizer_comp);
 }
 
 /* Wait for completing optimization and unoptimization */
-- 
cgit v1.2.3-71-gd317


From f984ba4eb575e4a27ed28a76d4126d2aa9233c32 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 3 Dec 2010 18:54:34 +0900
Subject: kprobes: Use text_poke_smp_batch for unoptimizing

Use text_poke_smp_batch() on unoptimization path for reducing
the number of stop_machine() issues. If the number of
unoptimizing probes is more than MAX_OPTIMIZE_PROBES(=256),
kprobes unoptimizes first MAX_OPTIMIZE_PROBES probes and kicks
optimizer for remaining probes.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: 2nddept-manager@sdl.hitachi.co.jp
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20101203095434.2961.22657.stgit@ltc236.sdl.hitachi.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/kprobes.h   |  2 ++
 kernel/kprobes.c          | 10 ++++------
 3 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 25a8af76feb5..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1457,6 +1457,46 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
 	text_poke_smp_batch(jump_poke_params, c);
 }
 
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+					      u8 *insn_buf,
+					      struct optimized_kprobe *op)
+{
+	/* Set int3 to first byte for kprobes */
+	insn_buf[0] = BREAKPOINT_INSTRUCTION;
+	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+	tprm->addr = op->kp.addr;
+	tprm->opcode = insn_buf;
+	tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+				    struct list_head *done_list)
+{
+	struct optimized_kprobe *op, *tmp;
+	int c = 0;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		/* Setup param */
+		setup_unoptimize_kprobe(&jump_poke_params[c],
+					jump_poke_bufs[c].buf, op);
+		list_move(&op->list, done_list);
+		if (++c >= MAX_OPTIMIZE_PROBES)
+			break;
+	}
+
+	/*
+	 * text_poke_smp doesn't support NMI/MCE code modifying.
+	 * However, since kprobes itself also doesn't support NMI/MCE
+	 * code probing, it's not a problem.
+	 */
+	text_poke_smp_batch(jump_poke_params, c);
+}
+
 /* Replace a relative jump with a breakpoint (int3).  */
 void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index fe157ba6aa0e..b78edb58ee66 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -276,6 +276,8 @@ extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+				    struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 531e10164836..7663e5df0e6f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -517,9 +517,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 	/* Ditto to do_optimize_kprobes */
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	list_for_each_entry_safe(op, tmp, &unoptimizing_list, list) {
-		/* Unoptimize kprobes */
-		arch_unoptimize_kprobe(op);
+	arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+	/* Loop free_list for disarming */
+	list_for_each_entry_safe(op, tmp, free_list, list) {
 		/* Disarm probes if marked disabled */
 		if (kprobe_disabled(&op->kp))
 			arch_disarm_kprobe(&op->kp);
@@ -530,8 +530,6 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 			 * (reclaiming is done by do_free_cleaned_kprobes.)
 			 */
 			hlist_del_rcu(&op->kp.hlist);
-			/* Move only unused probes on free_list */
-			list_move(&op->list, free_list);
 		} else
 			list_del_init(&op->list);
 	}
@@ -592,7 +590,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	mutex_unlock(&module_mutex);
 
 	/* Step 5: Kick optimizer again if needed */
-	if (!list_empty(&optimizing_list))
+	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
 		kick_kprobe_optimizer();
 	else
 		/* Wake up all waiters */
-- 
cgit v1.2.3-71-gd317


From 3110f5f5545a645c50ef66b1f705d08dfd1df404 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Mon, 6 Dec 2010 08:28:50 +0000
Subject: tg3: Move EEE definitions into mdio.h

In commit 52b02d04c801fff51ca49ad033210846d1713253 entitled "tg3: Add
EEE support", Ben Hutchings had commented that the EEE advertisement
register will be in a standard location.  This patch moves that
definition into mdio.h and changes the code to use it.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c    | 16 ++++++++--------
 drivers/net/tg3.h    |  3 ---
 include/linux/mdio.h |  5 +++++
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index b8ae5e19ced5..1e7a135de7b3 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -32,6 +32,7 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/ethtool.h>
+#include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
 #include <linux/brcmphy.h>
@@ -1781,7 +1782,8 @@ static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up)
 
 		tw32(TG3_CPMU_EEE_CTRL, eeectl);
 
-		tg3_phy_cl45_read(tp, 0x7, TG3_CL45_D7_EEERES_STAT, &val);
+		tg3_phy_cl45_read(tp, MDIO_MMD_AN,
+				  TG3_CL45_D7_EEERES_STAT, &val);
 
 		if (val == TG3_CL45_D7_EEERES_STAT_LP_1000T ||
 		    val == TG3_CL45_D7_EEERES_STAT_LP_100TX)
@@ -2987,16 +2989,14 @@ static void tg3_phy_copper_begin(struct tg3 *tp)
 		if (tp->link_config.autoneg == AUTONEG_ENABLE) {
 			/* Advertise 100-BaseTX EEE ability */
 			if (tp->link_config.advertising &
-			    (ADVERTISED_100baseT_Half |
-			     ADVERTISED_100baseT_Full))
-				val |= TG3_CL45_D7_EEEADV_CAP_100TX;
+			    ADVERTISED_100baseT_Full)
+				val |= MDIO_AN_EEE_ADV_100TX;
 			/* Advertise 1000-BaseT EEE ability */
 			if (tp->link_config.advertising &
-			    (ADVERTISED_1000baseT_Half |
-			     ADVERTISED_1000baseT_Full))
-				val |= TG3_CL45_D7_EEEADV_CAP_1000T;
+			    ADVERTISED_1000baseT_Full)
+				val |= MDIO_AN_EEE_ADV_1000T;
 		}
-		tg3_phy_cl45_write(tp, 0x7, TG3_CL45_D7_EEEADV_CAP, val);
+		tg3_phy_cl45_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val);
 
 		/* Turn off SM_DSP clock. */
 		val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 59b0e096149e..6e72c6bd2675 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2172,9 +2172,6 @@
 #define MII_TG3_TEST1_CRC_EN		0x8000
 
 /* Clause 45 expansion registers */
-#define TG3_CL45_D7_EEEADV_CAP		0x003c
-#define TG3_CL45_D7_EEEADV_CAP_100TX	0x0002
-#define TG3_CL45_D7_EEEADV_CAP_1000T	0x0004
 #define TG3_CL45_D7_EEERES_STAT		0x803e
 #define TG3_CL45_D7_EEERES_STAT_LP_100TX	0x0002
 #define TG3_CL45_D7_EEERES_STAT_LP_1000T	0x0004
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index c779b49a1fda..b1494aced217 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -55,6 +55,7 @@
 #define MDIO_PCS_10GBRT_STAT2	33	/* 10GBASE-R/-T PCS status 2 */
 #define MDIO_AN_10GBT_CTRL	32	/* 10GBASE-T auto-negotiation control */
 #define MDIO_AN_10GBT_STAT	33	/* 10GBASE-T auto-negotiation status */
+#define MDIO_AN_EEE_ADV		60	/* EEE advertisement */
 
 /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
 #define MDIO_PMA_LASI_RXCTRL	0x9000	/* RX_ALARM control */
@@ -235,6 +236,10 @@
 #define MDIO_AN_10GBT_STAT_MS		0x4000	/* Master/slave config */
 #define MDIO_AN_10GBT_STAT_MSFLT	0x8000	/* Master/slave config fault */
 
+/* AN EEE Advertisement register. */
+#define MDIO_AN_EEE_ADV_100TX		0x0002	/* Advertise 100TX EEE cap */
+#define MDIO_AN_EEE_ADV_1000T		0x0004	/* Advertise 1000T EEE cap */
+
 /* LASI RX_ALARM control/status registers. */
 #define MDIO_PMA_LASI_RX_PHYXSLFLT	0x0001	/* PHY XS RX local fault */
 #define MDIO_PMA_LASI_RX_PCSLFLT	0x0008	/* PCS RX local fault */
-- 
cgit v1.2.3-71-gd317


From af5568843594fb71055debe36e521fa8072fcecc Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 2 Dec 2010 19:50:37 +0900
Subject: lib: Improve EWMA efficiency by using bitshifts

Using bitshifts instead of division and multiplication should improve
performance. That requires weight and factor to be powers of two, but i think
this is something we can live with.

Thanks to Peter Zijlstra for the improved formula!

Signed-off-by: Bruno Randolf <br1@einfach.org>

--

v2:	use log2.h functions
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/average.h |  4 +---
 lib/average.c           | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/average.h b/include/linux/average.h
index 7706e40f95fa..c6028fd742c1 100644
--- a/include/linux/average.h
+++ b/include/linux/average.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_AVERAGE_H
 #define _LINUX_AVERAGE_H
 
-#include <linux/kernel.h>
-
 /* Exponentially weighted moving average (EWMA) */
 
 /* For more documentation see lib/average.c */
@@ -26,7 +24,7 @@ extern struct ewma *ewma_add(struct ewma *avg, unsigned long val);
  */
 static inline unsigned long ewma_read(const struct ewma *avg)
 {
-	return DIV_ROUND_CLOSEST(avg->internal, avg->factor);
+	return avg->internal >> avg->factor;
 }
 
 #endif /* _LINUX_AVERAGE_H */
diff --git a/lib/average.c b/lib/average.c
index f1d1b4660c42..5576c2841496 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/average.h>
 #include <linux/bug.h>
+#include <linux/log2.h>
 
 /**
  * DOC: Exponentially Weighted Moving Average (EWMA)
@@ -24,18 +25,21 @@
  * ewma_init() - Initialize EWMA parameters
  * @avg: Average structure
  * @factor: Factor to use for the scaled up internal value. The maximum value
- *	of averages can be ULONG_MAX/(factor*weight).
+ *	of averages can be ULONG_MAX/(factor*weight). For performance reasons
+ *	factor has to be a power of 2.
  * @weight: Exponential weight, or decay rate. This defines how fast the
- *	influence of older values decreases. Has to be bigger than 1.
+ *	influence of older values decreases. For performance reasons weight has
+ *	to be a power of 2.
  *
  * Initialize the EWMA parameters for a given struct ewma @avg.
  */
 void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
 {
-	WARN_ON(weight <= 1 || factor == 0);
+	WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
+
+	avg->weight = ilog2(weight);
+	avg->factor = ilog2(factor);
 	avg->internal = 0;
-	avg->weight = weight;
-	avg->factor = factor;
 }
 EXPORT_SYMBOL(ewma_init);
 
@@ -49,9 +53,9 @@ EXPORT_SYMBOL(ewma_init);
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
 	avg->internal = avg->internal  ?
-		(((avg->internal * (avg->weight - 1)) +
-			(val * avg->factor)) / avg->weight) :
-		(val * avg->factor);
+		(((avg->internal << avg->weight) - avg->internal) +
+			(val << avg->factor)) >> avg->weight :
+		(val << avg->factor);
 	return avg;
 }
 EXPORT_SYMBOL(ewma_add);
-- 
cgit v1.2.3-71-gd317


From 073285fd392f6dc901da7c698d46e1e2a7e26436 Mon Sep 17 00:00:00 2001
From: Alexey Orishko <alexey.orishko@gmail.com>
Date: Mon, 29 Nov 2010 23:23:27 +0000
Subject: usbnet: changes for upcoming cdc_ncm driver

Changes:
include/linux/usb/usbnet.h:
- a new flag to indicate driver's capability to accumulate IP packets in Tx
 direction and extract several packets from single skb in Rx direction.
drivers/net/usb/usbnet.c:
- the procedure of counting packets in usbnet was updated due to the
 accumulating of IP packets in the driver
- no short packets are sent if indicated by the flag in driver_info
 structure

Signed-off-by: Alexey Orishko <alexey.orishko@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 45 +++++++++++++++++++++++++++++++--------------
 include/linux/usb/usbnet.h |  6 ++++++
 2 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index c04d49e31f81..cff74b81a7d2 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -391,14 +391,19 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb)
 		goto error;
 	// else network stack removes extra byte if we forced a short packet
 
-	if (skb->len)
-		usbnet_skb_return (dev, skb);
-	else {
-		netif_dbg(dev, rx_err, dev->net, "drop\n");
-error:
-		dev->net->stats.rx_errors++;
-		skb_queue_tail (&dev->done, skb);
+	if (skb->len) {
+		/* all data was already cloned from skb inside the driver */
+		if (dev->driver_info->flags & FLAG_MULTI_PACKET)
+			dev_kfree_skb_any(skb);
+		else
+			usbnet_skb_return(dev, skb);
+		return;
 	}
+
+	netif_dbg(dev, rx_err, dev->net, "drop\n");
+error:
+	dev->net->stats.rx_errors++;
+	skb_queue_tail(&dev->done, skb);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -971,7 +976,8 @@ static void tx_complete (struct urb *urb)
 	struct usbnet		*dev = entry->dev;
 
 	if (urb->status == 0) {
-		dev->net->stats.tx_packets++;
+		if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
+			dev->net->stats.tx_packets++;
 		dev->net->stats.tx_bytes += entry->length;
 	} else {
 		dev->net->stats.tx_errors++;
@@ -1044,8 +1050,13 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 	if (info->tx_fixup) {
 		skb = info->tx_fixup (dev, skb, GFP_ATOMIC);
 		if (!skb) {
-			netif_dbg(dev, tx_err, dev->net, "can't tx_fixup skb\n");
-			goto drop;
+			if (netif_msg_tx_err(dev)) {
+				netif_dbg(dev, tx_err, dev->net, "can't tx_fixup skb\n");
+				goto drop;
+			} else {
+				/* cdc_ncm collected packet; waits for more */
+				goto not_drop;
+			}
 		}
 	}
 	length = skb->len;
@@ -1067,13 +1078,18 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 	/* don't assume the hardware handles USB_ZERO_PACKET
 	 * NOTE:  strictly conforming cdc-ether devices should expect
 	 * the ZLP here, but ignore the one-byte packet.
+	 * NOTE2: CDC NCM specification is different from CDC ECM when
+	 * handling ZLP/short packets, so cdc_ncm driver will make short
+	 * packet itself if needed.
 	 */
 	if (length % dev->maxpacket == 0) {
 		if (!(info->flags & FLAG_SEND_ZLP)) {
-			urb->transfer_buffer_length++;
-			if (skb_tailroom(skb)) {
-				skb->data[skb->len] = 0;
-				__skb_put(skb, 1);
+			if (!(info->flags & FLAG_MULTI_PACKET)) {
+				urb->transfer_buffer_length++;
+				if (skb_tailroom(skb)) {
+					skb->data[skb->len] = 0;
+					__skb_put(skb, 1);
+				}
 			}
 		} else
 			urb->transfer_flags |= URB_ZERO_PACKET;
@@ -1122,6 +1138,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		netif_dbg(dev, tx_err, dev->net, "drop, code %d\n", retval);
 drop:
 		dev->net->stats.tx_dropped++;
+not_drop:
 		if (skb)
 			dev_kfree_skb_any (skb);
 		usb_free_urb (urb);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7ae27a473818..44842c8d38c0 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -97,6 +97,12 @@ struct driver_info {
 
 #define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
+/*
+ * Indicates to usbnet, that USB driver accumulates multiple IP packets.
+ * Affects statistic (counters) and short packet handling.
+ */
+#define FLAG_MULTI_PACKET	0x1000
+
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
 
-- 
cgit v1.2.3-71-gd317


From da2033c282264bfba4e339b7cb3df62adb5c5fc8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 30 Nov 2010 21:45:56 +0000
Subject: filter: add SKF_AD_RXHASH and SKF_AD_CPU

Add SKF_AD_RXHASH and SKF_AD_CPU to filter ancillary mechanism,
to be able to build advanced filters.

This can help spreading packets on several sockets with a fast
selection, after RPS dispatch to N cpus for example, or to catch a
percentage of flows in one queue.

tcpdump -s 500 "cpu = 1" :

[0] ld CPU
[1] jeq #1  jt 2  jf 3
[2] ret #500
[3] ret #0

# take 12.5 % of flows (average)
tcpdump -s 1000 "rxhash & 7 = 2" :

[0] ld RXHASH
[1] and #7
[2] jeq #2  jt 3  jf 4
[3] ret #1000
[4] ret #0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Rui <wirelesser@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 4 +++-
 net/core/filter.c      | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 447a775878fb..5334adaf4072 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -124,7 +124,9 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_MARK 	20
 #define SKF_AD_QUEUE	24
 #define SKF_AD_HATYPE	28
-#define SKF_AD_MAX	32
+#define SKF_AD_RXHASH	32
+#define SKF_AD_CPU	36
+#define SKF_AD_MAX	40
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index a44d27f9f0f0..054e286861d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -375,6 +375,12 @@ load_b:
 				return 0;
 			A = skb->dev->type;
 			continue;
+		case SKF_AD_RXHASH:
+			A = skb->rxhash;
+			continue;
+		case SKF_AD_CPU:
+			A = raw_smp_processor_id();
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3-71-gd317


From 06a9701f4b3e3381dea96fee1cc8a3bb41b0a1f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 1 Dec 2010 01:37:42 +0000
Subject: __in_dev_get_rtnl() can use rtnl_dereference()

If caller holds RTNL, we dont need a memory barrier
(smp_read_barrier_depends) included in rcu_dereference().

Just use rtnl_dereference() to properly document the assertions.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 2b86eaf11773..ae8fdc54e0c0 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -222,7 +222,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev)
 
 static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
 {
-	return rcu_dereference_check(dev->ip_ptr, lockdep_rtnl_is_held());
+	return rtnl_dereference(dev->ip_ptr);
 }
 
 extern void in_dev_finish_destroy(struct in_device *idev);
-- 
cgit v1.2.3-71-gd317


From 45904f21655cf4f0ae7d0fab5906fe51bf56ecf4 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Fri, 3 Dec 2010 09:20:40 +0100
Subject: nl80211/mac80211: define and allow configuring mesh element TTL

The TTL in path selection information elements is different from
the mesh ttl used in mesh data frames.  Version 7.03 of the 11s
draft calls this ttl 'Element TTL'.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h       | 4 ++++
 include/net/cfg80211.h        | 2 ++
 net/mac80211/cfg.c            | 2 ++
 net/mac80211/debugfs_netdev.c | 2 ++
 net/mac80211/mesh.c           | 1 +
 net/mac80211/mesh.h           | 2 ++
 net/mac80211/mesh_hwmp.c      | 9 +++++----
 net/mac80211/mesh_pathtbl.c   | 7 ++++---
 net/wireless/nl80211.c        | 5 +++++
 9 files changed, 27 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 5cfa579df476..9e541452d805 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1547,6 +1547,9 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
  * point.
  *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
  * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
  * open peer links when we detect compatible mesh peers.
  *
@@ -1593,6 +1596,7 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 	NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 	NL80211_MESHCONF_HWMP_ROOTMODE,
+	NL80211_MESHCONF_ELEMENT_TTL,
 
 	/* keep last */
 	__NL80211_MESHCONF_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6b2af7aeddd3..93a4b2068334 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -624,6 +624,8 @@ struct mesh_config {
 	u16 dot11MeshMaxPeerLinks;
 	u8  dot11MeshMaxRetries;
 	u8  dot11MeshTTL;
+	/* ttl used in path selection information elements */
+	u8  element_ttl;
 	bool auto_open_plinks;
 	/* HWMP parameters */
 	u8  dot11MeshHWMPmaxPREQretries;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index db134b500caa..ce6936890c26 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1024,6 +1024,8 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 		conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries;
 	if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask))
 		conf->dot11MeshTTL = nconf->dot11MeshTTL;
+	if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))
+		conf->dot11MeshTTL = nconf->element_ttl;
 	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask))
 		conf->auto_open_plinks = nconf->auto_open_plinks;
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask))
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cbdf36d7841c..2dabdf7680d0 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -251,6 +251,7 @@ IEEE80211_IF_FILE(dot11MeshConfirmTimeout,
 IEEE80211_IF_FILE(dot11MeshHoldingTimeout,
 		u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC);
 IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC);
+IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC);
 IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC);
 IEEE80211_IF_FILE(dot11MeshMaxPeerLinks,
 		u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC);
@@ -355,6 +356,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshConfirmTimeout);
 	MESHPARAMS_ADD(dot11MeshHoldingTimeout);
 	MESHPARAMS_ADD(dot11MeshTTL);
+	MESHPARAMS_ADD(element_ttl);
 	MESHPARAMS_ADD(auto_open_plinks);
 	MESHPARAMS_ADD(dot11MeshMaxPeerLinks);
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c8a4f19ed13b..78a36c79bdcc 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -668,6 +668,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
 	ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
 	ifmsh->mshcfg.dot11MeshTTL = MESH_TTL;
+	ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL;
 	ifmsh->mshcfg.auto_open_plinks = true;
 	ifmsh->mshcfg.dot11MeshMaxPeerLinks =
 		MESH_MAX_ESTAB_PLINKS;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 58e741128968..182942eeac4d 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -216,6 +216,8 @@ struct mesh_rmc {
 #define PERR_RCODE_NO_ROUTE     12
 #define PERR_RCODE_DEST_UNREACH 13
 
+#define MESH_DEFAULT_ELEMENT_TTL 31
+
 /* Public interfaces */
 /* Various */
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 829e08a657d0..5bf64d7112b3 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -232,7 +232,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 	*pos++ = WLAN_EID_PERR;
 	*pos++ = ie_len;
 	/* ttl */
-	*pos++ = MESH_TTL;
+	*pos++ = ttl;
 	/* number of destinations */
 	*pos++ = 1;
 	/*
@@ -522,7 +522,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 
 	if (reply) {
 		lifetime = PREQ_IE_LIFETIME(preq_elem);
-		ttl = ifmsh->mshcfg.dot11MeshTTL;
+		ttl = ifmsh->mshcfg.element_ttl;
 		if (ttl != 0) {
 			mhwmp_dbg("replying to the PREQ\n");
 			mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr,
@@ -877,7 +877,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 		sdata->u.mesh.last_sn_update = jiffies;
 	}
 	lifetime = default_lifetime(sdata);
-	ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
+	ttl = sdata->u.mesh.mshcfg.element_ttl;
 	if (ttl == 0) {
 		sdata->u.mesh.mshstats.dropped_frames_ttl++;
 		spin_unlock_bh(&mpath->state_lock);
@@ -1013,5 +1013,6 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata)
 	mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr,
 			       cpu_to_le32(++ifmsh->sn),
 			       0, NULL, 0, broadcast_addr,
-			       0, MESH_TTL, 0, 0, 0, sdata);
+			       0, sdata->u.mesh.mshcfg.element_ttl,
+			       0, 0, 0, sdata);
 }
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 349e466cf08b..8d65b47d9837 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -467,8 +467,8 @@ void mesh_plink_broken(struct sta_info *sta)
 			mpath->flags &= ~MESH_PATH_ACTIVE;
 			++mpath->sn;
 			spin_unlock_bh(&mpath->state_lock);
-			mesh_path_error_tx(MESH_TTL, mpath->dst,
-					cpu_to_le32(mpath->sn),
+			mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl,
+					mpath->dst, cpu_to_le32(mpath->sn),
 					cpu_to_le16(PERR_RCODE_DEST_UNREACH),
 					bcast, sdata);
 		} else
@@ -614,7 +614,8 @@ void mesh_path_discard_frame(struct sk_buff *skb,
 		mpath = mesh_path_lookup(da, sdata);
 		if (mpath)
 			sn = ++mpath->sn;
-		mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn),
+		mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data,
+				   cpu_to_le32(sn),
 				   cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata);
 	}
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 960be4e650f0..0b90cab5da2f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2582,6 +2582,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 			cur_params.dot11MeshMaxRetries);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_TTL,
 			cur_params.dot11MeshTTL);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_ELEMENT_TTL,
+			cur_params.element_ttl);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
 			cur_params.auto_open_plinks);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
@@ -2623,6 +2625,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_TTL] = { .type = NLA_U8 },
+	[NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 },
 
 	[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
@@ -2670,6 +2673,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
 			mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL,
 			mask, NL80211_MESHCONF_TTL, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl,
+			mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks,
 			mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries,
-- 
cgit v1.2.3-71-gd317


From 29cbe68c516a48a9a88b3226878570c6cbd83c02 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 3 Dec 2010 09:20:44 +0100
Subject: cfg80211/mac80211: add mesh join/leave commands

Instead of tying mesh activity to interface up,
add join and leave commands for mesh. Since we
must be backward compatible, let cfg80211 handle
joining a mesh if a mesh ID was pre-configured
when the device goes up.

Note that this therefore must modify mac80211 as
well since mac80211 needs to lose the logic to
start the mesh on interface up.

We now allow querying mesh parameters before the
mesh is connected, which simply returns defaults.
Setting them (internally renamed to "update") is
only allowed while connected. Specify them with
the new mesh join command instead where needed.

In mac80211, beaconing must now also follow the
mesh enabled/not enabled state, which is done
by testing the mesh ID.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |   8 +++
 include/net/cfg80211.h     |  38 +++++++++---
 net/mac80211/cfg.c         |  39 ++++++++++---
 net/mac80211/ieee80211_i.h |  13 -----
 net/mac80211/iface.c       |  14 +----
 net/mac80211/main.c        |   3 +-
 net/mac80211/mesh.c        |  26 ++-------
 net/mac80211/mesh.h        |  25 --------
 net/wireless/Makefile      |   2 +-
 net/wireless/core.c        |  15 ++++-
 net/wireless/core.h        |  13 +++++
 net/wireless/mesh.c        | 140 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c     | 137 +++++++++++++++++++++++++++++++++++++-------
 net/wireless/util.c        |   1 +
 14 files changed, 359 insertions(+), 115 deletions(-)
 create mode 100644 net/wireless/mesh.c

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9e541452d805..410a06ea551b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -394,6 +394,11 @@
  *
  * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
  *
+ * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial
+ *	mesh config parameters may be given.
+ * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
+ *	network is determined by the network interface.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -500,6 +505,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_FRAME_WAIT_CANCEL,
 
+	NL80211_CMD_JOIN_MESH,
+	NL80211_CMD_LEAVE_MESH,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 902895dfbd49..788c3989a9e8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -258,13 +258,9 @@ struct ieee80211_supported_band {
 
 /**
  * struct vif_params - describes virtual interface parameters
- * @mesh_id: mesh ID to use
- * @mesh_id_len: length of the mesh ID
  * @use_4addr: use 4-address frames
  */
 struct vif_params {
-       u8 *mesh_id;
-       int mesh_id_len;
        int use_4addr;
 };
 
@@ -615,6 +611,11 @@ struct bss_parameters {
 	int ap_isolate;
 };
 
+/*
+ * struct mesh_config - 802.11s mesh configuration
+ *
+ * These parameters can be changed while the mesh is active.
+ */
 struct mesh_config {
 	/* Timeouts in ms */
 	/* Mesh plink management parameters */
@@ -637,6 +638,18 @@ struct mesh_config {
 	u8  dot11MeshHWMPRootMode;
 };
 
+/**
+ * struct mesh_setup - 802.11s mesh setup configuration
+ * @mesh_id: the mesh ID
+ * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes
+ *
+ * These parameters are fixed when the mesh is created.
+ */
+struct mesh_setup {
+	const u8 *mesh_id;
+	u8 mesh_id_len;
+};
+
 /**
  * struct ieee80211_txq_params - TX queue parameters
  * @queue: TX queue identifier (NL80211_TXQ_Q_*)
@@ -1078,7 +1091,7 @@ struct cfg80211_pmksa {
  *
  * @get_mesh_params: Put the current mesh parameters into *params
  *
- * @set_mesh_params: Set mesh parameters.
+ * @update_mesh_params: Update mesh parameters on a running mesh.
  *	The mask is a bitfield which tells us which parameters to
  *	set, and which to leave alone.
  *
@@ -1229,9 +1242,14 @@ struct cfg80211_ops {
 	int	(*get_mesh_params)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
-	int	(*set_mesh_params)(struct wiphy *wiphy,
-				struct net_device *dev,
-				const struct mesh_config *nconf, u32 mask);
+	int	(*update_mesh_params)(struct wiphy *wiphy,
+				      struct net_device *dev, u32 mask,
+				      const struct mesh_config *nconf);
+	int	(*join_mesh)(struct wiphy *wiphy, struct net_device *dev,
+			     const struct mesh_config *conf,
+			     const struct mesh_setup *setup);
+	int	(*leave_mesh)(struct wiphy *wiphy, struct net_device *dev);
+
 	int	(*change_bss)(struct wiphy *wiphy, struct net_device *dev,
 			      struct bss_parameters *params);
 
@@ -1647,6 +1665,8 @@ struct cfg80211_cached_keys;
  * @bssid: (private) Used by the internal configuration code
  * @ssid: (private) Used by the internal configuration code
  * @ssid_len: (private) Used by the internal configuration code
+ * @mesh_id_len: (private) Used by the internal configuration code
+ * @mesh_id_up_len: (private) Used by the internal configuration code
  * @wext: (private) Used by the internal wireless extensions compat code
  * @use_4addr: indicates 4addr mode is used on this interface, must be
  *	set by driver (if supported) on add_interface BEFORE registering the
@@ -1676,7 +1696,7 @@ struct wireless_dev {
 
 	/* currently used for IBSS and SME - might be rearranged later */
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
-	u8 ssid_len;
+	u8 ssid_len, mesh_id_len, mesh_id_up_len;
 	enum {
 		CFG80211_SME_IDLE,
 		CFG80211_SME_CONNECTING,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d34c7c3dd762..68329d713c02 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -60,11 +60,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 	if (ret)
 		return ret;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
-		ieee80211_sdata_set_mesh_id(sdata,
-					    params->mesh_id_len,
-					    params->mesh_id);
-
 	if (type == NL80211_IFTYPE_AP_VLAN &&
 	    params && params->use_4addr == 0)
 		rcu_assign_pointer(sdata->u.vlan.sta, NULL);
@@ -1003,9 +998,9 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
-static int ieee80211_set_mesh_params(struct wiphy *wiphy,
-				struct net_device *dev,
-				const struct mesh_config *nconf, u32 mask)
+static int ieee80211_update_mesh_params(struct wiphy *wiphy,
+					struct net_device *dev, u32 mask,
+					const struct mesh_config *nconf)
 {
 	struct mesh_config *conf;
 	struct ieee80211_sub_if_data *sdata;
@@ -1056,6 +1051,30 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
+			       const struct mesh_config *conf,
+			       const struct mesh_setup *setup)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config));
+	ifmsh->mesh_id_len = setup->mesh_id_len;
+	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
+
+	ieee80211_start_mesh(sdata);
+
+	return 0;
+}
+
+static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	ieee80211_stop_mesh(sdata);
+
+	return 0;
+}
 #endif
 
 static int ieee80211_change_bss(struct wiphy *wiphy,
@@ -1760,8 +1779,10 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_mpath = ieee80211_change_mpath,
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
-	.set_mesh_params = ieee80211_set_mesh_params,
+	.update_mesh_params = ieee80211_update_mesh_params,
 	.get_mesh_params = ieee80211_get_mesh_params,
+	.join_mesh = ieee80211_join_mesh,
+	.leave_mesh = ieee80211_leave_mesh,
 #endif
 	.change_bss = ieee80211_change_bss,
 	.set_txq_params = ieee80211_set_txq_params,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e7c880725639..72499fe5fc36 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -609,19 +609,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
 	return container_of(p, struct ieee80211_sub_if_data, vif);
 }
 
-static inline void
-ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata,
-			    u8 mesh_id_len, u8 *mesh_id)
-{
-#ifdef CONFIG_MAC80211_MESH
-	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	ifmsh->mesh_id_len = mesh_id_len;
-	memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len);
-#else
-	WARN_ON(1);
-#endif
-}
-
 enum sdata_queue_type {
 	IEEE80211_SDATA_QUEUE_TYPE_FRAME	= 0,
 	IEEE80211_SDATA_QUEUE_AGG_START		= 1,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 96e27f1e79fb..f0f11bb794af 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -268,9 +268,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 				goto err_stop;
 		}
 
-		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			ieee80211_start_mesh(sdata);
-		} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
+		if (sdata->vif.type == NL80211_IFTYPE_AP) {
 			local->fif_pspoll++;
 			local->fif_probe_req++;
 
@@ -495,10 +493,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		ieee80211_adjust_monitor_flags(sdata, -1);
 		ieee80211_configure_filter(local);
 		break;
-	case NL80211_IFTYPE_MESH_POINT:
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_stop_mesh(sdata);
-		/* fall through */
 	default:
 		flush_work(&sdata->work);
 		/*
@@ -1188,12 +1182,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	if (ret)
 		goto fail;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) &&
-	    params && params->mesh_id_len)
-		ieee80211_sdata_set_mesh_id(sdata,
-					    params->mesh_id_len,
-					    params->mesh_id);
-
 	mutex_lock(&local->iflist_mtx);
 	list_add_tail_rcu(&sdata->list, &local->interfaces);
 	mutex_unlock(&local->iflist_mtx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 107a0cbe52ac..2de69766c6aa 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -246,7 +246,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 					!!sdata->u.ibss.presp;
 				break;
 			case NL80211_IFTYPE_MESH_POINT:
-				sdata->vif.bss_conf.enable_beacon = true;
+				sdata->vif.bss_conf.enable_beacon =
+					!!sdata->u.mesh.mesh_id_len;
 				break;
 			default:
 				/* not reached */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0d3234875ac5..63e1188d5062 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -530,6 +530,11 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	ifmsh->mesh_id_len = 0;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+	sta_info_flush(local, NULL);
 
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
@@ -674,27 +679,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    ieee80211_mesh_housekeeping_timer,
 		    (unsigned long) sdata);
 
-	ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T;
-	ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T;
-	ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
-	ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
-	ifmsh->mshcfg.dot11MeshTTL = MESH_TTL;
-	ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL;
-	ifmsh->mshcfg.auto_open_plinks = true;
-	ifmsh->mshcfg.dot11MeshMaxPeerLinks =
-		MESH_MAX_ESTAB_PLINKS;
-	ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout =
-		MESH_PATH_TIMEOUT;
-	ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval =
-		MESH_PREQ_MIN_INT;
-	ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime =
-		MESH_DIAM_TRAVERSAL_TIME;
-	ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries =
-		MESH_MAX_PREQ_RETRIES;
-	ifmsh->mshcfg.path_refresh_time =
-		MESH_PATH_REFRESH_TIME;
-	ifmsh->mshcfg.min_discovery_timeout =
-		MESH_MIN_DISCOVERY_TIMEOUT;
 	ifmsh->accepting_plinks = true;
 	ifmsh->preq_id = 0;
 	ifmsh->sn = 0;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 182942eeac4d..039d7fa0af74 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -175,33 +175,10 @@ struct mesh_rmc {
  */
 #define MESH_CFG_CMP_LEN 	(IEEE80211_MESH_CONFIG_LEN - 2)
 
-/* Default values, timeouts in ms */
-#define MESH_TTL 		31
-#define MESH_MAX_RETR	 	3
-#define MESH_RET_T 		100
-#define MESH_CONF_T 		100
-#define MESH_HOLD_T 		100
-
-#define MESH_PATH_TIMEOUT	5000
-/* Minimum interval between two consecutive PREQs originated by the same
- * interface
- */
-#define MESH_PREQ_MIN_INT	10
-#define MESH_DIAM_TRAVERSAL_TIME 50
-/* A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds before
- * timing out.  This way it will remain ACTIVE and no data frames will be
- * unnecesarily held in the pending queue.
- */
-#define MESH_PATH_REFRESH_TIME			1000
-#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME)
 #define MESH_DEFAULT_BEACON_INTERVAL		1000 	/* in 1024 us units */
 
-#define MESH_MAX_PREQ_RETRIES 4
 #define MESH_PATH_EXPIRE (600 * HZ)
 
-/* Default maximum number of established plinks per interface */
-#define MESH_MAX_ESTAB_PLINKS	32
-
 /* Default maximum number of plinks per interface */
 #define MESH_MAX_PLINKS		256
 
@@ -216,8 +193,6 @@ struct mesh_rmc {
 #define PERR_RCODE_NO_ROUTE     12
 #define PERR_RCODE_DEST_UNREACH 13
 
-#define MESH_DEFAULT_ELEMENT_TTL 31
-
 /* Public interfaces */
 /* Various */
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index e77e508126fa..55a28ab21db9 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_WEXT_SPY) += wext-spy.o
 obj-$(CONFIG_WEXT_PRIV) += wext-priv.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
-cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o
+cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
 cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 630bcf0a2f04..79772fcc37bc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -332,6 +332,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	WARN_ON(ops->add_virtual_intf && !ops->del_virtual_intf);
 	WARN_ON(ops->add_station && !ops->del_station);
 	WARN_ON(ops->add_mpath && !ops->del_mpath);
+	WARN_ON(ops->join_mesh && !ops->leave_mesh);
 
 	alloc_size = sizeof(*rdev) + sizeof_priv;
 
@@ -752,6 +753,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			cfg80211_mlme_down(rdev, dev);
 			wdev_unlock(wdev);
 			break;
+		case NL80211_IFTYPE_MESH_POINT:
+			cfg80211_leave_mesh(rdev, dev);
+			break;
 		default:
 			break;
 		}
@@ -775,20 +779,27 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		}
 		cfg80211_lock_rdev(rdev);
 		mutex_lock(&rdev->devlist_mtx);
-#ifdef CONFIG_CFG80211_WEXT
 		wdev_lock(wdev);
 		switch (wdev->iftype) {
+#ifdef CONFIG_CFG80211_WEXT
 		case NL80211_IFTYPE_ADHOC:
 			cfg80211_ibss_wext_join(rdev, wdev);
 			break;
 		case NL80211_IFTYPE_STATION:
 			cfg80211_mgd_wext_connect(rdev, wdev);
 			break;
+#endif
+		case NL80211_IFTYPE_MESH_POINT:
+			/* backward compat code ... */
+			if (wdev->mesh_id_up_len)
+				__cfg80211_join_mesh(rdev, dev, wdev->ssid,
+						     wdev->mesh_id_up_len,
+						     &default_mesh_config);
+			break;
 		default:
 			break;
 		}
 		wdev_unlock(wdev);
-#endif
 		rdev->opencount++;
 		mutex_unlock(&rdev->devlist_mtx);
 		cfg80211_unlock_rdev(rdev);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index ee80ad8dc655..743203bb61ac 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -285,6 +285,19 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid);
 int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 			    struct wireless_dev *wdev);
 
+/* mesh */
+extern const struct mesh_config default_mesh_config;
+int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_config *conf);
+int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_config *conf);
+int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			struct net_device *dev);
+
 /* MLME */
 int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
new file mode 100644
index 000000000000..e0b9747fe50a
--- /dev/null
+++ b/net/wireless/mesh.c
@@ -0,0 +1,140 @@
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include "core.h"
+
+/* Default values, timeouts in ms */
+#define MESH_TTL 		31
+#define MESH_DEFAULT_ELEMENT_TTL 31
+#define MESH_MAX_RETR	 	3
+#define MESH_RET_T 		100
+#define MESH_CONF_T 		100
+#define MESH_HOLD_T 		100
+
+#define MESH_PATH_TIMEOUT	5000
+
+/*
+ * Minimum interval between two consecutive PREQs originated by the same
+ * interface
+ */
+#define MESH_PREQ_MIN_INT	10
+#define MESH_DIAM_TRAVERSAL_TIME 50
+
+/*
+ * A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds
+ * before timing out.  This way it will remain ACTIVE and no data frames
+ * will be unnecessarily held in the pending queue.
+ */
+#define MESH_PATH_REFRESH_TIME			1000
+#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME)
+
+/* Default maximum number of established plinks per interface */
+#define MESH_MAX_ESTAB_PLINKS	32
+
+#define MESH_MAX_PREQ_RETRIES	4
+
+
+const struct mesh_config default_mesh_config = {
+	.dot11MeshRetryTimeout = MESH_RET_T,
+	.dot11MeshConfirmTimeout = MESH_CONF_T,
+	.dot11MeshHoldingTimeout = MESH_HOLD_T,
+	.dot11MeshMaxRetries = MESH_MAX_RETR,
+	.dot11MeshTTL = MESH_TTL,
+	.element_ttl = MESH_DEFAULT_ELEMENT_TTL,
+	.auto_open_plinks = true,
+	.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS,
+	.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT,
+	.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT,
+	.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME,
+	.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES,
+	.path_refresh_time = MESH_PATH_REFRESH_TIME,
+	.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT,
+};
+
+
+int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_config *conf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct mesh_setup setup = {
+		.mesh_id = mesh_id,
+		.mesh_id_len = mesh_id_len,
+	};
+	int err;
+
+	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
+
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (wdev->mesh_id_len)
+		return -EALREADY;
+
+	if (!mesh_id_len)
+		return -EINVAL;
+
+	if (!rdev->ops->join_mesh)
+		return -EOPNOTSUPP;
+
+	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup);
+	if (!err) {
+		memcpy(wdev->ssid, mesh_id, mesh_id_len);
+		wdev->mesh_id_len = mesh_id_len;
+	}
+
+	return err;
+}
+
+int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_config *conf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	wdev_lock(wdev);
+	err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf);
+	wdev_unlock(wdev);
+
+	return err;
+}
+
+static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+				 struct net_device *dev)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->leave_mesh)
+		return -EOPNOTSUPP;
+
+	if (!wdev->mesh_id_len)
+		return -ENOTCONN;
+
+	err = rdev->ops->leave_mesh(&rdev->wiphy, dev);
+	if (!err)
+		wdev->mesh_id_len = 0;
+	return err;
+}
+
+int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			struct net_device *dev)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	wdev_lock(wdev);
+	err = __cfg80211_leave_mesh(rdev, dev);
+	wdev_unlock(wdev);
+
+	return err;
+}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c8d4d53fc450..56508d40c740 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -661,13 +661,14 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(add_beacon, NEW_BEACON);
 	CMD(add_station, NEW_STATION);
 	CMD(add_mpath, NEW_MPATH);
-	CMD(set_mesh_params, SET_MESH_PARAMS);
+	CMD(update_mesh_params, SET_MESH_PARAMS);
 	CMD(change_bss, SET_BSS);
 	CMD(auth, AUTHENTICATE);
 	CMD(assoc, ASSOCIATE);
 	CMD(deauth, DEAUTHENTICATE);
 	CMD(disassoc, DISASSOCIATE);
 	CMD(join_ibss, JOIN_IBSS);
+	CMD(join_mesh, JOIN_MESH);
 	CMD(set_pmksa, SET_PMKSA);
 	CMD(del_pmksa, DEL_PMKSA);
 	CMD(flush_pmksa, FLUSH_PMKSA);
@@ -1324,11 +1325,21 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_MESH_ID]) {
+		struct wireless_dev *wdev = dev->ieee80211_ptr;
+
 		if (ntype != NL80211_IFTYPE_MESH_POINT)
 			return -EINVAL;
-		params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
-		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
-		change = true;
+		if (netif_running(dev))
+			return -EBUSY;
+
+		wdev_lock(wdev);
+		BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
+			     IEEE80211_MAX_MESH_ID_LEN);
+		wdev->mesh_id_up_len =
+			nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+		memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+		       wdev->mesh_id_up_len);
+		wdev_unlock(wdev);
 	}
 
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
@@ -1388,12 +1399,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	    !(rdev->wiphy.interface_modes & (1 << type)))
 		return -EOPNOTSUPP;
 
-	if (type == NL80211_IFTYPE_MESH_POINT &&
-	    info->attrs[NL80211_ATTR_MESH_ID]) {
-		params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
-		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
-	}
-
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
 		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
 		err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type);
@@ -1410,6 +1415,20 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
+	if (type == NL80211_IFTYPE_MESH_POINT &&
+	    info->attrs[NL80211_ATTR_MESH_ID]) {
+		struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+		wdev_lock(wdev);
+		BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
+			     IEEE80211_MAX_MESH_ID_LEN);
+		wdev->mesh_id_up_len =
+			nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+		memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+		       wdev->mesh_id_up_len);
+		wdev_unlock(wdev);
+	}
+
 	return 0;
 }
 
@@ -2543,21 +2562,32 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 }
 
 static int nl80211_get_mesh_params(struct sk_buff *skb,
-	struct genl_info *info)
+				   struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct mesh_config cur_params;
-	int err;
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct mesh_config cur_params;
+	int err = 0;
 	void *hdr;
 	struct nlattr *pinfoattr;
 	struct sk_buff *msg;
 
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->get_mesh_params)
 		return -EOPNOTSUPP;
 
-	/* Get the mesh params */
-	err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params);
+	wdev_lock(wdev);
+	/* If not connected, get default parameters */
+	if (!wdev->mesh_id_len)
+		memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
+	else
+		err = rdev->ops->get_mesh_params(&rdev->wiphy, dev,
+						 &cur_params);
+	wdev_unlock(wdev);
+
 	if (err)
 		return err;
 
@@ -2705,23 +2735,37 @@ do {\
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
-static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_update_mesh_params(struct sk_buff *skb,
+				      struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct mesh_config cfg;
 	u32 mask;
 	int err;
 
-	if (!rdev->ops->set_mesh_params)
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->update_mesh_params)
 		return -EOPNOTSUPP;
 
 	err = nl80211_parse_mesh_params(info, &cfg, &mask);
 	if (err)
 		return err;
 
-	/* Apply changes */
-	return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
+	wdev_lock(wdev);
+	if (!wdev->mesh_id_len)
+		err = -ENOLINK;
+
+	if (!err)
+		err = rdev->ops->update_mesh_params(&rdev->wiphy, dev,
+						    mask, &cfg);
+
+	wdev_unlock(wdev);
+
+	return err;
 }
 
 static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
@@ -4505,6 +4549,41 @@ out:
 	return err;
 }
 
+static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct mesh_config cfg;
+	int err;
+
+	/* start with default */
+	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
+
+	if (info->attrs[NL80211_ATTR_MESH_PARAMS]) {
+		/* and parse parameters if given */
+		err = nl80211_parse_mesh_params(info, &cfg, NULL);
+		if (err)
+			return err;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MESH_ID] ||
+	    !nla_len(info->attrs[NL80211_ATTR_MESH_ID]))
+		return -EINVAL;
+
+	return cfg80211_join_mesh(rdev, dev,
+				  nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+				  nla_len(info->attrs[NL80211_ATTR_MESH_ID]),
+				  &cfg);
+}
+
+static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+
+	return cfg80211_leave_mesh(rdev, dev);
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -4769,10 +4848,10 @@ static struct genl_ops nl80211_ops[] = {
 	},
 	{
 		.cmd = NL80211_CMD_SET_MESH_PARAMS,
-		.doit = nl80211_set_mesh_params,
+		.doit = nl80211_update_mesh_params,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
@@ -4987,6 +5066,22 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_JOIN_MESH,
+		.doit = nl80211_join_mesh,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_LEAVE_MESH,
+		.doit = nl80211_leave_mesh,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index fee020b15a4e..4de624ca4c63 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -792,6 +792,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 
 	if (ntype != otype) {
 		dev->ieee80211_ptr->use_4addr = false;
+		dev->ieee80211_ptr->mesh_id_up_len = 0;
 
 		switch (otype) {
 		case NL80211_IFTYPE_ADHOC:
-- 
cgit v1.2.3-71-gd317


From 871a2c16c21b988688b4ab1a78eadd969765c0a3 Mon Sep 17 00:00:00 2001
From: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Date: Sat, 4 Dec 2010 13:38:01 +0100
Subject: dccp: Policy-based packet dequeueing infrastructure

This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
 * a simple FIFO policy (which is the default) and
 * a priority based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (can be overridden via socket options).

The priority policy uses skb->priority internally to assign an u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3), the patch also provides the requisite parsing routines.

Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 Documentation/networking/dccp.txt |  20 ++++++
 include/linux/dccp.h              |  21 +++++++
 net/dccp/Makefile                 |   4 +-
 net/dccp/dccp.h                   |  12 ++++
 net/dccp/output.c                 |   7 +--
 net/dccp/proto.c                  |  67 +++++++++++++++++++-
 net/dccp/qpolicy.c                | 126 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 248 insertions(+), 9 deletions(-)
 create mode 100644 net/dccp/qpolicy.c

(limited to 'include/linux')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 271d524a4c8d..b395ca6a49f2 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -47,6 +47,26 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
+DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
+a policy ID as argument and can only be set before the connection (i.e. changes
+during an established connection are not supported). Currently, two policies are
+defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
+and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
+u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
+a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
+be formatted using a cmsg(3) message header filled in as follows:
+	cmsg->cmsg_level = SOL_DCCP;
+	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
+	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
+
+DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
+value is always interpreted as unbounded queue length. If different from zero,
+the interpretation of this parameter depends on the current dequeuing policy
+(see above): the "simple" policy will enforce a fixed queue size by returning
+EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
+lowest-priority packet first. The default value for this parameter is
+initialised from /proc/sys/net/dccp/default/tx_qlen.
+
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
 the socket will fall back to 0 (which means that no meaningful service code
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d0..010e2d87ed75 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP socket control message types for cmsg */
+enum dccp_cmsg_type {
+	DCCP_SCM_PRIORITY = 1,
+	DCCP_SCM_QPOLICY_MAX = 0xFFFF,
+	/* ^-- Up to here reserved exclusively for qpolicy parameters */
+	DCCP_SCM_MAX
+};
+
+/* DCCP priorities for outgoing/queued packets */
+enum dccp_packet_dequeueing_policy {
+	DCCPQ_POLICY_SIMPLE,
+	DCCPQ_POLICY_PRIO,
+	DCCPQ_POLICY_MAX
+};
+
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */
 #define DCCP_SOCKOPT_SERVICE		2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
 #define DCCP_SOCKOPT_CCID		13
 #define DCCP_SOCKOPT_TX_CCID		14
 #define DCCP_SOCKOPT_RX_CCID		15
+#define DCCP_SOCKOPT_QPOLICY_ID		16
+#define DCCP_SOCKOPT_QPOLICY_TXQLEN	17
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
@@ -458,6 +475,8 @@ struct dccp_ackvec;
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
  * @dccps_options_received - parsed set of retrieved options
+ * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
+ * @dccps_tx_qlen - maximum length of the TX queue
  * @dccps_role - role of this sock, one of %dccp_role
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
 	struct dccp_options_received	dccps_options_received;
+	__u8				dccps_qpolicy;
+	__u32				dccps_tx_qlen;
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
-
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
+	  qpolicy.o
 #
 # CCID algorithms to be used by dccp.ko
 #
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..d008da91cec2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
+/*
+ * TX Packet Dequeueing Interface
+ */
+extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+extern bool		dccp_qpolicy_full(struct sock *sk);
+extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
+extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+
+/*
+ * TX Packet Output and TX Timers
+ */
 extern void   dccp_write_xmit(struct sock *sk);
 extern void   dccp_write_space(struct sock *sk);
 extern void   dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
 {
 	int err, len;
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+	struct sk_buff *skb = dccp_qpolicy_pop(sk);
 
 	if (unlikely(skb == NULL))
 		return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 
-	while ((skb = skb_peek(&sk->sk_write_queue))) {
+	while ((skb = dccp_qpolicy_top(sk))) {
 		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
 		switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
 			dccp_xmit_packet(sk);
 			break;
 		case CCID_PACKET_ERR:
-			skb_dequeue(&sk->sk_write_queue);
-			kfree_skb(skb);
+			dccp_qpolicy_drop(sk, skb);
 			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..d6a224982bb5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
 	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
+	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;
 
 	dccp_init_xmit_timers(sk);
 
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		err = dccp_setsockopt_cscov(sk, val, true);
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		if (sk->sk_state != DCCP_CLOSED)
+			err = -EISCONN;
+		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
+			err = -EINVAL;
+		else
+			dp->dccps_qpolicy = val;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		if (val < 0)
+			err = -EINVAL;
+		else
+			dp->dccps_tx_qlen = val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		val = dp->dccps_qpolicy;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		val = dp->dccps_tx_qlen;
+		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 					     len, (u32 __user *)optval, optlen);
@@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 #endif
 
+static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+
+	/*
+	 * Assign an (opaque) qpolicy priority value to skb->priority.
+	 *
+	 * We are overloading this skb field for use with the qpolicy subystem.
+	 * The skb->priority is normally used for the SO_PRIORITY option, which
+	 * is initialised from sk_priority. Since the assignment of sk_priority
+	 * to skb->priority happens later (on layer 3), we overload this field
+	 * for use with queueing priorities as long as the skb is on layer 4.
+	 * The default priority value (if nothing is set) is 0.
+	 */
+	skb->priority = 0;
+
+	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		if (cmsg->cmsg_level != SOL_DCCP)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case DCCP_SCM_PRIORITY:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
+				return -EINVAL;
+			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len)
 {
@@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	lock_sock(sk);
 
-	if (sysctl_dccp_tx_qlen &&
-	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+	if (dccp_qpolicy_full(sk)) {
 		rc = -EAGAIN;
 		goto out_release;
 	}
@@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rc != 0)
 		goto out_discard;
 
-	skb_queue_tail(&sk->sk_write_queue, skb);
+	rc = dccp_msghdr_parse(msg, skb);
+	if (rc != 0)
+		goto out_discard;
+
+	dccp_qpolicy_push(sk, skb);
 	/*
 	 * The xmit_timer is set if the TX CCID is rate-based and will expire
 	 * when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..4b0fd6b11f6d
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
+/*
+ *  net/dccp/qpolicy.c
+ *
+ *  Policy-based packet dequeueing interface for DCCP.
+ *
+ *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License v2
+ *  as published by the Free Software Foundation.
+ */
+#include "dccp.h"
+
+/*
+ *	Simple Dequeueing Policy:
+ *	If tx_qlen is different from 0, enqueue up to tx_qlen elements.
+ */
+static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
+{
+	skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static bool qpolicy_simple_full(struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_tx_qlen &&
+	       sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
+}
+
+static struct sk_buff *qpolicy_simple_top(struct sock *sk)
+{
+	return skb_peek(&sk->sk_write_queue);
+}
+
+/*
+ *	Priority-based Dequeueing Policy:
+ *	If tx_qlen is different from 0 and the queue has reached its upper bound
+ *	of tx_qlen elements, replace older packets lowest-priority-first.
+ */
+static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *best = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (best == NULL || skb->priority > best->priority)
+			best = skb;
+	return best;
+}
+
+static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *worst = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (worst == NULL || skb->priority < worst->priority)
+			worst = skb;
+	return worst;
+}
+
+static bool qpolicy_prio_full(struct sock *sk)
+{
+	if (qpolicy_simple_full(sk))
+		dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
+	return false;
+}
+
+/**
+ * struct dccp_qpolicy_operations  -  TX Packet Dequeueing Interface
+ * @push: add a new @skb to the write queue
+ * @full: indicates that no more packets will be admitted
+ * @top:  peeks at whatever the queueing policy defines as its `top'
+ */
+static struct dccp_qpolicy_operations {
+	void		(*push)	(struct sock *sk, struct sk_buff *skb);
+	bool		(*full) (struct sock *sk);
+	struct sk_buff*	(*top)  (struct sock *sk);
+
+} qpol_table[DCCPQ_POLICY_MAX] = {
+	[DCCPQ_POLICY_SIMPLE] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_simple_full,
+		.top  = qpolicy_simple_top,
+	},
+	[DCCPQ_POLICY_PRIO] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_prio_full,
+		.top  = qpolicy_prio_best_skb,
+	},
+};
+
+/*
+ *	Externally visible interface
+ */
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
+{
+	qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
+}
+
+bool dccp_qpolicy_full(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
+}
+
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb != NULL) {
+		skb_unlink(skb, &sk->sk_write_queue);
+		kfree_skb(skb);
+	}
+}
+
+struct sk_buff *dccp_qpolicy_top(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
+}
+
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
+{
+	struct sk_buff *skb = dccp_qpolicy_top(sk);
+
+	if (skb != NULL) {
+		/* Clear any skb fields that we used internally */
+		skb->priority = 0;
+		skb_unlink(skb, &sk->sk_write_queue);
+	}
+	return skb;
+}
-- 
cgit v1.2.3-71-gd317


From 02c048b919455aaa38628563cdcc2e691c8a9f53 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 7 Dec 2010 20:16:56 +0100
Subject: fuse: allow batching of FORGET requests

Terje Malmedal reports that a fuse filesystem with 32 million inodes
on a machine with lots of memory can take up to 30 minutes to process
FORGET requests when all those inodes are evicted from the icache.

To solve this, create a BATCH_FORGET request that allows up to about
8000 FORGET requests to be sent in a single message.

This request is only sent if userspace supports interface version 7.16
or later, otherwise fall back to sending individual FORGET messages.

Reported-by: Terje Malmedal <terje.malmedal@usit.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 92 +++++++++++++++++++++++++++++++++++++++++++++-------
 fs/fuse/fuse_i.h     |  3 +-
 include/linux/fuse.h | 16 ++++++++-
 3 files changed, 97 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index fed65303eeeb..cf8d28d1fbad 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -254,8 +254,8 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 		       u64 nodeid, u64 nlookup)
 {
-	forget->nodeid = nodeid;
-	forget->nlookup = nlookup;
+	forget->forget_one.nodeid = nodeid;
+	forget->forget_one.nlookup = nlookup;
 
 	spin_lock(&fc->lock);
 	fc->forget_list_tail->next = forget;
@@ -974,15 +974,26 @@ __releases(fc->lock)
 	return err ? err : reqsize;
 }
 
-static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc)
+static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
+					       unsigned max,
+					       unsigned *countp)
 {
-	struct fuse_forget_link *forget = fc->forget_list_head.next;
+	struct fuse_forget_link *head = fc->forget_list_head.next;
+	struct fuse_forget_link **newhead = &head;
+	unsigned count;
 
-	fc->forget_list_head.next = forget->next;
+	for (count = 0; *newhead != NULL && count < max; count++)
+		newhead = &(*newhead)->next;
+
+	fc->forget_list_head.next = *newhead;
+	*newhead = NULL;
 	if (fc->forget_list_head.next == NULL)
 		fc->forget_list_tail = &fc->forget_list_head;
 
-	return forget;
+	if (countp != NULL)
+		*countp = count;
+
+	return head;
 }
 
 static int fuse_read_single_forget(struct fuse_conn *fc,
@@ -991,13 +1002,13 @@ static int fuse_read_single_forget(struct fuse_conn *fc,
 __releases(fc->lock)
 {
 	int err;
-	struct fuse_forget_link *forget = dequeue_forget(fc);
+	struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
 	struct fuse_forget_in arg = {
-		.nlookup = forget->nlookup,
+		.nlookup = forget->forget_one.nlookup,
 	};
 	struct fuse_in_header ih = {
 		.opcode = FUSE_FORGET,
-		.nodeid = forget->nodeid,
+		.nodeid = forget->forget_one.nodeid,
 		.unique = fuse_get_unique(fc),
 		.len = sizeof(ih) + sizeof(arg),
 	};
@@ -1018,6 +1029,65 @@ __releases(fc->lock)
 	return ih.len;
 }
 
+static int fuse_read_batch_forget(struct fuse_conn *fc,
+				   struct fuse_copy_state *cs, size_t nbytes)
+__releases(fc->lock)
+{
+	int err;
+	unsigned max_forgets;
+	unsigned count;
+	struct fuse_forget_link *head;
+	struct fuse_batch_forget_in arg = { .count = 0 };
+	struct fuse_in_header ih = {
+		.opcode = FUSE_BATCH_FORGET,
+		.unique = fuse_get_unique(fc),
+		.len = sizeof(ih) + sizeof(arg),
+	};
+
+	if (nbytes < ih.len) {
+		spin_unlock(&fc->lock);
+		return -EINVAL;
+	}
+
+	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
+	head = dequeue_forget(fc, max_forgets, &count);
+	spin_unlock(&fc->lock);
+
+	arg.count = count;
+	ih.len += count * sizeof(struct fuse_forget_one);
+	err = fuse_copy_one(cs, &ih, sizeof(ih));
+	if (!err)
+		err = fuse_copy_one(cs, &arg, sizeof(arg));
+
+	while (head) {
+		struct fuse_forget_link *forget = head;
+
+		if (!err) {
+			err = fuse_copy_one(cs, &forget->forget_one,
+					    sizeof(forget->forget_one));
+		}
+		head = forget->next;
+		kfree(forget);
+	}
+
+	fuse_copy_finish(cs);
+
+	if (err)
+		return err;
+
+	return ih.len;
+}
+
+static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
+			    size_t nbytes)
+__releases(fc->lock)
+{
+	if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
+		return fuse_read_single_forget(fc, cs, nbytes);
+	else
+		return fuse_read_batch_forget(fc, cs, nbytes);
+}
+
 /*
  * Read a single request into the userspace filesystem's buffer.  This
  * function waits until a request is available, then removes it from
@@ -1058,7 +1128,7 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
 
 	if (forget_pending(fc)) {
 		if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
-			return fuse_read_single_forget(fc, cs, nbytes);
+			return fuse_read_forget(fc, cs, nbytes);
 
 		if (fc->forget_batch <= -8)
 			fc->forget_batch = 16;
@@ -1837,7 +1907,7 @@ __acquires(fc->lock)
 	end_requests(fc, &fc->pending);
 	end_requests(fc, &fc->processing);
 	while (forget_pending(fc))
-		kfree(dequeue_forget(fc));
+		kfree(dequeue_forget(fc, 1, NULL));
 }
 
 /*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 33369c63a522..ae5744a2f9e9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -55,8 +55,7 @@ extern unsigned max_user_congthresh;
 
 /* One forget request */
 struct fuse_forget_link {
-	u64	nodeid;
-	u64	nlookup;
+	struct fuse_forget_one forget_one;
 	struct fuse_forget_link *next;
 };
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index c3c578e09833..cf11881f4938 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -41,6 +41,9 @@
  * 7.15
  *  - add store notify
  *  - add retrieve notify
+ *
+ * 7.16
+ *  - add BATCH_FORGET request
  */
 
 #ifndef _LINUX_FUSE_H
@@ -72,7 +75,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 15
+#define FUSE_KERNEL_MINOR_VERSION 16
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -256,6 +259,7 @@ enum fuse_opcode {
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
 	FUSE_NOTIFY_REPLY  = 41,
+	FUSE_BATCH_FORGET  = 42,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
@@ -290,6 +294,16 @@ struct fuse_forget_in {
 	__u64	nlookup;
 };
 
+struct fuse_forget_one {
+	__u64	nodeid;
+	__u64	nlookup;
+};
+
+struct fuse_batch_forget_in {
+	__u32	count;
+	__u32	dummy;
+};
+
 struct fuse_getattr_in {
 	__u32	getattr_flags;
 	__u32	dummy;
-- 
cgit v1.2.3-71-gd317


From 1baa26b2be92fe9917e2f7ef46d423b5dfa4da71 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 7 Dec 2010 20:16:56 +0100
Subject: fuse: fix ioctl ABI

In kernel ABI version 7.16 and later FUSE_IOCTL_RETRY reply from a
unrestricted IOCTL request shall return with an array of 'struct
fuse_ioctl_iovec' instead of 'struct iovec'.  This fixes the ABI
ambiguity of 32bit vs. 64bit.

Reported-by: "ccmail111" <ccmail111@yahoo.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Tejun Heo <tj@kernel.org>
---
 fs/fuse/file.c       | 53 +++++++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/fuse.h | 10 ++++++++++
 2 files changed, 58 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ca3b6bbb3790..95da1bc1c826 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1634,9 +1634,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
  * and 64bit.  Fortunately we can determine which structure the server
  * used from the size of the reply.
  */
-static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
-				 size_t transferred, unsigned count,
-				 bool is_compat)
+static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
+				     size_t transferred, unsigned count,
+				     bool is_compat)
 {
 #ifdef CONFIG_COMPAT
 	if (count * sizeof(struct compat_iovec) == transferred) {
@@ -1680,6 +1680,42 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
 	return 0;
 }
 
+static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
+				 void *src, size_t transferred, unsigned count,
+				 bool is_compat)
+{
+	unsigned i;
+	struct fuse_ioctl_iovec *fiov = src;
+
+	if (fc->minor < 16) {
+		return fuse_copy_ioctl_iovec_old(dst, src, transferred,
+						 count, is_compat);
+	}
+
+	if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
+		return -EIO;
+
+	for (i = 0; i < count; i++) {
+		/* Did the server supply an inappropriate value? */
+		if (fiov[i].base != (unsigned long) fiov[i].base ||
+		    fiov[i].len != (unsigned long) fiov[i].len)
+			return -EIO;
+
+		dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
+		dst[i].iov_len = (size_t) fiov[i].len;
+
+#ifdef CONFIG_COMPAT
+		if (is_compat &&
+		    (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
+		     (compat_size_t) dst[i].iov_len != fiov[i].len))
+			return -EIO;
+#endif
+	}
+
+	return 0;
+}
+
+
 /*
  * For ioctls, there is no generic way to determine how much memory
  * needs to be read and/or written.  Furthermore, ioctls are allowed
@@ -1746,8 +1782,15 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 	size_t in_size, out_size, transferred;
 	int err;
 
+#if BITS_PER_LONG == 32
+	inarg.flags |= FUSE_IOCTL_32BIT;
+#else
+	if (flags & FUSE_IOCTL_COMPAT)
+		inarg.flags |= FUSE_IOCTL_32BIT;
+#endif
+
 	/* assume all the iovs returned by client always fits in a page */
-	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+	BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
 	err = -ENOMEM;
 	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
@@ -1862,7 +1905,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 			goto out;
 
 		vaddr = kmap_atomic(pages[0], KM_USER0);
-		err = fuse_copy_ioctl_iovec(iov_page, vaddr,
+		err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
 					    transferred, in_iovs + out_iovs,
 					    (flags & FUSE_IOCTL_COMPAT) != 0);
 		kunmap_atomic(vaddr, KM_USER0);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index cf11881f4938..d464de53db43 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -44,6 +44,9 @@
  *
  * 7.16
  *  - add BATCH_FORGET request
+ *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ *  - add FUSE_IOCTL_32BIT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -203,12 +206,14 @@ struct fuse_file_lock {
  * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
  * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
  * FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
  *
  * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
  */
 #define FUSE_IOCTL_COMPAT	(1 << 0)
 #define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
 #define FUSE_IOCTL_RETRY	(1 << 2)
+#define FUSE_IOCTL_32BIT	(1 << 3)
 
 #define FUSE_IOCTL_MAX_IOV	256
 
@@ -524,6 +529,11 @@ struct fuse_ioctl_in {
 	__u32	out_size;
 };
 
+struct fuse_ioctl_iovec {
+	__u64	base;
+	__u64	len;
+};
+
 struct fuse_ioctl_out {
 	__s32	result;
 	__u32	flags;
-- 
cgit v1.2.3-71-gd317


From 541a45a142df281c974d74eac2066138fc107b23 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 2 Dec 2010 19:12:43 +0900
Subject: nl80211/mac80211: Report signal average

Extend nl80211 to report an exponential weighted moving average (EWMA) of the
signal value. Since the signal value usually fluctuates between different
packets, an average can be more useful than the value of the last packet.

This uses the recently added generic EWMA library function.

--
v2:	fix ABI breakage and change factor to be a power of 2.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 4 ++++
 net/mac80211/Kconfig    | 1 +
 net/mac80211/cfg.c      | 3 ++-
 net/mac80211/rx.c       | 1 +
 net/mac80211/sta_info.c | 2 ++
 net/mac80211/sta_info.h | 3 +++
 net/wireless/nl80211.c  | 3 +++
 8 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 410a06ea551b..8e28053ea423 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1191,6 +1191,7 @@ enum nl80211_rate_info {
  *	station)
  * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station)
  * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  */
 enum nl80211_sta_info {
 	__NL80211_STA_INFO_INVALID,
@@ -1206,6 +1207,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TX_PACKETS,
 	NL80211_STA_INFO_TX_RETRIES,
 	NL80211_STA_INFO_TX_FAILED,
+	NL80211_STA_INFO_SIGNAL_AVG,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 788c3989a9e8..8764c9a5bab7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -420,6 +420,7 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
+ * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -435,6 +436,7 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
+	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -481,6 +483,7 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
+ * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -501,6 +504,7 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
+	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec88..798d9b9462e2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,6 +6,7 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
+	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 68329d713c02..af9620406321 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -342,8 +342,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
 		sinfo->signal = (s8)sta->last_signal;
+		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6289525c0998..2fe8f5f86499 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1163,6 +1163,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
+	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index eff58571fd7e..c426504ed1cf 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
+	ewma_init(&sta->avg_signal, 1024, 8);
+
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 05f11302443b..fdca52cf88de 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
+#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -223,6 +224,7 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
+ * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -291,6 +293,7 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
+	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 56508d40c740..2cf03331d4a2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1896,6 +1896,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
+	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
+		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
+			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3-71-gd317


From 5c6d1125f8dbd1bfef39e38fbc2837003be78a59 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <ext-jarkko.2.sakkinen@nokia.com>
Date: Tue, 7 Dec 2010 13:34:01 +0200
Subject: Smack: Transmute labels on specified directories

In a situation where Smack access rules allow processes
with multiple labels to write to a directory it is easy
to get into a situation where the directory gets cluttered
with files that the owner can't deal with because while
they could be written to the directory a process at the
label of the directory can't write them. This is generally
the desired behavior, but when it isn't it is a real
issue.

This patch introduces a new attribute SMACK64TRANSMUTE that
instructs Smack to create the file with the label of the directory
under certain circumstances.

A new access mode, "t" for transmute, is made available to
Smack access rules, which are expanded from "rwxa" to "rwxat".
If a file is created in a directory marked as transmutable
and if access was granted to perform the operation by a rule
that included the transmute mode, then the file gets the
Smack label of the directory instead of the Smack label of the
creating process.

Note that this is equivalent to creating an empty file at the
label of the directory and then having the other process write
to it. The transmute scheme requires that both the access rule
allows transmutation and that the directory be explicitly marked.

Signed-off-by: Jarkko Sakkinen <ext-jarkko.2.sakkinen@nokia.com>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 include/linux/xattr.h         |  2 ++
 security/smack/smack.h        | 17 +++++++++-
 security/smack/smack_access.c | 54 +++++++++++++++++++++++--------
 security/smack/smack_lsm.c    | 74 +++++++++++++++++++++++++++++--------------
 security/smack/smackfs.c      | 37 +++++++++++++++++++---
 5 files changed, 141 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 351c7901d74a..e6131ef98d8f 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -41,10 +41,12 @@
 #define XATTR_SMACK_IPIN "SMACK64IPIN"
 #define XATTR_SMACK_IPOUT "SMACK64IPOUT"
 #define XATTR_SMACK_EXEC "SMACK64EXEC"
+#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE"
 #define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
 #define XATTR_NAME_SMACKIPIN	XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
 #define XATTR_NAME_SMACKIPOUT	XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
 #define XATTR_NAME_SMACKEXEC	XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
+#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE
 
 #define XATTR_CAPS_SUFFIX "capability"
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
diff --git a/security/smack/smack.h b/security/smack/smack.h
index a2e2cdfab4ef..129c4eb8ffb1 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -62,6 +62,7 @@ struct task_smack {
 };
 
 #define	SMK_INODE_INSTANT	0x01	/* inode is instantiated */
+#define	SMK_INODE_TRANSMUTE	0x02	/* directory is transmuting */
 
 /*
  * A label access rule.
@@ -166,6 +167,10 @@ struct smack_known {
 #define SMACK_CIPSO_MAXLEVEL            255     /* CIPSO 2.2 standard */
 #define SMACK_CIPSO_MAXCATNUM           239     /* CIPSO 2.2 standard */
 
+/*
+ * Flag for transmute access
+ */
+#define MAY_TRANSMUTE	64
 /*
  * Just to make the common cases easier to deal with
  */
@@ -197,6 +202,7 @@ struct inode_smack *new_inode_smack(char *);
 /*
  * These functions are in smack_access.c
  */
+int smk_access_entry(char *, char *);
 int smk_access(char *, char *, int, struct smk_audit_info *);
 int smk_curacc(char *, u32, struct smk_audit_info *);
 int smack_to_cipso(const char *, struct smack_cipso *);
@@ -239,6 +245,15 @@ static inline void smack_catset_bit(int cat, char *catsetp)
 	catsetp[(cat - 1) / 8] |= 0x80 >> ((cat - 1) % 8);
 }
 
+/*
+ * Is the directory transmuting?
+ */
+static inline int smk_inode_transmutable(const struct inode *isp)
+{
+	struct inode_smack *sip = isp->i_security;
+	return (sip->smk_flags & SMK_INODE_TRANSMUTE) != 0;
+}
+
 /*
  * Present a pointer to the smack label in an inode blob.
  */
@@ -265,7 +280,7 @@ static inline char *smk_of_forked(const struct task_smack *tsp)
 }
 
 /*
- * Present a pointer to the smack label in the curren task blob.
+ * Present a pointer to the smack label in the current task blob.
  */
 static inline char *smk_of_current(void)
 {
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 42becbc1ce33..7ba8478f599e 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -66,6 +66,46 @@ static u32 smack_next_secid = 10;
  */
 int log_policy = SMACK_AUDIT_DENIED;
 
+/**
+ * smk_access_entry - look up matching access rule
+ * @subject_label: a pointer to the subject's Smack label
+ * @object_label: a pointer to the object's Smack label
+ *
+ * This function looks up the subject/object pair in the
+ * access rule list and returns pointer to the matching rule if found,
+ * NULL otherwise.
+ *
+ * NOTE:
+ * Even though Smack labels are usually shared on smack_list
+ * labels that come in off the network can't be imported
+ * and added to the list for locking reasons.
+ *
+ * Therefore, it is necessary to check the contents of the labels,
+ * not just the pointer values. Of course, in most cases the labels
+ * will be on the list, so checking the pointers may be a worthwhile
+ * optimization.
+ */
+int smk_access_entry(char *subject_label, char *object_label)
+{
+	u32 may = MAY_NOT;
+	struct smack_rule *srp;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(srp, &smack_rule_list, list) {
+		if (srp->smk_subject == subject_label ||
+		    strcmp(srp->smk_subject, subject_label) == 0) {
+			if (srp->smk_object == object_label ||
+			    strcmp(srp->smk_object, object_label) == 0) {
+				may = srp->smk_access;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	return may;
+}
+
 /**
  * smk_access - determine if a subject has a specific access to an object
  * @subject_label: a pointer to the subject's Smack label
@@ -90,7 +130,6 @@ int smk_access(char *subject_label, char *object_label, int request,
 	       struct smk_audit_info *a)
 {
 	u32 may = MAY_NOT;
-	struct smack_rule *srp;
 	int rc = 0;
 
 	/*
@@ -144,18 +183,7 @@ int smk_access(char *subject_label, char *object_label, int request,
 	 * access (e.g. read is included in readwrite) it's
 	 * good.
 	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(srp, &smack_rule_list, list) {
-		if (srp->smk_subject == subject_label ||
-		    strcmp(srp->smk_subject, subject_label) == 0) {
-			if (srp->smk_object == object_label ||
-			    strcmp(srp->smk_object, object_label) == 0) {
-				may = srp->smk_access;
-				break;
-			}
-		}
-	}
-	rcu_read_unlock();
+	may = smk_access_entry(subject_label, object_label);
 	/*
 	 * This is a bit map operation.
 	 */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 7e19afe0e738..05dc4da2e25f 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3,12 +3,14 @@
  *
  *  This file contains the smack hook function implementations.
  *
- *  Author:
+ *  Authors:
  *	Casey Schaufler <casey@schaufler-ca.com>
+ *	Jarkko Sakkinen <ext-jarkko.2.sakkinen@nokia.com>
  *
  *  Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
  *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
  *                Paul Moore <paul.moore@hp.com>
+ *  Copyright (C) 2010 Nokia Corporation
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -35,6 +37,9 @@
 
 #define task_security(task)	(task_cred_xxx((task), security))
 
+#define TRANS_TRUE	"TRUE"
+#define TRANS_TRUE_SIZE	4
+
 /**
  * smk_fetch - Fetch the smack label from a file.
  * @ip: a pointer to the inode
@@ -468,6 +473,8 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
 				     char **name, void **value, size_t *len)
 {
 	char *isp = smk_of_inode(inode);
+	char *dsp = smk_of_inode(dir);
+	u32 may;
 
 	if (name) {
 		*name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL);
@@ -476,6 +483,16 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
 	}
 
 	if (value) {
+		may = smk_access_entry(smk_of_current(), dsp);
+
+		/*
+		 * If the access rule allows transmutation and
+		 * the directory requests transmutation then
+		 * by all means transmute.
+		 */
+		if (((may & MAY_TRANSMUTE) != 0) && smk_inode_transmutable(dir))
+			isp = dsp;
+
 		*value = kstrdup(isp, GFP_KERNEL);
 		if (*value == NULL)
 			return -ENOMEM;
@@ -709,6 +726,12 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 		if (size == 0 || size >= SMK_LABELLEN ||
 		    smk_import(value, size) == NULL)
 			rc = -EINVAL;
+	} else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
+		if (!capable(CAP_MAC_ADMIN))
+			rc = -EPERM;
+		if (size != TRANS_TRUE_SIZE ||
+		    strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0)
+			rc = -EINVAL;
 	} else
 		rc = cap_inode_setxattr(dentry, name, value, size, flags);
 
@@ -735,35 +758,23 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
 				      const void *value, size_t size, int flags)
 {
-	struct inode_smack *isp;
 	char *nsp;
-
-	/*
-	 * Not SMACK or SMACKEXEC
-	 */
-	if (strcmp(name, XATTR_NAME_SMACK) &&
-	    strcmp(name, XATTR_NAME_SMACKEXEC))
-		return;
-
-	isp = dentry->d_inode->i_security;
-
-	/*
-	 * No locking is done here. This is a pointer
-	 * assignment.
-	 */
-	nsp = smk_import(value, size);
+	struct inode_smack *isp = dentry->d_inode->i_security;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0) {
+		nsp = smk_import(value, size);
 		if (nsp != NULL)
 			isp->smk_inode = nsp;
 		else
 			isp->smk_inode = smack_known_invalid.smk_known;
-	} else {
+	} else if (strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
+		nsp = smk_import(value, size);
 		if (nsp != NULL)
 			isp->smk_task = nsp;
 		else
 			isp->smk_task = smack_known_invalid.smk_known;
-	}
+	} else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0)
+		isp->smk_flags |= SMK_INODE_TRANSMUTE;
 
 	return;
 }
@@ -803,7 +814,8 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
 	    strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
-	    strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
+	    strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
+	    strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
 		if (!capable(CAP_MAC_ADMIN))
 			rc = -EPERM;
 	} else
@@ -2274,6 +2286,8 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	char *csp = smk_of_current();
 	char *fetched;
 	char *final;
+	char trattr[TRANS_TRUE_SIZE];
+	int transflag = 0;
 	struct dentry *dp;
 
 	if (inode == NULL)
@@ -2392,10 +2406,19 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 		 */
 		dp = dget(opt_dentry);
 		fetched = smk_fetch(XATTR_NAME_SMACK, inode, dp);
-		if (fetched != NULL)
+		if (fetched != NULL) {
 			final = fetched;
-		isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode,
-					  dp);
+			if (S_ISDIR(inode->i_mode)) {
+				trattr[0] = '\0';
+				inode->i_op->getxattr(dp,
+					XATTR_NAME_SMACKTRANSMUTE,
+					trattr, TRANS_TRUE_SIZE);
+				if (strncmp(trattr, TRANS_TRUE,
+					    TRANS_TRUE_SIZE) == 0)
+					transflag = SMK_INODE_TRANSMUTE;
+			}
+		}
+		isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
 
 		dput(dp);
 		break;
@@ -2406,7 +2429,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	else
 		isp->smk_inode = final;
 
-	isp->smk_flags |= SMK_INODE_INSTANT;
+	isp->smk_flags |= (SMK_INODE_INSTANT | transflag);
 
 unlockandout:
 	mutex_unlock(&isp->smk_lock);
@@ -2456,6 +2479,7 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 			     void *value, size_t size)
 {
 	struct task_smack *tsp;
+	struct task_smack *oldtsp;
 	struct cred *new;
 	char *newsmack;
 
@@ -2485,6 +2509,7 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 	if (newsmack == smack_known_web.smk_known)
 		return -EPERM;
 
+	oldtsp = p->cred->security;
 	new = prepare_creds();
 	if (new == NULL)
 		return -ENOMEM;
@@ -2494,6 +2519,7 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 		return -ENOMEM;
 	}
 	tsp->smk_task = newsmack;
+	tsp->smk_forked = oldtsp->smk_forked;
 	new->security = tsp;
 	commit_creds(new);
 	return size;
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 01a0be93d8d0..362d5eda948b 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -109,9 +109,12 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION;
  * SMK_ACCESSLEN: Maximum length for a rule access field
  * SMK_LOADLEN: Smack rule length
  */
-#define SMK_ACCESS    "rwxa"
-#define SMK_ACCESSLEN (sizeof(SMK_ACCESS) - 1)
-#define SMK_LOADLEN   (SMK_LABELLEN + SMK_LABELLEN + SMK_ACCESSLEN)
+#define SMK_OACCESS	"rwxa"
+#define SMK_ACCESS	"rwxat"
+#define SMK_OACCESSLEN	(sizeof(SMK_OACCESS) - 1)
+#define SMK_ACCESSLEN	(sizeof(SMK_ACCESS) - 1)
+#define SMK_OLOADLEN	(SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN)
+#define SMK_LOADLEN	(SMK_LABELLEN + SMK_LABELLEN + SMK_ACCESSLEN)
 
 /**
  * smk_netlabel_audit_set - fill a netlbl_audit struct
@@ -175,6 +178,8 @@ static int load_seq_show(struct seq_file *s, void *v)
 		seq_putc(s, 'x');
 	if (srp->smk_access & MAY_APPEND)
 		seq_putc(s, 'a');
+	if (srp->smk_access & MAY_TRANSMUTE)
+		seq_putc(s, 't');
 	if (srp->smk_access == 0)
 		seq_putc(s, '-');
 
@@ -273,10 +278,15 @@ static ssize_t smk_write_load(struct file *file, const char __user *buf,
 	if (!capable(CAP_MAC_ADMIN))
 		return -EPERM;
 
-	if (*ppos != 0 || count != SMK_LOADLEN)
+	if (*ppos != 0)
+		return -EINVAL;
+	/*
+	 * Minor hack for backward compatability
+	 */
+	if (count < (SMK_OLOADLEN) || count > SMK_LOADLEN)
 		return -EINVAL;
 
-	data = kzalloc(count, GFP_KERNEL);
+	data = kzalloc(SMK_LOADLEN, GFP_KERNEL);
 	if (data == NULL)
 		return -ENOMEM;
 
@@ -285,6 +295,12 @@ static ssize_t smk_write_load(struct file *file, const char __user *buf,
 		goto out;
 	}
 
+	/*
+	 * More on the minor hack for backward compatability
+	 */
+	if (count == (SMK_OLOADLEN))
+		data[SMK_OLOADLEN] = '-';
+
 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
 	if (rule == NULL) {
 		rc = -ENOMEM;
@@ -345,6 +361,17 @@ static ssize_t smk_write_load(struct file *file, const char __user *buf,
 		goto out_free_rule;
 	}
 
+	switch (data[SMK_LABELLEN + SMK_LABELLEN + 4]) {
+	case '-':
+		break;
+	case 't':
+	case 'T':
+		rule->smk_access |= MAY_TRANSMUTE;
+		break;
+	default:
+		goto out_free_rule;
+	}
+
 	rc = smk_set_access(rule);
 
 	if (!rc)
-- 
cgit v1.2.3-71-gd317


From 5bea7660bba973dc5e8e9d92b11fb1dd5b524ebf Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 7 Dec 2010 23:02:48 -0800
Subject: HID: add hid_hw_open/close/power() handlers

Instead of exposing the guts of hid->ll_driver relationship to HID
sub-drivers provide these helpers to encapsulate the details.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c   |  4 ++--
 drivers/hid/hid-picolcd.c |  6 +++---
 drivers/hid/hid-roccat.c  | 26 ++++++++++----------------
 drivers/hid/hidraw.c      | 21 +++++++++------------
 include/linux/hid.h       | 43 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 67 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 834ef47b76d6..b718f7144ce1 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -772,14 +772,14 @@ static int hidinput_open(struct input_dev *dev)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 
-	return hid->ll_driver->open(hid);
+	return hid_hw_open(hid);
 }
 
 static void hidinput_close(struct input_dev *dev)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 
-	hid->ll_driver->close(hid);
+	hid_hw_close(hid);
 }
 
 /*
diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c
index bc2e07740628..38565fee7bf1 100644
--- a/drivers/hid/hid-picolcd.c
+++ b/drivers/hid/hid-picolcd.c
@@ -2635,7 +2635,7 @@ static int picolcd_probe(struct hid_device *hdev,
 		goto err_cleanup_data;
 	}
 
-	error = hdev->ll_driver->open(hdev);
+	error = hid_hw_open(hdev);
 	if (error) {
 		dev_err(&hdev->dev, "failed to open input interrupt pipe for key and IR events\n");
 		goto err_cleanup_hid_hw;
@@ -2668,7 +2668,7 @@ err_cleanup_sysfs2:
 err_cleanup_sysfs1:
 	device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay);
 err_cleanup_hid_ll:
-	hdev->ll_driver->close(hdev);
+	hid_hw_close(hdev);
 err_cleanup_hid_hw:
 	hid_hw_stop(hdev);
 err_cleanup_data:
@@ -2699,7 +2699,7 @@ static void picolcd_remove(struct hid_device *hdev)
 	picolcd_exit_devfs(data);
 	device_remove_file(&hdev->dev, &dev_attr_operation_mode);
 	device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay);
-	hdev->ll_driver->close(hdev);
+	hid_hw_close(hdev);
 	hid_hw_stop(hdev);
 	hid_set_drvdata(hdev, NULL);
 
diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c
index 5a6879e235ac..a9d9b29ff47f 100644
--- a/drivers/hid/hid-roccat.c
+++ b/drivers/hid/hid-roccat.c
@@ -173,19 +173,15 @@ static int roccat_open(struct inode *inode, struct file *file)
 
 	if (!device->open++) {
 		/* power on device on adding first reader */
-		if (device->hid->ll_driver->power) {
-			error = device->hid->ll_driver->power(device->hid,
-					PM_HINT_FULLON);
-			if (error < 0) {
-				--device->open;
-				goto exit_err;
-			}
+		error = hid_hw_power(device->hid, PM_HINT_FULLON);
+		if (error < 0) {
+			--device->open;
+			goto exit_err;
 		}
-		error = device->hid->ll_driver->open(device->hid);
+
+		error = hid_hw_open(device->hid);
 		if (error < 0) {
-			if (device->hid->ll_driver->power)
-				device->hid->ll_driver->power(device->hid,
-						PM_HINT_NORMAL);
+			hid_hw_power(device->hid, PM_HINT_NORMAL);
 			--device->open;
 			goto exit_err;
 		}
@@ -231,10 +227,8 @@ static int roccat_release(struct inode *inode, struct file *file)
 	if (!--device->open) {
 		/* removing last reader */
 		if (device->exist) {
-			if (device->hid->ll_driver->power)
-				device->hid->ll_driver->power(device->hid,
-						PM_HINT_NORMAL);
-			device->hid->ll_driver->close(device->hid);
+			hid_hw_power(device->hid, PM_HINT_NORMAL);
+			hid_hw_close(device->hid);
 		} else {
 			kfree(device);
 		}
@@ -370,7 +364,7 @@ void roccat_disconnect(int minor)
 	device_destroy(roccat_class, MKDEV(roccat_major, minor));
 
 	if (device->open) {
-		device->hid->ll_driver->close(device->hid);
+		hid_hw_close(device->hid);
 		wake_up_interruptible(&device->wait);
 	} else {
 		kfree(device);
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 8a4b32dca9f7..5aefe73cf795 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -193,15 +193,13 @@ static int hidraw_open(struct inode *inode, struct file *file)
 
 	dev = hidraw_table[minor];
 	if (!dev->open++) {
-		if (dev->hid->ll_driver->power) {
-			err = dev->hid->ll_driver->power(dev->hid, PM_HINT_FULLON);
-			if (err < 0)
-				goto out_unlock;
-		}
-		err = dev->hid->ll_driver->open(dev->hid);
+		err = hid_hw_power(dev->hid, PM_HINT_FULLON);
+		if (err < 0)
+			goto out_unlock;
+
+		err = hid_hw_open(dev->hid);
 		if (err < 0) {
-			if (dev->hid->ll_driver->power)
-				dev->hid->ll_driver->power(dev->hid, PM_HINT_NORMAL);
+			hid_hw_power(dev->hid, PM_HINT_NORMAL);
 			dev->open--;
 		}
 	}
@@ -230,9 +228,8 @@ static int hidraw_release(struct inode * inode, struct file * file)
 	dev = hidraw_table[minor];
 	if (!--dev->open) {
 		if (list->hidraw->exist) {
-			if (dev->hid->ll_driver->power)
-				dev->hid->ll_driver->power(dev->hid, PM_HINT_NORMAL);
-			dev->hid->ll_driver->close(dev->hid);
+			hid_hw_power(dev->hid, PM_HINT_NORMAL);
+			hid_hw_close(dev->hid);
 		} else {
 			kfree(list->hidraw);
 		}
@@ -434,7 +431,7 @@ void hidraw_disconnect(struct hid_device *hid)
 	device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor));
 
 	if (hidraw->open) {
-		hid->ll_driver->close(hid);
+		hid_hw_close(hid);
 		wake_up_interruptible(&hidraw->wait);
 	} else {
 		kfree(hidraw);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index bb0f56f5c01e..e2af195d8b46 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -820,6 +820,49 @@ static inline void hid_hw_stop(struct hid_device *hdev)
 	hdev->ll_driver->stop(hdev);
 }
 
+/**
+ * hid_hw_open - signal underlaying HW to start delivering events
+ *
+ * @hdev: hid device
+ *
+ * Tell underlying HW to start delivering events from the device.
+ * This function should be called sometime after successful call
+ * to hid_hiw_start().
+ */
+static inline int __must_check hid_hw_open(struct hid_device *hdev)
+{
+	return hdev->ll_driver->open(hdev);
+}
+
+/**
+ * hid_hw_close - signal underlaying HW to stop delivering events
+ *
+ * @hdev: hid device
+ *
+ * This function indicates that we are not interested in the events
+ * from this device anymore. Delivery of events may or may not stop,
+ * depending on the number of users still outstanding.
+ */
+static inline void hid_hw_close(struct hid_device *hdev)
+{
+	hdev->ll_driver->close(hdev);
+}
+
+/**
+ * hid_hw_power - requests underlying HW to go into given power mode
+ *
+ * @hdev: hid device
+ * @level: requested power level (one of %PM_HINT_* defines)
+ *
+ * This function requests underlying hardware to enter requested power
+ * mode.
+ */
+
+static inline int hid_hw_power(struct hid_device *hdev, int level)
+{
+	return hdev->ll_driver->power ? hdev->ll_driver->power(hdev, level) : 0;
+}
+
 void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 		int interrupt);
 
-- 
cgit v1.2.3-71-gd317


From 941666c2e3e0f9f6a1cb5808d02352d445bd702c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 01:23:53 +0000
Subject: net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le dimanche 05 décembre 2010 à 09:19 +0100, Eric Dumazet a écrit :

> Hmm..
>
> If somebody can explain why RTNL is held in arp_ioctl() (and therefore
> in arp_req_delete()), we might first remove RTNL use in arp_ioctl() so
> that your patch can be applied.
>
> Right now it is not good, because RTNL wont be necessarly held when you
> are going to call arp_invalidate() ?

While doing this analysis, I found a refcount bug in llc, I'll send a
patch for net-2.6

Meanwhile, here is the patch for net-next-2.6

Your patch then can be applied after mine.

Thanks

[PATCH] net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()

dev_getbyhwaddr() was called under RTNL.

Rename it to dev_getbyhwaddr_rcu() and change all its caller to now use
RCU locking instead of RTNL.

Change arp_ioctl() to use RCU instead of RTNL locking.

Note: this fix a dev refcount bug in llc

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |  3 ++-
 net/core/dev.c                 | 17 +++++++----------
 net/ieee802154/af_ieee802154.c |  6 +++---
 net/ipv4/arp.c                 | 17 +++++++++--------
 net/llc/af_llc.c               | 11 ++++++-----
 5 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9ac5dc26e3c..d31bc3c94717 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1360,7 +1360,8 @@ static inline struct net_device *first_net_device(struct net *net)
 
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
-extern struct net_device    *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr);
+extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+					      const char *hwaddr);
 extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
 extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
 extern void		dev_add_pack(struct packet_type *pt);
diff --git a/net/core/dev.c b/net/core/dev.c
index ee605c0867e7..822b15b8d11c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
- *	dev_getbyhwaddr - find a device by its hardware address
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
  *	@type: media type of device
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold the
- *	rtnl semaphore. The returned device has not had its ref count increased
+ *	is not found or a pointer to the device. The caller must hold RCU
+ *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
- *	BUGS:
- *	If the API was consistent this would be __dev_get_by_hwaddr
  */
 
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
 {
 	struct net_device *dev;
 
-	ASSERT_RTNL();
-
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
 
 	return NULL;
 }
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
 
 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 93c91b633a56..6df6ecf49708 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net,
 
 	switch (addr->addr_type) {
 	case IEEE802154_ADDR_LONG:
-		rtnl_lock();
-		dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr);
+		rcu_read_lock();
+		dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
 		if (dev)
 			dev_hold(dev);
-		rtnl_unlock();
+		rcu_read_unlock();
 		break;
 	case IEEE802154_ADDR_SHORT:
 		if (addr->pan_id == 0xffff ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 10af759f2630..a2fc7b961dbc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
-	if (__in_dev_get_rtnl(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+	if (__in_dev_get_rcu(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
 		return 0;
 	}
 	return -ENXIO;
 }
 
+/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
 	if (mask && mask != htonl(0xFFFFFFFF))
 		return -EINVAL;
 	if (!dev && (r->arp_flags & ATF_COM)) {
-		dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+		dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
 				      r->arp_ha.sa_data);
 		if (!dev)
 			return -ENODEV;
@@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (!(r.arp_flags & ATF_NETMASK))
 		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
 							   htonl(0xFFFFFFFFUL);
-	rtnl_lock();
+	rcu_read_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		dev = __dev_get_by_name(net, r.arp_dev);
+		dev = dev_get_by_name_rcu(net, r.arp_dev);
 		if (dev == NULL)
 			goto out;
 
@@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 	case SIOCGARP:
 		err = arp_req_get(&r, dev);
-		if (!err && copy_to_user(arg, &r, sizeof(r)))
-			err = -EFAULT;
 		break;
 	}
 out:
-	rtnl_unlock();
+	rcu_read_unlock();
+	if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
+		err = -EFAULT;
 	return err;
 }
 
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 582612998211..dfd3a648a551 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -316,9 +316,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	if (unlikely(addr->sllc_family != AF_LLC))
 		goto out;
 	rc = -ENODEV;
-	rtnl_lock();
+	rcu_read_lock();
 	if (sk->sk_bound_dev_if) {
-		llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+		llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
 		if (llc->dev) {
 			if (!addr->sllc_arphrd)
 				addr->sllc_arphrd = llc->dev->type;
@@ -329,14 +329,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 			    !llc_mac_match(addr->sllc_mac,
 					   llc->dev->dev_addr)) {
 				rc = -EINVAL;
-				dev_put(llc->dev);
 				llc->dev = NULL;
 			}
 		}
 	} else
-		llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
+		llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
 					   addr->sllc_mac);
-	rtnl_unlock();
+	if (llc->dev)
+		dev_hold(llc->dev);
+	rcu_read_unlock();
 	if (!llc->dev)
 		goto out;
 	if (!addr->sllc_sap) {
-- 
cgit v1.2.3-71-gd317


From 62ab0812137ec4f9884dd7de346238841ac03283 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 6 Dec 2010 20:50:09 +0000
Subject: filter: constify sk_run_filter()

sk_run_filter() doesnt write on skb, change its prototype to reflect
this.

Fix two af_packet comments.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h  |  2 +-
 net/core/filter.c       |  7 ++++---
 net/core/timestamping.c |  2 +-
 net/packet/af_packet.c  | 31 ++++++++++++++++---------------
 4 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5334adaf4072..45266b75409a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -148,7 +148,7 @@ struct sk_buff;
 struct sock;
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
-extern unsigned int sk_run_filter(struct sk_buff *skb,
+extern unsigned int sk_run_filter(const struct sk_buff *skb,
 				  const struct sock_filter *filter);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
diff --git a/net/core/filter.c b/net/core/filter.c
index ac4920a87be5..25500f16a18a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@ enum {
 };
 
 /* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k)
 {
 	u8 *ptr = NULL;
 
@@ -102,7 +102,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	return NULL;
 }
 
-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
@@ -160,7 +160,8 @@ EXPORT_SYMBOL(sk_filter);
  * and last instruction guaranteed to be a RET, we dont need to check
  * flen. (We used to pass to this function the length of filter)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+			   const struct sock_filter *fentry)
 {
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index dac7ed687f60..b124d28ff1c8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,7 +26,7 @@ static struct sock_filter ptp_filter[] = {
 	PTP_FILTER
 };
 
-static unsigned int classify(struct sk_buff *skb)
+static unsigned int classify(const struct sk_buff *skb)
 {
 	if (likely(skb->dev &&
 		   skb->dev->phydev &&
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a11c731d2ee4..17eafe5b48c6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -517,7 +517,8 @@ out_free:
 	return err;
 }
 
-static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+static inline unsigned int run_filter(const struct sk_buff *skb,
+				      const struct sock *sk,
 				      unsigned int res)
 {
 	struct sk_filter *filter;
@@ -532,15 +533,15 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 }
 
 /*
-   This function makes lazy skb cloning in hope that most of packets
-   are discarded by BPF.
-
-   Note tricky part: we DO mangle shared skb! skb->data, skb->len
-   and skb->cb are mangled. It works because (and until) packets
-   falling here are owned by current CPU. Output packets are cloned
-   by dev_queue_xmit_nit(), input packets are processed by net_bh
-   sequencially, so that if we return skb to original state on exit,
-   we will not harm anyone.
+ * This function makes lazy skb cloning in hope that most of packets
+ * are discarded by BPF.
+ *
+ * Note tricky part: we DO mangle shared skb! skb->data, skb->len
+ * and skb->cb are mangled. It works because (and until) packets
+ * falling here are owned by current CPU. Output packets are cloned
+ * by dev_queue_xmit_nit(), input packets are processed by net_bh
+ * sequencially, so that if we return skb to original state on exit,
+ * we will not harm anyone.
  */
 
 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -566,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->header_ops) {
 		/* The device has an explicit notion of ll header,
-		   exported to higher levels.
-
-		   Otherwise, the device hides datails of it frame
-		   structure, so that corresponding packet head
-		   never delivered to user.
+		 * exported to higher levels.
+		 *
+		 * Otherwise, the device hides details of its frame
+		 * structure, so that corresponding packet head is
+		 * never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb_mac_header(skb));
-- 
cgit v1.2.3-71-gd317


From 806c09a7db457be3758e14b1f152761135d89af5 Mon Sep 17 00:00:00 2001
From: Dario Faggioli <raistlin@linux.it>
Date: Tue, 30 Nov 2010 19:51:33 +0100
Subject: sched: Make pushable_tasks CONFIG_SMP dependant

As noted by Peter Zijlstra at https://lkml.org/lkml/2010/11/10/391
(while reviewing other stuff, though), tracking pushable tasks
only makes sense on SMP systems.

Signed-off-by: Dario Faggioli <raistlin@linux.it>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1291143093.2697.298.camel@Palantir>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 9 ++++++++-
 include/linux/sched.h     | 2 ++
 kernel/sched.c            | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1f8c06ce0fa6..6ed8812bfe2d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,13 @@
 #include <linux/securebits.h>
 #include <net/net_namespace.h>
 
+#ifdef CONFIG_SMP
+# define INIT_PUSHABLE_TASKS(tsk)					\
+	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
+#else
+# define INIT_PUSHABLE_TASKS(tsk)
+#endif
+
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
@@ -137,7 +144,7 @@ extern struct cred init_cred;
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
-	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
+	INIT_PUSHABLE_TASKS(tsk)					\
 	.ptraced	= LIST_HEAD_INIT(tsk.ptraced),			\
 	.ptrace_entry	= LIST_HEAD_INIT(tsk.ptrace_entry),		\
 	.real_parent	= &tsk,						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c2d46da486e..4f92a239c14d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1240,7 +1240,9 @@ struct task_struct {
 #endif
 
 	struct list_head tasks;
+#ifdef CONFIG_SMP
 	struct plist_node pushable_tasks;
+#endif
 
 	struct mm_struct *mm, *active_mm;
 #if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/sched.c b/kernel/sched.c
index b646dad4a40e..3925a1bbf5dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2595,7 +2595,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+#ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+#endif
 
 	put_cpu();
 }
-- 
cgit v1.2.3-71-gd317


From 50b12f597be354a5a224f05c65c54c0667e57aec Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 19 Nov 2010 12:40:25 +0100
Subject: cfg80211: Add new BSS attribute ht_opmode

Add a new BSS attribute to allow hostapd to set the current HT opmode.
Otherwise drivers won't be able to set up protection for HT rates in
AP mode.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 4 ++++
 include/net/cfg80211.h  | 3 +++
 net/wireless/nl80211.c  | 5 +++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8e28053ea423..380421253d16 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -849,6 +849,8 @@ enum nl80211_commands {
  *	flag isn't set, the frame will be rejected. This is also used as an
  *	nl80211 capability flag.
  *
+ * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1025,6 +1027,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_OFFCHANNEL_TX_OK,
 
+	NL80211_ATTR_BSS_HT_OPMODE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8764c9a5bab7..0d5979924be3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -605,6 +605,8 @@ struct mpath_info {
  *	(or NULL for no change)
  * @basic_rates_len: number of basic rates
  * @ap_isolate: do not forward packets between connected stations
+ * @ht_opmode: HT Operation mode
+ * 	(u16 = opmode, -1 = do not change)
  */
 struct bss_parameters {
 	int use_cts_prot;
@@ -613,6 +615,7 @@ struct bss_parameters {
 	u8 *basic_rates;
 	u8 basic_rates_len;
 	int ap_isolate;
+	int ht_opmode;
 };
 
 /*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2cf03331d4a2..c3f80e565365 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -121,6 +121,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY,
 					   .len = NL80211_MAX_SUPP_RATES },
+	[NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 },
 
 	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
 
@@ -2462,6 +2463,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	params.use_short_preamble = -1;
 	params.use_short_slot_time = -1;
 	params.ap_isolate = -1;
+	params.ht_opmode = -1;
 
 	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
 		params.use_cts_prot =
@@ -2480,6 +2482,9 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	}
 	if (info->attrs[NL80211_ATTR_AP_ISOLATE])
 		params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
+	if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE])
+		params.ht_opmode =
+			nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]);
 
 	if (!rdev->ops->change_bss)
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3-71-gd317


From b7e8941b2df518186d9f7679c007f6b619bb4e89 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Tue, 7 Dec 2010 13:43:03 -0800
Subject: cfg80211: add some element IDs in enum ieee80211_eid

1)WLAN_EID_BSS_COEX_2040
2)WLAN_EID_OVERLAP_BSS_SCAN_PARAM
3)WLAN_EID_EXT_CAPABILITY

Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ed5a03cbe184..351c0ab4e284 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1223,6 +1223,9 @@ enum ieee80211_eid {
 	WLAN_EID_BSS_AC_ACCESS_DELAY = 68,
 	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
 	WLAN_EID_MULTIPLE_BSSID = 71,
+	WLAN_EID_BSS_COEX_2040 = 72,
+	WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74,
+	WLAN_EID_EXT_CAPABILITY = 127,
 
 	WLAN_EID_MOBILITY_DOMAIN = 54,
 	WLAN_EID_FAST_BSS_TRANSITION = 55,
-- 
cgit v1.2.3-71-gd317


From 96a84c20d635fb1e98ab92f9fc517c4441f5c424 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Mon, 29 Nov 2010 17:07:16 -0500
Subject: lockup detector: Compile fixes from removing the old x86 nmi watchdog

My patch that removed the old x86 nmi watchdog broke other
arches.  This change reverts a piece of that patch and puts the
change in the correct spot.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: fweisbec@gmail.com
Cc: yinghai@kernel.org
LKML-Reference: <1291068437-5331-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/hw_nmi.c | 8 +++++++-
 include/linux/nmi.h           | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 057f1ebebe0c..2e94eb493591 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -24,8 +24,14 @@ u64 hw_nmi_get_sample_period(void)
 }
 #endif
 
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
+void touch_nmi_watchdog(void)
+{
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+#endif
 #ifdef arch_trigger_all_cpu_backtrace
-
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 1c451e6ecc17..17ccf44e7dcb 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -16,7 +16,8 @@
  */
 #ifdef ARCH_HAS_NMI_WATCHDOG
 #include <asm/nmi.h>
-#endif
+extern void touch_nmi_watchdog(void);
+#else
 #ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
@@ -25,6 +26,7 @@ static inline void touch_nmi_watchdog(void)
 #else
 extern void touch_nmi_watchdog(void);
 #endif
+#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
-- 
cgit v1.2.3-71-gd317


From 60d509c823cca21e77d537bd356785f7cfe8f0d1 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 3 Dec 2010 02:39:01 +0000
Subject: The new jhash implementation

The current jhash.h implements the lookup2() hash function by Bob Jenkins.
However, lookup2() is outdated as Bob wrote a new hash function called
lookup3(). The patch replaces the lookup2() implementation of the 'jhash*'
functions with that of lookup3().

You can read a longer comparison of the two and other hash functions at
http://burtleburtle.net/bob/hash/doobs.html.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jhash.h | 183 +++++++++++++++++++++++++++++---------------------
 1 file changed, 105 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index ced1159fa4f2..47cb09edec1a 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -3,129 +3,156 @@
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  *
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
  *
  * I've modified Bob's hash to be useful in the Linux kernel, and
- * any bugs present are surely my fault.  -DaveM
+ * any bugs present are my fault.
+ * Jozsef
  */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* Best hash sizes are of power of two */
+#define jhash_size(n)   ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n)   (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
 
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<<8); \
-  c -= a; c -= b; c ^= (b>>13); \
-  a -= b; a -= c; a ^= (c>>12);  \
-  b -= c; b -= a; b ^= (a<<16); \
-  c -= a; c -= b; c ^= (b>>5); \
-  a -= b; a -= c; a ^= (c>>3);  \
-  b -= c; b -= a; b ^= (a<<10); \
-  c -= a; c -= b; c ^= (b>>15); \
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
 }
 
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO	0x9e3779b9
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL		0xdeadbeef
 
-/* The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
  */
 static inline u32 jhash(const void *key, u32 length, u32 initval)
 {
-	u32 a, b, c, len;
+	u32 a, b, c;
 	const u8 *k = key;
 
-	len = length;
-	a = b = JHASH_GOLDEN_RATIO;
-	c = initval;
-
-	while (len >= 12) {
-		a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
-		b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
-		c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
-
-		__jhash_mix(a,b,c);
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + length + initval;
 
+	/* All but the last block: affect some 32 bits of (a,b,c) */
+	while (length > 12) {
+		a += __get_unaligned_cpu32(k);
+		b += __get_unaligned_cpu32(k + 4);
+		c += __get_unaligned_cpu32(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
 		k += 12;
-		len -= 12;
 	}
-
-	c += length;
-	switch (len) {
-	case 11: c += ((u32)k[10]<<24);
-	case 10: c += ((u32)k[9]<<16);
-	case 9 : c += ((u32)k[8]<<8);
-	case 8 : b += ((u32)k[7]<<24);
-	case 7 : b += ((u32)k[6]<<16);
-	case 6 : b += ((u32)k[5]<<8);
-	case 5 : b += k[4];
-	case 4 : a += ((u32)k[3]<<24);
-	case 3 : a += ((u32)k[2]<<16);
-	case 2 : a += ((u32)k[1]<<8);
-	case 1 : a += k[0];
-	};
-
-	__jhash_mix(a,b,c);
+	/* Last block: affect all 32 bits of (c) */
+	/* All the case statements fall through */
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
 
 	return c;
 }
 
-/* A special optimized version that handles 1 or more of u32s.
- * The length parameter here is the number of u32s in the key.
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * Returns the hash value of the key.
  */
 static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 {
-	u32 a, b, c, len;
+	u32 a, b, c;
 
-	a = b = JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + (length<<2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
 		__jhash_mix(a, b, c);
-		k += 3; len -= 3;
+		length -= 3;
+		k += 3;
 	}
 
-	c += length * 4;
-
-	switch (len) {
-	case 2 : b += k[1];
-	case 1 : a += k[0];
-	};
-
-	__jhash_mix(a,b,c);
+	/* Handle the last 3 u32's: all the case statements fall through */
+	switch (length) {
+	case 3: c += k[2];
+	case 2: b += k[1];
+	case 1: a += k[0];
+		__jhash_final(a, b, c);
+	case 0:	/* Nothing left to add */
+		break;
+	}
 
 	return c;
 }
 
 
-/* A special ultra-optimized versions that knows they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
- *       done at the end is not done here.
- */
+/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
 static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
 {
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
+	a += JHASH_INITVAL;
+	b += JHASH_INITVAL;
 	c += initval;
 
-	__jhash_mix(a, b, c);
+	__jhash_final(a, b, c);
 
 	return c;
 }
-- 
cgit v1.2.3-71-gd317


From 05ac10dd6862a3fcce33d2203fbb2ef285e3ca87 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 2 Dec 2010 08:49:26 +0200
Subject: usb: musb: trivial search and replace patch

change all ocurrences of musb_hdrc to musb-hdrc.

We will call glue layer drivers musb-<glue layer>,
so in order to keep things somewhat standard, let's
change the underscore into a dash.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-davinci/usb.c                  | 2 +-
 arch/arm/mach-omap2/clock2420_data.c         | 2 +-
 arch/arm/mach-omap2/clock2430_data.c         | 2 +-
 arch/arm/mach-omap2/clock3xxx_data.c         | 8 ++++----
 arch/arm/mach-omap2/clock44xx_data.c         | 2 +-
 arch/arm/mach-omap2/usb-musb.c               | 2 +-
 arch/arm/mach-omap2/usb-tusb6010.c           | 2 +-
 arch/blackfin/mach-bf527/boards/ad7160eval.c | 2 +-
 arch/blackfin/mach-bf527/boards/cm_bf527.c   | 2 +-
 arch/blackfin/mach-bf527/boards/ezbrd.c      | 2 +-
 arch/blackfin/mach-bf527/boards/ezkit.c      | 2 +-
 arch/blackfin/mach-bf527/boards/tll6527m.c   | 2 +-
 arch/blackfin/mach-bf548/boards/cm_bf548.c   | 2 +-
 arch/blackfin/mach-bf548/boards/ezkit.c      | 2 +-
 drivers/usb/gadget/gadget_chips.h            | 2 +-
 drivers/usb/musb/Kconfig                     | 2 +-
 drivers/usb/musb/musb_core.c                 | 2 +-
 include/linux/usb/musb.h                     | 2 +-
 18 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/usb.c b/arch/arm/mach-davinci/usb.c
index b0d6b07431c0..0c7735bc0d19 100644
--- a/arch/arm/mach-davinci/usb.c
+++ b/arch/arm/mach-davinci/usb.c
@@ -76,7 +76,7 @@ static struct resource usb_resources[] = {
 static u64 usb_dmamask = DMA_BIT_MASK(32);
 
 static struct platform_device usb_dev = {
-	.name           = "musb_hdrc",
+	.name           = "musb-hdrc",
 	.id             = -1,
 	.dev = {
 		.platform_data		= &usb_data,
diff --git a/arch/arm/mach-omap2/clock2420_data.c b/arch/arm/mach-omap2/clock2420_data.c
index 21f856252ad8..831e25c89d2d 100644
--- a/arch/arm/mach-omap2/clock2420_data.c
+++ b/arch/arm/mach-omap2/clock2420_data.c
@@ -1877,7 +1877,7 @@ static struct omap_clk omap2420_clks[] = {
 	CLK("omap-aes",	"ick",	&aes_ick,	CK_242X),
 	CLK(NULL,	"pka_ick",	&pka_ick,	CK_242X),
 	CLK(NULL,	"usb_fck",	&usb_fck,	CK_242X),
-	CLK("musb_hdrc",	"fck",	&osc_ck,	CK_242X),
+	CLK("musb-hdrc",	"fck",	&osc_ck,	CK_242X),
 };
 
 /*
diff --git a/arch/arm/mach-omap2/clock2430_data.c b/arch/arm/mach-omap2/clock2430_data.c
index e32afcbdfb88..a6bccd7475a0 100644
--- a/arch/arm/mach-omap2/clock2430_data.c
+++ b/arch/arm/mach-omap2/clock2430_data.c
@@ -1983,7 +1983,7 @@ static struct omap_clk omap2430_clks[] = {
 	CLK("omap-aes",	"ick",	&aes_ick,	CK_243X),
 	CLK(NULL,	"pka_ick",	&pka_ick,	CK_243X),
 	CLK(NULL,	"usb_fck",	&usb_fck,	CK_243X),
-	CLK("musb_hdrc",	"ick",	&usbhs_ick,	CK_243X),
+	CLK("musb-hdrc",	"ick",	&usbhs_ick,	CK_243X),
 	CLK("mmci-omap-hs.0", "ick",	&mmchs1_ick,	CK_243X),
 	CLK("mmci-omap-hs.0", "fck",	&mmchs1_fck,	CK_243X),
 	CLK("mmci-omap-hs.1", "ick",	&mmchs2_ick,	CK_243X),
diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c
index a04cb03db51f..3e668edbf6fe 100644
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3306,8 +3306,8 @@ static struct omap_clk omap3xxx_clks[] = {
 	CLK(NULL,	"ssi_sst_fck",	&ssi_sst_fck_3430es1,	CK_3430ES1),
 	CLK(NULL,	"ssi_sst_fck",	&ssi_sst_fck_3430es2,	CK_3430ES2),
 	CLK(NULL,	"core_l3_ick",	&core_l3_ick,	CK_3XXX),
-	CLK("musb_hdrc",	"ick",	&hsotgusb_ick_3430es1,	CK_3430ES1),
-	CLK("musb_hdrc",	"ick",	&hsotgusb_ick_3430es2,	CK_3430ES2),
+	CLK("musb-hdrc",	"ick",	&hsotgusb_ick_3430es1,	CK_3430ES1),
+	CLK("musb-hdrc",	"ick",	&hsotgusb_ick_3430es2,	CK_3430ES2),
 	CLK(NULL,	"sdrc_ick",	&sdrc_ick,	CK_3XXX),
 	CLK(NULL,	"gpmc_fck",	&gpmc_fck,	CK_3XXX),
 	CLK(NULL,	"security_l3_ick", &security_l3_ick, CK_343X),
@@ -3442,8 +3442,8 @@ static struct omap_clk omap3xxx_clks[] = {
 	CLK("davinci_emac",	"phy_clk",	&emac_fck,	CK_AM35XX),
 	CLK("vpfe-capture",	"master",	&vpfe_ick,	CK_AM35XX),
 	CLK("vpfe-capture",	"slave",	&vpfe_fck,	CK_AM35XX),
-	CLK("musb_hdrc",	"ick",		&hsotgusb_ick_am35xx,	CK_AM35XX),
-	CLK("musb_hdrc",	"fck",		&hsotgusb_fck_am35xx,	CK_AM35XX),
+	CLK("musb-hdrc",	"ick",		&hsotgusb_ick_am35xx,	CK_AM35XX),
+	CLK("musb-hdrc",	"fck",		&hsotgusb_fck_am35xx,	CK_AM35XX),
 	CLK(NULL,	"hecc_ck",	&hecc_ck,	CK_AM35XX),
 	CLK(NULL,	"uart4_ick",	&uart4_ick_am35xx,	CK_AM35XX),
 };
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index f473e8922664..95dd34ce6ea1 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -2953,7 +2953,7 @@ static struct omap_clk omap44xx_clks[] = {
 	CLK("ehci-omap.0",	"usbhost_ick",		&dummy_ck,		CK_443X),
 	CLK(NULL,	"otg_60m_gfclk",		&otg_60m_gfclk,	CK_443X),
 	CLK(NULL,	"usb_otg_hs_xclk",		&usb_otg_hs_xclk,	CK_443X),
-	CLK("musb_hdrc",	"ick",				&usb_otg_hs_ick,	CK_443X),
+	CLK("musb-hdrc",	"ick",				&usb_otg_hs_ick,	CK_443X),
 	CLK(NULL,	"usb_phy_cm_clk32k",		&usb_phy_cm_clk32k,	CK_443X),
 	CLK(NULL,	"usb_tll_hs_usb_ch2_clk",	&usb_tll_hs_usb_ch2_clk,	CK_443X),
 	CLK(NULL,	"usb_tll_hs_usb_ch0_clk",	&usb_tll_hs_usb_ch0_clk,	CK_443X),
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index ed0e85c3764f..7d3ea53af3ab 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -77,7 +77,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = DMA_BIT_MASK(32);
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= -1,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/arm/mach-omap2/usb-tusb6010.c b/arch/arm/mach-omap2/usb-tusb6010.c
index 64a0112b70a5..42389213b470 100644
--- a/arch/arm/mach-omap2/usb-tusb6010.c
+++ b/arch/arm/mach-omap2/usb-tusb6010.c
@@ -223,7 +223,7 @@ static struct resource tusb_resources[] = {
 static u64 tusb_dmamask = ~(u32)0;
 
 static struct platform_device tusb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= -1,
 	.dev = {
 		.dma_mask		= &tusb_dmamask,
diff --git a/arch/blackfin/mach-bf527/boards/ad7160eval.c b/arch/blackfin/mach-bf527/boards/ad7160eval.c
index fc767ac76381..ec9f2612be1d 100644
--- a/arch/blackfin/mach-bf527/boards/ad7160eval.c
+++ b/arch/blackfin/mach-bf527/boards/ad7160eval.c
@@ -83,7 +83,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf527/boards/cm_bf527.c b/arch/blackfin/mach-bf527/boards/cm_bf527.c
index f714d7be35b9..a7627dee688e 100644
--- a/arch/blackfin/mach-bf527/boards/cm_bf527.c
+++ b/arch/blackfin/mach-bf527/boards/cm_bf527.c
@@ -120,7 +120,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c
index 315eec930604..d1df634e24c3 100644
--- a/arch/blackfin/mach-bf527/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf527/boards/ezbrd.c
@@ -84,7 +84,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index 273731279740..5983abd8a7e3 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -124,7 +124,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c
index 9ec575729e2c..284ec1f44131 100644
--- a/arch/blackfin/mach-bf527/boards/tll6527m.c
+++ b/arch/blackfin/mach-bf527/boards/tll6527m.c
@@ -91,7 +91,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf548/boards/cm_bf548.c b/arch/blackfin/mach-bf548/boards/cm_bf548.c
index 3e3dfb23f94e..f52334519a20 100644
--- a/arch/blackfin/mach-bf548/boards/cm_bf548.c
+++ b/arch/blackfin/mach-bf548/boards/cm_bf548.c
@@ -520,7 +520,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 9ff166d6f00d..e2c851bef99a 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -625,7 +625,7 @@ static struct musb_hdrc_platform_data musb_plat = {
 static u64 musb_dmamask = ~(u32)0;
 
 static struct platform_device musb_device = {
-	.name		= "musb_hdrc",
+	.name		= "musb-hdrc",
 	.id		= 0,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
diff --git a/drivers/usb/gadget/gadget_chips.h b/drivers/usb/gadget/gadget_chips.h
index d7b3bbe56808..aeb109bd317d 100644
--- a/drivers/usb/gadget/gadget_chips.h
+++ b/drivers/usb/gadget/gadget_chips.h
@@ -96,7 +96,7 @@
 
 /* Mentor high speed "dual role" controller, in peripheral role */
 #ifdef CONFIG_USB_GADGET_MUSB_HDRC
-#define gadget_is_musbhdrc(g)	!strcmp("musb_hdrc", (g)->name)
+#define gadget_is_musbhdrc(g)	!strcmp("musb-hdrc", (g)->name)
 #else
 #define gadget_is_musbhdrc(g)	0
 #endif
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 7119d900a479..03a42901922c 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -30,7 +30,7 @@ config USB_MUSB_HDRC
 	  If you do not know what this is, please say N.
 
 	  To compile this driver as a module, choose M here; the
-	  module will be called "musb_hdrc".
+	  module will be called "musb-hdrc".
 
 choice
 	prompt "Platform Glue Layer"
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 8df1c583f19d..1ca14f90c994 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -126,7 +126,7 @@ MODULE_PARM_DESC(debug, "Debug message level. Default = 0");
 
 #define DRIVER_INFO DRIVER_DESC ", v" MUSB_VERSION
 
-#define MUSB_DRIVER_NAME "musb_hdrc"
+#define MUSB_DRIVER_NAME "musb-hdrc"
 const char musb_driver_name[] = MUSB_DRIVER_NAME;
 
 MODULE_DESCRIPTION(DRIVER_INFO);
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index ee2dd1d506ed..e2191d4db4dd 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -3,7 +3,7 @@
  * Inventra (Multidrop) Highspeed Dual-Role Controllers:  (M)HDRC.
  *
  * Board initialization should put one of these into dev->platform_data,
- * probably on some platform_device named "musb_hdrc".  It encapsulates
+ * probably on some platform_device named "musb-hdrc".  It encapsulates
  * key configuration differences between boards.
  */
 
-- 
cgit v1.2.3-71-gd317


From f7ec94370f417fedad4db1054228ef958d48b926 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 2 Dec 2010 09:48:58 +0200
Subject: usb: musb: pass platform_ops via platform_data

... then we don't need to export any symbols
from glue layer to musb_core.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/am35x.c     | 4 +++-
 drivers/usb/musb/blackfin.c  | 4 +++-
 drivers/usb/musb/da8xx.c     | 4 +++-
 drivers/usb/musb/davinci.c   | 4 +++-
 drivers/usb/musb/musb_core.c | 2 +-
 drivers/usb/musb/musb_core.h | 2 --
 drivers/usb/musb/omap2430.c  | 4 +++-
 drivers/usb/musb/tusb6010.c  | 4 +++-
 include/linux/usb/musb.h     | 3 +++
 9 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index 355883c5ffed..e372c87f37e2 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -530,7 +530,7 @@ void musb_read_fifo(struct musb_hw_ep *hw_ep, u16 len, u8 *dst)
 	}
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops am35x_ops = {
 	.init		= am35x_musb_init,
 	.exit		= am35x_musb_exit,
 
@@ -572,6 +572,8 @@ static int __init am35x_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &am35x_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 02eded21d171..03cb001c0e19 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -436,7 +436,7 @@ static int bfin_musb_exit(struct musb *musb)
 	return 0;
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops bfin_ops = {
 	.init		= bfin_musb_init,
 	.exit		= bfin_musb_exit,
 
@@ -479,6 +479,8 @@ static int __init bfin_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &bfin_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 94ddfe01d673..45ccac3aad9d 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -475,7 +475,7 @@ static int da8xx_musb_exit(struct musb *musb)
 	return 0;
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops da8xx_ops = {
 	.init		= da8xx_musb_init,
 	.exit		= da8xx_musb_exit,
 
@@ -517,6 +517,8 @@ static int __init da8xx_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &da8xx_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c
index 661870a1cd4d..831a04dd5a53 100644
--- a/drivers/usb/musb/davinci.c
+++ b/drivers/usb/musb/davinci.c
@@ -510,7 +510,7 @@ static int davinci_musb_exit(struct musb *musb)
 	return 0;
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops davinci_ops = {
 	.init		= davinci_musb_init,
 	.exit		= davinci_musb_exit,
 
@@ -551,6 +551,8 @@ static int __init davinci_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &davinci_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 1ca14f90c994..dcc77ef6cfff 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1897,7 +1897,6 @@ allocate_instance(struct device *dev,
 	}
 
 	musb->controller = dev;
-	musb->ops = &musb_ops;
 
 	return musb;
 }
@@ -1997,6 +1996,7 @@ bad_config:
 	musb->board_set_power = plat->set_power;
 	musb->set_clock = plat->set_clock;
 	musb->min_power = plat->min_power;
+	musb->ops = plat->platform_ops;
 
 	/* Clock usage is chip-specific ... functional clock (DaVinci,
 	 * OMAP2430), or PHY ref (some TUSB6010 boards).  All this core
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 9594b9d1d27b..fac1eab3c59f 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -281,8 +281,6 @@ struct musb_platform_ops {
 	void	(*set_vbus)(struct musb *musb, int on);
 };
 
-extern const struct musb_platform_ops musb_ops;
-
 /*
  * struct musb_hw_ep - endpoint hardware (bidirectional)
  *
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index bca9df7557a4..2eea1703e630 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -333,7 +333,7 @@ static int omap2430_musb_exit(struct musb *musb)
 	return 0;
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops omap2430_ops = {
 	.init		= omap2430_musb_init,
 	.exit		= omap2430_musb_exit,
 
@@ -378,6 +378,8 @@ static int __init omap2430_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &omap2430_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 2ff78d6b2eff..d6b832641c53 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -1178,7 +1178,7 @@ static int tusb_musb_exit(struct musb *musb)
 	return 0;
 }
 
-const struct musb_platform_ops musb_ops = {
+static const struct musb_platform_ops tusb_ops = {
 	.init		= tusb_musb_init,
 	.exit		= tusb_musb_exit,
 
@@ -1221,6 +1221,8 @@ static int __init tusb_probe(struct platform_device *pdev)
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 
+	pdata->platform_ops		= &tusb_ops;
+
 	platform_set_drvdata(pdev, glue);
 
 	ret = platform_device_add_resources(musb, pdev->resource,
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index e2191d4db4dd..6f4e5014cf5e 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -126,6 +126,9 @@ struct musb_hdrc_platform_data {
 
 	/* Architecture specific board data	*/
 	void		*board_data;
+
+	/* Platform specific struct musb_ops pointer */
+	const void	*platform_ops;
 };
 
 
-- 
cgit v1.2.3-71-gd317


From 3b7029670d39d22f288ece95254e9ba5ceddd6ba Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 2 Dec 2010 09:51:00 +0200
Subject: usb: musb: mark ->set_clock deprecated

... we will completely drop that need by
moving clock handling to platform glue
layer. Marking as deprecated will allow
us to catch all users easily.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/musb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 6f4e5014cf5e..0b72b5741645 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -119,7 +119,7 @@ struct musb_hdrc_platform_data {
 	int		(*set_power)(int state);
 
 	/* Turn device clock on or off */
-	int		(*set_clock)(struct clk *clock, int is_on);
+	int		(*set_clock)(struct clk *clock, int is_on) __deprecated;
 
 	/* MUSB configuration-specific details */
 	struct musb_hdrc_config	*config;
-- 
cgit v1.2.3-71-gd317


From fa56df915d101770a495569473b4c13b1904087b Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 2 Dec 2010 10:55:29 +0200
Subject: usb: musb: drop the set_clock magic

now that platform glue layer handles
clock completely, that function is completely
useless for us. Drop it.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/musb_core.h | 2 --
 include/linux/usb/musb.h     | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 6c8e9630fb19..1e538675ab86 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -455,8 +455,6 @@ struct musb {
 	u8 board_mode;		/* enum musb_mode */
 	int			(*board_set_power)(int state);
 
-	int			(*set_clock)(struct clk *clk, int is_active);
-
 	u8			min_power;	/* vbus for periph, in mA/2 */
 
 	bool			is_host;
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 0b72b5741645..0b0e383035e5 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -118,9 +118,6 @@ struct musb_hdrc_platform_data {
 	/* Power the device on or off */
 	int		(*set_power)(int state);
 
-	/* Turn device clock on or off */
-	int		(*set_clock)(struct clk *clock, int is_on) __deprecated;
-
 	/* MUSB configuration-specific details */
 	struct musb_hdrc_config	*config;
 
-- 
cgit v1.2.3-71-gd317


From e70357e35c522776d9f56f6800af4ed7a5bdbaaf Mon Sep 17 00:00:00 2001
From: Hema HK <hemahk@ti.com>
Date: Fri, 10 Dec 2010 18:09:52 +0530
Subject: mfd: TWL6030: OMAP4: Registering the TWL6030-usb device

Registering the twl6030-usb transceiver device as a child to twl6030 core.
Removed the NOP transceiver init call from board file.

Populated twl4030_usb_data platform data structure with the function
pointers for OMAP4430 internal PHY operation to be used by twl630-usb driver.

Signed-off-by: Hema HK <hemahk@ti.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-omap2/board-4430sdp.c    | 11 ++++++---
 arch/arm/mach-omap2/board-omap4panda.c |  8 +++++++
 drivers/mfd/twl-core.c                 | 44 ++++++++++++++++++++++++++++++----
 include/linux/i2c/twl.h                |  7 ++++++
 4 files changed, 62 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 288ff4415757..95bdea82ece6 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -242,6 +242,13 @@ static struct omap_musb_board_data musb_board_data = {
 	.power			= 100,
 };
 
+static struct twl4030_usb_data omap4_usbphy_data = {
+	.phy_init	= omap4430_phy_init,
+	.phy_exit	= omap4430_phy_exit,
+	.phy_power	= omap4430_phy_power,
+	.phy_set_clock	= omap4430_phy_set_clk,
+};
+
 static struct omap2_hsmmc_info mmc[] = {
 	{
 		.mmc		= 1,
@@ -461,6 +468,7 @@ static struct twl4030_platform_data sdp4430_twldata = {
 	.vaux1		= &sdp4430_vaux1,
 	.vaux2		= &sdp4430_vaux2,
 	.vaux3		= &sdp4430_vaux3,
+	.usb		= &omap4_usbphy_data
 };
 
 static struct i2c_board_info __initdata sdp4430_i2c_boardinfo[] = {
@@ -533,9 +541,6 @@ static void __init omap_4430sdp_init(void)
 		gpio_direction_output(OMAP4SDP_MDM_PWR_EN_GPIO, 1);
 	}
 	usb_ehci_init(&ehci_pdata);
-
-	/* OMAP4 SDP uses internal transceiver so register nop transceiver */
-	usb_nop_xceiv_register();
 	usb_musb_init(&musb_board_data);
 
 	status = omap_ethernet_init();
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 3bf644deb8b8..585b2e387341 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -137,6 +137,13 @@ static struct omap_musb_board_data musb_board_data = {
 	.power			= 100,
 };
 
+static struct twl4030_usb_data omap4_usbphy_data = {
+	.phy_init	= omap4430_phy_init,
+	.phy_exit	= omap4430_phy_exit,
+	.phy_power	= omap4430_phy_power,
+	.phy_set_clock	= omap4430_phy_set_clk,
+};
+
 static struct omap2_hsmmc_info mmc[] = {
 	{
 		.mmc		= 1,
@@ -345,6 +352,7 @@ static struct twl4030_platform_data omap4_panda_twldata = {
 	.vaux1		= &omap4_panda_vaux1,
 	.vaux2		= &omap4_panda_vaux2,
 	.vaux3		= &omap4_panda_vaux3,
+	.usb		= &omap4_usbphy_data,
 };
 
 static struct i2c_board_info __initdata omap4_panda_i2c_boardinfo[] = {
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 35275ba7096f..12abd5b924b3 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -95,7 +95,8 @@
 #define twl_has_rtc()	false
 #endif
 
-#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE)
+#if defined(CONFIG_TWL4030_USB) || defined(CONFIG_TWL4030_USB_MODULE) ||\
+	defined(CONFIG_TWL6030_USB) || defined(CONFIG_TWL6030_USB_MODULE)
 #define twl_has_usb()	true
 #else
 #define twl_has_usb()	false
@@ -682,6 +683,43 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			usb3v1.dev = child;
 		}
 	}
+	if (twl_has_usb() && pdata->usb && twl_class_is_6030()) {
+
+		static struct regulator_consumer_supply usb3v3 = {
+			.supply =	"vusb",
+		};
+
+		if (twl_has_regulator()) {
+			/* this is a template that gets copied */
+			struct regulator_init_data usb_fixed = {
+				.constraints.valid_modes_mask =
+					REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+				.constraints.valid_ops_mask =
+					REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+			};
+
+			child = add_regulator_linked(TWL6030_REG_VUSB,
+						      &usb_fixed, &usb3v3, 1);
+			if (IS_ERR(child))
+				return PTR_ERR(child);
+		}
+
+		child = add_child(0, "twl6030_usb",
+			pdata->usb, sizeof(*pdata->usb),
+			true,
+			/* irq1 = VBUS_PRES, irq0 = USB ID */
+			pdata->irq_base + USBOTG_INTR_OFFSET,
+			pdata->irq_base + USB_PRES_INTR_OFFSET);
+
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+		/* we need to connect regulators to this transceiver */
+		if (twl_has_regulator() && child)
+			usb3v3.dev = child;
+
+	}
 
 	if (twl_has_watchdog()) {
 		child = add_child(0, "twl4030_wdt", NULL, 0, false, 0, 0);
@@ -815,10 +853,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		if (IS_ERR(child))
 			return PTR_ERR(child);
 
-		child = add_regulator(TWL6030_REG_VUSB, pdata->vusb);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-
 		child = add_regulator(TWL6030_REG_VAUX1_6030, pdata->vaux1);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index c760991b354a..61b9609e55f2 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -593,6 +593,13 @@ enum twl4030_usb_mode {
 
 struct twl4030_usb_data {
 	enum twl4030_usb_mode	usb_mode;
+
+	int		(*phy_init)(struct device *dev);
+	int		(*phy_exit)(struct device *dev);
+	/* Power on/off the PHY */
+	int		(*phy_power)(struct device *dev, int iD, int on);
+	/* enable/disable  phy clocks */
+	int		(*phy_set_clock)(struct device *dev, int on);
 };
 
 struct twl4030_ins {
-- 
cgit v1.2.3-71-gd317


From 4291ee305e9bb0699504a66f0e2b7aefcf0512a5 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 9 Dec 2010 19:29:03 -0800
Subject: HID: Add and use hid_<level>: dev_<level> equivalents

Neaten current uses of dev_<level> by adding and using
hid specific hid_<level> macros.

Convert existing uses of dev_<level> uses to hid_<level>.
Convert hid-pidff printk uses to hid_<level>.

Remove err_hid and use hid_err instead.

Add missing newlines to logging messages where necessary.
Coalesce format strings.

Add and use pr_fmt(fmt) KBUILD_MODNAME ": " fmt

Other miscellaneous changes:

Add const struct hid_device * argument to hid-core functions
extract() and implement() so hid_<level> can be used by them.
Fix bad indentation in hid-core hid_input_field function
that calls extract() function above.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-3m-pct.c       |   2 +-
 drivers/hid/hid-a4tech.c       |   6 +-
 drivers/hid/hid-apple.c        |  14 ++--
 drivers/hid/hid-axff.c         |  14 ++--
 drivers/hid/hid-belkin.c       |   4 +-
 drivers/hid/hid-cando.c        |   2 +-
 drivers/hid/hid-cherry.c       |   3 +-
 drivers/hid/hid-core.c         |  64 +++++++++-------
 drivers/hid/hid-cypress.c      |   4 +-
 drivers/hid/hid-debug.c        |   4 +-
 drivers/hid/hid-drff.c         |  14 ++--
 drivers/hid/hid-egalax.c       |   2 +-
 drivers/hid/hid-elecom.c       |   3 +-
 drivers/hid/hid-emsff.c        |  13 ++--
 drivers/hid/hid-gaff.c         |  13 ++--
 drivers/hid/hid-input.c        |   2 +-
 drivers/hid/hid-kye.c          |   4 +-
 drivers/hid/hid-lg.c           |  15 ++--
 drivers/hid/hid-lg2ff.c        |   9 +--
 drivers/hid/hid-lg3ff.c        |   9 +--
 drivers/hid/hid-lg4ff.c        |   9 +--
 drivers/hid/hid-lgff.c         |   8 +-
 drivers/hid/hid-magicmouse.c   |  15 ++--
 drivers/hid/hid-microsoft.c    |   7 +-
 drivers/hid/hid-monterey.c     |   3 +-
 drivers/hid/hid-mosart.c       |   2 +-
 drivers/hid/hid-ntrig.c        |   9 +--
 drivers/hid/hid-ortek.c        |   3 +-
 drivers/hid/hid-petalynx.c     |   7 +-
 drivers/hid/hid-picolcd.c      |  45 ++++++-----
 drivers/hid/hid-pl.c           |  16 ++--
 drivers/hid/hid-prodikeys.c    |  21 +++---
 drivers/hid/hid-quanta.c       |   2 +-
 drivers/hid/hid-roccat-kone.c  |  22 +++---
 drivers/hid/hid-roccat-pyra.c  |  20 +++--
 drivers/hid/hid-roccat.c       |  10 +--
 drivers/hid/hid-samsung.c      |   8 +-
 drivers/hid/hid-sjoy.c         |  16 ++--
 drivers/hid/hid-sony.c         |  11 ++-
 drivers/hid/hid-stantum.c      |   2 +-
 drivers/hid/hid-sunplus.c      |   3 +-
 drivers/hid/hid-tmff.c         |  27 +++----
 drivers/hid/hid-wacom.c        |  26 ++++---
 drivers/hid/hid-zpff.c         |  11 ++-
 drivers/hid/hid-zydacron.c     |  11 ++-
 drivers/hid/hidraw.c           |  12 +--
 drivers/hid/usbhid/hid-core.c  |  48 ++++++------
 drivers/hid/usbhid/hid-pidff.c | 164 +++++++++++++++++++----------------------
 drivers/hid/usbhid/hiddev.c    |   2 +-
 drivers/hid/usbhid/usbkbd.c    |  24 +++---
 include/linux/hid.h            |  32 ++++++--
 51 files changed, 398 insertions(+), 399 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-3m-pct.c b/drivers/hid/hid-3m-pct.c
index 02d8cd3b1b1b..4546c123eb77 100644
--- a/drivers/hid/hid-3m-pct.c
+++ b/drivers/hid/hid-3m-pct.c
@@ -274,7 +274,7 @@ static int mmm_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	md = kzalloc(sizeof(struct mmm_data), GFP_KERNEL);
 	if (!md) {
-		dev_err(&hdev->dev, "cannot allocate 3M data\n");
+		hid_err(hdev, "cannot allocate 3M data\n");
 		return -ENOMEM;
 	}
 	hid_set_drvdata(hdev, md);
diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c
index 1666c1684e79..902d1dfeb1b5 100644
--- a/drivers/hid/hid-a4tech.c
+++ b/drivers/hid/hid-a4tech.c
@@ -93,7 +93,7 @@ static int a4_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	a4 = kzalloc(sizeof(*a4), GFP_KERNEL);
 	if (a4 == NULL) {
-		dev_err(&hdev->dev, "can't alloc device descriptor\n");
+		hid_err(hdev, "can't alloc device descriptor\n");
 		ret = -ENOMEM;
 		goto err_free;
 	}
@@ -104,13 +104,13 @@ static int a4_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 8aa71750a23c..61aa71233392 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -16,6 +16,8 @@
  * any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/module.h>
@@ -280,8 +282,8 @@ static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 
 	if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 &&
 			rdesc[53] == 0x65 && rdesc[59] == 0x65) {
-		dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report "
-				"descriptor\n");
+		hid_info(hdev,
+			 "fixing up MacBook JIS keyboard report descriptor\n");
 		rdesc[53] = rdesc[59] = 0xe7;
 	}
 	return rdesc;
@@ -351,7 +353,7 @@ static int apple_probe(struct hid_device *hdev,
 
 	asc = kzalloc(sizeof(*asc), GFP_KERNEL);
 	if (asc == NULL) {
-		dev_err(&hdev->dev, "can't alloc apple descriptor\n");
+		hid_err(hdev, "can't alloc apple descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -361,7 +363,7 @@ static int apple_probe(struct hid_device *hdev,
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
@@ -372,7 +374,7 @@ static int apple_probe(struct hid_device *hdev,
 
 	ret = hid_hw_start(hdev, connect_mask);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
@@ -512,7 +514,7 @@ static int __init apple_init(void)
 
 	ret = hid_register_driver(&apple_driver);
 	if (ret)
-		printk(KERN_ERR "can't register apple driver\n");
+		pr_err("can't register apple driver\n");
 
 	return ret;
 }
diff --git a/drivers/hid/hid-axff.c b/drivers/hid/hid-axff.c
index f42ee140738a..e5b961d6ff22 100644
--- a/drivers/hid/hid-axff.c
+++ b/drivers/hid/hid-axff.c
@@ -73,14 +73,14 @@ static int axff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
 	report = list_first_entry(report_list, struct hid_report, list);
 
 	if (report->maxfield < 4) {
-		dev_err(&hid->dev, "no fields in the report: %d\n", report->maxfield);
+		hid_err(hid, "no fields in the report: %d\n", report->maxfield);
 		return -ENODEV;
 	}
 
@@ -101,7 +101,7 @@ static int axff_init(struct hid_device *hid)
 	axff->report->field[3]->value[0] = 0x00;
 	usbhid_submit_report(hid, axff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "Force Feedback for ACRUX game controllers by Sergei Kolzun<x0r@dv-life.ru>\n");
+	hid_info(hid, "Force Feedback for ACRUX game controllers by Sergei Kolzun<x0r@dv-life.ru>\n");
 
 	return 0;
 
@@ -114,17 +114,17 @@ static int ax_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	int error;
 
-	dev_dbg(&hdev->dev, "ACRUX HID hardware probe...");
+	dev_dbg(&hdev->dev, "ACRUX HID hardware probe...\n");
 
 	error = hid_parse(hdev);
 	if (error) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		return error;
 	}
 
 	error = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (error) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		return error;
 	}
 
@@ -134,7 +134,7 @@ static int ax_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		 * Do not fail device initialization completely as device
 		 * may still be partially operable, just warn.
 		 */
-		dev_warn(&hdev->dev,
+		hid_warn(hdev,
 			 "Failed to enable force feedback support, error: %d\n",
 			 error);
 	}
diff --git a/drivers/hid/hid-belkin.c b/drivers/hid/hid-belkin.c
index 4ce7aa3a519f..a1a765a5b08a 100644
--- a/drivers/hid/hid-belkin.c
+++ b/drivers/hid/hid-belkin.c
@@ -56,14 +56,14 @@ static int belkin_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT |
 		((quirks & BELKIN_HIDDEV) ? HID_CONNECT_HIDDEV_FORCE : 0));
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-cando.c b/drivers/hid/hid-cando.c
index 5925bdcd417d..375b50929a50 100644
--- a/drivers/hid/hid-cando.c
+++ b/drivers/hid/hid-cando.c
@@ -207,7 +207,7 @@ static int cando_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	td = kmalloc(sizeof(struct cando_data), GFP_KERNEL);
 	if (!td) {
-		dev_err(&hdev->dev, "cannot allocate Cando Touch data\n");
+		hid_err(hdev, "cannot allocate Cando Touch data\n");
 		return -ENOMEM;
 	}
 	hid_set_drvdata(hdev, td);
diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c
index e880086c2311..888ece68a47c 100644
--- a/drivers/hid/hid-cherry.c
+++ b/drivers/hid/hid-cherry.c
@@ -30,8 +30,7 @@ static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
 	if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) {
-		dev_info(&hdev->dev, "fixing up Cherry Cymotion report "
-				"descriptor\n");
+		hid_info(hdev, "fixing up Cherry Cymotion report descriptor\n");
 		rdesc[11] = rdesc[16] = 0xff;
 		rdesc[12] = rdesc[17] = 0x03;
 	}
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 396f8c7e0076..1fd5e331fa8d 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -14,6 +14,8 @@
  * any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -672,7 +674,8 @@ int hid_parse_report(struct hid_device *device, __u8 *start,
 
 		if (dispatch_type[item.type](parser, &item)) {
 			dbg_hid("item %u %u %u %u parsing failed\n",
-				item.format, (unsigned)item.size, (unsigned)item.type, (unsigned)item.tag);
+				item.format, (unsigned)item.size,
+				(unsigned)item.type, (unsigned)item.tag);
 			goto err;
 		}
 
@@ -737,13 +740,14 @@ static u32 s32ton(__s32 value, unsigned n)
  * Search linux-kernel and linux-usb-devel archives for "hid-core extract".
  */
 
-static __inline__ __u32 extract(__u8 *report, unsigned offset, unsigned n)
+static __inline__ __u32 extract(const struct hid_device *hid, __u8 *report,
+				unsigned offset, unsigned n)
 {
 	u64 x;
 
 	if (n > 32)
-		printk(KERN_WARNING "HID: extract() called with n (%d) > 32! (%s)\n",
-				n, current->comm);
+		hid_warn(hid, "extract() called with n (%d) > 32! (%s)\n",
+			 n, current->comm);
 
 	report += offset >> 3;  /* adjust byte index */
 	offset &= 7;            /* now only need bit offset into one byte */
@@ -760,18 +764,19 @@ static __inline__ __u32 extract(__u8 *report, unsigned offset, unsigned n)
  * endianness of register values by considering a register
  * a "cached" copy of the little endiad bit stream.
  */
-static __inline__ void implement(__u8 *report, unsigned offset, unsigned n, __u32 value)
+static __inline__ void implement(const struct hid_device *hid, __u8 *report,
+				 unsigned offset, unsigned n, __u32 value)
 {
 	u64 x;
 	u64 m = (1ULL << n) - 1;
 
 	if (n > 32)
-		printk(KERN_WARNING "HID: implement() called with n (%d) > 32! (%s)\n",
-				n, current->comm);
+		hid_warn(hid, "%s() called with n (%d) > 32! (%s)\n",
+			 __func__, n, current->comm);
 
 	if (value > m)
-		printk(KERN_WARNING "HID: implement() called with too large value %d! (%s)\n",
-				value, current->comm);
+		hid_warn(hid, "%s() called with too large value %d! (%s)\n",
+			 __func__, value, current->comm);
 	WARN_ON(value > m);
 	value &= m;
 
@@ -892,13 +897,16 @@ static void hid_input_field(struct hid_device *hid, struct hid_field *field,
 
 	for (n = 0; n < count; n++) {
 
-			value[n] = min < 0 ? snto32(extract(data, offset + n * size, size), size) :
-						    extract(data, offset + n * size, size);
+		value[n] = min < 0 ?
+			snto32(extract(hid, data, offset + n * size, size),
+			       size) :
+			extract(hid, data, offset + n * size, size);
 
-			if (!(field->flags & HID_MAIN_ITEM_VARIABLE) /* Ignore report if ErrorRollOver */
-			    && value[n] >= min && value[n] <= max
-			    && field->usage[value[n] - min].hid == HID_UP_KEYBOARD + 1)
-				goto exit;
+		/* Ignore report if ErrorRollOver */
+		if (!(field->flags & HID_MAIN_ITEM_VARIABLE) &&
+		    value[n] >= min && value[n] <= max &&
+		    field->usage[value[n] - min].hid == HID_UP_KEYBOARD + 1)
+			goto exit;
 	}
 
 	for (n = 0; n < count; n++) {
@@ -928,7 +936,8 @@ exit:
  * Output the field into the report.
  */
 
-static void hid_output_field(struct hid_field *field, __u8 *data)
+static void hid_output_field(const struct hid_device *hid,
+			     struct hid_field *field, __u8 *data)
 {
 	unsigned count = field->report_count;
 	unsigned offset = field->report_offset;
@@ -937,9 +946,11 @@ static void hid_output_field(struct hid_field *field, __u8 *data)
 
 	for (n = 0; n < count; n++) {
 		if (field->logical_minimum < 0)	/* signed values */
-			implement(data, offset + n * size, size, s32ton(field->value[n], size));
+			implement(hid, data, offset + n * size, size,
+				  s32ton(field->value[n], size));
 		else				/* unsigned values */
-			implement(data, offset + n * size, size, field->value[n]);
+			implement(hid, data, offset + n * size, size,
+				  field->value[n]);
 	}
 }
 
@@ -956,7 +967,7 @@ void hid_output_report(struct hid_report *report, __u8 *data)
 
 	memset(data, 0, ((report->size - 1) >> 3) + 1);
 	for (n = 0; n < report->maxfield; n++)
-		hid_output_field(report->field[n], data);
+		hid_output_field(report->device, report->field[n], data);
 }
 EXPORT_SYMBOL_GPL(hid_output_report);
 
@@ -1169,8 +1180,7 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 		hdev->claimed |= HID_CLAIMED_HIDRAW;
 
 	if (!hdev->claimed) {
-		dev_err(&hdev->dev, "claimed by neither input, hiddev nor "
-				"hidraw\n");
+		hid_err(hdev, "claimed by neither input, hiddev nor hidraw\n");
 		return -ENODEV;
 	}
 
@@ -1210,9 +1220,9 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 		bus = "<UNKNOWN>";
 	}
 
-	dev_info(&hdev->dev, "%s: %s HID v%x.%02x %s [%s] on %s\n",
-			buf, bus, hdev->version >> 8, hdev->version & 0xff,
-			type, hdev->name, hdev->phys);
+	hid_info(hdev, "%s: %s HID v%x.%02x %s [%s] on %s\n",
+		 buf, bus, hdev->version >> 8, hdev->version & 0xff,
+		 type, hdev->name, hdev->phys);
 
 	return 0;
 }
@@ -1956,12 +1966,12 @@ static int __init hid_init(void)
 	int ret;
 
 	if (hid_debug)
-		printk(KERN_WARNING "HID: hid_debug is now used solely for parser and driver debugging.\n"
-				"HID: debugfs is now used for inspecting the device (report descriptor, reports)\n");
+		pr_warn("hid_debug is now used solely for parser and driver debugging.\n"
+			"debugfs is now used for inspecting the device (report descriptor, reports)\n");
 
 	ret = bus_register(&hid_bus_type);
 	if (ret) {
-		printk(KERN_ERR "HID: can't register hid bus\n");
+		pr_err("can't register hid bus\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c
index 4cd0e2345991..2f0be4c66af7 100644
--- a/drivers/hid/hid-cypress.c
+++ b/drivers/hid/hid-cypress.c
@@ -107,13 +107,13 @@ static int cp_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 75c5e23d09d2..555382fc7417 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -26,6 +26,8 @@
  * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
@@ -393,7 +395,7 @@ char *hid_resolv_usage(unsigned usage, struct seq_file *f) {
 
 	buf = resolv_usage_page(usage >> 16, f);
 	if (IS_ERR(buf)) {
-		printk(KERN_ERR "error allocating HID debug buffer\n");
+		pr_err("error allocating HID debug buffer\n");
 		return NULL;
 	}
 
diff --git a/drivers/hid/hid-drff.c b/drivers/hid/hid-drff.c
index 968b04f9b796..afcf3d67eb02 100644
--- a/drivers/hid/hid-drff.c
+++ b/drivers/hid/hid-drff.c
@@ -96,18 +96,18 @@ static int drff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
 	report = list_first_entry(report_list, struct hid_report, list);
 	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "no fields in the report\n");
+		hid_err(hid, "no fields in the report\n");
 		return -ENODEV;
 	}
 
 	if (report->field[0]->report_count < 7) {
-		dev_err(&hid->dev, "not enough values in the field\n");
+		hid_err(hid, "not enough values in the field\n");
 		return -ENODEV;
 	}
 
@@ -133,8 +133,8 @@ static int drff_init(struct hid_device *hid)
 	drff->report->field[0]->value[6] = 0x00;
 	usbhid_submit_report(hid, drff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "Force Feedback for DragonRise Inc. game "
-	       "controllers by Richard Walmsley <richwalm@gmail.com>\n");
+	hid_info(hid, "Force Feedback for DragonRise Inc. "
+		 "game controllers by Richard Walmsley <richwalm@gmail.com>\n");
 
 	return 0;
 }
@@ -153,13 +153,13 @@ static int dr_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c
index 54b017ad258d..bbab6cff3518 100644
--- a/drivers/hid/hid-egalax.c
+++ b/drivers/hid/hid-egalax.c
@@ -223,7 +223,7 @@ static int egalax_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	td = kmalloc(sizeof(struct egalax_data), GFP_KERNEL);
 	if (!td) {
-		dev_err(&hdev->dev, "cannot allocate eGalax data\n");
+		hid_err(hdev, "cannot allocate eGalax data\n");
 		return -ENOMEM;
 	}
 	hid_set_drvdata(hdev, td);
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index 6e31f305397d..79d0c61e7214 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -24,8 +24,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
 	if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
-		dev_info(&hdev->dev, "Fixing up Elecom BM084 "
-				"report descriptor.\n");
+		hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
 		rdesc[47] = 0x00;
 	}
     return rdesc;
diff --git a/drivers/hid/hid-emsff.c b/drivers/hid/hid-emsff.c
index c61b192d7cff..81877c67caea 100644
--- a/drivers/hid/hid-emsff.c
+++ b/drivers/hid/hid-emsff.c
@@ -68,18 +68,18 @@ static int emsff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
 	report = list_first_entry(report_list, struct hid_report, list);
 	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "no fields in the report\n");
+		hid_err(hid, "no fields in the report\n");
 		return -ENODEV;
 	}
 
 	if (report->field[0]->report_count < 7) {
-		dev_err(&hid->dev, "not enough values in the field\n");
+		hid_err(hid, "not enough values in the field\n");
 		return -ENODEV;
 	}
 
@@ -105,8 +105,7 @@ static int emsff_init(struct hid_device *hid)
 	emsff->report->field[0]->value[6] = 0x00;
 	usbhid_submit_report(hid, emsff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "force feedback for EMS based devices by "
-	       "Ignaz Forster <ignaz.forster@gmx.de>\n");
+	hid_info(hid, "force feedback for EMS based devices by Ignaz Forster <ignaz.forster@gmx.de>\n");
 
 	return 0;
 }
@@ -117,13 +116,13 @@ static int ems_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c
index 88dfcf49a5d7..279ba530003c 100644
--- a/drivers/hid/hid-gaff.c
+++ b/drivers/hid/hid-gaff.c
@@ -87,7 +87,7 @@ static int gaff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
@@ -95,12 +95,12 @@ static int gaff_init(struct hid_device *hid)
 
 	report = list_entry(report_ptr, struct hid_report, list);
 	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "no fields in the report\n");
+		hid_err(hid, "no fields in the report\n");
 		return -ENODEV;
 	}
 
 	if (report->field[0]->report_count < 6) {
-		dev_err(&hid->dev, "not enough values in the field\n");
+		hid_err(hid, "not enough values in the field\n");
 		return -ENODEV;
 	}
 
@@ -128,8 +128,7 @@ static int gaff_init(struct hid_device *hid)
 
 	usbhid_submit_report(hid, gaff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "Force Feedback for GreenAsia 0x12"
-	       " devices by Lukasz Lubojanski <lukasz@lubojanski.info>\n");
+	hid_info(hid, "Force Feedback for GreenAsia 0x12 devices by Lukasz Lubojanski <lukasz@lubojanski.info>\n");
 
 	return 0;
 }
@@ -148,13 +147,13 @@ static int ga_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b718f7144ce1..a1a2206714fc 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -826,7 +826,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
 				if (!hidinput || !input_dev) {
 					kfree(hidinput);
 					input_free_device(input_dev);
-					err_hid("Out of memory during hid input probe");
+					hid_err(hid, "Out of memory during hid input probe\n");
 					goto out_unwind;
 				}
 
diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c
index 817247ee006c..f2ba9efc3a53 100644
--- a/drivers/hid/hid-kye.c
+++ b/drivers/hid/hid-kye.c
@@ -32,8 +32,8 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		rdesc[65] == 0x29 && rdesc[66] == 0x0f &&
 		rdesc[71] == 0x75 && rdesc[72] == 0x08 &&
 		rdesc[73] == 0x95 && rdesc[74] == 0x01) {
-		dev_info(&hdev->dev, "fixing up Kye/Genius Ergo Mouse report "
-				"descriptor\n");
+		hid_info(hdev,
+			 "fixing up Kye/Genius Ergo Mouse report descriptor\n");
 		rdesc[62] = 0x09;
 		rdesc[64] = 0x04;
 		rdesc[66] = 0x07;
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index b629fba5a057..aef4104da141 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -53,23 +53,22 @@ static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 
 	if ((quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 &&
 			rdesc[84] == 0x8c && rdesc[85] == 0x02) {
-		dev_info(&hdev->dev, "fixing up Logitech keyboard report "
-				"descriptor\n");
+		hid_info(hdev,
+			 "fixing up Logitech keyboard report descriptor\n");
 		rdesc[84] = rdesc[89] = 0x4d;
 		rdesc[85] = rdesc[90] = 0x10;
 	}
 	if ((quirks & LG_RDESC_REL_ABS) && *rsize >= 50 &&
 			rdesc[32] == 0x81 && rdesc[33] == 0x06 &&
 			rdesc[49] == 0x81 && rdesc[50] == 0x06) {
-		dev_info(&hdev->dev, "fixing up rel/abs in Logitech "
-				"report descriptor\n");
+		hid_info(hdev,
+			 "fixing up rel/abs in Logitech report descriptor\n");
 		rdesc[33] = rdesc[50] = 0x02;
 	}
 	if ((quirks & LG_FF4) && *rsize >= 101 &&
 			rdesc[41] == 0x95 && rdesc[42] == 0x0B &&
 			rdesc[47] == 0x05 && rdesc[48] == 0x09) {
-		dev_info(&hdev->dev, "fixing up Logitech Speed Force Wireless "
-			"button descriptor\n");
+		hid_info(hdev, "fixing up Logitech Speed Force Wireless button descriptor\n");
 		rdesc[41] = 0x05;
 		rdesc[42] = 0x09;
 		rdesc[47] = 0x95;
@@ -288,7 +287,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
@@ -297,7 +296,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_hw_start(hdev, connect_mask);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c
index 4258253c36b3..3c31bc650e5d 100644
--- a/drivers/hid/hid-lg2ff.c
+++ b/drivers/hid/hid-lg2ff.c
@@ -72,18 +72,18 @@ int lg2ff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output report found\n");
+		hid_err(hid, "no output report found\n");
 		return -ENODEV;
 	}
 
 	report = list_entry(report_list->next, struct hid_report, list);
 
 	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "output report is empty\n");
+		hid_err(hid, "output report is empty\n");
 		return -ENODEV;
 	}
 	if (report->field[0]->report_count < 7) {
-		dev_err(&hid->dev, "not enough values in the field\n");
+		hid_err(hid, "not enough values in the field\n");
 		return -ENODEV;
 	}
 
@@ -110,8 +110,7 @@ int lg2ff_init(struct hid_device *hid)
 
 	usbhid_submit_report(hid, report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "Force feedback for Logitech RumblePad/Rumblepad 2 by "
-	       "Anssi Hannula <anssi.hannula@gmail.com>\n");
+	hid_info(hid, "Force feedback for Logitech RumblePad/Rumblepad 2 by Anssi Hannula <anssi.hannula@gmail.com>\n");
 
 	return 0;
 }
diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c
index 4002832ee4af..f98644c26c1d 100644
--- a/drivers/hid/hid-lg3ff.c
+++ b/drivers/hid/hid-lg3ff.c
@@ -141,20 +141,20 @@ int lg3ff_init(struct hid_device *hid)
 
 	/* Find the report to use */
 	if (list_empty(report_list)) {
-		err_hid("No output report found");
+		hid_err(hid, "No output report found\n");
 		return -1;
 	}
 
 	/* Check that the report looks ok */
 	report = list_entry(report_list->next, struct hid_report, list);
 	if (!report) {
-		err_hid("NULL output report");
+		hid_err(hid, "NULL output report\n");
 		return -1;
 	}
 
 	field = report->field[0];
 	if (!field) {
-		err_hid("NULL field");
+		hid_err(hid, "NULL field\n");
 		return -1;
 	}
 
@@ -169,8 +169,7 @@ int lg3ff_init(struct hid_device *hid)
 	if (test_bit(FF_AUTOCENTER, dev->ffbit))
 		dev->ff->set_autocenter = hid_lg3ff_set_autocenter;
 
-	dev_info(&hid->dev, "Force feedback for Logitech Flight System G940 by "
-			"Gary Stein <LordCnidarian@gmail.com>\n");
+	hid_info(hid, "Force feedback for Logitech Flight System G940 by Gary Stein <LordCnidarian@gmail.com>\n");
 	return 0;
 }
 
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 7eef5a2ce948..fa550c8e1d1b 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -101,20 +101,20 @@ int lg4ff_init(struct hid_device *hid)
 
 	/* Find the report to use */
 	if (list_empty(report_list)) {
-		err_hid("No output report found");
+		hid_err(hid, "No output report found\n");
 		return -1;
 	}
 
 	/* Check that the report looks ok */
 	report = list_entry(report_list->next, struct hid_report, list);
 	if (!report) {
-		err_hid("NULL output report");
+		hid_err(hid, "NULL output report\n");
 		return -1;
 	}
 
 	field = report->field[0];
 	if (!field) {
-		err_hid("NULL field");
+		hid_err(hid, "NULL field\n");
 		return -1;
 	}
 
@@ -129,8 +129,7 @@ int lg4ff_init(struct hid_device *hid)
 	if (test_bit(FF_AUTOCENTER, dev->ffbit))
 		dev->ff->set_autocenter = hid_lg4ff_set_autocenter;
 
-	dev_info(&hid->dev, "Force feedback for Logitech Speed Force Wireless by "
-			"Simon Wood <simon@mungewell.org>\n");
+	hid_info(hid, "Force feedback for Logitech Speed Force Wireless by Simon Wood <simon@mungewell.org>\n");
 	return 0;
 }
 
diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index 61142b76a9b1..90d0ef2c92be 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -27,6 +27,8 @@
  * e-mail - mail your message to <johann.deneux@it.uu.se>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/input.h>
 #include <linux/usb.h>
 #include <linux/hid.h>
@@ -146,7 +148,7 @@ int lgff_init(struct hid_device* hid)
 
 	/* Find the report to use */
 	if (list_empty(report_list)) {
-		err_hid("No output report found");
+		hid_err(hid, "No output report found\n");
 		return -1;
 	}
 
@@ -154,7 +156,7 @@ int lgff_init(struct hid_device* hid)
 	report = list_entry(report_list->next, struct hid_report, list);
 	field = report->field[0];
 	if (!field) {
-		err_hid("NULL field");
+		hid_err(hid, "NULL field\n");
 		return -1;
 	}
 
@@ -176,7 +178,7 @@ int lgff_init(struct hid_device* hid)
 	if ( test_bit(FF_AUTOCENTER, dev->ffbit) )
 		dev->ff->set_autocenter = hid_lgff_set_autocenter;
 
-	printk(KERN_INFO "Force feedback for Logitech force feedback devices by Johann Deneux <johann.deneux@it.uu.se>\n");
+	pr_info("Force feedback for Logitech force feedback devices by Johann Deneux <johann.deneux@it.uu.se>\n");
 
 	return 0;
 }
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index e6dc15171664..11306b3d9c56 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -12,6 +12,8 @@
  * any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/module.h>
@@ -446,7 +448,7 @@ static int magicmouse_probe(struct hid_device *hdev,
 
 	msc = kzalloc(sizeof(*msc), GFP_KERNEL);
 	if (msc == NULL) {
-		dev_err(&hdev->dev, "can't alloc magicmouse descriptor\n");
+		hid_err(hdev, "can't alloc magicmouse descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -459,13 +461,13 @@ static int magicmouse_probe(struct hid_device *hdev,
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "magicmouse hid parse failed\n");
+		hid_err(hdev, "magicmouse hid parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "magicmouse hw start failed\n");
+		hid_err(hdev, "magicmouse hw start failed\n");
 		goto err_free;
 	}
 
@@ -486,7 +488,7 @@ static int magicmouse_probe(struct hid_device *hdev,
 	}
 
 	if (!report) {
-		dev_err(&hdev->dev, "unable to register touch report\n");
+		hid_err(hdev, "unable to register touch report\n");
 		ret = -ENOMEM;
 		goto err_stop_hw;
 	}
@@ -495,8 +497,7 @@ static int magicmouse_probe(struct hid_device *hdev,
 	ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature),
 			HID_FEATURE_REPORT);
 	if (ret != sizeof(feature)) {
-		dev_err(&hdev->dev, "unable to request touch data (%d)\n",
-				ret);
+		hid_err(hdev, "unable to request touch data (%d)\n", ret);
 		goto err_stop_hw;
 	}
 
@@ -540,7 +541,7 @@ static int __init magicmouse_init(void)
 
 	ret = hid_register_driver(&magicmouse_driver);
 	if (ret)
-		printk(KERN_ERR "can't register magicmouse driver\n");
+		pr_err("can't register magicmouse driver\n");
 
 	return ret;
 }
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index dc618c33d0a2..0f6fc54dc196 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -40,8 +40,7 @@ static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 
 	if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 &&
 			rdesc[559] == 0x29) {
-		dev_info(&hdev->dev, "fixing up Microsoft Wireless Receiver "
-				"Model 1028 report descriptor\n");
+		hid_info(hdev, "fixing up Microsoft Wireless Receiver Model 1028 report descriptor\n");
 		rdesc[557] = 0x35;
 		rdesc[559] = 0x45;
 	}
@@ -155,14 +154,14 @@ static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | ((quirks & MS_HIDINPUT) ?
 				HID_CONNECT_HIDINPUT_FORCE : 0));
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c
index c95c31e2d869..dedf757781ae 100644
--- a/drivers/hid/hid-monterey.c
+++ b/drivers/hid/hid-monterey.c
@@ -26,8 +26,7 @@ static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
 	if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) {
-		dev_info(&hdev->dev, "fixing up button/consumer in HID report "
-				"descriptor\n");
+		hid_info(hdev, "fixing up button/consumer in HID report descriptor\n");
 		rdesc[30] = 0x0c;
 	}
 	return rdesc;
diff --git a/drivers/hid/hid-mosart.c b/drivers/hid/hid-mosart.c
index ac5421d568f1..0668685380d5 100644
--- a/drivers/hid/hid-mosart.c
+++ b/drivers/hid/hid-mosart.c
@@ -199,7 +199,7 @@ static int mosart_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	td = kmalloc(sizeof(struct mosart_data), GFP_KERNEL);
 	if (!td) {
-		dev_err(&hdev->dev, "cannot allocate MosArt data\n");
+		hid_err(hdev, "cannot allocate MosArt data\n");
 		return -ENOMEM;
 	}
 	td->valid = false;
diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c
index 69169efa1e16..beb403421e72 100644
--- a/drivers/hid/hid-ntrig.c
+++ b/drivers/hid/hid-ntrig.c
@@ -130,8 +130,7 @@ static void ntrig_report_version(struct hid_device *hdev)
 	if (ret == 8) {
 		ret = ntrig_version_string(&data[2], buf);
 
-		dev_info(&hdev->dev,
-			 "Firmware version: %s (%02x%02x %02x%02x)\n",
+		hid_info(hdev, "Firmware version: %s (%02x%02x %02x%02x)\n",
 			 buf, data[2], data[3], data[4], data[5]);
 	}
 
@@ -831,7 +830,7 @@ static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	nd = kmalloc(sizeof(struct ntrig_data), GFP_KERNEL);
 	if (!nd) {
-		dev_err(&hdev->dev, "cannot allocate N-Trig data\n");
+		hid_err(hdev, "cannot allocate N-Trig data\n");
 		return -ENOMEM;
 	}
 
@@ -850,13 +849,13 @@ static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-ortek.c b/drivers/hid/hid-ortek.c
index 2e79716dca31..e90edfc63051 100644
--- a/drivers/hid/hid-ortek.c
+++ b/drivers/hid/hid-ortek.c
@@ -23,8 +23,7 @@ static __u8 *ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
 	if (*rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) {
-		dev_info(&hdev->dev, "Fixing up Ortek WKB-2000 "
-				"report descriptor.\n");
+		hid_info(hdev, "Fixing up Ortek WKB-2000 report descriptor\n");
 		rdesc[55] = 0x92;
 	}
 	return rdesc;
diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c
index 308d6ae48a3e..f1ea3ff8a98d 100644
--- a/drivers/hid/hid-petalynx.c
+++ b/drivers/hid/hid-petalynx.c
@@ -29,8 +29,7 @@ static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 &&
 			rdesc[41] == 0x00 && rdesc[59] == 0x26 &&
 			rdesc[60] == 0xf9 && rdesc[61] == 0x00) {
-		dev_info(&hdev->dev, "fixing up Petalynx Maxter Remote report "
-				"descriptor\n");
+		hid_info(hdev, "fixing up Petalynx Maxter Remote report descriptor\n");
 		rdesc[60] = 0xfa;
 		rdesc[40] = 0xfa;
 	}
@@ -77,13 +76,13 @@ static int pl_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c
index 38565fee7bf1..e4ce47156106 100644
--- a/drivers/hid/hid-picolcd.c
+++ b/drivers/hid/hid-picolcd.c
@@ -253,7 +253,7 @@ static struct hid_report *picolcd_report(int id, struct hid_device *hdev, int di
 		if (report->id == id)
 			return report;
 	}
-	dev_warn(&hdev->dev, "No report with id 0x%x found\n", id);
+	hid_warn(hdev, "No report with id 0x%x found\n", id);
 	return NULL;
 }
 
@@ -1329,7 +1329,7 @@ static int picolcd_check_version(struct hid_device *hdev)
 
 	verinfo = picolcd_send_and_wait(hdev, REPORT_VERSION, NULL, 0);
 	if (!verinfo) {
-		dev_err(&hdev->dev, "no version response from PicoLCD");
+		hid_err(hdev, "no version response from PicoLCD\n");
 		return -ENODEV;
 	}
 
@@ -1337,14 +1337,14 @@ static int picolcd_check_version(struct hid_device *hdev)
 		data->version[0] = verinfo->raw_data[1];
 		data->version[1] = verinfo->raw_data[0];
 		if (data->status & PICOLCD_BOOTLOADER) {
-			dev_info(&hdev->dev, "PicoLCD, bootloader version %d.%d\n",
-					verinfo->raw_data[1], verinfo->raw_data[0]);
+			hid_info(hdev, "PicoLCD, bootloader version %d.%d\n",
+				 verinfo->raw_data[1], verinfo->raw_data[0]);
 		} else {
-			dev_info(&hdev->dev, "PicoLCD, firmware version %d.%d\n",
-					verinfo->raw_data[1], verinfo->raw_data[0]);
+			hid_info(hdev, "PicoLCD, firmware version %d.%d\n",
+				 verinfo->raw_data[1], verinfo->raw_data[0]);
 		}
 	} else {
-		dev_err(&hdev->dev, "confused, got unexpected version response from PicoLCD\n");
+		hid_err(hdev, "confused, got unexpected version response from PicoLCD\n");
 		ret = -EINVAL;
 	}
 	kfree(verinfo);
@@ -2328,8 +2328,7 @@ static void picolcd_init_devfs(struct picolcd_data *data,
 			(flash_w ? S_IWUSR : 0) | (flash_r ? S_IRUSR : 0),
 			hdev->debug_dir, data, &picolcd_debug_flash_fops);
 	} else if (flash_r || flash_w)
-		dev_warn(&hdev->dev, "Unexpected FLASH access reports, "
-				"please submit rdesc for review\n");
+		hid_warn(hdev, "Unexpected FLASH access reports, please submit rdesc for review\n");
 }
 
 static void picolcd_exit_devfs(struct picolcd_data *data)
@@ -2457,13 +2456,13 @@ static int picolcd_init_keys(struct picolcd_data *data,
 		return -ENODEV;
 	if (report->maxfield != 1 || report->field[0]->report_count != 2 ||
 			report->field[0]->report_size != 8) {
-		dev_err(&hdev->dev, "unsupported KEY_STATE report");
+		hid_err(hdev, "unsupported KEY_STATE report\n");
 		return -EINVAL;
 	}
 
 	idev = input_allocate_device();
 	if (idev == NULL) {
-		dev_err(&hdev->dev, "failed to allocate input device");
+		hid_err(hdev, "failed to allocate input device\n");
 		return -ENOMEM;
 	}
 	input_set_drvdata(idev, hdev);
@@ -2485,7 +2484,7 @@ static int picolcd_init_keys(struct picolcd_data *data,
 		input_set_capability(idev, EV_KEY, data->keycode[i]);
 	error = input_register_device(idev);
 	if (error) {
-		dev_err(&hdev->dev, "error registering the input device");
+		hid_err(hdev, "error registering the input device\n");
 		input_free_device(idev);
 		return error;
 	}
@@ -2522,9 +2521,8 @@ static int picolcd_probe_lcd(struct hid_device *hdev, struct picolcd_data *data)
 		return error;
 
 	if (data->version[0] != 0 && data->version[1] != 3)
-		dev_info(&hdev->dev, "Device with untested firmware revision, "
-				"please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n",
-				dev_name(&hdev->dev));
+		hid_info(hdev, "Device with untested firmware revision, please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n",
+			 dev_name(&hdev->dev));
 
 	/* Setup keypad input device */
 	error = picolcd_init_keys(data, picolcd_in_report(REPORT_KEY_STATE, hdev));
@@ -2581,9 +2579,8 @@ static int picolcd_probe_bootloader(struct hid_device *hdev, struct picolcd_data
 		return error;
 
 	if (data->version[0] != 1 && data->version[1] != 0)
-		dev_info(&hdev->dev, "Device with untested bootloader revision, "
-				"please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n",
-				dev_name(&hdev->dev));
+		hid_info(hdev, "Device with untested bootloader revision, please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n",
+			 dev_name(&hdev->dev));
 
 	picolcd_init_devfs(data, NULL, NULL,
 			picolcd_out_report(REPORT_BL_READ_MEMORY, hdev),
@@ -2605,7 +2602,7 @@ static int picolcd_probe(struct hid_device *hdev,
 	 */
 	data = kzalloc(sizeof(struct picolcd_data), GFP_KERNEL);
 	if (data == NULL) {
-		dev_err(&hdev->dev, "can't allocate space for Minibox PicoLCD device data\n");
+		hid_err(hdev, "can't allocate space for Minibox PicoLCD device data\n");
 		error = -ENOMEM;
 		goto err_no_cleanup;
 	}
@@ -2621,7 +2618,7 @@ static int picolcd_probe(struct hid_device *hdev,
 	/* Parse the device reports and start it up */
 	error = hid_parse(hdev);
 	if (error) {
-		dev_err(&hdev->dev, "device report parse failed\n");
+		hid_err(hdev, "device report parse failed\n");
 		goto err_cleanup_data;
 	}
 
@@ -2631,25 +2628,25 @@ static int picolcd_probe(struct hid_device *hdev,
 	error = hid_hw_start(hdev, 0);
 	hdev->claimed = 0;
 	if (error) {
-		dev_err(&hdev->dev, "hardware start failed\n");
+		hid_err(hdev, "hardware start failed\n");
 		goto err_cleanup_data;
 	}
 
 	error = hid_hw_open(hdev);
 	if (error) {
-		dev_err(&hdev->dev, "failed to open input interrupt pipe for key and IR events\n");
+		hid_err(hdev, "failed to open input interrupt pipe for key and IR events\n");
 		goto err_cleanup_hid_hw;
 	}
 
 	error = device_create_file(&hdev->dev, &dev_attr_operation_mode_delay);
 	if (error) {
-		dev_err(&hdev->dev, "failed to create sysfs attributes\n");
+		hid_err(hdev, "failed to create sysfs attributes\n");
 		goto err_cleanup_hid_ll;
 	}
 
 	error = device_create_file(&hdev->dev, &dev_attr_operation_mode);
 	if (error) {
-		dev_err(&hdev->dev, "failed to create sysfs attributes\n");
+		hid_err(hdev, "failed to create sysfs attributes\n");
 		goto err_cleanup_sysfs1;
 	}
 
diff --git a/drivers/hid/hid-pl.c b/drivers/hid/hid-pl.c
index 9f41e2bd8483..06e5300d43d2 100644
--- a/drivers/hid/hid-pl.c
+++ b/drivers/hid/hid-pl.c
@@ -103,7 +103,7 @@ static int plff_init(struct hid_device *hid)
 	*/
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
@@ -112,14 +112,13 @@ static int plff_init(struct hid_device *hid)
 		report_ptr = report_ptr->next;
 
 		if (report_ptr == report_list) {
-			dev_err(&hid->dev, "required output report is "
-					"missing\n");
+			hid_err(hid, "required output report is missing\n");
 			return -ENODEV;
 		}
 
 		report = list_entry(report_ptr, struct hid_report, list);
 		if (report->maxfield < 1) {
-			dev_err(&hid->dev, "no fields in the report\n");
+			hid_err(hid, "no fields in the report\n");
 			return -ENODEV;
 		}
 
@@ -137,7 +136,7 @@ static int plff_init(struct hid_device *hid)
 			weak = &report->field[3]->value[0];
 			debug("detected 4-field device");
 		} else {
-			dev_err(&hid->dev, "not enough fields or values\n");
+			hid_err(hid, "not enough fields or values\n");
 			return -ENODEV;
 		}
 
@@ -164,8 +163,7 @@ static int plff_init(struct hid_device *hid)
 		usbhid_submit_report(hid, plff->report, USB_DIR_OUT);
 	}
 
-	dev_info(&hid->dev, "Force feedback for PantherLord/GreenAsia "
-	       "devices by Anssi Hannula <anssi.hannula@gmail.com>\n");
+	hid_info(hid, "Force feedback for PantherLord/GreenAsia devices by Anssi Hannula <anssi.hannula@gmail.com>\n");
 
 	return 0;
 }
@@ -185,13 +183,13 @@ static int pl_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
index 5f0fa6c1bf87..ab19f2905d27 100644
--- a/drivers/hid/hid-prodikeys.c
+++ b/drivers/hid/hid-prodikeys.c
@@ -16,6 +16,8 @@
  * any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/usb.h>
@@ -285,11 +287,11 @@ static int pcmidi_get_output_report(struct pcmidi_snd *pm)
 			continue;
 
 		if (report->maxfield < 1) {
-			dev_err(&hdev->dev, "output report is empty\n");
+			hid_err(hdev, "output report is empty\n");
 			break;
 		}
 		if (report->field[0]->report_count != 2) {
-			dev_err(&hdev->dev, "field count too low\n");
+			hid_err(hdev, "field count too low\n");
 			break;
 		}
 		pm->pcmidi_report6 = report;
@@ -746,8 +748,8 @@ static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	if (*rsize == 178 &&
 	      rdesc[111] == 0x06 && rdesc[112] == 0x00 &&
 	      rdesc[113] == 0xff) {
-		dev_info(&hdev->dev, "fixing up pc-midi keyboard report "
-			"descriptor\n");
+		hid_info(hdev,
+			 "fixing up pc-midi keyboard report descriptor\n");
 
 		rdesc[144] = 0x18; /* report 4: was 0x10 report count */
 	}
@@ -805,7 +807,7 @@ static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	pk = kzalloc(sizeof(*pk), GFP_KERNEL);
 	if (pk == NULL) {
-		dev_err(&hdev->dev, "prodikeys: can't alloc descriptor\n");
+		hid_err(hdev, "can't alloc descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -813,8 +815,7 @@ static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	pm = kzalloc(sizeof(*pm), GFP_KERNEL);
 	if (pm == NULL) {
-		dev_err(&hdev->dev,
-			"prodikeys: can't alloc descriptor\n");
+		hid_err(hdev, "can't alloc descriptor\n");
 		ret = -ENOMEM;
 		goto err_free;
 	}
@@ -827,7 +828,7 @@ static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "prodikeys: hid parse failed\n");
+		hid_err(hdev, "hid parse failed\n");
 		goto err_free;
 	}
 
@@ -837,7 +838,7 @@ static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "prodikeys: hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
@@ -896,7 +897,7 @@ static int pk_init(void)
 
 	ret = hid_register_driver(&pk_driver);
 	if (ret)
-		printk(KERN_ERR "can't register prodikeys driver\n");
+		pr_err("can't register prodikeys driver\n");
 
 	return ret;
 }
diff --git a/drivers/hid/hid-quanta.c b/drivers/hid/hid-quanta.c
index 54d3db50605b..87a54df4d4ac 100644
--- a/drivers/hid/hid-quanta.c
+++ b/drivers/hid/hid-quanta.c
@@ -195,7 +195,7 @@ static int quanta_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	td = kmalloc(sizeof(struct quanta_data), GFP_KERNEL);
 	if (!td) {
-		dev_err(&hdev->dev, "cannot allocate Quanta Touch data\n");
+		hid_err(hdev, "cannot allocate Quanta Touch data\n");
 		return -ENOMEM;
 	}
 	td->valid = false;
diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c
index f77695762cb5..73199de2e37f 100644
--- a/drivers/hid/hid-roccat-kone.c
+++ b/drivers/hid/hid-roccat-kone.c
@@ -90,8 +90,7 @@ static int kone_check_write(struct usb_device *usb_dev)
 		kfree(data);
 		return 0;
 	} else { /* unknown answer */
-		dev_err(&usb_dev->dev, "got retval %d when checking write\n",
-				*data);
+		hid_err(usb_dev, "got retval %d when checking write\n", *data);
 		kfree(data);
 		return -EIO;
 	}
@@ -556,7 +555,7 @@ static ssize_t kone_sysfs_set_tcu(struct device *dev,
 
 		retval = kone_set_settings(usb_dev, &kone->settings);
 		if (retval) {
-			dev_err(&usb_dev->dev, "couldn't set tcu state\n");
+			hid_err(usb_dev, "couldn't set tcu state\n");
 			/*
 			 * try to reread valid settings into buffer overwriting
 			 * first error code
@@ -570,7 +569,7 @@ static ssize_t kone_sysfs_set_tcu(struct device *dev,
 
 	retval = size;
 exit_no_settings:
-	dev_err(&usb_dev->dev, "couldn't read settings\n");
+	hid_err(usb_dev, "couldn't read settings\n");
 exit_unlock:
 	mutex_unlock(&kone->kone_lock);
 	return retval;
@@ -818,21 +817,20 @@ static int kone_init_specials(struct hid_device *hdev)
 
 		kone = kzalloc(sizeof(*kone), GFP_KERNEL);
 		if (!kone) {
-			dev_err(&hdev->dev, "can't alloc device descriptor\n");
+			hid_err(hdev, "can't alloc device descriptor\n");
 			return -ENOMEM;
 		}
 		hid_set_drvdata(hdev, kone);
 
 		retval = kone_init_kone_device_struct(usb_dev, kone);
 		if (retval) {
-			dev_err(&hdev->dev,
-					"couldn't init struct kone_device\n");
+			hid_err(hdev, "couldn't init struct kone_device\n");
 			goto exit_free;
 		}
 
 		retval = roccat_connect(hdev);
 		if (retval < 0) {
-			dev_err(&hdev->dev, "couldn't init char dev\n");
+			hid_err(hdev, "couldn't init char dev\n");
 			/* be tolerant about not getting chrdev */
 		} else {
 			kone->roccat_claimed = 1;
@@ -841,7 +839,7 @@ static int kone_init_specials(struct hid_device *hdev)
 
 		retval = kone_create_sysfs_attributes(intf);
 		if (retval) {
-			dev_err(&hdev->dev, "cannot create sysfs files\n");
+			hid_err(hdev, "cannot create sysfs files\n");
 			goto exit_free;
 		}
 	} else {
@@ -876,19 +874,19 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	retval = hid_parse(hdev);
 	if (retval) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto exit;
 	}
 
 	retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (retval) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto exit;
 	}
 
 	retval = kone_init_specials(hdev);
 	if (retval) {
-		dev_err(&hdev->dev, "couldn't install mouse\n");
+		hid_err(hdev, "couldn't install mouse\n");
 		goto exit_stop;
 	}
 
diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c
index 9bf23047892a..e7b4affe2664 100644
--- a/drivers/hid/hid-roccat-pyra.c
+++ b/drivers/hid/hid-roccat-pyra.c
@@ -87,9 +87,8 @@ static int pyra_receive_control_status(struct usb_device *usb_dev)
 			control.value == 1)
 			return 0;
 	else {
-		dev_err(&usb_dev->dev, "receive control status: "
-				"unknown response 0x%x 0x%x\n",
-				control.request, control.value);
+		hid_err(usb_dev, "receive control status: unknown response 0x%x 0x%x\n",
+			control.request, control.value);
 		return -EINVAL;
 	}
 }
@@ -770,21 +769,20 @@ static int pyra_init_specials(struct hid_device *hdev)
 
 		pyra = kzalloc(sizeof(*pyra), GFP_KERNEL);
 		if (!pyra) {
-			dev_err(&hdev->dev, "can't alloc device descriptor\n");
+			hid_err(hdev, "can't alloc device descriptor\n");
 			return -ENOMEM;
 		}
 		hid_set_drvdata(hdev, pyra);
 
 		retval = pyra_init_pyra_device_struct(usb_dev, pyra);
 		if (retval) {
-			dev_err(&hdev->dev,
-					"couldn't init struct pyra_device\n");
+			hid_err(hdev, "couldn't init struct pyra_device\n");
 			goto exit_free;
 		}
 
 		retval = roccat_connect(hdev);
 		if (retval < 0) {
-			dev_err(&hdev->dev, "couldn't init char dev\n");
+			hid_err(hdev, "couldn't init char dev\n");
 		} else {
 			pyra->chrdev_minor = retval;
 			pyra->roccat_claimed = 1;
@@ -792,7 +790,7 @@ static int pyra_init_specials(struct hid_device *hdev)
 
 		retval = pyra_create_sysfs_attributes(intf);
 		if (retval) {
-			dev_err(&hdev->dev, "cannot create sysfs files\n");
+			hid_err(hdev, "cannot create sysfs files\n");
 			goto exit_free;
 		}
 	} else {
@@ -826,19 +824,19 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	retval = hid_parse(hdev);
 	if (retval) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto exit;
 	}
 
 	retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (retval) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto exit;
 	}
 
 	retval = pyra_init_specials(hdev);
 	if (retval) {
-		dev_err(&hdev->dev, "couldn't install mouse\n");
+		hid_err(hdev, "couldn't install mouse\n");
 		goto exit_stop;
 	}
 	return 0;
diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c
index a9d9b29ff47f..4bc7a93dc1f5 100644
--- a/drivers/hid/hid-roccat.c
+++ b/drivers/hid/hid-roccat.c
@@ -21,6 +21,8 @@
  * It is inspired by hidraw, but uses only one circular buffer for all readers.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cdev.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
@@ -165,8 +167,7 @@ static int roccat_open(struct inode *inode, struct file *file)
 	mutex_lock(&device->readers_lock);
 
 	if (!device) {
-		printk(KERN_EMERG "roccat device with minor %d doesn't exist\n",
-				minor);
+		pr_emerg("roccat device with minor %d doesn't exist\n", minor);
 		error = -ENODEV;
 		goto exit_err;
 	}
@@ -214,8 +215,7 @@ static int roccat_release(struct inode *inode, struct file *file)
 	device = devices[minor];
 	if (!device) {
 		mutex_unlock(&devices_lock);
-		printk(KERN_EMERG "roccat device with minor %d doesn't exist\n",
-				minor);
+		pr_emerg("roccat device with minor %d doesn't exist\n", minor);
 		return -ENODEV;
 	}
 
@@ -392,7 +392,7 @@ static int __init roccat_init(void)
 	roccat_major = MAJOR(dev_id);
 
 	if (retval < 0) {
-		printk(KERN_WARNING "roccat: can't get major number\n");
+		pr_warn("can't get major number\n");
 		return retval;
 	}
 
diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c
index 35894444e000..3c1fd8af5e0c 100644
--- a/drivers/hid/hid-samsung.c
+++ b/drivers/hid/hid-samsung.c
@@ -57,8 +57,8 @@
 static inline void samsung_irda_dev_trace(struct hid_device *hdev,
 		unsigned int rsize)
 {
-	dev_info(&hdev->dev, "fixing up Samsung IrDA %d byte report "
-			"descriptor\n", rsize);
+	hid_info(hdev, "fixing up Samsung IrDA %d byte report descriptor\n",
+		 rsize);
 }
 
 static __u8 *samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc,
@@ -160,7 +160,7 @@ static int samsung_probe(struct hid_device *hdev,
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
@@ -174,7 +174,7 @@ static int samsung_probe(struct hid_device *hdev,
 
 	ret = hid_hw_start(hdev, cmask);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-sjoy.c b/drivers/hid/hid-sjoy.c
index e10a7687ebf2..16f7cafc9695 100644
--- a/drivers/hid/hid-sjoy.c
+++ b/drivers/hid/hid-sjoy.c
@@ -74,26 +74,25 @@ static int sjoyff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output reports found\n");
+		hid_err(hid, "no output reports found\n");
 		return -ENODEV;
 	}
 
 	report_ptr = report_ptr->next;
 
 	if (report_ptr == report_list) {
-		dev_err(&hid->dev, "required output report is "
-				"missing\n");
+		hid_err(hid, "required output report is missing\n");
 		return -ENODEV;
 	}
 
 	report = list_entry(report_ptr, struct hid_report, list);
 	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "no fields in the report\n");
+		hid_err(hid, "no fields in the report\n");
 		return -ENODEV;
 	}
 
 	if (report->field[0]->report_count < 3) {
-		dev_err(&hid->dev, "not enough values in the field\n");
+		hid_err(hid, "not enough values in the field\n");
 		return -ENODEV;
 	}
 
@@ -117,8 +116,7 @@ static int sjoyff_init(struct hid_device *hid)
 	sjoyff->report->field[0]->value[2] = 0x00;
 	usbhid_submit_report(hid, sjoyff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev,
-		"Force feedback for SmartJoy PLUS PS2/USB adapter\n");
+	hid_info(hid, "Force feedback for SmartJoy PLUS PS2/USB adapter\n");
 
 	return 0;
 }
@@ -135,13 +133,13 @@ static int sjoy_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 677bb3da10e8..68d7b36e31e4 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -40,8 +40,7 @@ static __u8 *sony_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 
 	if ((sc->quirks & VAIO_RDESC_CONSTANT) &&
 			*rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) {
-		dev_info(&hdev->dev, "Fixing up Sony Vaio VGX report "
-				"descriptor\n");
+		hid_info(hdev, "Fixing up Sony Vaio VGX report descriptor\n");
 		rdesc[55] = 0x06;
 	}
 	return rdesc;
@@ -89,7 +88,7 @@ static int sixaxis_set_operational_usb(struct hid_device *hdev)
 				 (3 << 8) | 0xf2, ifnum, buf, 17,
 				 USB_CTRL_GET_TIMEOUT);
 	if (ret < 0)
-		dev_err(&hdev->dev, "can't set operational mode\n");
+		hid_err(hdev, "can't set operational mode\n");
 
 	kfree(buf);
 
@@ -110,7 +109,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	sc = kzalloc(sizeof(*sc), GFP_KERNEL);
 	if (sc == NULL) {
-		dev_err(&hdev->dev, "can't alloc sony descriptor\n");
+		hid_err(hdev, "can't alloc sony descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -119,14 +118,14 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT |
 			HID_CONNECT_HIDDEV_FORCE);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hid-stantum.c b/drivers/hid/hid-stantum.c
index 3171be28c3d5..b2be1d11916b 100644
--- a/drivers/hid/hid-stantum.c
+++ b/drivers/hid/hid-stantum.c
@@ -222,7 +222,7 @@ static int stantum_probe(struct hid_device *hdev,
 
 	sd = kmalloc(sizeof(struct stantum_data), GFP_KERNEL);
 	if (!sd) {
-		dev_err(&hdev->dev, "cannot allocate Stantum data\n");
+		hid_err(hdev, "cannot allocate Stantum data\n");
 		return -ENOMEM;
 	}
 	sd->valid = false;
diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c
index 164ed568f6cf..d484a0043dd4 100644
--- a/drivers/hid/hid-sunplus.c
+++ b/drivers/hid/hid-sunplus.c
@@ -27,8 +27,7 @@ static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 {
 	if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 &&
 			rdesc[106] == 0x03) {
-		dev_info(&hdev->dev, "fixing up Sunplus Wireless Desktop "
-				"report descriptor\n");
+		hid_info(hdev, "fixing up Sunplus Wireless Desktop report descriptor\n");
 		rdesc[105] = rdesc[110] = 0x03;
 		rdesc[106] = rdesc[111] = 0x21;
 	}
diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c
index 15434c814793..356a98fcb365 100644
--- a/drivers/hid/hid-tmff.c
+++ b/drivers/hid/hid-tmff.c
@@ -151,28 +151,23 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits)
 			switch (field->usage[0].hid) {
 			case THRUSTMASTER_USAGE_FF:
 				if (field->report_count < 2) {
-					dev_warn(&hid->dev, "ignoring FF field "
-						"with report_count < 2\n");
+					hid_warn(hid, "ignoring FF field with report_count < 2\n");
 					continue;
 				}
 
 				if (field->logical_maximum ==
 						field->logical_minimum) {
-					dev_warn(&hid->dev, "ignoring FF field "
-							"with logical_maximum "
-							"== logical_minimum\n");
+					hid_warn(hid, "ignoring FF field with logical_maximum == logical_minimum\n");
 					continue;
 				}
 
 				if (tmff->report && tmff->report != report) {
-					dev_warn(&hid->dev, "ignoring FF field "
-							"in other report\n");
+					hid_warn(hid, "ignoring FF field in other report\n");
 					continue;
 				}
 
 				if (tmff->ff_field && tmff->ff_field != field) {
-					dev_warn(&hid->dev, "ignoring "
-							"duplicate FF field\n");
+					hid_warn(hid, "ignoring duplicate FF field\n");
 					continue;
 				}
 
@@ -185,16 +180,15 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits)
 				break;
 
 			default:
-				dev_warn(&hid->dev, "ignoring unknown output "
-						"usage %08x\n",
-						field->usage[0].hid);
+				hid_warn(hid, "ignoring unknown output usage %08x\n",
+					 field->usage[0].hid);
 				continue;
 			}
 		}
 	}
 
 	if (!tmff->report) {
-		dev_err(&hid->dev, "can't find FF field in output reports\n");
+		hid_err(hid, "can't find FF field in output reports\n");
 		error = -ENODEV;
 		goto fail;
 	}
@@ -203,8 +197,7 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits)
 	if (error)
 		goto fail;
 
-	dev_info(&hid->dev, "force feedback for ThrustMaster devices by Zinx "
-			"Verituse <zinx@epicsol.org>");
+	hid_info(hid, "force feedback for ThrustMaster devices by Zinx Verituse <zinx@epicsol.org>\n");
 	return 0;
 
 fail:
@@ -224,13 +217,13 @@ static int tm_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
index 94caae731381..06888323828c 100644
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -18,6 +18,8 @@
  * any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/module.h>
@@ -141,8 +143,8 @@ static void wacom_poke(struct hid_device *hdev, u8 speed)
 	 * Note that if the raw queries fail, it's not a hard failure and it
 	 * is safe to continue
 	 */
-	dev_warn(&hdev->dev, "failed to poke device, command %d, err %d\n",
-				rep_data[0], ret);
+	hid_warn(hdev, "failed to poke device, command %d, err %d\n",
+		 rep_data[0], ret);
 	return;
 }
 
@@ -312,7 +314,7 @@ static int wacom_probe(struct hid_device *hdev,
 
 	wdata = kzalloc(sizeof(*wdata), GFP_KERNEL);
 	if (wdata == NULL) {
-		dev_err(&hdev->dev, "can't alloc wacom descriptor\n");
+		hid_err(hdev, "can't alloc wacom descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -321,20 +323,20 @@ static int wacom_probe(struct hid_device *hdev,
 	/* Parse the HID report now */
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
 	ret = device_create_file(&hdev->dev, &dev_attr_speed);
 	if (ret)
-		dev_warn(&hdev->dev,
-			"can't create sysfs speed attribute err: %d\n", ret);
+		hid_warn(hdev,
+			 "can't create sysfs speed attribute err: %d\n", ret);
 
 	/* Set Wacom mode 2 with high reporting speed */
 	wacom_poke(hdev, 1);
@@ -349,8 +351,8 @@ static int wacom_probe(struct hid_device *hdev,
 
 	ret = power_supply_register(&hdev->dev, &wdata->battery);
 	if (ret) {
-		dev_warn(&hdev->dev,
-			"can't create sysfs battery attribute, err: %d\n", ret);
+		hid_warn(hdev, "can't create sysfs battery attribute, err: %d\n",
+			 ret);
 		/*
 		 * battery attribute is not critical for the tablet, but if it
 		 * failed then there is no need to create ac attribute
@@ -367,8 +369,8 @@ static int wacom_probe(struct hid_device *hdev,
 
 	ret = power_supply_register(&hdev->dev, &wdata->ac);
 	if (ret) {
-		dev_warn(&hdev->dev,
-			"can't create ac battery attribute, err: %d\n", ret);
+		hid_warn(hdev,
+			 "can't create ac battery attribute, err: %d\n", ret);
 		/*
 		 * ac attribute is not critical for the tablet, but if it
 		 * failed then we don't want to battery attribute to exist
@@ -454,7 +456,7 @@ static int __init wacom_init(void)
 
 	ret = hid_register_driver(&wacom_driver);
 	if (ret)
-		printk(KERN_ERR "can't register wacom driver\n");
+		pr_err("can't register wacom driver\n");
 	return ret;
 }
 
diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c
index b7acceabba80..f31fab012f2f 100644
--- a/drivers/hid/hid-zpff.c
+++ b/drivers/hid/hid-zpff.c
@@ -75,14 +75,14 @@ static int zpff_init(struct hid_device *hid)
 	int error;
 
 	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output report found\n");
+		hid_err(hid, "no output report found\n");
 		return -ENODEV;
 	}
 
 	report = list_entry(report_list->next, struct hid_report, list);
 
 	if (report->maxfield < 4) {
-		dev_err(&hid->dev, "not enough fields in report\n");
+		hid_err(hid, "not enough fields in report\n");
 		return -ENODEV;
 	}
 
@@ -105,8 +105,7 @@ static int zpff_init(struct hid_device *hid)
 	zpff->report->field[3]->value[0] = 0x00;
 	usbhid_submit_report(hid, zpff->report, USB_DIR_OUT);
 
-	dev_info(&hid->dev, "force feedback for Zeroplus based devices by "
-	       "Anssi Hannula <anssi.hannula@gmail.com>\n");
+	hid_info(hid, "force feedback for Zeroplus based devices by Anssi Hannula <anssi.hannula@gmail.com>\n");
 
 	return 0;
 }
@@ -123,13 +122,13 @@ static int zp_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
 	if (ret) {
-		dev_err(&hdev->dev, "hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err;
 	}
 
diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c
index aac1f9273149..e90371508fd2 100644
--- a/drivers/hid/hid-zydacron.c
+++ b/drivers/hid/hid-zydacron.c
@@ -34,9 +34,8 @@ static __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff &&
 		rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff &&
 		rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) {
-			dev_info(&hdev->dev,
-				"fixing up zydacron remote control report "
-				"descriptor\n");
+			hid_info(hdev,
+				"fixing up zydacron remote control report descriptor\n");
 			rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c;
 			rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00;
 		}
@@ -172,7 +171,7 @@ static int zc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	zc = kzalloc(sizeof(*zc), GFP_KERNEL);
 	if (zc == NULL) {
-		dev_err(&hdev->dev, "zydacron: can't alloc descriptor\n");
+		hid_err(hdev, "can't alloc descriptor\n");
 		return -ENOMEM;
 	}
 
@@ -180,13 +179,13 @@ static int zc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = hid_parse(hdev);
 	if (ret) {
-		dev_err(&hdev->dev, "zydacron: parse failed\n");
+		hid_err(hdev, "parse failed\n");
 		goto err_free;
 	}
 
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
-		dev_err(&hdev->dev, "zydacron: hw start failed\n");
+		hid_err(hdev, "hw start failed\n");
 		goto err_free;
 	}
 
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 5aefe73cf795..eb16cd143e2a 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -19,6 +19,8 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -123,15 +125,15 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
 	}
 
 	if (count > HID_MAX_BUFFER_SIZE) {
-		printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
-				task_pid_nr(current));
+		hid_warn(dev, "pid %d passed too large report\n",
+			 task_pid_nr(current));
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (count < 2) {
-		printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
-				task_pid_nr(current));
+		hid_warn(dev, "pid %d passed too short report\n",
+			 task_pid_nr(current));
 		ret = -EINVAL;
 		goto out;
 	}
@@ -450,7 +452,7 @@ int __init hidraw_init(void)
 	hidraw_major = MAJOR(dev_id);
 
 	if (result < 0) {
-		printk(KERN_WARNING "hidraw: can't get major number\n");
+		pr_warn("can't get major number\n");
 		result = 0;
 		goto out;
 	}
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 5489eab3a6bd..276758f53ab5 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -136,10 +136,10 @@ static void hid_reset(struct work_struct *work)
 			hid_io_error(hid);
 		break;
 	default:
-		err_hid("can't reset device, %s-%s/input%d, status %d",
-				hid_to_usb_dev(hid)->bus->bus_name,
-				hid_to_usb_dev(hid)->devpath,
-				usbhid->ifnum, rc);
+		hid_err(hid, "can't reset device, %s-%s/input%d, status %d\n",
+			hid_to_usb_dev(hid)->bus->bus_name,
+			hid_to_usb_dev(hid)->devpath,
+			usbhid->ifnum, rc);
 		/* FALLTHROUGH */
 	case -EHOSTUNREACH:
 	case -ENODEV:
@@ -278,18 +278,18 @@ static void hid_irq_in(struct urb *urb)
 		hid_io_error(hid);
 		return;
 	default:		/* error */
-		dev_warn(&urb->dev->dev, "input irq status %d  "
-				"received\n", urb->status);
+		hid_warn(urb->dev, "input irq status %d received\n",
+			 urb->status);
 	}
 
 	status = usb_submit_urb(urb, GFP_ATOMIC);
 	if (status) {
 		clear_bit(HID_IN_RUNNING, &usbhid->iofl);
 		if (status != -EPERM) {
-			err_hid("can't resubmit intr, %s-%s/input%d, status %d",
-					hid_to_usb_dev(hid)->bus->bus_name,
-					hid_to_usb_dev(hid)->devpath,
-					usbhid->ifnum, status);
+			hid_err(hid, "can't resubmit intr, %s-%s/input%d, status %d\n",
+				hid_to_usb_dev(hid)->bus->bus_name,
+				hid_to_usb_dev(hid)->devpath,
+				usbhid->ifnum, status);
 			hid_io_error(hid);
 		}
 	}
@@ -313,7 +313,7 @@ static int hid_submit_out(struct hid_device *hid)
 		dbg_hid("submitting out urb\n");
 
 		if (usb_submit_urb(usbhid->urbout, GFP_ATOMIC)) {
-			err_hid("usb_submit_urb(out) failed");
+			hid_err(hid, "usb_submit_urb(out) failed\n");
 			return -1;
 		}
 		usbhid->last_out = jiffies;
@@ -375,7 +375,7 @@ static int hid_submit_ctrl(struct hid_device *hid)
 			usbhid->cr->wValue, usbhid->cr->wIndex, usbhid->cr->wLength);
 
 		if (usb_submit_urb(usbhid->urbctrl, GFP_ATOMIC)) {
-			err_hid("usb_submit_urb(ctrl) failed");
+			hid_err(hid, "usb_submit_urb(ctrl) failed\n");
 			return -1;
 		}
 		usbhid->last_ctrl = jiffies;
@@ -413,8 +413,8 @@ static void hid_irq_out(struct urb *urb)
 	case -ENOENT:
 		break;
 	default:		/* error */
-		dev_warn(&urb->dev->dev, "output irq status %d "
-				"received\n", urb->status);
+		hid_warn(urb->dev, "output irq status %d received\n",
+			 urb->status);
 	}
 
 	spin_lock_irqsave(&usbhid->lock, flags);
@@ -466,8 +466,7 @@ static void hid_ctrl(struct urb *urb)
 	case -EPIPE:		/* report not available */
 		break;
 	default:		/* error */
-		dev_warn(&urb->dev->dev, "ctrl urb status %d "
-				"received\n", status);
+		hid_warn(urb->dev, "ctrl urb status %d received\n", status);
 	}
 
 	if (unplug)
@@ -501,13 +500,13 @@ static void __usbhid_submit_report(struct hid_device *hid, struct hid_report *re
 
 	if (usbhid->urbout && dir == USB_DIR_OUT && report->type == HID_OUTPUT_REPORT) {
 		if ((head = (usbhid->outhead + 1) & (HID_OUTPUT_FIFO_SIZE - 1)) == usbhid->outtail) {
-			dev_warn(&hid->dev, "output queue full\n");
+			hid_warn(hid, "output queue full\n");
 			return;
 		}
 
 		usbhid->out[usbhid->outhead].raw_report = kmalloc(len, GFP_ATOMIC);
 		if (!usbhid->out[usbhid->outhead].raw_report) {
-			dev_warn(&hid->dev, "output queueing failed\n");
+			hid_warn(hid, "output queueing failed\n");
 			return;
 		}
 		hid_output_report(report, usbhid->out[usbhid->outhead].raw_report);
@@ -532,14 +531,14 @@ static void __usbhid_submit_report(struct hid_device *hid, struct hid_report *re
 	}
 
 	if ((head = (usbhid->ctrlhead + 1) & (HID_CONTROL_FIFO_SIZE - 1)) == usbhid->ctrltail) {
-		dev_warn(&hid->dev, "control queue full\n");
+		hid_warn(hid, "control queue full\n");
 		return;
 	}
 
 	if (dir == USB_DIR_OUT) {
 		usbhid->ctrl[usbhid->ctrlhead].raw_report = kmalloc(len, GFP_ATOMIC);
 		if (!usbhid->ctrl[usbhid->ctrlhead].raw_report) {
-			dev_warn(&hid->dev, "control queueing failed\n");
+			hid_warn(hid, "control queueing failed\n");
 			return;
 		}
 		hid_output_report(report, usbhid->ctrl[usbhid->ctrlhead].raw_report);
@@ -590,7 +589,7 @@ static int usb_hidinput_input_event(struct input_dev *dev, unsigned int type, un
 		return -1;
 
 	if ((offset = hidinput_find_field(hid, type, code, &field)) == -1) {
-		dev_warn(&dev->dev, "event field not found\n");
+		hid_warn(dev, "event field not found\n");
 		return -1;
 	}
 
@@ -722,7 +721,7 @@ void usbhid_init_reports(struct hid_device *hid)
 	}
 
 	if (err)
-		dev_warn(&hid->dev, "timeout initializing reports\n");
+		hid_warn(hid, "timeout initializing reports\n");
 }
 
 /*
@@ -1140,8 +1139,7 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
 		if (usb_endpoint_is_int_in(&interface->endpoint[n].desc))
 			has_in++;
 	if (!has_in) {
-		dev_err(&intf->dev, "couldn't find an input interrupt "
-				"endpoint\n");
+		hid_err(intf, "couldn't find an input interrupt endpoint\n");
 		return -ENODEV;
 	}
 
@@ -1213,7 +1211,7 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
 	ret = hid_add_device(hid);
 	if (ret) {
 		if (ret != -ENODEV)
-			dev_err(&intf->dev, "can't add hid device: %d\n", ret);
+			hid_err(intf, "can't add hid device: %d\n", ret);
 		goto err_free;
 	}
 
diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c
index ef381d79cfa8..f91c136821f7 100644
--- a/drivers/hid/usbhid/hid-pidff.c
+++ b/drivers/hid/usbhid/hid-pidff.c
@@ -22,7 +22,7 @@
 
 /* #define DEBUG */
 
-#define debug(format, arg...) pr_debug("hid-pidff: " format "\n" , ## arg)
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/input.h>
 #include <linux/slab.h>
@@ -220,7 +220,7 @@ static int pidff_rescale_signed(int i, struct hid_field *field)
 static void pidff_set(struct pidff_usage *usage, u16 value)
 {
 	usage->value[0] = pidff_rescale(value, 0xffff, usage->field);
-	debug("calculated from %d to %d", value, usage->value[0]);
+	pr_debug("calculated from %d to %d\n", value, usage->value[0]);
 }
 
 static void pidff_set_signed(struct pidff_usage *usage, s16 value)
@@ -235,7 +235,7 @@ static void pidff_set_signed(struct pidff_usage *usage, s16 value)
 			usage->value[0] =
 			    pidff_rescale(value, 0x7fff, usage->field);
 	}
-	debug("calculated from %d to %d", value, usage->value[0]);
+	pr_debug("calculated from %d to %d\n", value, usage->value[0]);
 }
 
 /*
@@ -259,8 +259,9 @@ static void pidff_set_envelope_report(struct pidff_device *pidff,
 	pidff->set_envelope[PID_ATTACK_TIME].value[0] = envelope->attack_length;
 	pidff->set_envelope[PID_FADE_TIME].value[0] = envelope->fade_length;
 
-	debug("attack %u => %d", envelope->attack_level,
-	      pidff->set_envelope[PID_ATTACK_LEVEL].value[0]);
+	hid_dbg(pidff->hid, "attack %u => %d\n",
+		envelope->attack_level,
+		pidff->set_envelope[PID_ATTACK_LEVEL].value[0]);
 
 	usbhid_submit_report(pidff->hid, pidff->reports[PID_SET_ENVELOPE],
 			  USB_DIR_OUT);
@@ -466,33 +467,33 @@ static int pidff_request_effect_upload(struct pidff_device *pidff, int efnum)
 	pidff->create_new_effect_type->value[0] = efnum;
 	usbhid_submit_report(pidff->hid, pidff->reports[PID_CREATE_NEW_EFFECT],
 			  USB_DIR_OUT);
-	debug("create_new_effect sent, type: %d", efnum);
+	hid_dbg(pidff->hid, "create_new_effect sent, type: %d\n", efnum);
 
 	pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0] = 0;
 	pidff->block_load_status->value[0] = 0;
 	usbhid_wait_io(pidff->hid);
 
 	for (j = 0; j < 60; j++) {
-		debug("pid_block_load requested");
+		hid_dbg(pidff->hid, "pid_block_load requested\n");
 		usbhid_submit_report(pidff->hid, pidff->reports[PID_BLOCK_LOAD],
 				  USB_DIR_IN);
 		usbhid_wait_io(pidff->hid);
 		if (pidff->block_load_status->value[0] ==
 		    pidff->status_id[PID_BLOCK_LOAD_SUCCESS]) {
-			debug("device reported free memory: %d bytes",
-			      pidff->block_load[PID_RAM_POOL_AVAILABLE].value ?
-				pidff->block_load[PID_RAM_POOL_AVAILABLE].value[0] : -1);
+			hid_dbg(pidff->hid, "device reported free memory: %d bytes\n",
+				 pidff->block_load[PID_RAM_POOL_AVAILABLE].value ?
+				 pidff->block_load[PID_RAM_POOL_AVAILABLE].value[0] : -1);
 			return 0;
 		}
 		if (pidff->block_load_status->value[0] ==
 		    pidff->status_id[PID_BLOCK_LOAD_FULL]) {
-			debug("not enough memory free: %d bytes",
-			      pidff->block_load[PID_RAM_POOL_AVAILABLE].value ?
+			hid_dbg(pidff->hid, "not enough memory free: %d bytes\n",
+				pidff->block_load[PID_RAM_POOL_AVAILABLE].value ?
 				pidff->block_load[PID_RAM_POOL_AVAILABLE].value[0] : -1);
 			return -ENOSPC;
 		}
 	}
-	printk(KERN_ERR "hid-pidff: pid_block_load failed 60 times\n");
+	hid_err(pidff->hid, "pid_block_load failed 60 times\n");
 	return -EIO;
 }
 
@@ -546,7 +547,8 @@ static int pidff_erase_effect(struct input_dev *dev, int effect_id)
 	struct pidff_device *pidff = dev->ff->private;
 	int pid_id = pidff->pid_id[effect_id];
 
-	debug("starting to erase %d/%d", effect_id, pidff->pid_id[effect_id]);
+	hid_dbg(pidff->hid, "starting to erase %d/%d\n",
+		effect_id, pidff->pid_id[effect_id]);
 	/* Wait for the queue to clear. We do not want a full fifo to
 	   prevent the effect removal. */
 	usbhid_wait_io(pidff->hid);
@@ -604,8 +606,7 @@ static int pidff_upload_effect(struct input_dev *dev, struct ff_effect *effect,
 				type_id = PID_SAW_DOWN;
 				break;
 			default:
-				printk(KERN_ERR
-				       "hid-pidff: invalid waveform\n");
+				hid_err(pidff->hid, "invalid waveform\n");
 				return -EINVAL;
 			}
 
@@ -696,7 +697,7 @@ static int pidff_upload_effect(struct input_dev *dev, struct ff_effect *effect,
 		break;
 
 	default:
-		printk(KERN_ERR "hid-pidff: invalid type\n");
+		hid_err(pidff->hid, "invalid type\n");
 		return -EINVAL;
 	}
 
@@ -704,7 +705,7 @@ static int pidff_upload_effect(struct input_dev *dev, struct ff_effect *effect,
 		pidff->pid_id[effect->id] =
 		    pidff->block_load[PID_EFFECT_BLOCK_INDEX].value[0];
 
-	debug("uploaded");
+	hid_dbg(pidff->hid, "uploaded\n");
 
 	return 0;
 }
@@ -770,14 +771,14 @@ static int pidff_find_fields(struct pidff_usage *usage, const u8 *table,
 		for (i = 0; i < report->maxfield; i++) {
 			if (report->field[i]->maxusage !=
 			    report->field[i]->report_count) {
-				debug("maxusage and report_count do not match, "
-				      "skipping");
+				pr_debug("maxusage and report_count do not match, skipping\n");
 				continue;
 			}
 			for (j = 0; j < report->field[i]->maxusage; j++) {
 				if (report->field[i]->usage[j].hid ==
 				    (HID_UP_PID | table[k])) {
-					debug("found %d at %d->%d", k, i, j);
+					pr_debug("found %d at %d->%d\n",
+						 k, i, j);
 					usage[k].field = report->field[i];
 					usage[k].value =
 						&report->field[i]->value[j];
@@ -789,7 +790,7 @@ static int pidff_find_fields(struct pidff_usage *usage, const u8 *table,
 				break;
 		}
 		if (!found && strict) {
-			debug("failed to locate %d", k);
+			pr_debug("failed to locate %d\n", k);
 			return -1;
 		}
 	}
@@ -826,8 +827,8 @@ static void pidff_find_reports(struct hid_device *hid, int report_type,
 			continue;
 		ret = pidff_check_usage(report->field[0]->logical);
 		if (ret != -1) {
-			debug("found usage 0x%02x from field->logical",
-			      pidff_reports[ret]);
+			hid_dbg(hid, "found usage 0x%02x from field->logical\n",
+				pidff_reports[ret]);
 			pidff->reports[ret] = report;
 			continue;
 		}
@@ -845,8 +846,9 @@ static void pidff_find_reports(struct hid_device *hid, int report_type,
 			continue;
 		ret = pidff_check_usage(hid->collection[i - 1].usage);
 		if (ret != -1 && !pidff->reports[ret]) {
-			debug("found usage 0x%02x from collection array",
-			      pidff_reports[ret]);
+			hid_dbg(hid,
+				"found usage 0x%02x from collection array\n",
+				pidff_reports[ret]);
 			pidff->reports[ret] = report;
 		}
 	}
@@ -861,7 +863,7 @@ static int pidff_reports_ok(struct pidff_device *pidff)
 
 	for (i = 0; i <= PID_REQUIRED_REPORTS; i++) {
 		if (!pidff->reports[i]) {
-			debug("%d missing", i);
+			hid_dbg(pidff->hid, "%d missing\n", i);
 			return 0;
 		}
 	}
@@ -884,8 +886,7 @@ static struct hid_field *pidff_find_special_field(struct hid_report *report,
 			    report->field[i]->logical_minimum == 1)
 				return report->field[i];
 			else {
-				printk(KERN_ERR "hid-pidff: logical_minimum "
-					"is not 1 as it should be\n");
+				pr_err("logical_minimum is not 1 as it should be\n");
 				return NULL;
 			}
 		}
@@ -924,7 +925,7 @@ static int pidff_find_special_keys(int *keys, struct hid_field *fld,
  */
 static int pidff_find_special_fields(struct pidff_device *pidff)
 {
-	debug("finding special fields");
+	hid_dbg(pidff->hid, "finding special fields\n");
 
 	pidff->create_new_effect_type =
 		pidff_find_special_field(pidff->reports[PID_CREATE_NEW_EFFECT],
@@ -945,32 +946,30 @@ static int pidff_find_special_fields(struct pidff_device *pidff)
 		pidff_find_special_field(pidff->reports[PID_EFFECT_OPERATION],
 					 0x78, 1);
 
-	debug("search done");
+	hid_dbg(pidff->hid, "search done\n");
 
 	if (!pidff->create_new_effect_type || !pidff->set_effect_type) {
-		printk(KERN_ERR "hid-pidff: effect lists not found\n");
+		hid_err(pidff->hid, "effect lists not found\n");
 		return -1;
 	}
 
 	if (!pidff->effect_direction) {
-		printk(KERN_ERR "hid-pidff: direction field not found\n");
+		hid_err(pidff->hid, "direction field not found\n");
 		return -1;
 	}
 
 	if (!pidff->device_control) {
-		printk(KERN_ERR "hid-pidff: device control field not found\n");
+		hid_err(pidff->hid, "device control field not found\n");
 		return -1;
 	}
 
 	if (!pidff->block_load_status) {
-		printk(KERN_ERR
-		       "hid-pidff: block load status field not found\n");
+		hid_err(pidff->hid, "block load status field not found\n");
 		return -1;
 	}
 
 	if (!pidff->effect_operation_status) {
-		printk(KERN_ERR
-		       "hid-pidff: effect operation field not found\n");
+		hid_err(pidff->hid, "effect operation field not found\n");
 		return -1;
 	}
 
@@ -982,23 +981,22 @@ static int pidff_find_special_fields(struct pidff_device *pidff)
 
 	if (!PIDFF_FIND_SPECIAL_KEYS(type_id, create_new_effect_type,
 				     effect_types)) {
-		printk(KERN_ERR "hid-pidff: no effect types found\n");
+		hid_err(pidff->hid, "no effect types found\n");
 		return -1;
 	}
 
 	if (PIDFF_FIND_SPECIAL_KEYS(status_id, block_load_status,
 				    block_load_status) !=
 			sizeof(pidff_block_load_status)) {
-		printk(KERN_ERR
-		       "hidpidff: block load status identifiers not found\n");
+		hid_err(pidff->hid,
+			"block load status identifiers not found\n");
 		return -1;
 	}
 
 	if (PIDFF_FIND_SPECIAL_KEYS(operation_id, effect_operation_status,
 				    effect_operation_status) !=
 			sizeof(pidff_effect_operation_status)) {
-		printk(KERN_ERR
-		       "hidpidff: effect operation identifiers not found\n");
+		hid_err(pidff->hid, "effect operation identifiers not found\n");
 		return -1;
 	}
 
@@ -1017,8 +1015,8 @@ static int pidff_find_effects(struct pidff_device *pidff,
 		int pidff_type = pidff->type_id[i];
 		if (pidff->set_effect_type->usage[pidff_type].hid !=
 		    pidff->create_new_effect_type->usage[pidff_type].hid) {
-			printk(KERN_ERR "hid-pidff: "
-			       "effect type number %d is invalid\n", i);
+			hid_err(pidff->hid,
+				"effect type number %d is invalid\n", i);
 			return -1;
 		}
 	}
@@ -1073,27 +1071,23 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev)
 	int envelope_ok = 0;
 
 	if (PIDFF_FIND_FIELDS(set_effect, PID_SET_EFFECT, 1)) {
-		printk(KERN_ERR
-		       "hid-pidff: unknown set_effect report layout\n");
+		hid_err(pidff->hid, "unknown set_effect report layout\n");
 		return -ENODEV;
 	}
 
 	PIDFF_FIND_FIELDS(block_load, PID_BLOCK_LOAD, 0);
 	if (!pidff->block_load[PID_EFFECT_BLOCK_INDEX].value) {
-		printk(KERN_ERR
-		       "hid-pidff: unknown pid_block_load report layout\n");
+		hid_err(pidff->hid, "unknown pid_block_load report layout\n");
 		return -ENODEV;
 	}
 
 	if (PIDFF_FIND_FIELDS(effect_operation, PID_EFFECT_OPERATION, 1)) {
-		printk(KERN_ERR
-		       "hid-pidff: unknown effect_operation report layout\n");
+		hid_err(pidff->hid, "unknown effect_operation report layout\n");
 		return -ENODEV;
 	}
 
 	if (PIDFF_FIND_FIELDS(block_free, PID_BLOCK_FREE, 1)) {
-		printk(KERN_ERR
-		       "hid-pidff: unknown pid_block_free report layout\n");
+		hid_err(pidff->hid, "unknown pid_block_free report layout\n");
 		return -ENODEV;
 	}
 
@@ -1105,27 +1099,26 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev)
 
 	if (!envelope_ok) {
 		if (test_and_clear_bit(FF_CONSTANT, dev->ffbit))
-			printk(KERN_WARNING "hid-pidff: "
-			       "has constant effect but no envelope\n");
+			hid_warn(pidff->hid,
+				 "has constant effect but no envelope\n");
 		if (test_and_clear_bit(FF_RAMP, dev->ffbit))
-			printk(KERN_WARNING "hid-pidff: "
-				"has ramp effect but no envelope\n");
+			hid_warn(pidff->hid,
+				 "has ramp effect but no envelope\n");
 
 		if (test_and_clear_bit(FF_PERIODIC, dev->ffbit))
-			printk(KERN_WARNING "hid-pidff: "
-				"has periodic effect but no envelope\n");
+			hid_warn(pidff->hid,
+				 "has periodic effect but no envelope\n");
 	}
 
 	if (test_bit(FF_CONSTANT, dev->ffbit) &&
 	    PIDFF_FIND_FIELDS(set_constant, PID_SET_CONSTANT, 1)) {
-		printk(KERN_WARNING
-		       "hid-pidff: unknown constant effect layout\n");
+		hid_warn(pidff->hid, "unknown constant effect layout\n");
 		clear_bit(FF_CONSTANT, dev->ffbit);
 	}
 
 	if (test_bit(FF_RAMP, dev->ffbit) &&
 	    PIDFF_FIND_FIELDS(set_ramp, PID_SET_RAMP, 1)) {
-		printk(KERN_WARNING "hid-pidff: unknown ramp effect layout\n");
+		hid_warn(pidff->hid, "unknown ramp effect layout\n");
 		clear_bit(FF_RAMP, dev->ffbit);
 	}
 
@@ -1134,8 +1127,7 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev)
 	     test_bit(FF_FRICTION, dev->ffbit) ||
 	     test_bit(FF_INERTIA, dev->ffbit)) &&
 	    PIDFF_FIND_FIELDS(set_condition, PID_SET_CONDITION, 1)) {
-		printk(KERN_WARNING
-		       "hid-pidff: unknown condition effect layout\n");
+		hid_warn(pidff->hid, "unknown condition effect layout\n");
 		clear_bit(FF_SPRING, dev->ffbit);
 		clear_bit(FF_DAMPER, dev->ffbit);
 		clear_bit(FF_FRICTION, dev->ffbit);
@@ -1144,8 +1136,7 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev)
 
 	if (test_bit(FF_PERIODIC, dev->ffbit) &&
 	    PIDFF_FIND_FIELDS(set_periodic, PID_SET_PERIODIC, 1)) {
-		printk(KERN_WARNING
-		       "hid-pidff: unknown periodic effect layout\n");
+		hid_warn(pidff->hid, "unknown periodic effect layout\n");
 		clear_bit(FF_PERIODIC, dev->ffbit);
 	}
 
@@ -1184,12 +1175,12 @@ static void pidff_reset(struct pidff_device *pidff)
 	if (pidff->pool[PID_SIMULTANEOUS_MAX].value) {
 		while (pidff->pool[PID_SIMULTANEOUS_MAX].value[0] < 2) {
 			if (i++ > 20) {
-				printk(KERN_WARNING "hid-pidff: device reports "
-				       "%d simultaneous effects\n",
-				       pidff->pool[PID_SIMULTANEOUS_MAX].value[0]);
+				hid_warn(pidff->hid,
+					 "device reports %d simultaneous effects\n",
+					 pidff->pool[PID_SIMULTANEOUS_MAX].value[0]);
 				break;
 			}
-			debug("pid_pool requested again");
+			hid_dbg(pidff->hid, "pid_pool requested again\n");
 			usbhid_submit_report(hid, pidff->reports[PID_POOL],
 					  USB_DIR_IN);
 			usbhid_wait_io(hid);
@@ -1215,7 +1206,7 @@ static int pidff_check_autocenter(struct pidff_device *pidff,
 
 	error = pidff_request_effect_upload(pidff, 1);
 	if (error) {
-		printk(KERN_ERR "hid-pidff: upload request failed\n");
+		hid_err(pidff->hid, "upload request failed\n");
 		return error;
 	}
 
@@ -1224,8 +1215,8 @@ static int pidff_check_autocenter(struct pidff_device *pidff,
 		pidff_autocenter(pidff, 0xffff);
 		set_bit(FF_AUTOCENTER, dev->ffbit);
 	} else {
-		printk(KERN_NOTICE "hid-pidff: "
-		       "device has unknown autocenter control method\n");
+		hid_notice(pidff->hid,
+			   "device has unknown autocenter control method\n");
 	}
 
 	pidff_erase_pid(pidff,
@@ -1248,10 +1239,10 @@ int hid_pidff_init(struct hid_device *hid)
 	int max_effects;
 	int error;
 
-	debug("starting pid init");
+	hid_dbg(hid, "starting pid init\n");
 
 	if (list_empty(&hid->report_enum[HID_OUTPUT_REPORT].report_list)) {
-		debug("not a PID device, no output report");
+		hid_dbg(hid, "not a PID device, no output report\n");
 		return -ENODEV;
 	}
 
@@ -1265,7 +1256,7 @@ int hid_pidff_init(struct hid_device *hid)
 	pidff_find_reports(hid, HID_FEATURE_REPORT, pidff);
 
 	if (!pidff_reports_ok(pidff)) {
-		debug("reports not ok, aborting");
+		hid_dbg(hid, "reports not ok, aborting\n");
 		error = -ENODEV;
 		goto fail;
 	}
@@ -1278,8 +1269,8 @@ int hid_pidff_init(struct hid_device *hid)
 
 	if (test_bit(FF_GAIN, dev->ffbit)) {
 		pidff_set(&pidff->device_gain[PID_DEVICE_GAIN_FIELD], 0xffff);
-		usbhid_submit_report(pidff->hid, pidff->reports[PID_DEVICE_GAIN],
-				  USB_DIR_OUT);
+		usbhid_submit_report(hid, pidff->reports[PID_DEVICE_GAIN],
+				     USB_DIR_OUT);
 	}
 
 	error = pidff_check_autocenter(pidff, dev);
@@ -1290,23 +1281,23 @@ int hid_pidff_init(struct hid_device *hid)
 	    pidff->block_load[PID_EFFECT_BLOCK_INDEX].field->logical_maximum -
 	    pidff->block_load[PID_EFFECT_BLOCK_INDEX].field->logical_minimum +
 	    1;
-	debug("max effects is %d", max_effects);
+	hid_dbg(hid, "max effects is %d\n", max_effects);
 
 	if (max_effects > PID_EFFECTS_MAX)
 		max_effects = PID_EFFECTS_MAX;
 
 	if (pidff->pool[PID_SIMULTANEOUS_MAX].value)
-		debug("max simultaneous effects is %d",
-		      pidff->pool[PID_SIMULTANEOUS_MAX].value[0]);
+		hid_dbg(hid, "max simultaneous effects is %d\n",
+			pidff->pool[PID_SIMULTANEOUS_MAX].value[0]);
 
 	if (pidff->pool[PID_RAM_POOL_SIZE].value)
-		debug("device memory size is %d bytes",
-		      pidff->pool[PID_RAM_POOL_SIZE].value[0]);
+		hid_dbg(hid, "device memory size is %d bytes\n",
+			pidff->pool[PID_RAM_POOL_SIZE].value[0]);
 
 	if (pidff->pool[PID_DEVICE_MANAGED_POOL].value &&
 	    pidff->pool[PID_DEVICE_MANAGED_POOL].value[0] == 0) {
-		printk(KERN_NOTICE "hid-pidff: "
-		       "device does not support device managed pool\n");
+		hid_notice(hid,
+			   "device does not support device managed pool\n");
 		goto fail;
 	}
 
@@ -1322,8 +1313,7 @@ int hid_pidff_init(struct hid_device *hid)
 	ff->set_autocenter = pidff_set_autocenter;
 	ff->playback = pidff_playback;
 
-	printk(KERN_INFO "Force feedback for USB HID PID devices by "
-	       "Anssi Hannula <anssi.hannula@gmail.com>\n");
+	hid_info(dev, "Force feedback for USB HID PID devices by Anssi Hannula <anssi.hannula@gmail.com>\n");
 
 	return 0;
 
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index a9935f6709f7..fb78f75d49a9 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -906,7 +906,7 @@ int hiddev_connect(struct hid_device *hid, unsigned int force)
 	hiddev->exist = 1;
 	retval = usb_register_dev(usbhid->intf, &hiddev_class);
 	if (retval) {
-		err_hid("Not able to get a minor for this device.");
+		hid_err(hid, "Not able to get a minor for this device\n");
 		hid->hiddev = NULL;
 		kfree(hiddev);
 		return -1;
diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index a948605564fb..065817329f03 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -24,6 +24,8 @@
  * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -104,16 +106,18 @@ static void usb_kbd_irq(struct urb *urb)
 			if (usb_kbd_keycode[kbd->old[i]])
 				input_report_key(kbd->dev, usb_kbd_keycode[kbd->old[i]], 0);
 			else
-				dev_info(&urb->dev->dev,
-						"Unknown key (scancode %#x) released.\n", kbd->old[i]);
+				hid_info(urb->dev,
+					 "Unknown key (scancode %#x) released.\n",
+					 kbd->old[i]);
 		}
 
 		if (kbd->new[i] > 3 && memscan(kbd->old + 2, kbd->new[i], 6) == kbd->old + 8) {
 			if (usb_kbd_keycode[kbd->new[i]])
 				input_report_key(kbd->dev, usb_kbd_keycode[kbd->new[i]], 1);
 			else
-				dev_info(&urb->dev->dev,
-						"Unknown key (scancode %#x) released.\n", kbd->new[i]);
+				hid_info(urb->dev,
+					 "Unknown key (scancode %#x) released.\n",
+					 kbd->new[i]);
 		}
 	}
 
@@ -124,9 +128,9 @@ static void usb_kbd_irq(struct urb *urb)
 resubmit:
 	i = usb_submit_urb (urb, GFP_ATOMIC);
 	if (i)
-		err_hid ("can't resubmit intr, %s-%s/input0, status %d",
-				kbd->usbdev->bus->bus_name,
-				kbd->usbdev->devpath, i);
+		hid_err(urb->dev, "can't resubmit intr, %s-%s/input0, status %d",
+			kbd->usbdev->bus->bus_name,
+			kbd->usbdev->devpath, i);
 }
 
 static int usb_kbd_event(struct input_dev *dev, unsigned int type,
@@ -150,7 +154,7 @@ static int usb_kbd_event(struct input_dev *dev, unsigned int type,
 	*(kbd->leds) = kbd->newleds;
 	kbd->led->dev = kbd->usbdev;
 	if (usb_submit_urb(kbd->led, GFP_ATOMIC))
-		err_hid("usb_submit_urb(leds) failed");
+		pr_err("usb_submit_urb(leds) failed\n");
 
 	return 0;
 }
@@ -160,7 +164,7 @@ static void usb_kbd_led(struct urb *urb)
 	struct usb_kbd *kbd = urb->context;
 
 	if (urb->status)
-		dev_warn(&urb->dev->dev, "led urb status %d received\n",
+		hid_warn(urb->dev, "led urb status %d received\n",
 			 urb->status);
 
 	if (*(kbd->leds) == kbd->newleds)
@@ -169,7 +173,7 @@ static void usb_kbd_led(struct urb *urb)
 	*(kbd->leds) = kbd->newleds;
 	kbd->led->dev = kbd->usbdev;
 	if (usb_submit_urb(kbd->led, GFP_ATOMIC))
-		err_hid("usb_submit_urb(leds) failed");
+		hid_err(urb->dev, "usb_submit_urb(leds) failed\n");
 }
 
 static int usb_kbd_open(struct input_dev *dev)
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e2af195d8b46..20b9801f669b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -881,12 +881,32 @@ int hid_pidff_init(struct hid_device *hid);
 #define hid_pidff_init NULL
 #endif
 
-#define dbg_hid(format, arg...) if (hid_debug) \
-				printk(KERN_DEBUG "%s: " format ,\
-				__FILE__ , ## arg)
-#define err_hid(format, arg...) printk(KERN_ERR "%s: " format "\n" , \
-		__FILE__ , ## arg)
-#endif /* HID_FF */
+#define dbg_hid(format, arg...)						\
+do {									\
+	if (hid_debug)							\
+		printk(KERN_DEBUG "%s: " format, __FILE__, ##arg);	\
+} while (0)
+
+#define hid_printk(level, hid, fmt, arg...)		\
+	dev_printk(level, &(hid)->dev, fmt, ##arg)
+#define hid_emerg(hid, fmt, arg...)			\
+	dev_emerg(&(hid)->dev, fmt, ##arg)
+#define hid_crit(hid, fmt, arg...)			\
+	dev_crit(&(hid)->dev, fmt, ##arg)
+#define hid_alert(hid, fmt, arg...)			\
+	dev_alert(&(hid)->dev, fmt, ##arg)
+#define hid_err(hid, fmt, arg...)			\
+	dev_err(&(hid)->dev, fmt, ##arg)
+#define hid_notice(hid, fmt, arg...)			\
+	dev_notice(&(hid)->dev, fmt, ##arg)
+#define hid_warn(hid, fmt, arg...)			\
+	dev_warn(&(hid)->dev, fmt, ##arg)
+#define hid_info(hid, fmt, arg...)			\
+	dev_info(&(hid)->dev, fmt, ##arg)
+#define hid_dbg(hid, fmt, arg...)			\
+	dev_dbg(&(hid)->dev, fmt, ##arg)
+
+#endif /* __KERNEL__ */
 
 #endif
 
-- 
cgit v1.2.3-71-gd317


From 1f5a24794a54588ea3a9efd521be31d826e0b9d7 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 9 Dec 2010 12:02:18 -0800
Subject: timers: Rename timerlist infrastructure to timerqueue

Thomas pointed out a namespace collision between the new timerlist
infrastructure I introduced and the existing timer_list.c

So to avoid confusion, I've renamed the timerlist infrastructure
to timerqueue.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timerlist.h  |  37 --------------
 include/linux/timerqueue.h |  37 ++++++++++++++
 lib/Makefile               |   2 +-
 lib/timerlist.c            | 118 ---------------------------------------------
 lib/timerqueue.c           | 118 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 156 insertions(+), 156 deletions(-)
 delete mode 100644 include/linux/timerlist.h
 create mode 100644 include/linux/timerqueue.h
 delete mode 100644 lib/timerlist.c
 create mode 100644 lib/timerqueue.c

(limited to 'include/linux')

diff --git a/include/linux/timerlist.h b/include/linux/timerlist.h
deleted file mode 100644
index c46b28ae6e4d..000000000000
--- a/include/linux/timerlist.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef _LINUX_TIMERLIST_H
-#define _LINUX_TIMERLIST_H
-
-#include <linux/rbtree.h>
-#include <linux/ktime.h>
-
-
-struct timerlist_node {
-	struct rb_node node;
-	ktime_t expires;
-};
-
-struct timerlist_head {
-	struct rb_root head;
-	struct timerlist_node *next;
-};
-
-
-extern void timerlist_add(struct timerlist_head *head,
-				struct timerlist_node *node);
-extern void timerlist_del(struct timerlist_head *head,
-				struct timerlist_node *node);
-extern struct timerlist_node *timerlist_getnext(struct timerlist_head *head);
-extern struct timerlist_node *timerlist_iterate_next(
-						struct timerlist_node *node);
-
-static inline void timerlist_init(struct timerlist_node *node)
-{
-	RB_CLEAR_NODE(&node->node);
-}
-
-static inline void timerlist_init_head(struct timerlist_head *head)
-{
-	head->head = RB_ROOT;
-	head->next = NULL;
-}
-#endif /* _LINUX_TIMERLIST_H */
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
new file mode 100644
index 000000000000..406b103894bd
--- /dev/null
+++ b/include/linux/timerqueue.h
@@ -0,0 +1,37 @@
+#ifndef _LINUX_TIMERQUEUE_H
+#define _LINUX_TIMERQUEUE_H
+
+#include <linux/rbtree.h>
+#include <linux/ktime.h>
+
+
+struct timerqueue_node {
+	struct rb_node node;
+	ktime_t expires;
+};
+
+struct timerqueue_head {
+	struct rb_root head;
+	struct timerqueue_node *next;
+};
+
+
+extern void timerqueue_add(struct timerqueue_head *head,
+				struct timerqueue_node *node);
+extern void timerqueue_del(struct timerqueue_head *head,
+				struct timerqueue_node *node);
+extern struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head);
+extern struct timerqueue_node *timerqueue_iterate_next(
+						struct timerqueue_node *node);
+
+static inline void timerqueue_init(struct timerqueue_node *node)
+{
+	RB_CLEAR_NODE(&node->node);
+}
+
+static inline void timerqueue_init_head(struct timerqueue_head *head)
+{
+	head->head = RB_ROOT;
+	head->next = NULL;
+}
+#endif /* _LINUX_TIMERQUEUE_H */
diff --git a/lib/Makefile b/lib/Makefile
index 8b475cfb8195..9e2db72d128e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o timerlist.o\
+	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
diff --git a/lib/timerlist.c b/lib/timerlist.c
deleted file mode 100644
index 9101b42fd13d..000000000000
--- a/lib/timerlist.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *  Generic Timer-list
- *
- *  Manages a simple list of timers, ordered by expiration time.
- *  Uses rbtrees for quick list adds and expiration.
- *
- *  NOTE: All of the following functions need to be serialized
- *  to avoid races. No locking is done by this libary code.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <linux/timerlist.h>
-#include <linux/rbtree.h>
-
-/**
- * timerlist_add - Adds timer to timerlist.
- *
- * @head: head of timerlist
- * @node: timer node to be added
- *
- * Adds the timer node to the timerlist, sorted by the
- * node's expires value.
- */
-void timerlist_add(struct timerlist_head *head, struct timerlist_node *node)
-{
-	struct rb_node **p = &head->head.rb_node;
-	struct rb_node *parent = NULL;
-	struct timerlist_node  *ptr;
-
-	/* Make sure we don't add nodes that are already added */
-	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
-
-	while (*p) {
-		parent = *p;
-		ptr = rb_entry(parent, struct timerlist_node, node);
-		if (node->expires.tv64 < ptr->expires.tv64)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&node->node, parent, p);
-	rb_insert_color(&node->node, &head->head);
-
-	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
-		head->next = node;
-}
-
-/**
- * timerlist_del - Removes a timer from the timerlist.
- *
- * @head: head of timerlist
- * @node: timer node to be removed
- *
- * Removes the timer node from the timerlist.
- */
-void timerlist_del(struct timerlist_head *head, struct timerlist_node *node)
-{
-	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
-
-	/* update next pointer */
-	if (head->next == node) {
-		struct rb_node *rbn = rb_next(&node->node);
-
-		head->next = rbn ?
-			rb_entry(rbn, struct timerlist_node, node) : NULL;
-	}
-	rb_erase(&node->node, &head->head);
-	RB_CLEAR_NODE(&node->node);
-}
-
-
-/**
- * timerlist_getnext - Returns the timer with the earlies expiration time
- *
- * @head: head of timerlist
- *
- * Returns a pointer to the timer node that has the
- * earliest expiration time.
- */
-struct timerlist_node *timerlist_getnext(struct timerlist_head *head)
-{
-	return head->next;
-}
-
-
-/**
- * timerlist_iterate_next - Returns the timer after the provided timer
- *
- * @node: Pointer to a timer.
- *
- * Provides the timer that is after the given node. This is used, when
- * necessary, to iterate through the list of timers in a timer list
- * without modifying the list.
- */
-struct timerlist_node *timerlist_iterate_next(struct timerlist_node *node)
-{
-	struct rb_node *next;
-
-	if (!node)
-		return NULL;
-	next = rb_next(&node->node);
-	if (!next)
-		return NULL;
-	return container_of(next, struct timerlist_node, node);
-}
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..f46de84d9467
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,118 @@
+/*
+ *  Generic Timer-queue
+ *
+ *  Manages a simple queue of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this libary code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	struct rb_node **p = &head->head.rb_node;
+	struct rb_node *parent = NULL;
+	struct timerqueue_node  *ptr;
+
+	/* Make sure we don't add nodes that are already added */
+	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+	while (*p) {
+		parent = *p;
+		ptr = rb_entry(parent, struct timerqueue_node, node);
+		if (node->expires.tv64 < ptr->expires.tv64)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->node, parent, p);
+	rb_insert_color(&node->node, &head->head);
+
+	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+		head->next = node;
+}
+
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+	/* update next pointer */
+	if (head->next == node) {
+		struct rb_node *rbn = rb_next(&node->node);
+
+		head->next = rbn ?
+			rb_entry(rbn, struct timerqueue_node, node) : NULL;
+	}
+	rb_erase(&node->node, &head->head);
+	RB_CLEAR_NODE(&node->node);
+}
+
+
+/**
+ * timerqueue_getnext - Returns the timer with the earlies expiration time
+ *
+ * @head: head of timerqueue
+ *
+ * Returns a pointer to the timer node that has the
+ * earliest expiration time.
+ */
+struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+{
+	return head->next;
+}
+
+
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+	struct rb_node *next;
+
+	if (!node)
+		return NULL;
+	next = rb_next(&node->node);
+	if (!next)
+		return NULL;
+	return container_of(next, struct timerqueue_node, node);
+}
-- 
cgit v1.2.3-71-gd317


From 998adc3dda59f811966b3ccb21eb223680b25ec4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 20 Sep 2010 19:19:17 -0700
Subject: hrtimers: Convert hrtimers to use timerlist infrastructure

Converts the hrtimer code to use the new timerlist infrastructure

Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML Reference: <1290136329-18291-3-git-send-email-john.stultz@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
CC: Alessandro Zummo <a.zummo@towertech.it>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Richard Cochran <richardcochran@gmail.com>
---
 include/linux/hrtimer.h  | 32 +++++++++---------
 kernel/hrtimer.c         | 86 +++++++++++++++++-------------------------------
 kernel/time/timer_list.c |  8 ++---
 3 files changed, 49 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index dd9954b79342..330586ffffbb 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -22,7 +22,7 @@
 #include <linux/wait.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
-
+#include <linux/timerqueue.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -79,8 +79,8 @@ enum hrtimer_restart {
 
 /**
  * struct hrtimer - the basic hrtimer structure
- * @node:	red black tree node for time ordered insertion
- * @_expires:	the absolute expiry time in the hrtimers internal
+ * @node:	timerqueue node, which also manages node.expires,
+ *		the absolute expiry time in the hrtimers internal
  *		representation. The time is related to the clock on
  *		which the timer is based. Is setup by adding
  *		slack to the _softexpires value. For non range timers
@@ -101,8 +101,7 @@ enum hrtimer_restart {
  * The hrtimer structure must be initialized by hrtimer_init()
  */
 struct hrtimer {
-	struct rb_node			node;
-	ktime_t				_expires;
+	struct timerqueue_node		node;
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
@@ -141,8 +140,7 @@ struct hrtimer_sleeper {
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
 	clockid_t		index;
-	struct rb_root		active;
-	struct rb_node		*first;
+	struct timerqueue_head	active;
 	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
 	ktime_t			softirq_time;
@@ -183,43 +181,43 @@ struct hrtimer_cpu_base {
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
-	timer->_expires = time;
+	timer->node.expires = time;
 	timer->_softexpires = time;
 }
 
 static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
 {
 	timer->_softexpires = time;
-	timer->_expires = ktime_add_safe(time, delta);
+	timer->node.expires = ktime_add_safe(time, delta);
 }
 
 static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
 {
 	timer->_softexpires = time;
-	timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
+	timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
 }
 
 static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
 {
-	timer->_expires.tv64 = tv64;
+	timer->node.expires.tv64 = tv64;
 	timer->_softexpires.tv64 = tv64;
 }
 
 static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
 {
-	timer->_expires = ktime_add_safe(timer->_expires, time);
+	timer->node.expires = ktime_add_safe(timer->node.expires, time);
 	timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
 }
 
 static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
 {
-	timer->_expires = ktime_add_ns(timer->_expires, ns);
+	timer->node.expires = ktime_add_ns(timer->node.expires, ns);
 	timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
 }
 
 static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
 {
-	return timer->_expires;
+	return timer->node.expires;
 }
 
 static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
@@ -229,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
 {
-	return timer->_expires.tv64;
+	return timer->node.expires.tv64;
 }
 static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 {
@@ -238,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
 {
-	return ktime_to_ns(timer->_expires);
+	return ktime_to_ns(timer->node.expires);
 }
 
 static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 {
-    return ktime_sub(timer->_expires, timer->base->get_time());
+	return ktime_sub(timer->node.expires, timer->base->get_time());
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ce669174f355..93976ad42f5a 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 		struct hrtimer *timer;
+		struct timerqueue_node *next;
 
-		if (!base->first)
+		next = timerqueue_getnext(&base->active);
+		if (!next)
 			continue;
-		timer = rb_entry(base->first, struct hrtimer, node);
+		timer = container_of(next, struct hrtimer, node);
+
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		/*
 		 * clock_was_set() has changed base->offset so the
@@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
 static int enqueue_hrtimer(struct hrtimer *timer,
 			   struct hrtimer_clock_base *base)
 {
-	struct rb_node **link = &base->active.rb_node;
-	struct rb_node *parent = NULL;
-	struct hrtimer *entry;
-	int leftmost = 1;
-
 	debug_activate(timer);
 
-	/*
-	 * Find the right place in the rbtree:
-	 */
-	while (*link) {
-		parent = *link;
-		entry = rb_entry(parent, struct hrtimer, node);
-		/*
-		 * We dont care about collisions. Nodes with
-		 * the same expiry time stay together.
-		 */
-		if (hrtimer_get_expires_tv64(timer) <
-				hrtimer_get_expires_tv64(entry)) {
-			link = &(*link)->rb_left;
-		} else {
-			link = &(*link)->rb_right;
-			leftmost = 0;
-		}
-	}
+	timerqueue_add(&base->active, &timer->node);
 
-	/*
-	 * Insert the timer to the rbtree and check whether it
-	 * replaces the first pending timer
-	 */
-	if (leftmost)
-		base->first = &timer->node;
-
-	rb_link_node(&timer->node, parent, link);
-	rb_insert_color(&timer->node, &base->active);
 	/*
 	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
 	 * state of a possibly running callback.
 	 */
 	timer->state |= HRTIMER_STATE_ENQUEUED;
 
-	return leftmost;
+	return (&timer->node == base->active.next);
 }
 
 /*
@@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
 		goto out;
 
-	/*
-	 * Remove the timer from the rbtree and replace the first
-	 * entry pointer if necessary.
-	 */
-	if (base->first == &timer->node) {
-		base->first = rb_next(&timer->node);
+	if (&timer->node == timerqueue_getnext(&base->active)) {
 #ifdef CONFIG_HIGH_RES_TIMERS
 		/* Reprogram the clock event device. if enabled */
 		if (reprogram && hrtimer_hres_active()) {
@@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 		}
 #endif
 	}
-	rb_erase(&timer->node, &base->active);
+	timerqueue_del(&base->active, &timer->node);
 out:
 	timer->state = newstate;
 }
@@ -1123,11 +1090,13 @@ ktime_t hrtimer_get_next_event(void)
 	if (!hrtimer_hres_active()) {
 		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
 			struct hrtimer *timer;
+			struct timerqueue_node *next;
 
-			if (!base->first)
+			next = timerqueue_getnext(&base->active);
+			if (!next)
 				continue;
 
-			timer = rb_entry(base->first, struct hrtimer, node);
+			timer = container_of(next, struct hrtimer, node);
 			delta.tv64 = hrtimer_get_expires_tv64(timer);
 			delta = ktime_sub(delta, base->get_time());
 			if (delta.tv64 < mindelta.tv64)
@@ -1157,6 +1126,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
 	timer->base = &cpu_base->clock_base[clock_id];
 	hrtimer_init_timer_hres(timer);
+	timerqueue_init(&timer->node);
 
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
@@ -1270,14 +1240,14 @@ retry:
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		ktime_t basenow;
-		struct rb_node *node;
+		struct timerqueue_node *node;
 
 		basenow = ktime_add(now, base->offset);
 
-		while ((node = base->first)) {
+		while ((node = timerqueue_getnext(&base->active))) {
 			struct hrtimer *timer;
 
-			timer = rb_entry(node, struct hrtimer, node);
+			timer = container_of(node, struct hrtimer, node);
 
 			/*
 			 * The immediate goal for using the softexpires is
@@ -1433,7 +1403,7 @@ void hrtimer_run_pending(void)
  */
 void hrtimer_run_queues(void)
 {
-	struct rb_node *node;
+	struct timerqueue_node *node;
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	int index, gettime = 1;
@@ -1442,9 +1412,11 @@ void hrtimer_run_queues(void)
 		return;
 
 	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
-		base = &cpu_base->clock_base[index];
+		struct timerqueue_node *next;
 
-		if (!base->first)
+		base = &cpu_base->clock_base[index];
+		next = timerqueue_getnext(&base->active);
+		if (!next)
 			continue;
 
 		if (gettime) {
@@ -1454,10 +1426,10 @@ void hrtimer_run_queues(void)
 
 		raw_spin_lock(&cpu_base->lock);
 
-		while ((node = base->first)) {
+		while ((node = next)) {
 			struct hrtimer *timer;
 
-			timer = rb_entry(node, struct hrtimer, node);
+			timer = container_of(node, struct hrtimer, node);
 			if (base->softirq_time.tv64 <=
 					hrtimer_get_expires_tv64(timer))
 				break;
@@ -1622,8 +1594,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 
 	raw_spin_lock_init(&cpu_base->lock);
 
-	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
+	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
+		timerqueue_init_head(&cpu_base->clock_base[i].active);
+	}
 
 	hrtimer_init_hres(cpu_base);
 }
@@ -1634,10 +1608,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 				struct hrtimer_clock_base *new_base)
 {
 	struct hrtimer *timer;
-	struct rb_node *node;
+	struct timerqueue_node *node;
 
-	while ((node = rb_first(&old_base->active))) {
-		timer = rb_entry(node, struct hrtimer, node);
+	while ((node = timerqueue_getnext(&old_base->active))) {
+		timer = container_of(node, struct hrtimer, node);
 		BUG_ON(hrtimer_callback_running(timer));
 		debug_deactivate(timer);
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ab8f5e33fa92..32a19f9397fc 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
 {
 	struct hrtimer *timer, tmp;
 	unsigned long next = 0, i;
-	struct rb_node *curr;
+	struct timerqueue_node *curr;
 	unsigned long flags;
 
 next_one:
 	i = 0;
 	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
-	curr = base->first;
+	curr = timerqueue_getnext(&base->active);
 	/*
 	 * Crude but we have to do this O(N*N) thing, because
 	 * we have to unlock the base when printing:
 	 */
 	while (curr && i < next) {
-		curr = rb_next(curr);
+		curr = timerqueue_iterate_next(curr);
 		i++;
 	}
 
 	if (curr) {
 
-		timer = rb_entry(curr, struct hrtimer, node);
+		timer = container_of(curr, struct hrtimer, node);
 		tmp = *timer;
 		raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
-- 
cgit v1.2.3-71-gd317


From 352a337832774a6929c16b569abe9cedc3db01cc Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 9 Dec 2010 22:46:29 +0100
Subject: USB: otg: fix link breakage, when the NOP USB Xceiver is a module

If the NOP USB OTG transceiver driver is built as a module, the otg.h
header declares external functions, but if they are referenced from the
kernel proper, as, e.g., in the OMAP3 case, where the omap3evm board is
calling the usb_nop_xceiv_register() function, linkage breaks. This patch
fixes this problem.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/otg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 0a5b3711e502..a1a1e7a73ec9 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -116,7 +116,7 @@ struct otg_transceiver {
 /* for board-specific init logic */
 extern int otg_set_transceiver(struct otg_transceiver *);
 
-#if defined(CONFIG_NOP_USB_XCEIV) || defined(CONFIG_NOP_USB_XCEIV_MODULE)
+#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
 /* sometimes transceivers are accessed only through e.g. ULPI */
 extern void usb_nop_xceiv_register(void);
 extern void usb_nop_xceiv_unregister(void);
-- 
cgit v1.2.3-71-gd317


From e0c201f339fe7fc38d1b0f6f4755ff627686c7e0 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <pkondeti@codeaurora.org>
Date: Tue, 7 Dec 2010 17:53:55 +0530
Subject: USB: Add MSM OTG Controller driver

This driver implements PHY initialization, clock management, ULPI IO ops
and simple OTG state machine to kick host/peripheral based on Id/VBUS
line status.  VBUS/Id lines are tied to a reference voltage on some boards.
Hence provide debugfs interface to select host/peripheral mode.

Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/Kconfig          |  10 +
 drivers/usb/otg/Makefile         |   1 +
 drivers/usb/otg/msm72k_otg.c     | 850 +++++++++++++++++++++++++++++++++++++++
 include/linux/usb/msm_hsusb.h    | 108 +++++
 include/linux/usb/msm_hsusb_hw.h |  56 +++
 5 files changed, 1025 insertions(+)
 create mode 100644 drivers/usb/otg/msm72k_otg.c
 create mode 100644 include/linux/usb/msm_hsusb.h
 create mode 100644 include/linux/usb/msm_hsusb_hw.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 5ce07528cd0c..915c729872f8 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -81,4 +81,14 @@ config USB_LANGWELL_OTG
 	  To compile this driver as a module, choose M here: the
 	  module will be called langwell_otg.
 
+config USB_MSM_OTG_72K
+	tristate "OTG support for Qualcomm on-chip USB controller"
+	depends on (USB || USB_GADGET) && ARCH_MSM
+	select USB_OTG_UTILS
+	help
+	  Enable this to support the USB OTG transceiver on MSM chips. It
+	  handles PHY initialization, clock management, and workarounds
+	  required after resetting the hardware. This driver is required
+	  even for peripheral only or host only mode configuration.
+
 endif # USB || OTG
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index 66f1b83e4fa7..3b1b0960fb68 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
 obj-$(CONFIG_USB_LANGWELL_OTG)	+= langwell_otg.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
 obj-$(CONFIG_USB_ULPI)		+= ulpi.o
+obj-$(CONFIG_USB_MSM_OTG_72K)	+= msm72k_otg.o
diff --git a/drivers/usb/otg/msm72k_otg.c b/drivers/usb/otg/msm72k_otg.c
new file mode 100644
index 000000000000..46f468a912f4
--- /dev/null
+++ b/drivers/usb/otg/msm72k_otg.c
@@ -0,0 +1,850 @@
+/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/usb.h>
+#include <linux/usb/otg.h>
+#include <linux/usb/ulpi.h>
+#include <linux/usb/gadget.h>
+#include <linux/usb/hcd.h>
+#include <linux/usb/msm_hsusb.h>
+#include <linux/usb/msm_hsusb_hw.h>
+
+#include <mach/clk.h>
+
+#define MSM_USB_BASE	(motg->regs)
+#define DRIVER_NAME	"msm_otg"
+
+#define ULPI_IO_TIMEOUT_USEC	(10 * 1000)
+static int ulpi_read(struct otg_transceiver *otg, u32 reg)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	int cnt = 0;
+
+	/* initiate read operation */
+	writel(ULPI_RUN | ULPI_READ | ULPI_ADDR(reg),
+	       USB_ULPI_VIEWPORT);
+
+	/* wait for completion */
+	while (cnt < ULPI_IO_TIMEOUT_USEC) {
+		if (!(readl(USB_ULPI_VIEWPORT) & ULPI_RUN))
+			break;
+		udelay(1);
+		cnt++;
+	}
+
+	if (cnt >= ULPI_IO_TIMEOUT_USEC) {
+		dev_err(otg->dev, "ulpi_read: timeout %08x\n",
+			readl(USB_ULPI_VIEWPORT));
+		return -ETIMEDOUT;
+	}
+	return ULPI_DATA_READ(readl(USB_ULPI_VIEWPORT));
+}
+
+static int ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	int cnt = 0;
+
+	/* initiate write operation */
+	writel(ULPI_RUN | ULPI_WRITE |
+	       ULPI_ADDR(reg) | ULPI_DATA(val),
+	       USB_ULPI_VIEWPORT);
+
+	/* wait for completion */
+	while (cnt < ULPI_IO_TIMEOUT_USEC) {
+		if (!(readl(USB_ULPI_VIEWPORT) & ULPI_RUN))
+			break;
+		udelay(1);
+		cnt++;
+	}
+
+	if (cnt >= ULPI_IO_TIMEOUT_USEC) {
+		dev_err(otg->dev, "ulpi_write: timeout\n");
+		return -ETIMEDOUT;
+	}
+	return 0;
+}
+
+static struct otg_io_access_ops msm_otg_io_ops = {
+	.read = ulpi_read,
+	.write = ulpi_write,
+};
+
+static void ulpi_init(struct msm_otg *motg)
+{
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	int *seq = pdata->phy_init_seq;
+
+	if (!seq)
+		return;
+
+	while (seq[0] >= 0) {
+		dev_vdbg(motg->otg.dev, "ulpi: write 0x%02x to 0x%02x\n",
+				seq[0], seq[1]);
+		ulpi_write(&motg->otg, seq[0], seq[1]);
+		seq += 2;
+	}
+}
+
+static int msm_otg_link_clk_reset(struct msm_otg *motg, bool assert)
+{
+	int ret;
+
+	if (assert) {
+		ret = clk_reset(motg->clk, CLK_RESET_ASSERT);
+		if (ret)
+			dev_err(motg->otg.dev, "usb hs_clk assert failed\n");
+	} else {
+		ret = clk_reset(motg->clk, CLK_RESET_DEASSERT);
+		if (ret)
+			dev_err(motg->otg.dev, "usb hs_clk deassert failed\n");
+	}
+	return ret;
+}
+
+static int msm_otg_phy_clk_reset(struct msm_otg *motg)
+{
+	int ret;
+
+	ret = clk_reset(motg->phy_reset_clk, CLK_RESET_ASSERT);
+	if (ret) {
+		dev_err(motg->otg.dev, "usb phy clk assert failed\n");
+		return ret;
+	}
+	usleep_range(10000, 12000);
+	ret = clk_reset(motg->phy_reset_clk, CLK_RESET_DEASSERT);
+	if (ret)
+		dev_err(motg->otg.dev, "usb phy clk deassert failed\n");
+	return ret;
+}
+
+static int msm_otg_phy_reset(struct msm_otg *motg)
+{
+	u32 val;
+	int ret;
+	int retries;
+
+	ret = msm_otg_link_clk_reset(motg, 1);
+	if (ret)
+		return ret;
+	ret = msm_otg_phy_clk_reset(motg);
+	if (ret)
+		return ret;
+	ret = msm_otg_link_clk_reset(motg, 0);
+	if (ret)
+		return ret;
+
+	val = readl(USB_PORTSC) & ~PORTSC_PTS_MASK;
+	writel(val | PORTSC_PTS_ULPI, USB_PORTSC);
+
+	for (retries = 3; retries > 0; retries--) {
+		ret = ulpi_write(&motg->otg, ULPI_FUNC_CTRL_SUSPENDM,
+				ULPI_CLR(ULPI_FUNC_CTRL));
+		if (!ret)
+			break;
+		ret = msm_otg_phy_clk_reset(motg);
+		if (ret)
+			return ret;
+	}
+	if (!retries)
+		return -ETIMEDOUT;
+
+	/* This reset calibrates the phy, if the above write succeeded */
+	ret = msm_otg_phy_clk_reset(motg);
+	if (ret)
+		return ret;
+
+	for (retries = 3; retries > 0; retries--) {
+		ret = ulpi_read(&motg->otg, ULPI_DEBUG);
+		if (ret != -ETIMEDOUT)
+			break;
+		ret = msm_otg_phy_clk_reset(motg);
+		if (ret)
+			return ret;
+	}
+	if (!retries)
+		return -ETIMEDOUT;
+
+	dev_info(motg->otg.dev, "phy_reset: success\n");
+	return 0;
+}
+
+#define LINK_RESET_TIMEOUT_USEC		(250 * 1000)
+static int msm_otg_reset(struct otg_transceiver *otg)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	int cnt = 0;
+	int ret;
+	u32 val = 0;
+	u32 ulpi_val = 0;
+
+	ret = msm_otg_phy_reset(motg);
+	if (ret) {
+		dev_err(otg->dev, "phy_reset failed\n");
+		return ret;
+	}
+
+	ulpi_init(motg);
+
+	writel(USBCMD_RESET, USB_USBCMD);
+	while (cnt < LINK_RESET_TIMEOUT_USEC) {
+		if (!(readl(USB_USBCMD) & USBCMD_RESET))
+			break;
+		udelay(1);
+		cnt++;
+	}
+	if (cnt >= LINK_RESET_TIMEOUT_USEC)
+		return -ETIMEDOUT;
+
+	/* select ULPI phy */
+	writel(0x80000000, USB_PORTSC);
+
+	msleep(100);
+
+	writel(0x0, USB_AHBBURST);
+	writel(0x00, USB_AHBMODE);
+
+	if (pdata->otg_control == OTG_PHY_CONTROL) {
+		val = readl(USB_OTGSC);
+		if (pdata->mode == USB_OTG) {
+			ulpi_val = ULPI_INT_IDGRD | ULPI_INT_SESS_VALID;
+			val |= OTGSC_IDIE | OTGSC_BSVIE;
+		} else if (pdata->mode == USB_PERIPHERAL) {
+			ulpi_val = ULPI_INT_SESS_VALID;
+			val |= OTGSC_BSVIE;
+		}
+		writel(val, USB_OTGSC);
+		ulpi_write(otg, ulpi_val, ULPI_USB_INT_EN_RISE);
+		ulpi_write(otg, ulpi_val, ULPI_USB_INT_EN_FALL);
+	}
+
+	return 0;
+}
+
+static void msm_otg_start_host(struct otg_transceiver *otg, int on)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	struct usb_hcd *hcd;
+
+	if (!otg->host)
+		return;
+
+	hcd = bus_to_hcd(otg->host);
+
+	if (on) {
+		dev_dbg(otg->dev, "host on\n");
+
+		if (pdata->vbus_power)
+			pdata->vbus_power(1);
+		/*
+		 * Some boards have a switch cotrolled by gpio
+		 * to enable/disable internal HUB. Enable internal
+		 * HUB before kicking the host.
+		 */
+		if (pdata->setup_gpio)
+			pdata->setup_gpio(OTG_STATE_A_HOST);
+#ifdef CONFIG_USB
+		usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
+#endif
+	} else {
+		dev_dbg(otg->dev, "host off\n");
+
+#ifdef CONFIG_USB
+		usb_remove_hcd(hcd);
+#endif
+		if (pdata->setup_gpio)
+			pdata->setup_gpio(OTG_STATE_UNDEFINED);
+		if (pdata->vbus_power)
+			pdata->vbus_power(0);
+	}
+}
+
+static int msm_otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	struct usb_hcd *hcd;
+
+	/*
+	 * Fail host registration if this board can support
+	 * only peripheral configuration.
+	 */
+	if (motg->pdata->mode == USB_PERIPHERAL) {
+		dev_info(otg->dev, "Host mode is not supported\n");
+		return -ENODEV;
+	}
+
+	if (!host) {
+		if (otg->state == OTG_STATE_A_HOST) {
+			msm_otg_start_host(otg, 0);
+			otg->host = NULL;
+			otg->state = OTG_STATE_UNDEFINED;
+			schedule_work(&motg->sm_work);
+		} else {
+			otg->host = NULL;
+		}
+
+		return 0;
+	}
+
+	hcd = bus_to_hcd(host);
+	hcd->power_budget = motg->pdata->power_budget;
+
+	otg->host = host;
+	dev_dbg(otg->dev, "host driver registered w/ tranceiver\n");
+
+	/*
+	 * Kick the state machine work, if peripheral is not supported
+	 * or peripheral is already registered with us.
+	 */
+	if (motg->pdata->mode == USB_HOST || otg->gadget)
+		schedule_work(&motg->sm_work);
+
+	return 0;
+}
+
+static void msm_otg_start_peripheral(struct otg_transceiver *otg, int on)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+	struct msm_otg_platform_data *pdata = motg->pdata;
+
+	if (!otg->gadget)
+		return;
+
+	if (on) {
+		dev_dbg(otg->dev, "gadget on\n");
+		/*
+		 * Some boards have a switch cotrolled by gpio
+		 * to enable/disable internal HUB. Disable internal
+		 * HUB before kicking the gadget.
+		 */
+		if (pdata->setup_gpio)
+			pdata->setup_gpio(OTG_STATE_B_PERIPHERAL);
+		usb_gadget_vbus_connect(otg->gadget);
+	} else {
+		dev_dbg(otg->dev, "gadget off\n");
+		usb_gadget_vbus_disconnect(otg->gadget);
+		if (pdata->setup_gpio)
+			pdata->setup_gpio(OTG_STATE_UNDEFINED);
+	}
+
+}
+
+static int msm_otg_set_peripheral(struct otg_transceiver *otg,
+			struct usb_gadget *gadget)
+{
+	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
+
+	/*
+	 * Fail peripheral registration if this board can support
+	 * only host configuration.
+	 */
+	if (motg->pdata->mode == USB_HOST) {
+		dev_info(otg->dev, "Peripheral mode is not supported\n");
+		return -ENODEV;
+	}
+
+	if (!gadget) {
+		if (otg->state == OTG_STATE_B_PERIPHERAL) {
+			msm_otg_start_peripheral(otg, 0);
+			otg->gadget = NULL;
+			otg->state = OTG_STATE_UNDEFINED;
+			schedule_work(&motg->sm_work);
+		} else {
+			otg->gadget = NULL;
+		}
+
+		return 0;
+	}
+	otg->gadget = gadget;
+	dev_dbg(otg->dev, "peripheral driver registered w/ tranceiver\n");
+
+	/*
+	 * Kick the state machine work, if host is not supported
+	 * or host is already registered with us.
+	 */
+	if (motg->pdata->mode == USB_PERIPHERAL || otg->host)
+		schedule_work(&motg->sm_work);
+
+	return 0;
+}
+
+/*
+ * We support OTG, Peripheral only and Host only configurations. In case
+ * of OTG, mode switch (host-->peripheral/peripheral-->host) can happen
+ * via Id pin status or user request (debugfs). Id/BSV interrupts are not
+ * enabled when switch is controlled by user and default mode is supplied
+ * by board file, which can be changed by userspace later.
+ */
+static void msm_otg_init_sm(struct msm_otg *motg)
+{
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	u32 otgsc = readl(USB_OTGSC);
+
+	switch (pdata->mode) {
+	case USB_OTG:
+		if (pdata->otg_control == OTG_PHY_CONTROL) {
+			if (otgsc & OTGSC_ID)
+				set_bit(ID, &motg->inputs);
+			else
+				clear_bit(ID, &motg->inputs);
+
+			if (otgsc & OTGSC_BSV)
+				set_bit(B_SESS_VLD, &motg->inputs);
+			else
+				clear_bit(B_SESS_VLD, &motg->inputs);
+		} else if (pdata->otg_control == OTG_USER_CONTROL) {
+			if (pdata->default_mode == USB_HOST) {
+				clear_bit(ID, &motg->inputs);
+			} else if (pdata->default_mode == USB_PERIPHERAL) {
+				set_bit(ID, &motg->inputs);
+				set_bit(B_SESS_VLD, &motg->inputs);
+			} else {
+				set_bit(ID, &motg->inputs);
+				clear_bit(B_SESS_VLD, &motg->inputs);
+			}
+		}
+		break;
+	case USB_HOST:
+		clear_bit(ID, &motg->inputs);
+		break;
+	case USB_PERIPHERAL:
+		set_bit(ID, &motg->inputs);
+		if (otgsc & OTGSC_BSV)
+			set_bit(B_SESS_VLD, &motg->inputs);
+		else
+			clear_bit(B_SESS_VLD, &motg->inputs);
+		break;
+	default:
+		break;
+	}
+}
+
+static void msm_otg_sm_work(struct work_struct *w)
+{
+	struct msm_otg *motg = container_of(w, struct msm_otg, sm_work);
+	struct otg_transceiver *otg = &motg->otg;
+
+	switch (otg->state) {
+	case OTG_STATE_UNDEFINED:
+		dev_dbg(otg->dev, "OTG_STATE_UNDEFINED state\n");
+		msm_otg_reset(otg);
+		msm_otg_init_sm(motg);
+		otg->state = OTG_STATE_B_IDLE;
+		/* FALL THROUGH */
+	case OTG_STATE_B_IDLE:
+		dev_dbg(otg->dev, "OTG_STATE_B_IDLE state\n");
+		if (!test_bit(ID, &motg->inputs) && otg->host) {
+			/* disable BSV bit */
+			writel(readl(USB_OTGSC) & ~OTGSC_BSVIE, USB_OTGSC);
+			msm_otg_start_host(otg, 1);
+			otg->state = OTG_STATE_A_HOST;
+		} else if (test_bit(B_SESS_VLD, &motg->inputs) && otg->gadget) {
+			msm_otg_start_peripheral(otg, 1);
+			otg->state = OTG_STATE_B_PERIPHERAL;
+		}
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		dev_dbg(otg->dev, "OTG_STATE_B_PERIPHERAL state\n");
+		if (!test_bit(B_SESS_VLD, &motg->inputs) ||
+				!test_bit(ID, &motg->inputs)) {
+			msm_otg_start_peripheral(otg, 0);
+			otg->state = OTG_STATE_B_IDLE;
+			msm_otg_reset(otg);
+			schedule_work(w);
+		}
+		break;
+	case OTG_STATE_A_HOST:
+		dev_dbg(otg->dev, "OTG_STATE_A_HOST state\n");
+		if (test_bit(ID, &motg->inputs)) {
+			msm_otg_start_host(otg, 0);
+			otg->state = OTG_STATE_B_IDLE;
+			msm_otg_reset(otg);
+			schedule_work(w);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static irqreturn_t msm_otg_irq(int irq, void *data)
+{
+	struct msm_otg *motg = data;
+	struct otg_transceiver *otg = &motg->otg;
+	u32 otgsc = 0;
+
+	otgsc = readl(USB_OTGSC);
+	if (!(otgsc & (OTGSC_IDIS | OTGSC_BSVIS)))
+		return IRQ_NONE;
+
+	if ((otgsc & OTGSC_IDIS) && (otgsc & OTGSC_IDIE)) {
+		if (otgsc & OTGSC_ID)
+			set_bit(ID, &motg->inputs);
+		else
+			clear_bit(ID, &motg->inputs);
+		dev_dbg(otg->dev, "ID set/clear\n");
+	} else if ((otgsc & OTGSC_BSVIS) && (otgsc & OTGSC_BSVIE)) {
+		if (otgsc & OTGSC_BSV)
+			set_bit(B_SESS_VLD, &motg->inputs);
+		else
+			clear_bit(B_SESS_VLD, &motg->inputs);
+		dev_dbg(otg->dev, "BSV set/clear\n");
+	}
+
+	writel(otgsc, USB_OTGSC);
+	schedule_work(&motg->sm_work);
+	return IRQ_HANDLED;
+}
+
+static int msm_otg_mode_show(struct seq_file *s, void *unused)
+{
+	struct msm_otg *motg = s->private;
+	struct otg_transceiver *otg = &motg->otg;
+
+	switch (otg->state) {
+	case OTG_STATE_A_HOST:
+		seq_printf(s, "host\n");
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		seq_printf(s, "peripheral\n");
+		break;
+	default:
+		seq_printf(s, "none\n");
+		break;
+	}
+
+	return 0;
+}
+
+static int msm_otg_mode_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, msm_otg_mode_show, inode->i_private);
+}
+
+static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
+				size_t count, loff_t *ppos)
+{
+	struct msm_otg *motg = file->private_data;
+	char buf[16];
+	struct otg_transceiver *otg = &motg->otg;
+	int status = count;
+	enum usb_mode_type req_mode;
+
+	memset(buf, 0x00, sizeof(buf));
+
+	if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) {
+		status = -EFAULT;
+		goto out;
+	}
+
+	if (!strncmp(buf, "host", 4)) {
+		req_mode = USB_HOST;
+	} else if (!strncmp(buf, "peripheral", 10)) {
+		req_mode = USB_PERIPHERAL;
+	} else if (!strncmp(buf, "none", 4)) {
+		req_mode = USB_NONE;
+	} else {
+		status = -EINVAL;
+		goto out;
+	}
+
+	switch (req_mode) {
+	case USB_NONE:
+		switch (otg->state) {
+		case OTG_STATE_A_HOST:
+		case OTG_STATE_B_PERIPHERAL:
+			set_bit(ID, &motg->inputs);
+			clear_bit(B_SESS_VLD, &motg->inputs);
+			break;
+		default:
+			goto out;
+		}
+		break;
+	case USB_PERIPHERAL:
+		switch (otg->state) {
+		case OTG_STATE_B_IDLE:
+		case OTG_STATE_A_HOST:
+			set_bit(ID, &motg->inputs);
+			set_bit(B_SESS_VLD, &motg->inputs);
+			break;
+		default:
+			goto out;
+		}
+		break;
+	case USB_HOST:
+		switch (otg->state) {
+		case OTG_STATE_B_IDLE:
+		case OTG_STATE_B_PERIPHERAL:
+			clear_bit(ID, &motg->inputs);
+			break;
+		default:
+			goto out;
+		}
+		break;
+	default:
+		goto out;
+	}
+
+	schedule_work(&motg->sm_work);
+out:
+	return status;
+}
+
+const struct file_operations msm_otg_mode_fops = {
+	.open = msm_otg_mode_open,
+	.read = seq_read,
+	.write = msm_otg_mode_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *msm_otg_dbg_root;
+static struct dentry *msm_otg_dbg_mode;
+
+static int msm_otg_debugfs_init(struct msm_otg *motg)
+{
+	msm_otg_dbg_root = debugfs_create_dir("msm_otg", NULL);
+
+	if (!msm_otg_dbg_root || IS_ERR(msm_otg_dbg_root))
+		return -ENODEV;
+
+	msm_otg_dbg_mode = debugfs_create_file("mode", S_IRUGO | S_IWUSR,
+				msm_otg_dbg_root, motg, &msm_otg_mode_fops);
+	if (!msm_otg_dbg_mode) {
+		debugfs_remove(msm_otg_dbg_root);
+		msm_otg_dbg_root = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void msm_otg_debugfs_cleanup(void)
+{
+	debugfs_remove(msm_otg_dbg_mode);
+	debugfs_remove(msm_otg_dbg_root);
+}
+
+static int __init msm_otg_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res;
+	struct msm_otg *motg;
+	struct otg_transceiver *otg;
+
+	dev_info(&pdev->dev, "msm_otg probe\n");
+	if (!pdev->dev.platform_data) {
+		dev_err(&pdev->dev, "No platform data given. Bailing out\n");
+		return -ENODEV;
+	}
+
+	motg = kzalloc(sizeof(struct msm_otg), GFP_KERNEL);
+	if (!motg) {
+		dev_err(&pdev->dev, "unable to allocate msm_otg\n");
+		return -ENOMEM;
+	}
+
+	motg->pdata = pdev->dev.platform_data;
+	otg = &motg->otg;
+	otg->dev = &pdev->dev;
+
+	motg->phy_reset_clk = clk_get(&pdev->dev, "usb_phy_clk");
+	if (IS_ERR(motg->phy_reset_clk)) {
+		dev_err(&pdev->dev, "failed to get usb_phy_clk\n");
+		ret = PTR_ERR(motg->phy_reset_clk);
+		goto free_motg;
+	}
+
+	motg->clk = clk_get(&pdev->dev, "usb_hs_clk");
+	if (IS_ERR(motg->clk)) {
+		dev_err(&pdev->dev, "failed to get usb_hs_clk\n");
+		ret = PTR_ERR(motg->clk);
+		goto put_phy_reset_clk;
+	}
+
+	motg->pclk = clk_get(&pdev->dev, "usb_hs_pclk");
+	if (IS_ERR(motg->pclk)) {
+		dev_err(&pdev->dev, "failed to get usb_hs_pclk\n");
+		ret = PTR_ERR(motg->pclk);
+		goto put_clk;
+	}
+
+	/*
+	 * USB core clock is not present on all MSM chips. This
+	 * clock is introduced to remove the dependency on AXI
+	 * bus frequency.
+	 */
+	motg->core_clk = clk_get(&pdev->dev, "usb_hs_core_clk");
+	if (IS_ERR(motg->core_clk))
+		motg->core_clk = NULL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get platform resource mem\n");
+		ret = -ENODEV;
+		goto put_core_clk;
+	}
+
+	motg->regs = ioremap(res->start, resource_size(res));
+	if (!motg->regs) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto put_core_clk;
+	}
+	dev_info(&pdev->dev, "OTG regs = %p\n", motg->regs);
+
+	motg->irq = platform_get_irq(pdev, 0);
+	if (!motg->irq) {
+		dev_err(&pdev->dev, "platform_get_irq failed\n");
+		ret = -ENODEV;
+		goto free_regs;
+	}
+
+	clk_enable(motg->clk);
+	clk_enable(motg->pclk);
+	if (motg->core_clk)
+		clk_enable(motg->core_clk);
+
+	writel(0, USB_USBINTR);
+	writel(0, USB_OTGSC);
+
+	INIT_WORK(&motg->sm_work, msm_otg_sm_work);
+	ret = request_irq(motg->irq, msm_otg_irq, IRQF_SHARED,
+					"msm_otg", motg);
+	if (ret) {
+		dev_err(&pdev->dev, "request irq failed\n");
+		goto disable_clks;
+	}
+
+	otg->init = msm_otg_reset;
+	otg->set_host = msm_otg_set_host;
+	otg->set_peripheral = msm_otg_set_peripheral;
+
+	otg->io_ops = &msm_otg_io_ops;
+
+	ret = otg_set_transceiver(&motg->otg);
+	if (ret) {
+		dev_err(&pdev->dev, "otg_set_transceiver failed\n");
+		goto free_irq;
+	}
+
+	platform_set_drvdata(pdev, motg);
+	device_init_wakeup(&pdev->dev, 1);
+
+	if (motg->pdata->mode == USB_OTG &&
+			motg->pdata->otg_control == OTG_USER_CONTROL) {
+		ret = msm_otg_debugfs_init(motg);
+		if (ret)
+			dev_dbg(&pdev->dev, "mode debugfs file is"
+					"not available\n");
+	}
+
+	return 0;
+
+free_irq:
+	free_irq(motg->irq, motg);
+disable_clks:
+	clk_disable(motg->pclk);
+	clk_disable(motg->clk);
+free_regs:
+	iounmap(motg->regs);
+put_core_clk:
+	if (motg->core_clk)
+		clk_put(motg->core_clk);
+	clk_put(motg->pclk);
+put_clk:
+	clk_put(motg->clk);
+put_phy_reset_clk:
+	clk_put(motg->phy_reset_clk);
+free_motg:
+	kfree(motg);
+	return ret;
+}
+
+static int __devexit msm_otg_remove(struct platform_device *pdev)
+{
+	struct msm_otg *motg = platform_get_drvdata(pdev);
+	struct otg_transceiver *otg = &motg->otg;
+
+	if (otg->host || otg->gadget)
+		return -EBUSY;
+
+	msm_otg_debugfs_cleanup();
+	cancel_work_sync(&motg->sm_work);
+	device_init_wakeup(&pdev->dev, 0);
+	otg_set_transceiver(NULL);
+
+	free_irq(motg->irq, motg);
+
+	clk_disable(motg->pclk);
+	clk_disable(motg->clk);
+	if (motg->core_clk)
+		clk_disable(motg->core_clk);
+
+	iounmap(motg->regs);
+
+	clk_put(motg->phy_reset_clk);
+	clk_put(motg->pclk);
+	clk_put(motg->clk);
+	if (motg->core_clk)
+		clk_put(motg->core_clk);
+
+	kfree(motg);
+
+	return 0;
+}
+
+static struct platform_driver msm_otg_driver = {
+	.remove = __devexit_p(msm_otg_remove),
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init msm_otg_init(void)
+{
+	return platform_driver_probe(&msm_otg_driver, msm_otg_probe);
+}
+
+static void __exit msm_otg_exit(void)
+{
+	platform_driver_unregister(&msm_otg_driver);
+}
+
+module_init(msm_otg_init);
+module_exit(msm_otg_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MSM USB transceiver driver");
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
new file mode 100644
index 000000000000..b796df94ec72
--- /dev/null
+++ b/include/linux/usb/msm_hsusb.h
@@ -0,0 +1,108 @@
+/* linux/include/asm-arm/arch-msm/hsusb.h
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Brian Swetland <swetland@google.com>
+ * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __ASM_ARCH_MSM_HSUSB_H
+#define __ASM_ARCH_MSM_HSUSB_H
+
+#include <linux/types.h>
+#include <linux/usb/otg.h>
+
+/**
+ * Supported USB modes
+ *
+ * USB_PERIPHERAL       Only peripheral mode is supported.
+ * USB_HOST             Only host mode is supported.
+ * USB_OTG              OTG mode is supported.
+ *
+ */
+enum usb_mode_type {
+	USB_NONE = 0,
+	USB_PERIPHERAL,
+	USB_HOST,
+	USB_OTG,
+};
+
+/**
+ * OTG control
+ *
+ * OTG_NO_CONTROL	Id/VBUS notifications not required. Useful in host
+ *                      only configuration.
+ * OTG_PHY_CONTROL	Id/VBUS notifications comes form USB PHY.
+ * OTG_PMIC_CONTROL	Id/VBUS notifications comes from PMIC hardware.
+ * OTG_USER_CONTROL	Id/VBUS notifcations comes from User via sysfs.
+ *
+ */
+enum otg_control_type {
+	OTG_NO_CONTROL = 0,
+	OTG_PHY_CONTROL,
+	OTG_PMIC_CONTROL,
+	OTG_USER_CONTROL,
+};
+
+/**
+ * struct msm_otg_platform_data - platform device data
+ *              for msm72k_otg driver.
+ * @phy_init_seq: PHY configuration sequence. val, reg pairs
+ *              terminated by -1.
+ * @vbus_power: VBUS power on/off routine.
+ * @power_budget: VBUS power budget in mA (0 will be treated as 500mA).
+ * @mode: Supported mode (OTG/peripheral/host).
+ * @otg_control: OTG switch controlled by user/Id pin
+ * @default_mode: Default operational mode. Applicable only if
+ *              OTG switch is controller by user.
+ *
+ */
+struct msm_otg_platform_data {
+	int *phy_init_seq;
+	void (*vbus_power)(bool on);
+	unsigned power_budget;
+	enum usb_mode_type mode;
+	enum otg_control_type otg_control;
+	enum usb_mode_type default_mode;
+	void (*setup_gpio)(enum usb_otg_state state);
+};
+
+/**
+ * struct msm_otg: OTG driver data. Shared by HCD and DCD.
+ * @otg: USB OTG Transceiver structure.
+ * @pdata: otg device platform data.
+ * @irq: IRQ number assigned for HSUSB controller.
+ * @clk: clock struct of usb_hs_clk.
+ * @pclk: clock struct of usb_hs_pclk.
+ * @phy_reset_clk: clock struct of usb_phy_clk.
+ * @core_clk: clock struct of usb_hs_core_clk.
+ * @regs: ioremapped register base address.
+ * @inputs: OTG state machine inputs(Id, SessValid etc).
+ * @sm_work: OTG state machine work.
+ *
+ */
+struct msm_otg {
+	struct otg_transceiver otg;
+	struct msm_otg_platform_data *pdata;
+	int irq;
+	struct clk *clk;
+	struct clk *pclk;
+	struct clk *phy_reset_clk;
+	struct clk *core_clk;
+	void __iomem *regs;
+#define ID		0
+#define B_SESS_VLD	1
+	unsigned long inputs;
+	struct work_struct sm_work;
+};
+
+#endif
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
new file mode 100644
index 000000000000..b061cffcf048
--- /dev/null
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2007 Google, Inc.
+ * Author: Brian Swetland <swetland@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_GADGET_MSM72K_UDC_H__
+#define __LINUX_USB_GADGET_MSM72K_UDC_H__
+
+#ifdef CONFIG_ARCH_MSM7X00A
+#define USB_SBUSCFG          (MSM_USB_BASE + 0x0090)
+#else
+#define USB_AHBBURST         (MSM_USB_BASE + 0x0090)
+#define USB_AHBMODE          (MSM_USB_BASE + 0x0098)
+#endif
+#define USB_CAPLENGTH        (MSM_USB_BASE + 0x0100) /* 8 bit */
+
+#define USB_USBCMD           (MSM_USB_BASE + 0x0140)
+#define USB_PORTSC           (MSM_USB_BASE + 0x0184)
+#define USB_OTGSC            (MSM_USB_BASE + 0x01A4)
+#define USB_USBMODE          (MSM_USB_BASE + 0x01A8)
+
+#define USBCMD_RESET   2
+#define USB_USBINTR          (MSM_USB_BASE + 0x0148)
+
+#define PORTSC_PHCD            (1 << 23) /* phy suspend mode */
+#define PORTSC_PTS_MASK         (3 << 30)
+#define PORTSC_PTS_ULPI         (3 << 30)
+
+#define USB_ULPI_VIEWPORT    (MSM_USB_BASE + 0x0170)
+#define ULPI_RUN              (1 << 30)
+#define ULPI_WRITE            (1 << 29)
+#define ULPI_READ             (0 << 29)
+#define ULPI_ADDR(n)          (((n) & 255) << 16)
+#define ULPI_DATA(n)          ((n) & 255)
+#define ULPI_DATA_READ(n)     (((n) >> 8) & 255)
+
+/* OTG definitions */
+#define OTGSC_INTSTS_MASK	(0x7f << 16)
+#define OTGSC_ID		(1 << 8)
+#define OTGSC_BSV		(1 << 11)
+#define OTGSC_IDIS		(1 << 16)
+#define OTGSC_BSVIS		(1 << 19)
+#define OTGSC_IDIE		(1 << 24)
+#define OTGSC_BSVIE		(1 << 27)
+
+#endif /* __LINUX_USB_GADGET_MSM72K_UDC_H__ */
-- 
cgit v1.2.3-71-gd317


From 87c0104af742af2acfcbd685f2b9a40f33770dc0 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <pkondeti@codeaurora.org>
Date: Tue, 7 Dec 2010 17:53:58 +0530
Subject: USB: OTG: msm: Add support for power management

Implement runtime and system pm ops to put hardware into low power
mode (LPM). As part of LPM, USB clocks are turned off, PHY is put
into suspend state and PHY comparators are turned off if VBUS/Id
notifications are not required from PHY.

Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/Kconfig          |   5 +-
 drivers/usb/otg/msm72k_otg.c     | 283 ++++++++++++++++++++++++++++++++++++++-
 include/linux/usb/msm_hsusb.h    |   4 +
 include/linux/usb/msm_hsusb_hw.h |   3 +
 4 files changed, 289 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 915c729872f8..2810c2af71b0 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -88,7 +88,8 @@ config USB_MSM_OTG_72K
 	help
 	  Enable this to support the USB OTG transceiver on MSM chips. It
 	  handles PHY initialization, clock management, and workarounds
-	  required after resetting the hardware. This driver is required
-	  even for peripheral only or host only mode configuration.
+	  required after resetting the hardware and power management.
+	  This driver is required even for peripheral only or host only
+	  mode configurations.
 
 endif # USB || OTG
diff --git a/drivers/usb/otg/msm72k_otg.c b/drivers/usb/otg/msm72k_otg.c
index 46f468a912f4..1cd52edcd0c2 100644
--- a/drivers/usb/otg/msm72k_otg.c
+++ b/drivers/usb/otg/msm72k_otg.c
@@ -29,6 +29,7 @@
 #include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
 #include <linux/usb/otg.h>
@@ -251,6 +252,154 @@ static int msm_otg_reset(struct otg_transceiver *otg)
 	return 0;
 }
 
+#define PHY_SUSPEND_TIMEOUT_USEC	(500 * 1000)
+static int msm_otg_suspend(struct msm_otg *motg)
+{
+	struct otg_transceiver *otg = &motg->otg;
+	struct usb_bus *bus = otg->host;
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	int cnt = 0;
+
+	if (atomic_read(&motg->in_lpm))
+		return 0;
+
+	disable_irq(motg->irq);
+	/*
+	 * Interrupt Latch Register auto-clear feature is not present
+	 * in all PHY versions. Latch register is clear on read type.
+	 * Clear latch register to avoid spurious wakeup from
+	 * low power mode (LPM).
+	 */
+	ulpi_read(otg, 0x14);
+
+	/*
+	 * PHY comparators are disabled when PHY enters into low power
+	 * mode (LPM). Keep PHY comparators ON in LPM only when we expect
+	 * VBUS/Id notifications from USB PHY. Otherwise turn off USB
+	 * PHY comparators. This save significant amount of power.
+	 */
+	if (pdata->otg_control == OTG_PHY_CONTROL)
+		ulpi_write(otg, 0x01, 0x30);
+
+	/*
+	 * PLL is not turned off when PHY enters into low power mode (LPM).
+	 * Disable PLL for maximum power savings.
+	 */
+	ulpi_write(otg, 0x08, 0x09);
+
+	/*
+	 * PHY may take some time or even fail to enter into low power
+	 * mode (LPM). Hence poll for 500 msec and reset the PHY and link
+	 * in failure case.
+	 */
+	writel(readl(USB_PORTSC) | PORTSC_PHCD, USB_PORTSC);
+	while (cnt < PHY_SUSPEND_TIMEOUT_USEC) {
+		if (readl(USB_PORTSC) & PORTSC_PHCD)
+			break;
+		udelay(1);
+		cnt++;
+	}
+
+	if (cnt >= PHY_SUSPEND_TIMEOUT_USEC) {
+		dev_err(otg->dev, "Unable to suspend PHY\n");
+		msm_otg_reset(otg);
+		enable_irq(motg->irq);
+		return -ETIMEDOUT;
+	}
+
+	/*
+	 * PHY has capability to generate interrupt asynchronously in low
+	 * power mode (LPM). This interrupt is level triggered. So USB IRQ
+	 * line must be disabled till async interrupt enable bit is cleared
+	 * in USBCMD register. Assert STP (ULPI interface STOP signal) to
+	 * block data communication from PHY.
+	 */
+	writel(readl(USB_USBCMD) | ASYNC_INTR_CTRL | ULPI_STP_CTRL, USB_USBCMD);
+
+	clk_disable(motg->pclk);
+	clk_disable(motg->clk);
+	if (motg->core_clk)
+		clk_disable(motg->core_clk);
+
+	if (device_may_wakeup(otg->dev))
+		enable_irq_wake(motg->irq);
+	if (bus)
+		clear_bit(HCD_FLAG_HW_ACCESSIBLE, &(bus_to_hcd(bus))->flags);
+
+	atomic_set(&motg->in_lpm, 1);
+	enable_irq(motg->irq);
+
+	dev_info(otg->dev, "USB in low power mode\n");
+
+	return 0;
+}
+
+#define PHY_RESUME_TIMEOUT_USEC	(100 * 1000)
+static int msm_otg_resume(struct msm_otg *motg)
+{
+	struct otg_transceiver *otg = &motg->otg;
+	struct usb_bus *bus = otg->host;
+	int cnt = 0;
+	unsigned temp;
+
+	if (!atomic_read(&motg->in_lpm))
+		return 0;
+
+	clk_enable(motg->pclk);
+	clk_enable(motg->clk);
+	if (motg->core_clk)
+		clk_enable(motg->core_clk);
+
+	temp = readl(USB_USBCMD);
+	temp &= ~ASYNC_INTR_CTRL;
+	temp &= ~ULPI_STP_CTRL;
+	writel(temp, USB_USBCMD);
+
+	/*
+	 * PHY comes out of low power mode (LPM) in case of wakeup
+	 * from asynchronous interrupt.
+	 */
+	if (!(readl(USB_PORTSC) & PORTSC_PHCD))
+		goto skip_phy_resume;
+
+	writel(readl(USB_PORTSC) & ~PORTSC_PHCD, USB_PORTSC);
+	while (cnt < PHY_RESUME_TIMEOUT_USEC) {
+		if (!(readl(USB_PORTSC) & PORTSC_PHCD))
+			break;
+		udelay(1);
+		cnt++;
+	}
+
+	if (cnt >= PHY_RESUME_TIMEOUT_USEC) {
+		/*
+		 * This is a fatal error. Reset the link and
+		 * PHY. USB state can not be restored. Re-insertion
+		 * of USB cable is the only way to get USB working.
+		 */
+		dev_err(otg->dev, "Unable to resume USB."
+				"Re-plugin the cable\n");
+		msm_otg_reset(otg);
+	}
+
+skip_phy_resume:
+	if (device_may_wakeup(otg->dev))
+		disable_irq_wake(motg->irq);
+	if (bus)
+		set_bit(HCD_FLAG_HW_ACCESSIBLE, &(bus_to_hcd(bus))->flags);
+
+	if (motg->async_int) {
+		motg->async_int = 0;
+		pm_runtime_put(otg->dev);
+		enable_irq(motg->irq);
+	}
+
+	atomic_set(&motg->in_lpm, 0);
+
+	dev_info(otg->dev, "USB exited from low power mode\n");
+
+	return 0;
+}
+
 static void msm_otg_start_host(struct otg_transceiver *otg, int on)
 {
 	struct msm_otg *motg = container_of(otg, struct msm_otg, otg);
@@ -306,6 +455,7 @@ static int msm_otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
 
 	if (!host) {
 		if (otg->state == OTG_STATE_A_HOST) {
+			pm_runtime_get_sync(otg->dev);
 			msm_otg_start_host(otg, 0);
 			otg->host = NULL;
 			otg->state = OTG_STATE_UNDEFINED;
@@ -327,8 +477,10 @@ static int msm_otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
 	 * Kick the state machine work, if peripheral is not supported
 	 * or peripheral is already registered with us.
 	 */
-	if (motg->pdata->mode == USB_HOST || otg->gadget)
+	if (motg->pdata->mode == USB_HOST || otg->gadget) {
+		pm_runtime_get_sync(otg->dev);
 		schedule_work(&motg->sm_work);
+	}
 
 	return 0;
 }
@@ -376,6 +528,7 @@ static int msm_otg_set_peripheral(struct otg_transceiver *otg,
 
 	if (!gadget) {
 		if (otg->state == OTG_STATE_B_PERIPHERAL) {
+			pm_runtime_get_sync(otg->dev);
 			msm_otg_start_peripheral(otg, 0);
 			otg->gadget = NULL;
 			otg->state = OTG_STATE_UNDEFINED;
@@ -393,8 +546,10 @@ static int msm_otg_set_peripheral(struct otg_transceiver *otg,
 	 * Kick the state machine work, if host is not supported
 	 * or host is already registered with us.
 	 */
-	if (motg->pdata->mode == USB_PERIPHERAL || otg->host)
+	if (motg->pdata->mode == USB_PERIPHERAL || otg->host) {
+		pm_runtime_get_sync(otg->dev);
 		schedule_work(&motg->sm_work);
+	}
 
 	return 0;
 }
@@ -473,6 +628,7 @@ static void msm_otg_sm_work(struct work_struct *w)
 			msm_otg_start_peripheral(otg, 1);
 			otg->state = OTG_STATE_B_PERIPHERAL;
 		}
+		pm_runtime_put_sync(otg->dev);
 		break;
 	case OTG_STATE_B_PERIPHERAL:
 		dev_dbg(otg->dev, "OTG_STATE_B_PERIPHERAL state\n");
@@ -504,6 +660,13 @@ static irqreturn_t msm_otg_irq(int irq, void *data)
 	struct otg_transceiver *otg = &motg->otg;
 	u32 otgsc = 0;
 
+	if (atomic_read(&motg->in_lpm)) {
+		disable_irq_nosync(irq);
+		motg->async_int = 1;
+		pm_runtime_get(otg->dev);
+		return IRQ_HANDLED;
+	}
+
 	otgsc = readl(USB_OTGSC);
 	if (!(otgsc & (OTGSC_IDIS | OTGSC_BSVIS)))
 		return IRQ_NONE;
@@ -514,12 +677,14 @@ static irqreturn_t msm_otg_irq(int irq, void *data)
 		else
 			clear_bit(ID, &motg->inputs);
 		dev_dbg(otg->dev, "ID set/clear\n");
+		pm_runtime_get_noresume(otg->dev);
 	} else if ((otgsc & OTGSC_BSVIS) && (otgsc & OTGSC_BSVIE)) {
 		if (otgsc & OTGSC_BSV)
 			set_bit(B_SESS_VLD, &motg->inputs);
 		else
 			clear_bit(B_SESS_VLD, &motg->inputs);
 		dev_dbg(otg->dev, "BSV set/clear\n");
+		pm_runtime_get_noresume(otg->dev);
 	}
 
 	writel(otgsc, USB_OTGSC);
@@ -616,6 +781,7 @@ static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
 		goto out;
 	}
 
+	pm_runtime_get_sync(otg->dev);
 	schedule_work(&motg->sm_work);
 out:
 	return status;
@@ -770,8 +936,10 @@ static int __init msm_otg_probe(struct platform_device *pdev)
 					"not available\n");
 	}
 
-	return 0;
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
 
+	return 0;
 free_irq:
 	free_irq(motg->irq, motg);
 disable_clks:
@@ -796,23 +964,45 @@ static int __devexit msm_otg_remove(struct platform_device *pdev)
 {
 	struct msm_otg *motg = platform_get_drvdata(pdev);
 	struct otg_transceiver *otg = &motg->otg;
+	int cnt = 0;
 
 	if (otg->host || otg->gadget)
 		return -EBUSY;
 
 	msm_otg_debugfs_cleanup();
 	cancel_work_sync(&motg->sm_work);
+
+	msm_otg_resume(motg);
+
 	device_init_wakeup(&pdev->dev, 0);
-	otg_set_transceiver(NULL);
+	pm_runtime_disable(&pdev->dev);
 
+	otg_set_transceiver(NULL);
 	free_irq(motg->irq, motg);
 
+	/*
+	 * Put PHY in low power mode.
+	 */
+	ulpi_read(otg, 0x14);
+	ulpi_write(otg, 0x08, 0x09);
+
+	writel(readl(USB_PORTSC) | PORTSC_PHCD, USB_PORTSC);
+	while (cnt < PHY_SUSPEND_TIMEOUT_USEC) {
+		if (readl(USB_PORTSC) & PORTSC_PHCD)
+			break;
+		udelay(1);
+		cnt++;
+	}
+	if (cnt >= PHY_SUSPEND_TIMEOUT_USEC)
+		dev_err(otg->dev, "Unable to suspend PHY\n");
+
 	clk_disable(motg->pclk);
 	clk_disable(motg->clk);
 	if (motg->core_clk)
 		clk_disable(motg->core_clk);
 
 	iounmap(motg->regs);
+	pm_runtime_set_suspended(&pdev->dev);
 
 	clk_put(motg->phy_reset_clk);
 	clk_put(motg->pclk);
@@ -825,11 +1015,96 @@ static int __devexit msm_otg_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_RUNTIME
+static int msm_otg_runtime_idle(struct device *dev)
+{
+	struct msm_otg *motg = dev_get_drvdata(dev);
+	struct otg_transceiver *otg = &motg->otg;
+
+	dev_dbg(dev, "OTG runtime idle\n");
+
+	/*
+	 * It is observed some times that a spurious interrupt
+	 * comes when PHY is put into LPM immediately after PHY reset.
+	 * This 1 sec delay also prevents entering into LPM immediately
+	 * after asynchronous interrupt.
+	 */
+	if (otg->state != OTG_STATE_UNDEFINED)
+		pm_schedule_suspend(dev, 1000);
+
+	return -EAGAIN;
+}
+
+static int msm_otg_runtime_suspend(struct device *dev)
+{
+	struct msm_otg *motg = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "OTG runtime suspend\n");
+	return msm_otg_suspend(motg);
+}
+
+static int msm_otg_runtime_resume(struct device *dev)
+{
+	struct msm_otg *motg = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "OTG runtime resume\n");
+	return msm_otg_resume(motg);
+}
+#else
+#define msm_otg_runtime_idle	NULL
+#define msm_otg_runtime_suspend	NULL
+#define msm_otg_runtime_resume	NULL
+#endif
+
+#ifdef CONFIG_PM
+static int msm_otg_pm_suspend(struct device *dev)
+{
+	struct msm_otg *motg = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "OTG PM suspend\n");
+	return msm_otg_suspend(motg);
+}
+
+static int msm_otg_pm_resume(struct device *dev)
+{
+	struct msm_otg *motg = dev_get_drvdata(dev);
+	int ret;
+
+	dev_dbg(dev, "OTG PM resume\n");
+
+	ret = msm_otg_resume(motg);
+	if (ret)
+		return ret;
+
+	/*
+	 * Runtime PM Documentation recommends bringing the
+	 * device to full powered state upon resume.
+	 */
+	pm_runtime_disable(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	return 0;
+}
+#else
+#define msm_otg_pm_suspend	NULL
+#define msm_otg_pm_resume	NULL
+#endif
+
+static const struct dev_pm_ops msm_otg_dev_pm_ops = {
+	.runtime_suspend = msm_otg_runtime_suspend,
+	.runtime_resume  = msm_otg_runtime_resume,
+	.runtime_idle    = msm_otg_runtime_idle,
+	.suspend         = msm_otg_pm_suspend,
+	.resume          = msm_otg_pm_resume,
+};
+
 static struct platform_driver msm_otg_driver = {
 	.remove = __devexit_p(msm_otg_remove),
 	.driver = {
 		.name = DRIVER_NAME,
 		.owner = THIS_MODULE,
+		.pm = &msm_otg_dev_pm_ops,
 	},
 };
 
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index b796df94ec72..3675e03b1539 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -88,6 +88,8 @@ struct msm_otg_platform_data {
  * @regs: ioremapped register base address.
  * @inputs: OTG state machine inputs(Id, SessValid etc).
  * @sm_work: OTG state machine work.
+ * @in_lpm: indicates low power mode (LPM) state.
+ * @async_int: Async interrupt arrived.
  *
  */
 struct msm_otg {
@@ -103,6 +105,8 @@ struct msm_otg {
 #define B_SESS_VLD	1
 	unsigned long inputs;
 	struct work_struct sm_work;
+	atomic_t in_lpm;
+	int async_int;
 };
 
 #endif
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index b061cffcf048..b92e17349c7b 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -44,6 +44,9 @@
 #define ULPI_DATA(n)          ((n) & 255)
 #define ULPI_DATA_READ(n)     (((n) >> 8) & 255)
 
+#define ASYNC_INTR_CTRL         (1 << 29) /* Enable async interrupt */
+#define ULPI_STP_CTRL           (1 << 30) /* Block communication with PHY */
+
 /* OTG definitions */
 #define OTGSC_INTSTS_MASK	(0x7f << 16)
 #define OTGSC_ID		(1 << 8)
-- 
cgit v1.2.3-71-gd317


From 35d2856b4693e8de5d616307b56cef296b839157 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 8 Dec 2010 04:37:49 +0000
Subject: xfrm: Add Traffic Flow Confidentiality padding XFRM attribute

The XFRMA_TFCPAD attribute for XFRM state installation configures
Traffic Flow Confidentiality by padding ESP packets to a specified
length.

Signed-off-by: Martin Willi <martin@strongswan.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h |  1 +
 include/net/xfrm.h   |  1 +
 net/xfrm/xfrm_user.c | 19 +++++++++++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index b971e3848493..930fdd2de79c 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -283,6 +283,7 @@ enum xfrm_attr_type_t {
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	XFRMA_MARK,		/* struct xfrm_mark */
+	XFRMA_TFCPAD,		/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7fa5b005893e..b9f385da758e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -143,6 +143,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			tfcpad;
 
 	u32			genid;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8bae6b22c846..8eb889510916 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -148,7 +148,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		     !attrs[XFRMA_ALG_AUTH_TRUNC]) ||
 		    attrs[XFRMA_ALG_AEAD]	||
 		    attrs[XFRMA_ALG_CRYPT]	||
-		    attrs[XFRMA_ALG_COMP])
+		    attrs[XFRMA_ALG_COMP]	||
+		    attrs[XFRMA_TFCPAD])
 			goto out;
 		break;
 
@@ -165,6 +166,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		     attrs[XFRMA_ALG_CRYPT]) &&
 		    attrs[XFRMA_ALG_AEAD])
 			goto out;
+		if (attrs[XFRMA_TFCPAD] &&
+		    p->mode != XFRM_MODE_TUNNEL)
+			goto out;
 		break;
 
 	case IPPROTO_COMP:
@@ -172,7 +176,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		    attrs[XFRMA_ALG_AEAD]	||
 		    attrs[XFRMA_ALG_AUTH]	||
 		    attrs[XFRMA_ALG_AUTH_TRUNC]	||
-		    attrs[XFRMA_ALG_CRYPT])
+		    attrs[XFRMA_ALG_CRYPT]	||
+		    attrs[XFRMA_TFCPAD])
 			goto out;
 		break;
 
@@ -186,6 +191,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		    attrs[XFRMA_ALG_CRYPT]	||
 		    attrs[XFRMA_ENCAP]		||
 		    attrs[XFRMA_SEC_CTX]	||
+		    attrs[XFRMA_TFCPAD]		||
 		    !attrs[XFRMA_COADDR])
 			goto out;
 		break;
@@ -439,6 +445,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 			goto error;
 	}
 
+	if (attrs[XFRMA_TFCPAD])
+		x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
+
 	if (attrs[XFRMA_COADDR]) {
 		x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
 				    sizeof(*x->coaddr), GFP_KERNEL);
@@ -688,6 +697,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	if (x->encap)
 		NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
 
+	if (x->tfcpad)
+		NLA_PUT_U32(skb, XFRMA_TFCPAD, x->tfcpad);
+
 	if (xfrm_mark_put(skb, &x->mark))
 		goto nla_put_failure;
 
@@ -2122,6 +2134,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MIGRATE]		= { .len = sizeof(struct xfrm_user_migrate) },
 	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
+	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 };
 
 static struct xfrm_link {
@@ -2301,6 +2314,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(*x->calg));
 	if (x->encap)
 		l += nla_total_size(sizeof(*x->encap));
+	if (x->tfcpad)
+		l += nla_total_size(sizeof(x->tfcpad));
 	if (x->security)
 		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
 				    x->security->ctx_len);
-- 
cgit v1.2.3-71-gd317


From a3ae0fc34f58e7163b7724feb3d77aa4603f0dc3 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Wed, 1 Dec 2010 23:39:36 +0000
Subject: 8250: add a UPIO_DWAPB32 for 32 bit accesses

Some platforms contain a Synopsys DesignWare APB UART that is attached
to a 32-bit APB bus where sub-word accesses are not allowed. Add a new
IO type (UPIO_DWAPB32) that performs 32 bit acccesses to the UART.

v2:
	- don't test for 32 bit in the output fast path, provide a
	  separate dwabp32_serial_out() function. Refactor
	  dwabp_serial_out() so that we can reuse the LCR saving
	  code.
v3:
	- rebased on top of "8250: use container_of() instead of
	  casting"

Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/8250.c        | 51 +++++++++++++++++++++++++++++++++-----------
 drivers/serial/serial_core.c |  2 ++
 include/linux/serial_core.h  |  1 +
 3 files changed, 42 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 0b4ce47d9871..9839199c1d1d 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -454,22 +454,40 @@ static void tsi_serial_out(struct uart_port *p, int offset, int value)
 		writeb(value, p->membase + offset);
 }
 
+/* Save the LCR value so it can be re-written when a Busy Detect IRQ occurs. */
+static inline void dwapb_save_out_value(struct uart_port *p, int offset,
+					int value)
+{
+	struct uart_8250_port *up =
+		container_of(p, struct uart_8250_port, port);
+
+	if (offset == UART_LCR)
+		up->lcr = value;
+}
+
+/* Read the IER to ensure any interrupt is cleared before returning from ISR. */
+static inline void dwapb_check_clear_ier(struct uart_port *p, int offset)
+{
+	if (offset == UART_TX || offset == UART_IER)
+		p->serial_in(p, UART_IER);
+}
+
 static void dwapb_serial_out(struct uart_port *p, int offset, int value)
 {
 	int save_offset = offset;
 	offset = map_8250_out_reg(p, offset) << p->regshift;
-	/* Save the LCR value so it can be re-written when a
-	 * Busy Detect interrupt occurs. */
-	if (save_offset == UART_LCR) {
-		struct uart_8250_port *up =
-			container_of(p, struct uart_8250_port, port);
-		up->lcr = value;
-	}
+	dwapb_save_out_value(p, save_offset, value);
 	writeb(value, p->membase + offset);
-	/* Read the IER to ensure any interrupt is cleared before
-	 * returning from ISR. */
-	if (save_offset == UART_TX || save_offset == UART_IER)
-		value = p->serial_in(p, UART_IER);
+	dwapb_check_clear_ier(p, save_offset);
+}
+
+static void dwapb32_serial_out(struct uart_port *p, int offset, int value)
+{
+	int save_offset = offset;
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	dwapb_save_out_value(p, save_offset, value);
+	writel(value, p->membase + offset);
+	dwapb_check_clear_ier(p, save_offset);
 }
 
 static unsigned int io_serial_in(struct uart_port *p, int offset)
@@ -520,6 +538,11 @@ static void set_io_from_upio(struct uart_port *p)
 		p->serial_out = dwapb_serial_out;
 		break;
 
+	case UPIO_DWAPB32:
+		p->serial_in = mem32_serial_in;
+		p->serial_out = dwapb32_serial_out;
+		break;
+
 	default:
 		p->serial_in = io_serial_in;
 		p->serial_out = io_serial_out;
@@ -538,6 +561,7 @@ serial_out_sync(struct uart_8250_port *up, int offset, int value)
 	case UPIO_MEM32:
 	case UPIO_AU:
 	case UPIO_DWAPB:
+	case UPIO_DWAPB32:
 		p->serial_out(p, offset, value);
 		p->serial_in(p, UART_LCR);	/* safe, no side-effects */
 		break;
@@ -1587,7 +1611,8 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
 			handled = 1;
 
 			end = NULL;
-		} else if (up->port.iotype == UPIO_DWAPB &&
+		} else if ((up->port.iotype == UPIO_DWAPB ||
+			    up->port.iotype == UPIO_DWAPB32) &&
 			  (iir & UART_IIR_BUSY) == UART_IIR_BUSY) {
 			/* The DesignWare APB UART has an Busy Detect (0x07)
 			 * interrupt meaning an LCR write attempt occured while the
@@ -2492,6 +2517,7 @@ static int serial8250_request_std_resource(struct uart_8250_port *up)
 	case UPIO_MEM32:
 	case UPIO_MEM:
 	case UPIO_DWAPB:
+	case UPIO_DWAPB32:
 		if (!up->port.mapbase)
 			break;
 
@@ -2529,6 +2555,7 @@ static void serial8250_release_std_resource(struct uart_8250_port *up)
 	case UPIO_MEM32:
 	case UPIO_MEM:
 	case UPIO_DWAPB:
+	case UPIO_DWAPB32:
 		if (!up->port.mapbase)
 			break;
 
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index c4ea14670d44..2835e29298ed 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2135,6 +2135,7 @@ uart_report_port(struct uart_driver *drv, struct uart_port *port)
 	case UPIO_AU:
 	case UPIO_TSI:
 	case UPIO_DWAPB:
+	case UPIO_DWAPB32:
 		snprintf(address, sizeof(address),
 			 "MMIO 0x%llx", (unsigned long long)port->mapbase);
 		break;
@@ -2555,6 +2556,7 @@ int uart_match_port(struct uart_port *port1, struct uart_port *port2)
 	case UPIO_AU:
 	case UPIO_TSI:
 	case UPIO_DWAPB:
+	case UPIO_DWAPB32:
 		return (port1->mapbase == port2->mapbase);
 	}
 	return 0;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 9ff9b7db293b..7a6daf189995 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -314,6 +314,7 @@ struct uart_port {
 #define UPIO_TSI		(5)			/* Tsi108/109 type IO */
 #define UPIO_DWAPB		(6)			/* DesignWare APB UART */
 #define UPIO_RM9000		(7)			/* RM9000 type IO */
+#define UPIO_DWAPB32		(8)			/* DesignWare APB UART (32 bit accesses) */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
-- 
cgit v1.2.3-71-gd317


From e596e6e4d578f2639416e620d367a3af34814a40 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 9 Dec 2010 12:08:35 +0000
Subject: ethtool: Report link-down while interface is down

While an interface is down, many implementations of
ethtool_ops::get_link, including the default, ethtool_op_get_link(),
will report the last link state seen while the interface was up.  In
general the current physical link state is not available if the
interface is down.

Define ETHTOOL_GLINK to reflect whether the interface *and* any
physical port have a working link, and consistently return 0 when the
interface is down.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  4 +++-
 net/core/ethtool.c      | 17 +++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 6628a507fd3b..1908929204a9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -691,7 +691,9 @@ struct ethtool_ops {
 #define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
 #define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level. */
 #define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation. */
-#define ETHTOOL_GLINK		0x0000000a /* Get link status (ethtool_value) */
+/* Get link status for host, i.e. whether the interface *and* the
+ * physical port (if there is one) are up (ethtool_value). */
+#define ETHTOOL_GLINK		0x0000000a
 #define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
 #define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data. */
 #define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d5bc28818883..17741782a345 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -891,6 +891,20 @@ static int ethtool_nway_reset(struct net_device *dev)
 	return dev->ethtool_ops->nway_reset(dev);
 }
 
+static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_eeprom eeprom;
@@ -1530,8 +1544,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_nway_reset(dev);
 		break;
 	case ETHTOOL_GLINK:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_link);
+		rc = ethtool_get_link(dev, useraddr);
 		break;
 	case ETHTOOL_GEEPROM:
 		rc = ethtool_get_eeprom(dev, useraddr);
-- 
cgit v1.2.3-71-gd317


From 6610e0893b8bc6f59b14fed7f089c5997f035f88 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 23 Sep 2010 15:07:34 -0700
Subject: RTC: Rework RTC code to use timerqueue for events

This patch reworks a large portion of the generic RTC code
to in-effect virtualize the rtc interrupt code.

The current RTC interface is very much a raw hardware interface.
Via the proc, /dev/, or sysfs interfaces, applciations can set
the hardware to trigger interrupts in one of three modes:

AIE: Alarm interrupt
UIE: Update interrupt (ie: once per second)
PIE: Periodic interrupt (sub-second irqs)

The problem with this interface is that it limits the RTC hardware
so it can only be used by one application at a time.

The purpose of this patch is to extend the RTC code so that we can
multiplex multiple applications event needs onto a single RTC device.
This is done by utilizing the timerqueue infrastructure to manage
a list of events, which cause the RTC hardware to be programmed
to fire an interrupt for the next event in the list.

In order to preserve the functionality of the exsting proc,/dev/ and
sysfs interfaces, we emulate the different interrupt modes as follows:

AIE: We create a rtc_timer dedicated to AIE mode interrupts. There is
only one per device, so we don't change existing interface semantics.

UIE: Again, a dedicated rtc_timer, set for periodic mode, is used
to emulate UIE interrupts. Again, only one per device.

PIE: Since PIE mode interrupts fire faster then the RTC's clock read
granularity, we emulate PIE mode interrupts using a hrtimer. Again,
one per device.

With this patch, the rtctest.c application in Documentation/rtc.txt
passes fine on x86 hardware. However, there may very well still be
bugs, so greatly I'd appreciate any feedback or testing!

Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML Reference: <1290136329-18291-4-git-send-email-john.stultz@linaro.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
CC: Alessandro Zummo <a.zummo@towertech.it>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Richard Cochran <richardcochran@gmail.com>
---
 drivers/rtc/class.c     |  13 ++
 drivers/rtc/interface.c | 574 +++++++++++++++++++++++++++++-------------------
 drivers/rtc/rtc-lib.c   |  28 +++
 include/linux/rtc.h     |  43 +++-
 4 files changed, 428 insertions(+), 230 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 565562ba6ac9..95d2b82762d6 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -16,6 +16,7 @@
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 
 #include "rtc-core.h"
 
@@ -152,6 +153,18 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	spin_lock_init(&rtc->irq_task_lock);
 	init_waitqueue_head(&rtc->irq_queue);
 
+	/* Init timerqueue */
+	timerqueue_init_head(&rtc->timerqueue);
+	INIT_WORK(&rtc->irqwork, rtctimer_do_work);
+	/* Init aie timer */
+	rtctimer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
+	/* Init uie timer */
+	rtctimer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
+	/* Init pie timer */
+	hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtc->pie_timer.function = rtc_pie_update_irq;
+	rtc->pie_enabled = 0;
+
 	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a0c816238aa9..c81c50b497b7 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -14,15 +14,11 @@
 #include <linux/rtc.h>
 #include <linux/sched.h>
 #include <linux/log2.h>
+#include <linux/workqueue.h>
 
-int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
+static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 {
 	int err;
-
-	err = mutex_lock_interruptible(&rtc->ops_lock);
-	if (err)
-		return err;
-
 	if (!rtc->ops)
 		err = -ENODEV;
 	else if (!rtc->ops->read_time)
@@ -31,7 +27,18 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 		memset(tm, 0, sizeof(struct rtc_time));
 		err = rtc->ops->read_time(rtc->dev.parent, tm);
 	}
+	return err;
+}
+
+int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	int err;
 
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+	err = __rtc_read_time(rtc, tm);
 	mutex_unlock(&rtc->ops_lock);
 	return err;
 }
@@ -106,188 +113,54 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
 }
 EXPORT_SYMBOL_GPL(rtc_set_mmss);
 
-static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
 	int err;
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
-
-	if (rtc->ops == NULL)
-		err = -ENODEV;
-	else if (!rtc->ops->read_alarm)
-		err = -EINVAL;
-	else {
-		memset(alarm, 0, sizeof(struct rtc_wkalrm));
-		err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
-	}
-
+	alarm->enabled = rtc->aie_timer.enabled;
+	if (alarm->enabled)
+		alarm->time = rtc_ktime_to_tm(rtc->aie_timer.node.expires);
 	mutex_unlock(&rtc->ops_lock);
-	return err;
+
+	return 0;
 }
+EXPORT_SYMBOL_GPL(rtc_read_alarm);
 
-int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
+	struct rtc_time tm;
+	long now, scheduled;
 	int err;
-	struct rtc_time before, now;
-	int first_time = 1;
-	unsigned long t_now, t_alm;
-	enum { none, day, month, year } missing = none;
-	unsigned days;
-
-	/* The lower level RTC driver may return -1 in some fields,
-	 * creating invalid alarm->time values, for reasons like:
-	 *
-	 *   - The hardware may not be capable of filling them in;
-	 *     many alarms match only on time-of-day fields, not
-	 *     day/month/year calendar data.
-	 *
-	 *   - Some hardware uses illegal values as "wildcard" match
-	 *     values, which non-Linux firmware (like a BIOS) may try
-	 *     to set up as e.g. "alarm 15 minutes after each hour".
-	 *     Linux uses only oneshot alarms.
-	 *
-	 * When we see that here, we deal with it by using values from
-	 * a current RTC timestamp for any missing (-1) values.  The
-	 * RTC driver prevents "periodic alarm" modes.
-	 *
-	 * But this can be racey, because some fields of the RTC timestamp
-	 * may have wrapped in the interval since we read the RTC alarm,
-	 * which would lead to us inserting inconsistent values in place
-	 * of the -1 fields.
-	 *
-	 * Reading the alarm and timestamp in the reverse sequence
-	 * would have the same race condition, and not solve the issue.
-	 *
-	 * So, we must first read the RTC timestamp,
-	 * then read the RTC alarm value,
-	 * and then read a second RTC timestamp.
-	 *
-	 * If any fields of the second timestamp have changed
-	 * when compared with the first timestamp, then we know
-	 * our timestamp may be inconsistent with that used by
-	 * the low-level rtc_read_alarm_internal() function.
-	 *
-	 * So, when the two timestamps disagree, we just loop and do
-	 * the process again to get a fully consistent set of values.
-	 *
-	 * This could all instead be done in the lower level driver,
-	 * but since more than one lower level RTC implementation needs it,
-	 * then it's probably best best to do it here instead of there..
-	 */
 
-	/* Get the "before" timestamp */
-	err = rtc_read_time(rtc, &before);
-	if (err < 0)
+	err = rtc_valid_tm(&alarm->time);
+	if (err)
 		return err;
-	do {
-		if (!first_time)
-			memcpy(&before, &now, sizeof(struct rtc_time));
-		first_time = 0;
-
-		/* get the RTC alarm values, which may be incomplete */
-		err = rtc_read_alarm_internal(rtc, alarm);
-		if (err)
-			return err;
-		if (!alarm->enabled)
-			return 0;
-
-		/* full-function RTCs won't have such missing fields */
-		if (rtc_valid_tm(&alarm->time) == 0)
-			return 0;
-
-		/* get the "after" timestamp, to detect wrapped fields */
-		err = rtc_read_time(rtc, &now);
-		if (err < 0)
-			return err;
-
-		/* note that tm_sec is a "don't care" value here: */
-	} while (   before.tm_min   != now.tm_min
-		 || before.tm_hour  != now.tm_hour
-		 || before.tm_mon   != now.tm_mon
-		 || before.tm_year  != now.tm_year);
-
-	/* Fill in the missing alarm fields using the timestamp; we
-	 * know there's at least one since alarm->time is invalid.
-	 */
-	if (alarm->time.tm_sec == -1)
-		alarm->time.tm_sec = now.tm_sec;
-	if (alarm->time.tm_min == -1)
-		alarm->time.tm_min = now.tm_min;
-	if (alarm->time.tm_hour == -1)
-		alarm->time.tm_hour = now.tm_hour;
-
-	/* For simplicity, only support date rollover for now */
-	if (alarm->time.tm_mday == -1) {
-		alarm->time.tm_mday = now.tm_mday;
-		missing = day;
-	}
-	if (alarm->time.tm_mon == -1) {
-		alarm->time.tm_mon = now.tm_mon;
-		if (missing == none)
-			missing = month;
-	}
-	if (alarm->time.tm_year == -1) {
-		alarm->time.tm_year = now.tm_year;
-		if (missing == none)
-			missing = year;
-	}
-
-	/* with luck, no rollover is needed */
-	rtc_tm_to_time(&now, &t_now);
-	rtc_tm_to_time(&alarm->time, &t_alm);
-	if (t_now < t_alm)
-		goto done;
-
-	switch (missing) {
+	rtc_tm_to_time(&alarm->time, &scheduled);
 
-	/* 24 hour rollover ... if it's now 10am Monday, an alarm that
-	 * that will trigger at 5am will do so at 5am Tuesday, which
-	 * could also be in the next month or year.  This is a common
-	 * case, especially for PCs.
-	 */
-	case day:
-		dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
-		t_alm += 24 * 60 * 60;
-		rtc_time_to_tm(t_alm, &alarm->time);
-		break;
-
-	/* Month rollover ... if it's the 31th, an alarm on the 3rd will
-	 * be next month.  An alarm matching on the 30th, 29th, or 28th
-	 * may end up in the month after that!  Many newer PCs support
-	 * this type of alarm.
+	/* Make sure we're not setting alarms in the past */
+	err = __rtc_read_time(rtc, &tm);
+	rtc_tm_to_time(&tm, &now);
+	if (scheduled <= now)
+		return -ETIME;
+	/*
+	 * XXX - We just checked to make sure the alarm time is not
+	 * in the past, but there is still a race window where if
+	 * the is alarm set for the next second and the second ticks
+	 * over right here, before we set the alarm.
 	 */
-	case month:
-		dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
-		do {
-			if (alarm->time.tm_mon < 11)
-				alarm->time.tm_mon++;
-			else {
-				alarm->time.tm_mon = 0;
-				alarm->time.tm_year++;
-			}
-			days = rtc_month_days(alarm->time.tm_mon,
-					alarm->time.tm_year);
-		} while (days < alarm->time.tm_mday);
-		break;
-
-	/* Year rollover ... easy except for leap years! */
-	case year:
-		dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
-		do {
-			alarm->time.tm_year++;
-		} while (rtc_valid_tm(&alarm->time) != 0);
-		break;
-
-	default:
-		dev_warn(&rtc->dev, "alarm rollover not handled\n");
-	}
 
-done:
-	return 0;
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->set_alarm)
+		err = -EINVAL;
+	else
+		err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(rtc_read_alarm);
 
 int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
@@ -300,16 +173,18 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
-
-	if (!rtc->ops)
-		err = -ENODEV;
-	else if (!rtc->ops->set_alarm)
-		err = -EINVAL;
-	else
-		err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
-
+	if (rtc->aie_timer.enabled) {
+		rtctimer_remove(rtc, &rtc->aie_timer);
+		rtc->aie_timer.enabled = 0;
+	}
+	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
+	rtc->aie_timer.period = ktime_set(0, 0);
+	if (alarm->enabled) {
+		rtc->aie_timer.enabled = 1;
+		rtctimer_enqueue(rtc, &rtc->aie_timer);
+	}
 	mutex_unlock(&rtc->ops_lock);
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
 
@@ -319,6 +194,16 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 	if (err)
 		return err;
 
+	if (rtc->aie_timer.enabled != enabled) {
+		if (enabled) {
+			rtc->aie_timer.enabled = 1;
+			rtctimer_enqueue(rtc, &rtc->aie_timer);
+		} else {
+			rtctimer_remove(rtc, &rtc->aie_timer);
+			rtc->aie_timer.enabled = 0;
+		}
+	}
+
 	if (!rtc->ops)
 		err = -ENODEV;
 	else if (!rtc->ops->alarm_irq_enable)
@@ -337,52 +222,53 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 	if (err)
 		return err;
 
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	if (enabled == 0 && rtc->uie_irq_active) {
-		mutex_unlock(&rtc->ops_lock);
-		return rtc_dev_update_irq_enable_emul(rtc, enabled);
+	/* make sure we're changing state */
+	if (rtc->uie_rtctimer.enabled == enabled)
+		goto out;
+
+	if (enabled) {
+		struct rtc_time tm;
+		ktime_t now, onesec;
+
+		__rtc_read_time(rtc, &tm);
+		onesec = ktime_set(1, 0);
+		now = rtc_tm_to_ktime(tm);
+		rtc->uie_rtctimer.node.expires = ktime_add(now, onesec);
+		rtc->uie_rtctimer.period = ktime_set(1, 0);
+		rtc->uie_rtctimer.enabled = 1;
+		rtctimer_enqueue(rtc, &rtc->uie_rtctimer);
+	} else {
+		rtctimer_remove(rtc, &rtc->uie_rtctimer);
+		rtc->uie_rtctimer.enabled = 0;
 	}
-#endif
-
-	if (!rtc->ops)
-		err = -ENODEV;
-	else if (!rtc->ops->update_irq_enable)
-		err = -EINVAL;
-	else
-		err = rtc->ops->update_irq_enable(rtc->dev.parent, enabled);
 
+out:
 	mutex_unlock(&rtc->ops_lock);
-
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	/*
-	 * Enable emulation if the driver did not provide
-	 * the update_irq_enable function pointer or if returned
-	 * -EINVAL to signal that it has been configured without
-	 * interrupts or that are not available at the moment.
-	 */
-	if (err == -EINVAL)
-		err = rtc_dev_update_irq_enable_emul(rtc, enabled);
-#endif
 	return err;
+
 }
 EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
 
+
 /**
- * rtc_update_irq - report RTC periodic, alarm, and/or update irqs
- * @rtc: the rtc device
- * @num: how many irqs are being reported (usually one)
- * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF
- * Context: any
+ * rtc_handle_legacy_irq - AIE, UIE and PIE event hook
+ * @rtc: pointer to the rtc device
+ *
+ * This function is called when an AIE, UIE or PIE mode interrupt
+ * has occured (or been emulated).
+ *
+ * Triggers the registered irq_task function callback.
  */
-void rtc_update_irq(struct rtc_device *rtc,
-		unsigned long num, unsigned long events)
+static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
 {
 	unsigned long flags;
 
+	/* mark one irq of the appropriate mode */
 	spin_lock_irqsave(&rtc->irq_lock, flags);
-	rtc->irq_data = (rtc->irq_data + (num << 8)) | events;
+	rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF|mode);
 	spin_unlock_irqrestore(&rtc->irq_lock, flags);
 
+	/* call the task func */
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
 	if (rtc->irq_task)
 		rtc->irq_task->func(rtc->irq_task->private_data);
@@ -391,6 +277,69 @@ void rtc_update_irq(struct rtc_device *rtc,
 	wake_up_interruptible(&rtc->irq_queue);
 	kill_fasync(&rtc->async_queue, SIGIO, POLL_IN);
 }
+
+
+/**
+ * rtc_aie_update_irq - AIE mode rtctimer hook
+ * @private: pointer to the rtc_device
+ *
+ * This functions is called when the aie_timer expires.
+ */
+void rtc_aie_update_irq(void *private)
+{
+	struct rtc_device *rtc = (struct rtc_device *)private;
+	rtc_handle_legacy_irq(rtc, 1, RTC_AF);
+}
+
+
+/**
+ * rtc_uie_update_irq - UIE mode rtctimer hook
+ * @private: pointer to the rtc_device
+ *
+ * This functions is called when the uie_timer expires.
+ */
+void rtc_uie_update_irq(void *private)
+{
+	struct rtc_device *rtc = (struct rtc_device *)private;
+	rtc_handle_legacy_irq(rtc, 1,  RTC_UF);
+}
+
+
+/**
+ * rtc_pie_update_irq - PIE mode hrtimer hook
+ * @timer: pointer to the pie mode hrtimer
+ *
+ * This function is used to emulate PIE mode interrupts
+ * using an hrtimer. This function is called when the periodic
+ * hrtimer expires.
+ */
+enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer)
+{
+	struct rtc_device *rtc;
+	ktime_t period;
+	int count;
+	rtc = container_of(timer, struct rtc_device, pie_timer);
+
+	period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+	count = hrtimer_forward_now(timer, period);
+
+	rtc_handle_legacy_irq(rtc, count, RTC_PF);
+
+	return HRTIMER_RESTART;
+}
+
+/**
+ * rtc_update_irq - Triggered when a RTC interrupt occurs.
+ * @rtc: the rtc device
+ * @num: how many irqs are being reported (usually one)
+ * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF
+ * Context: any
+ */
+void rtc_update_irq(struct rtc_device *rtc,
+		unsigned long num, unsigned long events)
+{
+	schedule_work(&rtc->irqwork);
+}
 EXPORT_SYMBOL_GPL(rtc_update_irq);
 
 static int __rtc_match(struct device *dev, void *data)
@@ -477,18 +426,20 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled
 	int err = 0;
 	unsigned long flags;
 
-	if (rtc->ops->irq_set_state == NULL)
-		return -ENXIO;
-
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
 	if (rtc->irq_task != NULL && task == NULL)
 		err = -EBUSY;
 	if (rtc->irq_task != task)
 		err = -EACCES;
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
 
-	if (err == 0)
-		err = rtc->ops->irq_set_state(rtc->dev.parent, enabled);
+	if (enabled) {
+		ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+		hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL);
+	} else {
+		hrtimer_cancel(&rtc->pie_timer);
+	}
+	rtc->pie_enabled = enabled;
+	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
 
 	return err;
 }
@@ -509,21 +460,194 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 	int err = 0;
 	unsigned long flags;
 
-	if (rtc->ops->irq_set_freq == NULL)
-		return -ENXIO;
-
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
 	if (rtc->irq_task != NULL && task == NULL)
 		err = -EBUSY;
 	if (rtc->irq_task != task)
 		err = -EACCES;
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-
 	if (err == 0) {
-		err = rtc->ops->irq_set_freq(rtc->dev.parent, freq);
-		if (err == 0)
-			rtc->irq_freq = freq;
+		rtc->irq_freq = freq;
+		if (rtc->pie_enabled) {
+			ktime_t period;
+			hrtimer_cancel(&rtc->pie_timer);
+			period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+			hrtimer_start(&rtc->pie_timer, period,
+					HRTIMER_MODE_REL);
+		}
 	}
+	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
+
+/**
+ * rtctimer_enqueue - Adds a rtc_timer to the rtc_device timerqueue
+ * @rtc rtc device
+ * @timer timer being added.
+ *
+ * Enqueues a timer onto the rtc devices timerqueue and sets
+ * the next alarm event appropriately.
+ *
+ * Must hold ops_lock for proper serialization of timerqueue
+ */
+void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
+{
+	timerqueue_add(&rtc->timerqueue, &timer->node);
+	if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) {
+		struct rtc_wkalrm alarm;
+		int err;
+		alarm.time = rtc_ktime_to_tm(timer->node.expires);
+		alarm.enabled = 1;
+		err = __rtc_set_alarm(rtc, &alarm);
+		if (err == -ETIME)
+			schedule_work(&rtc->irqwork);
+	}
+}
+
+/**
+ * rtctimer_remove - Removes a rtc_timer from the rtc_device timerqueue
+ * @rtc rtc device
+ * @timer timer being removed.
+ *
+ * Removes a timer onto the rtc devices timerqueue and sets
+ * the next alarm event appropriately.
+ *
+ * Must hold ops_lock for proper serialization of timerqueue
+ */
+void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
+{
+	struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
+	timerqueue_del(&rtc->timerqueue, &timer->node);
+
+	if (next == &timer->node) {
+		struct rtc_wkalrm alarm;
+		int err;
+		next = timerqueue_getnext(&rtc->timerqueue);
+		if (!next)
+			return;
+		alarm.time = rtc_ktime_to_tm(next->expires);
+		alarm.enabled = 1;
+		err = __rtc_set_alarm(rtc, &alarm);
+		if (err == -ETIME)
+			schedule_work(&rtc->irqwork);
+	}
+}
+
+/**
+ * rtctimer_do_work - Expires rtc timers
+ * @rtc rtc device
+ * @timer timer being removed.
+ *
+ * Expires rtc timers. Reprograms next alarm event if needed.
+ * Called via worktask.
+ *
+ * Serializes access to timerqueue via ops_lock mutex
+ */
+void rtctimer_do_work(struct work_struct *work)
+{
+	struct rtc_timer *timer;
+	struct timerqueue_node *next;
+	ktime_t now;
+	struct rtc_time tm;
+
+	struct rtc_device *rtc =
+		container_of(work, struct rtc_device, irqwork);
+
+	mutex_lock(&rtc->ops_lock);
+again:
+	__rtc_read_time(rtc, &tm);
+	now = rtc_tm_to_ktime(tm);
+	while ((next = timerqueue_getnext(&rtc->timerqueue))) {
+		if (next->expires.tv64 > now.tv64)
+			break;
+
+		/* expire timer */
+		timer = container_of(next, struct rtc_timer, node);
+		timerqueue_del(&rtc->timerqueue, &timer->node);
+		timer->enabled = 0;
+		if (timer->task.func)
+			timer->task.func(timer->task.private_data);
+
+		/* Re-add/fwd periodic timers */
+		if (ktime_to_ns(timer->period)) {
+			timer->node.expires = ktime_add(timer->node.expires,
+							timer->period);
+			timer->enabled = 1;
+			timerqueue_add(&rtc->timerqueue, &timer->node);
+		}
+	}
+
+	/* Set next alarm */
+	if (next) {
+		struct rtc_wkalrm alarm;
+		int err;
+		alarm.time = rtc_ktime_to_tm(next->expires);
+		alarm.enabled = 1;
+		err = __rtc_set_alarm(rtc, &alarm);
+		if (err == -ETIME)
+			goto again;
+	}
+
+	mutex_unlock(&rtc->ops_lock);
+}
+
+
+/* rtctimer_init - Initializes an rtc_timer
+ * @timer: timer to be intiialized
+ * @f: function pointer to be called when timer fires
+ * @data: private data passed to function pointer
+ *
+ * Kernel interface to initializing an rtc_timer.
+ */
+void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
+{
+	timerqueue_init(&timer->node);
+	timer->enabled = 0;
+	timer->task.func = f;
+	timer->task.private_data = data;
+}
+
+/* rtctimer_start - Sets an rtc_timer to fire in the future
+ * @ rtc: rtc device to be used
+ * @ timer: timer being set
+ * @ expires: time at which to expire the timer
+ * @ period: period that the timer will recur
+ *
+ * Kernel interface to set an rtc_timer
+ */
+int rtctimer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+			ktime_t expires, ktime_t period)
+{
+	int ret = 0;
+	mutex_lock(&rtc->ops_lock);
+	if (timer->enabled)
+		rtctimer_remove(rtc, timer);
+
+	timer->node.expires = expires;
+	timer->period = period;
+
+	timer->enabled = 1;
+	rtctimer_enqueue(rtc, timer);
+
+	mutex_unlock(&rtc->ops_lock);
+	return ret;
+}
+
+/* rtctimer_cancel - Stops an rtc_timer
+ * @ rtc: rtc device to be used
+ * @ timer: timer being set
+ *
+ * Kernel interface to cancel an rtc_timer
+ */
+int rtctimer_cancel(struct rtc_device *rtc, struct rtc_timer* timer)
+{
+	int ret = 0;
+	mutex_lock(&rtc->ops_lock);
+	if (timer->enabled)
+		rtctimer_remove(rtc, timer);
+	timer->enabled = 0;
+	mutex_unlock(&rtc->ops_lock);
+	return ret;
+}
+
+
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
index 773851f338b8..075f1708deae 100644
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -117,4 +117,32 @@ int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
 }
 EXPORT_SYMBOL(rtc_tm_to_time);
 
+/*
+ * Convert rtc_time to ktime
+ */
+ktime_t rtc_tm_to_ktime(struct rtc_time tm)
+{
+	time_t time;
+	rtc_tm_to_time(&tm, &time);
+	return ktime_set(time, 0);
+}
+EXPORT_SYMBOL_GPL(rtc_tm_to_ktime);
+
+/*
+ * Convert ktime to rtc_time
+ */
+struct rtc_time rtc_ktime_to_tm(ktime_t kt)
+{
+	struct timespec ts;
+	struct rtc_time ret;
+
+	ts = ktime_to_timespec(kt);
+	/* Round up any ns */
+	if (ts.tv_nsec)
+		ts.tv_sec++;
+	rtc_time_to_tm(ts.tv_sec, &ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rtc_ktime_to_tm);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 14dbc83ded20..a3421abca703 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -107,12 +107,17 @@ extern int rtc_year_days(unsigned int day, unsigned int month, unsigned int year
 extern int rtc_valid_tm(struct rtc_time *tm);
 extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time);
 extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm);
+ktime_t rtc_tm_to_ktime(struct rtc_time tm);
+struct rtc_time rtc_ktime_to_tm(ktime_t kt);
+
 
 #include <linux/device.h>
 #include <linux/seq_file.h>
 #include <linux/cdev.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
+#include <linux/timerqueue.h>
+#include <linux/workqueue.h>
 
 extern struct class *rtc_class;
 
@@ -151,7 +156,19 @@ struct rtc_class_ops {
 };
 
 #define RTC_DEVICE_NAME_SIZE 20
-struct rtc_task;
+typedef struct rtc_task {
+	void (*func)(void *private_data);
+	void *private_data;
+} rtc_task_t;
+
+
+struct rtc_timer {
+	struct rtc_task	task;
+	struct timerqueue_node node;
+	ktime_t period;
+	int enabled;
+};
+
 
 /* flags */
 #define RTC_DEV_BUSY 0
@@ -179,6 +196,15 @@ struct rtc_device
 	spinlock_t irq_task_lock;
 	int irq_freq;
 	int max_user_freq;
+
+	struct timerqueue_head timerqueue;
+	struct rtc_timer aie_timer;
+	struct rtc_timer uie_rtctimer;
+	struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
+	int pie_enabled;
+	struct work_struct irqwork;
+
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -224,15 +250,22 @@ extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
 						unsigned int enabled);
 
-typedef struct rtc_task {
-	void (*func)(void *private_data);
-	void *private_data;
-} rtc_task_t;
+void rtc_aie_update_irq(void *private);
+void rtc_uie_update_irq(void *private);
+enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
 
 int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 
+void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
+int rtctimer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+			ktime_t expires, ktime_t period);
+int rtctimer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
+void rtctimer_do_work(struct work_struct *work);
+
 static inline bool is_leap_year(unsigned int year)
 {
 	return (!(year % 4) && (year % 100)) || !(year % 400);
-- 
cgit v1.2.3-71-gd317


From 042620a018afcfba1d678062b62e463b9e43a68d Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 14 Oct 2010 16:22:33 -0700
Subject: RTC: Remove UIE emulation

Since we provide UIE interrupts via a rtc_timer, the old
emulation code can be removed.

Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML Reference: <1290136329-18291-5-git-send-email-john.stultz@linaro.org>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
CC: Alessandro Zummo <a.zummo@towertech.it>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Richard Cochran <richardcochran@gmail.com>
---
 drivers/rtc/rtc-dev.c | 104 --------------------------------------------------
 include/linux/rtc.h   |  12 ------
 2 files changed, 116 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 62227cd52410..212b16edafc0 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -46,105 +46,6 @@ static int rtc_dev_open(struct inode *inode, struct file *file)
 	return err;
 }
 
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-/*
- * Routine to poll RTC seconds field for change as often as possible,
- * after first RTC_UIE use timer to reduce polling
- */
-static void rtc_uie_task(struct work_struct *work)
-{
-	struct rtc_device *rtc =
-		container_of(work, struct rtc_device, uie_task);
-	struct rtc_time tm;
-	int num = 0;
-	int err;
-
-	err = rtc_read_time(rtc, &tm);
-
-	spin_lock_irq(&rtc->irq_lock);
-	if (rtc->stop_uie_polling || err) {
-		rtc->uie_task_active = 0;
-	} else if (rtc->oldsecs != tm.tm_sec) {
-		num = (tm.tm_sec + 60 - rtc->oldsecs) % 60;
-		rtc->oldsecs = tm.tm_sec;
-		rtc->uie_timer.expires = jiffies + HZ - (HZ/10);
-		rtc->uie_timer_active = 1;
-		rtc->uie_task_active = 0;
-		add_timer(&rtc->uie_timer);
-	} else if (schedule_work(&rtc->uie_task) == 0) {
-		rtc->uie_task_active = 0;
-	}
-	spin_unlock_irq(&rtc->irq_lock);
-	if (num)
-		rtc_update_irq(rtc, num, RTC_UF | RTC_IRQF);
-}
-static void rtc_uie_timer(unsigned long data)
-{
-	struct rtc_device *rtc = (struct rtc_device *)data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc->irq_lock, flags);
-	rtc->uie_timer_active = 0;
-	rtc->uie_task_active = 1;
-	if ((schedule_work(&rtc->uie_task) == 0))
-		rtc->uie_task_active = 0;
-	spin_unlock_irqrestore(&rtc->irq_lock, flags);
-}
-
-static int clear_uie(struct rtc_device *rtc)
-{
-	spin_lock_irq(&rtc->irq_lock);
-	if (rtc->uie_irq_active) {
-		rtc->stop_uie_polling = 1;
-		if (rtc->uie_timer_active) {
-			spin_unlock_irq(&rtc->irq_lock);
-			del_timer_sync(&rtc->uie_timer);
-			spin_lock_irq(&rtc->irq_lock);
-			rtc->uie_timer_active = 0;
-		}
-		if (rtc->uie_task_active) {
-			spin_unlock_irq(&rtc->irq_lock);
-			flush_scheduled_work();
-			spin_lock_irq(&rtc->irq_lock);
-		}
-		rtc->uie_irq_active = 0;
-	}
-	spin_unlock_irq(&rtc->irq_lock);
-	return 0;
-}
-
-static int set_uie(struct rtc_device *rtc)
-{
-	struct rtc_time tm;
-	int err;
-
-	err = rtc_read_time(rtc, &tm);
-	if (err)
-		return err;
-	spin_lock_irq(&rtc->irq_lock);
-	if (!rtc->uie_irq_active) {
-		rtc->uie_irq_active = 1;
-		rtc->stop_uie_polling = 0;
-		rtc->oldsecs = tm.tm_sec;
-		rtc->uie_task_active = 1;
-		if (schedule_work(&rtc->uie_task) == 0)
-			rtc->uie_task_active = 0;
-	}
-	rtc->irq_data = 0;
-	spin_unlock_irq(&rtc->irq_lock);
-	return 0;
-}
-
-int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
-{
-	if (enabled)
-		return set_uie(rtc);
-	else
-		return clear_uie(rtc);
-}
-EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
-
-#endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
 
 static ssize_t
 rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
@@ -493,11 +394,6 @@ void rtc_dev_prepare(struct rtc_device *rtc)
 
 	rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id);
 
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	INIT_WORK(&rtc->uie_task, rtc_uie_task);
-	setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
-#endif
-
 	cdev_init(&rtc->char_dev, &rtc_dev_fops);
 	rtc->char_dev.owner = rtc->owner;
 }
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index a3421abca703..44e18a2523a3 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -203,18 +203,6 @@ struct rtc_device
 	struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
 	int pie_enabled;
 	struct work_struct irqwork;
-
-
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	struct work_struct uie_task;
-	struct timer_list uie_timer;
-	/* Those fields are protected by rtc->irq_lock */
-	unsigned int oldsecs;
-	unsigned int uie_irq_active:1;
-	unsigned int stop_uie_polling:1;
-	unsigned int uie_task_active:1;
-	unsigned int uie_timer_active:1;
-#endif
 };
 #define to_rtc_device(d) container_of(d, struct rtc_device, dev)
 
-- 
cgit v1.2.3-71-gd317


From 45f74264e18449cf3c93cccaf098ee6e9524ab78 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 11 Dec 2010 12:34:34 +0100
Subject: timerqueue: Make timerqueue_getnext() static inline

No point in calling a function just to dereference a pointer.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
---
 include/linux/timerqueue.h | 15 ++++++++++++++-
 lib/timerqueue.c           | 14 --------------
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 406b103894bd..d24aabaca474 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -20,10 +20,23 @@ extern void timerqueue_add(struct timerqueue_head *head,
 				struct timerqueue_node *node);
 extern void timerqueue_del(struct timerqueue_head *head,
 				struct timerqueue_node *node);
-extern struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head);
 extern struct timerqueue_node *timerqueue_iterate_next(
 						struct timerqueue_node *node);
 
+/**
+ * timerqueue_getnext - Returns the timer with the earlies expiration time
+ *
+ * @head: head of timerqueue
+ *
+ * Returns a pointer to the timer node that has the
+ * earliest expiration time.
+ */
+static inline
+struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+{
+	return head->next;
+}
+
 static inline void timerqueue_init(struct timerqueue_node *node)
 {
 	RB_CLEAR_NODE(&node->node);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 444b0934af92..e3a1050e6820 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -84,20 +84,6 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 }
 EXPORT_SYMBOL_GPL(timerqueue_del);
 
-/**
- * timerqueue_getnext - Returns the timer with the earlies expiration time
- *
- * @head: head of timerqueue
- *
- * Returns a pointer to the timer node that has the
- * earliest expiration time.
- */
-struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
-{
-	return head->next;
-}
-EXPORT_SYMBOL_GPL(timerqueue_getnext);
-
 /**
  * timerqueue_iterate_next - Returns the timer after the provided timer
  *
-- 
cgit v1.2.3-71-gd317


From dbd2fd656f2060abfd3a16257f8b51ec60f6d2ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 9 Dec 2010 19:58:59 +0100
Subject: cfg80211/nl80211: separate unicast/multicast default TX keys

Allow userspace to specify that a given key
is default only for unicast and/or multicast
transmissions. Only WEP keys are for both,
WPA/RSN keys set here are GTKs for multicast
only. For more future flexibility, allow to
specify all combiations.

Wireless extensions can only set both so use
nl80211; WEP keys (connect keys) must be set
as default for both (but 802.1X WEP is still
possible).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwmc3200wifi/cfg80211.c |   3 +-
 drivers/net/wireless/libertas/cfg.c          |   3 +-
 drivers/net/wireless/rndis_wlan.c            |   4 +-
 include/linux/nl80211.h                      |  27 ++++++
 include/net/cfg80211.h                       |   5 +-
 net/mac80211/cfg.c                           |   3 +-
 net/wireless/nl80211.c                       | 125 +++++++++++++++++++++++----
 net/wireless/util.c                          |   3 +-
 net/wireless/wext-compat.c                   |   8 +-
 9 files changed, 152 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
index c6c0eff9b5ed..5a4982271e96 100644
--- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c
+++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
@@ -225,7 +225,8 @@ static int iwm_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev,
 
 static int iwm_cfg80211_set_default_key(struct wiphy *wiphy,
 					struct net_device *ndev,
-					u8 key_index)
+					u8 key_index, bool unicast,
+					bool multicast)
 {
 	struct iwm_priv *iwm = ndev_to_iwm(ndev);
 
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index dee32d3681a5..300be1931826 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1422,7 +1422,8 @@ static int lbs_cfg_disconnect(struct wiphy *wiphy, struct net_device *dev,
 
 static int lbs_cfg_set_default_key(struct wiphy *wiphy,
 				   struct net_device *netdev,
-				   u8 key_index)
+				   u8 key_index, bool unicast,
+				   bool multicast)
 {
 	struct lbs_private *priv = wiphy_priv(wiphy);
 
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 19f3d568f700..4a4f00591447 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -554,7 +554,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev,
 			 u8 key_index, bool pairwise, const u8 *mac_addr);
 
 static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
-								u8 key_index);
+				 u8 key_index, bool unicast, bool multicast);
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
 					u8 *mac, struct station_info *sinfo);
@@ -2381,7 +2381,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev,
 }
 
 static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
-								u8 key_index)
+				 u8 key_index, bool unicast, bool multicast)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 380421253d16..b8fa25d741ba 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -851,6 +851,10 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16)
  *
+ * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ *	attributes, specifying what a key should be set as default as.
+ *	See &enum nl80211_key_default_types.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1029,6 +1033,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_BSS_HT_OPMODE,
 
+	NL80211_ATTR_KEY_DEFAULT_TYPES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1774,6 +1780,23 @@ enum nl80211_wpa_versions {
 	NL80211_WPA_VERSION_2 = 1 << 1,
 };
 
+/**
+ * enum nl80211_key_default_types - key default types
+ * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid
+ * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default
+ *	unicast key
+ * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default
+ *	multicast key
+ * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types
+ */
+enum nl80211_key_default_types {
+	__NL80211_KEY_DEFAULT_TYPE_INVALID,
+	NL80211_KEY_DEFAULT_TYPE_UNICAST,
+	NL80211_KEY_DEFAULT_TYPE_MULTICAST,
+
+	NUM_NL80211_KEY_DEFAULT_TYPES
+};
+
 /**
  * enum nl80211_key_attributes - key attributes
  * @__NL80211_KEY_INVALID: invalid
@@ -1790,6 +1813,9 @@ enum nl80211_wpa_versions {
  * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not
  *	specified the default depends on whether a MAC address was
  *	given with the command using the key or not (u32)
+ * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ *	attributes, specifying what a key should be set as default as.
+ *	See &enum nl80211_key_default_types.
  * @__NL80211_KEY_AFTER_LAST: internal
  * @NL80211_KEY_MAX: highest key attribute
  */
@@ -1802,6 +1828,7 @@ enum nl80211_key_attributes {
 	NL80211_KEY_DEFAULT,
 	NL80211_KEY_DEFAULT_MGMT,
 	NL80211_KEY_TYPE,
+	NL80211_KEY_DEFAULT_TYPES,
 
 	/* keep last */
 	__NL80211_KEY_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4d5acb013636..22be7c625b70 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1211,7 +1211,7 @@ struct cfg80211_ops {
 			   u8 key_index, bool pairwise, const u8 *mac_addr);
 	int	(*set_default_key)(struct wiphy *wiphy,
 				   struct net_device *netdev,
-				   u8 key_index);
+				   u8 key_index, bool unicast, bool multicast);
 	int	(*set_default_mgmt_key)(struct wiphy *wiphy,
 					struct net_device *netdev,
 					u8 key_index);
@@ -1393,6 +1393,8 @@ struct cfg80211_ops {
  *	control port protocol ethertype. The device also honours the
  *	control_port_no_encrypt flag.
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
+ * @WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS: The device supports separate
+ *	unicast and multicast TX keys.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1404,6 +1406,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_4ADDR_STATION		= BIT(6),
 	WIPHY_FLAG_CONTROL_PORT_PROTOCOL	= BIT(7),
 	WIPHY_FLAG_IBSS_RSN			= BIT(8),
+	WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS= BIT(9),
 };
 
 struct mac_address {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c30b8b72eedb..12f7dc048d34 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -295,7 +295,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 
 static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
-					u8 key_idx)
+					u8 key_idx, bool uni,
+					bool multi)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 73a7f6d354c9..53f044370cde 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -171,6 +171,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
 	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
+	[NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
 };
 
 /* policy for the key attributes */
@@ -182,6 +183,14 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
 	[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
 	[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
 	[NL80211_KEY_TYPE] = { .type = NLA_U32 },
+	[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
+};
+
+/* policy for the key default flags */
+static const struct nla_policy
+nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
+	[NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG },
+	[NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
 };
 
 /* ifidx get helper */
@@ -314,6 +323,7 @@ struct key_parse {
 	int idx;
 	int type;
 	bool def, defmgmt;
+	bool def_uni, def_multi;
 };
 
 static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
@@ -327,6 +337,13 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
 	k->def = !!tb[NL80211_KEY_DEFAULT];
 	k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT];
 
+	if (k->def) {
+		k->def_uni = true;
+		k->def_multi = true;
+	}
+	if (k->defmgmt)
+		k->def_multi = true;
+
 	if (tb[NL80211_KEY_IDX])
 		k->idx = nla_get_u8(tb[NL80211_KEY_IDX]);
 
@@ -349,6 +366,19 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
 			return -EINVAL;
 	}
 
+	if (tb[NL80211_KEY_DEFAULT_TYPES]) {
+		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
+		int err = nla_parse_nested(kdt,
+					   NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+					   tb[NL80211_KEY_DEFAULT_TYPES],
+					   nl80211_key_default_policy);
+		if (err)
+			return err;
+
+		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
+		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
+	}
+
 	return 0;
 }
 
@@ -373,12 +403,32 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
 	k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT];
 	k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT];
 
+	if (k->def) {
+		k->def_uni = true;
+		k->def_multi = true;
+	}
+	if (k->defmgmt)
+		k->def_multi = true;
+
 	if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
 		k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
 		if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
 			return -EINVAL;
 	}
 
+	if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
+		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
+		int err = nla_parse_nested(
+				kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+				info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
+				nl80211_key_default_policy);
+		if (err)
+			return err;
+
+		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
+		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
+	}
+
 	return 0;
 }
 
@@ -401,6 +451,11 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
 	if (k->def && k->defmgmt)
 		return -EINVAL;
 
+	if (k->defmgmt) {
+		if (k->def_uni || !k->def_multi)
+			return -EINVAL;
+	}
+
 	if (k->idx != -1) {
 		if (k->defmgmt) {
 			if (k->idx < 4 || k->idx > 5)
@@ -450,6 +505,8 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
 				goto error;
 			def = 1;
 			result->def = parse.idx;
+			if (!parse.def_uni || !parse.def_multi)
+				goto error;
 		} else if (parse.defmgmt)
 			goto error;
 		err = cfg80211_validate_key_settings(rdev, &parse.p,
@@ -1586,8 +1643,6 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	struct key_parse key;
 	int err;
 	struct net_device *dev = info->user_ptr[1];
-	int (*func)(struct wiphy *wiphy, struct net_device *netdev,
-		    u8 key_index);
 
 	err = nl80211_parse_key(info, &key);
 	if (err)
@@ -1600,27 +1655,61 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	if (!key.def && !key.defmgmt)
 		return -EINVAL;
 
-	if (key.def)
-		func = rdev->ops->set_default_key;
-	else
-		func = rdev->ops->set_default_mgmt_key;
+	wdev_lock(dev->ieee80211_ptr);
 
-	if (!func)
-		return -EOPNOTSUPP;
+	if (key.def) {
+		if (!rdev->ops->set_default_key) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
 
-	wdev_lock(dev->ieee80211_ptr);
-	err = nl80211_key_allowed(dev->ieee80211_ptr);
-	if (!err)
-		err = func(&rdev->wiphy, dev, key.idx);
+		err = nl80211_key_allowed(dev->ieee80211_ptr);
+		if (err)
+			goto out;
+
+		if (!(rdev->wiphy.flags &
+				WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS)) {
+			if (!key.def_uni || !key.def_multi) {
+				err = -EOPNOTSUPP;
+				goto out;
+			}
+		}
+
+		err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx,
+						 key.def_uni, key.def_multi);
+
+		if (err)
+			goto out;
 
 #ifdef CONFIG_CFG80211_WEXT
-	if (!err) {
-		if (func == rdev->ops->set_default_key)
-			dev->ieee80211_ptr->wext.default_key = key.idx;
-		else
-			dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
-	}
+		dev->ieee80211_ptr->wext.default_key = key.idx;
+#endif
+	} else {
+		if (key.def_uni || !key.def_multi) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!rdev->ops->set_default_mgmt_key) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = nl80211_key_allowed(dev->ieee80211_ptr);
+		if (err)
+			goto out;
+
+		err = rdev->ops->set_default_mgmt_key(&rdev->wiphy,
+						      dev, key.idx);
+		if (err)
+			goto out;
+
+#ifdef CONFIG_CFG80211_WEXT
+		dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
 #endif
+	}
+
+ out:
 	wdev_unlock(dev->ieee80211_ptr);
 
 	return err;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4de624ca4c63..7620ae2fcf18 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -689,7 +689,8 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 			continue;
 		}
 		if (wdev->connect_keys->def == i)
-			if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) {
+			if (rdev->ops->set_default_key(wdev->wiphy, dev,
+						       i, true, true)) {
 				netdev_err(dev, "failed to set defkey %d\n", i);
 				continue;
 			}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 12222ee6ebf2..3e5dbd4e4cd5 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -548,8 +548,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 				__cfg80211_leave_ibss(rdev, wdev->netdev, true);
 				rejoin = true;
 			}
-			err = rdev->ops->set_default_key(&rdev->wiphy,
-							 dev, idx);
+			err = rdev->ops->set_default_key(&rdev->wiphy, dev,
+							 idx, true, true);
 		}
 		if (!err) {
 			wdev->wext.default_key = idx;
@@ -627,8 +627,8 @@ int cfg80211_wext_siwencode(struct net_device *dev,
 		err = 0;
 		wdev_lock(wdev);
 		if (wdev->current_bss)
-			err = rdev->ops->set_default_key(&rdev->wiphy,
-							 dev, idx);
+			err = rdev->ops->set_default_key(&rdev->wiphy, dev,
+							 idx, true, true);
 		if (!err)
 			wdev->wext.default_key = idx;
 		wdev_unlock(wdev);
-- 
cgit v1.2.3-71-gd317


From 44316cb1e97a1e7f76eb3f07e5b0ba91d72e9693 Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Thu, 9 Dec 2010 18:24:41 -0800
Subject: ieee80211: add Parameter Set Count bitmask

WMM IE QoS Info field lower 4 bits: Parameter Set Count

Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 351c0ab4e284..7f2354534242 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -122,6 +122,7 @@
 
 /* U-APSD queue for WMM IEs sent by AP */
 #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD	(1<<7)
+#define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK	0x0f
 
 /* U-APSD queues for WMM IEs sent by STA */
 #define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO	(1<<0)
-- 
cgit v1.2.3-71-gd317


From 96c8f06a0fb359a9a89701a7afab6d837e466ab0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 13 Dec 2010 22:45:48 +0100
Subject: rtc: Namespace fixup

rtctimer_* is already occupied by sound/core/rtctimer.c. Instead of
fiddling with that, rename the new functions to rtc_timer_* which
reads nicer anyway.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@us.ibm.com>
---
 drivers/rtc/class.c     |  6 +++---
 drivers/rtc/interface.c | 42 +++++++++++++++++++++---------------------
 include/linux/rtc.h     | 12 ++++++------
 3 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 95d2b82762d6..3243832a17cd 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -155,11 +155,11 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 
 	/* Init timerqueue */
 	timerqueue_init_head(&rtc->timerqueue);
-	INIT_WORK(&rtc->irqwork, rtctimer_do_work);
+	INIT_WORK(&rtc->irqwork, rtc_timer_do_work);
 	/* Init aie timer */
-	rtctimer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
+	rtc_timer_init(&rtc->aie_timer, rtc_aie_update_irq, (void *)rtc);
 	/* Init uie timer */
-	rtctimer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
+	rtc_timer_init(&rtc->uie_rtctimer, rtc_uie_update_irq, (void *)rtc);
 	/* Init pie timer */
 	hrtimer_init(&rtc->pie_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rtc->pie_timer.function = rtc_pie_update_irq;
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index c81c50b497b7..90384b9f6b2c 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -174,14 +174,14 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	if (err)
 		return err;
 	if (rtc->aie_timer.enabled) {
-		rtctimer_remove(rtc, &rtc->aie_timer);
+		rtc_timer_remove(rtc, &rtc->aie_timer);
 		rtc->aie_timer.enabled = 0;
 	}
 	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
 	rtc->aie_timer.period = ktime_set(0, 0);
 	if (alarm->enabled) {
 		rtc->aie_timer.enabled = 1;
-		rtctimer_enqueue(rtc, &rtc->aie_timer);
+		rtc_timer_enqueue(rtc, &rtc->aie_timer);
 	}
 	mutex_unlock(&rtc->ops_lock);
 	return 0;
@@ -197,9 +197,9 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 	if (rtc->aie_timer.enabled != enabled) {
 		if (enabled) {
 			rtc->aie_timer.enabled = 1;
-			rtctimer_enqueue(rtc, &rtc->aie_timer);
+			rtc_timer_enqueue(rtc, &rtc->aie_timer);
 		} else {
-			rtctimer_remove(rtc, &rtc->aie_timer);
+			rtc_timer_remove(rtc, &rtc->aie_timer);
 			rtc->aie_timer.enabled = 0;
 		}
 	}
@@ -236,9 +236,9 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 		rtc->uie_rtctimer.node.expires = ktime_add(now, onesec);
 		rtc->uie_rtctimer.period = ktime_set(1, 0);
 		rtc->uie_rtctimer.enabled = 1;
-		rtctimer_enqueue(rtc, &rtc->uie_rtctimer);
+		rtc_timer_enqueue(rtc, &rtc->uie_rtctimer);
 	} else {
-		rtctimer_remove(rtc, &rtc->uie_rtctimer);
+		rtc_timer_remove(rtc, &rtc->uie_rtctimer);
 		rtc->uie_rtctimer.enabled = 0;
 	}
 
@@ -481,7 +481,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
 
 /**
- * rtctimer_enqueue - Adds a rtc_timer to the rtc_device timerqueue
+ * rtc_timer_enqueue - Adds a rtc_timer to the rtc_device timerqueue
  * @rtc rtc device
  * @timer timer being added.
  *
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
  *
  * Must hold ops_lock for proper serialization of timerqueue
  */
-void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
+void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 {
 	timerqueue_add(&rtc->timerqueue, &timer->node);
 	if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) {
@@ -505,7 +505,7 @@ void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 }
 
 /**
- * rtctimer_remove - Removes a rtc_timer from the rtc_device timerqueue
+ * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue
  * @rtc rtc device
  * @timer timer being removed.
  *
@@ -514,7 +514,7 @@ void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
  *
  * Must hold ops_lock for proper serialization of timerqueue
  */
-void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
+void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
 {
 	struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
 	timerqueue_del(&rtc->timerqueue, &timer->node);
@@ -534,7 +534,7 @@ void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
 }
 
 /**
- * rtctimer_do_work - Expires rtc timers
+ * rtc_timer_do_work - Expires rtc timers
  * @rtc rtc device
  * @timer timer being removed.
  *
@@ -543,7 +543,7 @@ void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
  *
  * Serializes access to timerqueue via ops_lock mutex
  */
-void rtctimer_do_work(struct work_struct *work)
+void rtc_timer_do_work(struct work_struct *work)
 {
 	struct rtc_timer *timer;
 	struct timerqueue_node *next;
@@ -592,14 +592,14 @@ again:
 }
 
 
-/* rtctimer_init - Initializes an rtc_timer
+/* rtc_timer_init - Initializes an rtc_timer
  * @timer: timer to be intiialized
  * @f: function pointer to be called when timer fires
  * @data: private data passed to function pointer
  *
  * Kernel interface to initializing an rtc_timer.
  */
-void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
+void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
 {
 	timerqueue_init(&timer->node);
 	timer->enabled = 0;
@@ -607,7 +607,7 @@ void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
 	timer->task.private_data = data;
 }
 
-/* rtctimer_start - Sets an rtc_timer to fire in the future
+/* rtc_timer_start - Sets an rtc_timer to fire in the future
  * @ rtc: rtc device to be used
  * @ timer: timer being set
  * @ expires: time at which to expire the timer
@@ -615,36 +615,36 @@ void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data)
  *
  * Kernel interface to set an rtc_timer
  */
-int rtctimer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
 			ktime_t expires, ktime_t period)
 {
 	int ret = 0;
 	mutex_lock(&rtc->ops_lock);
 	if (timer->enabled)
-		rtctimer_remove(rtc, timer);
+		rtc_timer_remove(rtc, timer);
 
 	timer->node.expires = expires;
 	timer->period = period;
 
 	timer->enabled = 1;
-	rtctimer_enqueue(rtc, timer);
+	rtc_timer_enqueue(rtc, timer);
 
 	mutex_unlock(&rtc->ops_lock);
 	return ret;
 }
 
-/* rtctimer_cancel - Stops an rtc_timer
+/* rtc_timer_cancel - Stops an rtc_timer
  * @ rtc: rtc device to be used
  * @ timer: timer being set
  *
  * Kernel interface to cancel an rtc_timer
  */
-int rtctimer_cancel(struct rtc_device *rtc, struct rtc_timer* timer)
+int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer)
 {
 	int ret = 0;
 	mutex_lock(&rtc->ops_lock);
 	if (timer->enabled)
-		rtctimer_remove(rtc, timer);
+		rtc_timer_remove(rtc, timer);
 	timer->enabled = 0;
 	mutex_unlock(&rtc->ops_lock);
 	return ret;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 44e18a2523a3..3c995b4d742c 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -246,13 +246,13 @@ int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 
-void rtctimer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
-void rtctimer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
-void rtctimer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
-int rtctimer_start(struct rtc_device *rtc, struct rtc_timer* timer,
+void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
+void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
+int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
 			ktime_t expires, ktime_t period);
-int rtctimer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
-void rtctimer_do_work(struct work_struct *work);
+int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer);
+void rtc_timer_do_work(struct work_struct *work);
 
 static inline bool is_leap_year(unsigned int year)
 {
-- 
cgit v1.2.3-71-gd317


From ba9effa2ecdc08325bd297d541b4207b2df38184 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Mon, 13 Dec 2010 19:10:29 +0200
Subject: kbuild: export linux/{a.out,kvm,kvm_para}.h on headers_install_all

Export linux/a.out.h, linux/kvm.h and linux/kvm_para.h on
headers_install_all if at least one architecture has appropriate files
in arch-dependent headers.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 include/linux/Kbuild | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 97319a8fc1e0..ae31f9521a6d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -20,15 +20,18 @@ header-y += wimax/
 objhdr-y += version.h
 
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
-		  $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
+		  $(srctree)/include/asm-$(SRCARCH)/a.out.h \
+		  $(INSTALL_HDR_PATH)/include/asm-*/a.out.h),)
 header-y += a.out.h
 endif
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
-		  $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
+		  $(srctree)/include/asm-$(SRCARCH)/kvm.h \
+		  $(INSTALL_HDR_PATH)/include/asm-*/kvm.h),)
 header-y += kvm.h
 endif
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
-		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
+		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h \
+		  $(INSTALL_HDR_PATH)/include/asm-*/kvm_para.h),)
 header-y += kvm_para.h
 endif
 
-- 
cgit v1.2.3-71-gd317


From ed41390fa57a21d06e6e3a3c4bc238bab8957fbb Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Dec 2010 16:23:10 +0100
Subject: workqueue: deprecate cancel_rearming_delayed_work[queue]()

There's no in-kernel user left for these two obsolete functions.  Mark
them deprecated and schedule for removal during 2.6.39 cycle.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt | 10 ++++++++++
 include/linux/workqueue.h                  |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 6c2f55e05f13..4ff47deb86da 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -564,3 +564,13 @@ Why:	This field is deprecated. I2C device drivers shouldn't change their
 Who:	Jean Delvare <khali@linux-fr.org>
 
 ----------------------------
+
+What:	cancel_rearming_delayed_work[queue]()
+When:	2.6.39
+
+Why:	The functions have been superceded by cancel_delayed_work_sync()
+	quite some time ago.  The conversion is trivial and there is no
+	in-kernel user left.
+Who:	Tejun Heo <tj@kernel.org>
+
+----------------------------
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0c0771f06bfa..6b5193d70268 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -401,7 +401,7 @@ static inline bool __cancel_delayed_work(struct delayed_work *work)
 }
 
 /* Obsolete. use cancel_delayed_work_sync() */
-static inline
+static inline __deprecated
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 					struct delayed_work *work)
 {
@@ -409,7 +409,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
 }
 
 /* Obsolete. use cancel_delayed_work_sync() */
-static inline
+static inline __deprecated
 void cancel_rearming_delayed_work(struct delayed_work *work)
 {
 	cancel_delayed_work_sync(work);
-- 
cgit v1.2.3-71-gd317


From a293911d4fd5e8593dbf478399a77f990d466269 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 14 Dec 2010 17:54:28 +0100
Subject: nl80211: advertise maximum remain-on-channel duration

With the upcoming hardware offload implementation,
some devices will have a different maximum duration
for the remain-on-channel command. Advertise the
maximum duration in mac80211, and make mac80211 set
it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 5 +++++
 include/net/cfg80211.h  | 5 +++++
 net/mac80211/main.c     | 2 ++
 net/wireless/nl80211.c  | 7 ++++++-
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index b8fa25d741ba..1cee56b3a79a 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -773,6 +773,9 @@ enum nl80211_commands {
  *	cache, a wiphy attribute.
  *
  * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32.
+ * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that
+ *	specifies the maximum duration that can be requested with the
+ *	remain-on-channel operation, in milliseconds, u32.
  *
  * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
  *
@@ -1035,6 +1038,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_KEY_DEFAULT_TYPES,
 
+	NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 22be7c625b70..f45e15f12446 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1474,6 +1474,9 @@ struct ieee80211_txrx_stypes {
  *
  * @available_antennas: bitmap of antennas which are available to configure.
  *	antenna configuration commands will be rejected unless this is set.
+ *
+ * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation
+ *	may request, if implemented.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1511,6 +1514,8 @@ struct wiphy {
 	char fw_version[ETHTOOL_BUSINFO_LEN];
 	u32 hw_version;
 
+	u16 max_remain_on_channel_duration;
+
 	u8 max_num_pmkids;
 
 	u32 available_antennas;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f7bdb7c78879..d87eb005690f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -745,6 +745,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
+	local->hw.wiphy->max_remain_on_channel_duration = 5000;
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		goto fail_wiphy_register;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 53f044370cde..594a6ac8b9d2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -755,6 +755,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	nla_nest_end(msg, nl_cmds);
 
+	if (dev->ops->remain_on_channel)
+		NLA_PUT_U32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+			    dev->wiphy.max_remain_on_channel_duration);
+
 	/* for now at least assume all drivers have it */
 	if (dev->ops->mgmt_tx)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK);
@@ -4228,7 +4232,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 	 * We should be on that channel for at least one jiffie,
 	 * and more than 5 seconds seems excessive.
 	 */
-	if (!duration || !msecs_to_jiffies(duration) || duration > 5000)
+	if (!duration || !msecs_to_jiffies(duration) ||
+	    duration > rdev->wiphy.max_remain_on_channel_duration)
 		return -EINVAL;
 
 	if (!rdev->ops->remain_on_channel)
-- 
cgit v1.2.3-71-gd317


From 47c78e891323513e9909729b44033e2c6649e2b7 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 27 Nov 2010 09:16:48 +0100
Subject: input: mt: Break out slots handling

In preparation for common code to handle a larger set of MT slots
devices, move the slots handling over to a separate file.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
---
 drivers/hid/hid-3m-pct.c                |  1 +
 drivers/input/Makefile                  |  2 +-
 drivers/input/input-mt.c                | 58 +++++++++++++++++++++++++++++++++
 drivers/input/input.c                   | 48 +--------------------------
 drivers/input/misc/uinput.c             |  1 +
 drivers/input/tablet/wacom_wac.c        |  1 +
 drivers/input/touchscreen/wacom_w8001.c |  2 +-
 include/linux/input.h                   | 16 ---------
 include/linux/input/mt.h                | 44 +++++++++++++++++++++++++
 9 files changed, 108 insertions(+), 65 deletions(-)
 create mode 100644 drivers/input/input-mt.c
 create mode 100644 include/linux/input/mt.h

(limited to 'include/linux')

diff --git a/drivers/hid/hid-3m-pct.c b/drivers/hid/hid-3m-pct.c
index 02d8cd3b1b1b..18575a4e0d63 100644
--- a/drivers/hid/hid-3m-pct.c
+++ b/drivers/hid/hid-3m-pct.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/usb.h>
+#include <linux/input/mt.h>
 
 MODULE_AUTHOR("Stephane Chatty <chatty@enac.fr>");
 MODULE_DESCRIPTION("3M PCT multitouch panels");
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 7ad212d31f99..569938b3cc04 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -5,7 +5,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_INPUT)		+= input-core.o
-input-core-objs := input.o input-compat.o ff-core.o
+input-core-objs := input.o input-compat.o ff-core.o input-mt.o
 
 obj-$(CONFIG_INPUT_FF_MEMLESS)	+= ff-memless.o
 obj-$(CONFIG_INPUT_POLLDEV)	+= input-polldev.o
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
new file mode 100644
index 000000000000..463a4d7d54f2
--- /dev/null
+++ b/drivers/input/input-mt.c
@@ -0,0 +1,58 @@
+/*
+ * Input Multitouch Library
+ *
+ * Copyright (c) 2008-2010 Henrik Rydberg
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/input/mt.h>
+#include <linux/slab.h>
+
+/**
+ * input_mt_create_slots() - create MT input slots
+ * @dev: input device supporting MT events and finger tracking
+ * @num_slots: number of slots used by the device
+ *
+ * This function allocates all necessary memory for MT slot handling in the
+ * input device, and adds ABS_MT_SLOT to the device capabilities. All slots
+ * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1.
+ */
+int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
+{
+	int i;
+
+	if (!num_slots)
+		return 0;
+
+	dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL);
+	if (!dev->mt)
+		return -ENOMEM;
+
+	dev->mtsize = num_slots;
+	input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
+
+	/* Mark slots as 'unused' */
+	for (i = 0; i < num_slots; i++)
+		input_mt_set_value(&dev->mt[i], ABS_MT_TRACKING_ID, -1);
+
+	return 0;
+}
+EXPORT_SYMBOL(input_mt_create_slots);
+
+/**
+ * input_mt_destroy_slots() - frees the MT slots of the input device
+ * @dev: input device with allocated MT slots
+ *
+ * This function is only needed in error path as the input core will
+ * automatically free the MT slots when the device is destroyed.
+ */
+void input_mt_destroy_slots(struct input_dev *dev)
+{
+	kfree(dev->mt);
+	dev->mt = NULL;
+	dev->mtsize = 0;
+}
+EXPORT_SYMBOL(input_mt_destroy_slots);
diff --git a/drivers/input/input.c b/drivers/input/input.c
index d092ef9291da..37708d1d86ec 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -12,7 +12,7 @@
 
 #include <linux/init.h>
 #include <linux/types.h>
-#include <linux/input.h>
+#include <linux/input/mt.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/random.h>
@@ -1690,52 +1690,6 @@ void input_free_device(struct input_dev *dev)
 }
 EXPORT_SYMBOL(input_free_device);
 
-/**
- * input_mt_create_slots() - create MT input slots
- * @dev: input device supporting MT events and finger tracking
- * @num_slots: number of slots used by the device
- *
- * This function allocates all necessary memory for MT slot handling in the
- * input device, and adds ABS_MT_SLOT to the device capabilities. All slots
- * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1.
- */
-int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
-{
-	int i;
-
-	if (!num_slots)
-		return 0;
-
-	dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL);
-	if (!dev->mt)
-		return -ENOMEM;
-
-	dev->mtsize = num_slots;
-	input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
-
-	/* Mark slots as 'unused' */
-	for (i = 0; i < num_slots; i++)
-		dev->mt[i].abs[ABS_MT_TRACKING_ID - ABS_MT_FIRST] = -1;
-
-	return 0;
-}
-EXPORT_SYMBOL(input_mt_create_slots);
-
-/**
- * input_mt_destroy_slots() - frees the MT slots of the input device
- * @dev: input device with allocated MT slots
- *
- * This function is only needed in error path as the input core will
- * automatically free the MT slots when the device is destroyed.
- */
-void input_mt_destroy_slots(struct input_dev *dev)
-{
-	kfree(dev->mt);
-	dev->mt = NULL;
-	dev->mtsize = 0;
-}
-EXPORT_SYMBOL(input_mt_destroy_slots);
-
 /**
  * input_set_capability - mark device as capable of a certain event
  * @dev: device that is capable of emitting or accepting event
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index b9410784e6a1..8f374143190e 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -37,6 +37,7 @@
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
 #include <linux/uinput.h>
+#include <linux/input/mt.h>
 #include "../input-compat.h"
 
 static int uinput_dev_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index b3252ef1e279..bde612c6d36d 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -14,6 +14,7 @@
 
 #include "wacom_wac.h"
 #include "wacom.h"
+#include <linux/input/mt.h>
 
 static int wacom_penpartner_irq(struct wacom_wac *wacom)
 {
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index 9ae4c7b16ba7..5d4f50e52a28 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/input.h>
+#include <linux/input/mt.h>
 #include <linux/serio.h>
 #include <linux/init.h>
 #include <linux/ctype.h>
diff --git a/include/linux/input.h b/include/linux/input.h
index 51af441f3a21..99e2a52c0509 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1082,14 +1082,6 @@ struct ff_effect {
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
-/**
- * struct input_mt_slot - represents the state of an input MT slot
- * @abs: holds current values of ABS_MT axes for this slot
- */
-struct input_mt_slot {
-	int abs[ABS_MT_LAST - ABS_MT_FIRST + 1];
-};
-
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -1461,11 +1453,6 @@ static inline void input_mt_sync(struct input_dev *dev)
 	input_event(dev, EV_SYN, SYN_MT_REPORT, 0);
 }
 
-static inline void input_mt_slot(struct input_dev *dev, int slot)
-{
-	input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
-}
-
 void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code);
 
 /**
@@ -1578,8 +1565,5 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
 int input_ff_create_memless(struct input_dev *dev, void *data,
 		int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
 
-int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots);
-void input_mt_destroy_slots(struct input_dev *dev);
-
 #endif
 #endif
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
new file mode 100644
index 000000000000..4f5e9d0e2eae
--- /dev/null
+++ b/include/linux/input/mt.h
@@ -0,0 +1,44 @@
+#ifndef _INPUT_MT_H
+#define _INPUT_MT_H
+
+/*
+ * Input Multitouch Library
+ *
+ * Copyright (c) 2010 Henrik Rydberg
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/input.h>
+
+/**
+ * struct input_mt_slot - represents the state of an input MT slot
+ * @abs: holds current values of ABS_MT axes for this slot
+ */
+struct input_mt_slot {
+	int abs[ABS_MT_LAST - ABS_MT_FIRST + 1];
+};
+
+static inline void input_mt_set_value(struct input_mt_slot *slot,
+				      unsigned code, int value)
+{
+	slot->abs[code - ABS_MT_FIRST] = value;
+}
+
+static inline int input_mt_get_value(const struct input_mt_slot *slot,
+				     unsigned code)
+{
+	return slot->abs[code - ABS_MT_FIRST];
+}
+
+int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots);
+void input_mt_destroy_slots(struct input_dev *dev);
+
+static inline void input_mt_slot(struct input_dev *dev, int slot)
+{
+	input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
+}
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 8cde81001626c4c60b26ef2eb5fc522885ed9fd0 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 27 Nov 2010 10:50:54 +0100
Subject: input: mt: Collect slots initialization code

The MT slots devices all follow the same initialization pattern
of creating slots and hinting about buffer size. Let drivers call
an initialization function instead, and make sure it can be called
repeatedly without side effects.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
---
 drivers/hid/hid-3m-pct.c                |  5 +----
 drivers/input/input-mt.c                | 19 +++++++++++++------
 drivers/input/misc/uinput.c             |  3 +--
 drivers/input/tablet/wacom_wac.c        |  2 +-
 drivers/input/touchscreen/wacom_w8001.c |  2 +-
 include/linux/input/mt.h                |  2 +-
 6 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-3m-pct.c b/drivers/hid/hid-3m-pct.c
index 18575a4e0d63..ea475964d05a 100644
--- a/drivers/hid/hid-3m-pct.c
+++ b/drivers/hid/hid-3m-pct.c
@@ -29,7 +29,6 @@ MODULE_LICENSE("GPL");
 
 #define MAX_SLOTS		60
 #define MAX_TRKID		USHRT_MAX
-#define MAX_EVENTS		360
 
 /* estimated signal-to-noise ratios */
 #define SN_MOVE			2048
@@ -123,9 +122,7 @@ static int mmm_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 					EV_ABS, ABS_MT_TRACKING_ID);
 			input_set_abs_params(hi->input, ABS_MT_TRACKING_ID,
 					     0, MAX_TRKID, 0, 0);
-			if (!hi->input->mt)
-				input_mt_create_slots(hi->input, MAX_SLOTS);
-			input_set_events_per_packet(hi->input, MAX_EVENTS);
+			input_mt_init_slots(hi->input, MAX_SLOTS);
 			return 1;
 		}
 		/* let hid-input decide for the others */
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index 463a4d7d54f2..f400e47092c4 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -12,20 +12,25 @@
 #include <linux/slab.h>
 
 /**
- * input_mt_create_slots() - create MT input slots
+ * input_mt_init_slots() - initialize MT input slots
  * @dev: input device supporting MT events and finger tracking
  * @num_slots: number of slots used by the device
  *
- * This function allocates all necessary memory for MT slot handling in the
- * input device, and adds ABS_MT_SLOT to the device capabilities. All slots
- * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1.
+ * This function allocates all necessary memory for MT slot handling
+ * in the input device, adds ABS_MT_SLOT to the device capabilities
+ * and sets up appropriate event buffers. All slots are initially
+ * marked as unused by setting ABS_MT_TRACKING_ID to -1. May be called
+ * repeatedly. Returns -EINVAL if attempting to reinitialize with a
+ * different number of slots.
  */
-int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
+int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots)
 {
 	int i;
 
 	if (!num_slots)
 		return 0;
+	if (dev->mt)
+		return dev->mtsize != num_slots ? -EINVAL : 0;
 
 	dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL);
 	if (!dev->mt)
@@ -33,6 +38,7 @@ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
 
 	dev->mtsize = num_slots;
 	input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
+	input_set_events_per_packet(dev, 6 * num_slots);
 
 	/* Mark slots as 'unused' */
 	for (i = 0; i < num_slots; i++)
@@ -40,7 +46,7 @@ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots)
 
 	return 0;
 }
-EXPORT_SYMBOL(input_mt_create_slots);
+EXPORT_SYMBOL(input_mt_init_slots);
 
 /**
  * input_mt_destroy_slots() - frees the MT slots of the input device
@@ -54,5 +60,6 @@ void input_mt_destroy_slots(struct input_dev *dev)
 	kfree(dev->mt);
 	dev->mt = NULL;
 	dev->mtsize = 0;
+	dev->slot = 0;
 }
 EXPORT_SYMBOL(input_mt_destroy_slots);
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 8f374143190e..bea89722c4e9 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -407,8 +407,7 @@ static int uinput_setup_device(struct uinput_device *udev, const char __user *bu
 			goto exit;
 		if (test_bit(ABS_MT_SLOT, dev->absbit)) {
 			int nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1;
-			input_mt_create_slots(dev, nslot);
-			input_set_events_per_packet(dev, 6 * nslot);
+			input_mt_init_slots(dev, nslot);
 		} else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) {
 			input_set_events_per_packet(dev, 60);
 		}
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index bde612c6d36d..f26e2238f6ca 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -1273,7 +1273,7 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 			__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
 			__set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit);
 
-			input_mt_create_slots(input_dev, 2);
+			input_mt_init_slots(input_dev, 2);
 			input_set_abs_params(input_dev, ABS_MT_POSITION_X,
 					     0, features->x_max,
 					     features->x_fuzz, 0);
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index 5d4f50e52a28..4a2e8cf4c8ef 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -318,7 +318,7 @@ static int w8001_setup(struct w8001 *w8001)
 		case 5:
 			w8001->pktlen = W8001_PKTLEN_TOUCH2FG;
 
-			input_mt_create_slots(dev, 2);
+			input_mt_init_slots(dev, 2);
 			input_set_abs_params(dev, ABS_MT_TRACKING_ID,
 						0, MAX_TRACKING_ID, 0, 0);
 			input_set_abs_params(dev, ABS_MT_POSITION_X,
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
index 4f5e9d0e2eae..d7f6518e3222 100644
--- a/include/linux/input/mt.h
+++ b/include/linux/input/mt.h
@@ -33,7 +33,7 @@ static inline int input_mt_get_value(const struct input_mt_slot *slot,
 	return slot->abs[code - ABS_MT_FIRST];
 }
 
-int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots);
+int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots);
 void input_mt_destroy_slots(struct input_dev *dev);
 
 static inline void input_mt_slot(struct input_dev *dev, int slot)
-- 
cgit v1.2.3-71-gd317


From c5f4dec1ceb6ab773bbbefbe64a7c990c7d6b17f Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Wed, 15 Dec 2010 13:50:34 +0100
Subject: input: mt: Move tracking and pointer emulation to input-mt

The drivers using the type B protocol all report tracking information
the same way. The contact id is semantically equivalent to
ABS_MT_SLOT, and the handling of ABS_MT_TRACKING_ID only complicates
the driver. The situation can be improved upon by providing a common
pointer emulation code, thereby removing the need for the tracking id
in the driver.  This patch moves all tracking event handling over to
the input core, simplifying both the existing drivers and the ones
currently in preparation.

Acked-by: Ping Cheng <pingc@wacom.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
---
 drivers/hid/hid-3m-pct.c                |  30 +--------
 drivers/input/input-mt.c                | 115 ++++++++++++++++++++++++++++++--
 drivers/input/tablet/wacom_wac.c        |  23 ++-----
 drivers/input/tablet/wacom_wac.h        |   4 --
 drivers/input/touchscreen/wacom_w8001.c |  21 ++----
 include/linux/input.h                   |   3 +
 include/linux/input/mt.h                |  13 ++++
 7 files changed, 138 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-3m-pct.c b/drivers/hid/hid-3m-pct.c
index ea475964d05a..4fb7c7528d16 100644
--- a/drivers/hid/hid-3m-pct.c
+++ b/drivers/hid/hid-3m-pct.c
@@ -28,7 +28,6 @@ MODULE_LICENSE("GPL");
 #include "hid-ids.h"
 
 #define MAX_SLOTS		60
-#define MAX_TRKID		USHRT_MAX
 
 /* estimated signal-to-noise ratios */
 #define SN_MOVE			2048
@@ -36,14 +35,11 @@ MODULE_LICENSE("GPL");
 
 struct mmm_finger {
 	__s32 x, y, w, h;
-	__u16 id;
-	bool prev_touch;
 	bool touch, valid;
 };
 
 struct mmm_data {
 	struct mmm_finger f[MAX_SLOTS];
-	__u16 id;
 	__u8 curid;
 	__u8 nexp, nreal;
 	bool touch, valid;
@@ -117,11 +113,6 @@ static int mmm_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 					0, 1, 0, 0);
 			return 1;
 		case HID_DG_CONTACTID:
-			field->logical_maximum = MAX_TRKID;
-			hid_map_usage(hi, usage, bit, max,
-					EV_ABS, ABS_MT_TRACKING_ID);
-			input_set_abs_params(hi->input, ABS_MT_TRACKING_ID,
-					     0, MAX_TRKID, 0, 0);
 			input_mt_init_slots(hi->input, MAX_SLOTS);
 			return 1;
 		}
@@ -152,7 +143,6 @@ static int mmm_input_mapped(struct hid_device *hdev, struct hid_input *hi,
  */
 static void mmm_filter_event(struct mmm_data *md, struct input_dev *input)
 {
-	struct mmm_finger *oldest = 0;
 	int i;
 	for (i = 0; i < MAX_SLOTS; ++i) {
 		struct mmm_finger *f = &md->f[i];
@@ -161,6 +151,7 @@ static void mmm_filter_event(struct mmm_data *md, struct input_dev *input)
 			continue;
 		}
 		input_mt_slot(input, i);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, f->touch);
 		if (f->touch) {
 			/* this finger is on the screen */
 			int wide = (f->w > f->h);
@@ -168,33 +159,16 @@ static void mmm_filter_event(struct mmm_data *md, struct input_dev *input)
 			int major = max(f->w, f->h) >> 1;
 			int minor = min(f->w, f->h) >> 1;
 
-			if (!f->prev_touch)
-				f->id = md->id++;
-			input_event(input, EV_ABS, ABS_MT_TRACKING_ID, f->id);
 			input_event(input, EV_ABS, ABS_MT_POSITION_X, f->x);
 			input_event(input, EV_ABS, ABS_MT_POSITION_Y, f->y);
 			input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
 			input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
 			input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
-			/* touchscreen emulation: pick the oldest contact */
-			if (!oldest || ((f->id - oldest->id) & (SHRT_MAX + 1)))
-				oldest = f;
-		} else {
-			/* this finger took off the screen */
-			input_event(input, EV_ABS, ABS_MT_TRACKING_ID, -1);
 		}
-		f->prev_touch = f->touch;
 		f->valid = 0;
 	}
 
-	/* touchscreen emulation */
-	if (oldest) {
-		input_event(input, EV_KEY, BTN_TOUCH, 1);
-		input_event(input, EV_ABS, ABS_X, oldest->x);
-		input_event(input, EV_ABS, ABS_Y, oldest->y);
-	} else {
-		input_event(input, EV_KEY, BTN_TOUCH, 0);
-	}
+	input_mt_report_pointer_emulation(input, true);
 	input_sync(input);
 }
 
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index f400e47092c4..c48c81f0308d 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -11,17 +11,18 @@
 #include <linux/input/mt.h>
 #include <linux/slab.h>
 
+#define TRKID_SGN	((TRKID_MAX + 1) >> 1)
+
 /**
  * input_mt_init_slots() - initialize MT input slots
  * @dev: input device supporting MT events and finger tracking
  * @num_slots: number of slots used by the device
  *
  * This function allocates all necessary memory for MT slot handling
- * in the input device, adds ABS_MT_SLOT to the device capabilities
- * and sets up appropriate event buffers. All slots are initially
- * marked as unused by setting ABS_MT_TRACKING_ID to -1. May be called
- * repeatedly. Returns -EINVAL if attempting to reinitialize with a
- * different number of slots.
+ * in the input device, prepares the ABS_MT_SLOT and
+ * ABS_MT_TRACKING_ID events for use and sets up appropriate buffers.
+ * May be called repeatedly. Returns -EINVAL if attempting to
+ * reinitialize with a different number of slots.
  */
 int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots)
 {
@@ -38,6 +39,7 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots)
 
 	dev->mtsize = num_slots;
 	input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
+	input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0);
 	input_set_events_per_packet(dev, 6 * num_slots);
 
 	/* Mark slots as 'unused' */
@@ -61,5 +63,108 @@ void input_mt_destroy_slots(struct input_dev *dev)
 	dev->mt = NULL;
 	dev->mtsize = 0;
 	dev->slot = 0;
+	dev->trkid = 0;
 }
 EXPORT_SYMBOL(input_mt_destroy_slots);
+
+/**
+ * input_mt_report_slot_state() - report contact state
+ * @dev: input device with allocated MT slots
+ * @tool_type: the tool type to use in this slot
+ * @active: true if contact is active, false otherwise
+ *
+ * Reports a contact via ABS_MT_TRACKING_ID, and optionally
+ * ABS_MT_TOOL_TYPE. If active is true and the slot is currently
+ * inactive, or if the tool type is changed, a new tracking id is
+ * assigned to the slot. The tool type is only reported if the
+ * corresponding absbit field is set.
+ */
+void input_mt_report_slot_state(struct input_dev *dev,
+				unsigned int tool_type, bool active)
+{
+	struct input_mt_slot *mt;
+	int id;
+
+	if (!dev->mt || !active) {
+		input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1);
+		return;
+	}
+
+	mt = &dev->mt[dev->slot];
+	id = input_mt_get_value(mt, ABS_MT_TRACKING_ID);
+	if (id < 0 || input_mt_get_value(mt, ABS_MT_TOOL_TYPE) != tool_type)
+		id = input_mt_new_trkid(dev);
+
+	input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id);
+	input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type);
+}
+EXPORT_SYMBOL(input_mt_report_slot_state);
+
+/**
+ * input_mt_report_finger_count() - report contact count
+ * @dev: input device with allocated MT slots
+ * @count: the number of contacts
+ *
+ * Reports the contact count via BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP,
+ * BTN_TOOL_TRIPLETAP and BTN_TOOL_QUADTAP.
+ *
+ * The input core ensures only the KEY events already setup for
+ * this device will produce output.
+ */
+void input_mt_report_finger_count(struct input_dev *dev, int count)
+{
+	input_event(dev, EV_KEY, BTN_TOOL_FINGER, count == 1);
+	input_event(dev, EV_KEY, BTN_TOOL_DOUBLETAP, count == 2);
+	input_event(dev, EV_KEY, BTN_TOOL_TRIPLETAP, count == 3);
+	input_event(dev, EV_KEY, BTN_TOOL_QUADTAP, count == 4);
+}
+EXPORT_SYMBOL(input_mt_report_finger_count);
+
+/**
+ * input_mt_report_pointer_emulation() - common pointer emulation
+ * @dev: input device with allocated MT slots
+ * @use_count: report number of active contacts as finger count
+ *
+ * Performs legacy pointer emulation via BTN_TOUCH, ABS_X, ABS_Y and
+ * ABS_PRESSURE. Touchpad finger count is emulated if use_count is true.
+ *
+ * The input core ensures only the KEY and ABS axes already setup for
+ * this device will produce output.
+ */
+void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count)
+{
+	struct input_mt_slot *oldest = 0;
+	int oldid = dev->trkid;
+	int count = 0;
+	int i;
+
+	for (i = 0; i < dev->mtsize; ++i) {
+		struct input_mt_slot *ps = &dev->mt[i];
+		int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID);
+
+		if (id < 0)
+			continue;
+		if ((id - oldid) & TRKID_SGN) {
+			oldest = ps;
+			oldid = id;
+		}
+		count++;
+	}
+
+	input_event(dev, EV_KEY, BTN_TOUCH, count > 0);
+	if (use_count)
+		input_mt_report_finger_count(dev, count);
+
+	if (oldest) {
+		int x = input_mt_get_value(oldest, ABS_MT_POSITION_X);
+		int y = input_mt_get_value(oldest, ABS_MT_POSITION_Y);
+		int p = input_mt_get_value(oldest, ABS_MT_PRESSURE);
+
+		input_event(dev, EV_ABS, ABS_X, x);
+		input_event(dev, EV_ABS, ABS_Y, y);
+		input_event(dev, EV_ABS, ABS_PRESSURE, p);
+	} else {
+		input_event(dev, EV_ABS, ABS_PRESSURE, 0);
+	}
+}
+EXPORT_SYMBOL(input_mt_report_pointer_emulation);
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index f26e2238f6ca..0b0525486711 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -863,19 +863,21 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
 	struct wacom_features *features = &wacom->features;
 	struct input_dev *input = wacom->input;
 	unsigned char *data = wacom->data;
-	int sp = 0, sx = 0, sy = 0, count = 0;
 	int i;
 
 	for (i = 0; i < 2; i++) {
 		int p = data[9 * i + 2];
+		bool touch = p && !wacom->shared->stylus_in_proximity;
+
 		input_mt_slot(input, i);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
 		/*
 		 * Touch events need to be disabled while stylus is
 		 * in proximity because user's hand is resting on touchpad
 		 * and sending unwanted events.  User expects tablet buttons
 		 * to continue working though.
 		 */
-		if (p && !wacom->shared->stylus_in_proximity) {
+		if (touch) {
 			int x = get_unaligned_be16(&data[9 * i + 3]) & 0x7ff;
 			int y = get_unaligned_be16(&data[9 * i + 5]) & 0x7ff;
 			if (features->quirks & WACOM_QUIRK_BBTOUCH_LOWRES) {
@@ -885,23 +887,10 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
 			input_report_abs(input, ABS_MT_PRESSURE, p);
 			input_report_abs(input, ABS_MT_POSITION_X, x);
 			input_report_abs(input, ABS_MT_POSITION_Y, y);
-			if (wacom->id[i] < 0)
-				wacom->id[i] = wacom->trk_id++ & MAX_TRACKING_ID;
-			if (!count++)
-				sp = p, sx = x, sy = y;
-		} else {
-			wacom->id[i] = -1;
 		}
-		input_report_abs(input, ABS_MT_TRACKING_ID, wacom->id[i]);
 	}
 
-	input_report_key(input, BTN_TOUCH, count > 0);
-	input_report_key(input, BTN_TOOL_FINGER, count == 1);
-	input_report_key(input, BTN_TOOL_DOUBLETAP, count == 2);
-
-	input_report_abs(input, ABS_PRESSURE, sp);
-	input_report_abs(input, ABS_X, sx);
-	input_report_abs(input, ABS_Y, sy);
+	input_mt_report_pointer_emulation(input, true);
 
 	input_report_key(input, BTN_LEFT, (data[1] & 0x08) != 0);
 	input_report_key(input, BTN_FORWARD, (data[1] & 0x04) != 0);
@@ -1283,8 +1272,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
 			input_set_abs_params(input_dev, ABS_MT_PRESSURE,
 					     0, features->pressure_max,
 					     features->pressure_fuzz, 0);
-			input_set_abs_params(input_dev, ABS_MT_TRACKING_ID, 0,
-					     MAX_TRACKING_ID, 0, 0);
 		} else if (features->device_type == BTN_TOOL_PEN) {
 			__set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
 			__set_bit(BTN_TOOL_PEN, input_dev->keybit);
diff --git a/drivers/input/tablet/wacom_wac.h b/drivers/input/tablet/wacom_wac.h
index 00ca01541d89..b1310ec9720c 100644
--- a/drivers/input/tablet/wacom_wac.h
+++ b/drivers/input/tablet/wacom_wac.h
@@ -42,9 +42,6 @@
 #define WACOM_QUIRK_MULTI_INPUT		0x0001
 #define WACOM_QUIRK_BBTOUCH_LOWRES	0x0002
 
-/* largest reported tracking id */
-#define MAX_TRACKING_ID			0xfff
-
 enum {
 	PENPARTNER = 0,
 	GRAPHIRE,
@@ -100,7 +97,6 @@ struct wacom_wac {
 	int id[3];
 	__u32 serial[2];
 	int last_finger;
-	int trk_id;
 	struct wacom_features features;
 	struct wacom_shared *shared;
 	struct input_dev *input;
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index 4a2e8cf4c8ef..2a0bec12d12a 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -48,8 +48,6 @@ MODULE_LICENSE("GPL");
 #define W8001_PKTLEN_TPCCTL	11	/* control packet */
 #define W8001_PKTLEN_TOUCH2FG	13
 
-#define MAX_TRACKING_ID		0xFF	/* arbitrarily chosen */
-
 struct w8001_coord {
 	u8 rdy;
 	u8 tsw;
@@ -87,7 +85,6 @@ struct w8001 {
 	char phys[32];
 	int type;
 	unsigned int pktlen;
-	int trkid[2];
 };
 
 static void parse_data(u8 *data, struct w8001_coord *coord)
@@ -116,28 +113,23 @@ static void parse_data(u8 *data, struct w8001_coord *coord)
 
 static void parse_touch(struct w8001 *w8001)
 {
-	static int trkid;
 	struct input_dev *dev = w8001->dev;
 	unsigned char *data = w8001->data;
 	int i;
 
 	for (i = 0; i < 2; i++) {
-		input_mt_slot(dev, i);
+		bool touch = data[0] & (1 << i);
 
-		if (data[0] & (1 << i)) {
+		input_mt_slot(dev, i);
+		input_mt_report_slot_state(dev, MT_TOOL_FINGER, touch);
+		if (touch) {
 			int x = (data[6 * i + 1] << 7) | (data[6 * i + 2]);
 			int y = (data[6 * i + 3] << 7) | (data[6 * i + 4]);
 			/* data[5,6] and [11,12] is finger capacity */
 
 			input_report_abs(dev, ABS_MT_POSITION_X, x);
 			input_report_abs(dev, ABS_MT_POSITION_Y, y);
-			input_report_abs(dev, ABS_MT_TOOL_TYPE, MT_TOOL_FINGER);
-			if (w8001->trkid[i] < 0)
-				w8001->trkid[i] = trkid++ & MAX_TRACKING_ID;
-		} else {
-			w8001->trkid[i] = -1;
 		}
-		input_report_abs(dev, ABS_MT_TRACKING_ID, w8001->trkid[i]);
 	}
 
 	input_sync(dev);
@@ -319,14 +311,12 @@ static int w8001_setup(struct w8001 *w8001)
 			w8001->pktlen = W8001_PKTLEN_TOUCH2FG;
 
 			input_mt_init_slots(dev, 2);
-			input_set_abs_params(dev, ABS_MT_TRACKING_ID,
-						0, MAX_TRACKING_ID, 0, 0);
 			input_set_abs_params(dev, ABS_MT_POSITION_X,
 						0, touch.x, 0, 0);
 			input_set_abs_params(dev, ABS_MT_POSITION_Y,
 						0, touch.y, 0, 0);
 			input_set_abs_params(dev, ABS_MT_TOOL_TYPE,
-						0, 0, 0, 0);
+						0, MT_TOOL_MAX, 0, 0);
 			break;
 		}
 	}
@@ -372,7 +362,6 @@ static int w8001_connect(struct serio *serio, struct serio_driver *drv)
 	w8001->serio = serio;
 	w8001->id = serio->id.id;
 	w8001->dev = input_dev;
-	w8001->trkid[0] = w8001->trkid[1] = -1;
 	init_completion(&w8001->cmd_done);
 	snprintf(w8001->phys, sizeof(w8001->phys), "%s/input0", serio->phys);
 
diff --git a/include/linux/input.h b/include/linux/input.h
index 99e2a52c0509..6de145df4c1c 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -848,6 +848,7 @@ struct input_keymap_entry {
  */
 #define MT_TOOL_FINGER		0
 #define MT_TOOL_PEN		1
+#define MT_TOOL_MAX		1
 
 /*
  * Values describing the status of a force-feedback effect
@@ -1122,6 +1123,7 @@ struct ff_effect {
  *	of tracked contacts
  * @mtsize: number of MT slots the device uses
  * @slot: MT slot currently being transmitted
+ * @trkid: stores MT tracking ID for the current contact
  * @absinfo: array of &struct absinfo elements holding information
  *	about absolute axes (current value, min, max, flat, fuzz,
  *	resolution)
@@ -1206,6 +1208,7 @@ struct input_dev {
 	struct input_mt_slot *mt;
 	int mtsize;
 	int slot;
+	int trkid;
 
 	struct input_absinfo *absinfo;
 
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
index d7f6518e3222..b3ac06a4435d 100644
--- a/include/linux/input/mt.h
+++ b/include/linux/input/mt.h
@@ -13,6 +13,8 @@
 
 #include <linux/input.h>
 
+#define TRKID_MAX	0xffff
+
 /**
  * struct input_mt_slot - represents the state of an input MT slot
  * @abs: holds current values of ABS_MT axes for this slot
@@ -36,9 +38,20 @@ static inline int input_mt_get_value(const struct input_mt_slot *slot,
 int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots);
 void input_mt_destroy_slots(struct input_dev *dev);
 
+static inline int input_mt_new_trkid(struct input_dev *dev)
+{
+	return dev->trkid++ & TRKID_MAX;
+}
+
 static inline void input_mt_slot(struct input_dev *dev, int slot)
 {
 	input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
 }
 
+void input_mt_report_slot_state(struct input_dev *dev,
+				unsigned int tool_type, bool active);
+
+void input_mt_report_finger_count(struct input_dev *dev, int count);
+void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count);
+
 #endif
-- 
cgit v1.2.3-71-gd317


From e42a98b520bb22535687ead3120e80edc268279a Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Mon, 6 Dec 2010 10:05:43 +0100
Subject: input: mt: Add hovering distance axis

Touch devices capable of hovering, i.e., fingers detected a
distance from the surface, are not supported by the current
input MT protocol. This patch adds ABS_MT_DISTANCE, which may
be used to indicate the distance between the contact and the
surface.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
---
 Documentation/input/multi-touch-protocol.txt | 9 ++++++++-
 include/linux/input.h                        | 3 ++-
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index bdcba154b83e..07215fa0c588 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -161,7 +161,8 @@ against the glass. The inner region will increase, and in general, the
 ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR, which is always smaller than
 unity, is related to the contact pressure. For pressure-based devices,
 ABS_MT_PRESSURE may be used to provide the pressure on the contact area
-instead.
+instead. Devices capable of contact hovering can use ABS_MT_DISTANCE to
+indicate the distance between the contact and the surface.
 
 In addition to the MAJOR parameters, the oval shape of the contact can be
 described by adding the MINOR parameters, such that MAJOR and MINOR are the
@@ -213,6 +214,12 @@ The pressure, in arbitrary units, on the contact area. May be used instead
 of TOUCH and WIDTH for pressure-based devices or any device with a spatial
 signal intensity distribution.
 
+ABS_MT_DISTANCE
+
+The distance, in surface units, between the contact and the surface. Zero
+distance means the contact is touching the surface. A positive number means
+the contact is hovering above the surface.
+
 ABS_MT_ORIENTATION
 
 The orientation of the ellipse. The value should describe a signed quarter
diff --git a/include/linux/input.h b/include/linux/input.h
index 6de145df4c1c..b3a1e02080c0 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -733,11 +733,12 @@ struct input_keymap_entry {
 #define ABS_MT_BLOB_ID		0x38	/* Group a set of packets as a blob */
 #define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
 #define ABS_MT_PRESSURE		0x3a	/* Pressure on contact area */
+#define ABS_MT_DISTANCE		0x3b	/* Contact hover distance */
 
 #ifdef __KERNEL__
 /* Implementation details, userspace should not care about these */
 #define ABS_MT_FIRST		ABS_MT_TOUCH_MAJOR
-#define ABS_MT_LAST		ABS_MT_PRESSURE
+#define ABS_MT_LAST		ABS_MT_DISTANCE
 #endif
 
 #define ABS_MAX			0x3f
-- 
cgit v1.2.3-71-gd317


From 2e80a82a49c4c7eca4e35734380f28298ba5db19 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 17 Nov 2010 23:17:36 +0100
Subject: perf: Dynamic pmu types

Extend the perf_pmu_register() interface to allow for named and
dynamic pmu types.

Because we need to support the existing static types we cannot use
dynamic types for everything, hence provide a type argument.

If we want to enumerate the PMUs they need a name, provide one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101117222056.259707703@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/kernel/perf_event.c           |  2 +-
 arch/arm/kernel/perf_event.c             |  2 +-
 arch/powerpc/kernel/perf_event.c         |  2 +-
 arch/powerpc/kernel/perf_event_fsl_emb.c |  2 +-
 arch/sh/kernel/perf_event.c              |  2 +-
 arch/sparc/kernel/perf_event.c           |  2 +-
 arch/x86/kernel/cpu/perf_event.c         |  2 +-
 include/linux/perf_event.h               |  5 +++-
 kernel/hw_breakpoint.c                   |  2 +-
 kernel/perf_event.c                      | 48 ++++++++++++++++++++++++++++----
 10 files changed, 54 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 3283059b6e85..90561c45e7d8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -882,7 +882,7 @@ int __init init_hw_perf_events(void)
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index d45f70e5f2ee..fdfa4976b0bf 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,7 +3034,7 @@ init_hw_perf_events(void)
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c855933c..567480705789 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-	perf_pmu_register(&power_pmu);
+	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(power_pmu_notifier);
 
 	return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59ddf77..4dcf5f831e9d 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
-	perf_pmu_register(&fsl_emb_pmu);
+	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b33435650..2ee21a47b5af 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
 	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 39348b1cebd3..760578687e7c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1318,7 +1318,7 @@ int __init init_hw_perf_events(void)
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ce27c547fe78..0a360d146596 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1465,7 +1465,7 @@ int __init init_hw_perf_events(void)
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(x86_pmu_notifier);
 
 	return 0;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 30e50e2c7f30..21206d27466b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -588,6 +588,9 @@ struct perf_event;
 struct pmu {
 	struct list_head		entry;
 
+	char				*name;
+	int				type;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
@@ -916,7 +919,7 @@ struct perf_output_handle {
 
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e5325825aeb6..086adf25a55e 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
 
 	constraints_initialized = 1;
 
-	perf_pmu_register(&perf_breakpoint);
+	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a3d568fbacc6..8f09bc877a30 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -4961,7 +4962,7 @@ static struct pmu perf_tracepoint = {
 
 static inline void perf_tp_register(void)
 {
-	perf_pmu_register(&perf_tracepoint);
+	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -5305,8 +5306,9 @@ static void free_pmu_context(struct pmu *pmu)
 out:
 	mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
 
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
 
@@ -5316,13 +5318,32 @@ int perf_pmu_register(struct pmu *pmu)
 	if (!pmu->pmu_disable_count)
 		goto unlock;
 
+	pmu->type = -1;
+	if (!name)
+		goto skip_type;
+	pmu->name = name;
+
+	if (type < 0) {
+		int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+		if (!err)
+			goto free_pdc;
+
+		err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+		if (err) {
+			ret = err;
+			goto free_pdc;
+		}
+	}
+	pmu->type = type;
+
+skip_type:
 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
-		goto free_pdc;
+		goto free_ird;
 
 	for_each_possible_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
@@ -5366,6 +5387,10 @@ unlock:
 
 	return ret;
 
+free_idr:
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
@@ -5385,6 +5410,8 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
 	free_pmu_context(pmu);
 }
 
@@ -5394,6 +5421,13 @@ struct pmu *perf_init_event(struct perf_event *event)
 	int idx;
 
 	idx = srcu_read_lock(&pmus_srcu);
+
+	rcu_read_lock();
+	pmu = idr_find(&pmu_idr, event->attr.type);
+	rcu_read_unlock();
+	if (pmu)
+		goto unlock;
+
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		int ret = pmu->event_init(event);
 		if (!ret)
@@ -6555,11 +6589,13 @@ void __init perf_event_init(void)
 {
 	int ret;
 
+	idr_init(&pmu_idr);
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
-	perf_pmu_register(&perf_swevent);
-	perf_pmu_register(&perf_cpu_clock);
-	perf_pmu_register(&perf_task_clock);
+	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+	perf_pmu_register(&perf_cpu_clock, NULL, -1);
+	perf_pmu_register(&perf_task_clock, NULL, -1);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
 	register_reboot_notifier(&perf_reboot_notifier);
-- 
cgit v1.2.3-71-gd317


From abe43400579d5de0078c2d3a760e6598e183f871 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 17 Nov 2010 23:17:37 +0100
Subject: perf: Sysfs enumeration

Simple sysfs emumeration of the PMUs.

Use a "event_source" bus, and add PMU devices using their name.

Each PMU device has a type attribute which contrains the value needed
for perf_event_attr::type to identify this PMU.

This is the minimal stub needed to start using this interface,
we'll consider extending the sysfs usage later.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Greg KH <gregkh@suse.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101117222056.316982569@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  1 +
 kernel/perf_event.c        | 95 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 95 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 21206d27466b..dda5b0a3ff60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -588,6 +588,7 @@ struct perf_event;
 struct pmu {
 	struct list_head		entry;
 
+	struct device			*dev;
 	char				*name;
 	int				type;
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 8f09bc877a30..11847bf1e8cc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -24,6 +24,7 @@
 #include <linux/ptrace.h>
 #include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -5308,6 +5309,58 @@ out:
 }
 static struct idr pmu_idr;
 
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+	.name		= "event_source",
+	.dev_attrs	= pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+	int ret = -ENOMEM;
+
+	pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!pmu->dev)
+		goto out;
+
+	device_initialize(pmu->dev);
+	ret = dev_set_name(pmu->dev, "%s", pmu->name);
+	if (ret)
+		goto free_dev;
+
+	dev_set_drvdata(pmu->dev, pmu);
+	pmu->dev->bus = &pmu_bus;
+	pmu->dev->release = pmu_dev_release;
+	ret = device_add(pmu->dev);
+	if (ret)
+		goto free_dev;
+
+out:
+	return ret;
+
+free_dev:
+	put_device(pmu->dev);
+	goto out;
+}
+
 int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
@@ -5336,6 +5389,12 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type)
 	}
 	pmu->type = type;
 
+	if (pmu_bus_running) {
+		ret = pmu_dev_alloc(pmu);
+		if (ret)
+			goto free_idr;
+	}
+
 skip_type:
 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
 	if (pmu->pmu_cpu_context)
@@ -5343,7 +5402,7 @@ skip_type:
 
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
-		goto free_ird;
+		goto free_dev;
 
 	for_each_possible_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
@@ -5387,6 +5446,10 @@ unlock:
 
 	return ret;
 
+free_dev:
+	device_del(pmu->dev);
+	put_device(pmu->dev);
+
 free_idr:
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
@@ -5412,6 +5475,8 @@ void perf_pmu_unregister(struct pmu *pmu)
 	free_percpu(pmu->pmu_disable_count);
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
+	device_del(pmu->dev);
+	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
 
@@ -6603,3 +6668,31 @@ void __init perf_event_init(void)
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+	struct pmu *pmu;
+	int ret;
+
+	mutex_lock(&pmus_lock);
+
+	ret = bus_register(&pmu_bus);
+	if (ret)
+		goto unlock;
+
+	list_for_each_entry(pmu, &pmus, entry) {
+		if (!pmu->name || pmu->type < 0)
+			continue;
+
+		ret = pmu_dev_alloc(pmu);
+		WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+	}
+	pmu_bus_running = 1;
+	ret = 0;
+
+unlock:
+	mutex_unlock(&pmus_lock);
+
+	return ret;
+}
+device_initcall(perf_event_sysfs_init);
-- 
cgit v1.2.3-71-gd317


From d9c407b138926132e1f93c01fb2dee50eb0bb615 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:55:50 +0000
Subject: NFS: Introduce new-style XDR encoding functions for NFSv3

We're interested in taking advantage of the safety benefits of
xdr_streams.  These data structures allow more careful checking for
buffer overflow while encoding.  More careful type checking is also
introduced in the new functions.

For efficiency, we also eventually want to be able to pass xdr_streams
from call_encode() to all XDR encoding functions, rather than building
an xdr_stream in every XDR encoding function in the kernel.  To do
this means all encoders must be ready to handle a passed-in
xdr_stream.

The new encoders follow the modern paradigm for XDR encoders: BUG on
error, and always return a zero status code.

Static helper functions are left without the "inline" directive.  This
allows the compiler to choose automatically how to optimize these for
size or speed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3xdr.c     | 833 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs3.h |   2 +
 2 files changed, 832 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f06dac..3d1043f7667c 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -37,6 +37,7 @@
 #define NFS3_filename_sz	(1+(NFS3_MAXNAMLEN>>2))
 #define NFS3_path_sz		(1+(NFS3_MAXPATHLEN>>2))
 #define NFS3_fattr_sz		(21)
+#define NFS3_cookieverf_sz	(NFS3_COOKIEVERFSIZE>>2)
 #define NFS3_wcc_attr_sz		(6)
 #define NFS3_pre_op_attr_sz	(1+NFS3_wcc_attr_sz)
 #define NFS3_post_op_attr_sz	(1+NFS3_fattr_sz)
@@ -59,7 +60,8 @@
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
-#define NFS3_readdirargs_sz	(NFS3_fh_sz+2)
+#define NFS3_readdirargs_sz	(NFS3_fh_sz+NFS3_cookieverf_sz+3)
+#define NFS3_readdirplusargs_sz	(NFS3_fh_sz+NFS3_cookieverf_sz+4)
 #define NFS3_commitargs_sz	(NFS3_fh_sz+3)
 
 #define NFS3_attrstat_sz	(1+NFS3_fattr_sz)
@@ -107,6 +109,22 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
 		func, xdr->end - xdr->p);
 }
 
+/*
+ * While encoding arguments, set up the reply buffer in advance to
+ * receive reply data directly into the page cache.
+ */
+static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
+				 unsigned int base, unsigned int len,
+				 unsigned int bufsize)
+{
+	struct rpc_auth	*auth = req->rq_cred->cr_auth;
+	unsigned int replen;
+
+	replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
+}
+
+
 /*
  * Common NFS XDR functions as inlines
  */
@@ -153,7 +171,7 @@ out_overflow:
  * Encode/decode time.
  */
 static inline __be32 *
-xdr_encode_time3(__be32 *p, struct timespec *timep)
+xdr_encode_time3(__be32 *p, const struct timespec *timep)
 {
 	*p++ = htonl(timep->tv_sec);
 	*p++ = htonl(timep->tv_nsec);
@@ -205,7 +223,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
 }
 
 static inline __be32 *
-xdr_encode_sattr(__be32 *p, struct iattr *attr)
+xdr_encode_sattr(__be32 *p, const struct iattr *attr)
 {
 	if (attr->ia_valid & ATTR_MODE) {
 		*p++ = xdr_one;
@@ -306,6 +324,243 @@ xdr_decode_wcc_data(__be32 *p, struct nfs_fattr *fattr)
 	return xdr_decode_post_op_attr(p, fattr);
 }
 
+
+/*
+ * Encode/decode NFSv3 basic data types
+ *
+ * Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
+ * "NFS Version 3 Protocol Specification".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions.  For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_uint32(struct xdr_stream *xdr, u32 value)
+{
+	__be32 *p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(value);
+}
+
+/*
+ * filename3
+ *
+ *	typedef string filename3<>;
+ */
+static void encode_filename3(struct xdr_stream *xdr,
+			     const char *name, u32 length)
+{
+	__be32 *p;
+
+	BUG_ON(length > NFS3_MAXNAMLEN);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, name, length);
+}
+
+/*
+ * nfspath3
+ *
+ *	typedef string nfspath3<>;
+ */
+static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
+			    const u32 length)
+{
+	BUG_ON(length > NFS3_MAXPATHLEN);
+	encode_uint32(xdr, length);
+	xdr_write_pages(xdr, pages, 0, length);
+}
+
+/*
+ * cookie3
+ *
+ *	typedef uint64 cookie3
+ */
+static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
+{
+	return xdr_encode_hyper(p, cookie);
+}
+
+/*
+ * cookieverf3
+ *
+ *	typedef opaque cookieverf3[NFS3_COOKIEVERFSIZE];
+ */
+static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
+{
+	memcpy(p, verifier, NFS3_COOKIEVERFSIZE);
+	return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
+}
+
+/*
+ * createverf3
+ *
+ *	typedef opaque createverf3[NFS3_CREATEVERFSIZE];
+ */
+static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, NFS3_CREATEVERFSIZE);
+	memcpy(p, verifier, NFS3_CREATEVERFSIZE);
+}
+
+/*
+ * ftype3
+ *
+ *	enum ftype3 {
+ *		NF3REG	= 1,
+ *		NF3DIR	= 2,
+ *		NF3BLK	= 3,
+ *		NF3CHR	= 4,
+ *		NF3LNK	= 5,
+ *		NF3SOCK	= 6,
+ *		NF3FIFO	= 7
+ *	};
+ */
+static void encode_ftype3(struct xdr_stream *xdr, const u32 type)
+{
+	BUG_ON(type > NF3FIFO);
+	encode_uint32(xdr, type);
+}
+
+/*
+ * specdata3
+ *
+ *     struct specdata3 {
+ *             uint32  specdata1;
+ *             uint32  specdata2;
+ *     };
+ */
+static void encode_specdata3(struct xdr_stream *xdr, const dev_t rdev)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(MAJOR(rdev));
+	*p = cpu_to_be32(MINOR(rdev));
+}
+
+/*
+ * nfs_fh3
+ *
+ *	struct nfs_fh3 {
+ *		opaque       data<NFS3_FHSIZE>;
+ *	};
+ */
+static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	__be32 *p;
+
+	BUG_ON(fh->size > NFS3_FHSIZE);
+	p = xdr_reserve_space(xdr, 4 + fh->size);
+	xdr_encode_opaque(p, fh->data, fh->size);
+}
+
+/*
+ * sattr3
+ *
+ *	enum time_how {
+ *		DONT_CHANGE		= 0,
+ *		SET_TO_SERVER_TIME	= 1,
+ *		SET_TO_CLIENT_TIME	= 2
+ *	};
+ *
+ *	union set_mode3 switch (bool set_it) {
+ *	case TRUE:
+ *		mode3	mode;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	union set_uid3 switch (bool set_it) {
+ *	case TRUE:
+ *		uid3	uid;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	union set_gid3 switch (bool set_it) {
+ *	case TRUE:
+ *		gid3	gid;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	union set_size3 switch (bool set_it) {
+ *	case TRUE:
+ *		size3	size;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	union set_atime switch (time_how set_it) {
+ *	case SET_TO_CLIENT_TIME:
+ *		nfstime3	atime;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	union set_mtime switch (time_how set_it) {
+ *	case SET_TO_CLIENT_TIME:
+ *		nfstime3  mtime;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct sattr3 {
+ *		set_mode3	mode;
+ *		set_uid3	uid;
+ *		set_gid3	gid;
+ *		set_size3	size;
+ *		set_atime	atime;
+ *		set_mtime	mtime;
+ *	};
+ */
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+{
+	u32 nbytes;
+	__be32 *p;
+
+	/*
+	 * In order to make only a single xdr_reserve_space() call,
+	 * pre-compute the total number of bytes to be reserved.
+	 * Six boolean values, one for each set_foo field, are always
+	 * present in the encoded result, so start there.
+	 */
+	nbytes = 6 * 4;
+	if (attr->ia_valid & ATTR_MODE)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_UID)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_GID)
+		nbytes += 4;
+	if (attr->ia_valid & ATTR_SIZE)
+		nbytes += 8;
+	if (attr->ia_valid & ATTR_ATIME_SET)
+		nbytes += 8;
+	if (attr->ia_valid & ATTR_MTIME_SET)
+		nbytes += 8;
+	p = xdr_reserve_space(xdr, nbytes);
+
+	xdr_encode_sattr(p, attr);
+}
+
+/*
+ * diropargs3
+ *
+ *	struct diropargs3 {
+ *		nfs_fh3		dir;
+ *		filename3	name;
+ *	};
+ */
+static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
+			      const char *name, u32 length)
+{
+	encode_nfs_fh3(xdr, fh);
+	encode_filename3(xdr, name, length);
+}
+
+
 /*
  * NFS encode functions
  */
@@ -321,6 +576,23 @@ nfs3_xdr_fhandle(struct rpc_rqst *req, __be32 *p, struct nfs_fh *fh)
 	return 0;
 }
 
+/*
+ * 3.3.1  GETATTR3args
+ *
+ *	struct GETATTR3args {
+ *		nfs_fh3  object;
+ *	};
+ */
+static int nfs3_xdr_enc_getattr3args(struct rpc_rqst *req, __be32 *p,
+				     const struct nfs_fh *fh)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, fh);
+	return 0;
+}
+
 /*
  * Encode SETATTR arguments
  */
@@ -336,6 +608,49 @@ nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.2  SETATTR3args
+ *
+ *	union sattrguard3 switch (bool check) {
+ *	case TRUE:
+ *		nfstime3  obj_ctime;
+ *	case FALSE:
+ *		void;
+ *	};
+ *
+ *	struct SETATTR3args {
+ *		nfs_fh3		object;
+ *		sattr3		new_attributes;
+ *		sattrguard3	guard;
+ *	};
+ */
+static void encode_sattrguard3(struct xdr_stream *xdr,
+			       const struct nfs3_sattrargs *args)
+{
+	__be32 *p;
+
+	if (args->guard) {
+		p = xdr_reserve_space(xdr, 4 + 8);
+		*p++ = xdr_one;
+		xdr_encode_time3(p, &args->guardtime);
+	} else {
+		p = xdr_reserve_space(xdr, 4);
+		*p = xdr_zero;
+	}
+}
+
+static int nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, __be32 *p,
+				     const struct nfs3_sattrargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, args->fh);
+	encode_sattr3(&xdr, args->sattr);
+	encode_sattrguard3(&xdr, args);
+	return 0;
+}
+
 /*
  * Encode directory ops argument
  */
@@ -348,6 +663,23 @@ nfs3_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs3_diropargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.3  LOOKUP3args
+ *
+ *	struct LOOKUP3args {
+ *		diropargs3  what;
+ *	};
+ */
+static int nfs3_xdr_enc_lookup3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs3_diropargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fh, args->name, args->len);
+	return 0;
+}
+
 /*
  * Encode REMOVE argument
  */
@@ -372,6 +704,50 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 	return 0;
 }
 
+/*
+ * 3.3.4  ACCESS3args
+ *
+ *	struct ACCESS3args {
+ *		nfs_fh3		object;
+ *		uint32		access;
+ *	};
+ */
+static void encode_access3args(struct xdr_stream *xdr,
+			       const struct nfs3_accessargs *args)
+{
+	encode_nfs_fh3(xdr, args->fh);
+	encode_uint32(xdr, args->access);
+}
+
+static int nfs3_xdr_enc_access3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs3_accessargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_access3args(&xdr, args);
+	return 0;
+}
+
+/*
+ * 3.3.5  READLINK3args
+ *
+ *	struct READLINK3args {
+ *		nfs_fh3	symlink;
+ *	};
+ */
+static int nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, __be32 *p,
+				      const struct nfs3_readlinkargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, args->fh);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+					args->pglen, NFS3_readlinkres_sz);
+	return 0;
+}
+
 /*
  * Arguments to a READ call. Since we read data directly into the page
  * cache, we also set up the reply iovec here so that iov[1] points
@@ -397,6 +773,40 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.6  READ3args
+ *
+ *	struct READ3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *	};
+ */
+static void encode_read3args(struct xdr_stream *xdr,
+			     const struct nfs_readargs *args)
+{
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + 4);
+	p = xdr_encode_hyper(p, args->offset);
+	*p = cpu_to_be32(args->count);
+}
+
+static int nfs3_xdr_enc_read3args(struct rpc_rqst *req, __be32 *p,
+				  const struct nfs_readargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_read3args(&xdr, args);
+	prepare_reply_buffer(req, args->pages, args->pgbase,
+					args->count, NFS3_readres_sz);
+	req->rq_rcv_buf.flags |= XDRBUF_READ;
+	return 0;
+}
+
 /*
  * Write arguments. Splice the buffer to be written into the iovec.
  */
@@ -419,6 +829,52 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.7  WRITE3args
+ *
+ *	enum stable_how {
+ *		UNSTABLE  = 0,
+ *		DATA_SYNC = 1,
+ *		FILE_SYNC = 2
+ *	};
+ *
+ *	struct WRITE3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *		stable_how	stable;
+ *		opaque		data<>;
+ *	};
+ */
+static void encode_write3args(struct xdr_stream *xdr,
+			      const struct nfs_writeargs *args)
+{
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + 4 + 4 + 4);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = cpu_to_be32(args->count);
+
+	BUG_ON(args->stable > NFS_FILE_SYNC);
+	*p++ = cpu_to_be32(args->stable);
+
+	*p = cpu_to_be32(args->count);
+	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+}
+
+static int nfs3_xdr_enc_write3args(struct rpc_rqst *req, __be32 *p,
+				   const struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_write3args(&xdr, args);
+	xdr.buf->flags |= XDRBUF_WRITE;
+	return 0;
+}
+
 /*
  * Encode CREATE arguments
  */
@@ -439,6 +895,56 @@ nfs3_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs3_createargs *arg
 	return 0;
 }
 
+/*
+ * 3.3.8  CREATE3args
+ *
+ *	enum createmode3 {
+ *		UNCHECKED = 0,
+ *		GUARDED   = 1,
+ *		EXCLUSIVE = 2
+ *	};
+ *
+ *	union createhow3 switch (createmode3 mode) {
+ *	case UNCHECKED:
+ *	case GUARDED:
+ *		sattr3       obj_attributes;
+ *	case EXCLUSIVE:
+ *		createverf3  verf;
+ *	};
+ *
+ *	struct CREATE3args {
+ *		diropargs3	where;
+ *		createhow3	how;
+ *	};
+ */
+static void encode_createhow3(struct xdr_stream *xdr,
+			      const struct nfs3_createargs *args)
+{
+	encode_uint32(xdr, args->createmode);
+	switch (args->createmode) {
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+		encode_sattr3(xdr, args->sattr);
+		break;
+	case NFS3_CREATE_EXCLUSIVE:
+		encode_createverf3(xdr, args->verifier);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static int nfs3_xdr_enc_create3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs3_createargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fh, args->name, args->len);
+	encode_createhow3(&xdr, args);
+	return 0;
+}
+
 /*
  * Encode MKDIR arguments
  */
@@ -452,6 +958,25 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mkdirargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.9  MKDIR3args
+ *
+ *	struct MKDIR3args {
+ *		diropargs3	where;
+ *		sattr3		attributes;
+ *	};
+ */
+static int nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req, __be32 *p,
+				   const struct nfs3_mkdirargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fh, args->name, args->len);
+	encode_sattr3(&xdr, args->sattr);
+	return 0;
+}
+
 /*
  * Encode SYMLINK arguments
  */
@@ -469,6 +994,37 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_symlinkargs *a
 	return 0;
 }
 
+/*
+ * 3.3.10  SYMLINK3args
+ *
+ *	struct symlinkdata3 {
+ *		sattr3		symlink_attributes;
+ *		nfspath3	symlink_data;
+ *	};
+ *
+ *	struct SYMLINK3args {
+ *		diropargs3	where;
+ *		symlinkdata3	symlink;
+ *	};
+ */
+static void encode_symlinkdata3(struct xdr_stream *xdr,
+				const struct nfs3_symlinkargs *args)
+{
+	encode_sattr3(xdr, args->sattr);
+	encode_nfspath3(xdr, args->pages, args->pathlen);
+}
+
+static int nfs3_xdr_enc_symlink3args(struct rpc_rqst *req, __be32 *p,
+				     const struct nfs3_symlinkargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_symlinkdata3(&xdr, args);
+	return 0;
+}
+
 /*
  * Encode MKNOD arguments
  */
@@ -488,6 +1044,86 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.11  MKNOD3args
+ *
+ *	struct devicedata3 {
+ *		sattr3		dev_attributes;
+ *		specdata3	spec;
+ *	};
+ *
+ *	union mknoddata3 switch (ftype3 type) {
+ *	case NF3CHR:
+ *	case NF3BLK:
+ *		devicedata3	device;
+ *	case NF3SOCK:
+ *	case NF3FIFO:
+ *		sattr3		pipe_attributes;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct MKNOD3args {
+ *		diropargs3	where;
+ *		mknoddata3	what;
+ *	};
+ */
+static void encode_devicedata3(struct xdr_stream *xdr,
+			       const struct nfs3_mknodargs *args)
+{
+	encode_sattr3(xdr, args->sattr);
+	encode_specdata3(xdr, args->rdev);
+}
+
+static void encode_mknoddata3(struct xdr_stream *xdr,
+			      const struct nfs3_mknodargs *args)
+{
+	encode_ftype3(xdr, args->type);
+	switch (args->type) {
+	case NF3CHR:
+	case NF3BLK:
+		encode_devicedata3(xdr, args);
+		break;
+	case NF3SOCK:
+	case NF3FIFO:
+		encode_sattr3(xdr, args->sattr);
+		break;
+	case NF3REG:
+	case NF3DIR:
+		break;
+	default:
+		BUG();
+	}
+}
+
+static int nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, __be32 *p,
+				   const struct nfs3_mknodargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fh, args->name, args->len);
+	encode_mknoddata3(&xdr, args);
+	return 0;
+}
+
+/*
+ * 3.3.12  REMOVE3args
+ *
+ *	struct REMOVE3args {
+ *		diropargs3  object;
+ *	};
+ */
+static int nfs3_xdr_enc_remove3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs_removeargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->fh, args->name.name, args->name.len);
+	return 0;
+}
+
 /*
  * Encode RENAME arguments
  */
@@ -502,6 +1138,27 @@ nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args
 	return 0;
 }
 
+/*
+ * 3.3.14  RENAME3args
+ *
+ *	struct RENAME3args {
+ *		diropargs3	from;
+ *		diropargs3	to;
+ *	};
+ */
+static int nfs3_xdr_enc_rename3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs_renameargs *args)
+{
+	const struct qstr *old = args->old_name;
+	const struct qstr *new = args->new_name;
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_diropargs3(&xdr, args->old_dir, old->name, old->len);
+	encode_diropargs3(&xdr, args->new_dir, new->name, new->len);
+	return 0;
+}
+
 /*
  * Encode LINK arguments
  */
@@ -515,6 +1172,25 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.15  LINK3args
+ *
+ *	struct LINK3args {
+ *		nfs_fh3		file;
+ *		diropargs3	link;
+ *	};
+ */
+static int nfs3_xdr_enc_link3args(struct rpc_rqst *req, __be32 *p,
+				  const struct nfs3_linkargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, args->fromfh);
+	encode_diropargs3(&xdr, args->tofh, args->toname, args->tolen);
+	return 0;
+}
+
 /*
  * Encode arguments to readdir call
  */
@@ -543,6 +1219,84 @@ nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *a
 	return 0;
 }
 
+/*
+ * 3.3.16  READDIR3args
+ *
+ *	struct READDIR3args {
+ *		nfs_fh3		dir;
+ *		cookie3		cookie;
+ *		cookieverf3	cookieverf;
+ *		count3		count;
+ *	};
+ */
+static void encode_readdir3args(struct xdr_stream *xdr,
+				const struct nfs3_readdirargs *args)
+{
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4);
+	p = xdr_encode_cookie3(p, args->cookie);
+	p = xdr_encode_cookieverf3(p, args->verf);
+	*p = cpu_to_be32(args->count);
+}
+
+static int nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, __be32 *p,
+				     const struct nfs3_readdirargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_readdir3args(&xdr, args);
+	prepare_reply_buffer(req, args->pages, 0,
+				args->count, NFS3_readdirres_sz);
+	return 0;
+}
+
+/*
+ * 3.3.17  READDIRPLUS3args
+ *
+ *	struct READDIRPLUS3args {
+ *		nfs_fh3		dir;
+ *		cookie3		cookie;
+ *		cookieverf3	cookieverf;
+ *		count3		dircount;
+ *		count3		maxcount;
+ *	};
+ */
+static void encode_readdirplus3args(struct xdr_stream *xdr,
+				    const struct nfs3_readdirargs *args)
+{
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + NFS3_COOKIEVERFSIZE + 4 + 4);
+	p = xdr_encode_cookie3(p, args->cookie);
+	p = xdr_encode_cookieverf3(p, args->verf);
+
+	/*
+	 * readdirplus: need dircount + buffer size.
+	 * We just make sure we make dircount big enough
+	 */
+	*p++ = cpu_to_be32(args->count >> 3);
+
+	*p = cpu_to_be32(args->count);
+}
+
+static int nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, __be32 *p,
+					 const struct nfs3_readdirargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_readdirplus3args(&xdr, args);
+	prepare_reply_buffer(req, args->pages, 0,
+				args->count, NFS3_readdirres_sz);
+	return 0;
+}
+
 /*
  * Decode the result of a readdir call.
  * We just check for syntactical correctness.
@@ -674,6 +1428,37 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
 	return 0;
 }
 
+/*
+ * 3.3.21  COMMIT3args
+ *
+ *	struct COMMIT3args {
+ *		nfs_fh3		file;
+ *		offset3		offset;
+ *		count3		count;
+ *	};
+ */
+static void encode_commit3args(struct xdr_stream *xdr,
+			       const struct nfs_writeargs *args)
+{
+	__be32 *p;
+
+	encode_nfs_fh3(xdr, args->fh);
+
+	p = xdr_reserve_space(xdr, 8 + 4);
+	p = xdr_encode_hyper(p, args->offset);
+	*p = cpu_to_be32(args->count);
+}
+
+static int nfs3_xdr_enc_commit3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_commit3args(&xdr, args);
+	return 0;
+}
+
 #ifdef CONFIG_NFS_V3_ACL
 /*
  * Encode GETACL arguments
@@ -699,6 +1484,21 @@ nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 	return 0;
 }
 
+static int nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs3_getaclargs *args)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, args->fh);
+	encode_uint32(&xdr, args->mask);
+	if (args->mask & (NFS_ACL | NFS_DFACL))
+		prepare_reply_buffer(req, args->pages, 0,
+					NFSACL_MAXPAGES << PAGE_SHIFT,
+					ACL3_getaclres_sz);
+	return 0;
+}
+
 /*
  * Encode SETACL arguments
  */
@@ -731,6 +1531,33 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
 				    NFS_ACL_DEFAULT);
 	return (err > 0) ? 0 : err;
 }
+
+static int nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, __be32 *p,
+				    const struct nfs3_setaclargs *args)
+{
+	struct xdr_stream xdr;
+	unsigned int base;
+	int error;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_nfs_fh3(&xdr, NFS_FH(args->inode));
+	encode_uint32(&xdr, args->mask);
+	if (args->npages != 0)
+		xdr_write_pages(&xdr, args->pages, 0, args->len);
+
+	base = req->rq_slen;
+	error = nfsacl_encode(xdr.buf, base, args->inode,
+			    (args->mask & NFS_ACL) ?
+			    args->acl_access : NULL, 1, 0);
+	BUG_ON(error < 0);
+	error = nfsacl_encode(xdr.buf, base + error, args->inode,
+			    (args->mask & NFS_DFACL) ?
+			    args->acl_default : NULL, 1,
+			    NFS_ACL_DEFAULT);
+	BUG_ON(error < 0);
+	return 0;
+}
+
 #endif  /* CONFIG_NFS_V3_ACL */
 
 /*
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index ac33806ec7f9..e2ee14fd0083 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -11,6 +11,8 @@
 #define NFS3_MAXGROUPS		16
 #define NFS3_FHSIZE		64
 #define NFS3_COOKIESIZE		4
+#define NFS3_CREATEVERFSIZE	8
+#define NFS3_COOKIEVERFSIZE	8
 #define NFS3_FIFO_DEV		(-1)
 #define NFS3MODE_FMT		0170000
 #define NFS3MODE_DIR		0040000
-- 
cgit v1.2.3-71-gd317


From e4f9323409369a3aeb01885c0c4409d2eeec794a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:56:30 +0000
Subject: NFS: Introduce new-style XDR decoding functions for NFSv2

We'd like to prevent local buffer overflows caused by malicious or
broken servers.  New xdr_stream style decoders can do that.

For efficiency, we also eventually want to be able to pass xdr_streams
from call_decode() to all XDR decoding functions, rather than building
an xdr_stream in every XDR decoding function in the kernel.

Static helper functions are left without the "inline" directive.  This
allows the compiler to choose automatically how to optimize these for
size or speed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3xdr.c     | 1534 +++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/nfs3.h |    1 +
 2 files changed, 1451 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 119844d0b4d5..0f07c6d55131 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -104,13 +104,6 @@ static const umode_t nfs_type2fmt[] = {
 	[NF3FIFO] = S_IFIFO,
 };
 
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-	dprintk("nfs: %s: prematurely hit end of receive buffer. "
-		"Remaining buffer length is %tu words.\n",
-		func, xdr->end - xdr->p);
-}
-
 /*
  * While encoding arguments, set up the reply buffer in advance to
  * receive reply data directly into the page cache.
@@ -126,6 +119,16 @@ static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
 }
 
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+	dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+		"Remaining buffer length is %tu words.\n",
+		func, xdr->end - xdr->p);
+}
+
 
 /*
  * Common NFS XDR functions as inlines
@@ -284,6 +287,44 @@ static void encode_uint32(struct xdr_stream *xdr, u32 value)
 	*p = cpu_to_be32(value);
 }
 
+static int decode_uint32(struct xdr_stream *xdr, u32 *value)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*value = be32_to_cpup(p);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_uint64(struct xdr_stream *xdr, u64 *value)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	xdr_decode_hyper(p, value);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * fileid3
+ *
+ *	typedef uint64 fileid3;
+ */
+static int decode_fileid3(struct xdr_stream *xdr, u64 *fileid)
+{
+	return decode_uint64(xdr, fileid);
+}
+
 /*
  * filename3
  *
@@ -299,6 +340,33 @@ static void encode_filename3(struct xdr_stream *xdr,
 	xdr_encode_opaque(p, name, length);
 }
 
+static int decode_inline_filename3(struct xdr_stream *xdr,
+				   const char **name, u32 *length)
+{
+	__be32 *p;
+	u32 count;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	if (count > NFS3_MAXNAMLEN)
+		goto out_nametoolong;
+	p = xdr_inline_decode(xdr, count);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*name = (const char *)p;
+	*length = count;
+	return 0;
+
+out_nametoolong:
+	dprintk("NFS: returned filename too long: %u\n", count);
+	return -ENAMETOOLONG;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
  * nfspath3
  *
@@ -312,6 +380,39 @@ static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
 	xdr_write_pages(xdr, pages, 0, length);
 }
 
+static int decode_nfspath3(struct xdr_stream *xdr)
+{
+	u32 recvd, count;
+	size_t hdrlen;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+	if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
+		goto out_nametoolong;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(count > recvd))
+		goto out_cheating;
+
+	xdr_read_pages(xdr, count);
+	xdr_terminate_string(xdr->buf, count);
+	return 0;
+
+out_nametoolong:
+	dprintk("NFS: returned pathname too long: %u\n", count);
+	return -ENAMETOOLONG;
+out_cheating:
+	dprintk("NFS: server cheating in pathname result: "
+		"count %u > recvd %u\n", count, recvd);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
  * cookie3
  *
@@ -322,6 +423,11 @@ static __be32 *xdr_encode_cookie3(__be32 *p, u64 cookie)
 	return xdr_encode_hyper(p, cookie);
 }
 
+static int decode_cookie3(struct xdr_stream *xdr, u64 *cookie)
+{
+	return decode_uint64(xdr, cookie);
+}
+
 /*
  * cookieverf3
  *
@@ -333,6 +439,20 @@ static __be32 *xdr_encode_cookieverf3(__be32 *p, const __be32 *verifier)
 	return p + XDR_QUADLEN(NFS3_COOKIEVERFSIZE);
 }
 
+static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
  * createverf3
  *
@@ -346,6 +466,54 @@ static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
 	memcpy(p, verifier, NFS3_CREATEVERFSIZE);
 }
 
+static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	memcpy(verifier, p, NFS3_WRITEVERFSIZE);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * size3
+ *
+ *	typedef uint64 size3;
+ */
+static __be32 *xdr_decode_size3(__be32 *p, u64 *size)
+{
+	return xdr_decode_hyper(p, size);
+}
+
+/*
+ * nfsstat3
+ *
+ *	enum nfsstat3 {
+ *		NFS3_OK = 0,
+ *		...
+ *	}
+ */
+#define NFS3_OK		NFS_OK
+
+static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	*status = be32_to_cpup(p);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
  * ftype3
  *
@@ -398,6 +566,36 @@ static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh)
 	xdr_encode_opaque(p, fh->data, fh->size);
 }
 
+static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p++);
+	if (unlikely(length > NFS3_FHSIZE))
+		goto out_toobig;
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	fh->size = length;
+	memcpy(fh->data, p, length);
+	return 0;
+out_toobig:
+	dprintk("NFS: file handle size (%u) too big\n", length);
+	return -E2BIG;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static void zero_nfs_fh3(struct nfs_fh *fh)
+{
+	memset(fh, 0, sizeof(*fh));
+}
+
 /*
  * nfstime3
  *
@@ -540,6 +738,153 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
 		*p = xdr_zero;
 }
 
+/*
+ * fattr3
+ *
+ *	struct fattr3 {
+ *		ftype3		type;
+ *		mode3		mode;
+ *		uint32		nlink;
+ *		uid3		uid;
+ *		gid3		gid;
+ *		size3		size;
+ *		size3		used;
+ *		specdata3	rdev;
+ *		uint64		fsid;
+ *		fileid3		fileid;
+ *		nfstime3	atime;
+ *		nfstime3	mtime;
+ *		nfstime3	ctime;
+ *	};
+ */
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	xdr_decode_fattr(p, fattr);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * post_op_attr
+ *
+ *	union post_op_attr switch (bool attributes_follow) {
+ *	case TRUE:
+ *		fattr3	attributes;
+ *	case FALSE:
+ *		void;
+ *	};
+ */
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p != xdr_zero)
+		return decode_fattr3(xdr, fattr);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * wcc_attr
+ *	struct wcc_attr {
+ *		size3		size;
+ *		nfstime3	mtime;
+ *		nfstime3	ctime;
+ *	};
+ */
+static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	xdr_decode_wcc_attr(p, fattr);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * pre_op_attr
+ *	union pre_op_attr switch (bool attributes_follow) {
+ *	case TRUE:
+ *		wcc_attr	attributes;
+ *	case FALSE:
+ *		void;
+ *	};
+ *
+ * wcc_data
+ *
+ *	struct wcc_data {
+ *		pre_op_attr	before;
+ *		post_op_attr	after;
+ *	};
+ */
+static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p != xdr_zero)
+		return decode_wcc_attr(xdr, fattr);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+	int error;
+
+	error = decode_pre_op_attr(xdr, fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(xdr, fattr);
+out:
+	return error;
+}
+
+/*
+ * post_op_fh3
+ *
+ *	union post_op_fh3 switch (bool handle_follows) {
+ *	case TRUE:
+ *		nfs_fh3  handle;
+ *	case FALSE:
+ *		void;
+ *	};
+ */
+static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	__be32 *p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p != xdr_zero)
+		return decode_nfs_fh3(xdr, fh);
+	zero_nfs_fh3(fh);
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 /*
  * diropargs3
  *
@@ -1108,78 +1453,6 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	return pglen;
 }
 
-__be32 *
-nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
-{
-	__be32 *p;
-	struct nfs_entry old = *entry;
-
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	if (!ntohl(*p++)) {
-		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
-			goto out_overflow;
-		if (!ntohl(*p++))
-			return ERR_PTR(-EAGAIN);
-		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
-	}
-
-	p = xdr_inline_decode(xdr, 12);
-	if (unlikely(!p))
-		goto out_overflow;
-	p = xdr_decode_hyper(p, &entry->ino);
-	entry->len  = ntohl(*p++);
-
-	p = xdr_inline_decode(xdr, entry->len + 8);
-	if (unlikely(!p))
-		goto out_overflow;
-	entry->name = (const char *) p;
-	p += XDR_QUADLEN(entry->len);
-	entry->prev_cookie = entry->cookie;
-	p = xdr_decode_hyper(p, &entry->cookie);
-
-	entry->d_type = DT_UNKNOWN;
-	if (plus) {
-		entry->fattr->valid = 0;
-		p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
-		if (IS_ERR(p))
-			goto out_overflow_exit;
-		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
-		/* In fact, a post_op_fh3: */
-		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
-			goto out_overflow;
-		if (*p++) {
-			p = xdr_decode_fhandle_stream(xdr, entry->fh);
-			if (IS_ERR(p))
-				goto out_overflow_exit;
-			/* Ugh -- server reply was truncated */
-			if (p == NULL) {
-				dprintk("NFS: FH truncated\n");
-				*entry = old;
-				return ERR_PTR(-EAGAIN);
-			}
-		} else
-			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
-	}
-
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
-	return p;
-
-out_overflow:
-	print_overflow_msg(__func__, xdr);
-out_overflow_exit:
-	return ERR_PTR(-EAGAIN);
-}
-
 /*
  * 3.3.21  COMMIT3args
  *
@@ -1275,13 +1548,47 @@ nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 }
 
 /*
- * Decode status+wcc_data reply
- * SATTR, REMOVE, RMDIR
+ * 3.3.1  GETATTR3res
+ *
+ *	struct GETATTR3resok {
+ *		fattr3		obj_attributes;
+ *	};
+ *
+ *	union GETATTR3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		GETATTR3resok  resok;
+ *	default:
+ *		void;
+ *	};
  */
-static int
-nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, __be32 *p,
+				    struct nfs_fattr *result)
 {
-	int	status;
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_fattr3(&xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
+
+/*
+ * Decode status+wcc_data reply
+ * SATTR, REMOVE, RMDIR
+ */
+static int
+nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
+{
+	int	status;
 
 	if ((status = ntohl(*p++)))
 		status = nfs_stat_to_errno(status);
@@ -1289,6 +1596,46 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	return status;
 }
 
+/*
+ * 3.3.2  SETATTR3res
+ *
+ *	struct SETATTR3resok {
+ *		wcc_data  obj_wcc;
+ *	};
+ *
+ *	struct SETATTR3resfail {
+ *		wcc_data  obj_wcc;
+ *	};
+ *
+ *	union SETATTR3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		SETATTR3resok   resok;
+ *	default:
+ *		SETATTR3resfail resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, __be32 *p,
+				    struct nfs_fattr *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 static int
 nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
 {
@@ -1314,6 +1661,55 @@ nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 	return status;
 }
 
+/*
+ * 3.3.3  LOOKUP3res
+ *
+ *	struct LOOKUP3resok {
+ *		nfs_fh3		object;
+ *		post_op_attr	obj_attributes;
+ *		post_op_attr	dir_attributes;
+ *	};
+ *
+ *	struct LOOKUP3resfail {
+ *		post_op_attr	dir_attributes;
+ *	};
+ *
+ *	union LOOKUP3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		LOOKUP3resok	resok;
+ *	default:
+ *		LOOKUP3resfail	resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs3_diropres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_nfs_fh3(&xdr, result->fh);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->dir_attr);
+out:
+	return error;
+out_default:
+	error = decode_post_op_attr(&xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode ACCESS reply
  */
@@ -1329,6 +1725,48 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 	return 0;
 }
 
+/*
+ * 3.3.4  ACCESS3res
+ *
+ *	struct ACCESS3resok {
+ *		post_op_attr	obj_attributes;
+ *		uint32		access;
+ *	};
+ *
+ *	struct ACCESS3resfail {
+ *		post_op_attr	obj_attributes;
+ *	};
+ *
+ *	union ACCESS3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		ACCESS3resok	resok;
+ *	default:
+ *		ACCESS3resfail	resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs3_accessres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_uint32(&xdr, &result->access);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode READLINK reply
  */
@@ -1375,6 +1813,48 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	return 0;
 }
 
+/*
+ * 3.3.5  READLINK3res
+ *
+ *	struct READLINK3resok {
+ *		post_op_attr	symlink_attributes;
+ *		nfspath3	data;
+ *	};
+ *
+ *	struct READLINK3resfail {
+ *		post_op_attr	symlink_attributes;
+ *	};
+ *
+ *	union READLINK3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		READLINK3resok	resok;
+ *	default:
+ *		READLINK3resfail resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, __be32 *p,
+				     struct nfs_fattr *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_nfspath3(&xdr);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode READ reply
  */
@@ -1428,6 +1908,90 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	return count;
 }
 
+/*
+ * 3.3.6  READ3res
+ *
+ *	struct READ3resok {
+ *		post_op_attr	file_attributes;
+ *		count3		count;
+ *		bool		eof;
+ *		opaque		data<>;
+ *	};
+ *
+ *	struct READ3resfail {
+ *		post_op_attr	file_attributes;
+ *	};
+ *
+ *	union READ3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		READ3resok	resok;
+ *	default:
+ *		READ3resfail	resfail;
+ *	};
+ */
+static int decode_read3resok(struct xdr_stream *xdr,
+			     struct nfs_readres *result)
+{
+	u32 eof, count, ocount, recvd;
+	size_t hdrlen;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 + 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	count = be32_to_cpup(p++);
+	eof = be32_to_cpup(p++);
+	ocount = be32_to_cpup(p++);
+	if (unlikely(ocount != count))
+		goto out_mismatch;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(count > recvd))
+		goto out_cheating;
+
+out:
+	xdr_read_pages(xdr, count);
+	result->eof = eof;
+	result->count = count;
+	return count;
+out_mismatch:
+	dprintk("NFS: READ count doesn't match length of opaque: "
+		"count %u != ocount %u\n", count, ocount);
+	return -EIO;
+out_cheating:
+	dprintk("NFS: server cheating in read result: "
+		"count %u > recvd %u\n", count, recvd);
+	count = recvd;
+	eof = 0;
+	goto out;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, __be32 *p,
+				 struct nfs_readres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_read3resok(&xdr, result);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode WRITE response
  */
@@ -1450,6 +2014,78 @@ nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 	return res->count;
 }
 
+/*
+ * 3.3.7  WRITE3res
+ *
+ *	enum stable_how {
+ *		UNSTABLE  = 0,
+ *		DATA_SYNC = 1,
+ *		FILE_SYNC = 2
+ *	};
+ *
+ *	struct WRITE3resok {
+ *		wcc_data	file_wcc;
+ *		count3		count;
+ *		stable_how	committed;
+ *		writeverf3	verf;
+ *	};
+ *
+ *	struct WRITE3resfail {
+ *		wcc_data	file_wcc;
+ *	};
+ *
+ *	union WRITE3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		WRITE3resok	resok;
+ *	default:
+ *		WRITE3resfail	resfail;
+ *	};
+ */
+static int decode_write3resok(struct xdr_stream *xdr,
+			      struct nfs_writeres *result)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	result->count = be32_to_cpup(p++);
+	result->verf->committed = be32_to_cpup(p++);
+	if (unlikely(result->verf->committed > NFS_FILE_SYNC))
+		goto out_badvalue;
+	memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
+	return result->count;
+out_badvalue:
+	dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, __be32 *p,
+				  struct nfs_writeres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_write3resok(&xdr, result);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode a CREATE response
  */
@@ -1477,6 +2113,111 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 	return status;
 }
 
+/*
+ * 3.3.8  CREATE3res
+ *
+ *	struct CREATE3resok {
+ *		post_op_fh3	obj;
+ *		post_op_attr	obj_attributes;
+ *		wcc_data	dir_wcc;
+ *	};
+ *
+ *	struct CREATE3resfail {
+ *		wcc_data	dir_wcc;
+ *	};
+ *
+ *	union CREATE3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		CREATE3resok	resok;
+ *	default:
+ *		CREATE3resfail	resfail;
+ *	};
+ */
+static int decode_create3resok(struct xdr_stream *xdr,
+			       struct nfs3_diropres *result)
+{
+	int error;
+
+	error = decode_post_op_fh3(xdr, result->fh);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	/* The server isn't required to return a file handle.
+	 * If it didn't, force the client to perform a LOOKUP
+	 * to determine the correct file handle and attribute
+	 * values for the new object. */
+	if (result->fh->size == 0)
+		result->fattr->valid = 0;
+	error = decode_wcc_data(xdr, result->dir_attr);
+out:
+	return error;
+}
+
+static int nfs3_xdr_dec_create3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs3_diropres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_create3resok(&xdr, result);
+out:
+	return error;
+out_default:
+	error = decode_wcc_data(&xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	return nfs_stat_to_errno(status);
+}
+
+/*
+ * 3.3.12  REMOVE3res
+ *
+ *	struct REMOVE3resok {
+ *		wcc_data    dir_wcc;
+ *	};
+ *
+ *	struct REMOVE3resfail {
+ *		wcc_data    dir_wcc;
+ *	};
+ *
+ *	union REMOVE3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		REMOVE3resok   resok;
+ *	default:
+ *		REMOVE3resfail resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_removeres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode RENAME reply
  */
@@ -1492,6 +2233,51 @@ nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
 	return status;
 }
 
+/*
+ * 3.3.14  RENAME3res
+ *
+ *	struct RENAME3resok {
+ *		wcc_data	fromdir_wcc;
+ *		wcc_data	todir_wcc;
+ *	};
+ *
+ *	struct RENAME3resfail {
+ *		wcc_data	fromdir_wcc;
+ *		wcc_data	todir_wcc;
+ *	};
+ *
+ *	union RENAME3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		RENAME3resok   resok;
+ *	default:
+ *		RENAME3resfail resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_renameres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->old_fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->new_fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode LINK reply
  */
@@ -1507,6 +2293,249 @@ nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
 	return status;
 }
 
+/*
+ * 3.3.15  LINK3res
+ *
+ *	struct LINK3resok {
+ *		post_op_attr	file_attributes;
+ *		wcc_data	linkdir_wcc;
+ *	};
+ *
+ *	struct LINK3resfail {
+ *		post_op_attr	file_attributes;
+ *		wcc_data	linkdir_wcc;
+ *	};
+ *
+ *	union LINK3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		LINK3resok	resok;
+ *	default:
+ *		LINK3resfail	resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, __be32 *p,
+				 struct nfs3_linkres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
+/**
+ * nfs3_decode_dirent - Decode a single NFSv3 directory entry stored in
+ *			the local page cache
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @server: nfs_server data for this directory
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns the position of the next item in the buffer, or an ERR_PTR.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ *
+ * 3.3.16  entry3
+ *
+ *	struct entry3 {
+ *		fileid3		fileid;
+ *		filename3	name;
+ *		cookie3		cookie;
+ *		fhandle3	filehandle;
+ *		post_op_attr3	attributes;
+ *		entry3		*nextentry;
+ *	};
+ *
+ * 3.3.17  entryplus3
+ *	struct entryplus3 {
+ *		fileid3		fileid;
+ *		filename3	name;
+ *		cookie3		cookie;
+ *		post_op_attr	name_attributes;
+ *		post_op_fh3	name_handle;
+ *		entryplus3	*nextentry;
+ *	};
+ */
+__be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+			   struct nfs_server *server, int plus)
+{
+	struct nfs_entry old = *entry;
+	__be32 *p;
+	int error;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	if (*p == xdr_zero) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(p == NULL))
+			goto out_overflow;
+		if (*p == xdr_zero)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	error = decode_fileid3(xdr, &entry->ino);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	error = decode_inline_filename3(xdr, &entry->name, &entry->len);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	entry->prev_cookie = entry->cookie;
+	error = decode_cookie3(xdr, &entry->cookie);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	entry->d_type = DT_UNKNOWN;
+
+	if (plus) {
+		entry->fattr->valid = 0;
+		error = decode_post_op_attr(xdr, entry->fattr);
+		if (unlikely(error))
+			return ERR_PTR(error);
+		if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
+			entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+
+		/* In fact, a post_op_fh3: */
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(p == NULL))
+			goto out_overflow;
+		if (*p != xdr_zero) {
+			error = decode_nfs_fh3(xdr, entry->fh);
+			if (unlikely(error)) {
+				if (error == -E2BIG)
+					goto out_truncated;
+				return ERR_PTR(error);
+			}
+		} else
+			zero_nfs_fh3(entry->fh);
+	}
+
+	/* Peek at the next entry to see if we're at EOD */
+	p = xdr_inline_peek(xdr, 4 + 4);
+	entry->eof = 0;
+	if (p != NULL)
+		entry->eof = (p[0] == xdr_zero) && (p[1] != xdr_zero);
+	return p;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return ERR_PTR(-EAGAIN);
+out_truncated:
+	dprintk("NFS: directory entry contains invalid file handle\n");
+	*entry = old;
+	return ERR_PTR(-EAGAIN);
+}
+
+/*
+ * 3.3.16  READDIR3res
+ *
+ *	struct dirlist3 {
+ *		entry3		*entries;
+ *		bool		eof;
+ *	};
+ *
+ *	struct READDIR3resok {
+ *		post_op_attr	dir_attributes;
+ *		cookieverf3	cookieverf;
+ *		dirlist3	reply;
+ *	};
+ *
+ *	struct READDIR3resfail {
+ *		post_op_attr	dir_attributes;
+ *	};
+ *
+ *	union READDIR3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		READDIR3resok	resok;
+ *	default:
+ *		READDIR3resfail	resfail;
+ *	};
+ *
+ * Read the directory contents into the page cache, but otherwise
+ * don't touch them.  The actual decoding is done by nfs3_decode_entry()
+ * during subsequent nfs_readdir() calls.
+ */
+static int decode_dirlist3(struct xdr_stream *xdr)
+{
+	u32 recvd, pglen;
+	size_t hdrlen;
+
+	pglen = xdr->buf->page_len;
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+	recvd = xdr->buf->len - hdrlen;
+	if (unlikely(pglen > recvd))
+		goto out_cheating;
+out:
+	xdr_read_pages(xdr, pglen);
+	return pglen;
+out_cheating:
+	dprintk("NFS: server cheating in readdir result: "
+		"pglen %u > recvd %u\n", pglen, recvd);
+	pglen = recvd;
+	goto out;
+}
+
+static int decode_readdir3resok(struct xdr_stream *xdr,
+				struct nfs3_readdirres *result)
+{
+	int error;
+
+	error = decode_post_op_attr(xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	/* XXX: do we need to check if result->verf != NULL ? */
+	error = decode_cookieverf3(xdr, result->verf);
+	if (unlikely(error))
+		goto out;
+	error = decode_dirlist3(xdr);
+out:
+	return error;
+}
+
+static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req, __be32 *p,
+				    struct nfs3_readdirres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_readdir3resok(&xdr, result);
+out:
+	return error;
+out_default:
+	error = decode_post_op_attr(&xdr, result->dir_attr);
+	if (unlikely(error))
+		goto out;
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode FSSTAT reply
  */
@@ -1532,6 +2561,75 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
 	return 0;
 }
 
+/*
+ * 3.3.18  FSSTAT3res
+ *
+ *	struct FSSTAT3resok {
+ *		post_op_attr	obj_attributes;
+ *		size3		tbytes;
+ *		size3		fbytes;
+ *		size3		abytes;
+ *		size3		tfiles;
+ *		size3		ffiles;
+ *		size3		afiles;
+ *		uint32		invarsec;
+ *	};
+ *
+ *	struct FSSTAT3resfail {
+ *		post_op_attr	obj_attributes;
+ *	};
+ *
+ *	union FSSTAT3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		FSSTAT3resok	resok;
+ *	default:
+ *		FSSTAT3resfail	resfail;
+ *	};
+ */
+static int decode_fsstat3resok(struct xdr_stream *xdr,
+			       struct nfs_fsstat *result)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 8 * 6 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	p = xdr_decode_size3(p, &result->tbytes);
+	p = xdr_decode_size3(p, &result->fbytes);
+	p = xdr_decode_size3(p, &result->abytes);
+	p = xdr_decode_size3(p, &result->tfiles);
+	p = xdr_decode_size3(p, &result->ffiles);
+	xdr_decode_size3(p, &result->afiles);
+	/* ignore invarsec */
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_fsstat *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_fsstat3resok(&xdr, result);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode FSINFO reply
  */
@@ -1561,6 +2659,83 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
 	return 0;
 }
 
+/*
+ * 3.3.19  FSINFO3res
+ *
+ *	struct FSINFO3resok {
+ *		post_op_attr	obj_attributes;
+ *		uint32		rtmax;
+ *		uint32		rtpref;
+ *		uint32		rtmult;
+ *		uint32		wtmax;
+ *		uint32		wtpref;
+ *		uint32		wtmult;
+ *		uint32		dtpref;
+ *		size3		maxfilesize;
+ *		nfstime3	time_delta;
+ *		uint32		properties;
+ *	};
+ *
+ *	struct FSINFO3resfail {
+ *		post_op_attr	obj_attributes;
+ *	};
+ *
+ *	union FSINFO3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		FSINFO3resok	resok;
+ *	default:
+ *		FSINFO3resfail	resfail;
+ *	};
+ */
+static int decode_fsinfo3resok(struct xdr_stream *xdr,
+			       struct nfs_fsinfo *result)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	result->rtmax  = be32_to_cpup(p++);
+	result->rtpref = be32_to_cpup(p++);
+	result->rtmult = be32_to_cpup(p++);
+	result->wtmax  = be32_to_cpup(p++);
+	result->wtpref = be32_to_cpup(p++);
+	result->wtmult = be32_to_cpup(p++);
+	result->dtpref = be32_to_cpup(p++);
+	p = xdr_decode_size3(p, &result->maxfilesize);
+	xdr_decode_time3(p, &result->time_delta);
+
+	/* ignore properties */
+	result->lease_time = 0;
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_fsinfo *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_fsinfo3resok(&xdr, result);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode PATHCONF reply
  */
@@ -1581,6 +2756,70 @@ nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
 	return 0;
 }
 
+/*
+ * 3.3.20  PATHCONF3res
+ *
+ *	struct PATHCONF3resok {
+ *		post_op_attr	obj_attributes;
+ *		uint32		linkmax;
+ *		uint32		name_max;
+ *		bool		no_trunc;
+ *		bool		chown_restricted;
+ *		bool		case_insensitive;
+ *		bool		case_preserving;
+ *	};
+ *
+ *	struct PATHCONF3resfail {
+ *		post_op_attr	obj_attributes;
+ *	};
+ *
+ *	union PATHCONF3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		PATHCONF3resok	resok;
+ *	default:
+ *		PATHCONF3resfail resfail;
+ *	};
+ */
+static int decode_pathconf3resok(struct xdr_stream *xdr,
+				 struct nfs_pathconf *result)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 * 6);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	result->max_link = be32_to_cpup(p++);
+	result->max_namelen = be32_to_cpup(p);
+	/* ignore remaining fields */
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, __be32 *p,
+				     struct nfs_pathconf *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_post_op_attr(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_pathconf3resok(&xdr, result);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode COMMIT reply
  */
@@ -1599,6 +2838,48 @@ nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 	return 0;
 }
 
+/*
+ * 3.3.21  COMMIT3res
+ *
+ *	struct COMMIT3resok {
+ *		wcc_data	file_wcc;
+ *		writeverf3	verf;
+ *	};
+ *
+ *	struct COMMIT3resfail {
+ *		wcc_data	file_wcc;
+ *	};
+ *
+ *	union COMMIT3res switch (nfsstat3 status) {
+ *	case NFS3_OK:
+ *		COMMIT3resok	resok;
+ *	default:
+ *		COMMIT3resfail	resfail;
+ *	};
+ */
+static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_writeres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	error = decode_wcc_data(&xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_status;
+	error = decode_writeverf3(&xdr, result->verf->verifier);
+out:
+	return error;
+out_status:
+	return nfs_stat_to_errno(status);
+}
+
 #ifdef CONFIG_NFS_V3_ACL
 /*
  * Decode GETACL reply
@@ -1632,6 +2913,70 @@ nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
 	return (err > 0) ? 0 : err;
 }
 
+static inline int decode_getacl3resok(struct xdr_stream *xdr,
+				      struct nfs3_getaclres *result)
+{
+	struct posix_acl **acl;
+	unsigned int *aclcnt;
+	size_t hdrlen;
+	int error;
+
+	error = decode_post_op_attr(xdr, result->fattr);
+	if (unlikely(error))
+		goto out;
+	error = decode_uint32(xdr, &result->mask);
+	if (unlikely(error))
+		goto out;
+	error = -EINVAL;
+	if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		goto out;
+
+	hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+
+	acl = NULL;
+	if (result->mask & NFS_ACL)
+		acl = &result->acl_access;
+	aclcnt = NULL;
+	if (result->mask & NFS_ACLCNT)
+		aclcnt = &result->acl_access_count;
+	error = nfsacl_decode(xdr->buf, hdrlen, aclcnt, acl);
+	if (unlikely(error <= 0))
+		goto out;
+
+	acl = NULL;
+	if (result->mask & NFS_DFACL)
+		acl = &result->acl_default;
+	aclcnt = NULL;
+	if (result->mask & NFS_DFACLCNT)
+		aclcnt = &result->acl_default_count;
+	error = nfsacl_decode(xdr->buf, hdrlen + error, aclcnt, acl);
+	if (unlikely(error <= 0))
+		return error;
+	error = 0;
+out:
+	return error;
+}
+
+static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs3_getaclres *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_getacl3resok(&xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
+
 /*
  * Decode setacl reply.
  */
@@ -1645,6 +2990,27 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	xdr_decode_post_op_attr(p, fattr);
 	return 0;
 }
+
+static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, __be32 *p,
+				   struct nfs_fattr *result)
+{
+	struct xdr_stream xdr;
+	enum nfs_stat status;
+	int error;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	error = decode_nfsstat3(&xdr, &status);
+	if (unlikely(error))
+		goto out;
+	if (status != NFS3_OK)
+		goto out_default;
+	error = decode_post_op_attr(&xdr, result);
+out:
+	return error;
+out_default:
+	return nfs_stat_to_errno(status);
+}
+
 #endif  /* CONFIG_NFS_V3_ACL */
 
 #define PROC(proc, argtype, restype, timer)				\
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index e2ee14fd0083..6ccfe3b641e1 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -13,6 +13,7 @@
 #define NFS3_COOKIESIZE		4
 #define NFS3_CREATEVERFSIZE	8
 #define NFS3_COOKIEVERFSIZE	8
+#define NFS3_WRITEVERFSIZE	8
 #define NFS3_FIFO_DEV		(-1)
 #define NFS3MODE_FMT		0170000
 #define NFS3MODE_DIR		0040000
-- 
cgit v1.2.3-71-gd317


From d8367c504e39528a057a5d7a267b6724f7fdb4b8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:57:52 +0000
Subject: lockd: Move nlmdbg_cookie2a() to svclock.c

Clean up.  nlmdbg_cookie2a() is used only in svclock.c.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c          | 30 ++++++++++++++++++++++++++++++
 fs/lockd/xdr.c              | 29 -----------------------------
 include/linux/lockd/debug.h | 10 ----------
 3 files changed, 30 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ef5659b211e9..9266c4600208 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -46,6 +46,7 @@ static void	nlmsvc_remove_block(struct nlm_block *block);
 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
 
 /*
  * The list of blocked locks to retry
@@ -934,3 +935,32 @@ nlmsvc_retry_blocked(void)
 
 	return timeout;
 }
+
+#ifdef RPC_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+	/*
+	 * We can get away with a static buffer because we're only
+	 * called with BKL held.
+	 */
+	static char buf[2*NLM_MAXCOOKIELEN+1];
+	unsigned int i, len = sizeof(buf);
+	char *p = buf;
+
+	len--;	/* allow for trailing \0 */
+	if (len < 3)
+		return "???";
+	for (i = 0 ; i < cookie->len ; i++) {
+		if (len < 2) {
+			strcpy(p-3, "...");
+			break;
+		}
+		sprintf(p, "%02x", cookie->data[i]);
+		p += 2;
+		len -= 2;
+	}
+	*p = '\0';
+
+	return buf;
+}
+#endif
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 0eb694dc497b..964666c68a86 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -341,32 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 	return xdr_ressize_check(rqstp, p);
 }
-
-#ifdef RPC_DEBUG
-const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
-	/*
-	 * We can get away with a static buffer because we're only
-	 * called with BKL held.
-	 */
-	static char buf[2*NLM_MAXCOOKIELEN+1];
-	unsigned int i, len = sizeof(buf);
-	char *p = buf;
-
-	len--;	/* allow for trailing \0 */
-	if (len < 3)
-		return "???";
-	for (i = 0 ; i < cookie->len ; i++) {
-		if (len < 2) {
-			strcpy(p-3, "...");
-			break;
-		}
-		sprintf(p, "%02x", cookie->data[i]);
-		p += 2;
-		len -= 2;
-	}
-	*p = '\0';
-
-	return buf;
-}
-#endif
diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h
index 34b2b7f33c3b..257d3779f2ab 100644
--- a/include/linux/lockd/debug.h
+++ b/include/linux/lockd/debug.h
@@ -44,14 +44,4 @@
 #define NLMDBG_XDR		0x0100
 #define NLMDBG_ALL		0x7fff
 
-
-/*
- * Support for printing NLM cookies in dprintk()
- */
-#ifdef RPC_DEBUG
-struct nlm_cookie;
-/* Call this function with the BKL held (it uses a static buffer) */
-extern const char *nlmdbg_cookie2a(const struct nlm_cookie *);
-#endif
-
 #endif /* LINUX_LOCKD_DEBUG_H */
-- 
cgit v1.2.3-71-gd317


From 573c4e1ef53a6b891b73cc2257e1604da754a2e4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:58:11 +0000
Subject: NFS: Simplify ->decode_dirent() calling sequence

Clean up.

The pointer returned by ->decode_dirent() is no longer used as a
pointer.  The only call site (xdr_decode() in fs/nfs/dir.c) simply
extracts the errno value encoded in the pointer.  Replace the
returned pointer with a standard integer errno return value.

Also, pass the "server" argument as part of the nfs_entry instead of
as a separate parameter.  It's faster to derive "server" in
nfs_readdir_xdr_to_array() since we already have the directory's inode
handy.  "server" ought to be invariant for a set of entries in the
same directory, right?

The legacy versions of decode_dirent() don't use "server" anyway, so
it's wasted work for them to derive and pass "server" for each entry.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            | 15 ++++++++-------
 fs/nfs/internal.h       |  9 ++++++---
 fs/nfs/nfs2xdr.c        | 18 +++++++++---------
 fs/nfs/nfs3xdr.c        | 28 ++++++++++++++--------------
 fs/nfs/nfs4_fs.h        |  1 -
 fs/nfs/nfs4xdr.c        | 29 ++++++++++++++++++++++-------
 include/linux/nfs_xdr.h |  3 ++-
 7 files changed, 61 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..3e2123fe79f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -172,7 +172,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
@@ -378,14 +378,14 @@ error:
 	return error;
 }
 
-/* Fill in an entry based on the xdr code stored in desc->page */
-static
-int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
+static int xdr_decode(nfs_readdir_descriptor_t *desc,
+		      struct nfs_entry *entry, struct xdr_stream *xdr)
 {
-	__be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
-	if (IS_ERR(p))
-		return PTR_ERR(p);
+	int error;
 
+	error = desc->decode(xdr, entry, desc->plus);
+	if (error)
+		return error;
 	entry->fattr->time_start = desc->timestamp;
 	entry->fattr->gencount = desc->gencount;
 	return 0;
@@ -566,6 +566,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
+	entry.server = NFS_SERVER(inode);
 	if (entry.fh == NULL || entry.fattr == NULL)
 		goto out;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6c6a9955bae9..435eae3666bd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -187,15 +187,18 @@ extern void nfs_destroy_directcache(void);
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(enum nfs_stat);
 extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs2_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs2_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs3_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs4_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 0343175fe6c0..a9b848edbd2e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -936,10 +936,10 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, __be32 *p,
  *                      the local page cache.
  * @xdr: XDR stream where entry resides
  * @entry: buffer to fill in with entry data
- * @server: nfs_server data for this directory
  * @plus: boolean indicating whether this should be a readdirplus entry
  *
- * Returns the position of the next item in the buffer, or an ERR_PTR.
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
  *
  * This function is not invoked during READDIR reply decoding, but
  * rather whenever an application invokes the getdents(2) system call
@@ -954,8 +954,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, __be32 *p,
  *		entry		*nextentry;
  *	};
  */
-__be32 *nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-			   struct nfs_server *server, int plus)
+int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+		       int plus)
 {
 	__be32 *p;
 	int error;
@@ -968,9 +968,9 @@ __be32 *nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		if (unlikely(p == NULL))
 			goto out_overflow;
 		if (*p++ == xdr_zero)
-			return ERR_PTR(-EAGAIN);
+			return -EAGAIN;
 		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
+		return -EBADCOOKIE;
 	}
 
 	p = xdr_inline_decode(xdr, 4);
@@ -980,7 +980,7 @@ __be32 *nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
 	error = decode_filename_inline(xdr, &entry->name, &entry->len);
 	if (unlikely(error))
-		return ERR_PTR(error);
+		return error;
 
 	/*
 	 * The type (size and byte order) of nfscookie isn't defined in
@@ -999,11 +999,11 @@ __be32 *nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	entry->eof = 0;
 	if (p != NULL)
 		entry->eof = (p[0] == xdr_zero) && (p[1] != xdr_zero);
-	return p;
+	return 0;
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EAGAIN);
+	return -EAGAIN;
 }
 
 /*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index c97d00fe849a..15c93ccd90c5 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1970,10 +1970,10 @@ out_status:
  *			the local page cache
  * @xdr: XDR stream where entry resides
  * @entry: buffer to fill in with entry data
- * @server: nfs_server data for this directory
  * @plus: boolean indicating whether this should be a readdirplus entry
  *
- * Returns the position of the next item in the buffer, or an ERR_PTR.
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
  *
  * This function is not invoked during READDIR reply decoding, but
  * rather whenever an application invokes the getdents(2) system call
@@ -2000,8 +2000,8 @@ out_status:
  *		entryplus3	*nextentry;
  *	};
  */
-__be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-			   struct nfs_server *server, int plus)
+int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+		       int plus)
 {
 	struct nfs_entry old = *entry;
 	__be32 *p;
@@ -2015,23 +2015,23 @@ __be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		if (unlikely(p == NULL))
 			goto out_overflow;
 		if (*p == xdr_zero)
-			return ERR_PTR(-EAGAIN);
+			return -EAGAIN;
 		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
+		return -EBADCOOKIE;
 	}
 
 	error = decode_fileid3(xdr, &entry->ino);
 	if (unlikely(error))
-		return ERR_PTR(error);
+		return error;
 
 	error = decode_inline_filename3(xdr, &entry->name, &entry->len);
 	if (unlikely(error))
-		return ERR_PTR(error);
+		return error;
 
 	entry->prev_cookie = entry->cookie;
 	error = decode_cookie3(xdr, &entry->cookie);
 	if (unlikely(error))
-		return ERR_PTR(error);
+		return error;
 
 	entry->d_type = DT_UNKNOWN;
 
@@ -2039,7 +2039,7 @@ __be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		entry->fattr->valid = 0;
 		error = decode_post_op_attr(xdr, entry->fattr);
 		if (unlikely(error))
-			return ERR_PTR(error);
+			return error;
 		if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
 			entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
 
@@ -2052,7 +2052,7 @@ __be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 			if (unlikely(error)) {
 				if (error == -E2BIG)
 					goto out_truncated;
-				return ERR_PTR(error);
+				return error;
 			}
 		} else
 			zero_nfs_fh3(entry->fh);
@@ -2063,15 +2063,15 @@ __be32 *nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	entry->eof = 0;
 	if (p != NULL)
 		entry->eof = (p[0] == xdr_zero) && (p[1] != xdr_zero);
-	return p;
+	return 0;
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EAGAIN);
+	return -EAGAIN;
 out_truncated:
 	dprintk("NFS: directory entry contains invalid file handle\n");
 	*entry = old;
-	return ERR_PTR(-EAGAIN);
+	return -EAGAIN;
 }
 
 /*
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9fa496387fdf..7a6eecffcaeb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -331,7 +331,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 extern struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 868815c55450..be9f00ab0d18 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6159,8 +6159,22 @@ out:
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-			   struct nfs_server *server, int plus)
+/**
+ * nfs4_decode_dirent - Decode a single NFSv4 directory entry stored in
+ *                      the local page cache.
+ * @xdr: XDR stream where entry resides
+ * @entry: buffer to fill in with entry data
+ * @plus: boolean indicating whether this should be a readdirplus entry
+ *
+ * Returns zero if successful, otherwise a negative errno value is
+ * returned.
+ *
+ * This function is not invoked during READDIR reply decoding, but
+ * rather whenever an application invokes the getdents(2) system call
+ * on a directory already in our cache.
+ */
+int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+		       int plus)
 {
 	uint32_t bitmap[2] = {0};
 	uint32_t len;
@@ -6172,9 +6186,9 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		if (unlikely(!p))
 			goto out_overflow;
 		if (!ntohl(*p++))
-			return ERR_PTR(-EAGAIN);
+			return -EAGAIN;
 		entry->eof = 1;
-		return ERR_PTR(-EBADCOOKIE);
+		return -EBADCOOKIE;
 	}
 
 	p = xdr_inline_decode(xdr, 12);
@@ -6203,7 +6217,8 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (decode_attr_length(xdr, &len, &p) < 0)
 		goto out_overflow;
 
-	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
+	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
+					entry->server, 1) < 0)
 		goto out_overflow;
 	if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
 		entry->ino = entry->fattr->fileid;
@@ -6221,11 +6236,11 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	else
 		entry->eof = 0;
 
-	return p;
+	return 0;
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EAGAIN);
+	return -EAGAIN;
 }
 
 /*
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 80f07198a31a..236e7e4b99a0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -484,6 +484,7 @@ struct nfs_entry {
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
 	unsigned char		d_type;
+	struct nfs_server *	server;
 };
 
 /*
@@ -1089,7 +1090,7 @@ struct nfs_rpc_ops {
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	__be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus);
+	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-- 
cgit v1.2.3-71-gd317


From 9f06c719f474be7003763284a990bed6377bb0d4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:59:18 +0000
Subject: SUNRPC: New xdr_streams XDR encoder API

Now that all client-side XDR encoder routines use xdr_streams, there
should be no need to support the legacy calling sequence [rpc_rqst *,
__be32 *, RPC arg *] anywhere.  We can construct an xdr_stream in the
generic RPC code, instead of in each encoder function.

Also, all the client-side encoder functions return 0 now, making a
return value superfluous.  Take this opportunity to convert them to
return void instead.

This is a refactoring change.  It should not cause different behavior.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clnt4xdr.c            |  92 +++---
 fs/lockd/clntxdr.c             |  92 +++---
 fs/lockd/mon.c                 |  26 +-
 fs/nfs/mount_clnt.c            |  18 +-
 fs/nfs/nfs2xdr.c               | 147 ++++-----
 fs/nfs/nfs3xdr.c               | 241 ++++++---------
 fs/nfs/nfs4xdr.c               | 663 ++++++++++++++++++-----------------------
 fs/nfsd/nfs4callback.c         |  22 +-
 include/linux/sunrpc/auth.h    |   4 +-
 include/linux/sunrpc/clnt.h    |   2 +-
 include/linux/sunrpc/xdr.h     |   9 +-
 net/sunrpc/auth.c              |  14 +-
 net/sunrpc/auth_gss/auth_gss.c |  31 +-
 net/sunrpc/clnt.c              |   5 +-
 net/sunrpc/rpcb_clnt.c         |  47 ++-
 15 files changed, 606 insertions(+), 807 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 1a1c3e21ed2c..974f1d9cd323 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -385,17 +385,15 @@ static void encode_nlm4_lock(struct xdr_stream *xdr,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -408,20 +406,18 @@ static int nlm4_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
  *		int state;
  *	};
  */
-static int nlm4_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	encode_bool(&xdr, args->reclaim);
-	encode_int32(&xdr, args->state);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
 }
 
 /*
@@ -432,18 +428,16 @@ static int nlm4_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -452,16 +446,14 @@ static int nlm4_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nlm_args *args)
+static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -470,15 +462,12 @@ static int nlm4_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
  *		nlm4_stat stat;
  *	};
  */
-static int nlm4_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
-			    const struct nlm_res *result)
+static void nlm4_xdr_enc_res(struct rpc_rqst *req,
+			     struct xdr_stream *xdr,
+			     const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm4_stat(&xdr, result->status);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
 }
 
 /*
@@ -494,17 +483,14 @@ static int nlm4_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
  *		nlm4_testrply test_stat;
  *	};
  */
-static int nlm4_xdr_enc_testres(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_res *result)
+static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm4_stat(&xdr, result->status);
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
 	if (result->status == nlm_lck_denied)
-		encode_nlm4_holder(&xdr, result);
-	return 0;
+		encode_nlm4_holder(xdr, result);
 }
 
 
@@ -588,7 +574,7 @@ out:
 #define PROC(proc, argtype, restype)					\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nlm4_xdr_enc_##argtype,		\
+	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 0472f2aff509..c6fda8fb1c5b 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -378,17 +378,15 @@ static void encode_nlm_lock(struct xdr_stream *xdr,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -401,20 +399,18 @@ static int nlm_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
  *		int state;
  *	};
  */
-static int nlm_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	encode_bool(&xdr, args->reclaim);
-	encode_int32(&xdr, args->state);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
 }
 
 /*
@@ -425,18 +421,16 @@ static int nlm_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -445,16 +439,14 @@ static int nlm_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nlm_args *args)
+static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -463,15 +455,12 @@ static int nlm_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
  *		nlm_stat stat;
  *	};
  */
-static int nlm_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
-			   const struct nlm_res *result)
+static void nlm_xdr_enc_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
+			    const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm_stat(&xdr, result->status);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm_stat(xdr, result->status);
 }
 
 /*
@@ -494,16 +483,13 @@ static void encode_nlm_testrply(struct xdr_stream *xdr,
 		encode_nlm_holder(xdr, result);
 }
 
-static int nlm_xdr_enc_testres(struct rpc_rqst *req, __be32 *p,
-			       const struct nlm_res *result)
+static void nlm_xdr_enc_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm_stat(&xdr, result->status);
-	encode_nlm_testrply(&xdr, result);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm_stat(xdr, result->status);
+	encode_nlm_testrply(xdr, result);
 }
 
 
@@ -586,7 +572,7 @@ out:
 #define PROC(proc, argtype, restype)	\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nlm_xdr_enc_##argtype,		\
+	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index d812818d0258..baa77bc9d825 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -459,25 +459,17 @@ static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 	xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
 }
 
-static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p,
-		       const struct nsm_args *argp)
+static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			    const struct nsm_args *argp)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mon_id(&xdr, argp);
-	encode_priv(&xdr, argp);
-	return 0;
+	encode_mon_id(xdr, argp);
+	encode_priv(xdr, argp);
 }
 
-static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p,
-			 const struct nsm_args *argp)
+static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      const struct nsm_args *argp)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mon_id(&xdr, argp);
-	return 0;
+	encode_mon_id(xdr, argp);
 }
 
 static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
@@ -524,7 +516,7 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
 static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdrproc_t)xdr_enc_mon,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
 		.p_decode	= (kxdrproc_t)xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
@@ -533,7 +525,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdrproc_t)xdr_enc_unmon,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
 		.p_decode	= (kxdrproc_t)xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 97c3ec793305..979ebd7af3cb 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -288,14 +288,10 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 	xdr_encode_opaque(p, pathname, pathname_len);
 }
 
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
-			   const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const char *dirpath)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mntdirpath(&xdr, dirpath);
-	return 0;
+	encode_mntdirpath(xdr, dirpath);
 }
 
 /*
@@ -460,7 +456,7 @@ static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
@@ -469,7 +465,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
@@ -479,7 +475,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
@@ -488,7 +484,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a9b848edbd2e..8f3acbec761f 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -558,14 +558,11 @@ out_default:
  * "NFS: Network File System Protocol Specification".
  */
 
-static int nfs2_xdr_enc_fhandle(struct rpc_rqst *req, __be32 *p,
-				const struct nfs_fh *fh)
+static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nfs_fh *fh)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, fh);
-	return 0;
+	encode_fhandle(xdr, fh);
 }
 
 /*
@@ -576,37 +573,28 @@ static int nfs2_xdr_enc_fhandle(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_sattrargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_sattrargs *args)
+static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_sattrargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fh);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_fhandle(xdr, args->fh);
+	encode_sattr(xdr, args->sattr);
 }
 
-static int nfs2_xdr_enc_diropargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_diropargs *args)
+static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_diropargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name, args->len);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name, args->len);
 }
 
-static int nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs_readlinkargs *args)
+static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_readlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fh);
+	encode_fhandle(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS_readlinkres_sz);
-	return 0;
 }
 
 /*
@@ -634,17 +622,14 @@ static void encode_readargs(struct xdr_stream *xdr,
 	*p = cpu_to_be32(count);
 }
 
-static int nfs2_xdr_enc_readargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nfs_readargs *args)
+static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readargs(&xdr, args);
+	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
@@ -677,15 +662,12 @@ static void encode_writeargs(struct xdr_stream *xdr,
 	xdr_write_pages(xdr, args->pages, args->pgbase, count);
 }
 
-static int nfs2_xdr_enc_writeargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_writeargs *args)
+static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_writeargs(&xdr, args);
-	xdr.buf->flags |= XDRBUF_WRITE;
-	return 0;
+	encode_writeargs(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
 /*
@@ -696,25 +678,19 @@ static int nfs2_xdr_enc_writeargs(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_createargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_createargs *args)
+static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_createargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name, args->len);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name, args->len);
+	encode_sattr(xdr, args->sattr);
 }
 
-static int nfs2_xdr_enc_removeargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_removeargs *args)
+static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name.name, args->name.len);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
@@ -725,17 +701,15 @@ static int nfs2_xdr_enc_removeargs(struct rpc_rqst *req, __be32 *p,
  *		diropargs to;
  *	};
  */
-static int nfs2_xdr_enc_renameargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_renameargs *args)
+static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_renameargs *args)
 {
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->old_dir, old->name, old->len);
-	encode_diropargs(&xdr, args->new_dir, new->name, new->len);
-	return 0;
+	encode_diropargs(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
@@ -746,15 +720,12 @@ static int nfs2_xdr_enc_renameargs(struct rpc_rqst *req, __be32 *p,
  *		diropargs to;
  *	};
  */
-static int nfs2_xdr_enc_linkargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nfs_linkargs *args)
+static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_linkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fromfh);
-	encode_diropargs(&xdr, args->tofh, args->toname, args->tolen);
-	return 0;
+	encode_fhandle(xdr, args->fromfh);
+	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
@@ -766,16 +737,13 @@ static int nfs2_xdr_enc_linkargs(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_symlinkargs *args)
+static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_symlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_path(&xdr, args->pages, args->pathlen);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_path(xdr, args->pages, args->pathlen);
+	encode_sattr(xdr, args->sattr);
 }
 
 /*
@@ -799,16 +767,13 @@ static void encode_readdirargs(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_readdirargs *args)
+static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdirargs(&xdr, args);
+	encode_readdirargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 					args->count, NFS_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1184,7 +1149,7 @@ int nfs_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
-	.p_encode   =  (kxdrproc_t)nfs2_xdr_enc_##argtype,		\
+	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
 	.p_decode   =  (kxdrproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 15c93ccd90c5..ae1b1a43f05e 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -835,14 +835,11 @@ static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
  *		nfs_fh3  object;
  *	};
  */
-static int nfs3_xdr_enc_getattr3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs_fh *fh)
+static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_fh *fh)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, fh);
-	return 0;
+	encode_nfs_fh3(xdr, fh);
 }
 
 /*
@@ -876,16 +873,13 @@ static void encode_sattrguard3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_sattrargs *args)
+static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_sattrargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
-	encode_sattr3(&xdr, args->sattr);
-	encode_sattrguard3(&xdr, args);
-	return 0;
+	encode_nfs_fh3(xdr, args->fh);
+	encode_sattr3(xdr, args->sattr);
+	encode_sattrguard3(xdr, args);
 }
 
 /*
@@ -895,14 +889,11 @@ static int nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3  what;
  *	};
  */
-static int nfs3_xdr_enc_lookup3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_diropargs *args)
+static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_diropargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
 /*
@@ -920,14 +911,11 @@ static void encode_access3args(struct xdr_stream *xdr,
 	encode_uint32(xdr, args->access);
 }
 
-static int nfs3_xdr_enc_access3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_accessargs *args)
+static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_accessargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_access3args(&xdr, args);
-	return 0;
+	encode_access3args(xdr, args);
 }
 
 /*
@@ -937,16 +925,13 @@ static int nfs3_xdr_enc_access3args(struct rpc_rqst *req, __be32 *p,
  *		nfs_fh3	symlink;
  *	};
  */
-static int nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, __be32 *p,
-				      const struct nfs3_readlinkargs *args)
+static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       const struct nfs3_readlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
+	encode_nfs_fh3(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS3_readlinkres_sz);
-	return 0;
 }
 
 /*
@@ -970,17 +955,14 @@ static void encode_read3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_read3args(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_readargs *args)
+static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_read3args(&xdr, args);
+	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS3_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
@@ -1015,15 +997,12 @@ static void encode_write3args(struct xdr_stream *xdr,
 	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
 }
 
-static int nfs3_xdr_enc_write3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_writeargs *args)
+static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_write3args(&xdr, args);
-	xdr.buf->flags |= XDRBUF_WRITE;
-	return 0;
+	encode_write3args(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
 /*
@@ -1065,15 +1044,12 @@ static void encode_createhow3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_create3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_createargs *args)
+static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_createargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_createhow3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_createhow3(xdr, args);
 }
 
 /*
@@ -1084,15 +1060,12 @@ static int nfs3_xdr_enc_create3args(struct rpc_rqst *req, __be32 *p,
  *		sattr3		attributes;
  *	};
  */
-static int nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs3_mkdirargs *args)
+static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mkdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_sattr3(&xdr, args->sattr);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_sattr3(xdr, args->sattr);
 }
 
 /*
@@ -1115,15 +1088,12 @@ static void encode_symlinkdata3(struct xdr_stream *xdr,
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
-static int nfs3_xdr_enc_symlink3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_symlinkargs *args)
+static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_symlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_symlinkdata3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_symlinkdata3(xdr, args);
 }
 
 /*
@@ -1178,15 +1148,12 @@ static void encode_mknoddata3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs3_mknodargs *args)
+static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mknodargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_mknoddata3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_mknoddata3(xdr, args);
 }
 
 /*
@@ -1196,14 +1163,11 @@ static int nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3  object;
  *	};
  */
-static int nfs3_xdr_enc_remove3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_removeargs *args)
+static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name.name, args->name.len);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
@@ -1214,17 +1178,15 @@ static int nfs3_xdr_enc_remove3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3	to;
  *	};
  */
-static int nfs3_xdr_enc_rename3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_renameargs *args)
+static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_renameargs *args)
 {
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->old_dir, old->name, old->len);
-	encode_diropargs3(&xdr, args->new_dir, new->name, new->len);
-	return 0;
+	encode_diropargs3(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs3(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
@@ -1235,15 +1197,12 @@ static int nfs3_xdr_enc_rename3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3	link;
  *	};
  */
-static int nfs3_xdr_enc_link3args(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs3_linkargs *args)
+static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs3_linkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fromfh);
-	encode_diropargs3(&xdr, args->tofh, args->toname, args->tolen);
-	return 0;
+	encode_nfs_fh3(xdr, args->fromfh);
+	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
@@ -1269,16 +1228,13 @@ static void encode_readdir3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdir3args(&xdr, args);
+	encode_readdir3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1312,16 +1268,13 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, __be32 *p,
-					 const struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+					  const struct nfs3_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdirplus3args(&xdr, args);
+	encode_readdirplus3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1345,57 +1298,49 @@ static void encode_commit3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_commit3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_writeargs *args)
+static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_commit3args(&xdr, args);
-	return 0;
+	encode_commit3args(xdr, args);
 }
 
 #ifdef CONFIG_NFS_V3_ACL
 
-static int nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_getaclargs *args)
+static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_getaclargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
-	encode_uint32(&xdr, args->mask);
+	encode_nfs_fh3(xdr, args->fh);
+	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL))
 		prepare_reply_buffer(req, args->pages, 0,
 					NFSACL_MAXPAGES << PAGE_SHIFT,
 					ACL3_getaclres_sz);
-	return 0;
 }
 
-static int nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_setaclargs *args)
+static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_setaclargs *args)
 {
-	struct xdr_stream xdr;
 	unsigned int base;
 	int error;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, NFS_FH(args->inode));
-	encode_uint32(&xdr, args->mask);
+	encode_nfs_fh3(xdr, NFS_FH(args->inode));
+	encode_uint32(xdr, args->mask);
 	if (args->npages != 0)
-		xdr_write_pages(&xdr, args->pages, 0, args->len);
+		xdr_write_pages(xdr, args->pages, 0, args->len);
 
 	base = req->rq_slen;
-	error = nfsacl_encode(xdr.buf, base, args->inode,
+	error = nfsacl_encode(xdr->buf, base, args->inode,
 			    (args->mask & NFS_ACL) ?
 			    args->acl_access : NULL, 1, 0);
 	BUG_ON(error < 0);
-	error = nfsacl_encode(xdr.buf, base + error, args->inode,
+	error = nfsacl_encode(xdr->buf, base + error, args->inode,
 			    (args->mask & NFS_DFACL) ?
 			    args->acl_default : NULL, 1,
 			    NFS_ACL_DEFAULT);
 	BUG_ON(error < 0);
-	return 0;
 }
 
 #endif  /* CONFIG_NFS_V3_ACL */
@@ -2506,7 +2451,7 @@ out_default:
 #define PROC(proc, argtype, restype, timer)				\
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nfs3_xdr_enc_##argtype##3args,	\
+	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
 	.p_decode    = (kxdrproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
@@ -2549,7 +2494,7 @@ struct rpc_version		nfs_version3 = {
 static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
-		.p_encode = (kxdrproc_t)nfs3_xdr_enc_getacl3args,
+		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
 		.p_decode = (kxdrproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
@@ -2558,7 +2503,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
-		.p_encode = (kxdrproc_t)nfs3_xdr_enc_setacl3args,
+		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
 		.p_decode = (kxdrproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a15fe99fea86..6ec38b3e4a3d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1510,7 +1510,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	hdr->replen += decode_restorefh_maxsz;
 }
 
-static int
+static void
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1521,14 +1521,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 	p = reserve_space(xdr, 2*4);
 	*p++ = cpu_to_be32(1);
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
-	if (arg->acl_len % 4)
-		return -EINVAL;
+	BUG_ON(arg->acl_len % 4);
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(arg->acl_len);
 	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
 	hdr->nops++;
 	hdr->replen += decode_setacl_maxsz;
-	return 0;
 }
 
 static void
@@ -1833,393 +1831,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 /*
  * Encode an ACCESS request
  */
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
+static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_accessargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_access(&xdr, args->access, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_access(xdr, args->access, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP request
  */
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
+static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_lookup_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP_ROOT request
  */
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
+static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_lookup_root_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode REMOVE request
  */
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_remove(&xdr, &args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_remove(xdr, &args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode RENAME request
  */
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
+static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_renameargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->old_dir, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->new_dir, &hdr);
-	encode_rename(&xdr, args->old_name, args->new_name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->old_dir, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->new_dir, &hdr);
+	encode_rename(xdr, args->old_name, args->new_name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LINK request
  */
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
+static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct nfs4_link_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_link(&xdr, args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_link(xdr, args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode CREATE request
  */
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_create_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_create(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_create(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode SYMLINK request
  */
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_create_arg *args)
 {
-	return nfs4_xdr_enc_create(req, p, args);
+	nfs4_xdr_enc_create(req, xdr, args);
 }
 
 /*
  * Encode GETATTR request
  */
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
+static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_getattr_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a CLOSE request
  */
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_close(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_close(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request
  */
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_CONFIRM request
  */
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
+static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs_open_confirmargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops   = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_confirm(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_confirm(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request with no attributes.
  */
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_DOWNGRADE request
  */
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_downgrade(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_downgrade(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
+static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_lock_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lock(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lock(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
+static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_lockt_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lockt(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lockt(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
+static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_locku_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_locku(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_locku(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
-static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
+					   struct xdr_stream *xdr,
+					struct nfs_release_lockowner_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_release_lockowner(xdr, &args->lock_owner, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READLINK request
  */
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
+static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_readlink *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readlink(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readlink(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			args->pgbase, args->pglen);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READDIR request
  */
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
+static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_readdir_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readdir(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readdir(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			 args->pgbase, args->count);
@@ -2227,428 +2194,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
 			__func__, hdr.replen << 2, args->pages,
 			args->pgbase, args->count);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READ request
  */
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_read(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_read(xdr, args, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
 			 args->pages, args->pgbase, args->count);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an SETATTR request
  */
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
+static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_setattrargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_setattr(&xdr, args, args->server, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setattr(xdr, args, args->server, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a GETACL request
  */
-static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
-		struct nfs_getaclargs *args)
+static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_getaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
 	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
-	encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
+	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a WRITE request
  */
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_write(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_write(xdr, args, &hdr);
 	req->rq_snd_buf.flags |= XDRBUF_WRITE;
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  *  a COMMIT request
  */
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_commit(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_commit(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * FSINFO request
  */
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
+static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs4_fsinfo_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_fsinfo(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_fsinfo(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a PATHCONF request
  */
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
+static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_pathconf_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
 			   &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a STATFS request
  */
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
+static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_statfs_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
 			   args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
-				    struct nfs4_server_caps_arg *args)
+static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_server_caps_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
 			   FATTR4_WORD0_LINK_SUPPORT|
 			   FATTR4_WORD0_SYMLINK_SUPPORT|
 			   FATTR4_WORD0_ACLSUPPORT, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_client *clp)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_renew(&xdr, clp, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_renew(xdr, clp, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID request
  */
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
+static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_setclientid *sc)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid(&xdr, sc, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid(xdr, sc, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
+					     struct xdr_stream *xdr,
+					     struct nfs4_setclientid_res *arg)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid_confirm(&xdr, arg, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid_confirm(xdr, arg, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
+static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_delegreturnargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_delegreturn(&xdr, args->stateid, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_delegreturn(xdr, args->stateid, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode FS_LOCATIONS request
  */
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
+static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_fs_locations_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
 	replen = hdr.replen;	/* get the attribute into args->page */
-	encode_fs_locations(&xdr, args->bitmask, &hdr);
+	encode_fs_locations(xdr, args->bitmask, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
 			0, PAGE_SIZE);
 	encode_nops(&hdr);
-	return 0;
 }
 
 #if defined(CONFIG_NFS_V4_1)
 /*
  * EXCHANGE_ID request
  */
-static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
-				    struct nfs41_exchange_id_args *args)
+static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs41_exchange_id_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_exchange_id(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_exchange_id(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a CREATE_SESSION request
  */
-static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs41_create_session_args *args)
+static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs41_create_session_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_create_session(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_create_session(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a DESTROY_SESSION request
  */
-static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
-					struct nfs4_session *session)
+static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
+					 struct xdr_stream *xdr,
+					 struct nfs4_session *session)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_destroy_session(&xdr, session, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_destroy_session(xdr, session, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SEQUENCE request
  */
-static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
-				 struct nfs4_sequence_args *args)
+static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  struct nfs4_sequence_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a GET_LEASE_TIME request
  */
-static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs4_get_lease_time_args *args)
+static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs4_get_lease_time_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->la_seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->la_seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RECLAIM_COMPLETE request
  */
-static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
-				     struct nfs41_reclaim_complete_args *args)
+static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+				struct nfs41_reclaim_complete_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_reclaim_complete(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_reclaim_complete(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode GETDEVICEINFO request
  */
-static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
-				      struct nfs4_getdeviceinfo_args *args)
+static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       struct nfs4_getdeviceinfo_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_getdeviceinfo(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_getdeviceinfo(xdr, args, &hdr);
 
 	/* set up reply kvec. Subtract notification bitmap max size (2)
 	 * so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2583,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
 			 args->pdev->pglen);
 
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  *  Encode LAYOUTGET request
  */
-static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
-				  struct nfs4_layoutget_args *args)
+static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   struct nfs4_layoutget_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
-	encode_layoutget(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutget(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -5368,22 +5291,18 @@ out:
 /*
  * Encode an SETACL request
  */
-static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
+static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_setaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	int status;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	status = encode_setacl(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setacl(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return status;
 }
 
 /*
@@ -6316,7 +6235,7 @@ nfs4_stat_to_errno(int stat)
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
-	.p_encode = (kxdrproc_t)nfs4_xdr_##argtype,		\
+	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
 	.p_decode = (kxdrproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 6529534d7aae..c363efda8ecf 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -499,34 +499,28 @@ out_default:
 /*
  * NB: Without this zero space reservation, callbacks over krb5p fail
  */
-static int nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 void *__unused)
 {
-	struct xdr_stream xdrs, *xdr = &xdrs;
-
-	xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
 	xdr_reserve_space(xdr, 0);
-	return 0;
 }
 
 /*
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
-static int nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
-				  const struct nfsd4_callback *cb)
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+				   const struct nfsd4_callback *cb)
 {
-	struct xdr_stream xdr;
 	const struct nfs4_delegation *args = cb->cb_op;
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = cb->cb_clp->cl_cb_ident,
 		.minorversion = cb->cb_minorversion,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cb_compound4args(&xdr, &hdr);
-	encode_cb_sequence4args(&xdr, cb, &hdr);
-	encode_cb_recall4args(&xdr, args, &hdr);
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_recall4args(xdr, args, &hdr);
 	encode_cb_nops(&hdr);
-	return 0;
 }
 
 
@@ -583,7 +577,7 @@ out_default:
 #define PROC(proc, call, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
-	.p_encode  = (kxdrproc_t)nfs4_xdr_enc_##argtype,		\
+	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
 	.p_decode  = (kxdrproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index b2024757edd5..d88cffbaa6df 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -110,7 +110,7 @@ struct rpc_credops {
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
-	int			(*crwrap_req)(struct rpc_task *, kxdrproc_t,
+	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
 						void *, __be32 *, void *);
 	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
 						void *, __be32 *, void *);
@@ -139,7 +139,7 @@ struct rpc_cred *	rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *
 void			put_rpccred(struct rpc_cred *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
-int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a5a55f284b7d..7b19c4e1ce53 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -89,7 +89,7 @@ struct rpc_version {
  */
 struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
-	kxdrproc_t		p_encode;	/* XDR encode function */
+	kxdreproc_t		p_encode;	/* XDR encode function */
 	kxdrproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 498ab93a81e4..a21cf5378c1d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -33,8 +33,8 @@ struct xdr_netobj {
 };
 
 /*
- * This is the generic XDR function. rqstp is either a rpc_rqst (client
- * side) or svc_rqst pointer (server side).
+ * This is the legacy generic XDR function. rqstp is either a rpc_rqst
+ * (client side) or svc_rqst pointer (server side).
  * Encode functions always assume there's enough room in the buffer.
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
@@ -203,6 +203,11 @@ struct xdr_stream {
 	struct kvec *iov;	/* pointer to the current kvec */
 };
 
+/*
+ * This is the xdr_stream style generic XDR function.
+ */
+typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index afe67849269f..651c9da703cb 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -563,8 +563,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 	return cred->cr_ops->crvalidate(task, p);
 }
 
+static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				   __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+	encode(rqstp, &xdr, obj);
+}
+
 int
-rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -574,7 +583,8 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
-	return encode(rqstp, data, obj);
+	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+	return 0;
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3835ce35e224..42b46f9a670a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1231,9 +1231,19 @@ out_bad:
 	return NULL;
 }
 
+static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				__be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+	encode(rqstp, &xdr, obj);
+}
+
 static inline int
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		   kxdreproc_t encode, struct rpc_rqst *rqstp,
+		   __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	integ_buf;
@@ -1249,9 +1259,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
@@ -1325,7 +1333,8 @@ out:
 
 static inline int
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		  kxdreproc_t encode, struct rpc_rqst *rqstp,
+		  __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	u32		offset;
@@ -1342,9 +1351,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	status = alloc_enc_pages(rqstp);
 	if (status)
@@ -1394,7 +1401,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static int
 gss_wrap_req(struct rpc_task *task,
-	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
+	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
@@ -1407,12 +1414,14 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		status = encode(rqstp, p, obj);
+		gss_wrap_req_encode(encode, rqstp, p, obj);
+		status = 0;
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 		case RPC_GSS_SVC_NONE:
-			status = encode(rqstp, p, obj);
+			gss_wrap_req_encode(encode, rqstp, p, obj);
+			status = 0;
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
 			status = gss_wrap_req_integ(cred, ctx, encode,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 92ce94f5146b..d446a32be667 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1095,7 +1095,7 @@ static void
 rpc_xdr_encode(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	encode;
+	kxdreproc_t	encode;
 	__be32		*p;
 
 	dprint_status(task);
@@ -1776,9 +1776,8 @@ out_overflow:
 	goto out_garbage;
 }
 
-static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
+static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
-	return 0;
 }
 
 static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 43838c72b778..63912a1a2983 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -689,25 +689,21 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  * XDR functions for rpcbind
  */
 
-static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr, RPCB_mappingargs_sz << 2);
+	p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
 	*p++ = cpu_to_be32(rpcb->r_prog);
 	*p++ = cpu_to_be32(rpcb->r_vers);
 	*p++ = cpu_to_be32(rpcb->r_prot);
 	*p   = cpu_to_be32(rpcb->r_port);
-
-	return 0;
 }
 
 static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
@@ -769,27 +765,24 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 	xdr_encode_opaque(p, string, len);
 }
 
-static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers,
 			rpcb->r_netid, rpcb->r_addr);
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
+	p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
 	*p++ = cpu_to_be32(rpcb->r_prog);
 	*p = cpu_to_be32(rpcb->r_vers);
 
-	encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
-	encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
-	encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
-	return 0;
+	encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
+	encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
+	encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 }
 
 static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
@@ -849,7 +842,7 @@ out_fail:
 static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -859,7 +852,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -869,7 +862,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
@@ -882,7 +875,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -892,7 +885,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -902,7 +895,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
@@ -915,7 +908,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -925,7 +918,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -935,7 +928,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
-- 
cgit v1.2.3-71-gd317


From bf2695516db982e90a22fc94f93491b481796bb1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:59:29 +0000
Subject: SUNRPC: New xdr_streams XDR decoder API

Now that all client-side XDR decoder routines use xdr_streams, there
should be no need to support the legacy calling sequence [rpc_rqst *,
__be32 *, RPC res *] anywhere.  We can construct an xdr_stream in the
generic RPC code, instead of in each decoder function.

This is a refactoring change.  It should not cause different behavior.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clnt4xdr.c            |  20 +-
 fs/lockd/clntxdr.c             |  20 +-
 fs/lockd/mon.c                 |  30 +-
 fs/nfs/mount_clnt.c            |  30 +-
 fs/nfs/nfs2xdr.c               |  68 ++---
 fs/nfs/nfs3xdr.c               | 195 ++++++-------
 fs/nfs/nfs4xdr.c               | 619 ++++++++++++++++++++---------------------
 fs/nfsd/nfs4callback.c         |  16 +-
 include/linux/sunrpc/auth.h    |   4 +-
 include/linux/sunrpc/clnt.h    |   2 +-
 include/linux/sunrpc/xdr.h     |   3 +-
 net/sunrpc/auth.c              |  14 +-
 net/sunrpc/auth_gss/auth_gss.c |  13 +-
 net/sunrpc/clnt.c              |   4 +-
 net/sunrpc/rpcb_clnt.c         |  46 ++-
 15 files changed, 518 insertions(+), 566 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 974f1d9cd323..f848b52c67b1 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -529,17 +529,16 @@ out:
 	return error;
 }
 
-static int nlm4_xdr_dec_testres(struct rpc_rqst *req, __be32 *p,
+static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
 				struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm4_testrply(&xdr, result);
+	error = decode_nlm4_testrply(xdr, result);
 out:
 	return error;
 }
@@ -550,17 +549,16 @@ out:
  *		nlm4_stat stat;
  *	};
  */
-static int nlm4_xdr_dec_res(struct rpc_rqst *req, __be32 *p,
+static int nlm4_xdr_dec_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
 			    struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm4_stat(&xdr, &result->status);
+	error = decode_nlm4_stat(xdr, &result->status);
 out:
 	return error;
 }
@@ -575,7 +573,7 @@ out:
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrproc_t)nlm4_xdr_dec_##restype,		\
+	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index c6fda8fb1c5b..180ac34feb9a 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -527,17 +527,16 @@ out:
 	return error;
 }
 
-static int nlm_xdr_dec_testres(struct rpc_rqst *req, __be32 *p,
+static int nlm_xdr_dec_testres(struct rpc_rqst *req,
+			       struct xdr_stream *xdr,
 			       struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm_testrply(&xdr, result);
+	error = decode_nlm_testrply(xdr, result);
 out:
 	return error;
 }
@@ -548,17 +547,16 @@ out:
  *		nlm_stat stat;
  *	};
  */
-static int nlm_xdr_dec_res(struct rpc_rqst *req, __be32 *p,
+static int nlm_xdr_dec_res(struct rpc_rqst *req,
+			   struct xdr_stream *xdr,
 			   struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm_stat(&xdr, &result->status);
+	error = decode_nlm_stat(xdr, &result->status);
 out:
 	return error;
 }
@@ -573,7 +571,7 @@ out:
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrproc_t)nlm_xdr_dec_##restype,		\
+	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index baa77bc9d825..23d7451b2938 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -472,35 +472,35 @@ static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_mon_id(xdr, argp);
 }
 
-static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
-			    struct nsm_res *resp)
+static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nsm_res *resp)
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, 4 + 4);
+	p = xdr_inline_decode(xdr, 4 + 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 	resp->status = be32_to_cpup(p++);
 	resp->state = be32_to_cpup(p);
 
-	dprintk("lockd: xdr_dec_stat_res status %d state %d\n",
-			resp->status, resp->state);
+	dprintk("lockd: %s status %d state %d\n",
+		__func__, resp->status, resp->state);
 	return 0;
 }
 
-static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
-			struct nsm_res *resp)
+static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
+			    struct xdr_stream *xdr,
+			    struct nsm_res *resp)
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 	resp->state = be32_to_cpup(p);
 
-	dprintk("lockd: xdr_dec_stat state %d\n", resp->state);
+	dprintk("lockd: %s state %d\n", __func__, resp->state);
 	return 0;
 }
 
@@ -517,7 +517,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat_res,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
@@ -526,7 +526,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
 		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 979ebd7af3cb..697e07235f30 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -340,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
-			    struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_status(&xdr, res);
+	status = decode_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	return decode_fhandle(&xdr, res);
+	return decode_fhandle(xdr, res);
 }
 
 static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -434,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
-			     struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_fhs_status(&xdr, res);
+	status = decode_fhs_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	status = decode_fhandle3(&xdr, res);
+	status = decode_fhandle3(xdr, res);
 	if (unlikely(status != 0)) {
 		res->errno = -EBADHANDLE;
 		return 0;
 	}
-	return decode_auth_flavors(&xdr, res);
+	return decode_auth_flavors(xdr, res);
 }
 
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -476,7 +472,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8f3acbec761f..51f1cfa04d27 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -783,15 +783,13 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
  * "NFS: Network File System Protocol Specification".
  */
 
-static int nfs2_xdr_dec_stat(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 			     void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
@@ -802,22 +800,16 @@ out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_attrstat(&xdr, result);
+	return decode_attrstat(xdr, result);
 }
 
-static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_diropok *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_diropres(&xdr, result);
+	return decode_diropres(xdr, result);
 }
 
 /*
@@ -830,20 +822,18 @@ static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, __be32 *p,
  *		void;
  *	};
  */
-static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req, __be32 *p,
-				    void *__unused)
+static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
+				    struct xdr_stream *xdr, void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_path(&xdr);
+	error = decode_path(xdr);
 out:
 	return error;
 out_default:
@@ -861,39 +851,33 @@ out_default:
  *		void;
  *	};
  */
-static int nfs2_xdr_dec_readres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				struct nfs_readres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(&xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_nfsdata(&xdr, result);
+	error = decode_nfsdata(xdr, result);
 out:
 	return error;
 out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
-
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_attrstat(&xdr, result->fattr);
+	return decode_attrstat(xdr, result->fattr);
 }
 
 /**
@@ -1008,20 +992,18 @@ out_cheating:
 	goto out;
 }
 
-static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, __be32 *p,
-				   void *__unused)
+static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
+				   struct xdr_stream *xdr, void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_readdirok(&xdr);
+	error = decode_readdirok(xdr);
 out:
 	return error;
 out_default:
@@ -1062,20 +1044,18 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				  struct nfs2_fsstat *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_info(&xdr, result);
+	error = decode_info(xdr, result);
 out:
 	return error;
 out_default:
@@ -1150,7 +1130,7 @@ int nfs_stat_to_errno(enum nfs_stat status)
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
 	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
-	.p_decode   =  (kxdrproc_t)nfs2_xdr_dec_##restype,		\
+	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
 	.p_timer    =  timer,						\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index ae1b1a43f05e..df30a26cc4fa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1366,20 +1366,19 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
  *		void;
  *	};
  */
-static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_fattr3(&xdr, result);
+	error = decode_fattr3(xdr, result);
 out:
 	return error;
 out_default:
@@ -1404,18 +1403,17 @@ out_default:
  *		SETATTR3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result);
+	error = decode_wcc_data(xdr, result);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1446,30 +1444,29 @@ out_status:
  *		LOOKUP3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_diropres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_nfs_fh3(&xdr, result->fh);
+	error = decode_nfs_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -1494,23 +1491,22 @@ out_default:
  *		ACCESS3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_accessres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_uint32(&xdr, &result->access);
+	error = decode_uint32(xdr, &result->access);
 out:
 	return error;
 out_default:
@@ -1536,23 +1532,22 @@ out_default:
  *		READLINK3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result);
+	error = decode_post_op_attr(xdr, result);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_nfspath3(&xdr);
+	error = decode_nfspath3(xdr);
 out:
 	return error;
 out_default:
@@ -1620,23 +1615,21 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_readres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_read3resok(&xdr, result);
+	error = decode_read3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -1692,23 +1685,21 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				  struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_write3resok(&xdr, result);
+	error = decode_write3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -1757,24 +1748,23 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_create3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_diropres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_create3resok(&xdr, result);
+	error = decode_create3resok(xdr, result);
 out:
 	return error;
 out_default:
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -1798,18 +1788,17 @@ out_default:
  *		REMOVE3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_removeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1840,21 +1829,20 @@ out_status:
  *		RENAME3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_renameres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->old_fattr);
+	error = decode_wcc_data(xdr, result->old_fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->new_fattr);
+	error = decode_wcc_data(xdr, result->new_fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1885,21 +1873,19 @@ out_status:
  *		LINK3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs3_linkres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -2085,24 +2071,23 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs3_readdirres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_readdir3resok(&xdr, result);
+	error = decode_readdir3resok(xdr, result);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -2154,23 +2139,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fsstat *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_fsstat3resok(&xdr, result);
+	error = decode_fsstat3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2231,23 +2215,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fsinfo *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_fsinfo3resok(&xdr, result);
+	error = decode_fsinfo3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2295,23 +2278,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs_pathconf *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_pathconf3resok(&xdr, result);
+	error = decode_pathconf3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2337,23 +2319,22 @@ out_status:
  *		COMMIT3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_writeverf3(&xdr, result->verf->verifier);
+	error = decode_writeverf3(xdr, result->verf->verifier);
 out:
 	return error;
 out_status:
@@ -2406,40 +2387,38 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_getaclres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_getacl3resok(&xdr, result);
+	error = decode_getacl3resok(xdr, result);
 out:
 	return error;
 out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_post_op_attr(&xdr, result);
+	error = decode_post_op_attr(xdr, result);
 out:
 	return error;
 out_default:
@@ -2452,7 +2431,7 @@ out_default:
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
-	.p_decode    = (kxdrproc_t)nfs3_xdr_dec_##restype##3res,	\
+	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
 	.p_timer     = timer,						\
@@ -2495,7 +2474,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
-		.p_decode = (kxdrproc_t)nfs3_xdr_dec_getacl3res,
+		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
 		.p_timer = 1,
@@ -2504,7 +2483,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
 		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
-		.p_decode = (kxdrproc_t)nfs3_xdr_dec_setacl3res,
+		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
 		.p_timer = 0,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6ec38b3e4a3d..f3f99156bfcb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5013,26 +5013,26 @@ out_overflow:
 /*
  * Decode OPEN_DOWNGRADE response
  */
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_downgrade(&xdr, res);
+	status = decode_open_downgrade(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5041,26 +5041,25 @@ out:
 /*
  * Decode ACCESS response
  */
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_accessres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
-	status = decode_access(&xdr, res);
+	status = decode_access(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5069,26 +5068,28 @@ out:
 /*
  * Decode LOOKUP response
  */
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_lookup(&xdr)) != 0)
+	status = decode_lookup(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server
+	status = decode_getfattr(xdr, res->fattr, res->server
 			,!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5097,23 +5098,25 @@ out:
 /*
  * Decode LOOKUP_ROOT response
  */
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putrootfh(&xdr)) != 0)
+	status = decode_putrootfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) == 0)
-		status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfh(xdr, res->fh);
+	if (status == 0)
+		status = decode_getfattr(xdr, res->fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5122,24 +5125,25 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_removeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+	status = decode_remove(xdr, &res->cinfo);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5148,34 +5152,38 @@ out:
 /*
  * Decode RENAME response
  */
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_renameres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+	status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
+	if (status)
 		goto out;
 	/* Current FH is target directory */
-	if (decode_getfattr(&xdr, res->new_fattr, res->server,
+	if (decode_getfattr(xdr, res->new_fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->old_fattr, res->server,
+	decode_getfattr(xdr, res->old_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5184,37 +5192,41 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs4_link_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+	status = decode_link(xdr, &res->cinfo);
+	if (status)
 		goto out;
 	/*
 	 * Note order: OP_LINK leaves the directory as the current
 	 *             filehandle.
 	 */
-	if (decode_getfattr(&xdr, res->dir_attr, res->server,
+	if (decode_getfattr(xdr, res->dir_attr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5223,33 +5235,37 @@ out:
 /*
  * Decode CREATE response
  */
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_create_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+	status = decode_create(xdr, &res->dir_cinfo);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	if (decode_getfattr(&xdr, res->fattr, res->server,
+	if (decode_getfattr(xdr, res->fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_fattr, res->server,
+	decode_getfattr(xdr, res->dir_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5258,31 +5274,31 @@ out:
 /*
  * Decode SYMLINK response
  */
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_create_res *res)
 {
-	return nfs4_xdr_dec_create(rqstp, p, res);
+	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
 
 /*
  * Decode GETATTR response
  */
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_getattr_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5309,24 +5325,22 @@ static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_setaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 out:
 	return status;
 }
@@ -5335,24 +5349,22 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_getaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(&xdr, rqstp, &res->acl_len);
+	status = decode_getacl(xdr, rqstp, &res->acl_len);
 
 out:
 	return status;
@@ -5361,23 +5373,22 @@ out:
 /*
  * Decode CLOSE response
  */
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_close(&xdr, res);
+	status = decode_close(xdr, res);
 	if (status != 0)
 		goto out;
 	/*
@@ -5386,7 +5397,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
 	 * 	an ESTALE error. Shouldn't be a problem,
 	 * 	though, since fattr->valid will remain unset.
 	 */
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5395,36 +5406,35 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_savefh(&xdr);
+	status = decode_savefh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	if (decode_getfh(&xdr, &res->fh) != 0)
+	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(&xdr, res->f_attr, res->server,
+	if (decode_getfattr(xdr, res->f_attr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if (decode_restorefh(&xdr) != 0)
+	if (decode_restorefh(xdr) != 0)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5433,20 +5443,20 @@ out:
 /*
  * Decode OPEN_CONFIRM response
  */
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs_open_confirmres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_confirm(&xdr, res);
+	status = decode_open_confirm(xdr, res);
 out:
 	return status;
 }
@@ -5454,26 +5464,26 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->f_attr, res->server,
+	decode_getfattr(xdr, res->f_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5482,26 +5492,26 @@ out:
 /*
  * Decode SETATTR response
  */
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nfs_setattrres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5510,23 +5520,22 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_lock_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lock(&xdr, res);
+	status = decode_lock(xdr, res);
 out:
 	return status;
 }
@@ -5534,23 +5543,22 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_lockt_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lockt(&xdr, res);
+	status = decode_lockt(xdr, res);
 out:
 	return status;
 }
@@ -5558,61 +5566,58 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_locku_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_locku(&xdr, res);
+	status = decode_locku(xdr, res);
 out:
 	return status;
 }
 
-static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
+					  struct xdr_stream *xdr, void *dummy)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_release_lockowner(&xdr);
+		status = decode_release_lockowner(xdr);
 	return status;
 }
 
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
 				 struct nfs4_readlink_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_readlink(&xdr, rqstp);
+	status = decode_readlink(xdr, rqstp);
 out:
 	return status;
 }
@@ -5620,23 +5625,22 @@ out:
 /*
  * Decode READDIR response
  */
-static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res)
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_readdir_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_readdir(&xdr, rqstp, res);
+	status = decode_readdir(xdr, rqstp, res);
 out:
 	return status;
 }
@@ -5644,23 +5648,22 @@ out:
 /*
  * Decode Read response
  */
-static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res)
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_readres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_read(&xdr, rqstp, res);
+	status = decode_read(xdr, rqstp, res);
 	if (!status)
 		status = res->count;
 out:
@@ -5670,26 +5673,25 @@ out:
 /*
  * Decode WRITE response
  */
-static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_writeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_write(&xdr, res);
+	status = decode_write(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 	if (!status)
 		status = res->count;
@@ -5700,26 +5702,25 @@ out:
 /*
  * Decode COMMIT response
  */
-static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_writeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_commit(&xdr, res);
+	status = decode_commit(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5728,85 +5729,80 @@ out:
 /*
  * Decode FSINFO response
  */
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
 			       struct nfs4_fsinfo_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, res->fsinfo);
+		status = decode_fsinfo(xdr, res->fsinfo);
 	return status;
 }
 
 /*
  * Decode PATHCONF response
  */
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs4_pathconf_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_pathconf(&xdr, res->pathconf);
+		status = decode_pathconf(xdr, res->pathconf);
 	return status;
 }
 
 /*
  * Decode STATFS response
  */
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
 			       struct nfs4_statfs_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_statfs(&xdr, res->fsstat);
+		status = decode_statfs(xdr, res->fsstat);
 	return status;
 }
 
 /*
  * Decode GETATTR_BITMAP response
  */
-static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    struct nfs4_server_caps_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, req);
+	status = decode_sequence(xdr, &res->seq_res, req);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	status = decode_server_caps(&xdr, res);
+	status = decode_server_caps(xdr, res);
 out:
 	return status;
 }
@@ -5814,79 +5810,77 @@ out:
 /*
  * Decode RENEW response
  */
-static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      void *__unused)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_renew(&xdr);
+		status = decode_renew(xdr);
 	return status;
 }
 
 /*
  * Decode SETCLIENTID response
  */
-static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
-		struct nfs4_setclientid_res *res)
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    struct nfs4_setclientid_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_setclientid(&xdr, res);
+		status = decode_setclientid(xdr, res);
 	return status;
 }
 
 /*
  * Decode SETCLIENTID_CONFIRM response
  */
-static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
+					    struct xdr_stream *xdr,
+					    struct nfs_fsinfo *fsinfo)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_setclientid_confirm(&xdr);
+		status = decode_setclientid_confirm(xdr);
 	if (!status)
-		status = decode_putrootfh(&xdr);
+		status = decode_putrootfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, fsinfo);
+		status = decode_fsinfo(xdr, fsinfo);
 	return status;
 }
 
 /*
  * Decode DELEGRETURN response
  */
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs4_delegreturnres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
-	status = decode_delegreturn(&xdr);
+	status = decode_delegreturn(xdr);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5895,26 +5889,27 @@ out:
 /*
  * Decode FS_LOCATIONS response
  */
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs4_fs_locations_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, req);
+	status = decode_sequence(xdr, &res->seq_res, req);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_lookup(&xdr)) != 0)
+	status = decode_lookup(xdr);
+	if (status)
 		goto out;
-	xdr_enter_page(&xdr, PAGE_SIZE);
-	status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+	xdr_enter_page(xdr, PAGE_SIZE);
+	status = decode_getfattr(xdr, &res->fs_locations->fattr,
 				 res->fs_locations->server,
 				 !RPC_IS_ASYNC(req->rq_task));
 out:
@@ -5925,129 +5920,122 @@ out:
 /*
  * Decode EXCHANGE_ID response
  */
-static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
 				    void *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_exchange_id(&xdr, res);
+		status = decode_exchange_id(xdr, res);
 	return status;
 }
 
 /*
  * Decode CREATE_SESSION response
  */
-static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
 				       struct nfs41_create_session_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_create_session(&xdr, res);
+		status = decode_create_session(xdr, res);
 	return status;
 }
 
 /*
  * Decode DESTROY_SESSION response
  */
-static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
-					void *dummy)
+static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_destroy_session(&xdr, dummy);
+		status = decode_destroy_session(xdr, res);
 	return status;
 }
 
 /*
  * Decode SEQUENCE response
  */
-static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
 				 struct nfs4_sequence_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, res, rqstp);
+		status = decode_sequence(xdr, res, rqstp);
 	return status;
 }
 
 /*
  * Decode GET_LEASE_TIME response
  */
-static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
 				       struct nfs4_get_lease_time_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+		status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
 	if (!status)
-		status = decode_putrootfh(&xdr);
+		status = decode_putrootfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, res->lr_fsinfo);
+		status = decode_fsinfo(xdr, res->lr_fsinfo);
 	return status;
 }
 
 /*
  * Decode RECLAIM_COMPLETE response
  */
-static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
+					 struct xdr_stream *xdr,
 					 struct nfs41_reclaim_complete_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, rqstp);
+		status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (!status)
-		status = decode_reclaim_complete(&xdr, (void *)NULL);
+		status = decode_reclaim_complete(xdr, (void *)NULL);
 	return status;
 }
 
 /*
  * Decode GETDEVINFO response
  */
-static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
+				      struct xdr_stream *xdr,
 				      struct nfs4_getdeviceinfo_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status != 0)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status != 0)
 		goto out;
-	status = decode_getdeviceinfo(&xdr, res->pdev);
+	status = decode_getdeviceinfo(xdr, res->pdev);
 out:
 	return status;
 }
@@ -6055,24 +6043,23 @@ out:
 /*
  * Decode LAYOUTGET response
  */
-static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
 				  struct nfs4_layoutget_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_layoutget(&xdr, rqstp, res);
+	status = decode_layoutget(xdr, rqstp, res);
 out:
 	return status;
 }
@@ -6236,7 +6223,7 @@ nfs4_stat_to_errno(int stat)
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
 	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
-	.p_decode = (kxdrproc_t)nfs4_xdr_##restype,		\
+	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
 	.p_statidx = NFSPROC4_CLNT_##proc,			\
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c363efda8ecf..21a63da305ff 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -533,7 +533,8 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Protocol".
  */
 
-static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				void *__unused)
 {
 	return 0;
 }
@@ -541,26 +542,25 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
 /*
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
-static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
 				  struct nfsd4_callback *cb)
 {
-	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr;
 	enum nfsstat4 nfserr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_cb_compound4res(&xdr, &hdr);
+	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
 		goto out;
 
 	if (cb != NULL) {
-		status = decode_cb_sequence4res(&xdr, cb);
+		status = decode_cb_sequence4res(xdr, cb);
 		if (unlikely(status))
 			goto out;
 	}
 
-	status = decode_cb_op_status(&xdr, OP_CB_RECALL, &nfserr);
+	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
 	if (unlikely(status))
 		goto out;
 	if (unlikely(nfserr != NFS4_OK))
@@ -578,7 +578,7 @@ out_default:
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
 	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
-	.p_decode  = (kxdrproc_t)nfs4_xdr_dec_##restype,		\
+	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
 	.p_statidx = NFSPROC4_CB_##call,				\
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d88cffbaa6df..8521067ed4f7 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -112,7 +112,7 @@ struct rpc_credops {
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
 	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
 						void *, __be32 *, void *);
-	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
+	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
 						void *, __be32 *, void *);
 };
 
@@ -140,7 +140,7 @@ void			put_rpccred(struct rpc_cred *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
 int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
-int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 7b19c4e1ce53..ef9476a36ff7 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -90,7 +90,7 @@ struct rpc_version {
 struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
 	kxdreproc_t		p_encode;	/* XDR encode function */
-	kxdrproc_t		p_decode;	/* XDR decode function */
+	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_count;	/* call count */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a21cf5378c1d..9a21e8102c42 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -204,9 +204,10 @@ struct xdr_stream {
 };
 
 /*
- * This is the xdr_stream style generic XDR function.
+ * These are the xdr_stream style generic XDR encode and decode functions.
  */
 typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 651c9da703cb..67e31276682a 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -587,8 +587,18 @@ rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 	return 0;
 }
 
+static int
+rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+			  __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+	return decode(rqstp, &xdr, obj);
+}
+
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -599,7 +609,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 	/* By default, we decode the arguments normally. */
-	return decode(rqstp, data, obj);
+	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 42b46f9a670a..45dbf1521b9a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1503,10 +1503,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	return 0;
 }
 
+static int
+gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+		      __be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	return decode(rqstp, &xdr, obj);
+}
 
 static int
 gss_unwrap_resp(struct rpc_task *task,
-		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
+		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1537,7 +1546,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 out_decode:
-	status = decode(rqstp, p, obj);
+	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d446a32be667..4e8210dcda72 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1535,7 +1535,7 @@ call_decode(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
+	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
 	__be32		*p;
 
 	dprintk("RPC: %5u call_decode (status %d)\n",
@@ -1780,7 +1780,7 @@ static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 	return 0;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 63912a1a2983..c652e4cc9fe9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -706,18 +706,16 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 	*p   = cpu_to_be32(rpcb->r_port);
 }
 
-static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
 	unsigned long port;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
 	rpcb->r_port = 0;
 
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
@@ -731,20 +729,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	return 0;
 }
 
-static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
 			unsigned int *boolp)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
 	*boolp = 0;
-	if (*p)
+	if (*p != xdr_zero)
 		*boolp = 1;
 
 	dprintk("RPC: %5u RPCB_%s call %s\n",
@@ -785,20 +781,18 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 }
 
-static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 {
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 	u32 len;
 
 	rpcb->r_port = 0;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		goto out_fail;
 	len = be32_to_cpup(p);
@@ -816,7 +810,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
 	if (unlikely(len > RPCBIND_MAXUADDRLEN))
 		goto out_fail;
 
-	p = xdr_inline_decode(&xdr, len);
+	p = xdr_inline_decode(xdr, len);
 	if (unlikely(p == NULL))
 		goto out_fail;
 	dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
@@ -843,7 +837,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -853,7 +847,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -863,7 +857,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getport,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -876,7 +870,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -886,7 +880,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -896,7 +890,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -909,7 +903,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -919,7 +913,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -929,7 +923,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
-- 
cgit v1.2.3-71-gd317


From 7db836d4a427c3c64406b00b6d8d745d6335d72a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 15:05:42 +0000
Subject: lockd: Split nlm_release_call()

The nlm_release_call() function is invoked from both the server and
the client side.  We're about to introduce a distinct server- and
client-side nlm_release_host(), so nlm_release_call() must first be
split into a client-side and a server-side version.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c         | 12 ++++++------
 fs/lockd/svc4proc.c         |  4 ++--
 fs/lockd/svclock.c          |  4 ++--
 fs/lockd/svcproc.c          | 12 ++++++++++--
 include/linux/lockd/lockd.h |  3 ++-
 5 files changed, 22 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 332c54cf75e0..fbc6617f76c4 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -211,7 +211,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 	return NULL;
 }
 
-void nlm_release_call(struct nlm_rqst *call)
+void nlmclnt_release_call(struct nlm_rqst *call)
 {
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
@@ -222,7 +222,7 @@ void nlm_release_call(struct nlm_rqst *call)
 
 static void nlmclnt_rpc_release(void *data)
 {
-	nlm_release_call(data);
+	nlmclnt_release_call(data);
 }
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -436,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 			status = nlm_stat_to_errno(req->a_res.status);
 	}
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 }
 
@@ -593,7 +593,7 @@ again:
 out_unblock:
 	nlmclnt_finish_block(block);
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 out_unlock:
 	/* Fatal error: ensure that we remove the lock altogether */
@@ -694,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	/* What to do now? I'm out of my depth... */
 	status = -ENOLCK;
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 }
 
@@ -755,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 			NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status == 0 && req->a_res.status == nlm_lck_denied)
 		status = -ENOLCK;
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 }
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 38d261192453..c187422026d8 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -229,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlm4svc_callback_release(void *data)
 {
-	nlm_release_call(data);
+	nlmsvc_release_call(data);
 }
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -261,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 
 	stat = func(rqstp, argp, &call->a_res);
 	if (stat != 0) {
-		nlm_release_call(call);
+		nlmsvc_release_call(call);
 		return stat;
 	}
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9266c4600208..6e31695d046f 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -234,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 failed_free:
 	kfree(block);
 failed:
-	nlm_release_call(call);
+	nlmsvc_release_call(call);
 	return NULL;
 }
 
@@ -267,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
 	mutex_unlock(&file->f_mutex);
 
 	nlmsvc_freegrantargs(block->b_call);
-	nlm_release_call(block->b_call);
+	nlmsvc_release_call(block->b_call);
 	nlm_release_file(block->b_file);
 	kfree(block->b_fl);
 	kfree(block);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0caea5310ac3..0df65ec29e43 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -257,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 			-task->tk_status);
 }
 
+void nlmsvc_release_call(struct nlm_rqst *call)
+{
+	if (!atomic_dec_and_test(&call->a_count))
+		return;
+	nlm_release_host(call->a_host);
+	kfree(call);
+}
+
 static void nlmsvc_callback_release(void *data)
 {
-	nlm_release_call(data);
+	nlmsvc_release_call(data);
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -291,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 
 	stat = func(rqstp, argp, &call->a_res);
 	if (stat != 0) {
-		nlm_release_call(call);
+		nlmsvc_release_call(call);
 		return stat;
 	}
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 2dee05e5119a..a32ba62455af 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -202,9 +202,9 @@ extern u32			nsm_local_state;
  * Lockd client functions
  */
 struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-void		  nlm_release_call(struct nlm_rqst *);
 int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+void		  nlmclnt_release_call(struct nlm_rqst *);
 struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
 void		  nlmclnt_finish_block(struct nlm_wait *block);
 int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
@@ -267,6 +267,7 @@ unsigned long	  nlmsvc_retry_blocked(void);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
 					nlm_host_match_fn_t match);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
+void		  nlmsvc_release_call(struct nlm_rqst *);
 
 /*
  * File handling for the server personality
-- 
cgit v1.2.3-71-gd317


From 8ea6ecc8b0759756a766c05dc7c98c51ec90de37 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 15:05:52 +0000
Subject: lockd: Create client-side nlm_host cache

NFS clients don't need the garbage collection processing that is
performed on nlm_host structures.  The client picks up an nlm_host at
mount time and holds a reference to it until the file system is
unmounted.

Servers, on the other hand, don't have a precise way to tell when an
nlm_host is no longer being used, so zero refcount nlm_host entries
are left to expire in the cache after a time.

Basically there's nothing holding a reference to an nlm_host between
individual server-side NLM requests, but we can't afford the expense
of recreating them for every new NLM request from a client.  The
nlm_host cache adds some lifetime hysteresis to entries in the cache
so the next time a particular nlm_host is needed, it's likely to be
discovered by a lookup rather than created from whole cloth.

With the new implementation, client nlm_host cache items are no longer
garbage collected, and are destroyed directly by a new release
function specialized for client entries, nlmclnt_release_host().  They
are cached in their own data structure, and have their own lookup
logic, simplified and specialized for client nlm_host entries.

However, the client nlm_host cache still shares reboot recovery logic
with the server nlm_host cache.  The NSM "peer rebooted" downcall for
clients and servers still come through the same RPC call.  This is a
legacy formal API that would be difficult to alter, and besides, the
user space NSM implementation can't tell the difference between peers
that are clients or servers.

For this reason, the client cache continues to share the
nlm_host_mutex (and reboot recovery logic) with the server cache.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c         |  4 +--
 fs/lockd/clntproc.c         |  6 ++--
 fs/lockd/host.c             | 81 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/lockd/lockd.h |  1 +
 4 files changed, 78 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 25509eb28fd7..8d4ea8351e3d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
  */
 void nlmclnt_done(struct nlm_host *host)
 {
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	lockd_down();
 }
 EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -273,7 +273,7 @@ restart:
 	spin_unlock(&nlm_blocked_lock);
 
 	/* Release host handle after use */
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	lockd_down();
 	return 0;
 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fbc6617f76c4..adb45ec9038c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -58,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
 		return;
 	list_del(&lockowner->list);
 	spin_unlock(&lockowner->host->h_lock);
-	nlm_release_host(lockowner->host);
+	nlmclnt_release_host(lockowner->host);
 	kfree(lockowner);
 }
 
@@ -207,7 +207,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 		printk("nlm_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 	}
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	return NULL;
 }
 
@@ -215,7 +215,7 @@ void nlmclnt_release_call(struct nlm_rqst *call)
 {
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
-	nlm_release_host(call->a_host);
+	nlmclnt_release_host(call->a_host);
 	nlmclnt_release_lockargs(call);
 	kfree(call);
 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index e58e1426d161..c6942fb4bd0d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,6 +26,7 @@
 #define NLM_HOST_COLLECT	(120 * HZ)
 
 static struct hlist_head	nlm_hosts[NLM_HOST_NRHASH];
+static struct hlist_head	nlm_client_hosts[NLM_HOST_NRHASH];
 
 #define for_each_host(host, pos, chain, table) \
 	for ((chain) = (table); \
@@ -288,12 +289,76 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.noresvport	= noresvport,
 	};
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+	struct nlm_host	*host;
+	struct nsm_handle *nsm = NULL;
 
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
 			(hostname ? hostname : "<none>"), version,
 			(protocol == IPPROTO_UDP ? "udp" : "tcp"));
 
-	return nlm_lookup_host(&ni);
+	mutex_lock(&nlm_host_mutex);
+
+	chain = &nlm_client_hosts[nlm_hash_address(sap)];
+	hlist_for_each_entry(host, pos, chain, h_hash) {
+		if (!rpc_cmp_addr(nlm_addr(host), sap))
+			continue;
+
+		/* Same address. Share an NSM handle if we already have one */
+		if (nsm == NULL)
+			nsm = host->h_nsmhandle;
+
+		if (host->h_proto != protocol)
+			continue;
+		if (host->h_version != version)
+			continue;
+
+		nlm_get_host(host);
+		dprintk("lockd: %s found host %s (%s)\n", __func__,
+			host->h_name, host->h_addrbuf);
+		goto out;
+	}
+
+	host = nlm_alloc_host(&ni, nsm);
+	if (unlikely(host == NULL))
+		goto out;
+
+	hlist_add_head(&host->h_hash, chain);
+	nrhosts++;
+
+	dprintk("lockd: %s created host %s (%s)\n", __func__,
+		host->h_name, host->h_addrbuf);
+
+out:
+	mutex_unlock(&nlm_host_mutex);
+	return host;
+}
+
+/**
+ * nlmclnt_release_host - release client nlm_host
+ * @host: nlm_host to release
+ *
+ */
+void nlmclnt_release_host(struct nlm_host *host)
+{
+	if (host == NULL)
+		return;
+
+	dprintk("lockd: release client host %s\n", host->h_name);
+
+	BUG_ON(atomic_read(&host->h_count) < 0);
+	BUG_ON(host->h_server);
+
+	if (atomic_dec_and_test(&host->h_count)) {
+		BUG_ON(!list_empty(&host->h_lockowners));
+		BUG_ON(!list_empty(&host->h_granted));
+		BUG_ON(!list_empty(&host->h_reclaim));
+
+		mutex_lock(&nlm_host_mutex);
+		nlm_destroy_host_locked(host);
+		mutex_unlock(&nlm_host_mutex);
+	}
 }
 
 /**
@@ -515,16 +580,14 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
 	 * To avoid processing a host several times, we match the nsmstate.
 	 */
 	while ((host = next_host_state(nlm_hosts, nsm, info)) != NULL) {
-		if (host->h_server) {
-			/* We're server for this guy, just ditch
-			 * all the locks he held. */
-			nlmsvc_free_host_resources(host);
-		} else {
-			/* He's the server, initiate lock recovery. */
-			nlmclnt_recovery(host);
-		}
+		nlmsvc_free_host_resources(host);
 		nlm_release_host(host);
 	}
+	while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
+		nlmclnt_recovery(host);
+		nlmclnt_release_host(host);
+	}
+
 	nsm_release(nsm);
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index a32ba62455af..6c2a0e2f298e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -223,6 +223,7 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
 					const u32 version,
 					const char *hostname,
 					int noresvport);
+void		  nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
 					const size_t hostname_len);
-- 
cgit v1.2.3-71-gd317


From 67216b94d498f5880d8bba2a6b841880739dd524 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 15:06:12 +0000
Subject: lockd: Clean up nlmsvc_lookup_host()

Clean up.

Change nlmsvc_lookup_host() to be purpose-built for server-side
nlm_host management.  This replaces the generic nlm_lookup_host()
helper function, just like on the client side.  The lookup logic is
specialized for server host lookups.

The server side cache also gets its own specialized equivalent of the
nlm_release_host() function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c             | 91 +++++++++++++++++++++++++++++++++++----------
 fs/lockd/svc4proc.c         | 16 ++++----
 fs/lockd/svcproc.c          | 18 ++++-----
 include/linux/lockd/lockd.h |  2 +-
 4 files changed, 90 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c6942fb4bd0d..0250b0e4f5e9 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -383,6 +383,10 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 				    const char *hostname,
 				    const size_t hostname_len)
 {
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+	struct nlm_host	*host = NULL;
+	struct nsm_handle *nsm = NULL;
 	struct sockaddr_in sin = {
 		.sin_family	= AF_INET,
 	};
@@ -404,6 +408,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 			(int)hostname_len, hostname, rqstp->rq_vers,
 			(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
 
+	mutex_lock(&nlm_host_mutex);
+
 	switch (ni.sap->sa_family) {
 	case AF_INET:
 		sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
@@ -414,10 +420,73 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 		ni.src_sap = (struct sockaddr *)&sin6;
 		break;
 	default:
-		return NULL;
+		dprintk("lockd: %s failed; unrecognized address family\n",
+			__func__);
+		goto out;
 	}
 
-	return nlm_lookup_host(&ni);
+	if (time_after_eq(jiffies, next_gc))
+		nlm_gc_hosts();
+
+	chain = &nlm_hosts[nlm_hash_address(ni.sap)];
+	hlist_for_each_entry(host, pos, chain, h_hash) {
+		if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
+			continue;
+
+		/* Same address. Share an NSM handle if we already have one */
+		if (nsm == NULL)
+			nsm = host->h_nsmhandle;
+
+		if (host->h_proto != ni.protocol)
+			continue;
+		if (host->h_version != ni.version)
+			continue;
+		if (!rpc_cmp_addr(nlm_srcaddr(host), ni.src_sap))
+			continue;
+
+		/* Move to head of hash chain. */
+		hlist_del(&host->h_hash);
+		hlist_add_head(&host->h_hash, chain);
+
+		nlm_get_host(host);
+		dprintk("lockd: %s found host %s (%s)\n",
+			__func__, host->h_name, host->h_addrbuf);
+		goto out;
+	}
+
+	host = nlm_alloc_host(&ni, nsm);
+	if (unlikely(host == NULL))
+		goto out;
+
+	memcpy(nlm_srcaddr(host), ni.src_sap, ni.src_len);
+	host->h_srcaddrlen = ni.src_len;
+	hlist_add_head(&host->h_hash, chain);
+	nrhosts++;
+
+	dprintk("lockd: %s created host %s (%s)\n",
+		__func__, host->h_name, host->h_addrbuf);
+
+out:
+	mutex_unlock(&nlm_host_mutex);
+	return host;
+}
+
+/**
+ * nlmsvc_release_host - release server nlm_host
+ * @host: nlm_host to release
+ *
+ * Host is destroyed later in nlm_gc_host().
+ */
+void nlmsvc_release_host(struct nlm_host *host)
+{
+	if (host == NULL)
+		return;
+
+	dprintk("lockd: release server host %s\n", host->h_name);
+
+	BUG_ON(atomic_read(&host->h_count) < 0);
+	BUG_ON(!host->h_server);
+	atomic_dec(&host->h_count);
 }
 
 /*
@@ -517,22 +586,6 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
 	return host;
 }
 
-/*
- * Release NLM host after use
- */
-void nlm_release_host(struct nlm_host *host)
-{
-	if (host != NULL) {
-		dprintk("lockd: release host %s\n", host->h_name);
-		BUG_ON(atomic_read(&host->h_count) < 0);
-		if (atomic_dec_and_test(&host->h_count)) {
-			BUG_ON(!list_empty(&host->h_lockowners));
-			BUG_ON(!list_empty(&host->h_granted));
-			BUG_ON(!list_empty(&host->h_reclaim));
-		}
-	}
-}
-
 static struct nlm_host *next_host_state(struct hlist_head *cache,
 					struct nsm_handle *nsm,
 					const struct nlm_reboot *info)
@@ -581,7 +634,7 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
 	 */
 	while ((host = next_host_state(nlm_hosts, nsm, info)) != NULL) {
 		nlmsvc_free_host_resources(host);
-		nlm_release_host(host);
+		nlmsvc_release_host(host);
 	}
 	while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
 		nlmclnt_recovery(host);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index c187422026d8..9a41fdc19511 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -51,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 
 no_locks:
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
  	if (error)
 		return error;	
 	return nlm_lck_denied_nolocks;
@@ -92,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 		dprintk("lockd: TEST4        status %d\n", ntohl(resp->status));
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -134,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -164,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
 
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -197,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_unlock(file, &argp->lock);
 
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -334,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_share_file(host, file, argp);
 
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -367,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_unshare_file(host, file, argp);
 
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -399,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return rpc_success;
 
 	nlmsvc_free_host_resources(host);
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	return rpc_success;
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0df65ec29e43..d27aab11f324 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -80,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 
 no_locks:
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	if (error)
 		return error;
 	return nlm_lck_denied_nolocks;
@@ -122,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 		dprintk("lockd: TEST          status %d vers %d\n",
 			ntohl(resp->status), rqstp->rq_vers);
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -164,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
 }
@@ -194,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
 
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -227,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
 
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -261,7 +261,7 @@ void nlmsvc_release_call(struct nlm_rqst *call)
 {
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
-	nlm_release_host(call->a_host);
+	nlmsvc_release_host(call->a_host);
 	kfree(call);
 }
 
@@ -374,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
 
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -407,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
 
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rpc_success;
 }
@@ -439,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return rpc_success;
 
 	nlmsvc_free_host_resources(host);
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	return rpc_success;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 6c2a0e2f298e..ff9abff55aa0 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -227,10 +227,10 @@ void		  nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
 					const size_t hostname_len);
+void		  nlmsvc_release_host(struct nlm_host *);
 struct rpc_clnt * nlm_bind_host(struct nlm_host *);
 void		  nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
-void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 void		  nlm_host_rebooted(const struct nlm_reboot *);
 
-- 
cgit v1.2.3-71-gd317


From cf4e594ea7e55555e81647b74a3a8e8b2826a529 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 16 Dec 2010 00:52:40 +0200
Subject: nl80211: Add notification for dropped Deauth/Disassoc

Add a new notification to indicate that a received, unprotected
Deauthentication or Disassociation frame was dropped due to
management frame protection being in use. This notification is
needed to allow user space (e.g., wpa_supplicant) to implement
SA Query procedure to recover from association state mismatch
between an AP and STA.

This is needed to avoid getting stuck in non-working state when MFP
(IEEE 802.11w) is used and a protected Deauthentication or
Disassociation frame is dropped for any reason. After that, the
station would silently discard any unprotected Deauthentication or
Disassociation frame that could be indicating that the AP does not
have association for the STA (when the Reason Code would be 6 or 7).
IEEE Std 802.11w-2009, 11.13 describes this recovery mechanism.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 ++++++++++
 include/net/cfg80211.h  | 26 ++++++++++++++++++++++++++
 net/mac80211/rx.c       | 22 ++++++++++++++++++++--
 net/wireless/mlme.c     | 22 ++++++++++++++++++++++
 net/wireless/nl80211.c  | 16 ++++++++++++++++
 net/wireless/nl80211.h  |  6 ++++++
 6 files changed, 100 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1cee56b3a79a..7483a89cee8f 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -399,6 +399,13 @@
  * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
  *	network is determined by the network interface.
  *
+ * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame
+ *	notification. This event is used to indicate that an unprotected
+ *	deauthentication frame was dropped when MFP is in use.
+ * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame
+ *	notification. This event is used to indicate that an unprotected
+ *	disassociation frame was dropped when MFP is in use.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -508,6 +515,9 @@ enum nl80211_commands {
 	NL80211_CMD_JOIN_MESH,
 	NL80211_CMD_LEAVE_MESH,
 
+	NL80211_CMD_UNPROT_DEAUTHENTICATE,
+	NL80211_CMD_UNPROT_DISASSOCIATE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f45e15f12446..3d1c09b777e8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2359,6 +2359,32 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len);
 void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf,
 	size_t len);
 
+/**
+ * cfg80211_send_unprot_deauth - notification of unprotected deauthentication
+ * @dev: network device
+ * @buf: deauthentication frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever a received Deauthentication frame has been
+ * dropped in station mode because of MFP being used but the Deauthentication
+ * frame was not protected. This function may sleep.
+ */
+void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+				 size_t len);
+
+/**
+ * cfg80211_send_unprot_disassoc - notification of unprotected disassociation
+ * @dev: network device
+ * @buf: disassociation frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever a received Disassociation frame has been
+ * dropped in station mode because of MFP being used but the Disassociation
+ * frame was not protected. This function may sleep.
+ */
+void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
+				   size_t len);
+
 /**
  * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP)
  * @dev: network device
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 052789ef4745..4573ce1e1d15 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1540,12 +1540,30 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 	if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) {
 		if (unlikely(!ieee80211_has_protected(fc) &&
 			     ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
-			     rx->key))
+			     rx->key)) {
+			if (ieee80211_is_deauth(fc))
+				cfg80211_send_unprot_deauth(rx->sdata->dev,
+							    rx->skb->data,
+							    rx->skb->len);
+			else if (ieee80211_is_disassoc(fc))
+				cfg80211_send_unprot_disassoc(rx->sdata->dev,
+							      rx->skb->data,
+							      rx->skb->len);
 			return -EACCES;
+		}
 		/* BIP does not use Protected field, so need to check MMIE */
 		if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
-			     ieee80211_get_mmie_keyidx(rx->skb) < 0))
+			     ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
+			if (ieee80211_is_deauth(fc))
+				cfg80211_send_unprot_deauth(rx->sdata->dev,
+							    rx->skb->data,
+							    rx->skb->len);
+			else if (ieee80211_is_disassoc(fc))
+				cfg80211_send_unprot_disassoc(rx->sdata->dev,
+							      rx->skb->data,
+							      rx->skb->len);
 			return -EACCES;
+		}
 		/*
 		 * When using MFP, Action frames are not allowed prior to
 		 * having configured keys.
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d7680f2a4c5b..aa5df8865ff7 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -263,6 +263,28 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 }
 EXPORT_SYMBOL(cfg80211_send_disassoc);
 
+void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+				 size_t len)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
+
+void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
+				   size_t len)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
+
 static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr)
 {
 	int i;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 594a6ac8b9d2..aefce54d47e2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5473,6 +5473,22 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE, gfp);
 }
 
+void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *buf,
+				size_t len, gfp_t gfp)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp);
+}
+
+void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *buf,
+				  size_t len, gfp_t gfp)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_UNPROT_DISASSOCIATE, gfp);
+}
+
 static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 				      struct net_device *netdev, int cmd,
 				      const u8 *addr, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 16c2f7190768..e3f7fa886966 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -25,6 +25,12 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
 			   const u8 *buf, size_t len, gfp_t gfp);
+void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev,
+				const u8 *buf, size_t len, gfp_t gfp);
+void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev,
+				  const u8 *buf, size_t len, gfp_t gfp);
 void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev,
 			       const u8 *addr, gfp_t gfp);
-- 
cgit v1.2.3-71-gd317


From 6ce5b1ce5f6922db32599e73bcb22f5cdcbf241f Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Mon, 13 Dec 2010 14:08:53 -0600
Subject: tty: fix typos/errors in tty_driver.h comments

Fix various typos and other errors in comments of tty_driver.h.  The most
significant is the wrong name of a function for the description of
TTY_DRIVER_DYNAMIC_DEV.

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/tty_driver.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 09678ed370f8..c3d43eb4150c 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -102,7 +102,7 @@
  * 	    unsigned int cmd, unsigned long arg);
  *
  * 	This routine allows the tty driver to implement
- *	device-specific ioctl's.  If the ioctl number passed in cmd
+ *	device-specific ioctls.  If the ioctl number passed in cmd
  * 	is not recognized by the driver, it should return ENOIOCTLCMD.
  *
  *	Optional
@@ -167,12 +167,12 @@
  * 
  * void (*hangup)(struct tty_struct *tty);
  *
- * 	This routine notifies the tty driver that it should hangup the
+ * 	This routine notifies the tty driver that it should hang up the
  * 	tty device.
  *
  *	Optional:
  *
- * int (*break_ctl)(struct tty_stuct *tty, int state);
+ * int (*break_ctl)(struct tty_struct *tty, int state);
  *
  * 	This optional routine requests the tty driver to turn on or
  * 	off BREAK status on the RS-232 port.  If state is -1,
@@ -358,7 +358,7 @@ static inline struct tty_driver *tty_driver_kref_get(struct tty_driver *d)
  * 	overruns, either.)
  *
  * TTY_DRIVER_DYNAMIC_DEV --- if set, the individual tty devices need
- *	to be registered with a call to tty_register_driver() when the
+ *	to be registered with a call to tty_register_device() when the
  *	device is found in the system and unregistered with a call to
  *	tty_unregister_device() so the devices will be show up
  *	properly in sysfs.  If not set, driver->num entries will be
-- 
cgit v1.2.3-71-gd317


From 3f960dbb9dfe29ff283810624c4340c79fde87f5 Mon Sep 17 00:00:00 2001
From: "Govindraj.R" <govindraj.raja@ti.com>
Date: Thu, 16 Dec 2010 18:12:47 +0530
Subject: Serial: Avoid unbalanced IRQ wake disable during resume

To avoid unbalanced IRQ wake disable, ensure that wakeups are disabled
only when wakeups have been successfully enabled.
Tested on OMAP3630SDP/ZOOM3.

Signed-off-by: Govindraj.R <govindraj.raja@ti.com>
Reported-by: Paul Walmsley <paul@pwsan.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/serial_core.c | 8 ++++++--
 include/linux/serial_core.h  | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 2835e29298ed..76418c139ffa 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1986,7 +1986,8 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
 
 	tty_dev = device_find_child(uport->dev, &match, serial_match_port);
 	if (device_may_wakeup(tty_dev)) {
-		enable_irq_wake(uport->irq);
+		if (!enable_irq_wake(uport->irq))
+			uport->irq_wake = 1;
 		put_device(tty_dev);
 		mutex_unlock(&port->mutex);
 		return 0;
@@ -2052,7 +2053,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
 
 	tty_dev = device_find_child(uport->dev, &match, serial_match_port);
 	if (!uport->suspended && device_may_wakeup(tty_dev)) {
-		disable_irq_wake(uport->irq);
+		if (uport->irq_wake) {
+			disable_irq_wake(uport->irq);
+			uport->irq_wake = 0;
+		}
 		mutex_unlock(&port->mutex);
 		return 0;
 	}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 7a6daf189995..a23fa29d4eb0 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -365,6 +365,7 @@ struct uart_port {
 	struct device		*dev;			/* parent device */
 	unsigned char		hub6;			/* this should be in the 8250 driver */
 	unsigned char		suspended;
+	unsigned char		irq_wake;
 	unsigned char		unused[2];
 	void			*private_data;		/* generic platform data pointer */
 };
-- 
cgit v1.2.3-71-gd317


From a3d22a68d752ccc1a01bb0a64dd70b7a98bf9e23 Mon Sep 17 00:00:00 2001
From: Vladislav Zolotarov <vladz@broadcom.com>
Date: Mon, 13 Dec 2010 06:27:10 +0000
Subject: bnx2x: Take the distribution range definition out of skb_tx_hash()

Move the calcualation of the Tx hash for a given hash range into a separate
function and define the skb_tx_hash(), which calculates a Tx hash for a
[0; dev->real_num_tx_queues - 1] hash values range, using this
function (__skb_tx_hash()).

Signed-off-by: Vladislav Zolotarov <vladz@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 include/linux/skbuff.h    |  5 +++--
 net/core/dev.c            | 15 ++++++++++-----
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d31bc3c94717..445e6825f8eb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1747,6 +1747,16 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 		__netif_schedule(txq->qdisc);
 }
 
+/*
+ * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
+ * as a distribution range limit for the returned value.
+ */
+static inline u16 skb_tx_hash(const struct net_device *dev,
+			      const struct sk_buff *skb)
+{
+	return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
+}
+
 /**
  *	netif_is_multiqueue - test if device has multiple transmit queues
  *	@dev: network device
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 19f37a6ee6c4..4c4bec6316d9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2165,8 +2165,9 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-extern u16 skb_tx_hash(const struct net_device *dev,
-		       const struct sk_buff *skb);
+extern u16 __skb_tx_hash(const struct net_device *dev,
+			 const struct sk_buff *skb,
+			 unsigned int num_tx_queues);
 
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index d28b3a023bb2..b25dd087f06a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2112,14 +2112,19 @@ out:
 
 static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
 {
 	u32 hash;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= dev->real_num_tx_queues))
-			hash -= dev->real_num_tx_queues;
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
 		return hash;
 	}
 
@@ -2129,9 +2134,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+	return (u16) (((u64) hash * num_tx_queues) >> 32);
 }
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
 
 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
-- 
cgit v1.2.3-71-gd317


From b236da6931e2482bfe44a7865dd4e7bb036f3496 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 14 Dec 2010 03:09:15 +0000
Subject: net: use NUMA_NO_NODE instead of the magic number -1

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 net/core/dev.c            | 2 +-
 net/core/net-sysfs.c      | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 445e6825f8eb..cc916c5c3279 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -530,7 +530,7 @@ static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 	return q->numa_node;
 #else
-	return -1;
+	return NUMA_NO_NODE;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b25dd087f06a..7ac26d2b9722 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5121,7 +5121,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
 	queue->xmit_lock_owner = -1;
-	netdev_queue_numa_node_write(queue, -1);
+	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 85e8b5326dd6..e23c01be5a5b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1009,7 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	if (dev_maps)
 		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
 
-	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
+	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+					    NUMA_NO_NODE);
 
 	mutex_unlock(&xps_map_mutex);
 
-- 
cgit v1.2.3-71-gd317


From 04fb451eff978ca059399eab83d5594b073caf6f Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 14 Dec 2010 15:24:08 +0000
Subject: net: Introduce skb_checksum_start_offset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce skb_checksum_start_offset() to replace repetitive calculation.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4c4bec6316d9..20ec0a64cb9f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1355,6 +1355,11 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 }
 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
 
+static inline int skb_checksum_start_offset(const struct sk_buff *skb)
+{
+	return skb->csum_start - skb_headroom(skb);
+}
+
 static inline int skb_transport_offset(const struct sk_buff *skb)
 {
 	return skb_transport_header(skb) - skb->data;
-- 
cgit v1.2.3-71-gd317


From fbc92a3455577ab17615cbcb91826399061bd789 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 1 Dec 2010 18:51:05 +0100
Subject: tty: add 'active' sysfs attribute to tty0 and console device

tty: add 'active' sysfs attribute to tty0 and console device

Userspace can query the actual virtual console, and the configured
console devices behind /dev/tt0 and /dev/console.

The last entry in the list of devices is the active device, analog
to the console= kernel command line option.

The attribute supports poll(), which is raised when the virtual
console is changed or /dev/console is reconfigured.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

index 0000000..b138b66
---
 Documentation/ABI/testing/sysfs-tty | 19 +++++++++++++++
 drivers/tty/tty_io.c                | 47 +++++++++++++++++++++++++++++++++----
 drivers/tty/vt/vt.c                 | 23 +++++++++++++++++-
 include/linux/console.h             |  2 +-
 kernel/printk.c                     |  2 ++
 5 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-tty

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
new file mode 100644
index 000000000000..b138b663bf54
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -0,0 +1,19 @@
+What:		/sys/class/tty/console/active
+Date:		Nov 2010
+Contact:	Kay Sievers <kay.sievers@vrfy.org>
+Description:
+		 Shows the list of currently configured
+		 console devices, like 'tty1 ttyS0'.
+		 The last entry in the file is the active
+		 device connected to /dev/console.
+		 The file supports poll() to detect virtual
+		 console switches.
+
+What:		/sys/class/tty/tty0/active
+Date:		Nov 2010
+Contact:	Kay Sievers <kay.sievers@vrfy.org>
+Description:
+		 Shows the currently active virtual console
+		 device, like 'tty1'.
+		 The file supports poll() to detect virtual
+		 console switches.
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index c05c5af5aa04..be5ab4ac0b93 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3232,9 +3232,45 @@ static int __init tty_class_init(void)
 postcore_initcall(tty_class_init);
 
 /* 3/2004 jmc: why do these devices exist? */
-
 static struct cdev tty_cdev, console_cdev;
 
+static ssize_t show_cons_active(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct console *cs[16];
+	int i = 0;
+	struct console *c;
+	ssize_t count = 0;
+
+	acquire_console_sem();
+	for (c = console_drivers; c; c = c->next) {
+		if (!c->device)
+			continue;
+		if (!c->write)
+			continue;
+		if ((c->flags & CON_ENABLED) == 0)
+			continue;
+		cs[i++] = c;
+		if (i >= ARRAY_SIZE(cs))
+			break;
+	}
+	while (i--)
+		count += sprintf(buf + count, "%s%d%c",
+				 cs[i]->name, cs[i]->index, i ? ' ':'\n');
+	release_console_sem();
+
+	return count;
+}
+static DEVICE_ATTR(active, S_IRUGO, show_cons_active, NULL);
+
+static struct device *consdev;
+
+void console_sysfs_notify(void)
+{
+	if (consdev)
+		sysfs_notify(&consdev->kobj, NULL, "active");
+}
+
 /*
  * Ok, now we can initialize the rest of the tty devices and can count
  * on memory allocations, interrupts etc..
@@ -3245,15 +3281,18 @@ int __init tty_init(void)
 	if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
 	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
 		panic("Couldn't register /dev/tty driver\n");
-	device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL,
-			      "tty");
+	device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
 
 	cdev_init(&console_cdev, &console_fops);
 	if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
 	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
 		panic("Couldn't register /dev/console driver\n");
-	device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL,
+	consdev = device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL,
 			      "console");
+	if (IS_ERR(consdev))
+		consdev = NULL;
+	else
+		device_create_file(consdev, &dev_attr_active);
 
 #ifdef CONFIG_VT
 	vty_init(&console_fops);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index a8ec48ed14d9..76407eca9ab0 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -235,6 +235,14 @@ enum {
 	blank_vesa_wait,
 };
 
+/*
+ * /sys/class/tty/tty0/
+ *
+ * the attribute 'active' contains the name of the current vc
+ * console and it supports poll() to detect vc switches
+ */
+static struct device *tty0dev;
+
 /*
  * Notifier list for console events.
  */
@@ -688,6 +696,8 @@ void redraw_screen(struct vc_data *vc, int is_switch)
 			save_screen(old_vc);
 			set_origin(old_vc);
 		}
+		if (tty0dev)
+			sysfs_notify(&tty0dev->kobj, NULL, "active");
 	} else {
 		hide_cursor(vc);
 		redraw = 1;
@@ -2967,13 +2977,24 @@ static const struct tty_operations con_ops = {
 
 static struct cdev vc0_cdev;
 
+static ssize_t show_tty_active(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "tty%d\n", fg_console + 1);
+}
+static DEVICE_ATTR(active, S_IRUGO, show_tty_active, NULL);
+
 int __init vty_init(const struct file_operations *console_fops)
 {
 	cdev_init(&vc0_cdev, console_fops);
 	if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) ||
 	    register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
 		panic("Couldn't register /dev/tty0 driver\n");
-	device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+	tty0dev = device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+	if (IS_ERR(tty0dev))
+		tty0dev = NULL;
+	else
+		device_create_file(tty0dev, &dev_attr_active);
 
 	vcs_init();
 
diff --git a/include/linux/console.h b/include/linux/console.h
index 875cfb1c8132..9774fe6a1a97 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -151,7 +151,7 @@ extern int is_console_locked(void);
 extern int braille_register_console(struct console *, int index,
 		char *console_options, char *braille_options);
 extern int braille_unregister_console(struct console *);
-
+extern void console_sysfs_notify(void);
 extern int console_suspend_enabled;
 
 /* Suspend and resume console messages over PM events */
diff --git a/kernel/printk.c b/kernel/printk.c
index bf0420a92a1a..5417784a76b5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1332,6 +1332,7 @@ void register_console(struct console *newcon)
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 	}
 	release_console_sem();
+	console_sysfs_notify();
 
 	/*
 	 * By unregistering the bootconsoles after we enable the real console
@@ -1390,6 +1391,7 @@ int unregister_console(struct console *console)
 		console_drivers->flags |= CON_CONSDEV;
 
 	release_console_sem();
+	console_sysfs_notify();
 	return res;
 }
 EXPORT_SYMBOL(unregister_console);
-- 
cgit v1.2.3-71-gd317


From b76834bc1b6db0a0923eed85c81b1113021b0612 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 6 Dec 2010 11:16:25 -0600
Subject: kprobes: Use this_cpu_ops

Use this_cpu ops in various places to optimize per cpu data access.

Cc: Jason Baron <jbaron@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/kprobes.c | 14 +++++++-------
 include/linux/kprobes.h   |  4 ++--
 kernel/kprobes.c          |  8 ++++----
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..572ecc88ca40 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
 	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 				struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = p;
+	__this_cpu_write(current_kprobe, p);
 	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
 		= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
 	if (is_IF_modifier(p->ainsn.insn))
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		preempt_enable_no_resched();
 		return 1;
 	} else if (kprobe_running()) {
-		p = __get_cpu_var(current_kprobe);
+		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
 			setup_singlestep(p, regs, kcb, 0);
 			return 1;
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 		orig_ret_address = (unsigned long)ri->ret_addr;
 		if (ri->rp && ri->rp->handler) {
-			__get_cpu_var(current_kprobe) = &ri->rp->kp;
+			__this_cpu_write(current_kprobe, &ri->rp->kp);
 			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
 			ri->ret_addr = correct_ret_addr;
 			ri->rp->handler(ri, regs);
-			__get_cpu_var(current_kprobe) = NULL;
+			__this_cpu_write(current_kprobe, NULL);
 		}
 
 		recycle_rp_inst(ri, &empty_rp);
@@ -1198,10 +1198,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
 		regs->orig_ax = ~0UL;
 
-		__get_cpu_var(current_kprobe) = &op->kp;
+		__this_cpu_write(current_kprobe, &op->kp);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 		opt_pre_handler(&op->kp, regs);
-		__get_cpu_var(current_kprobe) = NULL;
+		__this_cpu_write(current_kprobe, NULL);
 	}
 	preempt_enable_no_resched();
 }
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e0070d..0c251e9f0507 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -303,12 +303,12 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
 /* kprobe_running() will just return the current_kprobe on this CPU */
 static inline struct kprobe *kprobe_running(void)
 {
-	return (__get_cpu_var(current_kprobe));
+	return (__this_cpu_read(current_kprobe));
 }
 
 static inline void reset_current_kprobe(void)
 {
-	__get_cpu_var(current_kprobe) = NULL;
+	__this_cpu_write(current_kprobe, NULL);
 }
 
 static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76e106f..732f1e9b65ee 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -317,12 +317,12 @@ void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
 /* We have preemption disabled.. so it is safe to use __ versions */
 static inline void set_kprobe_instance(struct kprobe *kp)
 {
-	__get_cpu_var(kprobe_instance) = kp;
+	__this_cpu_write(kprobe_instance, kp);
 }
 
 static inline void reset_kprobe_instance(void)
 {
-	__get_cpu_var(kprobe_instance) = NULL;
+	__this_cpu_write(kprobe_instance, NULL);
 }
 
 /*
@@ -775,7 +775,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
 					int trapnr)
 {
-	struct kprobe *cur = __get_cpu_var(kprobe_instance);
+	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 
 	/*
 	 * if we faulted "during" the execution of a user specified
@@ -790,7 +790,7 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
 
 static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct kprobe *cur = __get_cpu_var(kprobe_instance);
+	struct kprobe *cur = __this_cpu_read(kprobe_instance);
 	int ret = 0;
 
 	if (cur && cur->break_handler) {
-- 
cgit v1.2.3-71-gd317


From 909ea96468096b07fbb41aaf69be060d92bd9271 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Wed, 8 Dec 2010 16:22:55 +0100
Subject: core: Replace __get_cpu_var with __this_cpu_read if not used for an
 address.

__get_cpu_var() can be replaced with this_cpu_read and will then use a
single read instruction with implied address calculation to access the
correct per cpu instance.

However, the address of a per cpu variable passed to __this_cpu_read()
cannot be determined (since it's an implied address conversion through
segment prefixes).  Therefore apply this only to uses of __get_cpu_var
where the address of the variable is not used.

Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Hugh Dickins <hughd@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/irq_regs.h |  8 ++++----
 include/linux/elevator.h       | 12 +++---------
 include/linux/kernel_stat.h    |  2 +-
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  2 +-
 kernel/hrtimer.c               |  2 +-
 kernel/printk.c                |  4 ++--
 kernel/rcutree.c               |  4 ++--
 kernel/softirq.c               | 42 +++++++++++++++++++++---------------------
 kernel/time/tick-common.c      |  2 +-
 kernel/time/tick-oneshot.c     |  4 ++--
 kernel/watchdog.c              | 36 ++++++++++++++++++------------------
 mm/slab.c                      |  6 +++---
 13 files changed, 60 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/irq_regs.h b/include/asm-generic/irq_regs.h
index 5ae1d07d4a12..6bf9355fa7eb 100644
--- a/include/asm-generic/irq_regs.h
+++ b/include/asm-generic/irq_regs.h
@@ -22,15 +22,15 @@ DECLARE_PER_CPU(struct pt_regs *, __irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return __get_cpu_var(__irq_regs);
+	return __this_cpu_read(__irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 {
-	struct pt_regs *old_regs, **pp_regs = &__get_cpu_var(__irq_regs);
+	struct pt_regs *old_regs;
 
-	old_regs = *pp_regs;
-	*pp_regs = new_regs;
+	old_regs = __this_cpu_read(__irq_regs);
+	__this_cpu_write(__irq_regs, new_regs);
 	return old_regs;
 }
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4fd978e7eb83..4d857973d2c9 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -195,15 +195,9 @@ enum {
 /*
  * io context count accounting
  */
-#define elv_ioc_count_mod(name, __val)				\
-	do {							\
-		preempt_disable();				\
-		__get_cpu_var(name) += (__val);			\
-		preempt_enable();				\
-	} while (0)
-
-#define elv_ioc_count_inc(name)	elv_ioc_count_mod(name, 1)
-#define elv_ioc_count_dec(name)	elv_ioc_count_mod(name, -1)
+#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val)
+#define elv_ioc_count_inc(name)	this_cpu_inc(name)
+#define elv_ioc_count_dec(name)	this_cpu_dec(name)
 
 #define elv_ioc_count_read(name)				\
 ({								\
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index ad54c846911b..44e83ba12b5b 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -47,7 +47,7 @@ extern unsigned long long nr_context_switches(void);
 
 #ifndef CONFIG_GENERIC_HARDIRQS
 #define kstat_irqs_this_cpu(irq) \
-	(kstat_this_cpu.irqs[irq])
+	(this_cpu_read(kstat.irqs[irq])
 
 struct irq_desc;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 676149a4ac5f..89c74861a3da 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
-		__get_cpu_var(process_counts)--;
+		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..e05e27de67df 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1282,7 +1282,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			attach_pid(p, PIDTYPE_SID, task_session(current));
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
-			__get_cpu_var(process_counts)++;
+			__this_cpu_inc(process_counts);
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf5c6cf..29de5ae4ca95 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -497,7 +497,7 @@ static inline int hrtimer_is_hres_enabled(void)
  */
 static inline int hrtimer_hres_active(void)
 {
-	return __get_cpu_var(hrtimer_bases).hres_active;
+	return __this_cpu_read(hrtimer_bases.hres_active);
 }
 
 /*
diff --git a/kernel/printk.c b/kernel/printk.c
index 9a2264fc42ca..b032317f9964 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1074,8 +1074,8 @@ static DEFINE_PER_CPU(int, printk_pending);
 
 void printk_tick(void)
 {
-	if (__get_cpu_var(printk_pending)) {
-		__get_cpu_var(printk_pending) = 0;
+	if (__this_cpu_read(printk_pending)) {
+		__this_cpu_write(printk_pending, 0);
 		wake_up_interruptible(&log_wait);
 	}
 }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ccdc04c47981..aeebf772d6a2 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -367,8 +367,8 @@ void rcu_irq_exit(void)
 	WARN_ON_ONCE(rdtp->dynticks & 0x1);
 
 	/* If the interrupt queued a callback, get out of dyntick mode. */
-	if (__get_cpu_var(rcu_sched_data).nxtlist ||
-	    __get_cpu_var(rcu_bh_data).nxtlist)
+	if (__this_cpu_read(rcu_sched_data.nxtlist) ||
+	    __this_cpu_read(rcu_bh_data.nxtlist))
 		set_need_resched();
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 18f4be0d5fe0..d0a0dda52c1a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -70,7 +70,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
 static void wakeup_softirqd(void)
 {
 	/* Interrupts are disabled: no need to stop preemption */
-	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
 
 	if (tsk && tsk->state != TASK_RUNNING)
 		wake_up_process(tsk);
@@ -388,8 +388,8 @@ void __tasklet_schedule(struct tasklet_struct *t)
 
 	local_irq_save(flags);
 	t->next = NULL;
-	*__get_cpu_var(tasklet_vec).tail = t;
-	__get_cpu_var(tasklet_vec).tail = &(t->next);
+	*__this_cpu_read(tasklet_vec.tail) = t;
+	__this_cpu_write(tasklet_vec.tail, &(t->next));
 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
 	local_irq_restore(flags);
 }
@@ -402,8 +402,8 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
 
 	local_irq_save(flags);
 	t->next = NULL;
-	*__get_cpu_var(tasklet_hi_vec).tail = t;
-	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+	*__this_cpu_read(tasklet_hi_vec.tail) = t;
+	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
 	raise_softirq_irqoff(HI_SOFTIRQ);
 	local_irq_restore(flags);
 }
@@ -414,8 +414,8 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
 {
 	BUG_ON(!irqs_disabled());
 
-	t->next = __get_cpu_var(tasklet_hi_vec).head;
-	__get_cpu_var(tasklet_hi_vec).head = t;
+	t->next = __this_cpu_read(tasklet_hi_vec.head);
+	__this_cpu_write(tasklet_hi_vec.head, t);
 	__raise_softirq_irqoff(HI_SOFTIRQ);
 }
 
@@ -426,9 +426,9 @@ static void tasklet_action(struct softirq_action *a)
 	struct tasklet_struct *list;
 
 	local_irq_disable();
-	list = __get_cpu_var(tasklet_vec).head;
-	__get_cpu_var(tasklet_vec).head = NULL;
-	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
+	list = __this_cpu_read(tasklet_vec.head);
+	__this_cpu_write(tasklet_vec.head, NULL);
+	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
 	local_irq_enable();
 
 	while (list) {
@@ -449,8 +449,8 @@ static void tasklet_action(struct softirq_action *a)
 
 		local_irq_disable();
 		t->next = NULL;
-		*__get_cpu_var(tasklet_vec).tail = t;
-		__get_cpu_var(tasklet_vec).tail = &(t->next);
+		*__this_cpu_read(tasklet_vec.tail) = t;
+		__this_cpu_write(tasklet_vec.tail, &(t->next));
 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
 		local_irq_enable();
 	}
@@ -461,9 +461,9 @@ static void tasklet_hi_action(struct softirq_action *a)
 	struct tasklet_struct *list;
 
 	local_irq_disable();
-	list = __get_cpu_var(tasklet_hi_vec).head;
-	__get_cpu_var(tasklet_hi_vec).head = NULL;
-	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
+	list = __this_cpu_read(tasklet_hi_vec.head);
+	__this_cpu_write(tasklet_hi_vec.head, NULL);
+	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
 	local_irq_enable();
 
 	while (list) {
@@ -484,8 +484,8 @@ static void tasklet_hi_action(struct softirq_action *a)
 
 		local_irq_disable();
 		t->next = NULL;
-		*__get_cpu_var(tasklet_hi_vec).tail = t;
-		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
+		*__this_cpu_read(tasklet_hi_vec.tail) = t;
+		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
 		__raise_softirq_irqoff(HI_SOFTIRQ);
 		local_irq_enable();
 	}
@@ -802,16 +802,16 @@ static void takeover_tasklets(unsigned int cpu)
 
 	/* Find end, append list for that CPU. */
 	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
-		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
-		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
+		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
+		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
 		per_cpu(tasklet_vec, cpu).head = NULL;
 		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
 	}
 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
 
 	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
-		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
-		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
+		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
+		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
 		per_cpu(tasklet_hi_vec, cpu).head = NULL;
 		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
 	}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b6b898d2eeef..051bc80a0c43 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -49,7 +49,7 @@ struct tick_device *tick_get_device(int cpu)
  */
 int tick_is_oneshot_available(void)
 {
-	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
 	return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
 }
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index aada0e52680a..5cbc101f908b 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -95,7 +95,7 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
  */
 int tick_program_event(ktime_t expires, int force)
 {
-	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
 	return tick_dev_program_event(dev, expires, force);
 }
@@ -167,7 +167,7 @@ int tick_oneshot_mode_active(void)
 	int ret;
 
 	local_irq_save(flags);
-	ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+	ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT;
 	local_irq_restore(flags);
 
 	return ret;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..8037a86106ed 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -116,12 +116,12 @@ static void __touch_watchdog(void)
 {
 	int this_cpu = smp_processor_id();
 
-	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+	__this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
 }
 
 void touch_softlockup_watchdog(void)
 {
-	__raw_get_cpu_var(watchdog_touch_ts) = 0;
+	__this_cpu_write(watchdog_touch_ts, 0);
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -165,12 +165,12 @@ void touch_softlockup_watchdog_sync(void)
 /* watchdog detector functions */
 static int is_hardlockup(void)
 {
-	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
+	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
+	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
 		return 1;
 
-	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 	return 0;
 }
 #endif
@@ -203,8 +203,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
-	if (__get_cpu_var(watchdog_nmi_touch) == true) {
-		__get_cpu_var(watchdog_nmi_touch) = false;
+	if (__this_cpu_read(watchdog_nmi_touch) == true) {
+		__this_cpu_write(watchdog_nmi_touch, false);
 		return;
 	}
 
@@ -218,7 +218,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		int this_cpu = smp_processor_id();
 
 		/* only print hardlockups once */
-		if (__get_cpu_var(hard_watchdog_warn) == true)
+		if (__this_cpu_read(hard_watchdog_warn) == true)
 			return;
 
 		if (hardlockup_panic)
@@ -226,16 +226,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		else
 			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
 
-		__get_cpu_var(hard_watchdog_warn) = true;
+		__this_cpu_write(hard_watchdog_warn, true);
 		return;
 	}
 
-	__get_cpu_var(hard_watchdog_warn) = false;
+	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
 static void watchdog_interrupt_count(void)
 {
-	__get_cpu_var(hrtimer_interrupts)++;
+	__this_cpu_inc(hrtimer_interrupts);
 }
 #else
 static inline void watchdog_interrupt_count(void) { return; }
@@ -244,7 +244,7 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
 
@@ -252,18 +252,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	wake_up_process(__get_cpu_var(softlockup_watchdog));
+	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
 	if (touch_ts == 0) {
-		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
+		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
 			 * make sure the scheduler tick is up to date.
 			 */
-			__get_cpu_var(softlockup_touch_sync) = false;
+			__this_cpu_write(softlockup_touch_sync, false);
 			sched_clock_tick();
 		}
 		__touch_watchdog();
@@ -279,7 +279,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	duration = is_softlockup(touch_ts);
 	if (unlikely(duration)) {
 		/* only warn once */
-		if (__get_cpu_var(soft_watchdog_warn) == true)
+		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -294,9 +294,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
-		__get_cpu_var(soft_watchdog_warn) = true;
+		__this_cpu_write(soft_watchdog_warn, true);
 	} else
-		__get_cpu_var(soft_watchdog_warn) = false;
+		__this_cpu_write(soft_watchdog_warn, false);
 
 	return HRTIMER_RESTART;
 }
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..316d75596f3c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -829,12 +829,12 @@ static void init_reap_node(int cpu)
 
 static void next_reap_node(void)
 {
-	int node = __get_cpu_var(slab_reap_node);
+	int node = __this_cpu_read(slab_reap_node);
 
 	node = next_node(node, node_online_map);
 	if (unlikely(node >= MAX_NUMNODES))
 		node = first_node(node_online_map);
-	__get_cpu_var(slab_reap_node) = node;
+	__this_cpu_write(slab_reap_node, node);
 }
 
 #else
@@ -1012,7 +1012,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
  */
 static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
 {
-	int node = __get_cpu_var(slab_reap_node);
+	int node = __this_cpu_read(slab_reap_node);
 
 	if (l3->alien) {
 		struct array_cache *ac = l3->alien[node];
-- 
cgit v1.2.3-71-gd317


From a663ffff1d2e94a7c549a37d08ed9169ce83bdd6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 6 Dec 2010 11:39:59 -0600
Subject: percpu: Generic support for this_cpu_add, sub, dec, inc_return

Introduce generic support for this_cpu_add_return etc.

The fallback is to realize these operations with simpler __this_cpu_ops.

tj: - Reformatted __cpu_size_call_return2() to make it more consistent
      with its neighbors.
    - Dropped unnecessary temp variable ret__ from
      __this_cpu_generic_add_return().

Reviewed-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5095b834a6fb..4d593defc47d 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -240,6 +240,21 @@ extern void __bad_size_call_parameter(void);
 	pscr_ret__;							\
 })
 
+#define __pcpu_size_call_return2(stem, variable, ...)			\
+({									\
+	typeof(variable) pscr2_ret__;					\
+	__verify_pcpu_ptr(&(variable));					\
+	switch(sizeof(variable)) {					\
+	case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break;	\
+	case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break;	\
+	case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break;	\
+	case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break;	\
+	default:							\
+		__bad_size_call_parameter(); break;			\
+	}								\
+	pscr2_ret__;							\
+})
+
 #define __pcpu_size_call(stem, variable, ...)				\
 do {									\
 	__verify_pcpu_ptr(&(variable));					\
@@ -529,6 +544,62 @@ do {									\
 # define __this_cpu_xor(pcp, val)	__pcpu_size_call(__this_cpu_xor_, (pcp), (val))
 #endif
 
+#define _this_cpu_generic_add_return(pcp, val)				\
+({									\
+	typeof(pcp) ret__;						\
+	preempt_disable();						\
+	__this_cpu_add(pcp, val);					\
+	ret__ = __this_cpu_read(pcp);					\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#ifndef this_cpu_add_return
+# ifndef this_cpu_add_return_1
+#  define this_cpu_add_return_1(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_2
+#  define this_cpu_add_return_2(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_4
+#  define this_cpu_add_return_4(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_8
+#  define this_cpu_add_return_8(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# define this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+#endif
+
+#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
+#define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
+#define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
+
+#define __this_cpu_generic_add_return(pcp, val)				\
+({									\
+	__this_cpu_add(pcp, val);					\
+	__this_cpu_read(pcp);						\
+})
+
+#ifndef __this_cpu_add_return
+# ifndef __this_cpu_add_return_1
+#  define __this_cpu_add_return_1(pcp, val)	__this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef __this_cpu_add_return_2
+#  define __this_cpu_add_return_2(pcp, val)	__this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef __this_cpu_add_return_4
+#  define __this_cpu_add_return_4(pcp, val)	__this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef __this_cpu_add_return_8
+#  define __this_cpu_add_return_8(pcp, val)	__this_cpu_generic_add_return(pcp, val)
+# endif
+# define __this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+#endif
+
+#define __this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
+#define __this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
+
 /*
  * IRQ safe versions of the per cpu RMW operations. Note that these operations
  * are *not* safe against modification of the same variable from another
-- 
cgit v1.2.3-71-gd317


From cfb824349556904b319464139be5c75fce983b0d Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 6 Dec 2010 11:40:03 -0600
Subject: highmem: Use this_cpu_xx_return() operations

Use this_cpu operations to optimize access primitives for highmem.

The main effect is the avoidance of address calculations through the
use of a segment prefix.

V3->V4
	- kmap_atomic_idx: Do not return a value.
	- Use __this_cpu_dec without HIGHMEM_DEBUG

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/highmem.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b676c585574e..3a93f73a8acc 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -81,7 +81,8 @@ DECLARE_PER_CPU(int, __kmap_atomic_idx);
 
 static inline int kmap_atomic_idx_push(void)
 {
-	int idx = __get_cpu_var(__kmap_atomic_idx)++;
+	int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
+
 #ifdef CONFIG_DEBUG_HIGHMEM
 	WARN_ON_ONCE(in_irq() && !irqs_disabled());
 	BUG_ON(idx > KM_TYPE_NR);
@@ -91,16 +92,18 @@ static inline int kmap_atomic_idx_push(void)
 
 static inline int kmap_atomic_idx(void)
 {
-	return __get_cpu_var(__kmap_atomic_idx) - 1;
+	return __this_cpu_read(__kmap_atomic_idx) - 1;
 }
 
-static inline int kmap_atomic_idx_pop(void)
+static inline void kmap_atomic_idx_pop(void)
 {
-	int idx = --__get_cpu_var(__kmap_atomic_idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
+	int idx = __this_cpu_dec_return(__kmap_atomic_idx);
+
 	BUG_ON(idx < 0);
+#else
+	__this_cpu_dec(__kmap_atomic_idx);
 #endif
-	return idx;
 }
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 403047754cf690b012369b8fb563b738b88086e6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 17 Dec 2010 15:47:04 +0100
Subject: percpu,x86: relocate this_cpu_add_return() and friends

- include/linux/percpu.h: this_cpu_add_return() and friends were
  located next to __this_cpu_add_return().  However, the overall
  organization is to first group by preemption safeness.  Relocate
  this_cpu_add_return() and friends to preemption-safe area.

- arch/x86/include/asm/percpu.h: Relocate percpu_add_return_op() after
  other more basic operations.  Relocate [__]this_cpu_add_return_8()
  so that they're first grouped by preemption safeness.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
---
 arch/x86/include/asm/percpu.h | 71 +++++++++++++++++++++----------------------
 include/linux/percpu.h        | 60 ++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 38f9e965ff96..dd0cd4b6a76f 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -177,39 +177,6 @@ do {									\
 	}								\
 } while (0)
 
-/*
- * Add return operation
- */
-#define percpu_add_return_op(var, val)					\
-({									\
-	typeof(var) paro_ret__ = val;					\
-	switch (sizeof(var)) {						\
-	case 1:								\
-		asm("xaddb %0, "__percpu_arg(1)				\
-			    : "+q" (paro_ret__), "+m" (var)		\
-			    : : "memory");				\
-		break;							\
-	case 2:								\
-		asm("xaddw %0, "__percpu_arg(1)				\
-			    : "+r" (paro_ret__), "+m" (var)		\
-			    : : "memory");				\
-		break;							\
-	case 4:								\
-		asm("xaddl %0, "__percpu_arg(1)				\
-			    : "+r" (paro_ret__), "+m" (var)		\
-			    : : "memory");				\
-		break;							\
-	case 8:								\
-		asm("xaddq %0, "__percpu_arg(1)				\
-			    : "+re" (paro_ret__), "+m" (var)		\
-			    : : "memory");				\
-		break;							\
-	default: __bad_percpu_size();					\
-	}								\
-	paro_ret__ += val;						\
-	paro_ret__;							\
-})
-
 #define percpu_from_op(op, var, constraint)		\
 ({							\
 	typeof(var) pfo_ret__;				\
@@ -262,6 +229,39 @@ do {									\
 	}						\
 })
 
+/*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val)					\
+({									\
+	typeof(var) paro_ret__ = val;					\
+	switch (sizeof(var)) {						\
+	case 1:								\
+		asm("xaddb %0, "__percpu_arg(1)				\
+			    : "+q" (paro_ret__), "+m" (var)		\
+			    : : "memory");				\
+		break;							\
+	case 2:								\
+		asm("xaddw %0, "__percpu_arg(1)				\
+			    : "+r" (paro_ret__), "+m" (var)		\
+			    : : "memory");				\
+		break;							\
+	case 4:								\
+		asm("xaddl %0, "__percpu_arg(1)				\
+			    : "+r" (paro_ret__), "+m" (var)		\
+			    : : "memory");				\
+		break;							\
+	case 8:								\
+		asm("xaddq %0, "__percpu_arg(1)				\
+			    : "+re" (paro_ret__), "+m" (var)		\
+			    : : "memory");				\
+		break;							\
+	default: __bad_percpu_size();					\
+	}								\
+	paro_ret__ += val;						\
+	paro_ret__;							\
+})
+
 /*
  * percpu_read() makes gcc load the percpu variable every time it is
  * accessed while percpu_read_stable() allows the value to be cached.
@@ -352,6 +352,7 @@ do {									\
 #define __this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define __this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define __this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
 
 #define this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -359,14 +360,12 @@ do {									\
 #define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define this_cpu_or_8(pcp, val)		percpu_to_op("or", (pcp), val)
 #define this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 
 #define irqsafe_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
 #define irqsafe_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define irqsafe_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
-
-#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 4d593defc47d..3484e88d93f8 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -417,6 +417,36 @@ do {									\
 # define this_cpu_xor(pcp, val)		__pcpu_size_call(this_cpu_or_, (pcp), (val))
 #endif
 
+#define _this_cpu_generic_add_return(pcp, val)				\
+({									\
+	typeof(pcp) ret__;						\
+	preempt_disable();						\
+	__this_cpu_add(pcp, val);					\
+	ret__ = __this_cpu_read(pcp);					\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#ifndef this_cpu_add_return
+# ifndef this_cpu_add_return_1
+#  define this_cpu_add_return_1(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_2
+#  define this_cpu_add_return_2(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_4
+#  define this_cpu_add_return_4(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef this_cpu_add_return_8
+#  define this_cpu_add_return_8(pcp, val)	_this_cpu_generic_add_return(pcp, val)
+# endif
+# define this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+#endif
+
+#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
+#define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
+#define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
+
 /*
  * Generic percpu operations that do not require preemption handling.
  * Either we do not care about races or the caller has the
@@ -544,36 +574,6 @@ do {									\
 # define __this_cpu_xor(pcp, val)	__pcpu_size_call(__this_cpu_xor_, (pcp), (val))
 #endif
 
-#define _this_cpu_generic_add_return(pcp, val)				\
-({									\
-	typeof(pcp) ret__;						\
-	preempt_disable();						\
-	__this_cpu_add(pcp, val);					\
-	ret__ = __this_cpu_read(pcp);					\
-	preempt_enable();						\
-	ret__;								\
-})
-
-#ifndef this_cpu_add_return
-# ifndef this_cpu_add_return_1
-#  define this_cpu_add_return_1(pcp, val)	_this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_2
-#  define this_cpu_add_return_2(pcp, val)	_this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_4
-#  define this_cpu_add_return_4(pcp, val)	_this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef this_cpu_add_return_8
-#  define this_cpu_add_return_8(pcp, val)	_this_cpu_generic_add_return(pcp, val)
-# endif
-# define this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
-#endif
-
-#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
-#define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
-#define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
-
 #define __this_cpu_generic_add_return(pcp, val)				\
 ({									\
 	__this_cpu_add(pcp, val);					\
-- 
cgit v1.2.3-71-gd317


From e27fc9641e8ddc8146f8e01f06e5eba2469698de Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 22 Nov 2010 21:36:11 -0800
Subject: rcu: increase synchronize_sched_expedited() batching

The fix in commit #6a0cc49 requires more than three concurrent instances
of synchronize_sched_expedited() before batching is possible.  This
patch uses a ticket-counter-like approach that is also not unrelated to
Lai Jiangshan's Ring RCU to allow sharing of expedited grace periods even
when there are only two concurrent instances of synchronize_sched_expedited().

This commit builds on Tejun's original posting, which may be found at
http://lkml.org/lkml/2010/11/9/204, adding memory barriers, avoiding
overflow of signed integers (other than via atomic_t), and fixing the
detection of batching.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  2 ++
 kernel/rcutree_plugin.h  | 82 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 64 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 49e8e16308e1..af5614856285 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,6 +47,8 @@
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
+#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c22c4ef2a0d0..a3638710dc67 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1025,7 +1025,8 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
 #else /* #ifndef CONFIG_SMP */
 
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
 
 static int synchronize_sched_expedited_cpu_stop(void *data)
 {
@@ -1041,8 +1042,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
 	 * robustness against future implementation changes.
 	 */
 	smp_mb(); /* See above comment block. */
-	if (cpumask_first(cpu_online_mask) == smp_processor_id())
-		atomic_inc(&synchronize_sched_expedited_count);
 	return 0;
 }
 
@@ -1056,43 +1055,86 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * lock that is acquired by a CPU-hotplug notifier.  Failing to
  * observe this restriction will result in deadlock.
  *
- * The synchronize_sched_expedited_cpu_stop() function is called
- * in stop-CPU context, but in order to keep overhead down to a dull
- * roar, we don't force this function to wait for its counterparts
- * on other CPUs.  One instance of this function will increment the
- * synchronize_sched_expedited_count variable per call to
- * try_stop_cpus(), but there is no guarantee what order this instance
- * will occur in.  The worst case is that it is last on one call
- * to try_stop_cpus(), and the first on the next call.  This means
- * that piggybacking requires that synchronize_sched_expedited_count
- * be incremented by 3: this guarantees that the piggybacking
- * task has waited through an entire cycle of context switches,
- * even in the worst case.
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word.  Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
  */
 void synchronize_sched_expedited(void)
 {
-	int snap, trycount = 0;
+	int firstsnap, s, snap, trycount = 0;
 
-	smp_mb();  /* ensure prior mod happens before capturing snap. */
-	snap = atomic_read(&synchronize_sched_expedited_count) + 2;
+	/* Note that atomic_inc_return() implies full memory barrier. */
+	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
 	get_online_cpus();
+
+	/*
+	 * Each pass through the following loop attempts to force a
+	 * context switch on each CPU.
+	 */
 	while (try_stop_cpus(cpu_online_mask,
 			     synchronize_sched_expedited_cpu_stop,
 			     NULL) == -EAGAIN) {
 		put_online_cpus();
+
+		/* No joy, try again later.  Or just synchronize_sched(). */
 		if (trycount++ < 10)
 			udelay(trycount * num_online_cpus());
 		else {
 			synchronize_sched();
 			return;
 		}
-		if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
+
+		/* Check to see if someone else did our work for us. */
+		s = atomic_read(&sync_sched_expedited_done);
+		if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
 			smp_mb(); /* ensure test happens before caller kfree */
 			return;
 		}
+
+		/*
+		 * Refetching sync_sched_expedited_started allows later
+		 * callers to piggyback on our grace period.  We subtract
+		 * 1 to get the same token that the last incrementer got.
+		 * We retry after they started, so our grace period works
+		 * for them, and they started after our first try, so their
+		 * grace period works for us.
+		 */
 		get_online_cpus();
+		snap = atomic_read(&sync_sched_expedited_started) - 1;
+		smp_mb(); /* ensure read is before try_stop_cpus(). */
 	}
-	smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
+
+	/*
+	 * Everyone up to our most recent fetch is covered by our grace
+	 * period.  Update the counter, but only if our work is still
+	 * relevant -- which it won't be if someone who started later
+	 * than we did beat us to the punch.
+	 */
+	do {
+		s = atomic_read(&sync_sched_expedited_done);
+		if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			break;
+		}
+	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
 	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-- 
cgit v1.2.3-71-gd317


From 8a9c1cee26c0ece23b38c45b92b724438878f842 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <mk@lab.zgora.pl>
Date: Wed, 15 Dec 2010 23:11:12 +0100
Subject: rculist: fix borked __list_for_each_rcu() macro

This restores parentheses blance.

Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index f31ef61f1c65..70d3ba504d17 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -244,7 +244,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define __list_for_each_rcu(pos, head) \
 	for (pos = rcu_dereference_raw(list_next_rcu(head)); \
 		pos != (head); \
-		pos = rcu_dereference_raw(list_next_rcu((pos)))
+		pos = rcu_dereference_raw(list_next_rcu(pos)))
 
 /**
  * list_for_each_entry_rcu	-	iterate over rcu list of given type
-- 
cgit v1.2.3-71-gd317


From 3c2dcf2aed5ea22ecf65a9a871c4963faec421b3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 15 Dec 2010 21:12:15 -0800
Subject: rcu: remove unused __list_for_each_rcu() macro

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 70d3ba504d17..2dea94fc4402 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_entry_rcu(ptr, type, member) \
 	list_entry_rcu((ptr)->next, type, member)
 
-#define __list_for_each_rcu(pos, head) \
-	for (pos = rcu_dereference_raw(list_next_rcu(head)); \
-		pos != (head); \
-		pos = rcu_dereference_raw(list_next_rcu(pos)))
-
 /**
  * list_for_each_entry_rcu	-	iterate over rcu list of given type
  * @pos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3-71-gd317


From 2b7124428561c7c3cfa4a58cc4c6feea53f3148e Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sat, 18 Dec 2010 15:54:04 +0100
Subject: percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support

Generic code to provide new per cpu atomic features

	this_cpu_cmpxchg
	this_cpu_xchg

Fallback occurs to functions using interrupts disable/enable
to ensure correct per cpu atomicity.

Fallback to regular cmpxchg and xchg is not possible since per cpu atomic
semantics include the guarantee that the current cpus per cpu data is
accessed atomically. Use of regular cmpxchg and xchg requires the
determination of the address of the per cpu data before regular cmpxchg
or xchg which therefore cannot be atomically included in an xchg or
cmpxchg without segment override.

tj: - Relocated new ops to conform better to the general organization.
    - This patch contains a trivial comment fix.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 134 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 133 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3484e88d93f8..27c3c6fcfad3 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -447,6 +447,59 @@ do {									\
 #define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
 #define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
 
+#define _this_cpu_generic_xchg(pcp, nval)				\
+({	typeof(pcp) ret__;						\
+	preempt_disable();						\
+	ret__ = __this_cpu_read(pcp);					\
+	__this_cpu_write(pcp, nval);					\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#ifndef this_cpu_xchg
+# ifndef this_cpu_xchg_1
+#  define this_cpu_xchg_1(pcp, nval)	_this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_2
+#  define this_cpu_xchg_2(pcp, nval)	_this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_4
+#  define this_cpu_xchg_4(pcp, nval)	_this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef this_cpu_xchg_8
+#  define this_cpu_xchg_8(pcp, nval)	_this_cpu_generic_xchg(pcp, nval)
+# endif
+# define this_cpu_xchg(pcp, nval)	\
+	__pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval)
+#endif
+
+#define _this_cpu_generic_cmpxchg(pcp, oval, nval)			\
+({	typeof(pcp) ret__;						\
+	preempt_disable();						\
+	ret__ = __this_cpu_read(pcp);					\
+	if (ret__ == (oval))						\
+		__this_cpu_write(pcp, nval);				\
+	preempt_enable();						\
+	ret__;								\
+})
+
+#ifndef this_cpu_cmpxchg
+# ifndef this_cpu_cmpxchg_1
+#  define this_cpu_cmpxchg_1(pcp, oval, nval)	_this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_2
+#  define this_cpu_cmpxchg_2(pcp, oval, nval)	_this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_4
+#  define this_cpu_cmpxchg_4(pcp, oval, nval)	_this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef this_cpu_cmpxchg_8
+#  define this_cpu_cmpxchg_8(pcp, oval, nval)	_this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define this_cpu_cmpxchg(pcp, oval, nval)	\
+	__pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#endif
+
 /*
  * Generic percpu operations that do not require preemption handling.
  * Either we do not care about races or the caller has the
@@ -600,11 +653,61 @@ do {									\
 #define __this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
 #define __this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
 
+#define __this_cpu_generic_xchg(pcp, nval)				\
+({	typeof(pcp) ret__;						\
+	ret__ = __this_cpu_read(pcp);					\
+	__this_cpu_write(pcp, nval);					\
+	ret__;								\
+})
+
+#ifndef __this_cpu_xchg
+# ifndef __this_cpu_xchg_1
+#  define __this_cpu_xchg_1(pcp, nval)	__this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_2
+#  define __this_cpu_xchg_2(pcp, nval)	__this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_4
+#  define __this_cpu_xchg_4(pcp, nval)	__this_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef __this_cpu_xchg_8
+#  define __this_cpu_xchg_8(pcp, nval)	__this_cpu_generic_xchg(pcp, nval)
+# endif
+# define __this_cpu_xchg(pcp, nval)	\
+	__pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval)
+#endif
+
+#define __this_cpu_generic_cmpxchg(pcp, oval, nval)			\
+({									\
+	typeof(pcp) ret__;						\
+	ret__ = __this_cpu_read(pcp);					\
+	if (ret__ == (oval))						\
+		__this_cpu_write(pcp, nval);				\
+	ret__;								\
+})
+
+#ifndef __this_cpu_cmpxchg
+# ifndef __this_cpu_cmpxchg_1
+#  define __this_cpu_cmpxchg_1(pcp, oval, nval)	__this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_2
+#  define __this_cpu_cmpxchg_2(pcp, oval, nval)	__this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_4
+#  define __this_cpu_cmpxchg_4(pcp, oval, nval)	__this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef __this_cpu_cmpxchg_8
+#  define __this_cpu_cmpxchg_8(pcp, oval, nval)	__this_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define __this_cpu_cmpxchg(pcp, oval, nval)	\
+	__pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
+#endif
+
 /*
  * IRQ safe versions of the per cpu RMW operations. Note that these operations
  * are *not* safe against modification of the same variable from another
  * processors (which one gets when using regular atomic operations)
- . They are guaranteed to be atomic vs. local interrupts and
+ * They are guaranteed to be atomic vs. local interrupts and
  * preemption only.
  */
 #define irqsafe_cpu_generic_to_op(pcp, val, op)				\
@@ -691,4 +794,33 @@ do {									\
 # define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val))
 #endif
 
+#define irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)			\
+({									\
+	typeof(pcp) ret__;						\
+	unsigned long flags;						\
+	local_irq_save(flags);						\
+	ret__ = __this_cpu_read(pcp);					\
+	if (ret__ == (oval))						\
+		__this_cpu_write(pcp, nval);				\
+	local_irq_restore(flags);					\
+	ret__;								\
+})
+
+#ifndef irqsafe_cpu_cmpxchg
+# ifndef irqsafe_cpu_cmpxchg_1
+#  define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_2
+#  define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_4
+#  define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_8
+#  define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	irqsafe_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define irqsafe_cpu_cmpxchg(pcp, oval, nval)		\
+	__pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
+#endif
+
 #endif /* __LINUX_PERCPU_H */
-- 
cgit v1.2.3-71-gd317


From c6eda6c5eeb357ff231121619fb49d2bc0605faf Mon Sep 17 00:00:00 2001
From: Sundar Iyer <sundar.iyer@stericsson.com>
Date: Mon, 13 Dec 2010 09:33:12 +0530
Subject: mfd/tc35892: rename tc35892 header to tc3589x

Rename the header file to include further variants within
the same mfd core driver

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Sundar Iyer <sundar.iyer@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
 drivers/gpio/tc35892-gpio.c |   2 +-
 drivers/mfd/tc35892.c       |   2 +-
 include/linux/mfd/tc35892.h | 136 --------------------------------------------
 include/linux/mfd/tc3589x.h | 136 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 138 deletions(-)
 delete mode 100644 include/linux/mfd/tc35892.h
 create mode 100644 include/linux/mfd/tc3589x.h

(limited to 'include/linux')

diff --git a/drivers/gpio/tc35892-gpio.c b/drivers/gpio/tc35892-gpio.c
index 7e10c935a047..027b857c18ff 100644
--- a/drivers/gpio/tc35892-gpio.c
+++ b/drivers/gpio/tc35892-gpio.c
@@ -13,7 +13,7 @@
 #include <linux/gpio.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/tc35892.h>
+#include <linux/mfd/tc3589x.h>
 
 /*
  * These registers are modified under the irq bus lock and cached to avoid
diff --git a/drivers/mfd/tc35892.c b/drivers/mfd/tc35892.c
index e619e2a55997..f230235c3765 100644
--- a/drivers/mfd/tc35892.c
+++ b/drivers/mfd/tc35892.c
@@ -12,7 +12,7 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/tc35892.h>
+#include <linux/mfd/tc3589x.h>
 
 /**
  * tc35892_reg_read() - read a single TC35892 register
diff --git a/include/linux/mfd/tc35892.h b/include/linux/mfd/tc35892.h
deleted file mode 100644
index eff3094ca84e..000000000000
--- a/include/linux/mfd/tc35892.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License Terms: GNU General Public License, version 2
- */
-
-#ifndef __LINUX_MFD_TC35892_H
-#define __LINUX_MFD_TC35892_H
-
-#include <linux/device.h>
-
-#define TC35892_RSTCTRL_IRQRST	(1 << 4)
-#define TC35892_RSTCTRL_TIMRST	(1 << 3)
-#define TC35892_RSTCTRL_ROTRST	(1 << 2)
-#define TC35892_RSTCTRL_KBDRST	(1 << 1)
-#define TC35892_RSTCTRL_GPIRST	(1 << 0)
-
-#define TC35892_IRQST		0x91
-
-#define TC35892_MANFCODE_MAGIC	0x03
-#define TC35892_MANFCODE	0x80
-#define TC35892_VERSION		0x81
-#define TC35892_IOCFG		0xA7
-
-#define TC35892_CLKMODE		0x88
-#define TC35892_CLKCFG		0x89
-#define TC35892_CLKEN		0x8A
-
-#define TC35892_RSTCTRL		0x82
-#define TC35892_EXTRSTN		0x83
-#define TC35892_RSTINTCLR	0x84
-
-#define TC35892_GPIOIS0		0xC9
-#define TC35892_GPIOIS1		0xCA
-#define TC35892_GPIOIS2		0xCB
-#define TC35892_GPIOIBE0	0xCC
-#define TC35892_GPIOIBE1	0xCD
-#define TC35892_GPIOIBE2	0xCE
-#define TC35892_GPIOIEV0	0xCF
-#define TC35892_GPIOIEV1	0xD0
-#define TC35892_GPIOIEV2	0xD1
-#define TC35892_GPIOIE0		0xD2
-#define TC35892_GPIOIE1		0xD3
-#define TC35892_GPIOIE2		0xD4
-#define TC35892_GPIORIS0	0xD6
-#define TC35892_GPIORIS1	0xD7
-#define TC35892_GPIORIS2	0xD8
-#define TC35892_GPIOMIS0	0xD9
-#define TC35892_GPIOMIS1	0xDA
-#define TC35892_GPIOMIS2	0xDB
-#define TC35892_GPIOIC0		0xDC
-#define TC35892_GPIOIC1		0xDD
-#define TC35892_GPIOIC2		0xDE
-
-#define TC35892_GPIODATA0	0xC0
-#define TC35892_GPIOMASK0	0xc1
-#define TC35892_GPIODATA1	0xC2
-#define TC35892_GPIOMASK1	0xc3
-#define TC35892_GPIODATA2	0xC4
-#define TC35892_GPIOMASK2	0xC5
-
-#define TC35892_GPIODIR0	0xC6
-#define TC35892_GPIODIR1	0xC7
-#define TC35892_GPIODIR2	0xC8
-
-#define TC35892_GPIOSYNC0	0xE6
-#define TC35892_GPIOSYNC1	0xE7
-#define TC35892_GPIOSYNC2	0xE8
-
-#define TC35892_GPIOWAKE0	0xE9
-#define TC35892_GPIOWAKE1	0xEA
-#define TC35892_GPIOWAKE2	0xEB
-
-#define TC35892_GPIOODM0	0xE0
-#define TC35892_GPIOODE0	0xE1
-#define TC35892_GPIOODM1	0xE2
-#define TC35892_GPIOODE1	0xE3
-#define TC35892_GPIOODM2	0xE4
-#define TC35892_GPIOODE2	0xE5
-
-#define TC35892_INT_GPIIRQ	0
-#define TC35892_INT_TI0IRQ	1
-#define TC35892_INT_TI1IRQ	2
-#define TC35892_INT_TI2IRQ	3
-#define TC35892_INT_ROTIRQ	5
-#define TC35892_INT_KBDIRQ	6
-#define TC35892_INT_PORIRQ	7
-
-#define TC35892_NR_INTERNAL_IRQS	8
-#define TC35892_INT_GPIO(x)	(TC35892_NR_INTERNAL_IRQS + (x))
-
-struct tc35892 {
-	struct mutex lock;
-	struct device *dev;
-	struct i2c_client *i2c;
-
-	int irq_base;
-	int num_gpio;
-	struct tc35892_platform_data *pdata;
-};
-
-extern int tc35892_reg_write(struct tc35892 *tc35892, u8 reg, u8 data);
-extern int tc35892_reg_read(struct tc35892 *tc35892, u8 reg);
-extern int tc35892_block_read(struct tc35892 *tc35892, u8 reg, u8 length,
-			      u8 *values);
-extern int tc35892_block_write(struct tc35892 *tc35892, u8 reg, u8 length,
-			       const u8 *values);
-extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val);
-
-/**
- * struct tc35892_gpio_platform_data - TC35892 GPIO platform data
- * @gpio_base: first gpio number assigned to TC35892.  A maximum of
- *	       %TC35892_NR_GPIOS GPIOs will be allocated.
- * @setup: callback for board-specific initialization
- * @remove: callback for board-specific teardown
- */
-struct tc35892_gpio_platform_data {
-	int gpio_base;
-	void (*setup)(struct tc35892 *tc35892, unsigned gpio_base);
-	void (*remove)(struct tc35892 *tc35892, unsigned gpio_base);
-};
-
-/**
- * struct tc35892_platform_data - TC35892 platform data
- * @irq_base: base IRQ number.  %TC35892_NR_IRQS irqs will be used.
- * @gpio: GPIO-specific platform data
- */
-struct tc35892_platform_data {
-	int irq_base;
-	struct tc35892_gpio_platform_data *gpio;
-};
-
-#define TC35892_NR_GPIOS	24
-#define TC35892_NR_IRQS		TC35892_INT_GPIO(TC35892_NR_GPIOS)
-
-#endif
diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h
new file mode 100644
index 000000000000..eff3094ca84e
--- /dev/null
+++ b/include/linux/mfd/tc3589x.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License, version 2
+ */
+
+#ifndef __LINUX_MFD_TC35892_H
+#define __LINUX_MFD_TC35892_H
+
+#include <linux/device.h>
+
+#define TC35892_RSTCTRL_IRQRST	(1 << 4)
+#define TC35892_RSTCTRL_TIMRST	(1 << 3)
+#define TC35892_RSTCTRL_ROTRST	(1 << 2)
+#define TC35892_RSTCTRL_KBDRST	(1 << 1)
+#define TC35892_RSTCTRL_GPIRST	(1 << 0)
+
+#define TC35892_IRQST		0x91
+
+#define TC35892_MANFCODE_MAGIC	0x03
+#define TC35892_MANFCODE	0x80
+#define TC35892_VERSION		0x81
+#define TC35892_IOCFG		0xA7
+
+#define TC35892_CLKMODE		0x88
+#define TC35892_CLKCFG		0x89
+#define TC35892_CLKEN		0x8A
+
+#define TC35892_RSTCTRL		0x82
+#define TC35892_EXTRSTN		0x83
+#define TC35892_RSTINTCLR	0x84
+
+#define TC35892_GPIOIS0		0xC9
+#define TC35892_GPIOIS1		0xCA
+#define TC35892_GPIOIS2		0xCB
+#define TC35892_GPIOIBE0	0xCC
+#define TC35892_GPIOIBE1	0xCD
+#define TC35892_GPIOIBE2	0xCE
+#define TC35892_GPIOIEV0	0xCF
+#define TC35892_GPIOIEV1	0xD0
+#define TC35892_GPIOIEV2	0xD1
+#define TC35892_GPIOIE0		0xD2
+#define TC35892_GPIOIE1		0xD3
+#define TC35892_GPIOIE2		0xD4
+#define TC35892_GPIORIS0	0xD6
+#define TC35892_GPIORIS1	0xD7
+#define TC35892_GPIORIS2	0xD8
+#define TC35892_GPIOMIS0	0xD9
+#define TC35892_GPIOMIS1	0xDA
+#define TC35892_GPIOMIS2	0xDB
+#define TC35892_GPIOIC0		0xDC
+#define TC35892_GPIOIC1		0xDD
+#define TC35892_GPIOIC2		0xDE
+
+#define TC35892_GPIODATA0	0xC0
+#define TC35892_GPIOMASK0	0xc1
+#define TC35892_GPIODATA1	0xC2
+#define TC35892_GPIOMASK1	0xc3
+#define TC35892_GPIODATA2	0xC4
+#define TC35892_GPIOMASK2	0xC5
+
+#define TC35892_GPIODIR0	0xC6
+#define TC35892_GPIODIR1	0xC7
+#define TC35892_GPIODIR2	0xC8
+
+#define TC35892_GPIOSYNC0	0xE6
+#define TC35892_GPIOSYNC1	0xE7
+#define TC35892_GPIOSYNC2	0xE8
+
+#define TC35892_GPIOWAKE0	0xE9
+#define TC35892_GPIOWAKE1	0xEA
+#define TC35892_GPIOWAKE2	0xEB
+
+#define TC35892_GPIOODM0	0xE0
+#define TC35892_GPIOODE0	0xE1
+#define TC35892_GPIOODM1	0xE2
+#define TC35892_GPIOODE1	0xE3
+#define TC35892_GPIOODM2	0xE4
+#define TC35892_GPIOODE2	0xE5
+
+#define TC35892_INT_GPIIRQ	0
+#define TC35892_INT_TI0IRQ	1
+#define TC35892_INT_TI1IRQ	2
+#define TC35892_INT_TI2IRQ	3
+#define TC35892_INT_ROTIRQ	5
+#define TC35892_INT_KBDIRQ	6
+#define TC35892_INT_PORIRQ	7
+
+#define TC35892_NR_INTERNAL_IRQS	8
+#define TC35892_INT_GPIO(x)	(TC35892_NR_INTERNAL_IRQS + (x))
+
+struct tc35892 {
+	struct mutex lock;
+	struct device *dev;
+	struct i2c_client *i2c;
+
+	int irq_base;
+	int num_gpio;
+	struct tc35892_platform_data *pdata;
+};
+
+extern int tc35892_reg_write(struct tc35892 *tc35892, u8 reg, u8 data);
+extern int tc35892_reg_read(struct tc35892 *tc35892, u8 reg);
+extern int tc35892_block_read(struct tc35892 *tc35892, u8 reg, u8 length,
+			      u8 *values);
+extern int tc35892_block_write(struct tc35892 *tc35892, u8 reg, u8 length,
+			       const u8 *values);
+extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val);
+
+/**
+ * struct tc35892_gpio_platform_data - TC35892 GPIO platform data
+ * @gpio_base: first gpio number assigned to TC35892.  A maximum of
+ *	       %TC35892_NR_GPIOS GPIOs will be allocated.
+ * @setup: callback for board-specific initialization
+ * @remove: callback for board-specific teardown
+ */
+struct tc35892_gpio_platform_data {
+	int gpio_base;
+	void (*setup)(struct tc35892 *tc35892, unsigned gpio_base);
+	void (*remove)(struct tc35892 *tc35892, unsigned gpio_base);
+};
+
+/**
+ * struct tc35892_platform_data - TC35892 platform data
+ * @irq_base: base IRQ number.  %TC35892_NR_IRQS irqs will be used.
+ * @gpio: GPIO-specific platform data
+ */
+struct tc35892_platform_data {
+	int irq_base;
+	struct tc35892_gpio_platform_data *gpio;
+};
+
+#define TC35892_NR_GPIOS	24
+#define TC35892_NR_IRQS		TC35892_INT_GPIO(TC35892_NR_GPIOS)
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 20406ebff4a298e6e3abbc1717a90bb3e55dc820 Mon Sep 17 00:00:00 2001
From: Sundar Iyer <sundar.iyer@stericsson.com>
Date: Mon, 13 Dec 2010 09:33:14 +0530
Subject: mfd/tc3589x: rename tc35892 structs/registers to tc359x

Most of the register layout, client IRQ numbers on the TC35892 is shared also
by other variants. Make this generic as tc3589x

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Sundar Iyer <sundar.iyer@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
 arch/arm/mach-ux500/board-mop500.c |  12 +-
 drivers/gpio/tc3589x-gpio.c        | 268 ++++++++++++++++++-------------------
 drivers/mfd/tc3589x.c              | 222 +++++++++++++++---------------
 include/linux/mfd/tc3589x.h        | 202 ++++++++++++++--------------
 4 files changed, 351 insertions(+), 353 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index a8220c5a6a69..5c950261968e 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -19,8 +19,7 @@
 #include <linux/amba/pl022.h>
 #include <linux/spi/spi.h>
 #include <linux/mfd/ab8500.h>
-#include <linux/mfd/tc35892.h>
-#include <linux/input/matrix_keypad.h>
+#include <linux/mfd/tc3589x.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -112,24 +111,24 @@ static struct pl022_ssp_controller ssp0_platform_data = {
  * TC35892
  */
 
-static void mop500_tc35892_init(struct tc35892 *tc35892, unsigned int base)
+static void mop500_tc35892_init(struct tc3589x *tc3589x, unsigned int base)
 {
 	mop500_sdi_tc35892_init();
 }
 
-static struct tc35892_gpio_platform_data mop500_tc35892_gpio_data = {
+static struct tc3589x_gpio_platform_data mop500_tc35892_gpio_data = {
 	.gpio_base	= MOP500_EGPIO(0),
 	.setup		= mop500_tc35892_init,
 };
 
-static struct tc35892_platform_data mop500_tc35892_data = {
+static struct tc3589x_platform_data mop500_tc35892_data = {
 	.gpio		= &mop500_tc35892_gpio_data,
 	.irq_base	= MOP500_EGPIO_IRQ_BASE,
 };
 
 static struct i2c_board_info mop500_i2c0_devices[] = {
 	{
-		I2C_BOARD_INFO("tc35892", 0x42),
+		I2C_BOARD_INFO("tc3589x", 0x42),
 		.irq            = NOMADIK_GPIO_TO_IRQ(217),
 		.platform_data  = &mop500_tc35892_data,
 	},
@@ -302,7 +301,6 @@ static void __init u8500_init_machine(void)
 
 	nmk_config_pins(mop500_pins, ARRAY_SIZE(mop500_pins));
 
-	ux500_ske_keypad_device.dev.platform_data = &ske_keypad_board;
 	platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs));
 
 	mop500_i2c_init();
diff --git a/drivers/gpio/tc3589x-gpio.c b/drivers/gpio/tc3589x-gpio.c
index 027b857c18ff..180d584454fb 100644
--- a/drivers/gpio/tc3589x-gpio.c
+++ b/drivers/gpio/tc3589x-gpio.c
@@ -24,9 +24,9 @@ enum { REG_IBE, REG_IEV, REG_IS, REG_IE };
 #define CACHE_NR_REGS	4
 #define CACHE_NR_BANKS	3
 
-struct tc35892_gpio {
+struct tc3589x_gpio {
 	struct gpio_chip chip;
-	struct tc35892 *tc35892;
+	struct tc3589x *tc3589x;
 	struct device *dev;
 	struct mutex irq_lock;
 
@@ -37,179 +37,179 @@ struct tc35892_gpio {
 	u8 oldregs[CACHE_NR_REGS][CACHE_NR_BANKS];
 };
 
-static inline struct tc35892_gpio *to_tc35892_gpio(struct gpio_chip *chip)
+static inline struct tc3589x_gpio *to_tc3589x_gpio(struct gpio_chip *chip)
 {
-	return container_of(chip, struct tc35892_gpio, chip);
+	return container_of(chip, struct tc3589x_gpio, chip);
 }
 
-static int tc35892_gpio_get(struct gpio_chip *chip, unsigned offset)
+static int tc3589x_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
-	struct tc35892_gpio *tc35892_gpio = to_tc35892_gpio(chip);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
-	u8 reg = TC35892_GPIODATA0 + (offset / 8) * 2;
+	struct tc3589x_gpio *tc3589x_gpio = to_tc3589x_gpio(chip);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
+	u8 reg = TC3589x_GPIODATA0 + (offset / 8) * 2;
 	u8 mask = 1 << (offset % 8);
 	int ret;
 
-	ret = tc35892_reg_read(tc35892, reg);
+	ret = tc3589x_reg_read(tc3589x, reg);
 	if (ret < 0)
 		return ret;
 
 	return ret & mask;
 }
 
-static void tc35892_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+static void tc3589x_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
 {
-	struct tc35892_gpio *tc35892_gpio = to_tc35892_gpio(chip);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
-	u8 reg = TC35892_GPIODATA0 + (offset / 8) * 2;
+	struct tc3589x_gpio *tc3589x_gpio = to_tc3589x_gpio(chip);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
+	u8 reg = TC3589x_GPIODATA0 + (offset / 8) * 2;
 	unsigned pos = offset % 8;
 	u8 data[] = {!!val << pos, 1 << pos};
 
-	tc35892_block_write(tc35892, reg, ARRAY_SIZE(data), data);
+	tc3589x_block_write(tc3589x, reg, ARRAY_SIZE(data), data);
 }
 
-static int tc35892_gpio_direction_output(struct gpio_chip *chip,
+static int tc3589x_gpio_direction_output(struct gpio_chip *chip,
 					 unsigned offset, int val)
 {
-	struct tc35892_gpio *tc35892_gpio = to_tc35892_gpio(chip);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
-	u8 reg = TC35892_GPIODIR0 + offset / 8;
+	struct tc3589x_gpio *tc3589x_gpio = to_tc3589x_gpio(chip);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
+	u8 reg = TC3589x_GPIODIR0 + offset / 8;
 	unsigned pos = offset % 8;
 
-	tc35892_gpio_set(chip, offset, val);
+	tc3589x_gpio_set(chip, offset, val);
 
-	return tc35892_set_bits(tc35892, reg, 1 << pos, 1 << pos);
+	return tc3589x_set_bits(tc3589x, reg, 1 << pos, 1 << pos);
 }
 
-static int tc35892_gpio_direction_input(struct gpio_chip *chip,
+static int tc3589x_gpio_direction_input(struct gpio_chip *chip,
 					unsigned offset)
 {
-	struct tc35892_gpio *tc35892_gpio = to_tc35892_gpio(chip);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
-	u8 reg = TC35892_GPIODIR0 + offset / 8;
+	struct tc3589x_gpio *tc3589x_gpio = to_tc3589x_gpio(chip);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
+	u8 reg = TC3589x_GPIODIR0 + offset / 8;
 	unsigned pos = offset % 8;
 
-	return tc35892_set_bits(tc35892, reg, 1 << pos, 0);
+	return tc3589x_set_bits(tc3589x, reg, 1 << pos, 0);
 }
 
-static int tc35892_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+static int tc3589x_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
 {
-	struct tc35892_gpio *tc35892_gpio = to_tc35892_gpio(chip);
+	struct tc3589x_gpio *tc3589x_gpio = to_tc3589x_gpio(chip);
 
-	return tc35892_gpio->irq_base + offset;
+	return tc3589x_gpio->irq_base + offset;
 }
 
 static struct gpio_chip template_chip = {
-	.label			= "tc35892",
+	.label			= "tc3589x",
 	.owner			= THIS_MODULE,
-	.direction_input	= tc35892_gpio_direction_input,
-	.get			= tc35892_gpio_get,
-	.direction_output	= tc35892_gpio_direction_output,
-	.set			= tc35892_gpio_set,
-	.to_irq			= tc35892_gpio_to_irq,
+	.direction_input	= tc3589x_gpio_direction_input,
+	.get			= tc3589x_gpio_get,
+	.direction_output	= tc3589x_gpio_direction_output,
+	.set			= tc3589x_gpio_set,
+	.to_irq			= tc3589x_gpio_to_irq,
 	.can_sleep		= 1,
 };
 
-static int tc35892_gpio_irq_set_type(unsigned int irq, unsigned int type)
+static int tc3589x_gpio_irq_set_type(unsigned int irq, unsigned int type)
 {
-	struct tc35892_gpio *tc35892_gpio = get_irq_chip_data(irq);
-	int offset = irq - tc35892_gpio->irq_base;
+	struct tc3589x_gpio *tc3589x_gpio = get_irq_chip_data(irq);
+	int offset = irq - tc3589x_gpio->irq_base;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
 	if (type == IRQ_TYPE_EDGE_BOTH) {
-		tc35892_gpio->regs[REG_IBE][regoffset] |= mask;
+		tc3589x_gpio->regs[REG_IBE][regoffset] |= mask;
 		return 0;
 	}
 
-	tc35892_gpio->regs[REG_IBE][regoffset] &= ~mask;
+	tc3589x_gpio->regs[REG_IBE][regoffset] &= ~mask;
 
 	if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH)
-		tc35892_gpio->regs[REG_IS][regoffset] |= mask;
+		tc3589x_gpio->regs[REG_IS][regoffset] |= mask;
 	else
-		tc35892_gpio->regs[REG_IS][regoffset] &= ~mask;
+		tc3589x_gpio->regs[REG_IS][regoffset] &= ~mask;
 
 	if (type == IRQ_TYPE_EDGE_RISING || type == IRQ_TYPE_LEVEL_HIGH)
-		tc35892_gpio->regs[REG_IEV][regoffset] |= mask;
+		tc3589x_gpio->regs[REG_IEV][regoffset] |= mask;
 	else
-		tc35892_gpio->regs[REG_IEV][regoffset] &= ~mask;
+		tc3589x_gpio->regs[REG_IEV][regoffset] &= ~mask;
 
 	return 0;
 }
 
-static void tc35892_gpio_irq_lock(unsigned int irq)
+static void tc3589x_gpio_irq_lock(unsigned int irq)
 {
-	struct tc35892_gpio *tc35892_gpio = get_irq_chip_data(irq);
+	struct tc3589x_gpio *tc3589x_gpio = get_irq_chip_data(irq);
 
-	mutex_lock(&tc35892_gpio->irq_lock);
+	mutex_lock(&tc3589x_gpio->irq_lock);
 }
 
-static void tc35892_gpio_irq_sync_unlock(unsigned int irq)
+static void tc3589x_gpio_irq_sync_unlock(unsigned int irq)
 {
-	struct tc35892_gpio *tc35892_gpio = get_irq_chip_data(irq);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
+	struct tc3589x_gpio *tc3589x_gpio = get_irq_chip_data(irq);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
 	static const u8 regmap[] = {
-		[REG_IBE]	= TC35892_GPIOIBE0,
-		[REG_IEV]	= TC35892_GPIOIEV0,
-		[REG_IS]	= TC35892_GPIOIS0,
-		[REG_IE]	= TC35892_GPIOIE0,
+		[REG_IBE]	= TC3589x_GPIOIBE0,
+		[REG_IEV]	= TC3589x_GPIOIEV0,
+		[REG_IS]	= TC3589x_GPIOIS0,
+		[REG_IE]	= TC3589x_GPIOIE0,
 	};
 	int i, j;
 
 	for (i = 0; i < CACHE_NR_REGS; i++) {
 		for (j = 0; j < CACHE_NR_BANKS; j++) {
-			u8 old = tc35892_gpio->oldregs[i][j];
-			u8 new = tc35892_gpio->regs[i][j];
+			u8 old = tc3589x_gpio->oldregs[i][j];
+			u8 new = tc3589x_gpio->regs[i][j];
 
 			if (new == old)
 				continue;
 
-			tc35892_gpio->oldregs[i][j] = new;
-			tc35892_reg_write(tc35892, regmap[i] + j * 8, new);
+			tc3589x_gpio->oldregs[i][j] = new;
+			tc3589x_reg_write(tc3589x, regmap[i] + j * 8, new);
 		}
 	}
 
-	mutex_unlock(&tc35892_gpio->irq_lock);
+	mutex_unlock(&tc3589x_gpio->irq_lock);
 }
 
-static void tc35892_gpio_irq_mask(unsigned int irq)
+static void tc3589x_gpio_irq_mask(unsigned int irq)
 {
-	struct tc35892_gpio *tc35892_gpio = get_irq_chip_data(irq);
-	int offset = irq - tc35892_gpio->irq_base;
+	struct tc3589x_gpio *tc3589x_gpio = get_irq_chip_data(irq);
+	int offset = irq - tc3589x_gpio->irq_base;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
-	tc35892_gpio->regs[REG_IE][regoffset] &= ~mask;
+	tc3589x_gpio->regs[REG_IE][regoffset] &= ~mask;
 }
 
-static void tc35892_gpio_irq_unmask(unsigned int irq)
+static void tc3589x_gpio_irq_unmask(unsigned int irq)
 {
-	struct tc35892_gpio *tc35892_gpio = get_irq_chip_data(irq);
-	int offset = irq - tc35892_gpio->irq_base;
+	struct tc3589x_gpio *tc3589x_gpio = get_irq_chip_data(irq);
+	int offset = irq - tc3589x_gpio->irq_base;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
-	tc35892_gpio->regs[REG_IE][regoffset] |= mask;
+	tc3589x_gpio->regs[REG_IE][regoffset] |= mask;
 }
 
-static struct irq_chip tc35892_gpio_irq_chip = {
-	.name			= "tc35892-gpio",
-	.bus_lock		= tc35892_gpio_irq_lock,
-	.bus_sync_unlock	= tc35892_gpio_irq_sync_unlock,
-	.mask			= tc35892_gpio_irq_mask,
-	.unmask			= tc35892_gpio_irq_unmask,
-	.set_type		= tc35892_gpio_irq_set_type,
+static struct irq_chip tc3589x_gpio_irq_chip = {
+	.name			= "tc3589x-gpio",
+	.bus_lock		= tc3589x_gpio_irq_lock,
+	.bus_sync_unlock	= tc3589x_gpio_irq_sync_unlock,
+	.mask			= tc3589x_gpio_irq_mask,
+	.unmask			= tc3589x_gpio_irq_unmask,
+	.set_type		= tc3589x_gpio_irq_set_type,
 };
 
-static irqreturn_t tc35892_gpio_irq(int irq, void *dev)
+static irqreturn_t tc3589x_gpio_irq(int irq, void *dev)
 {
-	struct tc35892_gpio *tc35892_gpio = dev;
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
+	struct tc3589x_gpio *tc3589x_gpio = dev;
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
 	u8 status[CACHE_NR_BANKS];
 	int ret;
 	int i;
 
-	ret = tc35892_block_read(tc35892, TC35892_GPIOMIS0,
+	ret = tc3589x_block_read(tc3589x, TC3589x_GPIOMIS0,
 				 ARRAY_SIZE(status), status);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -223,24 +223,24 @@ static irqreturn_t tc35892_gpio_irq(int irq, void *dev)
 			int bit = __ffs(stat);
 			int line = i * 8 + bit;
 
-			handle_nested_irq(tc35892_gpio->irq_base + line);
+			handle_nested_irq(tc3589x_gpio->irq_base + line);
 			stat &= ~(1 << bit);
 		}
 
-		tc35892_reg_write(tc35892, TC35892_GPIOIC0 + i, status[i]);
+		tc3589x_reg_write(tc3589x, TC3589x_GPIOIC0 + i, status[i]);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static int tc35892_gpio_irq_init(struct tc35892_gpio *tc35892_gpio)
+static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio)
 {
-	int base = tc35892_gpio->irq_base;
+	int base = tc3589x_gpio->irq_base;
 	int irq;
 
-	for (irq = base; irq < base + tc35892_gpio->chip.ngpio; irq++) {
-		set_irq_chip_data(irq, tc35892_gpio);
-		set_irq_chip_and_handler(irq, &tc35892_gpio_irq_chip,
+	for (irq = base; irq < base + tc3589x_gpio->chip.ngpio; irq++) {
+		set_irq_chip_data(irq, tc3589x_gpio);
+		set_irq_chip_and_handler(irq, &tc3589x_gpio_irq_chip,
 					 handle_simple_irq);
 		set_irq_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
@@ -253,12 +253,12 @@ static int tc35892_gpio_irq_init(struct tc35892_gpio *tc35892_gpio)
 	return 0;
 }
 
-static void tc35892_gpio_irq_remove(struct tc35892_gpio *tc35892_gpio)
+static void tc3589x_gpio_irq_remove(struct tc3589x_gpio *tc3589x_gpio)
 {
-	int base = tc35892_gpio->irq_base;
+	int base = tc3589x_gpio->irq_base;
 	int irq;
 
-	for (irq = base; irq < base + tc35892_gpio->chip.ngpio; irq++) {
+	for (irq = base; irq < base + tc3589x_gpio->chip.ngpio; irq++) {
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
@@ -267,15 +267,15 @@ static void tc35892_gpio_irq_remove(struct tc35892_gpio *tc35892_gpio)
 	}
 }
 
-static int __devinit tc35892_gpio_probe(struct platform_device *pdev)
+static int __devinit tc3589x_gpio_probe(struct platform_device *pdev)
 {
-	struct tc35892 *tc35892 = dev_get_drvdata(pdev->dev.parent);
-	struct tc35892_gpio_platform_data *pdata;
-	struct tc35892_gpio *tc35892_gpio;
+	struct tc3589x *tc3589x = dev_get_drvdata(pdev->dev.parent);
+	struct tc3589x_gpio_platform_data *pdata;
+	struct tc3589x_gpio *tc3589x_gpio;
 	int ret;
 	int irq;
 
-	pdata = tc35892->pdata->gpio;
+	pdata = tc3589x->pdata->gpio;
 	if (!pdata)
 		return -ENODEV;
 
@@ -283,107 +283,107 @@ static int __devinit tc35892_gpio_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	tc35892_gpio = kzalloc(sizeof(struct tc35892_gpio), GFP_KERNEL);
-	if (!tc35892_gpio)
+	tc3589x_gpio = kzalloc(sizeof(struct tc3589x_gpio), GFP_KERNEL);
+	if (!tc3589x_gpio)
 		return -ENOMEM;
 
-	mutex_init(&tc35892_gpio->irq_lock);
+	mutex_init(&tc3589x_gpio->irq_lock);
 
-	tc35892_gpio->dev = &pdev->dev;
-	tc35892_gpio->tc35892 = tc35892;
+	tc3589x_gpio->dev = &pdev->dev;
+	tc3589x_gpio->tc3589x = tc3589x;
 
-	tc35892_gpio->chip = template_chip;
-	tc35892_gpio->chip.ngpio = tc35892->num_gpio;
-	tc35892_gpio->chip.dev = &pdev->dev;
-	tc35892_gpio->chip.base = pdata->gpio_base;
+	tc3589x_gpio->chip = template_chip;
+	tc3589x_gpio->chip.ngpio = tc3589x->num_gpio;
+	tc3589x_gpio->chip.dev = &pdev->dev;
+	tc3589x_gpio->chip.base = pdata->gpio_base;
 
-	tc35892_gpio->irq_base = tc35892->irq_base + TC35892_INT_GPIO(0);
+	tc3589x_gpio->irq_base = tc3589x->irq_base + TC3589x_INT_GPIO(0);
 
 	/* Bring the GPIO module out of reset */
-	ret = tc35892_set_bits(tc35892, TC35892_RSTCTRL,
-			       TC35892_RSTCTRL_GPIRST, 0);
+	ret = tc3589x_set_bits(tc3589x, TC3589x_RSTCTRL,
+			       TC3589x_RSTCTRL_GPIRST, 0);
 	if (ret < 0)
 		goto out_free;
 
-	ret = tc35892_gpio_irq_init(tc35892_gpio);
+	ret = tc3589x_gpio_irq_init(tc3589x_gpio);
 	if (ret)
 		goto out_free;
 
-	ret = request_threaded_irq(irq, NULL, tc35892_gpio_irq, IRQF_ONESHOT,
-				   "tc35892-gpio", tc35892_gpio);
+	ret = request_threaded_irq(irq, NULL, tc3589x_gpio_irq, IRQF_ONESHOT,
+				   "tc3589x-gpio", tc3589x_gpio);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to get irq: %d\n", ret);
 		goto out_removeirq;
 	}
 
-	ret = gpiochip_add(&tc35892_gpio->chip);
+	ret = gpiochip_add(&tc3589x_gpio->chip);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to add gpiochip: %d\n", ret);
 		goto out_freeirq;
 	}
 
 	if (pdata->setup)
-		pdata->setup(tc35892, tc35892_gpio->chip.base);
+		pdata->setup(tc3589x, tc3589x_gpio->chip.base);
 
-	platform_set_drvdata(pdev, tc35892_gpio);
+	platform_set_drvdata(pdev, tc3589x_gpio);
 
 	return 0;
 
 out_freeirq:
-	free_irq(irq, tc35892_gpio);
+	free_irq(irq, tc3589x_gpio);
 out_removeirq:
-	tc35892_gpio_irq_remove(tc35892_gpio);
+	tc3589x_gpio_irq_remove(tc3589x_gpio);
 out_free:
-	kfree(tc35892_gpio);
+	kfree(tc3589x_gpio);
 	return ret;
 }
 
-static int __devexit tc35892_gpio_remove(struct platform_device *pdev)
+static int __devexit tc3589x_gpio_remove(struct platform_device *pdev)
 {
-	struct tc35892_gpio *tc35892_gpio = platform_get_drvdata(pdev);
-	struct tc35892 *tc35892 = tc35892_gpio->tc35892;
-	struct tc35892_gpio_platform_data *pdata = tc35892->pdata->gpio;
+	struct tc3589x_gpio *tc3589x_gpio = platform_get_drvdata(pdev);
+	struct tc3589x *tc3589x = tc3589x_gpio->tc3589x;
+	struct tc3589x_gpio_platform_data *pdata = tc3589x->pdata->gpio;
 	int irq = platform_get_irq(pdev, 0);
 	int ret;
 
 	if (pdata->remove)
-		pdata->remove(tc35892, tc35892_gpio->chip.base);
+		pdata->remove(tc3589x, tc3589x_gpio->chip.base);
 
-	ret = gpiochip_remove(&tc35892_gpio->chip);
+	ret = gpiochip_remove(&tc3589x_gpio->chip);
 	if (ret < 0) {
-		dev_err(tc35892_gpio->dev,
+		dev_err(tc3589x_gpio->dev,
 			"unable to remove gpiochip: %d\n", ret);
 		return ret;
 	}
 
-	free_irq(irq, tc35892_gpio);
-	tc35892_gpio_irq_remove(tc35892_gpio);
+	free_irq(irq, tc3589x_gpio);
+	tc3589x_gpio_irq_remove(tc3589x_gpio);
 
 	platform_set_drvdata(pdev, NULL);
-	kfree(tc35892_gpio);
+	kfree(tc3589x_gpio);
 
 	return 0;
 }
 
-static struct platform_driver tc35892_gpio_driver = {
-	.driver.name	= "tc35892-gpio",
+static struct platform_driver tc3589x_gpio_driver = {
+	.driver.name	= "tc3589x-gpio",
 	.driver.owner	= THIS_MODULE,
-	.probe		= tc35892_gpio_probe,
-	.remove		= __devexit_p(tc35892_gpio_remove),
+	.probe		= tc3589x_gpio_probe,
+	.remove		= __devexit_p(tc3589x_gpio_remove),
 };
 
-static int __init tc35892_gpio_init(void)
+static int __init tc3589x_gpio_init(void)
 {
-	return platform_driver_register(&tc35892_gpio_driver);
+	return platform_driver_register(&tc3589x_gpio_driver);
 }
-subsys_initcall(tc35892_gpio_init);
+subsys_initcall(tc3589x_gpio_init);
 
-static void __exit tc35892_gpio_exit(void)
+static void __exit tc3589x_gpio_exit(void)
 {
-	platform_driver_unregister(&tc35892_gpio_driver);
+	platform_driver_unregister(&tc3589x_gpio_driver);
 }
-module_exit(tc35892_gpio_exit);
+module_exit(tc3589x_gpio_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("TC35892 GPIO driver");
+MODULE_DESCRIPTION("TC3589x GPIO driver");
 MODULE_AUTHOR("Hanumath Prasad, Rabin Vincent");
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index f230235c3765..7deff53181d1 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -15,141 +15,141 @@
 #include <linux/mfd/tc3589x.h>
 
 /**
- * tc35892_reg_read() - read a single TC35892 register
- * @tc35892:	Device to read from
+ * tc3589x_reg_read() - read a single TC3589x register
+ * @tc3589x:	Device to read from
  * @reg:	Register to read
  */
-int tc35892_reg_read(struct tc35892 *tc35892, u8 reg)
+int tc3589x_reg_read(struct tc3589x *tc3589x, u8 reg)
 {
 	int ret;
 
-	ret = i2c_smbus_read_byte_data(tc35892->i2c, reg);
+	ret = i2c_smbus_read_byte_data(tc3589x->i2c, reg);
 	if (ret < 0)
-		dev_err(tc35892->dev, "failed to read reg %#x: %d\n",
+		dev_err(tc3589x->dev, "failed to read reg %#x: %d\n",
 			reg, ret);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tc35892_reg_read);
+EXPORT_SYMBOL_GPL(tc3589x_reg_read);
 
 /**
- * tc35892_reg_read() - write a single TC35892 register
- * @tc35892:	Device to write to
+ * tc3589x_reg_read() - write a single TC3589x register
+ * @tc3589x:	Device to write to
  * @reg:	Register to read
  * @data:	Value to write
  */
-int tc35892_reg_write(struct tc35892 *tc35892, u8 reg, u8 data)
+int tc3589x_reg_write(struct tc3589x *tc3589x, u8 reg, u8 data)
 {
 	int ret;
 
-	ret = i2c_smbus_write_byte_data(tc35892->i2c, reg, data);
+	ret = i2c_smbus_write_byte_data(tc3589x->i2c, reg, data);
 	if (ret < 0)
-		dev_err(tc35892->dev, "failed to write reg %#x: %d\n",
+		dev_err(tc3589x->dev, "failed to write reg %#x: %d\n",
 			reg, ret);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tc35892_reg_write);
+EXPORT_SYMBOL_GPL(tc3589x_reg_write);
 
 /**
- * tc35892_block_read() - read multiple TC35892 registers
- * @tc35892:	Device to read from
+ * tc3589x_block_read() - read multiple TC3589x registers
+ * @tc3589x:	Device to read from
  * @reg:	First register
  * @length:	Number of registers
  * @values:	Buffer to write to
  */
-int tc35892_block_read(struct tc35892 *tc35892, u8 reg, u8 length, u8 *values)
+int tc3589x_block_read(struct tc3589x *tc3589x, u8 reg, u8 length, u8 *values)
 {
 	int ret;
 
-	ret = i2c_smbus_read_i2c_block_data(tc35892->i2c, reg, length, values);
+	ret = i2c_smbus_read_i2c_block_data(tc3589x->i2c, reg, length, values);
 	if (ret < 0)
-		dev_err(tc35892->dev, "failed to read regs %#x: %d\n",
+		dev_err(tc3589x->dev, "failed to read regs %#x: %d\n",
 			reg, ret);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tc35892_block_read);
+EXPORT_SYMBOL_GPL(tc3589x_block_read);
 
 /**
- * tc35892_block_write() - write multiple TC35892 registers
- * @tc35892:	Device to write to
+ * tc3589x_block_write() - write multiple TC3589x registers
+ * @tc3589x:	Device to write to
  * @reg:	First register
  * @length:	Number of registers
  * @values:	Values to write
  */
-int tc35892_block_write(struct tc35892 *tc35892, u8 reg, u8 length,
+int tc3589x_block_write(struct tc3589x *tc3589x, u8 reg, u8 length,
 			const u8 *values)
 {
 	int ret;
 
-	ret = i2c_smbus_write_i2c_block_data(tc35892->i2c, reg, length,
+	ret = i2c_smbus_write_i2c_block_data(tc3589x->i2c, reg, length,
 					     values);
 	if (ret < 0)
-		dev_err(tc35892->dev, "failed to write regs %#x: %d\n",
+		dev_err(tc3589x->dev, "failed to write regs %#x: %d\n",
 			reg, ret);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tc35892_block_write);
+EXPORT_SYMBOL_GPL(tc3589x_block_write);
 
 /**
- * tc35892_set_bits() - set the value of a bitfield in a TC35892 register
- * @tc35892:	Device to write to
+ * tc3589x_set_bits() - set the value of a bitfield in a TC3589x register
+ * @tc3589x:	Device to write to
  * @reg:	Register to write
  * @mask:	Mask of bits to set
  * @values:	Value to set
  */
-int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val)
+int tc3589x_set_bits(struct tc3589x *tc3589x, u8 reg, u8 mask, u8 val)
 {
 	int ret;
 
-	mutex_lock(&tc35892->lock);
+	mutex_lock(&tc3589x->lock);
 
-	ret = tc35892_reg_read(tc35892, reg);
+	ret = tc3589x_reg_read(tc3589x, reg);
 	if (ret < 0)
 		goto out;
 
 	ret &= ~mask;
 	ret |= val;
 
-	ret = tc35892_reg_write(tc35892, reg, ret);
+	ret = tc3589x_reg_write(tc3589x, reg, ret);
 
 out:
-	mutex_unlock(&tc35892->lock);
+	mutex_unlock(&tc3589x->lock);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(tc35892_set_bits);
+EXPORT_SYMBOL_GPL(tc3589x_set_bits);
 
 static struct resource gpio_resources[] = {
 	{
-		.start	= TC35892_INT_GPIIRQ,
-		.end	= TC35892_INT_GPIIRQ,
+		.start	= TC3589x_INT_GPIIRQ,
+		.end	= TC3589x_INT_GPIIRQ,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
 
-static struct mfd_cell tc35892_devs[] = {
+static struct mfd_cell tc3589x_devs[] = {
 	{
-		.name		= "tc35892-gpio",
+		.name		= "tc3589x-gpio",
 		.num_resources	= ARRAY_SIZE(gpio_resources),
 		.resources	= &gpio_resources[0],
 	},
 };
 
-static irqreturn_t tc35892_irq(int irq, void *data)
+static irqreturn_t tc3589x_irq(int irq, void *data)
 {
-	struct tc35892 *tc35892 = data;
+	struct tc3589x *tc3589x = data;
 	int status;
 
-	status = tc35892_reg_read(tc35892, TC35892_IRQST);
+	status = tc3589x_reg_read(tc3589x, TC3589x_IRQST);
 	if (status < 0)
 		return IRQ_NONE;
 
 	while (status) {
 		int bit = __ffs(status);
 
-		handle_nested_irq(tc35892->irq_base + bit);
+		handle_nested_irq(tc3589x->irq_base + bit);
 		status &= ~(1 << bit);
 	}
 
@@ -158,30 +158,30 @@ static irqreturn_t tc35892_irq(int irq, void *data)
 	 * have the last interrupt clear (for example, GPIO IC write) take
 	 * effect.
 	 */
-	tc35892_reg_read(tc35892, TC35892_IRQST);
+	tc3589x_reg_read(tc3589x, TC3589x_IRQST);
 
 	return IRQ_HANDLED;
 }
 
-static void tc35892_irq_dummy(unsigned int irq)
+static void tc3589x_irq_dummy(unsigned int irq)
 {
 	/* No mask/unmask at this level */
 }
 
-static struct irq_chip tc35892_irq_chip = {
-	.name	= "tc35892",
-	.mask	= tc35892_irq_dummy,
-	.unmask	= tc35892_irq_dummy,
+static struct irq_chip tc3589x_irq_chip = {
+	.name	= "tc3589x",
+	.mask	= tc3589x_irq_dummy,
+	.unmask	= tc3589x_irq_dummy,
 };
 
-static int tc35892_irq_init(struct tc35892 *tc35892)
+static int tc3589x_irq_init(struct tc3589x *tc3589x)
 {
-	int base = tc35892->irq_base;
+	int base = tc3589x->irq_base;
 	int irq;
 
-	for (irq = base; irq < base + TC35892_NR_INTERNAL_IRQS; irq++) {
-		set_irq_chip_data(irq, tc35892);
-		set_irq_chip_and_handler(irq, &tc35892_irq_chip,
+	for (irq = base; irq < base + TC3589x_NR_INTERNAL_IRQS; irq++) {
+		set_irq_chip_data(irq, tc3589x);
+		set_irq_chip_and_handler(irq, &tc3589x_irq_chip,
 					 handle_edge_irq);
 		set_irq_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
@@ -194,12 +194,12 @@ static int tc35892_irq_init(struct tc35892 *tc35892)
 	return 0;
 }
 
-static void tc35892_irq_remove(struct tc35892 *tc35892)
+static void tc3589x_irq_remove(struct tc3589x *tc3589x)
 {
-	int base = tc35892->irq_base;
+	int base = tc3589x->irq_base;
 	int irq;
 
-	for (irq = base; irq < base + TC35892_NR_INTERNAL_IRQS; irq++) {
+	for (irq = base; irq < base + TC3589x_NR_INTERNAL_IRQS; irq++) {
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
@@ -208,138 +208,138 @@ static void tc35892_irq_remove(struct tc35892 *tc35892)
 	}
 }
 
-static int tc35892_chip_init(struct tc35892 *tc35892)
+static int tc3589x_chip_init(struct tc3589x *tc3589x)
 {
 	int manf, ver, ret;
 
-	manf = tc35892_reg_read(tc35892, TC35892_MANFCODE);
+	manf = tc3589x_reg_read(tc3589x, TC3589x_MANFCODE);
 	if (manf < 0)
 		return manf;
 
-	ver = tc35892_reg_read(tc35892, TC35892_VERSION);
+	ver = tc3589x_reg_read(tc3589x, TC3589x_VERSION);
 	if (ver < 0)
 		return ver;
 
-	if (manf != TC35892_MANFCODE_MAGIC) {
-		dev_err(tc35892->dev, "unknown manufacturer: %#x\n", manf);
+	if (manf != TC3589x_MANFCODE_MAGIC) {
+		dev_err(tc3589x->dev, "unknown manufacturer: %#x\n", manf);
 		return -EINVAL;
 	}
 
-	dev_info(tc35892->dev, "manufacturer: %#x, version: %#x\n", manf, ver);
+	dev_info(tc3589x->dev, "manufacturer: %#x, version: %#x\n", manf, ver);
 
 	/* Put everything except the IRQ module into reset */
-	ret = tc35892_reg_write(tc35892, TC35892_RSTCTRL,
-				TC35892_RSTCTRL_TIMRST
-				| TC35892_RSTCTRL_ROTRST
-				| TC35892_RSTCTRL_KBDRST
-				| TC35892_RSTCTRL_GPIRST);
+	ret = tc3589x_reg_write(tc3589x, TC3589x_RSTCTRL,
+				TC3589x_RSTCTRL_TIMRST
+				| TC3589x_RSTCTRL_ROTRST
+				| TC3589x_RSTCTRL_KBDRST
+				| TC3589x_RSTCTRL_GPIRST);
 	if (ret < 0)
 		return ret;
 
 	/* Clear the reset interrupt. */
-	return tc35892_reg_write(tc35892, TC35892_RSTINTCLR, 0x1);
+	return tc3589x_reg_write(tc3589x, TC3589x_RSTINTCLR, 0x1);
 }
 
-static int __devinit tc35892_probe(struct i2c_client *i2c,
+static int __devinit tc3589x_probe(struct i2c_client *i2c,
 				   const struct i2c_device_id *id)
 {
-	struct tc35892_platform_data *pdata = i2c->dev.platform_data;
-	struct tc35892 *tc35892;
+	struct tc3589x_platform_data *pdata = i2c->dev.platform_data;
+	struct tc3589x *tc3589x;
 	int ret;
 
 	if (!i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA
 				     | I2C_FUNC_SMBUS_I2C_BLOCK))
 		return -EIO;
 
-	tc35892 = kzalloc(sizeof(struct tc35892), GFP_KERNEL);
-	if (!tc35892)
+	tc3589x = kzalloc(sizeof(struct tc3589x), GFP_KERNEL);
+	if (!tc3589x)
 		return -ENOMEM;
 
-	mutex_init(&tc35892->lock);
+	mutex_init(&tc3589x->lock);
 
-	tc35892->dev = &i2c->dev;
-	tc35892->i2c = i2c;
-	tc35892->pdata = pdata;
-	tc35892->irq_base = pdata->irq_base;
-	tc35892->num_gpio = id->driver_data;
+	tc3589x->dev = &i2c->dev;
+	tc3589x->i2c = i2c;
+	tc3589x->pdata = pdata;
+	tc3589x->irq_base = pdata->irq_base;
+	tc3589x->num_gpio = id->driver_data;
 
-	i2c_set_clientdata(i2c, tc35892);
+	i2c_set_clientdata(i2c, tc3589x);
 
-	ret = tc35892_chip_init(tc35892);
+	ret = tc3589x_chip_init(tc3589x);
 	if (ret)
 		goto out_free;
 
-	ret = tc35892_irq_init(tc35892);
+	ret = tc3589x_irq_init(tc3589x);
 	if (ret)
 		goto out_free;
 
-	ret = request_threaded_irq(tc35892->i2c->irq, NULL, tc35892_irq,
+	ret = request_threaded_irq(tc3589x->i2c->irq, NULL, tc3589x_irq,
 				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				   "tc35892", tc35892);
+				   "tc3589x", tc3589x);
 	if (ret) {
-		dev_err(tc35892->dev, "failed to request IRQ: %d\n", ret);
+		dev_err(tc3589x->dev, "failed to request IRQ: %d\n", ret);
 		goto out_removeirq;
 	}
 
-	ret = mfd_add_devices(tc35892->dev, -1, tc35892_devs,
-			      ARRAY_SIZE(tc35892_devs), NULL,
-			      tc35892->irq_base);
+	ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_devs,
+			      ARRAY_SIZE(tc3589x_devs), NULL,
+			      tc3589x->irq_base);
 	if (ret) {
-		dev_err(tc35892->dev, "failed to add children\n");
+		dev_err(tc3589x->dev, "failed to add children\n");
 		goto out_freeirq;
 	}
 
 	return 0;
 
 out_freeirq:
-	free_irq(tc35892->i2c->irq, tc35892);
+	free_irq(tc3589x->i2c->irq, tc3589x);
 out_removeirq:
-	tc35892_irq_remove(tc35892);
+	tc3589x_irq_remove(tc3589x);
 out_free:
-	kfree(tc35892);
+	kfree(tc3589x);
 	return ret;
 }
 
-static int __devexit tc35892_remove(struct i2c_client *client)
+static int __devexit tc3589x_remove(struct i2c_client *client)
 {
-	struct tc35892 *tc35892 = i2c_get_clientdata(client);
+	struct tc3589x *tc3589x = i2c_get_clientdata(client);
 
-	mfd_remove_devices(tc35892->dev);
+	mfd_remove_devices(tc3589x->dev);
 
-	free_irq(tc35892->i2c->irq, tc35892);
-	tc35892_irq_remove(tc35892);
+	free_irq(tc3589x->i2c->irq, tc3589x);
+	tc3589x_irq_remove(tc3589x);
 
-	kfree(tc35892);
+	kfree(tc3589x);
 
 	return 0;
 }
 
-static const struct i2c_device_id tc35892_id[] = {
-	{ "tc35892", 24 },
+static const struct i2c_device_id tc3589x_id[] = {
+	{ "tc3589x", 24 },
 	{ }
 };
-MODULE_DEVICE_TABLE(i2c, tc35892_id);
+MODULE_DEVICE_TABLE(i2c, tc3589x_id);
 
-static struct i2c_driver tc35892_driver = {
-	.driver.name	= "tc35892",
+static struct i2c_driver tc3589x_driver = {
+	.driver.name	= "tc3589x",
 	.driver.owner	= THIS_MODULE,
-	.probe		= tc35892_probe,
-	.remove		= __devexit_p(tc35892_remove),
-	.id_table	= tc35892_id,
+	.probe		= tc3589x_probe,
+	.remove		= __devexit_p(tc3589x_remove),
+	.id_table	= tc3589x_id,
 };
 
-static int __init tc35892_init(void)
+static int __init tc3589x_init(void)
 {
-	return i2c_add_driver(&tc35892_driver);
+	return i2c_add_driver(&tc3589x_driver);
 }
-subsys_initcall(tc35892_init);
+subsys_initcall(tc3589x_init);
 
-static void __exit tc35892_exit(void)
+static void __exit tc3589x_exit(void)
 {
-	i2c_del_driver(&tc35892_driver);
+	i2c_del_driver(&tc3589x_driver);
 }
-module_exit(tc35892_exit);
+module_exit(tc3589x_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("TC35892 MFD core driver");
+MODULE_DESCRIPTION("TC3589x MFD core driver");
 MODULE_AUTHOR("Hanumath Prasad, Rabin Vincent");
diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h
index eff3094ca84e..ea1918896f5b 100644
--- a/include/linux/mfd/tc3589x.h
+++ b/include/linux/mfd/tc3589x.h
@@ -4,133 +4,133 @@
  * License Terms: GNU General Public License, version 2
  */
 
-#ifndef __LINUX_MFD_TC35892_H
-#define __LINUX_MFD_TC35892_H
+#ifndef __LINUX_MFD_TC3589x_H
+#define __LINUX_MFD_TC3589x_H
 
 #include <linux/device.h>
 
-#define TC35892_RSTCTRL_IRQRST	(1 << 4)
-#define TC35892_RSTCTRL_TIMRST	(1 << 3)
-#define TC35892_RSTCTRL_ROTRST	(1 << 2)
-#define TC35892_RSTCTRL_KBDRST	(1 << 1)
-#define TC35892_RSTCTRL_GPIRST	(1 << 0)
-
-#define TC35892_IRQST		0x91
-
-#define TC35892_MANFCODE_MAGIC	0x03
-#define TC35892_MANFCODE	0x80
-#define TC35892_VERSION		0x81
-#define TC35892_IOCFG		0xA7
-
-#define TC35892_CLKMODE		0x88
-#define TC35892_CLKCFG		0x89
-#define TC35892_CLKEN		0x8A
-
-#define TC35892_RSTCTRL		0x82
-#define TC35892_EXTRSTN		0x83
-#define TC35892_RSTINTCLR	0x84
-
-#define TC35892_GPIOIS0		0xC9
-#define TC35892_GPIOIS1		0xCA
-#define TC35892_GPIOIS2		0xCB
-#define TC35892_GPIOIBE0	0xCC
-#define TC35892_GPIOIBE1	0xCD
-#define TC35892_GPIOIBE2	0xCE
-#define TC35892_GPIOIEV0	0xCF
-#define TC35892_GPIOIEV1	0xD0
-#define TC35892_GPIOIEV2	0xD1
-#define TC35892_GPIOIE0		0xD2
-#define TC35892_GPIOIE1		0xD3
-#define TC35892_GPIOIE2		0xD4
-#define TC35892_GPIORIS0	0xD6
-#define TC35892_GPIORIS1	0xD7
-#define TC35892_GPIORIS2	0xD8
-#define TC35892_GPIOMIS0	0xD9
-#define TC35892_GPIOMIS1	0xDA
-#define TC35892_GPIOMIS2	0xDB
-#define TC35892_GPIOIC0		0xDC
-#define TC35892_GPIOIC1		0xDD
-#define TC35892_GPIOIC2		0xDE
-
-#define TC35892_GPIODATA0	0xC0
-#define TC35892_GPIOMASK0	0xc1
-#define TC35892_GPIODATA1	0xC2
-#define TC35892_GPIOMASK1	0xc3
-#define TC35892_GPIODATA2	0xC4
-#define TC35892_GPIOMASK2	0xC5
-
-#define TC35892_GPIODIR0	0xC6
-#define TC35892_GPIODIR1	0xC7
-#define TC35892_GPIODIR2	0xC8
-
-#define TC35892_GPIOSYNC0	0xE6
-#define TC35892_GPIOSYNC1	0xE7
-#define TC35892_GPIOSYNC2	0xE8
-
-#define TC35892_GPIOWAKE0	0xE9
-#define TC35892_GPIOWAKE1	0xEA
-#define TC35892_GPIOWAKE2	0xEB
-
-#define TC35892_GPIOODM0	0xE0
-#define TC35892_GPIOODE0	0xE1
-#define TC35892_GPIOODM1	0xE2
-#define TC35892_GPIOODE1	0xE3
-#define TC35892_GPIOODM2	0xE4
-#define TC35892_GPIOODE2	0xE5
-
-#define TC35892_INT_GPIIRQ	0
-#define TC35892_INT_TI0IRQ	1
-#define TC35892_INT_TI1IRQ	2
-#define TC35892_INT_TI2IRQ	3
-#define TC35892_INT_ROTIRQ	5
-#define TC35892_INT_KBDIRQ	6
-#define TC35892_INT_PORIRQ	7
-
-#define TC35892_NR_INTERNAL_IRQS	8
-#define TC35892_INT_GPIO(x)	(TC35892_NR_INTERNAL_IRQS + (x))
-
-struct tc35892 {
+#define TC3589x_RSTCTRL_IRQRST	(1 << 4)
+#define TC3589x_RSTCTRL_TIMRST	(1 << 3)
+#define TC3589x_RSTCTRL_ROTRST	(1 << 2)
+#define TC3589x_RSTCTRL_KBDRST	(1 << 1)
+#define TC3589x_RSTCTRL_GPIRST	(1 << 0)
+
+#define TC3589x_IRQST		0x91
+
+#define TC3589x_MANFCODE_MAGIC	0x03
+#define TC3589x_MANFCODE	0x80
+#define TC3589x_VERSION		0x81
+#define TC3589x_IOCFG		0xA7
+
+#define TC3589x_CLKMODE		0x88
+#define TC3589x_CLKCFG		0x89
+#define TC3589x_CLKEN		0x8A
+
+#define TC3589x_RSTCTRL		0x82
+#define TC3589x_EXTRSTN		0x83
+#define TC3589x_RSTINTCLR	0x84
+
+#define TC3589x_GPIOIS0		0xC9
+#define TC3589x_GPIOIS1		0xCA
+#define TC3589x_GPIOIS2		0xCB
+#define TC3589x_GPIOIBE0	0xCC
+#define TC3589x_GPIOIBE1	0xCD
+#define TC3589x_GPIOIBE2	0xCE
+#define TC3589x_GPIOIEV0	0xCF
+#define TC3589x_GPIOIEV1	0xD0
+#define TC3589x_GPIOIEV2	0xD1
+#define TC3589x_GPIOIE0		0xD2
+#define TC3589x_GPIOIE1		0xD3
+#define TC3589x_GPIOIE2		0xD4
+#define TC3589x_GPIORIS0	0xD6
+#define TC3589x_GPIORIS1	0xD7
+#define TC3589x_GPIORIS2	0xD8
+#define TC3589x_GPIOMIS0	0xD9
+#define TC3589x_GPIOMIS1	0xDA
+#define TC3589x_GPIOMIS2	0xDB
+#define TC3589x_GPIOIC0		0xDC
+#define TC3589x_GPIOIC1		0xDD
+#define TC3589x_GPIOIC2		0xDE
+
+#define TC3589x_GPIODATA0	0xC0
+#define TC3589x_GPIOMASK0	0xc1
+#define TC3589x_GPIODATA1	0xC2
+#define TC3589x_GPIOMASK1	0xc3
+#define TC3589x_GPIODATA2	0xC4
+#define TC3589x_GPIOMASK2	0xC5
+
+#define TC3589x_GPIODIR0	0xC6
+#define TC3589x_GPIODIR1	0xC7
+#define TC3589x_GPIODIR2	0xC8
+
+#define TC3589x_GPIOSYNC0	0xE6
+#define TC3589x_GPIOSYNC1	0xE7
+#define TC3589x_GPIOSYNC2	0xE8
+
+#define TC3589x_GPIOWAKE0	0xE9
+#define TC3589x_GPIOWAKE1	0xEA
+#define TC3589x_GPIOWAKE2	0xEB
+
+#define TC3589x_GPIOODM0	0xE0
+#define TC3589x_GPIOODE0	0xE1
+#define TC3589x_GPIOODM1	0xE2
+#define TC3589x_GPIOODE1	0xE3
+#define TC3589x_GPIOODM2	0xE4
+#define TC3589x_GPIOODE2	0xE5
+
+#define TC3589x_INT_GPIIRQ	0
+#define TC3589x_INT_TI0IRQ	1
+#define TC3589x_INT_TI1IRQ	2
+#define TC3589x_INT_TI2IRQ	3
+#define TC3589x_INT_ROTIRQ	5
+#define TC3589x_INT_KBDIRQ	6
+#define TC3589x_INT_PORIRQ	7
+
+#define TC3589x_NR_INTERNAL_IRQS	8
+#define TC3589x_INT_GPIO(x)	(TC3589x_NR_INTERNAL_IRQS + (x))
+
+struct tc3589x {
 	struct mutex lock;
 	struct device *dev;
 	struct i2c_client *i2c;
 
 	int irq_base;
 	int num_gpio;
-	struct tc35892_platform_data *pdata;
+	struct tc3589x_platform_data *pdata;
 };
 
-extern int tc35892_reg_write(struct tc35892 *tc35892, u8 reg, u8 data);
-extern int tc35892_reg_read(struct tc35892 *tc35892, u8 reg);
-extern int tc35892_block_read(struct tc35892 *tc35892, u8 reg, u8 length,
+extern int tc3589x_reg_write(struct tc3589x *tc3589x, u8 reg, u8 data);
+extern int tc3589x_reg_read(struct tc3589x *tc3589x, u8 reg);
+extern int tc3589x_block_read(struct tc3589x *tc3589x, u8 reg, u8 length,
 			      u8 *values);
-extern int tc35892_block_write(struct tc35892 *tc35892, u8 reg, u8 length,
+extern int tc3589x_block_write(struct tc3589x *tc3589x, u8 reg, u8 length,
 			       const u8 *values);
-extern int tc35892_set_bits(struct tc35892 *tc35892, u8 reg, u8 mask, u8 val);
+extern int tc3589x_set_bits(struct tc3589x *tc3589x, u8 reg, u8 mask, u8 val);
 
 /**
- * struct tc35892_gpio_platform_data - TC35892 GPIO platform data
- * @gpio_base: first gpio number assigned to TC35892.  A maximum of
- *	       %TC35892_NR_GPIOS GPIOs will be allocated.
+ * struct tc3589x_gpio_platform_data - TC3589x GPIO platform data
+ * @gpio_base: first gpio number assigned to TC3589x.  A maximum of
+ *	       %TC3589x_NR_GPIOS GPIOs will be allocated.
  * @setup: callback for board-specific initialization
  * @remove: callback for board-specific teardown
  */
-struct tc35892_gpio_platform_data {
+struct tc3589x_gpio_platform_data {
 	int gpio_base;
-	void (*setup)(struct tc35892 *tc35892, unsigned gpio_base);
-	void (*remove)(struct tc35892 *tc35892, unsigned gpio_base);
+	void (*setup)(struct tc3589x *tc3589x, unsigned gpio_base);
+	void (*remove)(struct tc3589x *tc3589x, unsigned gpio_base);
 };
 
 /**
- * struct tc35892_platform_data - TC35892 platform data
- * @irq_base: base IRQ number.  %TC35892_NR_IRQS irqs will be used.
+ * struct tc3589x_platform_data - TC3589x platform data
+ * @irq_base: base IRQ number.  %TC3589x_NR_IRQS irqs will be used.
  * @gpio: GPIO-specific platform data
  */
-struct tc35892_platform_data {
+struct tc3589x_platform_data {
 	int irq_base;
-	struct tc35892_gpio_platform_data *gpio;
+	struct tc3589x_gpio_platform_data *gpio;
 };
 
-#define TC35892_NR_GPIOS	24
-#define TC35892_NR_IRQS		TC35892_INT_GPIO(TC35892_NR_GPIOS)
+#define TC3589x_NR_GPIOS	24
+#define TC3589x_NR_IRQS		TC3589x_INT_GPIO(TC3589x_NR_GPIOS)
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 611b7590afa6e6c6b0942b1d3efef17fbb348ef5 Mon Sep 17 00:00:00 2001
From: Sundar Iyer <sundar.iyer@stericsson.com>
Date: Mon, 13 Dec 2010 09:33:15 +0530
Subject: mfd/tc3589x: add block identifier for multiple child devices

Add block identifier to be able to add multiple mfd clients
to the mfd core

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Sundar Iyer <sundar.iyer@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
 arch/arm/mach-ux500/board-mop500.c |  1 +
 drivers/mfd/tc3589x.c              | 28 +++++++++++++++++++++++-----
 include/linux/mfd/tc3589x.h        |  7 +++++++
 3 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 5c950261968e..060b23aab8e4 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -122,6 +122,7 @@ static struct tc3589x_gpio_platform_data mop500_tc35892_gpio_data = {
 };
 
 static struct tc3589x_platform_data mop500_tc35892_data = {
+	.block		= TC3589x_BLOCK_GPIO,
 	.gpio		= &mop500_tc35892_gpio_data,
 	.irq_base	= MOP500_EGPIO_IRQ_BASE,
 };
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 7deff53181d1..0ed9669d95fa 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -129,7 +129,7 @@ static struct resource gpio_resources[] = {
 	},
 };
 
-static struct mfd_cell tc3589x_devs[] = {
+static struct mfd_cell tc3589x_dev_gpio[] = {
 	{
 		.name		= "tc3589x-gpio",
 		.num_resources	= ARRAY_SIZE(gpio_resources),
@@ -240,6 +240,26 @@ static int tc3589x_chip_init(struct tc3589x *tc3589x)
 	return tc3589x_reg_write(tc3589x, TC3589x_RSTINTCLR, 0x1);
 }
 
+static int __devinit tc3589x_device_init(struct tc3589x *tc3589x)
+{
+	int ret = 0;
+	unsigned int blocks = tc3589x->pdata->block;
+
+	if (blocks & TC3589x_BLOCK_GPIO) {
+		ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_dev_gpio,
+				ARRAY_SIZE(tc3589x_dev_gpio), NULL,
+				tc3589x->irq_base);
+		if (ret) {
+			dev_err(tc3589x->dev, "failed to add gpio child\n");
+			return ret;
+		}
+		dev_info(tc3589x->dev, "added gpio block\n");
+	}
+
+	return ret;
+
+}
+
 static int __devinit tc3589x_probe(struct i2c_client *i2c,
 				   const struct i2c_device_id *id)
 {
@@ -281,11 +301,9 @@ static int __devinit tc3589x_probe(struct i2c_client *i2c,
 		goto out_removeirq;
 	}
 
-	ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_devs,
-			      ARRAY_SIZE(tc3589x_devs), NULL,
-			      tc3589x->irq_base);
+	ret = tc3589x_device_init(tc3589x);
 	if (ret) {
-		dev_err(tc3589x->dev, "failed to add children\n");
+		dev_err(tc3589x->dev, "failed to add child devices\n");
 		goto out_freeirq;
 	}
 
diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h
index ea1918896f5b..da00958b12d9 100644
--- a/include/linux/mfd/tc3589x.h
+++ b/include/linux/mfd/tc3589x.h
@@ -9,6 +9,11 @@
 
 #include <linux/device.h>
 
+enum tx3589x_block {
+	TC3589x_BLOCK_GPIO        = 1 << 0,
+	TC3589x_BLOCK_KEYPAD      = 1 << 1,
+};
+
 #define TC3589x_RSTCTRL_IRQRST	(1 << 4)
 #define TC3589x_RSTCTRL_TIMRST	(1 << 3)
 #define TC3589x_RSTCTRL_ROTRST	(1 << 2)
@@ -122,10 +127,12 @@ struct tc3589x_gpio_platform_data {
 
 /**
  * struct tc3589x_platform_data - TC3589x platform data
+ * @block: bitmask of blocks to enable (use TC3589x_BLOCK_*)
  * @irq_base: base IRQ number.  %TC3589x_NR_IRQS irqs will be used.
  * @gpio: GPIO-specific platform data
  */
 struct tc3589x_platform_data {
+	unsigned int block;
 	int irq_base;
 	struct tc3589x_gpio_platform_data *gpio;
 };
-- 
cgit v1.2.3-71-gd317


From 85b7720039fc000b561c20fe2aaa3b54cddae4a7 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 18 Dec 2010 20:51:13 +0100
Subject: Input: introduce device properties

Today, userspace sets up an input device based on the data it emits.
This is not always enough; a tablet and a touchscreen may emit exactly
the same data, for instance, but the former should be set up with a
pointer whereas the latter does not need to. Recently, a new type of
touchpad has emerged where the buttons are under the pad, which
changes logic without changing the emitted data. This patch introduces
a new ioctl, EVIOCGPROP, which enables user access to a set of device
properties useful during setup. The properties are given as a bitmap
in the same fashion as the event types, and are also made available
via sysfs, uevent and /proc/bus/input/devices.

Acked-by: Ping Cheng <pingc@wacom.com>
Acked-by: Chase Douglas <chase.douglas@canonical.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
---
 drivers/input/evdev.c       |  4 ++++
 drivers/input/input.c       | 19 +++++++++++++++++++
 drivers/input/misc/uinput.c |  4 ++++
 include/linux/input.h       | 16 ++++++++++++++++
 include/linux/uinput.h      |  1 +
 5 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index e3f7fc6f9565..0cd97e8f0c9a 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -677,6 +677,10 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 #define EVIOC_MASK_SIZE(nr)	((nr) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
 	switch (EVIOC_MASK_SIZE(cmd)) {
 
+	case EVIOCGPROP(0):
+		return bits_to_user(dev->propbit, INPUT_PROP_MAX,
+				    size, p, compat_mode);
+
 	case EVIOCGKEY(0):
 		return bits_to_user(dev->key, KEY_MAX, size, p, compat_mode);
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 37708d1d86ec..9ea713f4192b 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1095,6 +1095,8 @@ static int input_devices_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%s ", handle->name);
 	seq_putc(seq, '\n');
 
+	input_seq_print_bitmap(seq, "PROP", dev->propbit, INPUT_PROP_MAX);
+
 	input_seq_print_bitmap(seq, "EV", dev->evbit, EV_MAX);
 	if (test_bit(EV_KEY, dev->evbit))
 		input_seq_print_bitmap(seq, "KEY", dev->keybit, KEY_MAX);
@@ -1318,11 +1320,26 @@ static ssize_t input_dev_show_modalias(struct device *dev,
 }
 static DEVICE_ATTR(modalias, S_IRUGO, input_dev_show_modalias, NULL);
 
+static int input_print_bitmap(char *buf, int buf_size, unsigned long *bitmap,
+			      int max, int add_cr);
+
+static ssize_t input_dev_show_properties(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct input_dev *input_dev = to_input_dev(dev);
+	int len = input_print_bitmap(buf, PAGE_SIZE, input_dev->propbit,
+				     INPUT_PROP_MAX, true);
+	return min_t(int, len, PAGE_SIZE);
+}
+static DEVICE_ATTR(properties, S_IRUGO, input_dev_show_properties, NULL);
+
 static struct attribute *input_dev_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_phys.attr,
 	&dev_attr_uniq.attr,
 	&dev_attr_modalias.attr,
+	&dev_attr_properties.attr,
 	NULL
 };
 
@@ -1522,6 +1539,8 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env)
 	if (dev->uniq)
 		INPUT_ADD_HOTPLUG_VAR("UNIQ=\"%s\"", dev->uniq);
 
+	INPUT_ADD_HOTPLUG_BM_VAR("PROP=", dev->propbit, INPUT_PROP_MAX);
+
 	INPUT_ADD_HOTPLUG_BM_VAR("EV=", dev->evbit, EV_MAX);
 	if (test_bit(EV_KEY, dev->evbit))
 		INPUT_ADD_HOTPLUG_BM_VAR("KEY=", dev->keybit, KEY_MAX);
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index bea89722c4e9..82542a1c1098 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -680,6 +680,10 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			retval = uinput_set_bit(arg, swbit, SW_MAX);
 			break;
 
+		case UI_SET_PROPBIT:
+			retval = uinput_set_bit(arg, propbit, INPUT_PROP_MAX);
+			break;
+
 		case UI_SET_PHYS:
 			if (udev->state == UIST_CREATED) {
 				retval = -EINVAL;
diff --git a/include/linux/input.h b/include/linux/input.h
index b3a1e02080c0..8d9c76cd3c43 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -91,6 +91,7 @@ struct input_keymap_entry {
 #define EVIOCGNAME(len)		_IOC(_IOC_READ, 'E', 0x06, len)		/* get device name */
 #define EVIOCGPHYS(len)		_IOC(_IOC_READ, 'E', 0x07, len)		/* get physical location */
 #define EVIOCGUNIQ(len)		_IOC(_IOC_READ, 'E', 0x08, len)		/* get unique identifier */
+#define EVIOCGPROP(len)		_IOC(_IOC_READ, 'E', 0x09, len)		/* get device properties */
 
 #define EVIOCGKEY(len)		_IOC(_IOC_READ, 'E', 0x18, len)		/* get global key state */
 #define EVIOCGLED(len)		_IOC(_IOC_READ, 'E', 0x19, len)		/* get all LEDs */
@@ -107,6 +108,18 @@ struct input_keymap_entry {
 
 #define EVIOCGRAB		_IOW('E', 0x90, int)			/* Grab/Release device */
 
+/*
+ * Device properties and quirks
+ */
+
+#define INPUT_PROP_POINTER		0x00	/* needs a pointer */
+#define INPUT_PROP_DIRECT		0x01	/* direct input devices */
+#define INPUT_PROP_BUTTONPAD		0x02	/* has button(s) under pad */
+#define INPUT_PROP_SEMI_MT		0x03	/* touch rectangle only */
+
+#define INPUT_PROP_MAX			0x1f
+#define INPUT_PROP_CNT			(INPUT_PROP_MAX + 1)
+
 /*
  * Event types
  */
@@ -1090,6 +1103,7 @@ struct ff_effect {
  * @phys: physical path to the device in the system hierarchy
  * @uniq: unique identification code for the device (if device has it)
  * @id: id of the device (struct input_id)
+ * @propbit: bitmap of device properties and quirks
  * @evbit: bitmap of types of events supported by the device (EV_KEY,
  *	EV_REL, etc.)
  * @keybit: bitmap of keys/buttons this device has
@@ -1173,6 +1187,8 @@ struct input_dev {
 	const char *uniq;
 	struct input_id id;
 
+	unsigned long propbit[BITS_TO_LONGS(INPUT_PROP_CNT)];
+
 	unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
 	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
 	unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index 05f7fed2b173..d28c726ede4f 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -104,6 +104,7 @@ struct uinput_ff_erase {
 #define UI_SET_FFBIT		_IOW(UINPUT_IOCTL_BASE, 107, int)
 #define UI_SET_PHYS		_IOW(UINPUT_IOCTL_BASE, 108, char*)
 #define UI_SET_SWBIT		_IOW(UINPUT_IOCTL_BASE, 109, int)
+#define UI_SET_PROPBIT		_IOW(UINPUT_IOCTL_BASE, 110, int)
 
 #define UI_BEGIN_FF_UPLOAD	_IOWR(UINPUT_IOCTL_BASE, 200, struct uinput_ff_upload)
 #define UI_END_FF_UPLOAD	_IOW(UINPUT_IOCTL_BASE, 201, struct uinput_ff_upload)
-- 
cgit v1.2.3-71-gd317


From 24bdd9f4c9af75b33b438d60381a67626de0128d Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:37:48 -0800
Subject: mac80211: Rename mesh_params to mesh_config to prepare for mesh_setup

Mesh parameters can be to setup a mesh or to configure it.
This patch renames the ambiguous name mesh_params to mesh_config
in preparation for mesh_setup.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 15 ++++++++++-----
 include/net/cfg80211.h  |  8 ++++----
 net/mac80211/cfg.c      |  8 ++++----
 net/wireless/nl80211.c  | 40 ++++++++++++++++++++--------------------
 4 files changed, 38 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 7483a89cee8f..11a1de67b618 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -172,10 +172,10 @@
  * 	to the specified ISO/IEC 3166-1 alpha2 country code. The core will
  * 	store this as a valid request and then query userspace for it.
  *
- * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the
+ * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the
  *	interface identified by %NL80211_ATTR_IFINDEX
  *
- * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the
+ * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the
  *      interface identified by %NL80211_ATTR_IFINDEX
  *
  * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The
@@ -448,8 +448,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_REG,
 	NL80211_CMD_REQ_SET_REG,
 
-	NL80211_CMD_GET_MESH_PARAMS,
-	NL80211_CMD_SET_MESH_PARAMS,
+	NL80211_CMD_GET_MESH_CONFIG,
+	NL80211_CMD_SET_MESH_CONFIG,
 
 	NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */,
 
@@ -538,6 +538,10 @@ enum nl80211_commands {
 #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
 #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
 
+/* source-level API compatibility */
+#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
+#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
+
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
  *
@@ -922,7 +926,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_REG_ALPHA2,
 	NL80211_ATTR_REG_RULES,
 
-	NL80211_ATTR_MESH_PARAMS,
+	NL80211_ATTR_MESH_CONFIG,
 
 	NL80211_ATTR_BSS_BASIC_RATES,
 
@@ -1058,6 +1062,7 @@ enum nl80211_attrs {
 
 /* source-level API compatibility */
 #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION
+#define	NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
 
 /*
  * Allow user space programs to use #ifdef on new attributes by defining them
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6dc665a727c2..7283496c2d05 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1096,9 +1096,9 @@ struct cfg80211_pmksa {
  * @get_mpath: get a mesh path for the given parameters
  * @dump_mpath: dump mesh path callback -- resume dump at index @idx
  *
- * @get_mesh_params: Put the current mesh parameters into *params
+ * @get_mesh_config: Get the current mesh configuration
  *
- * @update_mesh_params: Update mesh parameters on a running mesh.
+ * @update_mesh_config: Update mesh parameters on a running mesh.
  *	The mask is a bitfield which tells us which parameters to
  *	set, and which to leave alone.
  *
@@ -1246,10 +1246,10 @@ struct cfg80211_ops {
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *dst, u8 *next_hop,
 			       struct mpath_info *pinfo);
-	int	(*get_mesh_params)(struct wiphy *wiphy,
+	int	(*get_mesh_config)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
-	int	(*update_mesh_params)(struct wiphy *wiphy,
+	int	(*update_mesh_config)(struct wiphy *wiphy,
 				      struct net_device *dev, u32 mask,
 				      const struct mesh_config *nconf);
 	int	(*join_mesh)(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ea06f92801e9..1c94a2ae22ee 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -984,7 +984,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_get_mesh_params(struct wiphy *wiphy,
+static int ieee80211_get_mesh_config(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf)
 {
@@ -1000,7 +1000,7 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
-static int ieee80211_update_mesh_params(struct wiphy *wiphy,
+static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 					struct net_device *dev, u32 mask,
 					const struct mesh_config *nconf)
 {
@@ -1787,8 +1787,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_mpath = ieee80211_change_mpath,
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
-	.update_mesh_params = ieee80211_update_mesh_params,
-	.get_mesh_params = ieee80211_get_mesh_params,
+	.update_mesh_config = ieee80211_update_mesh_config,
+	.get_mesh_config = ieee80211_get_mesh_config,
 	.join_mesh = ieee80211_join_mesh,
 	.leave_mesh = ieee80211_leave_mesh,
 #endif
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index aefce54d47e2..10be9350752e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -123,7 +123,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 					   .len = NL80211_MAX_SUPP_RATES },
 	[NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 },
 
-	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
 
 	[NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
 					 .len = NL80211_HT_CAPABILITY_LEN },
@@ -719,7 +719,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(add_beacon, NEW_BEACON);
 	CMD(add_station, NEW_STATION);
 	CMD(add_mpath, NEW_MPATH);
-	CMD(update_mesh_params, SET_MESH_PARAMS);
+	CMD(update_mesh_config, SET_MESH_CONFIG);
 	CMD(change_bss, SET_BSS);
 	CMD(auth, AUTHENTICATE);
 	CMD(assoc, ASSOCIATE);
@@ -2673,7 +2673,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	return r;
 }
 
-static int nl80211_get_mesh_params(struct sk_buff *skb,
+static int nl80211_get_mesh_config(struct sk_buff *skb,
 				   struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -2688,7 +2688,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
 		return -EOPNOTSUPP;
 
-	if (!rdev->ops->get_mesh_params)
+	if (!rdev->ops->get_mesh_config)
 		return -EOPNOTSUPP;
 
 	wdev_lock(wdev);
@@ -2696,7 +2696,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (!wdev->mesh_id_len)
 		memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
 	else
-		err = rdev->ops->get_mesh_params(&rdev->wiphy, dev,
+		err = rdev->ops->get_mesh_config(&rdev->wiphy, dev,
 						 &cur_params);
 	wdev_unlock(wdev);
 
@@ -2708,10 +2708,10 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (!msg)
 		return -ENOMEM;
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
-			     NL80211_CMD_GET_MESH_PARAMS);
+			     NL80211_CMD_GET_MESH_CONFIG);
 	if (!hdr)
 		goto nla_put_failure;
-	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS);
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
 	if (!pinfoattr)
 		goto nla_put_failure;
 	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
@@ -2773,7 +2773,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 };
 
-static int nl80211_parse_mesh_params(struct genl_info *info,
+static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
 {
@@ -2789,10 +2789,10 @@ do {\
 } while (0);\
 
 
-	if (!info->attrs[NL80211_ATTR_MESH_PARAMS])
+	if (!info->attrs[NL80211_ATTR_MESH_CONFIG])
 		return -EINVAL;
 	if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
-			     info->attrs[NL80211_ATTR_MESH_PARAMS],
+			     info->attrs[NL80211_ATTR_MESH_CONFIG],
 			     nl80211_meshconf_params_policy))
 		return -EINVAL;
 
@@ -2847,7 +2847,7 @@ do {\
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
-static int nl80211_update_mesh_params(struct sk_buff *skb,
+static int nl80211_update_mesh_config(struct sk_buff *skb,
 				      struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -2860,10 +2860,10 @@ static int nl80211_update_mesh_params(struct sk_buff *skb,
 	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
 		return -EOPNOTSUPP;
 
-	if (!rdev->ops->update_mesh_params)
+	if (!rdev->ops->update_mesh_config)
 		return -EOPNOTSUPP;
 
-	err = nl80211_parse_mesh_params(info, &cfg, &mask);
+	err = nl80211_parse_mesh_config(info, &cfg, &mask);
 	if (err)
 		return err;
 
@@ -2872,7 +2872,7 @@ static int nl80211_update_mesh_params(struct sk_buff *skb,
 		err = -ENOLINK;
 
 	if (!err)
-		err = rdev->ops->update_mesh_params(&rdev->wiphy, dev,
+		err = rdev->ops->update_mesh_config(&rdev->wiphy, dev,
 						    mask, &cfg);
 
 	wdev_unlock(wdev);
@@ -4672,9 +4672,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	/* start with default */
 	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
 
-	if (info->attrs[NL80211_ATTR_MESH_PARAMS]) {
+	if (info->attrs[NL80211_ATTR_MESH_CONFIG]) {
 		/* and parse parameters if given */
-		err = nl80211_parse_mesh_params(info, &cfg, NULL);
+		err = nl80211_parse_mesh_config(info, &cfg, NULL);
 		if (err)
 			return err;
 	}
@@ -4952,16 +4952,16 @@ static struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
-		.cmd = NL80211_CMD_GET_MESH_PARAMS,
-		.doit = nl80211_get_mesh_params,
+		.cmd = NL80211_CMD_GET_MESH_CONFIG,
+		.doit = nl80211_get_mesh_config,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
-		.cmd = NL80211_CMD_SET_MESH_PARAMS,
-		.doit = nl80211_update_mesh_params,
+		.cmd = NL80211_CMD_SET_MESH_CONFIG,
+		.doit = nl80211_update_mesh_config,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-- 
cgit v1.2.3-71-gd317


From c80d545da3f7c0e534ccd4a780f322f80a92cff1 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:37:49 -0800
Subject: mac80211: Let userspace enable and configure vendor specific path
 selection.

Userspace will now be allowed to toggle between the default path
selection algorithm (HWMP, implemented in the kernel), and a vendor
specific alternative.  Also in the same patch, allow userspace to add
information elements to mesh beacons.  This is accordance with the
Extensible Path Selection Framework specified in version 7.0 of the
802.11s draft.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 25 ++++++++++++++++++
 include/linux/nl80211.h    | 47 +++++++++++++++++++++++++++++++---
 include/net/cfg80211.h     |  8 ++++++
 net/mac80211/cfg.c         | 39 +++++++++++++++++++++++++---
 net/mac80211/ieee80211_i.h |  4 +--
 net/mac80211/mesh.c        |  7 ++++++
 net/mac80211/mesh_plink.c  |  3 ++-
 net/mac80211/tx.c          |  3 ++-
 net/wireless/core.c        | 22 +++++++++++-----
 net/wireless/core.h        |  5 ++--
 net/wireless/mesh.c        | 24 ++++++++++--------
 net/wireless/nl80211.c     | 63 ++++++++++++++++++++++++++++++++++++++++++----
 12 files changed, 214 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7f2354534242..cd681681d211 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1291,6 +1291,31 @@ enum ieee80211_key_len {
 	WLAN_KEY_LEN_AES_CMAC = 16,
 };
 
+/**
+ * enum - mesh path selection protocol identifier
+ *
+ * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol
+ * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will
+ * be specified in a vendor specific information element
+ */
+enum {
+	IEEE80211_PATH_PROTOCOL_HWMP = 0,
+	IEEE80211_PATH_PROTOCOL_VENDOR = 255,
+};
+
+/**
+ * enum - mesh path selection metric identifier
+ *
+ * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric
+ * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be
+ * specified in a vendor specific information element
+ */
+enum {
+	IEEE80211_PATH_METRIC_AIRTIME = 0,
+	IEEE80211_PATH_METRIC_VENDOR = 255,
+};
+
+
 /*
  * IEEE 802.11-2007 7.3.2.9 Country information element
  *
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 11a1de67b618..69eaccac78c4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -872,6 +872,9 @@ enum nl80211_commands {
  *	attributes, specifying what a key should be set as default as.
  *	See &enum nl80211_key_default_types.
  *
+ * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters.  These cannot be
+ * changed once the mesh is active.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1054,6 +1057,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
 
+	NL80211_ATTR_MESH_SETUP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1564,7 +1569,8 @@ enum nl80211_mntr_flags {
 /**
  * enum nl80211_meshconf_params - mesh configuration parameters
  *
- * Mesh configuration parameters
+ * Mesh configuration parameters. These can be changed while the mesh is
+ * active.
  *
  * @__NL80211_MESHCONF_INVALID: internal use
  *
@@ -1587,9 +1593,6 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
  * point.
  *
- * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
- * source mesh point for path selection elements.
- *
  * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
  * open peer links when we detect compatible mesh peers.
  *
@@ -1616,6 +1619,9 @@ enum nl80211_mntr_flags {
  *
  * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not
  *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
  * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
@@ -1643,6 +1649,39 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_mesh_setup_params - mesh setup parameters
+ *
+ * Mesh setup parameters.  These are used to start/join a mesh and cannot be
+ * changed while the mesh is active.
+ *
+ * @__NL80211_MESH_SETUP_INVALID: Internal use
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a
+ * vendor specific path selection algorithm or disable it to use the default
+ * HWMP.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a
+ * vendor specific path metric or disable it to use the default Airtime
+ * metric.
+ *
+ * @NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE: A vendor specific information
+ * element that vendors will use to identify the path selection methods and
+ * metrics in use.
+ *
+ * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
+ */
+enum nl80211_mesh_setup_params {
+	__NL80211_MESH_SETUP_INVALID,
+	NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL,
+	NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC,
+	NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE,
+
+	/* keep last */
+	__NL80211_MESH_SETUP_ATTR_AFTER_LAST,
+	NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_txq_attr - TX queue parameter attributes
  * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7283496c2d05..924d60366233 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -649,12 +649,20 @@ struct mesh_config {
  * struct mesh_setup - 802.11s mesh setup configuration
  * @mesh_id: the mesh ID
  * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes
+ * @path_sel_proto: which path selection protocol to use
+ * @path_metric: which metric to use
+ * @vendor_ie: vendor information elements (optional)
+ * @vendor_ie_len: length of vendor information elements
  *
  * These parameters are fixed when the mesh is created.
  */
 struct mesh_setup {
 	const u8 *mesh_id;
 	u8 mesh_id_len;
+	u8  path_sel_proto;
+	u8  path_metric;
+	const u8 *vendor_ie;
+	u8 vendor_ie_len;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1c94a2ae22ee..ae2c7127a8aa 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1000,6 +1000,36 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
+static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
+		const struct mesh_setup *setup)
+{
+	u8 *new_ie;
+	const u8 *old_ie;
+
+	/* first allocate the new vendor information element */
+	new_ie = NULL;
+	old_ie = ifmsh->vendor_ie;
+
+	ifmsh->vendor_ie_len = setup->vendor_ie_len;
+	if (setup->vendor_ie_len) {
+		new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len,
+				GFP_KERNEL);
+		if (!new_ie)
+			return -ENOMEM;
+	}
+
+	/* now copy the rest of the setup parameters */
+	ifmsh->mesh_id_len = setup->mesh_id_len;
+	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
+	ifmsh->mesh_pp_id = setup->path_sel_proto;
+	ifmsh->mesh_pm_id = setup->path_metric;
+	ifmsh->vendor_ie = new_ie;
+
+	kfree(old_ie);
+
+	return 0;
+}
+
 static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 					struct net_device *dev, u32 mask,
 					const struct mesh_config *nconf)
@@ -1059,11 +1089,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	int err;
 
-	memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config));
-	ifmsh->mesh_id_len = setup->mesh_id_len;
-	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
-
+	memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config));
+	err = copy_mesh_setup(ifmsh, setup);
+	if (err)
+		return err;
 	ieee80211_start_mesh(sdata);
 
 	return 0;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ce58b2a676e2..eadaa243a3da 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -484,6 +484,8 @@ struct ieee80211_if_mesh {
 	struct mesh_config mshcfg;
 	u32 mesh_seqnum;
 	bool accepting_plinks;
+	const u8 *vendor_ie;
+	u8 vendor_ie_len;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -585,9 +587,7 @@ struct ieee80211_sub_if_data {
 		struct ieee80211_if_vlan vlan;
 		struct ieee80211_if_managed mgd;
 		struct ieee80211_if_ibss ibss;
-#ifdef CONFIG_MAC80211_MESH
 		struct ieee80211_if_mesh mesh;
-#endif
 		u32 mntr_flags;
 	} u;
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 63e1188d5062..c326e009389d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -287,6 +287,13 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos++ |= sdata->u.mesh.accepting_plinks ?
 	    MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
+
+	if (sdata->u.mesh.vendor_ie) {
+		int len = sdata->u.mesh.vendor_ie_len;
+		const u8 *data = sdata->u.mesh.vendor_ie;
+		if (skb_tailroom(skb) > len)
+			memcpy(skb_put(skb, len), data, len);
+	}
 }
 
 u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1c91f0f3c307..44b53931ba5e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -160,7 +160,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid,
 		__le16 reason) {
 	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
+	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 +
+			sdata->u.mesh.vendor_ie_len);
 	struct ieee80211_mgmt *mgmt;
 	bool include_plid = false;
 	static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A };
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 157bde993ef5..f4b1b624ea9f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2290,7 +2290,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		u8 *pos;
 
 		/* headroom, head length, tail length and maximum TIM length */
-		skb = dev_alloc_skb(local->tx_headroom + 400);
+		skb = dev_alloc_skb(local->tx_headroom + 400 +
+				sdata->u.mesh.vendor_ie_len);
 		if (!skb)
 			goto out;
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 79772fcc37bc..e9a5f8ca4c27 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -789,13 +789,23 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			cfg80211_mgd_wext_connect(rdev, wdev);
 			break;
 #endif
+#ifdef CONFIG_MAC80211_MESH
 		case NL80211_IFTYPE_MESH_POINT:
-			/* backward compat code ... */
-			if (wdev->mesh_id_up_len)
-				__cfg80211_join_mesh(rdev, dev, wdev->ssid,
-						     wdev->mesh_id_up_len,
-						     &default_mesh_config);
-			break;
+			{
+				/* backward compat code... */
+				struct mesh_setup setup;
+				memcpy(&setup, &default_mesh_setup,
+						sizeof(setup));
+				 /* back compat only needed for mesh_id */
+				setup.mesh_id = wdev->ssid;
+				setup.mesh_id_len = wdev->mesh_id_up_len;
+				if (wdev->mesh_id_up_len)
+					__cfg80211_join_mesh(rdev, dev,
+							&setup,
+							&default_mesh_config);
+				break;
+			}
+#endif
 		default:
 			break;
 		}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 743203bb61ac..26a0a084e16b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -287,13 +287,14 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 
 /* mesh */
 extern const struct mesh_config default_mesh_config;
+extern const struct mesh_setup default_mesh_setup;
 int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
-			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_setup *setup,
 			 const struct mesh_config *conf);
 int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev,
-		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_setup *setup,
 		       const struct mesh_config *conf);
 int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index e0b9747fe50a..73e39c171ffb 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -50,17 +50,19 @@ const struct mesh_config default_mesh_config = {
 	.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT,
 };
 
+const struct mesh_setup default_mesh_setup = {
+	.path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP,
+	.path_metric = IEEE80211_PATH_METRIC_AIRTIME,
+	.vendor_ie = NULL,
+	.vendor_ie_len = 0,
+};
 
 int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
-			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_setup *setup,
 			 const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct mesh_setup setup = {
-		.mesh_id = mesh_id,
-		.mesh_id_len = mesh_id_len,
-	};
 	int err;
 
 	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
@@ -73,16 +75,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 	if (wdev->mesh_id_len)
 		return -EALREADY;
 
-	if (!mesh_id_len)
+	if (!setup->mesh_id_len)
 		return -EINVAL;
 
 	if (!rdev->ops->join_mesh)
 		return -EOPNOTSUPP;
 
-	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup);
+	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup);
 	if (!err) {
-		memcpy(wdev->ssid, mesh_id, mesh_id_len);
-		wdev->mesh_id_len = mesh_id_len;
+		memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
+		wdev->mesh_id_len = setup->mesh_id_len;
 	}
 
 	return err;
@@ -90,14 +92,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 
 int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev,
-		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_setup *setup,
 		       const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf);
+	err = __cfg80211_join_mesh(rdev, dev, setup, conf);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 10be9350752e..eef89d0b558b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2773,6 +2773,14 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 };
 
+static const struct nla_policy
+	nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = {
+	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
+	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
+	[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY,
+		.len = IEEE80211_MAX_DATA_LEN },
+};
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
@@ -2839,14 +2847,50 @@ do {\
 			dot11MeshHWMPRootMode, mask,
 			NL80211_MESHCONF_HWMP_ROOTMODE,
 			nla_get_u8);
-
 	if (mask_out)
 		*mask_out = mask;
+
 	return 0;
 
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
+static int nl80211_parse_mesh_setup(struct genl_info *info,
+				     struct mesh_setup *setup)
+{
+	struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
+
+	if (!info->attrs[NL80211_ATTR_MESH_SETUP])
+		return -EINVAL;
+	if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX,
+			     info->attrs[NL80211_ATTR_MESH_SETUP],
+			     nl80211_mesh_setup_params_policy))
+		return -EINVAL;
+
+	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])
+		setup->path_sel_proto =
+		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ?
+		 IEEE80211_PATH_PROTOCOL_VENDOR :
+		 IEEE80211_PATH_PROTOCOL_HWMP;
+
+	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])
+		setup->path_metric =
+		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ?
+		 IEEE80211_PATH_METRIC_VENDOR :
+		 IEEE80211_PATH_METRIC_AIRTIME;
+
+	if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) {
+		struct nlattr *ieattr =
+			tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE];
+		if (!is_valid_ie_attr(ieattr))
+			return -EINVAL;
+		setup->vendor_ie = nla_data(ieattr);
+		setup->vendor_ie_len = nla_len(ieattr);
+	}
+
+	return 0;
+}
+
 static int nl80211_update_mesh_config(struct sk_buff *skb,
 				      struct genl_info *info)
 {
@@ -4667,10 +4711,12 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	struct mesh_config cfg;
+	struct mesh_setup setup;
 	int err;
 
 	/* start with default */
 	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
+	memcpy(&setup, &default_mesh_setup, sizeof(setup));
 
 	if (info->attrs[NL80211_ATTR_MESH_CONFIG]) {
 		/* and parse parameters if given */
@@ -4683,10 +4729,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	    !nla_len(info->attrs[NL80211_ATTR_MESH_ID]))
 		return -EINVAL;
 
-	return cfg80211_join_mesh(rdev, dev,
-				  nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
-				  nla_len(info->attrs[NL80211_ATTR_MESH_ID]),
-				  &cfg);
+	setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
+	setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+
+	if (info->attrs[NL80211_ATTR_MESH_SETUP]) {
+		/* parse additional setup parameters if given */
+		err = nl80211_parse_mesh_setup(info, &setup);
+		if (err)
+			return err;
+	}
+
+	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }
 
 static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
-- 
cgit v1.2.3-71-gd317


From 39fd5de4472b7b222c6cec78d72b069133f694e4 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 16 Dec 2010 11:30:28 +0900
Subject: nl80211: Export available antennas

Export the information which antennas are available for configuration as TX or
RX antennas via nl80211.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 9 +++++++++
 net/wireless/nl80211.c  | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 69eaccac78c4..2b89b712565b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -858,6 +858,12 @@ enum nl80211_commands {
  *	the hardware should not be configured to receive on this antenna.
  *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
  *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available
+ *	for configuration as TX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available
+ *	for configuration as RX antennas via the above parameters.
+ *
  * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
  *
  * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
@@ -1059,6 +1065,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MESH_SETUP,
 
+	NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+	NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8d2f5f8d8080..9b62710891a2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -605,6 +605,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+		    dev->wiphy.available_antennas_tx);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+		    dev->wiphy.available_antennas_rx);
+
 	if ((dev->wiphy.available_antennas_tx ||
 	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
-- 
cgit v1.2.3-71-gd317


From 61ad5394590c5c5338ab4ec50553d809a9996d50 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:23:34 -0800
Subject: mac80211: Remove unused third address from mesh address extension
 header.

The Mesh Control header only includes 0, 1 or 2 addresses. If there is
one address, it should be interpreted as Address 4.  If there are 2,
they are interpreted as Addresses 5 and 6 (Address 4 being the 4th
address in the 802.11 header).

The address extension used to hold up to 3 addresses instead of the current 2.
I'm not sure which draft version changed this, but it is very unlikely that it
will change again given the state of the approval process of this draft.  See
section 7.1.3.6.3 in current draft (8.0).

Also, note that the extra address that I'm removing was not being used, so this
change has no effect on over-the-air frame formats.  But I thought I better
remove it before someone does start using it.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  1 -
 net/mac80211/mesh.c       | 32 +++++++++++++-------------------
 net/mac80211/mesh.h       |  4 ++--
 net/mac80211/tx.c         |  4 +---
 4 files changed, 16 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cd681681d211..6042228954a7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -536,7 +536,6 @@ struct ieee80211s_hdr {
 	__le32 seqnum;
 	u8 eaddr1[6];
 	u8 eaddr2[6];
-	u8 eaddr3[6];
 } __attribute__ ((packed));
 
 /* Mesh flags */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 8b5906c83f93..ca3af4685b0a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -410,39 +410,33 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
  * ieee80211_new_mesh_header - create a new mesh header
  * @meshhdr:    uninitialized mesh header
  * @sdata:	mesh interface to be used
- * @addr4:	addr4 of the mesh frame (1st in ae header)
- *              may be NULL
- * @addr5:	addr5 of the mesh frame (1st or 2nd in ae header)
- *              may be NULL unless addr6 is present
- * @addr6:	addr6 of the mesh frame (2nd or 3rd in ae header)
- * 		may be NULL unless addr5 is present
+ * @addr4or5:   1st address in the ae header, which may correspond to address 4
+ *              (if addr6 is NULL) or address 5 (if addr6 is present). It may
+ *              be NULL.
+ * @addr6:	2nd address in the ae header, which corresponds to addr6 of the
+ *              mesh frame
  *
  * Return the header length.
  */
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
-		struct ieee80211_sub_if_data *sdata, char *addr4,
-		char *addr5, char *addr6)
+		struct ieee80211_sub_if_data *sdata, char *addr4or5,
+		char *addr6)
 {
 	int aelen = 0;
+	BUG_ON(!addr4or5 && addr6);
 	memset(meshhdr, 0, sizeof(*meshhdr));
 	meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
 	put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum);
 	sdata->u.mesh.mesh_seqnum++;
-	if (addr4) {
+	if (addr4or5 && !addr6) {
 		meshhdr->flags |= MESH_FLAGS_AE_A4;
 		aelen += ETH_ALEN;
-		memcpy(meshhdr->eaddr1, addr4, ETH_ALEN);
-	}
-	if (addr5 && addr6) {
+		memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN);
+	} else if (addr4or5 && addr6) {
 		meshhdr->flags |= MESH_FLAGS_AE_A5_A6;
 		aelen += 2 * ETH_ALEN;
-		if (!addr4) {
-			memcpy(meshhdr->eaddr1, addr5, ETH_ALEN);
-			memcpy(meshhdr->eaddr2, addr6, ETH_ALEN);
-		} else {
-			memcpy(meshhdr->eaddr2, addr5, ETH_ALEN);
-			memcpy(meshhdr->eaddr3, addr6, ETH_ALEN);
-		}
+		memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN);
+		memcpy(meshhdr->eaddr2, addr6, ETH_ALEN);
 	}
 	return 6 + aelen;
 }
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 890dd19c2eca..b99e230fe31c 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -187,8 +187,8 @@ struct mesh_rmc {
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
 				  const u8 *da, const u8 *sa);
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
-		struct ieee80211_sub_if_data *sdata, char *addr4,
-		char *addr5, char *addr6);
+		struct ieee80211_sub_if_data *sdata, char *addr4or5,
+		char *addr6);
 int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr,
 		struct ieee80211_sub_if_data *sdata);
 bool mesh_matches_local(struct ieee802_11_elems *ie,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f4b1b624ea9f..807dcd06e268 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1811,7 +1811,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					skb->data, skb->data + ETH_ALEN);
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
-					sdata, NULL, NULL, NULL);
+					sdata, NULL, NULL);
 		} else {
 			/* packet from other interface */
 			struct mesh_path *mppath;
@@ -1844,13 +1844,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
 							skb->data + ETH_ALEN,
-							NULL,
 							NULL);
 			else
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
-							NULL,
 							skb->data,
 							skb->data + ETH_ALEN);
 
-- 
cgit v1.2.3-71-gd317


From da1f026b532ce944d74461497dc6d8c16456466e Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Date: Mon, 20 Dec 2010 21:09:22 +0000
Subject: Keyboard: omap-keypad: use matrix_keypad.h

Most keypad drivers make use of the <linux/input/matrix_keypad.h>
defined macros, structures and inline functions.

Convert omap-keypad driver to use those as well, as suggested by a
compile time warning, hardcoded into the OMAP <palt/keypad.h>.

Created against linux-2.6.37-rc5.
Tested on Amstrad Delta.
Compile tested with omap1_defconfig and omap2plus_defconfig shrinked to
board-h4.

Signed-off-by: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Reviewed-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-ams-delta.c    | 128 ++++++++++++++++---------------
 arch/arm/mach-omap1/board-fsample.c      |  67 ++++++++--------
 arch/arm/mach-omap1/board-h2.c           |  75 +++++++++---------
 arch/arm/mach-omap1/board-h3.c           |  75 +++++++++---------
 arch/arm/mach-omap1/board-htcherald.c    |  98 +++++++++++------------
 arch/arm/mach-omap1/board-innovator.c    |  21 ++---
 arch/arm/mach-omap1/board-nokia770.c     |  29 +++----
 arch/arm/mach-omap1/board-osk.c          |  21 ++---
 arch/arm/mach-omap1/board-palmte.c       |  28 ++++---
 arch/arm/mach-omap1/board-palmtt.c       |  28 ++++---
 arch/arm/mach-omap1/board-palmz71.c      |  30 ++++----
 arch/arm/mach-omap1/board-perseus2.c     |  69 +++++++++--------
 arch/arm/mach-omap1/board-sx1.c          |  57 +++++++-------
 arch/arm/mach-omap2/board-h4.c           |  63 +++++++--------
 arch/arm/plat-omap/include/plat/keypad.h |  35 +++++----
 drivers/input/keyboard/omap-keypad.c     |  41 +++++-----
 include/linux/input/matrix_keypad.h      |   2 +-
 17 files changed, 457 insertions(+), 410 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index e1439506eba9..bd0495a9ac3b 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -43,84 +43,82 @@
 static u8 ams_delta_latch1_reg;
 static u16 ams_delta_latch2_reg;
 
-static int ams_delta_keymap[] = {
+static const unsigned int ams_delta_keymap[] = {
 	KEY(0, 0, KEY_F1),		/* Advert    */
 
-	KEY(3, 0, KEY_COFFEE),		/* Games     */
-	KEY(2, 0, KEY_QUESTION),	/* Directory */
-	KEY(3, 2, KEY_CONNECT),		/* Internet  */
-	KEY(2, 1, KEY_SHOP),		/* Services  */
+	KEY(0, 3, KEY_COFFEE),		/* Games     */
+	KEY(0, 2, KEY_QUESTION),	/* Directory */
+	KEY(2, 3, KEY_CONNECT),		/* Internet  */
+	KEY(1, 2, KEY_SHOP),		/* Services  */
 	KEY(1, 1, KEY_PHONE),		/* VoiceMail */
 
-	KEY(1, 0, KEY_DELETE),		/* Delete    */
+	KEY(0, 1, KEY_DELETE),		/* Delete    */
 	KEY(2, 2, KEY_PLAY),		/* Play      */
-	KEY(0, 1, KEY_PAGEUP),		/* Up        */
-	KEY(3, 1, KEY_PAGEDOWN),	/* Down      */
-	KEY(0, 2, KEY_EMAIL),		/* ReadEmail */
-	KEY(1, 2, KEY_STOP),		/* Stop      */
+	KEY(1, 0, KEY_PAGEUP),		/* Up        */
+	KEY(1, 3, KEY_PAGEDOWN),	/* Down      */
+	KEY(2, 0, KEY_EMAIL),		/* ReadEmail */
+	KEY(2, 1, KEY_STOP),		/* Stop      */
 
 	/* Numeric keypad portion */
-	KEY(7, 0, KEY_KP1),
-	KEY(6, 0, KEY_KP2),
-	KEY(5, 0, KEY_KP3),
-	KEY(7, 1, KEY_KP4),
-	KEY(6, 1, KEY_KP5),
-	KEY(5, 1, KEY_KP6),
-	KEY(7, 2, KEY_KP7),
-	KEY(6, 2, KEY_KP8),
-	KEY(5, 2, KEY_KP9),
-	KEY(6, 3, KEY_KP0),
-	KEY(7, 3, KEY_KPASTERISK),
-	KEY(5, 3, KEY_KPDOT),		/* # key     */
-	KEY(2, 7, KEY_NUMLOCK),		/* Mute      */
-	KEY(1, 7, KEY_KPMINUS),		/* Recall    */
-	KEY(1, 6, KEY_KPPLUS),		/* Redial    */
-	KEY(6, 7, KEY_KPSLASH),		/* Handsfree */
-	KEY(0, 6, KEY_ENTER),		/* Video     */
-
-	KEY(4, 7, KEY_CAMERA),		/* Photo     */
-
-	KEY(4, 0, KEY_F2),		/* Home      */
-	KEY(4, 1, KEY_F3),		/* Office    */
-	KEY(4, 2, KEY_F4),		/* Mobile    */
+	KEY(0, 7, KEY_KP1),
+	KEY(0, 6, KEY_KP2),
+	KEY(0, 5, KEY_KP3),
+	KEY(1, 7, KEY_KP4),
+	KEY(1, 6, KEY_KP5),
+	KEY(1, 5, KEY_KP6),
+	KEY(2, 7, KEY_KP7),
+	KEY(2, 6, KEY_KP8),
+	KEY(2, 5, KEY_KP9),
+	KEY(3, 6, KEY_KP0),
+	KEY(3, 7, KEY_KPASTERISK),
+	KEY(3, 5, KEY_KPDOT),		/* # key     */
+	KEY(7, 2, KEY_NUMLOCK),		/* Mute      */
+	KEY(7, 1, KEY_KPMINUS),		/* Recall    */
+	KEY(6, 1, KEY_KPPLUS),		/* Redial    */
+	KEY(7, 6, KEY_KPSLASH),		/* Handsfree */
+	KEY(6, 0, KEY_ENTER),		/* Video     */
+
+	KEY(7, 4, KEY_CAMERA),		/* Photo     */
+
+	KEY(0, 4, KEY_F2),		/* Home      */
+	KEY(1, 4, KEY_F3),		/* Office    */
+	KEY(2, 4, KEY_F4),		/* Mobile    */
 	KEY(7, 7, KEY_F5),		/* SMS       */
-	KEY(5, 7, KEY_F6),		/* Email     */
+	KEY(7, 5, KEY_F6),		/* Email     */
 
 	/* QWERTY portion of keypad */
-	KEY(4, 3, KEY_Q),
+	KEY(3, 4, KEY_Q),
 	KEY(3, 3, KEY_W),
-	KEY(2, 3, KEY_E),
-	KEY(1, 3, KEY_R),
-	KEY(0, 3, KEY_T),
-	KEY(7, 4, KEY_Y),
-	KEY(6, 4, KEY_U),
-	KEY(5, 4, KEY_I),
+	KEY(3, 2, KEY_E),
+	KEY(3, 1, KEY_R),
+	KEY(3, 0, KEY_T),
+	KEY(4, 7, KEY_Y),
+	KEY(4, 6, KEY_U),
+	KEY(4, 5, KEY_I),
 	KEY(4, 4, KEY_O),
-	KEY(3, 4, KEY_P),
+	KEY(4, 3, KEY_P),
 
-	KEY(2, 4, KEY_A),
-	KEY(1, 4, KEY_S),
-	KEY(0, 4, KEY_D),
-	KEY(7, 5, KEY_F),
-	KEY(6, 5, KEY_G),
+	KEY(4, 2, KEY_A),
+	KEY(4, 1, KEY_S),
+	KEY(4, 0, KEY_D),
+	KEY(5, 7, KEY_F),
+	KEY(5, 6, KEY_G),
 	KEY(5, 5, KEY_H),
-	KEY(4, 5, KEY_J),
-	KEY(3, 5, KEY_K),
-	KEY(2, 5, KEY_L),
+	KEY(5, 4, KEY_J),
+	KEY(5, 3, KEY_K),
+	KEY(5, 2, KEY_L),
 
-	KEY(1, 5, KEY_Z),
-	KEY(0, 5, KEY_X),
-	KEY(7, 6, KEY_C),
+	KEY(5, 1, KEY_Z),
+	KEY(5, 0, KEY_X),
+	KEY(6, 7, KEY_C),
 	KEY(6, 6, KEY_V),
-	KEY(5, 6, KEY_B),
-	KEY(4, 6, KEY_N),
-	KEY(3, 6, KEY_M),
-	KEY(2, 6, KEY_SPACE),
+	KEY(6, 5, KEY_B),
+	KEY(6, 4, KEY_N),
+	KEY(6, 3, KEY_M),
+	KEY(6, 2, KEY_SPACE),
 
-	KEY(0, 7, KEY_LEFTSHIFT),	/* Vol up    */
-	KEY(3, 7, KEY_LEFTCTRL),	/* Vol down  */
-
-	0
+	KEY(7, 0, KEY_LEFTSHIFT),	/* Vol up    */
+	KEY(7, 3, KEY_LEFTCTRL),	/* Vol down  */
 };
 
 void ams_delta_latch1_write(u8 mask, u8 value)
@@ -189,11 +187,15 @@ static struct resource ams_delta_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data ams_delta_keymap_data = {
+	.keymap		= ams_delta_keymap,
+	.keymap_size	= ARRAY_SIZE(ams_delta_keymap),
+};
+
 static struct omap_kp_platform_data ams_delta_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap 	= ams_delta_keymap,
-	.keymapsize	= ARRAY_SIZE(ams_delta_keymap),
+	.keymap_data	= &ams_delta_keymap_data,
 	.delay		= 9,
 };
 
diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c
index 0c3f396328bd..0efb9dbae44c 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -69,36 +69,35 @@
 #define fsample_cpld_clear(bit) \
     fsample_cpld_write(0xf0 | ((bit) & 15), FSAMPLE_CPLD_SET_CLR)
 
-static int fsample_keymap[] = {
-	KEY(0,0,KEY_UP),
-	KEY(0,1,KEY_RIGHT),
-	KEY(0,2,KEY_LEFT),
-	KEY(0,3,KEY_DOWN),
-	KEY(0,4,KEY_ENTER),
-	KEY(1,0,KEY_F10),
-	KEY(1,1,KEY_SEND),
-	KEY(1,2,KEY_END),
-	KEY(1,3,KEY_VOLUMEDOWN),
-	KEY(1,4,KEY_VOLUMEUP),
-	KEY(1,5,KEY_RECORD),
-	KEY(2,0,KEY_F9),
-	KEY(2,1,KEY_3),
-	KEY(2,2,KEY_6),
-	KEY(2,3,KEY_9),
-	KEY(2,4,KEY_KPDOT),
-	KEY(3,0,KEY_BACK),
-	KEY(3,1,KEY_2),
-	KEY(3,2,KEY_5),
-	KEY(3,3,KEY_8),
-	KEY(3,4,KEY_0),
-	KEY(3,5,KEY_KPSLASH),
-	KEY(4,0,KEY_HOME),
-	KEY(4,1,KEY_1),
-	KEY(4,2,KEY_4),
-	KEY(4,3,KEY_7),
-	KEY(4,4,KEY_KPASTERISK),
-	KEY(4,5,KEY_POWER),
-	0
+static const unsigned int fsample_keymap[] = {
+	KEY(0, 0, KEY_UP),
+	KEY(1, 0, KEY_RIGHT),
+	KEY(2, 0, KEY_LEFT),
+	KEY(3, 0, KEY_DOWN),
+	KEY(4, 0, KEY_ENTER),
+	KEY(0, 1, KEY_F10),
+	KEY(1, 1, KEY_SEND),
+	KEY(2, 1, KEY_END),
+	KEY(3, 1, KEY_VOLUMEDOWN),
+	KEY(4, 1, KEY_VOLUMEUP),
+	KEY(5, 1, KEY_RECORD),
+	KEY(0, 2, KEY_F9),
+	KEY(1, 2, KEY_3),
+	KEY(2, 2, KEY_6),
+	KEY(3, 2, KEY_9),
+	KEY(4, 2, KEY_KPDOT),
+	KEY(0, 3, KEY_BACK),
+	KEY(1, 3, KEY_2),
+	KEY(2, 3, KEY_5),
+	KEY(3, 3, KEY_8),
+	KEY(4, 3, KEY_0),
+	KEY(5, 3, KEY_KPSLASH),
+	KEY(0, 4, KEY_HOME),
+	KEY(1, 4, KEY_1),
+	KEY(2, 4, KEY_4),
+	KEY(3, 4, KEY_7),
+	KEY(4, 4, KEY_KPASTERISK),
+	KEY(5, 4, KEY_POWER),
 };
 
 static struct smc91x_platdata smc91x_info = {
@@ -253,11 +252,15 @@ static struct resource kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data fsample_keymap_data = {
+	.keymap		= fsample_keymap,
+	.keymap_size	= ARRAY_SIZE(fsample_keymap),
+};
+
 static struct omap_kp_platform_data kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= fsample_keymap,
-	.keymapsize	= ARRAY_SIZE(fsample_keymap),
+	.keymap_data	= &fsample_keymap_data,
 	.delay		= 4,
 };
 
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 082a73ca5564..28b84aa9bdba 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -52,43 +52,42 @@
 /* At OMAP1610 Innovator the Ethernet is directly connected to CS1 */
 #define OMAP1610_ETHR_START		0x04000300
 
-static int h2_keymap[] = {
+static const unsigned int h2_keymap[] = {
 	KEY(0, 0, KEY_LEFT),
-	KEY(0, 1, KEY_RIGHT),
-	KEY(0, 2, KEY_3),
-	KEY(0, 3, KEY_F10),
-	KEY(0, 4, KEY_F5),
-	KEY(0, 5, KEY_9),
-	KEY(1, 0, KEY_DOWN),
+	KEY(1, 0, KEY_RIGHT),
+	KEY(2, 0, KEY_3),
+	KEY(3, 0, KEY_F10),
+	KEY(4, 0, KEY_F5),
+	KEY(5, 0, KEY_9),
+	KEY(0, 1, KEY_DOWN),
 	KEY(1, 1, KEY_UP),
-	KEY(1, 2, KEY_2),
-	KEY(1, 3, KEY_F9),
-	KEY(1, 4, KEY_F7),
-	KEY(1, 5, KEY_0),
-	KEY(2, 0, KEY_ENTER),
-	KEY(2, 1, KEY_6),
+	KEY(2, 1, KEY_2),
+	KEY(3, 1, KEY_F9),
+	KEY(4, 1, KEY_F7),
+	KEY(5, 1, KEY_0),
+	KEY(0, 2, KEY_ENTER),
+	KEY(1, 2, KEY_6),
 	KEY(2, 2, KEY_1),
-	KEY(2, 3, KEY_F2),
-	KEY(2, 4, KEY_F6),
-	KEY(2, 5, KEY_HOME),
-	KEY(3, 0, KEY_8),
-	KEY(3, 1, KEY_5),
-	KEY(3, 2, KEY_F12),
+	KEY(3, 2, KEY_F2),
+	KEY(4, 2, KEY_F6),
+	KEY(5, 2, KEY_HOME),
+	KEY(0, 3, KEY_8),
+	KEY(1, 3, KEY_5),
+	KEY(2, 3, KEY_F12),
 	KEY(3, 3, KEY_F3),
-	KEY(3, 4, KEY_F8),
-	KEY(3, 5, KEY_END),
-	KEY(4, 0, KEY_7),
-	KEY(4, 1, KEY_4),
-	KEY(4, 2, KEY_F11),
-	KEY(4, 3, KEY_F1),
+	KEY(4, 3, KEY_F8),
+	KEY(5, 3, KEY_END),
+	KEY(0, 4, KEY_7),
+	KEY(1, 4, KEY_4),
+	KEY(2, 4, KEY_F11),
+	KEY(3, 4, KEY_F1),
 	KEY(4, 4, KEY_F4),
-	KEY(4, 5, KEY_ESC),
-	KEY(5, 0, KEY_F13),
-	KEY(5, 1, KEY_F14),
-	KEY(5, 2, KEY_F15),
-	KEY(5, 3, KEY_F16),
-	KEY(5, 4, KEY_SLEEP),
-	0
+	KEY(5, 4, KEY_ESC),
+	KEY(0, 5, KEY_F13),
+	KEY(1, 5, KEY_F14),
+	KEY(2, 5, KEY_F15),
+	KEY(3, 5, KEY_F16),
+	KEY(4, 5, KEY_SLEEP),
 };
 
 static struct mtd_partition h2_nor_partitions[] = {
@@ -270,14 +269,18 @@ static struct resource h2_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data h2_keymap_data = {
+	.keymap		= h2_keymap,
+	.keymap_size	= ARRAY_SIZE(h2_keymap),
+};
+
 static struct omap_kp_platform_data h2_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= h2_keymap,
-	.keymapsize	= ARRAY_SIZE(h2_keymap),
-	.rep		= 1,
+	.keymap_data	= &h2_keymap_data,
+	.rep		= true,
 	.delay		= 9,
-	.dbounce	= 1,
+	.dbounce	= true,
 };
 
 static struct platform_device h2_kp_device = {
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index d2cff5022fe5..dbc8b8d882ba 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -56,43 +56,42 @@
 
 #define H3_TS_GPIO	48
 
-static int h3_keymap[] = {
+static const unsigned int h3_keymap[] = {
 	KEY(0, 0, KEY_LEFT),
-	KEY(0, 1, KEY_RIGHT),
-	KEY(0, 2, KEY_3),
-	KEY(0, 3, KEY_F10),
-	KEY(0, 4, KEY_F5),
-	KEY(0, 5, KEY_9),
-	KEY(1, 0, KEY_DOWN),
+	KEY(1, 0, KEY_RIGHT),
+	KEY(2, 0, KEY_3),
+	KEY(3, 0, KEY_F10),
+	KEY(4, 0, KEY_F5),
+	KEY(5, 0, KEY_9),
+	KEY(0, 1, KEY_DOWN),
 	KEY(1, 1, KEY_UP),
-	KEY(1, 2, KEY_2),
-	KEY(1, 3, KEY_F9),
-	KEY(1, 4, KEY_F7),
-	KEY(1, 5, KEY_0),
-	KEY(2, 0, KEY_ENTER),
-	KEY(2, 1, KEY_6),
+	KEY(2, 1, KEY_2),
+	KEY(3, 1, KEY_F9),
+	KEY(4, 1, KEY_F7),
+	KEY(5, 1, KEY_0),
+	KEY(0, 2, KEY_ENTER),
+	KEY(1, 2, KEY_6),
 	KEY(2, 2, KEY_1),
-	KEY(2, 3, KEY_F2),
-	KEY(2, 4, KEY_F6),
-	KEY(2, 5, KEY_HOME),
-	KEY(3, 0, KEY_8),
-	KEY(3, 1, KEY_5),
-	KEY(3, 2, KEY_F12),
+	KEY(3, 2, KEY_F2),
+	KEY(4, 2, KEY_F6),
+	KEY(5, 2, KEY_HOME),
+	KEY(0, 3, KEY_8),
+	KEY(1, 3, KEY_5),
+	KEY(2, 3, KEY_F12),
 	KEY(3, 3, KEY_F3),
-	KEY(3, 4, KEY_F8),
-	KEY(3, 5, KEY_END),
-	KEY(4, 0, KEY_7),
-	KEY(4, 1, KEY_4),
-	KEY(4, 2, KEY_F11),
-	KEY(4, 3, KEY_F1),
+	KEY(4, 3, KEY_F8),
+	KEY(5, 3, KEY_END),
+	KEY(0, 4, KEY_7),
+	KEY(1, 4, KEY_4),
+	KEY(2, 4, KEY_F11),
+	KEY(3, 4, KEY_F1),
 	KEY(4, 4, KEY_F4),
-	KEY(4, 5, KEY_ESC),
-	KEY(5, 0, KEY_F13),
-	KEY(5, 1, KEY_F14),
-	KEY(5, 2, KEY_F15),
-	KEY(5, 3, KEY_F16),
-	KEY(5, 4, KEY_SLEEP),
-	0
+	KEY(5, 4, KEY_ESC),
+	KEY(0, 5, KEY_F13),
+	KEY(1, 5, KEY_F14),
+	KEY(2, 5, KEY_F15),
+	KEY(3, 5, KEY_F16),
+	KEY(4, 5, KEY_SLEEP),
 };
 
 
@@ -305,14 +304,18 @@ static struct resource h3_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data h3_keymap_data = {
+	.keymap		= h3_keymap,
+	.keymap_size	= ARRAY_SIZE(h3_keymap),
+};
+
 static struct omap_kp_platform_data h3_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= h3_keymap,
-	.keymapsize	= ARRAY_SIZE(h3_keymap),
-	.rep		= 1,
+	.keymap_data	= &h3_keymap_data,
+	.rep		= true,
 	.delay		= 9,
-	.dbounce	= 1,
+	.dbounce	= true,
 };
 
 static struct platform_device h3_kp_device = {
diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c
index 742c6d107268..f2c5c585bc83 100644
--- a/arch/arm/mach-omap1/board-htcherald.c
+++ b/arch/arm/mach-omap1/board-htcherald.c
@@ -180,64 +180,68 @@
 
 /* Keyboard definition */
 
-static int htc_herald_keymap[] = {
+static const unsigned int htc_herald_keymap[] = {
 	KEY(0, 0, KEY_RECORD), /* Mail button */
-	KEY(0, 1, KEY_CAMERA), /* Camera */
-	KEY(0, 2, KEY_PHONE), /* Send key */
-	KEY(0, 3, KEY_VOLUMEUP), /* Volume up */
-	KEY(0, 4, KEY_F2),  /* Right bar (landscape) */
-	KEY(0, 5, KEY_MAIL), /* Win key (portrait) */
-	KEY(0, 6, KEY_DIRECTORY), /* Right bar (protrait) */
-	KEY(1, 0, KEY_LEFTCTRL), /* Windows key */
+	KEY(1, 0, KEY_CAMERA), /* Camera */
+	KEY(2, 0, KEY_PHONE), /* Send key */
+	KEY(3, 0, KEY_VOLUMEUP), /* Volume up */
+	KEY(4, 0, KEY_F2),  /* Right bar (landscape) */
+	KEY(5, 0, KEY_MAIL), /* Win key (portrait) */
+	KEY(6, 0, KEY_DIRECTORY), /* Right bar (protrait) */
+	KEY(0, 1, KEY_LEFTCTRL), /* Windows key */
 	KEY(1, 1, KEY_COMMA),
-	KEY(1, 2, KEY_M),
-	KEY(1, 3, KEY_K),
-	KEY(1, 4, KEY_SLASH), /* OK key */
-	KEY(1, 5, KEY_I),
-	KEY(1, 6, KEY_U),
-	KEY(2, 0, KEY_LEFTALT),
-	KEY(2, 1, KEY_TAB),
+	KEY(2, 1, KEY_M),
+	KEY(3, 1, KEY_K),
+	KEY(4, 1, KEY_SLASH), /* OK key */
+	KEY(5, 1, KEY_I),
+	KEY(6, 1, KEY_U),
+	KEY(0, 2, KEY_LEFTALT),
+	KEY(1, 2, KEY_TAB),
 	KEY(2, 2, KEY_N),
-	KEY(2, 3, KEY_J),
-	KEY(2, 4, KEY_ENTER),
-	KEY(2, 5, KEY_H),
-	KEY(2, 6, KEY_Y),
-	KEY(3, 0, KEY_SPACE),
-	KEY(3, 1, KEY_L),
-	KEY(3, 2, KEY_B),
+	KEY(3, 2, KEY_J),
+	KEY(4, 2, KEY_ENTER),
+	KEY(5, 2, KEY_H),
+	KEY(6, 2, KEY_Y),
+	KEY(0, 3, KEY_SPACE),
+	KEY(1, 3, KEY_L),
+	KEY(2, 3, KEY_B),
 	KEY(3, 3, KEY_V),
-	KEY(3, 4, KEY_BACKSPACE),
-	KEY(3, 5, KEY_G),
-	KEY(3, 6, KEY_T),
-	KEY(4, 0, KEY_CAPSLOCK), /* Shift */
-	KEY(4, 1, KEY_C),
-	KEY(4, 2, KEY_F),
-	KEY(4, 3, KEY_R),
+	KEY(4, 3, KEY_BACKSPACE),
+	KEY(5, 3, KEY_G),
+	KEY(6, 3, KEY_T),
+	KEY(0, 4, KEY_CAPSLOCK), /* Shift */
+	KEY(1, 4, KEY_C),
+	KEY(2, 4, KEY_F),
+	KEY(3, 4, KEY_R),
 	KEY(4, 4, KEY_O),
-	KEY(4, 5, KEY_E),
-	KEY(4, 6, KEY_D),
-	KEY(5, 0, KEY_X),
-	KEY(5, 1, KEY_Z),
-	KEY(5, 2, KEY_S),
-	KEY(5, 3, KEY_W),
-	KEY(5, 4, KEY_P),
+	KEY(5, 4, KEY_E),
+	KEY(6, 4, KEY_D),
+	KEY(0, 5, KEY_X),
+	KEY(1, 5, KEY_Z),
+	KEY(2, 5, KEY_S),
+	KEY(3, 5, KEY_W),
+	KEY(4, 5, KEY_P),
 	KEY(5, 5, KEY_Q),
-	KEY(5, 6, KEY_A),
-	KEY(6, 0, KEY_CONNECT), /* Voice button */
-	KEY(6, 2, KEY_CANCEL), /* End key */
-	KEY(6, 3, KEY_VOLUMEDOWN), /* Volume down */
-	KEY(6, 4, KEY_F1), /* Left bar (landscape) */
-	KEY(6, 5, KEY_WWW), /* OK button (portrait) */
+	KEY(6, 5, KEY_A),
+	KEY(0, 6, KEY_CONNECT), /* Voice button */
+	KEY(2, 6, KEY_CANCEL), /* End key */
+	KEY(3, 6, KEY_VOLUMEDOWN), /* Volume down */
+	KEY(4, 6, KEY_F1), /* Left bar (landscape) */
+	KEY(5, 6, KEY_WWW), /* OK button (portrait) */
 	KEY(6, 6, KEY_CALENDAR), /* Left bar (portrait) */
-	0
 };
 
-struct omap_kp_platform_data htcherald_kp_data = {
+static const struct matrix_keymap_data htc_herald_keymap_data = {
+	.keymap		= htc_herald_keymap,
+	.keymap_size	= ARRAY_SIZE(htc_herald_keymap),
+};
+
+static struct omap_kp_platform_data htcherald_kp_data = {
 	.rows	= 7,
 	.cols	= 7,
 	.delay = 20,
-	.rep = 1,
-	.keymap = htc_herald_keymap,
+	.rep = true,
+	.keymap_data = &htc_herald_keymap_data,
 };
 
 static struct resource kp_resources[] = {
@@ -278,7 +282,7 @@ static struct gpio_keys_button herald_gpio_keys_table[] = {
 static struct gpio_keys_platform_data herald_gpio_keys_data = {
 	.buttons	= herald_gpio_keys_table,
 	.nbuttons	= ARRAY_SIZE(herald_gpio_keys_table),
-	.rep		= 1,
+	.rep		= true,
 };
 
 static struct platform_device herald_gpiokeys_device = {
diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c
index 8d59b078fc2c..a36e6742bf9b 100644
--- a/arch/arm/mach-omap1/board-innovator.c
+++ b/arch/arm/mach-omap1/board-innovator.c
@@ -44,17 +44,16 @@
 /* At OMAP1610 Innovator the Ethernet is directly connected to CS1 */
 #define INNOVATOR1610_ETHR_START	0x04000300
 
-static int innovator_keymap[] = {
+static const unsigned int innovator_keymap[] = {
 	KEY(0, 0, KEY_F1),
-	KEY(0, 3, KEY_DOWN),
+	KEY(3, 0, KEY_DOWN),
 	KEY(1, 1, KEY_F2),
-	KEY(1, 2, KEY_RIGHT),
-	KEY(2, 0, KEY_F3),
-	KEY(2, 1, KEY_F4),
+	KEY(2, 1, KEY_RIGHT),
+	KEY(0, 2, KEY_F3),
+	KEY(1, 2, KEY_F4),
 	KEY(2, 2, KEY_UP),
-	KEY(3, 2, KEY_ENTER),
+	KEY(2, 3, KEY_ENTER),
 	KEY(3, 3, KEY_LEFT),
-	0
 };
 
 static struct mtd_partition innovator_partitions[] = {
@@ -126,11 +125,15 @@ static struct resource innovator_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data innovator_keymap_data = {
+	.keymap		= innovator_keymap,
+	.keymap_size	= ARRAY_SIZE(innovator_keymap),
+};
+
 static struct omap_kp_platform_data innovator_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= innovator_keymap,
-	.keymapsize	= ARRAY_SIZE(innovator_keymap),
+	.keymap_data	= &innovator_keymap_data,
 	.delay		= 4,
 };
 
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 605495bbc583..d21f09dc78f4 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -54,19 +54,18 @@ static void __init omap_nokia770_init_irq(void)
 	omap_init_irq();
 }
 
-static int nokia770_keymap[] = {
-	KEY(0, 1, GROUP_0 | KEY_UP),
-	KEY(0, 2, GROUP_1 | KEY_F5),
-	KEY(1, 0, GROUP_0 | KEY_LEFT),
+static const unsigned int nokia770_keymap[] = {
+	KEY(1, 0, GROUP_0 | KEY_UP),
+	KEY(2, 0, GROUP_1 | KEY_F5),
+	KEY(0, 1, GROUP_0 | KEY_LEFT),
 	KEY(1, 1, GROUP_0 | KEY_ENTER),
-	KEY(1, 2, GROUP_0 | KEY_RIGHT),
-	KEY(2, 0, GROUP_1 | KEY_ESC),
-	KEY(2, 1, GROUP_0 | KEY_DOWN),
+	KEY(2, 1, GROUP_0 | KEY_RIGHT),
+	KEY(0, 2, GROUP_1 | KEY_ESC),
+	KEY(1, 2, GROUP_0 | KEY_DOWN),
 	KEY(2, 2, GROUP_1 | KEY_F4),
-	KEY(3, 0, GROUP_2 | KEY_F7),
-	KEY(3, 1, GROUP_2 | KEY_F8),
-	KEY(3, 2, GROUP_2 | KEY_F6),
-	0
+	KEY(0, 3, GROUP_2 | KEY_F7),
+	KEY(1, 3, GROUP_2 | KEY_F8),
+	KEY(2, 3, GROUP_2 | KEY_F6),
 };
 
 static struct resource nokia770_kp_resources[] = {
@@ -77,11 +76,15 @@ static struct resource nokia770_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data nokia770_keymap_data = {
+	.keymap		= nokia770_keymap,
+	.keymap_size	= ARRAY_SIZE(nokia770_keymap),
+};
+
 static struct omap_kp_platform_data nokia770_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= nokia770_keymap,
-	.keymapsize	= ARRAY_SIZE(nokia770_keymap),
+	.keymap_data	= &nokia770_keymap_data,
 	.delay		= 4,
 };
 
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index d44e7172efc2..7c5e2112c776 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -338,25 +338,28 @@ static struct i2c_board_info __initdata mistral_i2c_board_info[] = {
 	 */
 };
 
-static const int osk_keymap[] = {
+static const unsigned int osk_keymap[] = {
 	/* KEY(col, row, code) */
 	KEY(0, 0, KEY_F1),		/* SW4 */
-	KEY(0, 3, KEY_UP),		/* (sw2/up) */
+	KEY(3, 0, KEY_UP),		/* (sw2/up) */
 	KEY(1, 1, KEY_LEFTCTRL),	/* SW5 */
-	KEY(1, 2, KEY_LEFT),		/* (sw2/left) */
-	KEY(2, 0, KEY_SPACE),		/* SW3 */
-	KEY(2, 1, KEY_ESC),		/* SW6 */
+	KEY(2, 1, KEY_LEFT),		/* (sw2/left) */
+	KEY(0, 2, KEY_SPACE),		/* SW3 */
+	KEY(1, 2, KEY_ESC),		/* SW6 */
 	KEY(2, 2, KEY_DOWN),		/* (sw2/down) */
-	KEY(3, 2, KEY_ENTER),		/* (sw2/select) */
+	KEY(2, 3, KEY_ENTER),		/* (sw2/select) */
 	KEY(3, 3, KEY_RIGHT),		/* (sw2/right) */
-	0
+};
+
+static const struct matrix_keymap_data osk_keymap_data = {
+	.keymap		= osk_keymap,
+	.keymap_size	= ARRAY_SIZE(osk_keymap),
 };
 
 static struct omap_kp_platform_data osk_kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= (int *) osk_keymap,
-	.keymapsize	= ARRAY_SIZE(osk_keymap),
+	.keymap_data	= &osk_keymap_data,
 	.delay		= 9,
 };
 
diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c
index 994dc6f50729..fb51ce6123d8 100644
--- a/arch/arm/mach-omap1/board-palmte.c
+++ b/arch/arm/mach-omap1/board-palmte.c
@@ -65,25 +65,29 @@ static void __init omap_palmte_init_irq(void)
 	omap_init_irq();
 }
 
-static const int palmte_keymap[] = {
+static const unsigned int palmte_keymap[] = {
 	KEY(0, 0, KEY_F1),		/* Calendar */
-	KEY(0, 1, KEY_F2),		/* Contacts */
-	KEY(0, 2, KEY_F3),		/* Tasks List */
-	KEY(0, 3, KEY_F4),		/* Note Pad */
-	KEY(0, 4, KEY_POWER),
-	KEY(1, 0, KEY_LEFT),
+	KEY(1, 0, KEY_F2),		/* Contacts */
+	KEY(2, 0, KEY_F3),		/* Tasks List */
+	KEY(3, 0, KEY_F4),		/* Note Pad */
+	KEY(4, 0, KEY_POWER),
+	KEY(0, 1, KEY_LEFT),
 	KEY(1, 1, KEY_DOWN),
-	KEY(1, 2, KEY_UP),
-	KEY(1, 3, KEY_RIGHT),
-	KEY(1, 4, KEY_ENTER),
-	0,
+	KEY(2, 1, KEY_UP),
+	KEY(3, 1, KEY_RIGHT),
+	KEY(4, 1, KEY_ENTER),
+};
+
+static const struct matrix_keymap_data palmte_keymap_data = {
+	.keymap		= palmte_keymap,
+	.keymap_size	= ARRAY_SIZE(palmte_keymap),
 };
 
 static struct omap_kp_platform_data palmte_kp_data = {
 	.rows	= 8,
 	.cols	= 8,
-	.keymap = (int *) palmte_keymap,
-	.rep	= 1,
+	.keymap_data = &palmte_keymap_data,
+	.rep	= true,
 	.delay	= 12,
 };
 
diff --git a/arch/arm/mach-omap1/board-palmtt.c b/arch/arm/mach-omap1/board-palmtt.c
index ed1400a67f75..f04f2d36e7d3 100644
--- a/arch/arm/mach-omap1/board-palmtt.c
+++ b/arch/arm/mach-omap1/board-palmtt.c
@@ -51,19 +51,18 @@
 #define PALMTT_MMC_WP_GPIO	8
 #define PALMTT_HDQ_GPIO		11
 
-static int palmtt_keymap[] = {
+static const unsigned int palmtt_keymap[] = {
 	KEY(0, 0, KEY_ESC),
-	KEY(0, 1, KEY_SPACE),
-	KEY(0, 2, KEY_LEFTCTRL),
-	KEY(0, 3, KEY_TAB),
-	KEY(0, 4, KEY_ENTER),
-	KEY(1, 0, KEY_LEFT),
+	KEY(1, 0, KEY_SPACE),
+	KEY(2, 0, KEY_LEFTCTRL),
+	KEY(3, 0, KEY_TAB),
+	KEY(4, 0, KEY_ENTER),
+	KEY(0, 1, KEY_LEFT),
 	KEY(1, 1, KEY_DOWN),
-	KEY(1, 2, KEY_UP),
-	KEY(1, 3, KEY_RIGHT),
-	KEY(2, 0, KEY_SLEEP),
-	KEY(2, 4, KEY_Y),
-	0
+	KEY(2, 1, KEY_UP),
+	KEY(3, 1, KEY_RIGHT),
+	KEY(0, 2, KEY_SLEEP),
+	KEY(4, 2, KEY_Y),
 };
 
 static struct mtd_partition palmtt_partitions[] = {
@@ -136,10 +135,15 @@ static struct resource palmtt_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data palmtt_keymap_data = {
+	.keymap		= palmtt_keymap,
+	.keymap_size	= ARRAY_SIZE(palmtt_keymap),
+};
+
 static struct omap_kp_platform_data palmtt_kp_data = {
 	.rows	= 6,
 	.cols	= 3,
-	.keymap = palmtt_keymap,
+	.keymap_data = &palmtt_keymap_data,
 };
 
 static struct platform_device palmtt_kp_device = {
diff --git a/arch/arm/mach-omap1/board-palmz71.c b/arch/arm/mach-omap1/board-palmz71.c
index 2afac598baee..d7bbbe721a75 100644
--- a/arch/arm/mach-omap1/board-palmz71.c
+++ b/arch/arm/mach-omap1/board-palmz71.c
@@ -64,26 +64,30 @@ omap_palmz71_init_irq(void)
 	omap_init_irq();
 }
 
-static int palmz71_keymap[] = {
+static const unsigned int palmz71_keymap[] = {
 	KEY(0, 0, KEY_F1),
-	KEY(0, 1, KEY_F2),
-	KEY(0, 2, KEY_F3),
-	KEY(0, 3, KEY_F4),
-	KEY(0, 4, KEY_POWER),
-	KEY(1, 0, KEY_LEFT),
+	KEY(1, 0, KEY_F2),
+	KEY(2, 0, KEY_F3),
+	KEY(3, 0, KEY_F4),
+	KEY(4, 0, KEY_POWER),
+	KEY(0, 1, KEY_LEFT),
 	KEY(1, 1, KEY_DOWN),
-	KEY(1, 2, KEY_UP),
-	KEY(1, 3, KEY_RIGHT),
-	KEY(1, 4, KEY_ENTER),
-	KEY(2, 0, KEY_CAMERA),
-	0,
+	KEY(2, 1, KEY_UP),
+	KEY(3, 1, KEY_RIGHT),
+	KEY(4, 1, KEY_ENTER),
+	KEY(0, 2, KEY_CAMERA),
+};
+
+static const struct matrix_keymap_data palmz71_keymap_data = {
+	.keymap		= palmz71_keymap,
+	.keymap_size	= ARRAY_SIZE(palmz71_keymap),
 };
 
 static struct omap_kp_platform_data palmz71_kp_data = {
 	.rows	= 8,
 	.cols	= 8,
-	.keymap	= palmz71_keymap,
-	.rep	= 1,
+	.keymap_data	= &palmz71_keymap_data,
+	.rep	= true,
 	.delay	= 80,
 };
 
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index 69fda218fb45..3c8ee8489458 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -36,36 +36,35 @@
 #include <plat/common.h>
 #include <plat/board.h>
 
-static int p2_keymap[] = {
-	KEY(0,0,KEY_UP),
-	KEY(0,1,KEY_RIGHT),
-	KEY(0,2,KEY_LEFT),
-	KEY(0,3,KEY_DOWN),
-	KEY(0,4,KEY_ENTER),
-	KEY(1,0,KEY_F10),
-	KEY(1,1,KEY_SEND),
-	KEY(1,2,KEY_END),
-	KEY(1,3,KEY_VOLUMEDOWN),
-	KEY(1,4,KEY_VOLUMEUP),
-	KEY(1,5,KEY_RECORD),
-	KEY(2,0,KEY_F9),
-	KEY(2,1,KEY_3),
-	KEY(2,2,KEY_6),
-	KEY(2,3,KEY_9),
-	KEY(2,4,KEY_KPDOT),
-	KEY(3,0,KEY_BACK),
-	KEY(3,1,KEY_2),
-	KEY(3,2,KEY_5),
-	KEY(3,3,KEY_8),
-	KEY(3,4,KEY_0),
-	KEY(3,5,KEY_KPSLASH),
-	KEY(4,0,KEY_HOME),
-	KEY(4,1,KEY_1),
-	KEY(4,2,KEY_4),
-	KEY(4,3,KEY_7),
-	KEY(4,4,KEY_KPASTERISK),
-	KEY(4,5,KEY_POWER),
-	0
+static const unsigned int p2_keymap[] = {
+	KEY(0, 0, KEY_UP),
+	KEY(1, 0, KEY_RIGHT),
+	KEY(2, 0, KEY_LEFT),
+	KEY(3, 0, KEY_DOWN),
+	KEY(4, 0, KEY_ENTER),
+	KEY(0, 1, KEY_F10),
+	KEY(1, 1, KEY_SEND),
+	KEY(2, 1, KEY_END),
+	KEY(3, 1, KEY_VOLUMEDOWN),
+	KEY(4, 1, KEY_VOLUMEUP),
+	KEY(5, 1, KEY_RECORD),
+	KEY(0, 2, KEY_F9),
+	KEY(1, 2, KEY_3),
+	KEY(2, 2, KEY_6),
+	KEY(3, 2, KEY_9),
+	KEY(4, 2, KEY_KPDOT),
+	KEY(0, 3, KEY_BACK),
+	KEY(1, 3, KEY_2),
+	KEY(2, 3, KEY_5),
+	KEY(3, 3, KEY_8),
+	KEY(4, 3, KEY_0),
+	KEY(5, 3, KEY_KPSLASH),
+	KEY(0, 4, KEY_HOME),
+	KEY(1, 4, KEY_1),
+	KEY(2, 4, KEY_4),
+	KEY(3, 4, KEY_7),
+	KEY(4, 4, KEY_KPASTERISK),
+	KEY(5, 4, KEY_POWER),
 };
 
 static struct smc91x_platdata smc91x_info = {
@@ -211,13 +210,17 @@ static struct resource kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data p2_keymap_data = {
+	.keymap		= p2_keymap,
+	.keymap_size	= ARRAY_SIZE(p2_keymap),
+};
+
 static struct omap_kp_platform_data kp_data = {
 	.rows		= 8,
 	.cols		= 8,
-	.keymap		= p2_keymap,
-	.keymapsize	= ARRAY_SIZE(p2_keymap),
+	.keymap_data	= &p2_keymap_data,
 	.delay		= 4,
-	.dbounce	= 1,
+	.dbounce	= true,
 };
 
 static struct platform_device kp_device = {
diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c
index 463862c67819..d41fe2d0616a 100644
--- a/arch/arm/mach-omap1/board-sx1.c
+++ b/arch/arm/mach-omap1/board-sx1.c
@@ -164,36 +164,35 @@ EXPORT_SYMBOL(sx1_setusbpower);
 
 /*----------- Keypad -------------------------*/
 
-static int sx1_keymap[] = {
-	KEY(5, 3, GROUP_0 | 117), /* camera Qt::Key_F17 */
-	KEY(0, 4, GROUP_0 | 114), /* voice memo Qt::Key_F14 */
-	KEY(1, 4, GROUP_2 | 114), /* voice memo */
-	KEY(2, 4, GROUP_3 | 114), /* voice memo */
+static const unsigned int sx1_keymap[] = {
+	KEY(3, 5, GROUP_0 | 117), /* camera Qt::Key_F17 */
+	KEY(4, 0, GROUP_0 | 114), /* voice memo Qt::Key_F14 */
+	KEY(4, 1, GROUP_2 | 114), /* voice memo */
+	KEY(4, 2, GROUP_3 | 114), /* voice memo */
 	KEY(0, 0, GROUP_1 | KEY_F12),	/* red button Qt::Key_Hangup */
-	KEY(4, 3, GROUP_1 | KEY_LEFT),
-	KEY(2, 3, GROUP_1 | KEY_DOWN),
-	KEY(1, 3, GROUP_1 | KEY_RIGHT),
-	KEY(0, 3, GROUP_1 | KEY_UP),
+	KEY(3, 4, GROUP_1 | KEY_LEFT),
+	KEY(3, 2, GROUP_1 | KEY_DOWN),
+	KEY(3, 1, GROUP_1 | KEY_RIGHT),
+	KEY(3, 0, GROUP_1 | KEY_UP),
 	KEY(3, 3, GROUP_1 | KEY_POWER), /* joystick press or Qt::Key_Select */
-	KEY(5, 0, GROUP_1 | KEY_1),
-	KEY(4, 0, GROUP_1 | KEY_2),
-	KEY(3, 0, GROUP_1 | KEY_3),
-	KEY(3, 4, GROUP_1 | KEY_4),
+	KEY(0, 5, GROUP_1 | KEY_1),
+	KEY(0, 4, GROUP_1 | KEY_2),
+	KEY(0, 3, GROUP_1 | KEY_3),
+	KEY(4, 3, GROUP_1 | KEY_4),
 	KEY(4, 4, GROUP_1 | KEY_5),
-	KEY(5, 4, GROUP_1 | KEY_KPASTERISK),/* "*" */
-	KEY(4, 1, GROUP_1 | KEY_6),
-	KEY(5, 1, GROUP_1 | KEY_7),
-	KEY(3, 1, GROUP_1 | KEY_8),
-	KEY(3, 2, GROUP_1 | KEY_9),
-	KEY(5, 2, GROUP_1 | KEY_0),
-	KEY(4, 2, GROUP_1 | 113),	/* # F13 Toggle input method Qt::Key_F13 */
-	KEY(0, 1, GROUP_1 | KEY_F11),	/* green button Qt::Key_Call */
-	KEY(1, 2, GROUP_1 | KEY_YEN),	/* left soft Qt::Key_Context1 */
+	KEY(4, 5, GROUP_1 | KEY_KPASTERISK),/* "*" */
+	KEY(1, 4, GROUP_1 | KEY_6),
+	KEY(1, 5, GROUP_1 | KEY_7),
+	KEY(1, 3, GROUP_1 | KEY_8),
+	KEY(2, 3, GROUP_1 | KEY_9),
+	KEY(2, 5, GROUP_1 | KEY_0),
+	KEY(2, 4, GROUP_1 | 113), /* # F13 Toggle input method Qt::Key_F13 */
+	KEY(1, 0, GROUP_1 | KEY_F11),	/* green button Qt::Key_Call */
+	KEY(2, 1, GROUP_1 | KEY_YEN),	/* left soft Qt::Key_Context1 */
 	KEY(2, 2, GROUP_1 | KEY_F8),	/* right soft Qt::Key_Back */
-	KEY(2, 1, GROUP_1 | KEY_LEFTSHIFT), /* shift */
+	KEY(1, 2, GROUP_1 | KEY_LEFTSHIFT), /* shift */
 	KEY(1, 1, GROUP_1 | KEY_BACKSPACE), /* C (clear) */
-	KEY(0, 2, GROUP_1 | KEY_F7),	/* menu Qt::Key_Menu */
-	0
+	KEY(2, 0, GROUP_1 | KEY_F7),	/* menu Qt::Key_Menu */
 };
 
 static struct resource sx1_kp_resources[] = {
@@ -204,11 +203,15 @@ static struct resource sx1_kp_resources[] = {
 	},
 };
 
+static const struct matrix_keymap_data sx1_keymap_data = {
+	.keymap		= sx1_keymap,
+	.keymap_size	= ARRAY_SIZE(sx1_keymap),
+};
+
 static struct omap_kp_platform_data sx1_kp_data = {
 	.rows		= 6,
 	.cols		= 6,
-	.keymap	= sx1_keymap,
-	.keymapsize = ARRAY_SIZE(sx1_keymap),
+	.keymap_data	= &sx1_keymap_data,
 	.delay	= 80,
 };
 
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 0a2d73cf036f..b386a403c379 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -51,38 +51,37 @@
 static unsigned int row_gpios[6] = { 88, 89, 124, 11, 6, 96 };
 static unsigned int col_gpios[7] = { 90, 91, 100, 36, 12, 97, 98 };
 
-static int h4_keymap[] = {
+static const unsigned int h4_keymap[] = {
 	KEY(0, 0, KEY_LEFT),
-	KEY(0, 1, KEY_RIGHT),
-	KEY(0, 2, KEY_A),
-	KEY(0, 3, KEY_B),
-	KEY(0, 4, KEY_C),
-	KEY(1, 0, KEY_DOWN),
+	KEY(1, 0, KEY_RIGHT),
+	KEY(2, 0, KEY_A),
+	KEY(3, 0, KEY_B),
+	KEY(4, 0, KEY_C),
+	KEY(0, 1, KEY_DOWN),
 	KEY(1, 1, KEY_UP),
-	KEY(1, 2, KEY_E),
-	KEY(1, 3, KEY_F),
-	KEY(1, 4, KEY_G),
-	KEY(2, 0, KEY_ENTER),
-	KEY(2, 1, KEY_I),
+	KEY(2, 1, KEY_E),
+	KEY(3, 1, KEY_F),
+	KEY(4, 1, KEY_G),
+	KEY(0, 2, KEY_ENTER),
+	KEY(1, 2, KEY_I),
 	KEY(2, 2, KEY_J),
-	KEY(2, 3, KEY_K),
-	KEY(2, 4, KEY_3),
-	KEY(3, 0, KEY_M),
-	KEY(3, 1, KEY_N),
-	KEY(3, 2, KEY_O),
+	KEY(3, 2, KEY_K),
+	KEY(4, 2, KEY_3),
+	KEY(0, 3, KEY_M),
+	KEY(1, 3, KEY_N),
+	KEY(2, 3, KEY_O),
 	KEY(3, 3, KEY_P),
-	KEY(3, 4, KEY_Q),
-	KEY(4, 0, KEY_R),
-	KEY(4, 1, KEY_4),
-	KEY(4, 2, KEY_T),
-	KEY(4, 3, KEY_U),
+	KEY(4, 3, KEY_Q),
+	KEY(0, 4, KEY_R),
+	KEY(1, 4, KEY_4),
+	KEY(2, 4, KEY_T),
+	KEY(3, 4, KEY_U),
 	KEY(4, 4, KEY_ENTER),
-	KEY(5, 0, KEY_V),
-	KEY(5, 1, KEY_W),
-	KEY(5, 2, KEY_L),
-	KEY(5, 3, KEY_S),
-	KEY(5, 4, KEY_ENTER),
-	0
+	KEY(0, 5, KEY_V),
+	KEY(1, 5, KEY_W),
+	KEY(2, 5, KEY_L),
+	KEY(3, 5, KEY_S),
+	KEY(4, 5, KEY_ENTER),
 };
 
 static struct mtd_partition h4_partitions[] = {
@@ -136,12 +135,16 @@ static struct platform_device h4_flash_device = {
 	.resource	= &h4_flash_resource,
 };
 
+static const struct matrix_keymap_data h4_keymap_data = {
+	.keymap		= h4_keymap,
+	.keymap_size	= ARRAY_SIZE(h4_keymap),
+};
+
 static struct omap_kp_platform_data h4_kp_data = {
 	.rows		= 6,
 	.cols		= 7,
-	.keymap 	= h4_keymap,
-	.keymapsize 	= ARRAY_SIZE(h4_keymap),
-	.rep		= 1,
+	.keymap_data	= &h4_keymap_data,
+	.rep		= true,
 	.row_gpios 	= row_gpios,
 	.col_gpios 	= col_gpios,
 };
diff --git a/arch/arm/plat-omap/include/plat/keypad.h b/arch/arm/plat-omap/include/plat/keypad.h
index 3ae52ccc793c..793ce9d53294 100644
--- a/arch/arm/plat-omap/include/plat/keypad.h
+++ b/arch/arm/plat-omap/include/plat/keypad.h
@@ -10,16 +10,18 @@
 #ifndef ASMARM_ARCH_KEYPAD_H
 #define ASMARM_ARCH_KEYPAD_H
 
-#warning: Please update the board to use matrix_keypad.h instead
+#ifndef CONFIG_ARCH_OMAP1
+#warning Please update the board to use matrix-keypad driver
+#endif
+#include <linux/input/matrix_keypad.h>
 
 struct omap_kp_platform_data {
 	int rows;
 	int cols;
-	int *keymap;
-	unsigned int keymapsize;
-	unsigned int rep:1;
+	const struct matrix_keymap_data *keymap_data;
+	bool rep;
 	unsigned long delay;
-	unsigned int dbounce:1;
+	bool dbounce;
 	/* specific to OMAP242x*/
 	unsigned int *row_gpios;
 	unsigned int *col_gpios;
@@ -28,18 +30,21 @@ struct omap_kp_platform_data {
 /* Group (0..3) -- when multiple keys are pressed, only the
  * keys pressed in the same group are considered as pressed. This is
  * in order to workaround certain crappy HW designs that produce ghost
- * keypresses. */
-#define GROUP_0		(0 << 16)
-#define GROUP_1		(1 << 16)
-#define GROUP_2		(2 << 16)
-#define GROUP_3		(3 << 16)
+ * keypresses. Two free bits, not used by neither row/col nor keynum,
+ * must be available for use as group bits. The below GROUP_SHIFT
+ * macro definition is based on some prior knowledge of the
+ * matrix_keypad defined KEY() macro internals.
+ */
+#define GROUP_SHIFT	14
+#define GROUP_0		(0 << GROUP_SHIFT)
+#define GROUP_1		(1 << GROUP_SHIFT)
+#define GROUP_2		(2 << GROUP_SHIFT)
+#define GROUP_3		(3 << GROUP_SHIFT)
 #define GROUP_MASK	GROUP_3
+#if KEY_MAX & GROUP_MASK
+#error Group bits in conflict with keynum bits
+#endif
 
-#define KEY_PERSISTENT		0x00800000
-#define KEYNUM_MASK		0x00EFFFFF
-#define KEY(col, row, val) (((col) << 28) | ((row) << 24) | (val))
-#define PERSISTENT_KEY(col, row) (((col) << 28) | ((row) << 24) | \
-						KEY_PERSISTENT)
 
 #endif
 
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index a72e61ddca91..0e2a19cb43d8 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -65,7 +65,6 @@ struct omap_kp {
 
 static DECLARE_TASKLET_DISABLED(kp_tasklet, omap_kp_tasklet, 0);
 
-static int *keymap;
 static unsigned int *row_gpios;
 static unsigned int *col_gpios;
 
@@ -162,20 +161,11 @@ static void omap_kp_scan_keypad(struct omap_kp *omap_kp, unsigned char *state)
 	}
 }
 
-static inline int omap_kp_find_key(int col, int row)
-{
-	int i, key;
-
-	key = KEY(col, row, 0);
-	for (i = 0; keymap[i] != 0; i++)
-		if ((keymap[i] & 0xff000000) == key)
-			return keymap[i] & 0x00ffffff;
-	return -1;
-}
-
 static void omap_kp_tasklet(unsigned long data)
 {
 	struct omap_kp *omap_kp_data = (struct omap_kp *) data;
+	unsigned short *keycodes = omap_kp_data->input->keycode;
+	unsigned int row_shift = get_count_order(omap_kp_data->cols);
 	unsigned char new_state[8], changed, key_down = 0;
 	int col, row;
 	int spurious = 0;
@@ -199,7 +189,7 @@ static void omap_kp_tasklet(unsigned long data)
 			       row, (new_state[col] & (1 << row)) ?
 			       "pressed" : "released");
 #else
-			key = omap_kp_find_key(col, row);
+			key = keycodes[MATRIX_SCAN_CODE(row, col, row_shift)];
 			if (key < 0) {
 				printk(KERN_WARNING
 				      "omap-keypad: Spurious key event %d-%d\n",
@@ -298,13 +288,18 @@ static int __devinit omap_kp_probe(struct platform_device *pdev)
 	struct input_dev *input_dev;
 	struct omap_kp_platform_data *pdata =  pdev->dev.platform_data;
 	int i, col_idx, row_idx, irq_idx, ret;
+	unsigned int row_shift, keycodemax;
 
-	if (!pdata->rows || !pdata->cols || !pdata->keymap) {
-		printk(KERN_ERR "No rows, cols or keymap from pdata\n");
+	if (!pdata->rows || !pdata->cols || !pdata->keymap_data) {
+		printk(KERN_ERR "No rows, cols or keymap_data from pdata\n");
 		return -EINVAL;
 	}
 
-	omap_kp = kzalloc(sizeof(struct omap_kp), GFP_KERNEL);
+	row_shift = get_count_order(pdata->cols);
+	keycodemax = pdata->rows << row_shift;
+
+	omap_kp = kzalloc(sizeof(struct omap_kp) +
+			keycodemax * sizeof(unsigned short), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!omap_kp || !input_dev) {
 		kfree(omap_kp);
@@ -320,7 +315,9 @@ static int __devinit omap_kp_probe(struct platform_device *pdev)
 	if (!cpu_is_omap24xx())
 		omap_writew(1, OMAP1_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT);
 
-	keymap = pdata->keymap;
+	input_dev->keycode      = &omap_kp[1];
+	input_dev->keycodesize  = sizeof(unsigned short);
+	input_dev->keycodemax   = keycodemax;
 
 	if (pdata->rep)
 		__set_bit(EV_REP, input_dev->evbit);
@@ -374,8 +371,8 @@ static int __devinit omap_kp_probe(struct platform_device *pdev)
 
 	/* setup input device */
 	__set_bit(EV_KEY, input_dev->evbit);
-	for (i = 0; keymap[i] != 0; i++)
-		__set_bit(keymap[i] & KEY_MAX, input_dev->keybit);
+	matrix_keypad_build_keymap(pdata->keymap_data, row_shift,
+			input_dev->keycode, input_dev->keybit);
 	input_dev->name = "omap-keypad";
 	input_dev->phys = "omap-keypad/input0";
 	input_dev->dev.parent = &pdev->dev;
@@ -416,7 +413,7 @@ static int __devinit omap_kp_probe(struct platform_device *pdev)
 	return 0;
 err5:
 	for (i = irq_idx - 1; i >=0; i--)
-		free_irq(row_gpios[i], 0);
+		free_irq(row_gpios[i], NULL);
 err4:
 	input_unregister_device(omap_kp->input);
 	input_dev = NULL;
@@ -447,11 +444,11 @@ static int __devexit omap_kp_remove(struct platform_device *pdev)
 			gpio_free(col_gpios[i]);
 		for (i = 0; i < omap_kp->rows; i++) {
 			gpio_free(row_gpios[i]);
-			free_irq(gpio_to_irq(row_gpios[i]), 0);
+			free_irq(gpio_to_irq(row_gpios[i]), NULL);
 		}
 	} else {
 		omap_writew(1, OMAP1_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT);
-		free_irq(omap_kp->irq, 0);
+		free_irq(omap_kp->irq, NULL);
 	}
 
 	del_timer_sync(&omap_kp->timer);
diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h
index 80352ad6581a..697474691749 100644
--- a/include/linux/input/matrix_keypad.h
+++ b/include/linux/input/matrix_keypad.h
@@ -9,7 +9,7 @@
 
 #define KEY(row, col, val)	((((row) & (MATRIX_MAX_ROWS - 1)) << 24) |\
 				 (((col) & (MATRIX_MAX_COLS - 1)) << 16) |\
-				 (val & 0xffff))
+				 ((val) & 0xffff))
 
 #define KEY_ROW(k)		(((k) >> 24) & 0xff)
 #define KEY_COL(k)		(((k) >> 16) & 0xff)
-- 
cgit v1.2.3-71-gd317


From 4a7863cc2eb5f9804f1c4e9156619a801cd7f14f Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Wed, 22 Dec 2010 14:00:03 -0500
Subject: x86, nmi_watchdog: Remove ARCH_HAS_NMI_WATCHDOG and rely on
 CONFIG_HARDLOCKUP_DETECTOR

The x86 arch has shifted its use of the nmi_watchdog from a
local implementation to the global one provide by
kernel/watchdog.c.  This shift has caused a whole bunch of
compile problems under different config options.  I attempt to
simplify things with the patch below.

In order to simplify things, I had to come to terms with the
meaning of two terms ARCH_HAS_NMI_WATCHDOG and
CONFIG_HARDLOCKUP_DETECTOR.  Basically they mean the same thing,
the former on a local level and the latter on a global level.

With the old x86 nmi watchdog gone, there is no need to rely on
defining the ARCH_HAS_NMI_WATCHDOG variable because it doesn't
make sense any more.  x86 will now use the global
implementation.

The changes below do a few things.  First it changes the few
places that relied on ARCH_HAS_NMI_WATCHDOG to use
CONFIG_X86_LOCAL_APIC (the former was an alias for the latter
anyway, so nothing unusual here).  Those pieces of code were
relying more on local apic functionality the nmi watchdog
functionality, so the change should make sense.

Second, I removed the x86 implementation of
touch_nmi_watchdog().  It isn't need now, instead x86 will rely
on kernel/watchdog.c's implementation.

Third, I removed the #define ARCH_HAS_NMI_WATCHDOG itself from
x86.  And tweaked the include/linux/nmi.h file to tell users to
look for an externally defined touch_nmi_watchdog in the case of
ARCH_HAS_NMI_WATCHDOG _or_ CONFIG_HARDLOCKUP_DETECTOR. This
changes removes some of the ugliness in that file.

Finally, I added a Kconfig dependency for
CONFIG_HARDLOCKUP_DETECTOR that said you can't have
ARCH_HAS_NMI_WATCHDOG _and_ CONFIG_HARDLOCKUP_DETECTOR.  You can
only have one nmi_watchdog.

Tested with
ARCH=i386: allnoconfig, defconfig, allyesconfig, (various broken
configs) ARCH=x86_64: allnoconfig, defconfig, allyesconfig,
(various broken configs)

Hopefully, after this patch I won't get any more compile broken
emails. :-)

v3:
  changed a couple of 'linux/nmi.h' -> 'asm/nmi.h' to pick-up correct function
  prototypes when CONFIG_HARDLOCKUP_DETECTOR is not set.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: fweisbec@gmail.com
LKML-Reference: <1293044403-14117-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq.h             |  4 ----
 arch/x86/include/asm/nmi.h             |  2 +-
 arch/x86/kernel/apic/hw_nmi.c          | 10 ----------
 arch/x86/kernel/cpu/perfctr-watchdog.c |  2 +-
 arch/x86/oprofile/op_model_p4.c        |  2 +-
 drivers/watchdog/hpwdt.c               |  4 ++--
 include/linux/nmi.h                    |  6 +-----
 lib/Kconfig.debug                      |  3 ++-
 8 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0ebaa512f..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
 	return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 3545838cddeb..c4021b953510 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,7 +5,7 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 93da91df5b38..c57d0b599448 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,7 +17,6 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
@@ -25,15 +24,6 @@ u64 hw_nmi_get_sample_period(void)
 }
 #endif
 
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-#endif
-#endif
-
 #ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 14d45928c282..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,7 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558dd5515..9fadec074142 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index c19f4a20794a..dea7b5bf6e2c 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -642,7 +642,7 @@ static struct notifier_block die_notifier = {
  */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
 	/*
@@ -657,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 	dev_warn(&dev->dev, "NMI decoding is disabled. "
 		"Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 17ccf44e7dcb..c536f8545f74 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,18 +14,14 @@
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
 #else
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
 	touch_softlockup_watchdog();
 }
-#else
-extern void touch_nmi_watchdog(void);
-#endif
 #endif
 
 /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28b42b9274d0..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
 	  An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+		 !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-- 
cgit v1.2.3-71-gd317


From 0131d8973c8b9bd9d40fee8fae24eab24821efdb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 1 Dec 2010 10:54:46 +0100
Subject: of/address: use proper endianess in get_flags

This patch changes u32 to __be32 for all "ranges", "prop" and "addr" and
such. Those variables are pointing to the device tree which contains
integers in big endian format.

Most functions are doing it right because of_read_number() is doing the
right thing for them. of_bus_isa_get_flags(), of_bus_pci_get_flags() and
of_bus_isa_map() were accessing the data directly and were doing it wrong.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/include/asm/prom.h |  2 +-
 drivers/of/address.c            | 54 +++++++++++++++++++++--------------------
 include/linux/of_address.h      |  6 ++---
 3 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 98264bf0a433..d72757585595 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -42,7 +42,7 @@ extern void pci_create_OF_bus_map(void);
 
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
-				    const u32 *in_addr);
+				    const __be32 *in_addr);
 
 #ifdef CONFIG_PCI
 extern unsigned long pci_address_to_pio(phys_addr_t address);
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 3a1c7e70b192..b4559c58c095 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -12,13 +12,13 @@
 			(ns) > 0)
 
 static struct of_bus *of_match_bus(struct device_node *np);
-static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-				    u64 size, unsigned int flags,
+static int __of_address_to_resource(struct device_node *dev,
+		const __be32 *addrp, u64 size, unsigned int flags,
 				    struct resource *r);
 
 /* Debug utility */
 #ifdef DEBUG
-static void of_dump_addr(const char *s, const u32 *addr, int na)
+static void of_dump_addr(const char *s, const __be32 *addr, int na)
 {
 	printk(KERN_DEBUG "%s", s);
 	while (na--)
@@ -26,7 +26,7 @@ static void of_dump_addr(const char *s, const u32 *addr, int na)
 	printk("\n");
 }
 #else
-static void of_dump_addr(const char *s, const u32 *addr, int na) { }
+static void of_dump_addr(const char *s, const __be32 *addr, int na) { }
 #endif
 
 /* Callbacks for bus specific translators */
@@ -36,10 +36,10 @@ struct of_bus {
 	int		(*match)(struct device_node *parent);
 	void		(*count_cells)(struct device_node *child,
 				       int *addrc, int *sizec);
-	u64		(*map)(u32 *addr, const u32 *range,
+	u64		(*map)(u32 *addr, const __be32 *range,
 				int na, int ns, int pna);
 	int		(*translate)(u32 *addr, u64 offset, int na);
-	unsigned int	(*get_flags)(const u32 *addr);
+	unsigned int	(*get_flags)(const __be32 *addr);
 };
 
 /*
@@ -55,7 +55,7 @@ static void of_bus_default_count_cells(struct device_node *dev,
 		*sizec = of_n_size_cells(dev);
 }
 
-static u64 of_bus_default_map(u32 *addr, const u32 *range,
+static u64 of_bus_default_map(u32 *addr, const __be32 *range,
 		int na, int ns, int pna)
 {
 	u64 cp, s, da;
@@ -85,7 +85,7 @@ static int of_bus_default_translate(u32 *addr, u64 offset, int na)
 	return 0;
 }
 
-static unsigned int of_bus_default_get_flags(const u32 *addr)
+static unsigned int of_bus_default_get_flags(const __be32 *addr)
 {
 	return IORESOURCE_MEM;
 }
@@ -110,10 +110,10 @@ static void of_bus_pci_count_cells(struct device_node *np,
 		*sizec = 2;
 }
 
-static unsigned int of_bus_pci_get_flags(const u32 *addr)
+static unsigned int of_bus_pci_get_flags(const __be32 *addr)
 {
 	unsigned int flags = 0;
-	u32 w = addr[0];
+	u32 w = be32_to_cpup(addr);
 
 	switch((w >> 24) & 0x03) {
 	case 0x01:
@@ -129,7 +129,8 @@ static unsigned int of_bus_pci_get_flags(const u32 *addr)
 	return flags;
 }
 
-static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+static u64 of_bus_pci_map(u32 *addr, const __be32 *range, int na, int ns,
+		int pna)
 {
 	u64 cp, s, da;
 	unsigned int af, rf;
@@ -160,7 +161,7 @@ static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
+const __be32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 			unsigned int *flags)
 {
 	const __be32 *prop;
@@ -207,7 +208,7 @@ EXPORT_SYMBOL(of_get_pci_address);
 int of_pci_address_to_resource(struct device_node *dev, int bar,
 			       struct resource *r)
 {
-	const u32	*addrp;
+	const __be32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
@@ -237,12 +238,13 @@ static void of_bus_isa_count_cells(struct device_node *child,
 		*sizec = 1;
 }
 
-static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+static u64 of_bus_isa_map(u32 *addr, const __be32 *range, int na, int ns,
+		int pna)
 {
 	u64 cp, s, da;
 
 	/* Check address type match */
-	if ((addr[0] ^ range[0]) & 0x00000001)
+	if ((addr[0] ^ range[0]) & cpu_to_be32(1))
 		return OF_BAD_ADDR;
 
 	/* Read address values, skipping high cell */
@@ -264,10 +266,10 @@ static int of_bus_isa_translate(u32 *addr, u64 offset, int na)
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-static unsigned int of_bus_isa_get_flags(const u32 *addr)
+static unsigned int of_bus_isa_get_flags(const __be32 *addr)
 {
 	unsigned int flags = 0;
-	u32 w = addr[0];
+	u32 w = be32_to_cpup(addr);
 
 	if (w & 1)
 		flags |= IORESOURCE_IO;
@@ -330,7 +332,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 			    struct of_bus *pbus, u32 *addr,
 			    int na, int ns, int pna, const char *rprop)
 {
-	const u32 *ranges;
+	const __be32 *ranges;
 	unsigned int rlen;
 	int rone;
 	u64 offset = OF_BAD_ADDR;
@@ -398,7 +400,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
  * that can be mapped to a cpu physical address). This is not really specified
  * that way, but this is traditionally the way IBM at least do things
  */
-u64 __of_translate_address(struct device_node *dev, const u32 *in_addr,
+u64 __of_translate_address(struct device_node *dev, const __be32 *in_addr,
 			   const char *rprop)
 {
 	struct device_node *parent = NULL;
@@ -475,22 +477,22 @@ u64 __of_translate_address(struct device_node *dev, const u32 *in_addr,
 	return result;
 }
 
-u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
+u64 of_translate_address(struct device_node *dev, const __be32 *in_addr)
 {
 	return __of_translate_address(dev, in_addr, "ranges");
 }
 EXPORT_SYMBOL(of_translate_address);
 
-u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr)
+u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr)
 {
 	return __of_translate_address(dev, in_addr, "dma-ranges");
 }
 EXPORT_SYMBOL(of_translate_dma_address);
 
-const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
+const __be32 *of_get_address(struct device_node *dev, int index, u64 *size,
 		    unsigned int *flags)
 {
-	const u32 *prop;
+	const __be32 *prop;
 	unsigned int psize;
 	struct device_node *parent;
 	struct of_bus *bus;
@@ -525,8 +527,8 @@ const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 }
 EXPORT_SYMBOL(of_get_address);
 
-static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
-				    u64 size, unsigned int flags,
+static int __of_address_to_resource(struct device_node *dev,
+		const __be32 *addrp, u64 size, unsigned int flags,
 				    struct resource *r)
 {
 	u64 taddr;
@@ -564,7 +566,7 @@ static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
 int of_address_to_resource(struct device_node *dev, int index,
 			   struct resource *r)
 {
-	const u32	*addrp;
+	const __be32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 8aea06f0564c..2feda6ee6140 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -3,7 +3,7 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-extern u64 of_translate_address(struct device_node *np, const u32 *addr);
+extern u64 of_translate_address(struct device_node *np, const __be32 *addr);
 extern int of_address_to_resource(struct device_node *dev, int index,
 				  struct resource *r);
 extern void __iomem *of_iomap(struct device_node *device, int index);
@@ -21,7 +21,7 @@ static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
 #endif
 
 #ifdef CONFIG_PCI
-extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
+extern const __be32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			       u64 *size, unsigned int *flags);
 extern int of_pci_address_to_resource(struct device_node *dev, int bar,
 				      struct resource *r);
@@ -32,7 +32,7 @@ static inline int of_pci_address_to_resource(struct device_node *dev, int bar,
 	return -ENOSYS;
 }
 
-static inline const u32 *of_get_pci_address(struct device_node *dev,
+static inline const __be32 *of_get_pci_address(struct device_node *dev,
 		int bar_no, u64 *size, unsigned int *flags)
 {
 	return NULL;
-- 
cgit v1.2.3-71-gd317


From 7063c0d942a1af2993531fbe52b4c74c1db818c4 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 24 Dec 2010 13:59:11 +0800
Subject: spi/dw_spi: add DMA support

dw_spi driver in upstream only supports PIO mode, and this patch
will support it to cowork with the Designware dma controller used
on Intel Moorestown platform, at the same time it provides a general
framework to support dw_spi core to cowork with dma controllers on
other platforms

It has been tested with a Option GTM501L 3G modem and Infenion 60x60
modem. To use DMA mode, DMA controller 2 of Moorestown has to be enabled

Also change the dma interface suggested by Linus Walleij.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
[Typo fix and renames to match intel_mid_dma renaming]
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig        |   4 +
 drivers/spi/Makefile       |   3 +-
 drivers/spi/dw_spi.c       |  33 ++++---
 drivers/spi/dw_spi_mid.c   | 223 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/spi/dw_spi_pci.c   |  20 ++--
 include/linux/spi/dw_spi.h |  24 ++++-
 6 files changed, 283 insertions(+), 24 deletions(-)
 create mode 100644 drivers/spi/dw_spi_mid.c

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 665d03d4e022..caa2e84cc0a6 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -382,6 +382,10 @@ config SPI_DW_PCI
 	tristate "PCI interface driver for DW SPI core"
 	depends on SPI_DESIGNWARE && PCI
 
+config SPI_DW_MID_DMA
+	bool "DMA support for DW SPI controller on Intel Moorestown platform"
+	depends on SPI_DW_PCI && INTEL_MID_DMAC
+
 config SPI_DW_MMIO
 	tristate "Memory-mapped io interface driver for DW SPI core"
 	depends on SPI_DESIGNWARE && HAVE_CLK
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 02dad4ae412d..7ec375b6c0c3 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
 obj-$(CONFIG_SPI_COLDFIRE_QSPI)		+= coldfire_qspi.o
 obj-$(CONFIG_SPI_DAVINCI)		+= davinci_spi.o
 obj-$(CONFIG_SPI_DESIGNWARE)		+= dw_spi.o
-obj-$(CONFIG_SPI_DW_PCI)		+= dw_spi_pci.o
+obj-$(CONFIG_SPI_DW_PCI)		+= dw_spi_midpci.o
+dw_spi_midpci-objs			:= dw_spi_pci.o dw_spi_mid.o
 obj-$(CONFIG_SPI_DW_MMIO)		+= dw_spi_mmio.o
 obj-$(CONFIG_SPI_EP93XX)		+= ep93xx_spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
index b50bf5ba873f..497ecb3ab83f 100644
--- a/drivers/spi/dw_spi.c
+++ b/drivers/spi/dw_spi.c
@@ -288,8 +288,10 @@ static void *next_transfer(struct dw_spi *dws)
  */
 static int map_dma_buffers(struct dw_spi *dws)
 {
-	if (!dws->cur_msg->is_dma_mapped || !dws->dma_inited
-		|| !dws->cur_chip->enable_dma)
+	if (!dws->cur_msg->is_dma_mapped
+		|| !dws->dma_inited
+		|| !dws->cur_chip->enable_dma
+		|| !dws->dma_ops)
 		return 0;
 
 	if (dws->cur_transfer->tx_dma)
@@ -341,7 +343,7 @@ static void int_error_stop(struct dw_spi *dws, const char *msg)
 	tasklet_schedule(&dws->pump_transfers);
 }
 
-static void transfer_complete(struct dw_spi *dws)
+void dw_spi_xfer_done(struct dw_spi *dws)
 {
 	/* Update total byte transfered return count actual bytes read */
 	dws->cur_msg->actual_length += dws->len;
@@ -356,6 +358,7 @@ static void transfer_complete(struct dw_spi *dws)
 	} else
 		tasklet_schedule(&dws->pump_transfers);
 }
+EXPORT_SYMBOL_GPL(dw_spi_xfer_done);
 
 static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 {
@@ -387,7 +390,7 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 		if (dws->tx_end > dws->tx)
 			spi_umask_intr(dws, SPI_INT_TXEI);
 		else
-			transfer_complete(dws);
+			dw_spi_xfer_done(dws);
 	}
 
 	return IRQ_HANDLED;
@@ -422,11 +425,7 @@ static void poll_transfer(struct dw_spi *dws)
 	 */
 	dws->read(dws);
 
-	transfer_complete(dws);
-}
-
-static void dma_transfer(struct dw_spi *dws, int cs_change)
-{
+	dw_spi_xfer_done(dws);
 }
 
 static void pump_transfers(unsigned long data)
@@ -608,7 +607,7 @@ static void pump_transfers(unsigned long data)
 	}
 
 	if (dws->dma_mapped)
-		dma_transfer(dws, cs_change);
+		dws->dma_ops->dma_transfer(dws, cs_change);
 
 	if (chip->poll_mode)
 		poll_transfer(dws);
@@ -904,11 +903,17 @@ int __devinit dw_spi_add_host(struct dw_spi *dws)
 	master->setup = dw_spi_setup;
 	master->transfer = dw_spi_transfer;
 
-	dws->dma_inited = 0;
-
 	/* Basic HW init */
 	spi_hw_init(dws);
 
+	if (dws->dma_ops && dws->dma_ops->dma_init) {
+		ret = dws->dma_ops->dma_init(dws);
+		if (ret) {
+			dev_warn(&master->dev, "DMA init failed\n");
+			dws->dma_inited = 0;
+		}
+	}
+
 	/* Initial and start queue */
 	ret = init_queue(dws);
 	if (ret) {
@@ -933,6 +938,8 @@ int __devinit dw_spi_add_host(struct dw_spi *dws)
 
 err_queue_alloc:
 	destroy_queue(dws);
+	if (dws->dma_ops && dws->dma_ops->dma_exit)
+		dws->dma_ops->dma_exit(dws);
 err_diable_hw:
 	spi_enable_chip(dws, 0);
 	free_irq(dws->irq, dws);
@@ -957,6 +964,8 @@ void __devexit dw_spi_remove_host(struct dw_spi *dws)
 		dev_err(&dws->master->dev, "dw_spi_remove: workqueue will not "
 			"complete, message memory not freed\n");
 
+	if (dws->dma_ops && dws->dma_ops->dma_exit)
+		dws->dma_ops->dma_exit(dws);
 	spi_enable_chip(dws, 0);
 	/* Disable clk */
 	spi_set_clk(dws, 0);
diff --git a/drivers/spi/dw_spi_mid.c b/drivers/spi/dw_spi_mid.c
new file mode 100644
index 000000000000..c91c966e0717
--- /dev/null
+++ b/drivers/spi/dw_spi_mid.c
@@ -0,0 +1,223 @@
+/*
+ * dw_spi_mid.c - special handling for DW core on Intel MID platform
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/dw_spi.h>
+
+#ifdef CONFIG_SPI_DW_MID_DMA
+#include <linux/intel_mid_dma.h>
+#include <linux/pci.h>
+
+struct mid_dma {
+	struct intel_mid_dma_slave	dmas_tx;
+	struct intel_mid_dma_slave	dmas_rx;
+};
+
+static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_spi *dws = param;
+
+	return dws->dmac && (&dws->dmac->dev == chan->device->dev);
+}
+
+static int mid_spi_dma_init(struct dw_spi *dws)
+{
+	struct mid_dma *dw_dma = dws->dma_priv;
+	struct intel_mid_dma_slave *rxs, *txs;
+	dma_cap_mask_t mask;
+
+	/*
+	 * Get pci device for DMA controller, currently it could only
+	 * be the DMA controller of either Moorestown or Medfield
+	 */
+	dws->dmac = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0813, NULL);
+	if (!dws->dmac)
+		dws->dmac = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* 1. Init rx channel */
+	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	if (!dws->rxchan)
+		goto err_exit;
+	rxs = &dw_dma->dmas_rx;
+	rxs->hs_mode = LNW_DMA_HW_HS;
+	rxs->cfg_mode = LNW_DMA_PER_TO_MEM;
+	dws->rxchan->private = rxs;
+
+	/* 2. Init tx channel */
+	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	if (!dws->txchan)
+		goto free_rxchan;
+	txs = &dw_dma->dmas_tx;
+	txs->hs_mode = LNW_DMA_HW_HS;
+	txs->cfg_mode = LNW_DMA_MEM_TO_PER;
+	dws->txchan->private = txs;
+
+	dws->dma_inited = 1;
+	return 0;
+
+free_rxchan:
+	dma_release_channel(dws->rxchan);
+err_exit:
+	return -1;
+
+}
+
+static void mid_spi_dma_exit(struct dw_spi *dws)
+{
+	dma_release_channel(dws->txchan);
+	dma_release_channel(dws->rxchan);
+}
+
+/*
+ * dws->dma_chan_done is cleared before the dma transfer starts,
+ * callback for rx/tx channel will each increment it by 1.
+ * Reaching 2 means the whole spi transaction is done.
+ */
+static void dw_spi_dma_done(void *arg)
+{
+	struct dw_spi *dws = arg;
+
+	if (++dws->dma_chan_done != 2)
+		return;
+	dw_spi_xfer_done(dws);
+}
+
+static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
+{
+	struct dma_async_tx_descriptor *txdesc = NULL, *rxdesc = NULL;
+	struct dma_chan *txchan, *rxchan;
+	struct dma_slave_config txconf, rxconf;
+	u16 dma_ctrl = 0;
+
+	/* 1. setup DMA related registers */
+	if (cs_change) {
+		spi_enable_chip(dws, 0);
+		dw_writew(dws, dmardlr, 0xf);
+		dw_writew(dws, dmatdlr, 0x10);
+		if (dws->tx_dma)
+			dma_ctrl |= 0x2;
+		if (dws->rx_dma)
+			dma_ctrl |= 0x1;
+		dw_writew(dws, dmacr, dma_ctrl);
+		spi_enable_chip(dws, 1);
+	}
+
+	dws->dma_chan_done = 0;
+	txchan = dws->txchan;
+	rxchan = dws->rxchan;
+
+	/* 2. Prepare the TX dma transfer */
+	txconf.direction = DMA_TO_DEVICE;
+	txconf.dst_addr = dws->dma_addr;
+	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	txchan->device->device_control(txchan, DMA_SLAVE_CONFIG,
+				       (unsigned long) &txconf);
+
+	memset(&dws->tx_sgl, 0, sizeof(dws->tx_sgl));
+	dws->tx_sgl.dma_address = dws->tx_dma;
+	dws->tx_sgl.length = dws->len;
+
+	txdesc = txchan->device->device_prep_slave_sg(txchan,
+				&dws->tx_sgl,
+				1,
+				DMA_TO_DEVICE,
+				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+	txdesc->callback = dw_spi_dma_done;
+	txdesc->callback_param = dws;
+
+	/* 3. Prepare the RX dma transfer */
+	rxconf.direction = DMA_FROM_DEVICE;
+	rxconf.src_addr = dws->dma_addr;
+	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG,
+				       (unsigned long) &rxconf);
+
+	memset(&dws->rx_sgl, 0, sizeof(dws->rx_sgl));
+	dws->rx_sgl.dma_address = dws->rx_dma;
+	dws->rx_sgl.length = dws->len;
+
+	rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
+				&dws->rx_sgl,
+				1,
+				DMA_FROM_DEVICE,
+				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+	rxdesc->callback = dw_spi_dma_done;
+	rxdesc->callback_param = dws;
+
+	/* rx must be started before tx due to spi instinct */
+	rxdesc->tx_submit(rxdesc);
+	txdesc->tx_submit(txdesc);
+	return 0;
+}
+
+static struct dw_spi_dma_ops mid_dma_ops = {
+	.dma_init	= mid_spi_dma_init,
+	.dma_exit	= mid_spi_dma_exit,
+	.dma_transfer	= mid_spi_dma_transfer,
+};
+#endif
+
+/* Some specific info for SPI0 controller on Moorestown */
+
+/* HW info for MRST CLk Control Unit, one 32b reg */
+#define MRST_SPI_CLK_BASE	100000000	/* 100m */
+#define MRST_CLK_SPI0_REG	0xff11d86c
+#define CLK_SPI_BDIV_OFFSET	0
+#define CLK_SPI_BDIV_MASK	0x00000007
+#define CLK_SPI_CDIV_OFFSET	9
+#define CLK_SPI_CDIV_MASK	0x00000e00
+#define CLK_SPI_DISABLE_OFFSET	8
+
+int dw_spi_mid_init(struct dw_spi *dws)
+{
+	u32 *clk_reg, clk_cdiv;
+
+	clk_reg = ioremap_nocache(MRST_CLK_SPI0_REG, 16);
+	if (!clk_reg)
+		return -ENOMEM;
+
+	/* get SPI controller operating freq info */
+	clk_cdiv  = (readl(clk_reg) & CLK_SPI_CDIV_MASK) >> CLK_SPI_CDIV_OFFSET;
+	dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1);
+	iounmap(clk_reg);
+
+	dws->num_cs = 16;
+	dws->fifo_len = 40;	/* FIFO has 40 words buffer */
+
+#ifdef CONFIG_SPI_DW_MID_DMA
+	dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL);
+	if (!dws->dma_priv)
+		return -ENOMEM;
+	dws->dma_ops = &mid_dma_ops;
+#endif
+	return 0;
+}
diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c
index 1f52755dc878..49ec3aa1219f 100644
--- a/drivers/spi/dw_spi_pci.c
+++ b/drivers/spi/dw_spi_pci.c
@@ -1,5 +1,5 @@
 /*
- * mrst_spi_pci.c - PCI interface driver for DW SPI Core
+ * dw_spi_pci.c - PCI interface driver for DW SPI Core
  *
  * Copyright (c) 2009, Intel Corporation.
  *
@@ -26,8 +26,8 @@
 #define DRIVER_NAME "dw_spi_pci"
 
 struct dw_spi_pci {
-	struct pci_dev		*pdev;
-	struct dw_spi		dws;
+	struct pci_dev	*pdev;
+	struct dw_spi	dws;
 };
 
 static int __devinit spi_pci_probe(struct pci_dev *pdev,
@@ -72,9 +72,17 @@ static int __devinit spi_pci_probe(struct pci_dev *pdev,
 	dws->parent_dev = &pdev->dev;
 	dws->bus_num = 0;
 	dws->num_cs = 4;
-	dws->max_freq = 25000000;	/* for Moorestwon */
 	dws->irq = pdev->irq;
-	dws->fifo_len = 40;		/* FIFO has 40 words buffer */
+
+	/*
+	 * Specific handling for Intel MID paltforms, like dma setup,
+	 * clock rate, FIFO depth.
+	 */
+	if (pdev->device == 0x0800) {
+		ret = dw_spi_mid_init(dws);
+		if (ret)
+			goto err_unmap;
+	}
 
 	ret = dw_spi_add_host(dws);
 	if (ret)
@@ -140,7 +148,7 @@ static int spi_resume(struct pci_dev *pdev)
 #endif
 
 static const struct pci_device_id pci_ids[] __devinitdata = {
-	/* Intel Moorestown platform SPI controller 0 */
+	/* Intel MID platform SPI controller 0 */
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0800) },
 	{},
 };
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
index c91302f3a257..6cd10f6ad472 100644
--- a/include/linux/spi/dw_spi.h
+++ b/include/linux/spi/dw_spi.h
@@ -1,5 +1,6 @@
 #ifndef DW_SPI_HEADER_H
 #define DW_SPI_HEADER_H
+
 #include <linux/io.h>
 
 /* Bit fields in CTRLR0 */
@@ -82,6 +83,13 @@ struct dw_spi_reg {
 				though only low 16 bits matters */
 } __packed;
 
+struct dw_spi;
+struct dw_spi_dma_ops {
+	int (*dma_init)(struct dw_spi *dws);
+	void (*dma_exit)(struct dw_spi *dws);
+	int (*dma_transfer)(struct dw_spi *dws, int cs_change);
+};
+
 struct dw_spi {
 	struct spi_master	*master;
 	struct spi_device	*cur_dev;
@@ -136,13 +144,15 @@ struct dw_spi {
 	/* Dma info */
 	int			dma_inited;
 	struct dma_chan		*txchan;
+	struct scatterlist	tx_sgl;
 	struct dma_chan		*rxchan;
-	int			txdma_done;
-	int			rxdma_done;
-	u64			tx_param;
-	u64			rx_param;
+	struct scatterlist	rx_sgl;
+	int			dma_chan_done;
 	struct device		*dma_dev;
-	dma_addr_t		dma_addr;
+	dma_addr_t		dma_addr; /* phy address of the Data register */
+	struct dw_spi_dma_ops	*dma_ops;
+	void			*dma_priv; /* platform relate info */
+	struct pci_dev		*dmac;
 
 	/* Bus interface info */
 	void			*priv;
@@ -216,4 +226,8 @@ extern int dw_spi_add_host(struct dw_spi *dws);
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
+extern void dw_spi_xfer_done(struct dw_spi *dws);
+
+/* platform related setup */
+extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
 #endif /* DW_SPI_HEADER_H */
-- 
cgit v1.2.3-71-gd317


From c7b61de5b7b17f0df34dc7d2f8b9576f8bd36fce Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 1 Dec 2010 00:14:42 +0100
Subject: PM / Runtime: Add synchronous runtime interface for interrupt
 handlers (v3)

This patch (as1431c) makes the synchronous runtime-PM interface
suitable for use in interrupt handlers.  Subsystems can call the new
pm_runtime_irq_safe() function to tell the PM core that a device's
runtime_suspend and runtime_resume callbacks should be invoked with
interrupts disabled and the spinlock held.  This permits the
pm_runtime_get_sync() and the new pm_runtime_put_sync_suspend()
routines to be called from within interrupt handlers.

When a device is declared irq-safe in this way, the PM core increments
the parent's usage count, so the parent will never be runtime
suspended.  This prevents difficult situations in which an irq-safe
device can't resume because it is forced to wait for its non-irq-safe
parent.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/power/runtime_pm.txt | 31 +++++++++++++++++++++++++
 drivers/base/power/runtime.c       | 47 ++++++++++++++++++++++++++++++--------
 include/linux/pm.h                 |  1 +
 include/linux/pm_runtime.h         |  7 ++++++
 4 files changed, 77 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 41cc7b30d7dd..ffe55ffa540a 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -50,6 +50,15 @@ type's callbacks are not defined) of given device.  The bus type, device type
 and device class callbacks are referred to as subsystem-level callbacks in what
 follows.
 
+By default, the callbacks are always invoked in process context with interrupts
+enabled.  However, subsystems can use the pm_runtime_irq_safe() helper function
+to tell the PM core that a device's ->runtime_suspend() and ->runtime_resume()
+callbacks should be invoked in atomic context with interrupts disabled
+(->runtime_idle() is still invoked the default way).  This implies that these
+callback routines must not block or sleep, but it also means that the
+synchronous helper functions listed at the end of Section 4 can be used within
+an interrupt handler or in an atomic context.
+
 The subsystem-level suspend callback is _entirely_ _responsible_ for handling
 the suspend of the device as appropriate, which may, but need not include
 executing the device driver's own ->runtime_suspend() callback (from the
@@ -237,6 +246,10 @@ defined in include/linux/pm.h:
       Section 8); it may be modified only by the pm_runtime_no_callbacks()
       helper function
 
+  unsigned int irq_safe;
+    - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks
+      will be invoked with the spinlock held and interrupts disabled
+
   unsigned int use_autosuspend;
     - indicates that the device's driver supports delayed autosuspend (see
       Section 9); it may be modified only by the
@@ -344,6 +357,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
     - decrement the device's usage counter; if the result is 0 then run
       pm_runtime_idle(dev) and return its result
 
+  int pm_runtime_put_sync_suspend(struct device *dev);
+    - decrement the device's usage counter; if the result is 0 then run
+      pm_runtime_suspend(dev) and return its result
+
   int pm_runtime_put_sync_autosuspend(struct device *dev);
     - decrement the device's usage counter; if the result is 0 then run
       pm_runtime_autosuspend(dev) and return its result
@@ -397,6 +414,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
       PM attributes from /sys/devices/.../power (or prevent them from being
       added when the device is registered)
 
+  void pm_runtime_irq_safe(struct device *dev);
+    - set the power.irq_safe flag for the device, causing the runtime-PM
+      suspend and resume callbacks (but not the idle callback) to be invoked
+      with interrupts disabled
+
   void pm_runtime_mark_last_busy(struct device *dev);
     - set the power.last_busy field to the current time
 
@@ -438,6 +460,15 @@ pm_runtime_suspended()
 pm_runtime_mark_last_busy()
 pm_runtime_autosuspend_expiration()
 
+If pm_runtime_irq_safe() has been called for a device then the following helper
+functions may also be used in interrupt context:
+
+pm_runtime_suspend()
+pm_runtime_autosuspend()
+pm_runtime_resume()
+pm_runtime_get_sync()
+pm_runtime_put_sync_suspend()
+
 5. Run-time PM Initialization, Device Probing and Removal
 
 Initially, the run-time PM is disabled for all devices, which means that the
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 02c652be83e7..656493a5e073 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -250,13 +250,16 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
 	if (!cb)
 		return -ENOSYS;
 
-	spin_unlock_irq(&dev->power.lock);
+	if (dev->power.irq_safe) {
+		retval = cb(dev);
+	} else {
+		spin_unlock_irq(&dev->power.lock);
 
-	retval = cb(dev);
+		retval = cb(dev);
 
-	spin_lock_irq(&dev->power.lock);
+		spin_lock_irq(&dev->power.lock);
+	}
 	dev->power.runtime_error = retval;
-
 	return retval;
 }
 
@@ -404,7 +407,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 		goto out;
 	}
 
-	if (parent && !parent->power.ignore_children) {
+	if (parent && !parent->power.ignore_children && !dev->power.irq_safe) {
 		spin_unlock_irq(&dev->power.lock);
 
 		pm_request_idle(parent);
@@ -527,10 +530,13 @@ static int rpm_resume(struct device *dev, int rpmflags)
 
 	if (!parent && dev->parent) {
 		/*
-		 * Increment the parent's resume counter and resume it if
-		 * necessary.
+		 * Increment the parent's usage counter and resume it if
+		 * necessary.  Not needed if dev is irq-safe; then the
+		 * parent is permanently resumed.
 		 */
 		parent = dev->parent;
+		if (dev->power.irq_safe)
+			goto skip_parent;
 		spin_unlock(&dev->power.lock);
 
 		pm_runtime_get_noresume(parent);
@@ -553,6 +559,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
 			goto out;
 		goto repeat;
 	}
+ skip_parent:
 
 	if (dev->power.no_callbacks)
 		goto no_callback;	/* Assume success. */
@@ -584,7 +591,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
 		rpm_idle(dev, RPM_ASYNC);
 
  out:
-	if (parent) {
+	if (parent && !dev->power.irq_safe) {
 		spin_unlock_irq(&dev->power.lock);
 
 		pm_runtime_put(parent);
@@ -1065,7 +1072,6 @@ EXPORT_SYMBOL_GPL(pm_runtime_allow);
  * Set the power.no_callbacks flag, which tells the PM core that this
  * device is power-managed through its parent and has no run-time PM
  * callbacks of its own.  The run-time sysfs attributes will be removed.
- *
  */
 void pm_runtime_no_callbacks(struct device *dev)
 {
@@ -1077,6 +1083,27 @@ void pm_runtime_no_callbacks(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks);
 
+/**
+ * pm_runtime_irq_safe - Leave interrupts disabled during callbacks.
+ * @dev: Device to handle
+ *
+ * Set the power.irq_safe flag, which tells the PM core that the
+ * ->runtime_suspend() and ->runtime_resume() callbacks for this device should
+ * always be invoked with the spinlock held and interrupts disabled.  It also
+ * causes the parent's usage counter to be permanently incremented, preventing
+ * the parent from runtime suspending -- otherwise an irq-safe child might have
+ * to wait for a non-irq-safe parent.
+ */
+void pm_runtime_irq_safe(struct device *dev)
+{
+	if (dev->parent)
+		pm_runtime_get_sync(dev->parent);
+	spin_lock_irq(&dev->power.lock);
+	dev->power.irq_safe = 1;
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_irq_safe);
+
 /**
  * update_autosuspend - Handle a change to a device's autosuspend settings.
  * @dev: Device to handle.
@@ -1199,4 +1226,6 @@ void pm_runtime_remove(struct device *dev)
 	/* Change the status back to 'suspended' to match the initial status. */
 	if (dev->power.runtime_status == RPM_ACTIVE)
 		pm_runtime_set_suspended(dev);
+	if (dev->power.irq_safe && dev->parent)
+		pm_runtime_put_sync(dev->parent);
 }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 40f3f45702ba..61f2066e6852 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -486,6 +486,7 @@ struct dev_pm_info {
 	unsigned int		run_wake:1;
 	unsigned int		runtime_auto:1;
 	unsigned int		no_callbacks:1;
+	unsigned int		irq_safe:1;
 	unsigned int		use_autosuspend:1;
 	unsigned int		timer_autosuspends:1;
 	enum rpm_request	request;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d19f1cca7f74..e9cc049ccb62 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -40,6 +40,7 @@ extern int pm_generic_runtime_idle(struct device *dev);
 extern int pm_generic_runtime_suspend(struct device *dev);
 extern int pm_generic_runtime_resume(struct device *dev);
 extern void pm_runtime_no_callbacks(struct device *dev);
+extern void pm_runtime_irq_safe(struct device *dev);
 extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
 extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
 extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
@@ -124,6 +125,7 @@ static inline int pm_generic_runtime_idle(struct device *dev) { return 0; }
 static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
 static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
 static inline void pm_runtime_no_callbacks(struct device *dev) {}
+static inline void pm_runtime_irq_safe(struct device *dev) {}
 
 static inline void pm_runtime_mark_last_busy(struct device *dev) {}
 static inline void __pm_runtime_use_autosuspend(struct device *dev,
@@ -196,6 +198,11 @@ static inline int pm_runtime_put_sync(struct device *dev)
 	return __pm_runtime_idle(dev, RPM_GET_PUT);
 }
 
+static inline int pm_runtime_put_sync_suspend(struct device *dev)
+{
+	return __pm_runtime_suspend(dev, RPM_GET_PUT);
+}
+
 static inline int pm_runtime_put_sync_autosuspend(struct device *dev)
 {
 	return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO);
-- 
cgit v1.2.3-71-gd317


From a2867e08c8e3bdbc00caf56bc3bdde19ccc058e3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 3 Dec 2010 22:58:31 +0100
Subject: PM / Wakeup: Replace pm_check_wakeup_events() with
 pm_wakeup_pending()

To avoid confusion with the meaning and return value of
pm_check_wakeup_events() replace it with pm_wakeup_pending() that
will work the other way around (ie. return true when system-wide
power transition should be aborted).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c   |  2 +-
 drivers/base/power/wakeup.c | 20 ++++++++++----------
 include/linux/suspend.h     |  4 ++--
 kernel/power/hibernate.c    |  4 ++--
 kernel/power/process.c      |  2 +-
 kernel/power/suspend.c      |  2 +-
 6 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 4747a1e8b44a..8a5258339ca2 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1056,7 +1056,7 @@ static int dpm_prepare(pm_message_t state)
 		if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
 			pm_wakeup_event(dev, 0);
 
-		if (!pm_check_wakeup_events()) {
+		if (pm_wakeup_pending()) {
 			pm_runtime_put_sync(dev);
 			error = -EBUSY;
 		} else {
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 71c5528e1c35..8ec406d8f548 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -542,26 +542,26 @@ static void pm_wakeup_update_hit_counts(void)
 }
 
 /**
- * pm_check_wakeup_events - Check for new wakeup events.
+ * pm_wakeup_pending - Check if power transition in progress should be aborted.
  *
  * Compare the current number of registered wakeup events with its preserved
- * value from the past to check if new wakeup events have been registered since
- * the old value was stored.  Check if the current number of wakeup events being
- * processed is zero.
+ * value from the past and return true if new wakeup events have been registered
+ * since the old value was stored.  Also return true if the current number of
+ * wakeup events being processed is different from zero.
  */
-bool pm_check_wakeup_events(void)
+bool pm_wakeup_pending(void)
 {
 	unsigned long flags;
-	bool ret = true;
+	bool ret = false;
 
 	spin_lock_irqsave(&events_lock, flags);
 	if (events_check_enabled) {
-		ret = ((unsigned int)atomic_read(&event_count) == saved_count)
-			&& !atomic_read(&events_in_progress);
-		events_check_enabled = ret;
+		ret = ((unsigned int)atomic_read(&event_count) != saved_count)
+			|| atomic_read(&events_in_progress);
+		events_check_enabled = !ret;
 	}
 	spin_unlock_irqrestore(&events_lock, flags);
-	if (!ret)
+	if (ret)
 		pm_wakeup_update_hit_counts();
 	return ret;
 }
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 26697514c5ec..144b34be5c32 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -292,7 +292,7 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
 /* drivers/base/power/wakeup.c */
 extern bool events_check_enabled;
 
-extern bool pm_check_wakeup_events(void);
+extern bool pm_wakeup_pending(void);
 extern bool pm_get_wakeup_count(unsigned int *count);
 extern bool pm_save_wakeup_count(unsigned int count);
 #else /* !CONFIG_PM_SLEEP */
@@ -309,7 +309,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
-static inline bool pm_check_wakeup_events(void) { return true; }
+static inline bool pm_wakeup_pending(void) { return false; }
 #endif /* !CONFIG_PM_SLEEP */
 
 extern struct mutex pm_mutex;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c9a98beffee4..870f72bc72ae 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -278,7 +278,7 @@ static int create_image(int platform_mode)
 		goto Enable_irqs;
 	}
 
-	if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
+	if (hibernation_test(TEST_CORE) || pm_wakeup_pending())
 		goto Power_up;
 
 	in_suspend = 1;
@@ -516,7 +516,7 @@ int hibernation_platform_enter(void)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_HIBERNATE);
-	if (!pm_check_wakeup_events()) {
+	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
 		goto Power_up;
 	}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index eb2c88a9e562..d6d2a10320e0 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -85,7 +85,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		if (!todo || time_after(jiffies, end_time))
 			break;
 
-		if (!pm_check_wakeup_events()) {
+		if (pm_wakeup_pending()) {
 			wakeup = true;
 			break;
 		}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index ecf770509d0d..5e644e3a6bf3 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,7 +163,7 @@ static int suspend_enter(suspend_state_t state)
 
 	error = sysdev_suspend(PMSG_SUSPEND);
 	if (!error) {
-		if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
+		if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
 			error = suspend_ops->enter(state);
 			events_check_enabled = false;
 		}
-- 
cgit v1.2.3-71-gd317


From b8c76f6aed0ab7df73a6410f3f82de2c831bb144 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 16 Dec 2010 00:51:21 +0100
Subject: PM: Replace the device power.status field with a bit field

The device power.status field is too complicated for its purpose
(storing the information about whether or not the device is in the
"active" state from the PM core's point of view), so replace it with
a bit field and modify all of its users accordingly.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c | 17 +++++------------
 drivers/usb/core/driver.c |  7 +++----
 include/linux/device.h    |  4 ++--
 include/linux/pm.h        | 43 ++-----------------------------------------
 4 files changed, 12 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index bb5c8cb64174..a90480baa850 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -63,7 +63,7 @@ static int async_error;
  */
 void device_pm_init(struct device *dev)
 {
-	dev->power.status = DPM_ON;
+	dev->power.in_suspend = false;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
 	dev->power.wakeup = NULL;
@@ -98,7 +98,7 @@ void device_pm_add(struct device *dev)
 		 kobject_name(&dev->kobj));
 	mutex_lock(&dpm_list_mtx);
 	if (dev->parent) {
-		if (dev->parent->power.status >= DPM_SUSPENDING)
+		if (dev->parent->power.in_suspend)
 			dev_warn(dev, "parent %s should not be sleeping\n",
 				 dev_name(dev->parent));
 	} else if (transition_started) {
@@ -488,7 +488,6 @@ void dpm_resume_noirq(pm_message_t state)
 		int error;
 
 		get_device(dev);
-		dev->power.status = DPM_OFF;
 		list_move_tail(&dev->power.entry, &dpm_suspended_list);
 		mutex_unlock(&dpm_list_mtx);
 
@@ -541,7 +540,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	dpm_wait(dev->parent, async);
 	device_lock(dev);
 
-	dev->power.status = DPM_RESUMING;
+	dev->power.in_suspend = false;
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
@@ -690,7 +689,7 @@ static void dpm_complete(pm_message_t state)
 		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
-		dev->power.status = DPM_ON;
+		dev->power.in_suspend = false;
 		list_move(&dev->power.entry, &list);
 		mutex_unlock(&dpm_list_mtx);
 
@@ -806,7 +805,6 @@ int dpm_suspend_noirq(pm_message_t state)
 			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_OFF_IRQ;
 		if (!list_empty(&dev->power.entry))
 			list_move(&dev->power.entry, &dpm_noirq_list);
 		put_device(dev);
@@ -894,9 +892,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		}
 	}
 
-	if (!error)
-		dev->power.status = DPM_OFF;
-
  End:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
@@ -1030,7 +1025,6 @@ static int dpm_prepare(pm_message_t state)
 		struct device *dev = to_device(dpm_list.next);
 
 		get_device(dev);
-		dev->power.status = DPM_PREPARING;
 		mutex_unlock(&dpm_list_mtx);
 
 		pm_runtime_get_noresume(dev);
@@ -1046,7 +1040,6 @@ static int dpm_prepare(pm_message_t state)
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
-			dev->power.status = DPM_ON;
 			if (error == -EAGAIN) {
 				put_device(dev);
 				error = 0;
@@ -1058,7 +1051,7 @@ static int dpm_prepare(pm_message_t state)
 			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_SUSPENDING;
+		dev->power.in_suspend = true;
 		if (!list_empty(&dev->power.entry))
 			list_move_tail(&dev->power.entry, &dpm_prepared_list);
 		put_device(dev);
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index c0e60fbcb048..4ec50224ee86 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -376,7 +376,7 @@ static int usb_unbind_interface(struct device *dev)
 		 * Just re-enable it without affecting the endpoint toggles.
 		 */
 		usb_enable_interface(udev, intf, false);
-	} else if (!error && intf->dev.power.status == DPM_ON) {
+	} else if (!error && !intf->dev.power.in_suspend) {
 		r = usb_set_interface(udev, intf->altsetting[0].
 				desc.bInterfaceNumber, 0);
 		if (r < 0)
@@ -961,7 +961,7 @@ void usb_rebind_intf(struct usb_interface *intf)
 	}
 
 	/* Try to rebind the interface */
-	if (intf->dev.power.status == DPM_ON) {
+	if (!intf->dev.power.in_suspend) {
 		intf->needs_binding = 0;
 		rc = device_attach(&intf->dev);
 		if (rc < 0)
@@ -1108,8 +1108,7 @@ static int usb_resume_interface(struct usb_device *udev,
 	if (intf->condition == USB_INTERFACE_UNBOUND) {
 
 		/* Carry out a deferred switch to altsetting 0 */
-		if (intf->needs_altsetting0 &&
-				intf->dev.power.status == DPM_ON) {
+		if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) {
 			usb_set_interface(udev, intf->altsetting[0].
 					desc.bInterfaceNumber, 0);
 			intf->needs_altsetting0 = 0;
diff --git a/include/linux/device.h b/include/linux/device.h
index dd4895313468..45bc8c1669d2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -508,13 +508,13 @@ static inline int device_is_registered(struct device *dev)
 
 static inline void device_enable_async_suspend(struct device *dev)
 {
-	if (dev->power.status == DPM_ON)
+	if (!dev->power.in_suspend)
 		dev->power.async_suspend = true;
 }
 
 static inline void device_disable_async_suspend(struct device *dev)
 {
-	if (dev->power.status == DPM_ON)
+	if (!dev->power.in_suspend)
 		dev->power.async_suspend = false;
 }
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 61f2066e6852..c1756dfeb8c5 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -366,45 +366,6 @@ extern struct dev_pm_ops generic_subsys_pm_ops;
 #define PMSG_AUTO_RESUME	((struct pm_message) \
 					{ .event = PM_EVENT_AUTO_RESUME, })
 
-/**
- * Device power management states
- *
- * These state labels are used internally by the PM core to indicate the current
- * status of a device with respect to the PM core operations.
- *
- * DPM_ON		Device is regarded as operational.  Set this way
- *			initially and when ->complete() is about to be called.
- *			Also set when ->prepare() fails.
- *
- * DPM_PREPARING	Device is going to be prepared for a PM transition.  Set
- *			when ->prepare() is about to be called.
- *
- * DPM_RESUMING		Device is going to be resumed.  Set when ->resume(),
- *			->thaw(), or ->restore() is about to be called.
- *
- * DPM_SUSPENDING	Device has been prepared for a power transition.  Set
- *			when ->prepare() has just succeeded.
- *
- * DPM_OFF		Device is regarded as inactive.  Set immediately after
- *			->suspend(), ->freeze(), or ->poweroff() has succeeded.
- *			Also set when ->resume()_noirq, ->thaw_noirq(), or
- *			->restore_noirq() is about to be called.
- *
- * DPM_OFF_IRQ		Device is in a "deep sleep".  Set immediately after
- *			->suspend_noirq(), ->freeze_noirq(), or
- *			->poweroff_noirq() has just succeeded.
- */
-
-enum dpm_state {
-	DPM_INVALID,
-	DPM_ON,
-	DPM_PREPARING,
-	DPM_RESUMING,
-	DPM_SUSPENDING,
-	DPM_OFF,
-	DPM_OFF_IRQ,
-};
-
 /**
  * Device run-time power management status.
  *
@@ -463,8 +424,8 @@ struct wakeup_source;
 struct dev_pm_info {
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
-	unsigned		async_suspend:1;
-	enum dpm_state		status;		/* Owned by the PM core */
+	unsigned int		async_suspend:1;
+	unsigned int		in_suspend:1;	/* Owned by the PM core */
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
-- 
cgit v1.2.3-71-gd317


From 4b31db8a16fa0d4d6a0fa42d044e7a4f4dad3641 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 24 Dec 2010 15:04:06 +0100
Subject: PM / Runtime: Generic resume shouldn't set RPM_ACTIVE unconditionally

The __pm_generic_resume() function changes the given device's runtime
PM status to RPM_ACTIVE if its driver's callback returns 0, but it
only should do that if the rumtime PM is enabled for the device.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/generic_ops.c | 2 +-
 include/linux/pm_runtime.h       | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 3d2c3500069a..42f97f925629 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -185,7 +185,7 @@ static int __pm_generic_resume(struct device *dev, int event)
 		return 0;
 
 	ret = callback(dev);
-	if (!ret) {
+	if (!ret && pm_runtime_enabled(dev)) {
 		pm_runtime_disable(dev);
 		pm_runtime_set_active(dev);
 		pm_runtime_enable(dev);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index e9cc049ccb62..d34f067e2a7f 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -82,6 +82,11 @@ static inline bool pm_runtime_suspended(struct device *dev)
 		&& !dev->power.disable_depth;
 }
 
+static inline bool pm_runtime_enabled(struct device *dev)
+{
+	return !dev->power.disable_depth;
+}
+
 static inline void pm_runtime_mark_last_busy(struct device *dev)
 {
 	ACCESS_ONCE(dev->power.last_busy) = jiffies;
@@ -120,6 +125,7 @@ static inline void pm_runtime_put_noidle(struct device *dev) {}
 static inline bool device_run_wake(struct device *dev) { return false; }
 static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
+static inline bool pm_runtime_enabled(struct device *dev) { return false; }
 
 static inline int pm_generic_runtime_idle(struct device *dev) { return 0; }
 static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
-- 
cgit v1.2.3-71-gd317


From 62bcb91573425975d6ad2389d7ab1d8feca88ab4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 24 Dec 2010 15:04:41 +0100
Subject: PM: Prototype the pm_generic_ operations

The pm_generic_ operations are all exported but are not prototyped in any
header file for direct use. Do so.

[rjw: Added extern.]

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index c1756dfeb8c5..dd9c7ab38270 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -572,4 +572,11 @@ extern unsigned int	pm_flags;
 #define PM_APM	1
 #define PM_ACPI	2
 
+extern int pm_generic_suspend(struct device *dev);
+extern int pm_generic_resume(struct device *dev);
+extern int pm_generic_freeze(struct device *dev);
+extern int pm_generic_thaw(struct device *dev);
+extern int pm_generic_restore(struct device *dev);
+extern int pm_generic_poweroff(struct device *dev);
+
 #endif /* _LINUX_PM_H */
-- 
cgit v1.2.3-71-gd317


From eac8ae087ac66b21de94fee3e920210b43d43076 Mon Sep 17 00:00:00 2001
From: Mariusz Białończyk <manio@skyboo.net>
Date: Mon, 15 Nov 2010 15:50:13 -0300
Subject: [media] Fix rc-tbs-nec table after converting the cx88 driver to
 ir-core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The patch fixes the rc-tbs-nec table after converting
drivers/media/video/cx88 to ir-core
(commit ba7e90c9f878e0ac3c0614a5446fe5c62ccc33ec).

It is also adds two missing buttons (10- and 10+) with
its definition (KEY_10CHANNELSUP and KEY_10CHANNELSDOWN).

[mchehab@redhat.com: move keycode numbers to 0x1b8/0x1b9 as requested by the input Maintainer]
Signed-off-by: Mariusz Białończyk <manio@skyboo.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/rc/keymaps/rc-tbs-nec.c | 66 ++++++++++++++++++-----------------
 include/linux/input.h                 |  2 ++
 2 files changed, 36 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/rc/keymaps/rc-tbs-nec.c b/drivers/media/rc/keymaps/rc-tbs-nec.c
index 15b9a9b80767..7242ee66f6e0 100644
--- a/drivers/media/rc/keymaps/rc-tbs-nec.c
+++ b/drivers/media/rc/keymaps/rc-tbs-nec.c
@@ -13,38 +13,40 @@
 #include <media/rc-map.h>
 
 static struct rc_map_table tbs_nec[] = {
-	{ 0x04, KEY_POWER2},	/*power*/
-	{ 0x14, KEY_MUTE},	/*mute*/
-	{ 0x07, KEY_1},
-	{ 0x06, KEY_2},
-	{ 0x05, KEY_3},
-	{ 0x0b, KEY_4},
-	{ 0x0a, KEY_5},
-	{ 0x09, KEY_6},
-	{ 0x0f, KEY_7},
-	{ 0x0e, KEY_8},
-	{ 0x0d, KEY_9},
-	{ 0x12, KEY_0},
-	{ 0x16, KEY_CHANNELUP},	/*ch+*/
-	{ 0x11, KEY_CHANNELDOWN},/*ch-*/
-	{ 0x13, KEY_VOLUMEUP},	/*vol+*/
-	{ 0x0c, KEY_VOLUMEDOWN},/*vol-*/
-	{ 0x03, KEY_RECORD},	/*rec*/
-	{ 0x18, KEY_PAUSE},	/*pause*/
-	{ 0x19, KEY_OK},	/*ok*/
-	{ 0x1a, KEY_CAMERA},	/* snapshot */
-	{ 0x01, KEY_UP},
-	{ 0x10, KEY_LEFT},
-	{ 0x02, KEY_RIGHT},
-	{ 0x08, KEY_DOWN},
-	{ 0x15, KEY_FAVORITES},
-	{ 0x17, KEY_SUBTITLE},
-	{ 0x1d, KEY_ZOOM},
-	{ 0x1f, KEY_EXIT},
-	{ 0x1e, KEY_MENU},
-	{ 0x1c, KEY_EPG},
-	{ 0x00, KEY_PREVIOUS},
-	{ 0x1b, KEY_MODE},
+	{ 0x84, KEY_POWER2},		/* power */
+	{ 0x94, KEY_MUTE},		/* mute */
+	{ 0x87, KEY_1},
+	{ 0x86, KEY_2},
+	{ 0x85, KEY_3},
+	{ 0x8b, KEY_4},
+	{ 0x8a, KEY_5},
+	{ 0x89, KEY_6},
+	{ 0x8f, KEY_7},
+	{ 0x8e, KEY_8},
+	{ 0x8d, KEY_9},
+	{ 0x92, KEY_0},
+	{ 0xc0, KEY_10CHANNELSUP},	/* 10+ */
+	{ 0xd0, KEY_10CHANNELSDOWN},	/* 10- */
+	{ 0x96, KEY_CHANNELUP},		/* ch+ */
+	{ 0x91, KEY_CHANNELDOWN},	/* ch- */
+	{ 0x93, KEY_VOLUMEUP},		/* vol+ */
+	{ 0x8c, KEY_VOLUMEDOWN},	/* vol- */
+	{ 0x83, KEY_RECORD},		/* rec */
+	{ 0x98, KEY_PAUSE},		/* pause, yellow */
+	{ 0x99, KEY_OK},		/* ok */
+	{ 0x9a, KEY_CAMERA},		/* snapshot */
+	{ 0x81, KEY_UP},
+	{ 0x90, KEY_LEFT},
+	{ 0x82, KEY_RIGHT},
+	{ 0x88, KEY_DOWN},
+	{ 0x95, KEY_FAVORITES},		/* blue */
+	{ 0x97, KEY_SUBTITLE},		/* green */
+	{ 0x9d, KEY_ZOOM},
+	{ 0x9f, KEY_EXIT},
+	{ 0x9e, KEY_MENU},
+	{ 0x9c, KEY_EPG},
+	{ 0x80, KEY_PREVIOUS},		/* red */
+	{ 0x9b, KEY_MODE},
 };
 
 static struct rc_map_list tbs_nec_map = {
diff --git a/include/linux/input.h b/include/linux/input.h
index 9777668883be..f7a6e1966df3 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -590,6 +590,8 @@ struct input_keymap_entry {
 #define KEY_FRAMEFORWARD	0x1b5
 #define KEY_CONTEXT_MENU	0x1b6	/* GenDesc - system context menu */
 #define KEY_MEDIA_REPEAT	0x1b7	/* Consumer - transport control */
+#define KEY_10CHANNELSUP        0x1b8   /* 10 channels up (10+) */
+#define KEY_10CHANNELSDOWN      0x1b9   /* 10 channels down (10-) */
 
 #define KEY_DEL_EOL		0x1c0
 #define KEY_DEL_EOS		0x1c1
-- 
cgit v1.2.3-71-gd317


From 383268a8e282fb549dabe3a33ccafc9434ab6006 Mon Sep 17 00:00:00 2001
From: Matti Aaltonen <matti.j.aaltonen@nokia.com>
Date: Fri, 10 Dec 2010 11:41:33 -0300
Subject: [media] MFD: WL1273 FM Radio: MFD driver for the FM radio

This is the core of the WL1273 FM radio driver, it connects
the two child modules. The two child drivers are
drivers/media/radio/radio-wl1273.c and sound/soc/codecs/wl1273.c.

The radio-wl1273 driver implements the V4L2 interface and communicates
with the device. The ALSA codec offers digital audio, without it only
analog audio is available.

Signed-off-by: Matti J. Aaltonen <matti.j.aaltonen@nokia.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/mfd/Kconfig             |  10 ++
 drivers/mfd/Makefile            |   1 +
 drivers/mfd/wl1273-core.c       | 148 +++++++++++++++++++++
 include/linux/mfd/wl1273-core.h | 288 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 447 insertions(+)
 create mode 100644 drivers/mfd/wl1273-core.c
 create mode 100644 include/linux/mfd/wl1273-core.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 3a1493b8b5e5..05ccab6fa919 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -606,6 +606,16 @@ config MFD_VX855
 	  VIA VX855/VX875 south bridge. You will need to enable the vx855_spi
 	  and/or vx855_gpio drivers for this to do anything useful.
 
+config MFD_WL1273_CORE
+	tristate
+	depends on I2C
+	select MFD_CORE
+	default n
+	help
+	  This is the core driver for the TI WL1273 FM radio. This MFD
+	  driver connects the radio-wl1273 V4L2 module and the wl1273
+	  audio codec.
+
 endif # MFD_SUPPORT
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f54b3659abbb..ae2f9153174a 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -81,3 +81,4 @@ obj-$(CONFIG_MFD_JANZ_CMODIO)	+= janz-cmodio.o
 obj-$(CONFIG_MFD_JZ4740_ADC)	+= jz4740-adc.o
 obj-$(CONFIG_MFD_TPS6586X)	+= tps6586x.o
 obj-$(CONFIG_MFD_VX855)		+= vx855.o
+obj-$(CONFIG_MFD_WL1273_CORE)	+= wl1273-core.o
diff --git a/drivers/mfd/wl1273-core.c b/drivers/mfd/wl1273-core.c
new file mode 100644
index 000000000000..d2ecc2435736
--- /dev/null
+++ b/drivers/mfd/wl1273-core.c
@@ -0,0 +1,148 @@
+/*
+ * MFD driver for wl1273 FM radio and audio codec submodules.
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ * Author: Matti Aaltonen <matti.j.aaltonen@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/mfd/wl1273-core.h>
+#include <linux/slab.h>
+
+#define DRIVER_DESC "WL1273 FM Radio Core"
+
+static struct i2c_device_id wl1273_driver_id_table[] = {
+	{ WL1273_FM_DRIVER_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wl1273_driver_id_table);
+
+static int wl1273_core_remove(struct i2c_client *client)
+{
+	struct wl1273_core *core = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	mfd_remove_devices(&client->dev);
+	i2c_set_clientdata(client, NULL);
+	kfree(core);
+
+	return 0;
+}
+
+static int __devinit wl1273_core_probe(struct i2c_client *client,
+				       const struct i2c_device_id *id)
+{
+	struct wl1273_fm_platform_data *pdata = client->dev.platform_data;
+	struct wl1273_core *core;
+	struct mfd_cell *cell;
+	int children = 0;
+	int r = 0;
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	if (!pdata) {
+		dev_err(&client->dev, "No platform data.\n");
+		return -EINVAL;
+	}
+
+	if (!(pdata->children & WL1273_RADIO_CHILD)) {
+		dev_err(&client->dev, "Cannot function without radio child.\n");
+		return -EINVAL;
+	}
+
+	core = kzalloc(sizeof(*core), GFP_KERNEL);
+	if (!core)
+		return -ENOMEM;
+
+	core->pdata = pdata;
+	core->client = client;
+	mutex_init(&core->lock);
+
+	i2c_set_clientdata(client, core);
+
+	dev_dbg(&client->dev, "%s: Have V4L2.\n", __func__);
+
+	cell = &core->cells[children];
+	cell->name = "wl1273_fm_radio";
+	cell->platform_data = &core;
+	cell->data_size = sizeof(core);
+	children++;
+
+	if (pdata->children & WL1273_CODEC_CHILD) {
+		cell = &core->cells[children];
+
+		dev_dbg(&client->dev, "%s: Have codec.\n", __func__);
+		cell->name = "wl1273-codec";
+		cell->platform_data = &core;
+		cell->data_size = sizeof(core);
+		children++;
+	}
+
+	dev_dbg(&client->dev, "%s: number of children: %d.\n",
+		__func__, children);
+
+	r = mfd_add_devices(&client->dev, -1, core->cells,
+			    children, NULL, 0);
+	if (r)
+		goto err;
+
+	return 0;
+
+err:
+	i2c_set_clientdata(client, NULL);
+	pdata->free_resources();
+	kfree(core);
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	return r;
+}
+
+static struct i2c_driver wl1273_core_driver = {
+	.driver = {
+		.name = WL1273_FM_DRIVER_NAME,
+	},
+	.probe = wl1273_core_probe,
+	.id_table = wl1273_driver_id_table,
+	.remove = __devexit_p(wl1273_core_remove),
+};
+
+static int __init wl1273_core_init(void)
+{
+	int r;
+
+	r = i2c_add_driver(&wl1273_core_driver);
+	if (r) {
+		pr_err(WL1273_FM_DRIVER_NAME
+		       ": driver registration failed\n");
+		return r;
+	}
+
+	return r;
+}
+
+static void __exit wl1273_core_exit(void)
+{
+	i2c_del_driver(&wl1273_core_driver);
+}
+late_initcall(wl1273_core_init);
+module_exit(wl1273_core_exit);
+
+MODULE_AUTHOR("Matti Aaltonen <matti.j.aaltonen@nokia.com>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/wl1273-core.h b/include/linux/mfd/wl1273-core.h
new file mode 100644
index 000000000000..9787293eae5f
--- /dev/null
+++ b/include/linux/mfd/wl1273-core.h
@@ -0,0 +1,288 @@
+/*
+ * include/linux/mfd/wl1273-core.h
+ *
+ * Some definitions for the wl1273 radio receiver/transmitter chip.
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ * Author: Matti J. Aaltonen <matti.j.aaltonen@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef WL1273_CORE_H
+#define WL1273_CORE_H
+
+#include <linux/i2c.h>
+#include <linux/mfd/core.h>
+
+#define WL1273_FM_DRIVER_NAME	"wl1273-fm"
+#define RX71_FM_I2C_ADDR	0x22
+
+#define WL1273_STEREO_GET		0
+#define WL1273_RSSI_LVL_GET		1
+#define WL1273_IF_COUNT_GET		2
+#define WL1273_FLAG_GET			3
+#define WL1273_RDS_SYNC_GET		4
+#define WL1273_RDS_DATA_GET		5
+#define WL1273_FREQ_SET			10
+#define WL1273_AF_FREQ_SET		11
+#define WL1273_MOST_MODE_SET		12
+#define WL1273_MOST_BLEND_SET		13
+#define WL1273_DEMPH_MODE_SET		14
+#define WL1273_SEARCH_LVL_SET		15
+#define WL1273_BAND_SET			16
+#define WL1273_MUTE_STATUS_SET		17
+#define WL1273_RDS_PAUSE_LVL_SET	18
+#define WL1273_RDS_PAUSE_DUR_SET	19
+#define WL1273_RDS_MEM_SET		20
+#define WL1273_RDS_BLK_B_SET		21
+#define WL1273_RDS_MSK_B_SET		22
+#define WL1273_RDS_PI_MASK_SET		23
+#define WL1273_RDS_PI_SET		24
+#define WL1273_RDS_SYSTEM_SET		25
+#define WL1273_INT_MASK_SET		26
+#define WL1273_SEARCH_DIR_SET		27
+#define WL1273_VOLUME_SET		28
+#define WL1273_AUDIO_ENABLE		29
+#define WL1273_PCM_MODE_SET		30
+#define WL1273_I2S_MODE_CONFIG_SET	31
+#define WL1273_POWER_SET		32
+#define WL1273_INTX_CONFIG_SET		33
+#define WL1273_PULL_EN_SET		34
+#define WL1273_HILO_SET			35
+#define WL1273_SWITCH2FREF		36
+#define WL1273_FREQ_DRIFT_REPORT	37
+
+#define WL1273_PCE_GET			40
+#define WL1273_FIRM_VER_GET		41
+#define WL1273_ASIC_VER_GET		42
+#define WL1273_ASIC_ID_GET		43
+#define WL1273_MAN_ID_GET		44
+#define WL1273_TUNER_MODE_SET		45
+#define WL1273_STOP_SEARCH		46
+#define WL1273_RDS_CNTRL_SET		47
+
+#define WL1273_WRITE_HARDWARE_REG	100
+#define WL1273_CODE_DOWNLOAD		101
+#define WL1273_RESET			102
+
+#define WL1273_FM_POWER_MODE		254
+#define WL1273_FM_INTERRUPT		255
+
+/* Transmitter API */
+
+#define WL1273_CHANL_SET			55
+#define WL1273_SCAN_SPACING_SET			56
+#define WL1273_REF_SET				57
+#define WL1273_POWER_ENB_SET			90
+#define WL1273_POWER_ATT_SET			58
+#define WL1273_POWER_LEV_SET			59
+#define WL1273_AUDIO_DEV_SET			60
+#define WL1273_PILOT_DEV_SET			61
+#define WL1273_RDS_DEV_SET			62
+#define WL1273_PUPD_SET				91
+#define WL1273_AUDIO_IO_SET			63
+#define WL1273_PREMPH_SET			64
+#define WL1273_MONO_SET				66
+#define WL1273_MUTE				92
+#define WL1273_MPX_LMT_ENABLE			67
+#define WL1273_PI_SET				93
+#define WL1273_ECC_SET				69
+#define WL1273_PTY				70
+#define WL1273_AF				71
+#define WL1273_DISPLAY_MODE			74
+#define WL1273_RDS_REP_SET			77
+#define WL1273_RDS_CONFIG_DATA_SET		98
+#define WL1273_RDS_DATA_SET			99
+#define WL1273_RDS_DATA_ENB			94
+#define WL1273_TA_SET				78
+#define WL1273_TP_SET				79
+#define WL1273_DI_SET				80
+#define WL1273_MS_SET				81
+#define WL1273_PS_SCROLL_SPEED			82
+#define WL1273_TX_AUDIO_LEVEL_TEST		96
+#define WL1273_TX_AUDIO_LEVEL_TEST_THRESHOLD	73
+#define WL1273_TX_AUDIO_INPUT_LEVEL_RANGE_SET	54
+#define WL1273_RX_ANTENNA_SELECT		87
+#define WL1273_I2C_DEV_ADDR_SET			86
+#define WL1273_REF_ERR_CALIB_PARAM_SET		88
+#define WL1273_REF_ERR_CALIB_PERIODICITY_SET	89
+#define WL1273_SOC_INT_TRIGGER			52
+#define WL1273_SOC_AUDIO_PATH_SET		83
+#define WL1273_SOC_PCMI_OVERRIDE		84
+#define WL1273_SOC_I2S_OVERRIDE			85
+#define WL1273_RSSI_BLOCK_SCAN_FREQ_SET		95
+#define WL1273_RSSI_BLOCK_SCAN_START		97
+#define WL1273_RSSI_BLOCK_SCAN_DATA_GET		5
+#define WL1273_READ_FMANT_TUNE_VALUE		104
+
+#define WL1273_RDS_OFF		0
+#define WL1273_RDS_ON		1
+#define WL1273_RDS_RESET	2
+
+#define WL1273_AUDIO_DIGITAL	0
+#define WL1273_AUDIO_ANALOG	1
+
+#define WL1273_MODE_RX		BIT(0)
+#define WL1273_MODE_TX		BIT(1)
+#define WL1273_MODE_OFF		BIT(2)
+#define WL1273_MODE_SUSPENDED	BIT(3)
+
+#define WL1273_RADIO_CHILD	BIT(0)
+#define WL1273_CODEC_CHILD	BIT(1)
+
+#define WL1273_RX_MONO		1
+#define WL1273_RX_STEREO	0
+#define WL1273_TX_MONO		0
+#define WL1273_TX_STEREO	1
+
+#define WL1273_MAX_VOLUME	0xffff
+#define WL1273_DEFAULT_VOLUME	0x78b8
+
+/* I2S protocol, left channel first, data width 16 bits */
+#define WL1273_PCM_DEF_MODE		0x00
+
+/* Rx */
+#define WL1273_AUDIO_ENABLE_I2S		BIT(0)
+#define WL1273_AUDIO_ENABLE_ANALOG	BIT(1)
+
+/* Tx */
+#define WL1273_AUDIO_IO_SET_ANALOG	0
+#define WL1273_AUDIO_IO_SET_I2S		1
+
+#define WL1273_PUPD_SET_OFF		0x00
+#define WL1273_PUPD_SET_ON		0x01
+#define WL1273_PUPD_SET_RETENTION	0x10
+
+/* I2S mode */
+#define WL1273_IS2_WIDTH_32	0x0
+#define WL1273_IS2_WIDTH_40	0x1
+#define WL1273_IS2_WIDTH_22_23	0x2
+#define WL1273_IS2_WIDTH_23_22	0x3
+#define WL1273_IS2_WIDTH_48	0x4
+#define WL1273_IS2_WIDTH_50	0x5
+#define WL1273_IS2_WIDTH_60	0x6
+#define WL1273_IS2_WIDTH_64	0x7
+#define WL1273_IS2_WIDTH_80	0x8
+#define WL1273_IS2_WIDTH_96	0x9
+#define WL1273_IS2_WIDTH_128	0xa
+#define WL1273_IS2_WIDTH	0xf
+
+#define WL1273_IS2_FORMAT_STD	(0x0 << 4)
+#define WL1273_IS2_FORMAT_LEFT	(0x1 << 4)
+#define WL1273_IS2_FORMAT_RIGHT	(0x2 << 4)
+#define WL1273_IS2_FORMAT_USER	(0x3 << 4)
+
+#define WL1273_IS2_MASTER	(0x0 << 6)
+#define WL1273_IS2_SLAVEW	(0x1 << 6)
+
+#define WL1273_IS2_TRI_AFTER_SENDING	(0x0 << 7)
+#define WL1273_IS2_TRI_ALWAYS_ACTIVE	(0x1 << 7)
+
+#define WL1273_IS2_SDOWS_RR	(0x0 << 8)
+#define WL1273_IS2_SDOWS_RF	(0x1 << 8)
+#define WL1273_IS2_SDOWS_FR	(0x2 << 8)
+#define WL1273_IS2_SDOWS_FF	(0x3 << 8)
+
+#define WL1273_IS2_TRI_OPT	(0x0 << 10)
+#define WL1273_IS2_TRI_ALWAYS	(0x1 << 10)
+
+#define WL1273_IS2_RATE_48K	(0x0 << 12)
+#define WL1273_IS2_RATE_44_1K	(0x1 << 12)
+#define WL1273_IS2_RATE_32K	(0x2 << 12)
+#define WL1273_IS2_RATE_22_05K	(0x4 << 12)
+#define WL1273_IS2_RATE_16K	(0x5 << 12)
+#define WL1273_IS2_RATE_12K	(0x8 << 12)
+#define WL1273_IS2_RATE_11_025	(0x9 << 12)
+#define WL1273_IS2_RATE_8K	(0xa << 12)
+#define WL1273_IS2_RATE		(0xf << 12)
+
+#define WL1273_I2S_DEF_MODE	(WL1273_IS2_WIDTH_32 | \
+				 WL1273_IS2_FORMAT_STD | \
+				 WL1273_IS2_MASTER | \
+				 WL1273_IS2_TRI_AFTER_SENDING | \
+				 WL1273_IS2_SDOWS_RR | \
+				 WL1273_IS2_TRI_OPT | \
+				 WL1273_IS2_RATE_48K)
+
+#define SCHAR_MIN (-128)
+#define SCHAR_MAX 127
+
+#define WL1273_FR_EVENT			BIT(0)
+#define WL1273_BL_EVENT			BIT(1)
+#define WL1273_RDS_EVENT		BIT(2)
+#define WL1273_BBLK_EVENT		BIT(3)
+#define WL1273_LSYNC_EVENT		BIT(4)
+#define WL1273_LEV_EVENT		BIT(5)
+#define WL1273_IFFR_EVENT		BIT(6)
+#define WL1273_PI_EVENT			BIT(7)
+#define WL1273_PD_EVENT			BIT(8)
+#define WL1273_STIC_EVENT		BIT(9)
+#define WL1273_MAL_EVENT		BIT(10)
+#define WL1273_POW_ENB_EVENT		BIT(11)
+#define WL1273_SCAN_OVER_EVENT		BIT(12)
+#define WL1273_ERROR_EVENT		BIT(13)
+
+#define TUNER_MODE_STOP_SEARCH		0
+#define TUNER_MODE_PRESET		1
+#define TUNER_MODE_AUTO_SEEK		2
+#define TUNER_MODE_AF			3
+#define TUNER_MODE_AUTO_SEEK_PI		4
+#define TUNER_MODE_AUTO_SEEK_BULK	5
+
+#define RDS_BLOCK_SIZE	3
+
+struct wl1273_fm_platform_data {
+	int (*request_resources) (struct i2c_client *client);
+	void (*free_resources) (void);
+	void (*enable) (void);
+	void (*disable) (void);
+
+	u8 forbidden_modes;
+	unsigned int children;
+};
+
+#define WL1273_FM_CORE_CELLS	2
+
+#define WL1273_BAND_OTHER	0
+#define WL1273_BAND_JAPAN	1
+
+#define WL1273_BAND_JAPAN_LOW	76000
+#define WL1273_BAND_JAPAN_HIGH	90000
+#define WL1273_BAND_OTHER_LOW	87500
+#define WL1273_BAND_OTHER_HIGH	108000
+
+#define WL1273_BAND_TX_LOW	76000
+#define WL1273_BAND_TX_HIGH	108000
+
+struct wl1273_core {
+	struct mfd_cell cells[WL1273_FM_CORE_CELLS];
+	struct wl1273_fm_platform_data *pdata;
+
+	unsigned int mode;
+	unsigned int i2s_mode;
+	unsigned int volume;
+	unsigned int audio_mode;
+	unsigned int channel_number;
+	struct mutex lock; /* for serializing fm radio operations */
+
+	struct i2c_client *client;
+
+	int (*write)(struct wl1273_core *core, u8, u16);
+	int (*set_audio)(struct wl1273_core *core, unsigned int);
+	int (*set_volume)(struct wl1273_core *core, unsigned int);
+};
+
+#endif	/* ifndef WL1273_CORE_H */
-- 
cgit v1.2.3-71-gd317


From 08af245de0cf6ab5f4ed008ee2bb99273774fce0 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 24 Dec 2010 10:33:19 -0300
Subject: [media] V4L: remove V4L1 compatibility mode

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/Kconfig                           |   15 -
 drivers/media/common/saa7146_video.c            |   32 -
 drivers/media/video/Makefile                    |    4 -
 drivers/media/video/au0828/au0828-video.c       |   12 -
 drivers/media/video/bt8xx/bttv-driver.c         |   28 -
 drivers/media/video/cpia2/cpia2_v4l.c           |   38 -
 drivers/media/video/cx231xx/cx231xx-video.c     |   12 -
 drivers/media/video/cx23885/cx23885-video.c     |   32 -
 drivers/media/video/cx88/cx88-video.c           |   12 -
 drivers/media/video/em28xx/em28xx-video.c       |   16 -
 drivers/media/video/pvrusb2/pvrusb2-v4l2.c      |    4 +-
 drivers/media/video/pwc/pwc-v4l.c               |   17 -
 drivers/media/video/s2255drv.c                  |   12 -
 drivers/media/video/saa7134/saa7134-video.c     |   11 -
 drivers/media/video/usbvision/usbvision-video.c |    3 -
 drivers/media/video/uvc/uvc_v4l2.c              |    7 +-
 drivers/media/video/v4l1-compat.c               | 1277 -----------------------
 drivers/media/video/v4l2-compat-ioctl32.c       |  325 ------
 drivers/media/video/v4l2-ioctl.c                |   86 --
 drivers/media/video/via-camera.c                |   13 -
 drivers/media/video/videobuf-core.c             |   30 -
 drivers/media/video/videobuf-dma-sg.c           |   23 -
 drivers/media/video/vivi.c                      |   12 -
 drivers/staging/cx25821/cx25821-video.c         |   31 -
 drivers/staging/cx25821/cx25821-video.h         |    6 -
 drivers/staging/dt3155v4l/dt3155v4l.c           |    3 -
 drivers/staging/tm6000/tm6000-video.c           |   12 -
 include/linux/videodev.h                        |   22 +-
 include/media/v4l2-ioctl.h                      |   22 +-
 include/media/videobuf-core.h                   |    8 -
 30 files changed, 9 insertions(+), 2116 deletions(-)
 delete mode 100644 drivers/media/video/v4l1-compat.c

(limited to 'include/linux')

diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index c21dfc28482e..9ea1a6d70f0a 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -53,21 +53,6 @@ config VIDEO_ALLOW_V4L1
 
 	  If you are unsure as to whether this is required, answer Y.
 
-config VIDEO_V4L1_COMPAT
-	bool "Enable Video For Linux API 1 compatible Layer" if !VIDEO_ALLOW_V4L1
-	depends on VIDEO_DEV
-	default y
-	---help---
-	  Enables a compatibility API used by most V4L2 devices to allow
-	  its usage with legacy applications that supports only V4L1 api.
-
-	  Documentation for the original API is included in the file
-	  <Documentation/video4linux/API.html>.
-
-	  User tools for this are available from
-	  <ftp://ftp.uk.linux.org/pub/linux/video4linux/>.
-
-	  If you are unsure as to whether this is required, answer Y.
 
 #
 # DVB Core
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index d246910129e8..0ac5c619aecf 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -1129,35 +1129,6 @@ static int vidioc_g_chip_ident(struct file *file, void *__fh,
 			core, g_chip_ident, chip);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *__fh, struct video_mbuf *mbuf)
-{
-	struct saa7146_fh *fh = __fh;
-	struct videobuf_queue *q = &fh->video_q;
-	int err, i;
-
-	/* fixme: number of capture buffers and sizes for v4l apps */
-	int gbuffers = 2;
-	int gbufsize = 768 * 576 * 4;
-
-	DEB_D(("VIDIOCGMBUF \n"));
-
-	q = &fh->video_q;
-	err = videobuf_mmap_setup(q, gbuffers, gbufsize,
-			V4L2_MEMORY_MMAP);
-	if (err < 0)
-		return err;
-
-	gbuffers = err;
-	memset(mbuf, 0, sizeof(*mbuf));
-	mbuf->frames = gbuffers;
-	mbuf->size   = gbuffers * gbufsize;
-	for (i = 0; i < gbuffers; i++)
-		mbuf->offsets[i] = i * gbufsize;
-	return 0;
-}
-#endif
-
 const struct v4l2_ioctl_ops saa7146_video_ioctl_ops = {
 	.vidioc_querycap             = vidioc_querycap,
 	.vidioc_enum_fmt_vid_cap     = vidioc_enum_fmt_vid_cap,
@@ -1186,9 +1157,6 @@ const struct v4l2_ioctl_ops saa7146_video_ioctl_ops = {
 	.vidioc_streamon             = vidioc_streamon,
 	.vidioc_streamoff            = vidioc_streamoff,
 	.vidioc_g_parm 		     = vidioc_g_parm,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf                 = vidiocgmbuf,
-#endif
 };
 
 /*********************************************************************************/
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 482f14b3ff84..ace7b6eaf5b8 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -22,10 +22,6 @@ endif
 
 obj-$(CONFIG_VIDEO_V4L2_COMMON) += v4l2-common.o
 
-ifeq ($(CONFIG_VIDEO_V4L1_COMPAT),y)
-  obj-$(CONFIG_VIDEO_DEV) += v4l1-compat.o
-endif
-
 # All i2c modules must come first:
 
 obj-$(CONFIG_VIDEO_TUNER) += tuner.o
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index 48682af911fa..e41e4ad5cc40 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1809,15 +1809,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
 	return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct au0828_fh *fh = priv;
-
-	return videobuf_cgmbuf(&fh->vb_vidq, mbuf, 8);
-}
-#endif
-
 static struct v4l2_file_operations au0828_v4l_fops = {
 	.owner      = THIS_MODULE,
 	.open       = au0828_v4l2_open,
@@ -1861,9 +1852,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_s_register          = vidioc_s_register,
 #endif
 	.vidioc_g_chip_ident        = vidioc_g_chip_ident,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf                = vidiocgmbuf,
-#endif
 };
 
 static const struct video_device au0828_video_template = {
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 54fbab95b88c..961d0805d877 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -2597,31 +2597,6 @@ static int bttv_s_fmt_vid_overlay(struct file *file, void *priv,
 	return setup_window_lock(fh, btv, &f->fmt.win, 1);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	int retval;
-	unsigned int i;
-	struct bttv_fh *fh = priv;
-
-	retval = __videobuf_mmap_setup(&fh->cap, gbuffers, gbufsize,
-				     V4L2_MEMORY_MMAP);
-	if (retval < 0) {
-		return retval;
-	}
-
-	gbuffers = retval;
-	memset(mbuf, 0, sizeof(*mbuf));
-	mbuf->frames = gbuffers;
-	mbuf->size   = gbuffers * gbufsize;
-
-	for (i = 0; i < gbuffers; i++)
-		mbuf->offsets[i] = i * gbufsize;
-
-	return 0;
-}
-#endif
-
 static int bttv_querycap(struct file *file, void  *priv,
 				struct v4l2_capability *cap)
 {
@@ -3354,9 +3329,6 @@ static const struct v4l2_ioctl_ops bttv_ioctl_ops = {
 	.vidioc_streamoff               = bttv_streamoff,
 	.vidioc_g_tuner                 = bttv_g_tuner,
 	.vidioc_s_tuner                 = bttv_s_tuner,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf                    = vidiocgmbuf,
-#endif
 	.vidioc_g_crop                  = bttv_g_crop,
 	.vidioc_s_crop                  = bttv_s_crop,
 	.vidioc_g_fbuf                  = bttv_g_fbuf,
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index 46b433bbf2c1..7edf80b0d01a 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -417,28 +417,6 @@ static int sync(struct camera_data *cam, int frame_nr)
 	}
 }
 
-/******************************************************************************
- *
- *  ioctl_get_mbuf
- *
- *****************************************************************************/
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int ioctl_get_mbuf(void *arg, struct camera_data *cam)
-{
-	struct video_mbuf *vm;
-	int i;
-	vm = arg;
-
-	memset(vm, 0, sizeof(*vm));
-	vm->size = cam->frame_size*cam->num_frames;
-	vm->frames = cam->num_frames;
-	for (i = 0; i < cam->num_frames; i++)
-		vm->offsets[i] = cam->frame_size * i;
-
-	return 0;
-}
-#endif
-
 /******************************************************************************
  *
  *  ioctl_set_gpio
@@ -1380,17 +1358,6 @@ static long cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		}
 		break;
 	}
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGMBUF:
-	{
-		struct cpia2_fh *fh = file->private_data;
-		if(fh->prio != V4L2_PRIORITY_RECORD) {
-			mutex_unlock(&cam->busy_lock);
-			return -EBUSY;
-		}
-		break;
-	}
-#endif
 	default:
 		break;
 	}
@@ -1400,11 +1367,6 @@ static long cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	case CPIA2_IOC_SET_GPIO:
 		retval = ioctl_set_gpio(arg, cam);
 		break;
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGMBUF:	/* mmap interface */
-		retval = ioctl_get_mbuf(arg, cam);
-		break;
-#endif
 	case VIDIOC_QUERYCAP:
 		retval = ioctl_querycap(arg,cam);
 		break;
diff --git a/drivers/media/video/cx231xx/cx231xx-video.c b/drivers/media/video/cx231xx/cx231xx-video.c
index b13b69fb2af6..7e3e8c4f19b7 100644
--- a/drivers/media/video/cx231xx/cx231xx-video.c
+++ b/drivers/media/video/cx231xx/cx231xx-video.c
@@ -2044,15 +2044,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
 	return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct cx231xx_fh *fh = priv;
-
-	return videobuf_cgmbuf(&fh->vb_vidq, mbuf, 8);
-}
-#endif
-
 /* ----------------------------------------------------------- */
 /* RADIO ESPECIFIC IOCTLS                                      */
 /* ----------------------------------------------------------- */
@@ -2507,9 +2498,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_g_register             = vidioc_g_register,
 	.vidioc_s_register             = vidioc_s_register,
 #endif
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf                   = vidiocgmbuf,
-#endif
 };
 
 static struct video_device cx231xx_vbi_template;
diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c
index 8b2fb8a4375c..796a797b5205 100644
--- a/drivers/media/video/cx23885/cx23885-video.c
+++ b/drivers/media/video/cx23885/cx23885-video.c
@@ -1024,35 +1024,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	return 0;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv,
-	struct video_mbuf *mbuf)
-{
-	struct cx23885_fh *fh = priv;
-	struct videobuf_queue *q;
-	struct v4l2_requestbuffers req;
-	unsigned int i;
-	int err;
-
-	q = get_queue(fh);
-	memset(&req, 0, sizeof(req));
-	req.type   = q->type;
-	req.count  = 8;
-	req.memory = V4L2_MEMORY_MMAP;
-	err = videobuf_reqbufs(q, &req);
-	if (err < 0)
-		return err;
-
-	mbuf->frames = req.count;
-	mbuf->size   = 0;
-	for (i = 0; i < mbuf->frames; i++) {
-		mbuf->offsets[i]  = q->bufs[i]->boff;
-		mbuf->size       += q->bufs[i]->bsize;
-	}
-	return 0;
-}
-#endif
-
 static int vidioc_reqbufs(struct file *file, void *priv,
 	struct v4l2_requestbuffers *p)
 {
@@ -1427,9 +1398,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_s_ctrl        = vidioc_s_ctrl,
 	.vidioc_streamon      = vidioc_streamon,
 	.vidioc_streamoff     = vidioc_streamoff,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf          = vidiocgmbuf,
-#endif
 	.vidioc_g_tuner       = vidioc_g_tuner,
 	.vidioc_s_tuner       = vidioc_s_tuner,
 	.vidioc_g_frequency   = vidioc_g_frequency,
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 62cea9549404..9a37b839c67c 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1179,15 +1179,6 @@ static int vidioc_enum_fmt_vid_cap (struct file *file, void  *priv,
 	return 0;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf (struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct cx8800_fh           *fh  = priv;
-
-	return videobuf_cgmbuf (get_queue(fh), mbuf, 8);
-}
-#endif
-
 static int vidioc_reqbufs (struct file *file, void *priv, struct v4l2_requestbuffers *p)
 {
 	struct cx8800_fh  *fh   = priv;
@@ -1731,9 +1722,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_s_ctrl        = vidioc_s_ctrl,
 	.vidioc_streamon      = vidioc_streamon,
 	.vidioc_streamoff     = vidioc_streamoff,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf          = vidiocgmbuf,
-#endif
 	.vidioc_g_tuner       = vidioc_g_tuner,
 	.vidioc_s_tuner       = vidioc_s_tuner,
 	.vidioc_g_frequency   = vidioc_g_frequency,
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 908e3bc88303..7be9bbad79ba 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1934,19 +1934,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
 				      O_NONBLOCK);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct em28xx_fh  *fh = priv;
-
-	if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		return videobuf_cgmbuf(&fh->vb_vidq, mbuf, 8);
-	else
-		return videobuf_cgmbuf(&fh->vb_vbiq, mbuf, 8);
-}
-#endif
-
-
 /* ----------------------------------------------------------- */
 /* RADIO ESPECIFIC IOCTLS                                      */
 /* ----------------------------------------------------------- */
@@ -2359,9 +2346,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_s_register          = vidioc_s_register,
 	.vidioc_g_chip_ident        = vidioc_g_chip_ident,
 #endif
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf                = vidiocgmbuf,
-#endif
 };
 
 static const struct video_device em28xx_video_template = {
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index aaafa0398fd5..58617fc656c2 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -852,8 +852,8 @@ static long pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 #endif
 
 	default :
-		ret = v4l_compat_translate_ioctl(file, cmd,
-						 arg, pvr2_v4l2_do_ioctl);
+		ret = -EINVAL;
+		break;
 	}
 
 	pvr2_hdw_commit_ctl(hdw);
diff --git a/drivers/media/video/pwc/pwc-v4l.c b/drivers/media/video/pwc/pwc-v4l.c
index 2ef1668638f9..8ca4d22b4384 100644
--- a/drivers/media/video/pwc/pwc-v4l.c
+++ b/drivers/media/video/pwc/pwc-v4l.c
@@ -362,23 +362,6 @@ long pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 
 	switch (cmd) {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-		/* mmap() functions */
-		case VIDIOCGMBUF:
-		{
-			/* Tell the user program how much memory is needed for a mmap() */
-			struct video_mbuf *vm = arg;
-			int i;
-
-			memset(vm, 0, sizeof(*vm));
-			vm->size = pwc_mbufs * pdev->len_per_image;
-			vm->frames = pwc_mbufs; /* double buffering should be enough for most applications */
-			for (i = 0; i < pwc_mbufs; i++)
-				vm->offsets[i] = i * pdev->len_per_image;
-			break;
-		}
-#endif
-
 		/* V4L2 Layer */
 		case VIDIOC_QUERYCAP:
 		{
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index cf03372d1d17..b63f8cafa671 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -1097,15 +1097,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p)
 	return rc;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidioc_cgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct s2255_fh *fh = priv;
-
-	return videobuf_cgmbuf(&fh->vb_vidq, mbuf, 8);
-}
-#endif
-
 /* write to the configuration pipe, synchronously */
 static int s2255_write_config(struct usb_device *udev, unsigned char *pbuf,
 			      int size)
@@ -1909,9 +1900,6 @@ static const struct v4l2_ioctl_ops s2255_ioctl_ops = {
 	.vidioc_s_ctrl = vidioc_s_ctrl,
 	.vidioc_streamon = vidioc_streamon,
 	.vidioc_streamoff = vidioc_streamoff,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf = vidioc_cgmbuf,
-#endif
 	.vidioc_s_jpegcomp = vidioc_s_jpegcomp,
 	.vidioc_g_jpegcomp = vidioc_g_jpegcomp,
 	.vidioc_s_parm = vidioc_s_parm,
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index f0b1573137f4..ad22be27bd38 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -2211,14 +2211,6 @@ static int saa7134_overlay(struct file *file, void *f, unsigned int on)
 	return 0;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct saa7134_fh *fh = file->private_data;
-	return videobuf_cgmbuf(saa7134_queue(fh), mbuf, 8);
-}
-#endif
-
 static int saa7134_reqbufs(struct file *file, void *priv,
 					struct v4l2_requestbuffers *p)
 {
@@ -2456,9 +2448,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_streamoff		= saa7134_streamoff,
 	.vidioc_g_tuner			= saa7134_g_tuner,
 	.vidioc_s_tuner			= saa7134_s_tuner,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf			= vidiocgmbuf,
-#endif
 	.vidioc_g_crop			= saa7134_g_crop,
 	.vidioc_s_crop			= saa7134_s_crop,
 	.vidioc_g_fbuf			= saa7134_g_fbuf,
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 011c0c386995..20da6ea2cdf0 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -1273,9 +1273,6 @@ static const struct v4l2_ioctl_ops usbvision_ioctl_ops = {
 	.vidioc_s_ctrl        = vidioc_s_ctrl,
 	.vidioc_streamon      = vidioc_streamon,
 	.vidioc_streamoff     = vidioc_streamoff,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-/*  	.vidiocgmbuf          = vidiocgmbuf, */
-#endif
 	.vidioc_g_tuner       = vidioc_g_tuner,
 	.vidioc_s_tuner       = vidioc_s_tuner,
 	.vidioc_g_frequency   = vidioc_g_frequency,
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c
index 8cf61e8a634f..9005a8d9d5f8 100644
--- a/drivers/media/video/uvc/uvc_v4l2.c
+++ b/drivers/media/video/uvc/uvc_v4l2.c
@@ -1035,11 +1035,8 @@ static long uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		return uvc_xu_ctrl_query(chain, arg, 1);
 
 	default:
-		if ((ret = v4l_compat_translate_ioctl(file, cmd, arg,
-			uvc_v4l2_do_ioctl)) == -ENOIOCTLCMD)
-			uvc_trace(UVC_TRACE_IOCTL, "Unknown ioctl 0x%08x\n",
-				  cmd);
-		return ret;
+		uvc_trace(UVC_TRACE_IOCTL, "Unknown ioctl 0x%08x\n", cmd);
+		return -EINVAL;
 	}
 
 	return ret;
diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c
deleted file mode 100644
index d4ac751036a2..000000000000
--- a/drivers/media/video/v4l1-compat.c
+++ /dev/null
@@ -1,1277 +0,0 @@
-/*
- *
- *	Video for Linux Two
- *	Backward Compatibility Layer
- *
- *	Support subroutines for providing V4L2 drivers with backward
- *	compatibility with applications using the old API.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * Author:	Bill Dirks <bill@thedirks.org>
- *		et al.
- *
- */
-
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/videodev.h>
-#include <media/v4l2-common.h>
-#include <media/v4l2-ioctl.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-
-static unsigned int debug;
-module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "enable debug messages");
-MODULE_AUTHOR("Bill Dirks");
-MODULE_DESCRIPTION("v4l(1) compatibility layer for v4l2 drivers.");
-MODULE_LICENSE("GPL");
-
-#define dprintk(fmt, arg...) \
-	do { \
-		if (debug) \
-			printk(KERN_DEBUG "v4l1-compat: " fmt , ## arg);\
-	} while (0)
-
-/*
- *	I O C T L   T R A N S L A T I O N
- *
- *	From here on down is the code for translating the numerous
- *	ioctl commands from the old API to the new API.
- */
-
-static int
-get_v4l_control(struct file             *file,
-		int			cid,
-		v4l2_kioctl             drv)
-{
-	struct v4l2_queryctrl	qctrl2;
-	struct v4l2_control	ctrl2;
-	int			err;
-
-	qctrl2.id = cid;
-	err = drv(file, VIDIOC_QUERYCTRL, &qctrl2);
-	if (err < 0)
-		dprintk("VIDIOC_QUERYCTRL: %d\n", err);
-	if (err == 0 && !(qctrl2.flags & V4L2_CTRL_FLAG_DISABLED)) {
-		ctrl2.id = qctrl2.id;
-		err = drv(file, VIDIOC_G_CTRL, &ctrl2);
-		if (err < 0) {
-			dprintk("VIDIOC_G_CTRL: %d\n", err);
-			return 0;
-		}
-		return DIV_ROUND_CLOSEST((ctrl2.value-qctrl2.minimum) * 65535,
-					 qctrl2.maximum - qctrl2.minimum);
-	}
-	return 0;
-}
-
-static int
-set_v4l_control(struct file             *file,
-		int			cid,
-		int			value,
-		v4l2_kioctl             drv)
-{
-	struct v4l2_queryctrl	qctrl2;
-	struct v4l2_control	ctrl2;
-	int			err;
-
-	qctrl2.id = cid;
-	err = drv(file, VIDIOC_QUERYCTRL, &qctrl2);
-	if (err < 0)
-		dprintk("VIDIOC_QUERYCTRL: %d\n", err);
-	if (err == 0 &&
-	    !(qctrl2.flags & V4L2_CTRL_FLAG_DISABLED) &&
-	    !(qctrl2.flags & V4L2_CTRL_FLAG_GRABBED)) {
-		if (value < 0)
-			value = 0;
-		if (value > 65535)
-			value = 65535;
-		if (value && qctrl2.type == V4L2_CTRL_TYPE_BOOLEAN)
-			value = 65535;
-		ctrl2.id = qctrl2.id;
-		ctrl2.value =
-			(value * (qctrl2.maximum - qctrl2.minimum)
-			 + 32767)
-			/ 65535;
-		ctrl2.value += qctrl2.minimum;
-		err = drv(file, VIDIOC_S_CTRL, &ctrl2);
-		if (err < 0)
-			dprintk("VIDIOC_S_CTRL: %d\n", err);
-	}
-	return 0;
-}
-
-/* ----------------------------------------------------------------- */
-
-static const unsigned int palette2pixelformat[] = {
-	[VIDEO_PALETTE_GREY]    = V4L2_PIX_FMT_GREY,
-	[VIDEO_PALETTE_RGB555]  = V4L2_PIX_FMT_RGB555,
-	[VIDEO_PALETTE_RGB565]  = V4L2_PIX_FMT_RGB565,
-	[VIDEO_PALETTE_RGB24]   = V4L2_PIX_FMT_BGR24,
-	[VIDEO_PALETTE_RGB32]   = V4L2_PIX_FMT_BGR32,
-	/* yuv packed pixel */
-	[VIDEO_PALETTE_YUYV]    = V4L2_PIX_FMT_YUYV,
-	[VIDEO_PALETTE_YUV422]  = V4L2_PIX_FMT_YUYV,
-	[VIDEO_PALETTE_UYVY]    = V4L2_PIX_FMT_UYVY,
-	/* yuv planar */
-	[VIDEO_PALETTE_YUV410P] = V4L2_PIX_FMT_YUV410,
-	[VIDEO_PALETTE_YUV420]  = V4L2_PIX_FMT_YUV420,
-	[VIDEO_PALETTE_YUV420P] = V4L2_PIX_FMT_YUV420,
-	[VIDEO_PALETTE_YUV411P] = V4L2_PIX_FMT_YUV411P,
-	[VIDEO_PALETTE_YUV422P] = V4L2_PIX_FMT_YUV422P,
-};
-
-static unsigned int __pure
-palette_to_pixelformat(unsigned int palette)
-{
-	if (palette < ARRAY_SIZE(palette2pixelformat))
-		return palette2pixelformat[palette];
-	else
-		return 0;
-}
-
-static unsigned int __attribute_const__
-pixelformat_to_palette(unsigned int pixelformat)
-{
-	int	palette = 0;
-	switch (pixelformat) {
-	case V4L2_PIX_FMT_GREY:
-		palette = VIDEO_PALETTE_GREY;
-		break;
-	case V4L2_PIX_FMT_RGB555:
-		palette = VIDEO_PALETTE_RGB555;
-		break;
-	case V4L2_PIX_FMT_RGB565:
-		palette = VIDEO_PALETTE_RGB565;
-		break;
-	case V4L2_PIX_FMT_BGR24:
-		palette = VIDEO_PALETTE_RGB24;
-		break;
-	case V4L2_PIX_FMT_BGR32:
-		palette = VIDEO_PALETTE_RGB32;
-		break;
-	/* yuv packed pixel */
-	case V4L2_PIX_FMT_YUYV:
-		palette = VIDEO_PALETTE_YUYV;
-		break;
-	case V4L2_PIX_FMT_UYVY:
-		palette = VIDEO_PALETTE_UYVY;
-		break;
-	/* yuv planar */
-	case V4L2_PIX_FMT_YUV410:
-		palette = VIDEO_PALETTE_YUV420;
-		break;
-	case V4L2_PIX_FMT_YUV420:
-		palette = VIDEO_PALETTE_YUV420;
-		break;
-	case V4L2_PIX_FMT_YUV411P:
-		palette = VIDEO_PALETTE_YUV411P;
-		break;
-	case V4L2_PIX_FMT_YUV422P:
-		palette = VIDEO_PALETTE_YUV422P;
-		break;
-	}
-	return palette;
-}
-
-/* ----------------------------------------------------------------- */
-
-static int poll_one(struct file *file, struct poll_wqueues *pwq)
-{
-	int retval = 1;
-	poll_table *table;
-
-	poll_initwait(pwq);
-	table = &pwq->pt;
-	for (;;) {
-		int mask;
-		mask = file->f_op->poll(file, table);
-		if (mask & POLLIN)
-			break;
-		table = NULL;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		poll_schedule(pwq, TASK_INTERRUPTIBLE);
-	}
-	poll_freewait(pwq);
-	return retval;
-}
-
-static int count_inputs(
-			struct file *file,
-			v4l2_kioctl drv)
-{
-	struct v4l2_input input2;
-	int i;
-
-	for (i = 0;; i++) {
-		memset(&input2, 0, sizeof(input2));
-		input2.index = i;
-		if (0 != drv(file, VIDIOC_ENUMINPUT, &input2))
-			break;
-	}
-	return i;
-}
-
-static int check_size(
-		struct file *file,
-		v4l2_kioctl drv,
-		int *maxw,
-		int *maxh)
-{
-	struct v4l2_fmtdesc desc2;
-	struct v4l2_format  fmt2;
-
-	memset(&desc2, 0, sizeof(desc2));
-	memset(&fmt2, 0, sizeof(fmt2));
-
-	desc2.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	if (0 != drv(file, VIDIOC_ENUM_FMT, &desc2))
-		goto done;
-
-	fmt2.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	fmt2.fmt.pix.width       = 10000;
-	fmt2.fmt.pix.height      = 10000;
-	fmt2.fmt.pix.pixelformat = desc2.pixelformat;
-	if (0 != drv(file, VIDIOC_TRY_FMT, &fmt2))
-		goto done;
-
-	*maxw = fmt2.fmt.pix.width;
-	*maxh = fmt2.fmt.pix.height;
-
-done:
-	return 0;
-}
-
-/* ----------------------------------------------------------------- */
-
-static noinline long v4l1_compat_get_capabilities(
-					struct video_capability *cap,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_framebuffer fbuf;
-	struct v4l2_capability *cap2;
-
-	cap2 = kzalloc(sizeof(*cap2), GFP_KERNEL);
-	if (!cap2) {
-		err = -ENOMEM;
-		return err;
-	}
-	memset(cap, 0, sizeof(*cap));
-	memset(&fbuf, 0, sizeof(fbuf));
-
-	err = drv(file, VIDIOC_QUERYCAP, cap2);
-	if (err < 0) {
-		dprintk("VIDIOCGCAP / VIDIOC_QUERYCAP: %ld\n", err);
-		goto done;
-	}
-	if (cap2->capabilities & V4L2_CAP_VIDEO_OVERLAY) {
-		err = drv(file, VIDIOC_G_FBUF, &fbuf);
-		if (err < 0) {
-			dprintk("VIDIOCGCAP / VIDIOC_G_FBUF: %ld\n", err);
-			memset(&fbuf, 0, sizeof(fbuf));
-		}
-		err = 0;
-	}
-
-	memcpy(cap->name, cap2->card,
-	       min(sizeof(cap->name), sizeof(cap2->card)));
-	cap->name[sizeof(cap->name) - 1] = 0;
-	if (cap2->capabilities & V4L2_CAP_VIDEO_CAPTURE)
-		cap->type |= VID_TYPE_CAPTURE;
-	if (cap2->capabilities & V4L2_CAP_TUNER)
-		cap->type |= VID_TYPE_TUNER;
-	if (cap2->capabilities & V4L2_CAP_VBI_CAPTURE)
-		cap->type |= VID_TYPE_TELETEXT;
-	if (cap2->capabilities & V4L2_CAP_VIDEO_OVERLAY)
-		cap->type |= VID_TYPE_OVERLAY;
-	if (fbuf.capability & V4L2_FBUF_CAP_LIST_CLIPPING)
-		cap->type |= VID_TYPE_CLIPPING;
-
-	cap->channels  = count_inputs(file, drv);
-	check_size(file, drv,
-		   &cap->maxwidth, &cap->maxheight);
-	cap->audios    =  0; /* FIXME */
-	cap->minwidth  = 48; /* FIXME */
-	cap->minheight = 32; /* FIXME */
-
-done:
-	kfree(cap2);
-	return err;
-}
-
-static noinline long v4l1_compat_get_frame_buffer(
-					struct video_buffer *buffer,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_framebuffer fbuf;
-
-	memset(buffer, 0, sizeof(*buffer));
-	memset(&fbuf, 0, sizeof(fbuf));
-
-	err = drv(file, VIDIOC_G_FBUF, &fbuf);
-	if (err < 0) {
-		dprintk("VIDIOCGFBUF / VIDIOC_G_FBUF: %ld\n", err);
-		goto done;
-	}
-	buffer->base   = fbuf.base;
-	buffer->height = fbuf.fmt.height;
-	buffer->width  = fbuf.fmt.width;
-
-	switch (fbuf.fmt.pixelformat) {
-	case V4L2_PIX_FMT_RGB332:
-		buffer->depth = 8;
-		break;
-	case V4L2_PIX_FMT_RGB555:
-		buffer->depth = 15;
-		break;
-	case V4L2_PIX_FMT_RGB565:
-		buffer->depth = 16;
-		break;
-	case V4L2_PIX_FMT_BGR24:
-		buffer->depth = 24;
-		break;
-	case V4L2_PIX_FMT_BGR32:
-		buffer->depth = 32;
-		break;
-	default:
-		buffer->depth = 0;
-	}
-	if (fbuf.fmt.bytesperline) {
-		buffer->bytesperline = fbuf.fmt.bytesperline;
-		if (!buffer->depth && buffer->width)
-			buffer->depth   = ((fbuf.fmt.bytesperline<<3)
-					  + (buffer->width-1))
-					  / buffer->width;
-	} else {
-		buffer->bytesperline =
-			(buffer->width * buffer->depth + 7) & 7;
-		buffer->bytesperline >>= 3;
-	}
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_set_frame_buffer(
-					struct video_buffer *buffer,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_framebuffer fbuf;
-
-	memset(&fbuf, 0, sizeof(fbuf));
-	fbuf.base       = buffer->base;
-	fbuf.fmt.height = buffer->height;
-	fbuf.fmt.width  = buffer->width;
-	switch (buffer->depth) {
-	case 8:
-		fbuf.fmt.pixelformat = V4L2_PIX_FMT_RGB332;
-		break;
-	case 15:
-		fbuf.fmt.pixelformat = V4L2_PIX_FMT_RGB555;
-		break;
-	case 16:
-		fbuf.fmt.pixelformat = V4L2_PIX_FMT_RGB565;
-		break;
-	case 24:
-		fbuf.fmt.pixelformat = V4L2_PIX_FMT_BGR24;
-		break;
-	case 32:
-		fbuf.fmt.pixelformat = V4L2_PIX_FMT_BGR32;
-		break;
-	}
-	fbuf.fmt.bytesperline = buffer->bytesperline;
-	err = drv(file, VIDIOC_S_FBUF, &fbuf);
-	if (err < 0)
-		dprintk("VIDIOCSFBUF / VIDIOC_S_FBUF: %ld\n", err);
-	return err;
-}
-
-static noinline long v4l1_compat_get_win_cap_dimensions(
-					struct video_window *win,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_format *fmt;
-
-	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
-	if (!fmt) {
-		err = -ENOMEM;
-		return err;
-	}
-	memset(win, 0, sizeof(*win));
-
-	fmt->type = V4L2_BUF_TYPE_VIDEO_OVERLAY;
-	err = drv(file, VIDIOC_G_FMT, fmt);
-	if (err < 0)
-		dprintk("VIDIOCGWIN / VIDIOC_G_WIN: %ld\n", err);
-	if (err == 0) {
-		win->x         = fmt->fmt.win.w.left;
-		win->y         = fmt->fmt.win.w.top;
-		win->width     = fmt->fmt.win.w.width;
-		win->height    = fmt->fmt.win.w.height;
-		win->chromakey = fmt->fmt.win.chromakey;
-		win->clips     = NULL;
-		win->clipcount = 0;
-		goto done;
-	}
-
-	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_G_FMT, fmt);
-	if (err < 0) {
-		dprintk("VIDIOCGWIN / VIDIOC_G_FMT: %ld\n", err);
-		goto done;
-	}
-	win->x         = 0;
-	win->y         = 0;
-	win->width     = fmt->fmt.pix.width;
-	win->height    = fmt->fmt.pix.height;
-	win->chromakey = 0;
-	win->clips     = NULL;
-	win->clipcount = 0;
-done:
-	kfree(fmt);
-	return err;
-}
-
-static noinline long v4l1_compat_set_win_cap_dimensions(
-					struct video_window *win,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err, err1, err2;
-	struct v4l2_format *fmt;
-
-	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
-	if (!fmt) {
-		err = -ENOMEM;
-		return err;
-	}
-	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	drv(file, VIDIOC_STREAMOFF, &fmt->type);
-	err1 = drv(file, VIDIOC_G_FMT, fmt);
-	if (err1 < 0)
-		dprintk("VIDIOCSWIN / VIDIOC_G_FMT: %ld\n", err1);
-	if (err1 == 0) {
-		fmt->fmt.pix.width  = win->width;
-		fmt->fmt.pix.height = win->height;
-		fmt->fmt.pix.field  = V4L2_FIELD_ANY;
-		fmt->fmt.pix.bytesperline = 0;
-		err = drv(file, VIDIOC_S_FMT, fmt);
-		if (err < 0)
-			dprintk("VIDIOCSWIN / VIDIOC_S_FMT #1: %ld\n",
-				err);
-		win->width  = fmt->fmt.pix.width;
-		win->height = fmt->fmt.pix.height;
-	}
-
-	memset(fmt, 0, sizeof(*fmt));
-	fmt->type = V4L2_BUF_TYPE_VIDEO_OVERLAY;
-	fmt->fmt.win.w.left    = win->x;
-	fmt->fmt.win.w.top     = win->y;
-	fmt->fmt.win.w.width   = win->width;
-	fmt->fmt.win.w.height  = win->height;
-	fmt->fmt.win.chromakey = win->chromakey;
-	fmt->fmt.win.clips     = (void __user *)win->clips;
-	fmt->fmt.win.clipcount = win->clipcount;
-	err2 = drv(file, VIDIOC_S_FMT, fmt);
-	if (err2 < 0)
-		dprintk("VIDIOCSWIN / VIDIOC_S_FMT #2: %ld\n", err2);
-
-	if (err1 != 0 && err2 != 0)
-		err = err1;
-	else
-		err = 0;
-	kfree(fmt);
-	return err;
-}
-
-static noinline long v4l1_compat_turn_preview_on_off(
-					int *on,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	enum v4l2_buf_type captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-
-	if (0 == *on) {
-		/* dirty hack time.  But v4l1 has no STREAMOFF
-		 * equivalent in the API, and this one at
-		 * least comes close ... */
-		drv(file, VIDIOC_STREAMOFF, &captype);
-	}
-	err = drv(file, VIDIOC_OVERLAY, on);
-	if (err < 0)
-		dprintk("VIDIOCCAPTURE / VIDIOC_PREVIEW: %ld\n", err);
-	return err;
-}
-
-static noinline long v4l1_compat_get_input_info(
-					struct video_channel *chan,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_input	input2;
-	v4l2_std_id    		sid;
-
-	memset(&input2, 0, sizeof(input2));
-	input2.index = chan->channel;
-	err = drv(file, VIDIOC_ENUMINPUT, &input2);
-	if (err < 0) {
-		dprintk("VIDIOCGCHAN / VIDIOC_ENUMINPUT: "
-			"channel=%d err=%ld\n", chan->channel, err);
-		goto done;
-	}
-	chan->channel = input2.index;
-	memcpy(chan->name, input2.name,
-	       min(sizeof(chan->name), sizeof(input2.name)));
-	chan->name[sizeof(chan->name) - 1] = 0;
-	chan->tuners = (input2.type == V4L2_INPUT_TYPE_TUNER) ? 1 : 0;
-	chan->flags = (chan->tuners) ? VIDEO_VC_TUNER : 0;
-	switch (input2.type) {
-	case V4L2_INPUT_TYPE_TUNER:
-		chan->type = VIDEO_TYPE_TV;
-		break;
-	default:
-	case V4L2_INPUT_TYPE_CAMERA:
-		chan->type = VIDEO_TYPE_CAMERA;
-		break;
-	}
-	chan->norm = 0;
-	/* Note: G_STD might not be present for radio receivers,
-	 * so we should ignore any errors. */
-	if (drv(file, VIDIOC_G_STD, &sid) == 0) {
-		if (sid & V4L2_STD_PAL)
-			chan->norm = VIDEO_MODE_PAL;
-		if (sid & V4L2_STD_NTSC)
-			chan->norm = VIDEO_MODE_NTSC;
-		if (sid & V4L2_STD_SECAM)
-			chan->norm = VIDEO_MODE_SECAM;
-		if (sid == V4L2_STD_ALL)
-			chan->norm = VIDEO_MODE_AUTO;
-	}
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_set_input(
-					struct video_channel *chan,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	v4l2_std_id sid = 0;
-
-	err = drv(file, VIDIOC_S_INPUT, &chan->channel);
-	if (err < 0)
-		dprintk("VIDIOCSCHAN / VIDIOC_S_INPUT: %ld\n", err);
-	switch (chan->norm) {
-	case VIDEO_MODE_PAL:
-		sid = V4L2_STD_PAL;
-		break;
-	case VIDEO_MODE_NTSC:
-		sid = V4L2_STD_NTSC;
-		break;
-	case VIDEO_MODE_SECAM:
-		sid = V4L2_STD_SECAM;
-		break;
-	case VIDEO_MODE_AUTO:
-		sid = V4L2_STD_ALL;
-		break;
-	}
-	if (0 != sid) {
-		err = drv(file, VIDIOC_S_STD, &sid);
-		if (err < 0)
-			dprintk("VIDIOCSCHAN / VIDIOC_S_STD: %ld\n", err);
-	}
-	return err;
-}
-
-static noinline long v4l1_compat_get_picture(
-					struct video_picture *pict,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_format *fmt;
-
-	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
-	if (!fmt) {
-		err = -ENOMEM;
-		return err;
-	}
-
-	pict->brightness = get_v4l_control(file,
-					   V4L2_CID_BRIGHTNESS, drv);
-	pict->hue = get_v4l_control(file,
-				    V4L2_CID_HUE, drv);
-	pict->contrast = get_v4l_control(file,
-					 V4L2_CID_CONTRAST, drv);
-	pict->colour = get_v4l_control(file,
-				       V4L2_CID_SATURATION, drv);
-	pict->whiteness = get_v4l_control(file,
-					  V4L2_CID_WHITENESS, drv);
-
-	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_G_FMT, fmt);
-	if (err < 0) {
-		dprintk("VIDIOCGPICT / VIDIOC_G_FMT: %ld\n", err);
-		goto done;
-	}
-
-	if (fmt->fmt.pix.width)
-	{
-		pict->depth   = ((fmt->fmt.pix.bytesperline << 3)
-				 + (fmt->fmt.pix.width - 1))
-				 / fmt->fmt.pix.width;
-	} else {
-		err = -EINVAL;
-		goto done;
-	}
-
-	pict->palette = pixelformat_to_palette(
-		fmt->fmt.pix.pixelformat);
-done:
-	kfree(fmt);
-	return err;
-}
-
-static noinline long v4l1_compat_set_picture(
-					struct video_picture *pict,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_framebuffer fbuf;
-	int mem_err = 0, ovl_err = 0;
-	struct v4l2_format *fmt;
-
-	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
-	if (!fmt) {
-		err = -ENOMEM;
-		return err;
-	}
-	memset(&fbuf, 0, sizeof(fbuf));
-
-	set_v4l_control(file,
-			V4L2_CID_BRIGHTNESS, pict->brightness, drv);
-	set_v4l_control(file,
-			V4L2_CID_HUE, pict->hue, drv);
-	set_v4l_control(file,
-			V4L2_CID_CONTRAST, pict->contrast, drv);
-	set_v4l_control(file,
-			V4L2_CID_SATURATION, pict->colour, drv);
-	set_v4l_control(file,
-			V4L2_CID_WHITENESS, pict->whiteness, drv);
-	/*
-	 * V4L1 uses this ioctl to set both memory capture and overlay
-	 * pixel format, while V4L2 has two different ioctls for this.
-	 * Some cards may not support one or the other, and may support
-	 * different pixel formats for memory vs overlay.
-	 */
-
-	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_G_FMT, fmt);
-	/* If VIDIOC_G_FMT failed, then the driver likely doesn't
-	   support memory capture.  Trying to set the memory capture
-	   parameters would be pointless.  */
-	if (err < 0) {
-		dprintk("VIDIOCSPICT / VIDIOC_G_FMT: %ld\n", err);
-		mem_err = -1000;  /* didn't even try */
-	} else if (fmt->fmt.pix.pixelformat !=
-		 palette_to_pixelformat(pict->palette)) {
-		fmt->fmt.pix.pixelformat = palette_to_pixelformat(
-			pict->palette);
-		mem_err = drv(file, VIDIOC_S_FMT, fmt);
-		if (mem_err < 0)
-			dprintk("VIDIOCSPICT / VIDIOC_S_FMT: %d\n",
-				mem_err);
-	}
-
-	err = drv(file, VIDIOC_G_FBUF, &fbuf);
-	/* If VIDIOC_G_FBUF failed, then the driver likely doesn't
-	   support overlay.  Trying to set the overlay parameters
-	   would be quite pointless.  */
-	if (err < 0) {
-		dprintk("VIDIOCSPICT / VIDIOC_G_FBUF: %ld\n", err);
-		ovl_err = -1000;  /* didn't even try */
-	} else if (fbuf.fmt.pixelformat !=
-		 palette_to_pixelformat(pict->palette)) {
-		fbuf.fmt.pixelformat = palette_to_pixelformat(
-			pict->palette);
-		ovl_err = drv(file, VIDIOC_S_FBUF, &fbuf);
-		if (ovl_err < 0)
-			dprintk("VIDIOCSPICT / VIDIOC_S_FBUF: %d\n",
-				ovl_err);
-	}
-	if (ovl_err < 0 && mem_err < 0) {
-		/* ioctl failed, couldn't set either parameter */
-		if (mem_err != -1000)
-			err = mem_err;
-		else if (ovl_err == -EPERM)
-			err = 0;
-		else
-			err = ovl_err;
-	} else
-		err = 0;
-	kfree(fmt);
-	return err;
-}
-
-static noinline long v4l1_compat_get_tuner(
-					struct video_tuner *tun,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	int i;
-	struct v4l2_tuner	tun2;
-	struct v4l2_standard	std2;
-	v4l2_std_id    		sid;
-
-	memset(&tun2, 0, sizeof(tun2));
-	err = drv(file, VIDIOC_G_TUNER, &tun2);
-	if (err < 0) {
-		dprintk("VIDIOCGTUNER / VIDIOC_G_TUNER: %ld\n", err);
-		goto done;
-	}
-	memcpy(tun->name, tun2.name,
-	       min(sizeof(tun->name), sizeof(tun2.name)));
-	tun->name[sizeof(tun->name) - 1] = 0;
-	tun->rangelow = tun2.rangelow;
-	tun->rangehigh = tun2.rangehigh;
-	tun->flags = 0;
-	tun->mode = VIDEO_MODE_AUTO;
-
-	for (i = 0; i < 64; i++) {
-		memset(&std2, 0, sizeof(std2));
-		std2.index = i;
-		if (0 != drv(file, VIDIOC_ENUMSTD, &std2))
-			break;
-		if (std2.id & V4L2_STD_PAL)
-			tun->flags |= VIDEO_TUNER_PAL;
-		if (std2.id & V4L2_STD_NTSC)
-			tun->flags |= VIDEO_TUNER_NTSC;
-		if (std2.id & V4L2_STD_SECAM)
-			tun->flags |= VIDEO_TUNER_SECAM;
-	}
-
-	/* Note: G_STD might not be present for radio receivers,
-	 * so we should ignore any errors. */
-	if (drv(file, VIDIOC_G_STD, &sid) == 0) {
-		if (sid & V4L2_STD_PAL)
-			tun->mode = VIDEO_MODE_PAL;
-		if (sid & V4L2_STD_NTSC)
-			tun->mode = VIDEO_MODE_NTSC;
-		if (sid & V4L2_STD_SECAM)
-			tun->mode = VIDEO_MODE_SECAM;
-	}
-
-	if (tun2.capability & V4L2_TUNER_CAP_LOW)
-		tun->flags |= VIDEO_TUNER_LOW;
-	if (tun2.rxsubchans & V4L2_TUNER_SUB_STEREO)
-		tun->flags |= VIDEO_TUNER_STEREO_ON;
-	tun->signal = tun2.signal;
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_select_tuner(
-					struct video_tuner *tun,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_tuner	t;/*84 bytes on x86_64*/
-	memset(&t, 0, sizeof(t));
-
-	t.index = tun->tuner;
-
-	err = drv(file, VIDIOC_S_TUNER, &t);
-	if (err < 0)
-		dprintk("VIDIOCSTUNER / VIDIOC_S_TUNER: %ld\n", err);
-	return err;
-}
-
-static noinline long v4l1_compat_get_frequency(
-					unsigned long *freq,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_frequency   freq2;
-	memset(&freq2, 0, sizeof(freq2));
-
-	freq2.tuner = 0;
-	err = drv(file, VIDIOC_G_FREQUENCY, &freq2);
-	if (err < 0)
-		dprintk("VIDIOCGFREQ / VIDIOC_G_FREQUENCY: %ld\n", err);
-	if (0 == err)
-		*freq = freq2.frequency;
-	return err;
-}
-
-static noinline long v4l1_compat_set_frequency(
-					unsigned long *freq,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_frequency   freq2;
-	memset(&freq2, 0, sizeof(freq2));
-
-	drv(file, VIDIOC_G_FREQUENCY, &freq2);
-	freq2.frequency = *freq;
-	err = drv(file, VIDIOC_S_FREQUENCY, &freq2);
-	if (err < 0)
-		dprintk("VIDIOCSFREQ / VIDIOC_S_FREQUENCY: %ld\n", err);
-	return err;
-}
-
-static noinline long v4l1_compat_get_audio(
-					struct video_audio *aud,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	int i;
-	struct v4l2_queryctrl	qctrl2;
-	struct v4l2_audio	aud2;
-	struct v4l2_tuner	tun2;
-	memset(&aud2, 0, sizeof(aud2));
-
-	err = drv(file, VIDIOC_G_AUDIO, &aud2);
-	if (err < 0) {
-		dprintk("VIDIOCGAUDIO / VIDIOC_G_AUDIO: %ld\n", err);
-		goto done;
-	}
-	memcpy(aud->name, aud2.name,
-	       min(sizeof(aud->name), sizeof(aud2.name)));
-	aud->name[sizeof(aud->name) - 1] = 0;
-	aud->audio = aud2.index;
-	aud->flags = 0;
-	i = get_v4l_control(file, V4L2_CID_AUDIO_VOLUME, drv);
-	if (i >= 0) {
-		aud->volume = i;
-		aud->flags |= VIDEO_AUDIO_VOLUME;
-	}
-	i = get_v4l_control(file, V4L2_CID_AUDIO_BASS, drv);
-	if (i >= 0) {
-		aud->bass = i;
-		aud->flags |= VIDEO_AUDIO_BASS;
-	}
-	i = get_v4l_control(file, V4L2_CID_AUDIO_TREBLE, drv);
-	if (i >= 0) {
-		aud->treble = i;
-		aud->flags |= VIDEO_AUDIO_TREBLE;
-	}
-	i = get_v4l_control(file, V4L2_CID_AUDIO_BALANCE, drv);
-	if (i >= 0) {
-		aud->balance = i;
-		aud->flags |= VIDEO_AUDIO_BALANCE;
-	}
-	i = get_v4l_control(file, V4L2_CID_AUDIO_MUTE, drv);
-	if (i >= 0) {
-		if (i)
-			aud->flags |= VIDEO_AUDIO_MUTE;
-		aud->flags |= VIDEO_AUDIO_MUTABLE;
-	}
-	aud->step = 1;
-	qctrl2.id = V4L2_CID_AUDIO_VOLUME;
-	if (drv(file, VIDIOC_QUERYCTRL, &qctrl2) == 0 &&
-	    !(qctrl2.flags & V4L2_CTRL_FLAG_DISABLED))
-		aud->step = qctrl2.step;
-	aud->mode = 0;
-
-	memset(&tun2, 0, sizeof(tun2));
-	err = drv(file, VIDIOC_G_TUNER, &tun2);
-	if (err < 0) {
-		dprintk("VIDIOCGAUDIO / VIDIOC_G_TUNER: %ld\n", err);
-		err = 0;
-		goto done;
-	}
-
-	if (tun2.rxsubchans & V4L2_TUNER_SUB_LANG2)
-		aud->mode = VIDEO_SOUND_LANG1 | VIDEO_SOUND_LANG2;
-	else if (tun2.rxsubchans & V4L2_TUNER_SUB_STEREO)
-		aud->mode = VIDEO_SOUND_STEREO;
-	else if (tun2.rxsubchans & V4L2_TUNER_SUB_MONO)
-		aud->mode = VIDEO_SOUND_MONO;
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_set_audio(
-					struct video_audio *aud,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_audio	aud2;
-	struct v4l2_tuner	tun2;
-
-	memset(&aud2, 0, sizeof(aud2));
-	memset(&tun2, 0, sizeof(tun2));
-
-	aud2.index = aud->audio;
-	err = drv(file, VIDIOC_S_AUDIO, &aud2);
-	if (err < 0) {
-		dprintk("VIDIOCSAUDIO / VIDIOC_S_AUDIO: %ld\n", err);
-		goto done;
-	}
-
-	set_v4l_control(file, V4L2_CID_AUDIO_VOLUME,
-			aud->volume, drv);
-	set_v4l_control(file, V4L2_CID_AUDIO_BASS,
-			aud->bass, drv);
-	set_v4l_control(file, V4L2_CID_AUDIO_TREBLE,
-			aud->treble, drv);
-	set_v4l_control(file, V4L2_CID_AUDIO_BALANCE,
-			aud->balance, drv);
-	set_v4l_control(file, V4L2_CID_AUDIO_MUTE,
-			!!(aud->flags & VIDEO_AUDIO_MUTE), drv);
-
-	err = drv(file, VIDIOC_G_TUNER, &tun2);
-	if (err < 0)
-		dprintk("VIDIOCSAUDIO / VIDIOC_G_TUNER: %ld\n", err);
-	if (err == 0) {
-		switch (aud->mode) {
-		default:
-		case VIDEO_SOUND_MONO:
-		case VIDEO_SOUND_LANG1:
-			tun2.audmode = V4L2_TUNER_MODE_MONO;
-			break;
-		case VIDEO_SOUND_STEREO:
-			tun2.audmode = V4L2_TUNER_MODE_STEREO;
-			break;
-		case VIDEO_SOUND_LANG2:
-			tun2.audmode = V4L2_TUNER_MODE_LANG2;
-			break;
-		}
-		err = drv(file, VIDIOC_S_TUNER, &tun2);
-		if (err < 0)
-			dprintk("VIDIOCSAUDIO / VIDIOC_S_TUNER: %ld\n", err);
-	}
-	err = 0;
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_capture_frame(
-					struct video_mmap *mm,
-					struct file *file,
-					v4l2_kioctl drv)
-{
-	long err;
-	enum v4l2_buf_type      captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	struct v4l2_buffer	buf;
-	struct v4l2_format	*fmt;
-
-	fmt = kzalloc(sizeof(*fmt), GFP_KERNEL);
-	if (!fmt) {
-		err = -ENOMEM;
-		return err;
-	}
-	memset(&buf, 0, sizeof(buf));
-
-	fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_G_FMT, fmt);
-	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_G_FMT: %ld\n", err);
-		goto done;
-	}
-	if (mm->width   != fmt->fmt.pix.width  ||
-	    mm->height  != fmt->fmt.pix.height ||
-	    palette_to_pixelformat(mm->format) !=
-	    fmt->fmt.pix.pixelformat) {
-		/* New capture format...  */
-		fmt->fmt.pix.width = mm->width;
-		fmt->fmt.pix.height = mm->height;
-		fmt->fmt.pix.pixelformat =
-			palette_to_pixelformat(mm->format);
-		fmt->fmt.pix.field = V4L2_FIELD_ANY;
-		fmt->fmt.pix.bytesperline = 0;
-		err = drv(file, VIDIOC_S_FMT, fmt);
-		if (err < 0) {
-			dprintk("VIDIOCMCAPTURE / VIDIOC_S_FMT: %ld\n", err);
-			goto done;
-		}
-	}
-	buf.index = mm->frame;
-	buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_QUERYBUF, &buf);
-	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_QUERYBUF: %ld\n", err);
-		goto done;
-	}
-	err = drv(file, VIDIOC_QBUF, &buf);
-	if (err < 0) {
-		dprintk("VIDIOCMCAPTURE / VIDIOC_QBUF: %ld\n", err);
-		goto done;
-	}
-	err = drv(file, VIDIOC_STREAMON, &captype);
-	if (err < 0)
-		dprintk("VIDIOCMCAPTURE / VIDIOC_STREAMON: %ld\n", err);
-done:
-	kfree(fmt);
-	return err;
-}
-
-static noinline long v4l1_compat_sync(
-				int *i,
-				struct file *file,
-				v4l2_kioctl drv)
-{
-	long err;
-	enum v4l2_buf_type captype = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	struct v4l2_buffer buf;
-	struct poll_wqueues *pwq;
-
-	memset(&buf, 0, sizeof(buf));
-	buf.index = *i;
-	buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	err = drv(file, VIDIOC_QUERYBUF, &buf);
-	if (err < 0) {
-		/*  No such buffer */
-		dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %ld\n", err);
-		goto done;
-	}
-	if (!(buf.flags & V4L2_BUF_FLAG_MAPPED)) {
-		/* Buffer is not mapped  */
-		err = -EINVAL;
-		goto done;
-	}
-
-	/* make sure capture actually runs so we don't block forever */
-	err = drv(file, VIDIOC_STREAMON, &captype);
-	if (err < 0) {
-		dprintk("VIDIOCSYNC / VIDIOC_STREAMON: %ld\n", err);
-		goto done;
-	}
-
-	pwq = kmalloc(sizeof(*pwq), GFP_KERNEL);
-	/*  Loop as long as the buffer is queued, but not done  */
-	while ((buf.flags & (V4L2_BUF_FLAG_QUEUED | V4L2_BUF_FLAG_DONE))
-						== V4L2_BUF_FLAG_QUEUED) {
-		err = poll_one(file, pwq);
-		if (err < 0 ||	/* error or sleep was interrupted  */
-		    err == 0)	/* timeout? Shouldn't occur.  */
-			break;
-		err = drv(file, VIDIOC_QUERYBUF, &buf);
-		if (err < 0)
-			dprintk("VIDIOCSYNC / VIDIOC_QUERYBUF: %ld\n", err);
-	}
-	kfree(pwq);
-	if (!(buf.flags & V4L2_BUF_FLAG_DONE)) /* not done */
-		goto done;
-	do {
-		err = drv(file, VIDIOC_DQBUF, &buf);
-		if (err < 0)
-			dprintk("VIDIOCSYNC / VIDIOC_DQBUF: %ld\n", err);
-	} while (err == 0 && buf.index != *i);
-done:
-	return err;
-}
-
-static noinline long v4l1_compat_get_vbi_format(
-				struct vbi_format *fmt,
-				struct file *file,
-				v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_format *fmt2;
-
-	fmt2 = kzalloc(sizeof(*fmt2), GFP_KERNEL);
-	if (!fmt2) {
-		err = -ENOMEM;
-		return err;
-	}
-	fmt2->type = V4L2_BUF_TYPE_VBI_CAPTURE;
-
-	err = drv(file, VIDIOC_G_FMT, fmt2);
-	if (err < 0) {
-		dprintk("VIDIOCGVBIFMT / VIDIOC_G_FMT: %ld\n", err);
-		goto done;
-	}
-	if (fmt2->fmt.vbi.sample_format != V4L2_PIX_FMT_GREY) {
-		err = -EINVAL;
-		goto done;
-	}
-	memset(fmt, 0, sizeof(*fmt));
-	fmt->samples_per_line = fmt2->fmt.vbi.samples_per_line;
-	fmt->sampling_rate    = fmt2->fmt.vbi.sampling_rate;
-	fmt->sample_format    = VIDEO_PALETTE_RAW;
-	fmt->start[0]         = fmt2->fmt.vbi.start[0];
-	fmt->count[0]         = fmt2->fmt.vbi.count[0];
-	fmt->start[1]         = fmt2->fmt.vbi.start[1];
-	fmt->count[1]         = fmt2->fmt.vbi.count[1];
-	fmt->flags            = fmt2->fmt.vbi.flags & 0x03;
-done:
-	kfree(fmt2);
-	return err;
-}
-
-static noinline long v4l1_compat_set_vbi_format(
-				struct vbi_format *fmt,
-				struct file *file,
-				v4l2_kioctl drv)
-{
-	long err;
-	struct v4l2_format	*fmt2 = NULL;
-
-	if (VIDEO_PALETTE_RAW != fmt->sample_format) {
-		err = -EINVAL;
-		return err;
-	}
-
-	fmt2 = kzalloc(sizeof(*fmt2), GFP_KERNEL);
-	if (!fmt2) {
-		err = -ENOMEM;
-		return err;
-	}
-	fmt2->type = V4L2_BUF_TYPE_VBI_CAPTURE;
-	fmt2->fmt.vbi.samples_per_line = fmt->samples_per_line;
-	fmt2->fmt.vbi.sampling_rate    = fmt->sampling_rate;
-	fmt2->fmt.vbi.sample_format    = V4L2_PIX_FMT_GREY;
-	fmt2->fmt.vbi.start[0]         = fmt->start[0];
-	fmt2->fmt.vbi.count[0]         = fmt->count[0];
-	fmt2->fmt.vbi.start[1]         = fmt->start[1];
-	fmt2->fmt.vbi.count[1]         = fmt->count[1];
-	fmt2->fmt.vbi.flags            = fmt->flags;
-	err = drv(file, VIDIOC_TRY_FMT, fmt2);
-	if (err < 0) {
-		dprintk("VIDIOCSVBIFMT / VIDIOC_TRY_FMT: %ld\n", err);
-		goto done;
-	}
-
-	if (fmt2->fmt.vbi.samples_per_line != fmt->samples_per_line ||
-	    fmt2->fmt.vbi.sampling_rate    != fmt->sampling_rate    ||
-	    fmt2->fmt.vbi.sample_format    != V4L2_PIX_FMT_GREY     ||
-	    fmt2->fmt.vbi.start[0]         != fmt->start[0]         ||
-	    fmt2->fmt.vbi.count[0]         != fmt->count[0]         ||
-	    fmt2->fmt.vbi.start[1]         != fmt->start[1]         ||
-	    fmt2->fmt.vbi.count[1]         != fmt->count[1]         ||
-	    fmt2->fmt.vbi.flags            != fmt->flags) {
-		err = -EINVAL;
-		goto done;
-	}
-	err = drv(file, VIDIOC_S_FMT, fmt2);
-	if (err < 0)
-		dprintk("VIDIOCSVBIFMT / VIDIOC_S_FMT: %ld\n", err);
-done:
-	kfree(fmt2);
-	return err;
-}
-
-/*
- *	This function is exported.
- */
-long
-v4l_compat_translate_ioctl(struct file		*file,
-			   int			cmd,
-			   void			*arg,
-			   v4l2_kioctl          drv)
-{
-	long err;
-
-	switch (cmd) {
-	case VIDIOCGCAP:	/* capability */
-		err = v4l1_compat_get_capabilities(arg, file, drv);
-		break;
-	case VIDIOCGFBUF: /*  get frame buffer  */
-		err = v4l1_compat_get_frame_buffer(arg, file, drv);
-		break;
-	case VIDIOCSFBUF: /*  set frame buffer  */
-		err = v4l1_compat_set_frame_buffer(arg, file, drv);
-		break;
-	case VIDIOCGWIN: /*  get window or capture dimensions  */
-		err = v4l1_compat_get_win_cap_dimensions(arg, file, drv);
-		break;
-	case VIDIOCSWIN: /*  set window and/or capture dimensions  */
-		err = v4l1_compat_set_win_cap_dimensions(arg, file, drv);
-		break;
-	case VIDIOCCAPTURE: /*  turn on/off preview  */
-		err = v4l1_compat_turn_preview_on_off(arg, file, drv);
-		break;
-	case VIDIOCGCHAN: /*  get input information  */
-		err = v4l1_compat_get_input_info(arg, file, drv);
-		break;
-	case VIDIOCSCHAN: /*  set input  */
-		err = v4l1_compat_set_input(arg, file, drv);
-		break;
-	case VIDIOCGPICT: /*  get tone controls & partial capture format  */
-		err = v4l1_compat_get_picture(arg, file, drv);
-		break;
-	case VIDIOCSPICT: /*  set tone controls & partial capture format  */
-		err = v4l1_compat_set_picture(arg, file, drv);
-		break;
-	case VIDIOCGTUNER: /*  get tuner information  */
-		err = v4l1_compat_get_tuner(arg, file, drv);
-		break;
-	case VIDIOCSTUNER: /*  select a tuner input  */
-		err = v4l1_compat_select_tuner(arg, file, drv);
-		break;
-	case VIDIOCGFREQ: /*  get frequency  */
-		err = v4l1_compat_get_frequency(arg, file, drv);
-		break;
-	case VIDIOCSFREQ: /*  set frequency  */
-		err = v4l1_compat_set_frequency(arg, file, drv);
-		break;
-	case VIDIOCGAUDIO: /*  get audio properties/controls  */
-		err = v4l1_compat_get_audio(arg, file, drv);
-		break;
-	case VIDIOCSAUDIO: /*  set audio controls  */
-		err = v4l1_compat_set_audio(arg, file, drv);
-		break;
-	case VIDIOCMCAPTURE: /*  capture a frame  */
-		err = v4l1_compat_capture_frame(arg, file, drv);
-		break;
-	case VIDIOCSYNC: /*  wait for a frame  */
-		err = v4l1_compat_sync(arg, file, drv);
-		break;
-	case VIDIOCGVBIFMT: /* query VBI data capture format */
-		err = v4l1_compat_get_vbi_format(arg, file, drv);
-		break;
-	case VIDIOCSVBIFMT:
-		err = v4l1_compat_set_vbi_format(arg, file, drv);
-		break;
-	default:
-		err = -ENOIOCTLCMD;
-		break;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL(v4l_compat_translate_ioctl);
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index e30e8dfb6205..425ac1e4a00e 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -22,212 +22,6 @@
 
 #ifdef CONFIG_COMPAT
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-struct video_tuner32 {
-	compat_int_t tuner;
-	char name[32];
-	compat_ulong_t rangelow, rangehigh;
-	u32 flags;	/* It is really u32 in videodev.h */
-	u16 mode, signal;
-};
-
-static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_tuner32)) ||
-		get_user(kp->tuner, &up->tuner) ||
-		copy_from_user(kp->name, up->name, 32) ||
-		get_user(kp->rangelow, &up->rangelow) ||
-		get_user(kp->rangehigh, &up->rangehigh) ||
-		get_user(kp->flags, &up->flags) ||
-		get_user(kp->mode, &up->mode) ||
-		get_user(kp->signal, &up->signal))
-		return -EFAULT;
-	return 0;
-}
-
-static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
-{
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct video_tuner32)) ||
-		put_user(kp->tuner, &up->tuner) ||
-		copy_to_user(up->name, kp->name, 32) ||
-		put_user(kp->rangelow, &up->rangelow) ||
-		put_user(kp->rangehigh, &up->rangehigh) ||
-		put_user(kp->flags, &up->flags) ||
-		put_user(kp->mode, &up->mode) ||
-		put_user(kp->signal, &up->signal))
-			return -EFAULT;
-	return 0;
-}
-
-struct video_buffer32 {
-	compat_caddr_t base;
-	compat_int_t height, width, depth, bytesperline;
-};
-
-static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-	u32 tmp;
-
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_buffer32)) ||
-		get_user(tmp, &up->base) ||
-		get_user(kp->height, &up->height) ||
-		get_user(kp->width, &up->width) ||
-		get_user(kp->depth, &up->depth) ||
-		get_user(kp->bytesperline, &up->bytesperline))
-			return -EFAULT;
-
-	/* This is actually a physical address stored
-	 * as a void pointer.
-	 */
-	kp->base = (void *)(unsigned long) tmp;
-
-	return 0;
-}
-
-static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
-{
-	u32 tmp = (u32)((unsigned long)kp->base);
-
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct video_buffer32)) ||
-		put_user(tmp, &up->base) ||
-		put_user(kp->height, &up->height) ||
-		put_user(kp->width, &up->width) ||
-		put_user(kp->depth, &up->depth) ||
-		put_user(kp->bytesperline, &up->bytesperline))
-			return -EFAULT;
-	return 0;
-}
-
-struct video_clip32 {
-	s32 x, y, width, height;	/* It's really s32 in videodev.h */
-	compat_caddr_t next;
-};
-
-struct video_window32 {
-	u32 x, y, width, height, chromakey, flags;
-	compat_caddr_t clips;
-	compat_int_t clipcount;
-};
-
-static int get_video_window32(struct video_window *kp, struct video_window32 __user *up)
-{
-	struct video_clip __user *uclips;
-	struct video_clip __user *kclips;
-	compat_caddr_t p;
-	int nclips;
-
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)))
-		return -EFAULT;
-
-	if (get_user(nclips, &up->clipcount))
-		return -EFAULT;
-
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)) ||
-	    get_user(kp->x, &up->x) ||
-	    get_user(kp->y, &up->y) ||
-	    get_user(kp->width, &up->width) ||
-	    get_user(kp->height, &up->height) ||
-	    get_user(kp->chromakey, &up->chromakey) ||
-	    get_user(kp->flags, &up->flags) ||
-	    get_user(kp->clipcount, &up->clipcount))
-		return -EFAULT;
-
-	nclips = kp->clipcount;
-	kp->clips = NULL;
-
-	if (nclips == 0)
-		return 0;
-	if (get_user(p, &up->clips))
-		return -EFAULT;
-	uclips = compat_ptr(p);
-
-	/* If nclips < 0, then it is a clipping bitmap of size
-	   VIDEO_CLIPMAP_SIZE */
-	if (nclips < 0) {
-		if (!access_ok(VERIFY_READ, uclips, VIDEO_CLIPMAP_SIZE))
-			return -EFAULT;
-		kp->clips = compat_alloc_user_space(VIDEO_CLIPMAP_SIZE);
-		if (copy_in_user(kp->clips, uclips, VIDEO_CLIPMAP_SIZE))
-			return -EFAULT;
-		return 0;
-	}
-
-	/* Otherwise it is an array of video_clip structs. */
-	if (!access_ok(VERIFY_READ, uclips, nclips * sizeof(struct video_clip)))
-		return -EFAULT;
-
-	kp->clips = compat_alloc_user_space(nclips * sizeof(struct video_clip));
-	kclips = kp->clips;
-	while (nclips--) {
-		int err;
-
-		err = copy_in_user(&kclips->x, &uclips->x, sizeof(kclips->x));
-		err |= copy_in_user(&kclips->y, &uclips->y, sizeof(kclips->y));
-		err |= copy_in_user(&kclips->width, &uclips->width, sizeof(kclips->width));
-		err |= copy_in_user(&kclips->height, &uclips->height, sizeof(kclips->height));
-		kclips->next = NULL;
-		if (err)
-			return -EFAULT;
-		kclips++;
-		uclips++;
-	}
-	return 0;
-}
-
-/* You get back everything except the clips... */
-static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
-{
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct video_window32)) ||
-		put_user(kp->x, &up->x) ||
-		put_user(kp->y, &up->y) ||
-		put_user(kp->width, &up->width) ||
-		put_user(kp->height, &up->height) ||
-		put_user(kp->chromakey, &up->chromakey) ||
-		put_user(kp->flags, &up->flags) ||
-		put_user(kp->clipcount, &up->clipcount))
-			return -EFAULT;
-	return 0;
-}
-
-struct video_code32 {
-	char		loadwhat[16];	/* name or tag of file being passed */
-	compat_int_t	datasize;
-	compat_uptr_t	data;
-};
-
-static struct video_code __user *get_microcode32(struct video_code32 *kp)
-{
-	struct video_code __user *up;
-
-	up = compat_alloc_user_space(sizeof(*up));
-
-	/*
-	 * NOTE! We don't actually care if these fail. If the
-	 * user address is invalid, the native ioctl will do
-	 * the error handling for us
-	 */
-	(void) copy_to_user(up->loadwhat, kp->loadwhat, sizeof(up->loadwhat));
-	(void) put_user(kp->datasize, &up->datasize);
-	(void) put_user(compat_ptr(kp->data), &up->data);
-	return up;
-}
-
-#define VIDIOCGTUNER32		_IOWR('v', 4, struct video_tuner32)
-#define VIDIOCSTUNER32		_IOW('v', 5, struct video_tuner32)
-#define VIDIOCGWIN32		_IOR('v', 9, struct video_window32)
-#define VIDIOCSWIN32		_IOW('v', 10, struct video_window32)
-#define VIDIOCGFBUF32		_IOR('v', 11, struct video_buffer32)
-#define VIDIOCSFBUF32		_IOW('v', 12, struct video_buffer32)
-#define VIDIOCGFREQ32		_IOR('v', 14, u32)
-#define VIDIOCSFREQ32		_IOW('v', 15, u32)
-#define VIDIOCSMICROCODE32	_IOW('v', 27, struct video_code32)
-
-#define VIDIOCCAPTURE32		_IOW('v', 8, s32)
-#define VIDIOCSYNC32		_IOW('v', 18, s32)
-#define VIDIOCSWRITEMODE32	_IOW('v', 25, s32)
-
-#endif
-
 static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	long ret = -ENOIOCTLCMD;
@@ -741,13 +535,6 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext
 static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	union {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-		struct video_tuner vt;
-		struct video_buffer vb;
-		struct video_window vw;
-		struct video_code32 vc;
-		struct video_audio va;
-#endif
 		struct v4l2_format v2f;
 		struct v4l2_buffer v2b;
 		struct v4l2_framebuffer v2fb;
@@ -763,17 +550,6 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 
 	/* First, convert the command. */
 	switch (cmd) {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
-	case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
-	case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
-	case VIDIOCSWIN32: cmd = VIDIOCSWIN; break;
-	case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
-	case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
-	case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
-	case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
-	case VIDIOCSMICROCODE32: cmd = VIDIOCSMICROCODE; break;
-#endif
 	case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break;
 	case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break;
 	case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break;
@@ -800,46 +576,6 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 	}
 
 	switch (cmd) {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCSTUNER:
-	case VIDIOCGTUNER:
-		err = get_video_tuner32(&karg.vt, up);
-		compatible_arg = 0;
-		break;
-
-	case VIDIOCSFBUF:
-		err = get_video_buffer32(&karg.vb, up);
-		compatible_arg = 0;
-		break;
-
-	case VIDIOCSWIN:
-		err = get_video_window32(&karg.vw, up);
-		compatible_arg = 0;
-		break;
-
-	case VIDIOCGWIN:
-	case VIDIOCGFBUF:
-	case VIDIOCGFREQ:
-		compatible_arg = 0;
-		break;
-
-	case VIDIOCSMICROCODE:
-		/* Copy the 32-bit "video_code32" to kernel space */
-		if (copy_from_user(&karg.vc, up, sizeof(karg.vc)))
-			return -EFAULT;
-		/* Convert the 32-bit version to a 64-bit version in user space */
-		up = get_microcode32(&karg.vc);
-		break;
-
-	case VIDIOCSFREQ:
-		err = get_user(karg.vx, (u32 __user *)up);
-		compatible_arg = 0;
-		break;
-
-	case VIDIOCCAPTURE:
-	case VIDIOCSYNC:
-	case VIDIOCSWRITEMODE:
-#endif
 	case VIDIOC_OVERLAY:
 	case VIDIOC_STREAMON:
 	case VIDIOC_STREAMOFF:
@@ -922,23 +658,6 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 		return err;
 
 	switch (cmd) {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGTUNER:
-		err = put_video_tuner32(&karg.vt, up);
-		break;
-
-	case VIDIOCGWIN:
-		err = put_video_window32(&karg.vw, up);
-		break;
-
-	case VIDIOCGFBUF:
-		err = put_video_buffer32(&karg.vb, up);
-		break;
-
-	case VIDIOCGFREQ:
-		err = put_user(((u32)karg.vx), (u32 __user *)up);
-		break;
-#endif
 	case VIDIOC_S_INPUT:
 	case VIDIOC_S_OUTPUT:
 	case VIDIOC_G_INPUT:
@@ -981,37 +700,6 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 		return ret;
 
 	switch (cmd) {
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGCAP:
-	case VIDIOCGCHAN:
-	case VIDIOCSCHAN:
-	case VIDIOCGTUNER32:
-	case VIDIOCSTUNER32:
-	case VIDIOCGPICT:
-	case VIDIOCSPICT:
-	case VIDIOCCAPTURE32:
-	case VIDIOCGWIN32:
-	case VIDIOCSWIN32:
-	case VIDIOCGFBUF32:
-	case VIDIOCSFBUF32:
-	case VIDIOCKEY:
-	case VIDIOCGFREQ32:
-	case VIDIOCSFREQ32:
-	case VIDIOCGAUDIO:
-	case VIDIOCSAUDIO:
-	case VIDIOCSYNC32:
-	case VIDIOCMCAPTURE:
-	case VIDIOCGMBUF:
-	case VIDIOCGUNIT:
-	case VIDIOCGCAPTURE:
-	case VIDIOCSCAPTURE:
-	case VIDIOCSPLAYMODE:
-	case VIDIOCSWRITEMODE32:
-	case VIDIOCGPLAYINFO:
-	case VIDIOCSMICROCODE32:
-	case VIDIOCGVBIFMT:
-	case VIDIOCSVBIFMT:
-#endif
 #ifdef __OLD_VIDIOC_
 	case VIDIOC_OVERLAY32_OLD:
 	case VIDIOC_S_PARM_OLD:
@@ -1096,19 +784,6 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	/* BTTV specific... */
-	case _IOW('v',  BASE_VIDIOCPRIVATE+0, char [256]):
-	case _IOR('v',  BASE_VIDIOCPRIVATE+1, char [256]):
-	case _IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int):
-	case _IOW('v' , BASE_VIDIOCPRIVATE+3, char [16]): /* struct bttv_pll_info */
-	case _IOR('v' , BASE_VIDIOCPRIVATE+4, int):
-	case _IOR('v' , BASE_VIDIOCPRIVATE+5, int):
-	case _IOR('v' , BASE_VIDIOCPRIVATE+6, int):
-	case _IOR('v' , BASE_VIDIOCPRIVATE+7, int):
-		ret = native_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
-		break;
-#endif
 	default:
 		printk(KERN_WARNING "compat_ioctl32: "
 			"unknown ioctl '%c', dir=%d, #%d (0x%08x)\n",
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index dd9283fcb564..7e47f15f350d 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -18,12 +18,8 @@
 #include <linux/kernel.h>
 
 #define __OLD_VIDIOC_ /* To allow fixing old calls */
-#include <linux/videodev.h>
 #include <linux/videodev2.h>
 
-#ifdef CONFIG_VIDEO_V4L1
-#include <linux/videodev.h>
-#endif
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-ctrls.h>
@@ -183,42 +179,6 @@ static const char *v4l2_memory_names[] = {
 
 /* ------------------------------------------------------------------ */
 /* debug help functions                                               */
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static const char *v4l1_ioctls[] = {
-	[_IOC_NR(VIDIOCGCAP)]       = "VIDIOCGCAP",
-	[_IOC_NR(VIDIOCGCHAN)]      = "VIDIOCGCHAN",
-	[_IOC_NR(VIDIOCSCHAN)]      = "VIDIOCSCHAN",
-	[_IOC_NR(VIDIOCGTUNER)]     = "VIDIOCGTUNER",
-	[_IOC_NR(VIDIOCSTUNER)]     = "VIDIOCSTUNER",
-	[_IOC_NR(VIDIOCGPICT)]      = "VIDIOCGPICT",
-	[_IOC_NR(VIDIOCSPICT)]      = "VIDIOCSPICT",
-	[_IOC_NR(VIDIOCCAPTURE)]    = "VIDIOCCAPTURE",
-	[_IOC_NR(VIDIOCGWIN)]       = "VIDIOCGWIN",
-	[_IOC_NR(VIDIOCSWIN)]       = "VIDIOCSWIN",
-	[_IOC_NR(VIDIOCGFBUF)]      = "VIDIOCGFBUF",
-	[_IOC_NR(VIDIOCSFBUF)]      = "VIDIOCSFBUF",
-	[_IOC_NR(VIDIOCKEY)]        = "VIDIOCKEY",
-	[_IOC_NR(VIDIOCGFREQ)]      = "VIDIOCGFREQ",
-	[_IOC_NR(VIDIOCSFREQ)]      = "VIDIOCSFREQ",
-	[_IOC_NR(VIDIOCGAUDIO)]     = "VIDIOCGAUDIO",
-	[_IOC_NR(VIDIOCSAUDIO)]     = "VIDIOCSAUDIO",
-	[_IOC_NR(VIDIOCSYNC)]       = "VIDIOCSYNC",
-	[_IOC_NR(VIDIOCMCAPTURE)]   = "VIDIOCMCAPTURE",
-	[_IOC_NR(VIDIOCGMBUF)]      = "VIDIOCGMBUF",
-	[_IOC_NR(VIDIOCGUNIT)]      = "VIDIOCGUNIT",
-	[_IOC_NR(VIDIOCGCAPTURE)]   = "VIDIOCGCAPTURE",
-	[_IOC_NR(VIDIOCSCAPTURE)]   = "VIDIOCSCAPTURE",
-	[_IOC_NR(VIDIOCSPLAYMODE)]  = "VIDIOCSPLAYMODE",
-	[_IOC_NR(VIDIOCSWRITEMODE)] = "VIDIOCSWRITEMODE",
-	[_IOC_NR(VIDIOCGPLAYINFO)]  = "VIDIOCGPLAYINFO",
-	[_IOC_NR(VIDIOCSMICROCODE)] = "VIDIOCSMICROCODE",
-	[_IOC_NR(VIDIOCGVBIFMT)]    = "VIDIOCGVBIFMT",
-	[_IOC_NR(VIDIOCSVBIFMT)]    = "VIDIOCSVBIFMT"
-};
-#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls)
-#endif
-
 static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_QUERYCAP)]         = "VIDIOC_QUERYCAP",
 	[_IOC_NR(VIDIOC_RESERVED)]         = "VIDIOC_RESERVED",
@@ -310,15 +270,6 @@ void v4l_printk_ioctl(unsigned int cmd)
 	case 'd':
 		type = "v4l2_int";
 		break;
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case 'v':
-		if (_IOC_NR(cmd) >= V4L1_IOCTLS) {
-			type = "v4l1";
-			break;
-		}
-		printk("%s", v4l1_ioctls[_IOC_NR(cmd)]);
-		return;
-#endif
 	case 'V':
 		if (_IOC_NR(cmd) >= V4L2_IOCTLS) {
 			type = "v4l2";
@@ -622,20 +573,6 @@ static long __video_do_ioctl(struct file *file,
 		return -EINVAL;
 	}
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	/********************************************************
-	 All other V4L1 calls are handled by v4l1_compat module.
-	 Those calls will be translated into V4L2 calls, and
-	 __video_do_ioctl will be called again, with one or more
-	 V4L2 ioctls.
-	 ********************************************************/
-	if (_IOC_TYPE(cmd) == 'v' && cmd != VIDIOCGMBUF &&
-				_IOC_NR(cmd) < BASE_VIDIOCPRIVATE) {
-		return v4l_compat_translate_ioctl(file, cmd, arg,
-						__video_do_ioctl);
-	}
-#endif
-
 	if ((vfd->debug & V4L2_DEBUG_IOCTL) &&
 				!(vfd->debug & V4L2_DEBUG_IOCTL_ARG)) {
 		v4l_print_ioctl(vfd->name, cmd);
@@ -644,29 +581,6 @@ static long __video_do_ioctl(struct file *file,
 
 	switch (cmd) {
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	/***********************************************************
-	 Handles calls to the obsoleted V4L1 API
-	 Due to the nature of VIDIOCGMBUF, each driver that supports
-	 V4L1 should implement its own handler for this ioctl.
-	 ***********************************************************/
-
-	/* --- streaming capture ------------------------------------- */
-	case VIDIOCGMBUF:
-	{
-		struct video_mbuf *p = arg;
-
-		if (!ops->vidiocgmbuf)
-			break;
-		ret = ops->vidiocgmbuf(file, fh, p);
-		if (!ret)
-			dbgarg(cmd, "size=%d, frames=%d, offsets=0x%08lx\n",
-						p->size, p->frames,
-						(unsigned long)p->offsets);
-		break;
-	}
-#endif
-
 	/* --- capabilities ------------------------------------------ */
 	case VIDIOC_QUERYCAP:
 	{
diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c
index 9eda7cc03121..e25aca5759fb 100644
--- a/drivers/media/video/via-camera.c
+++ b/drivers/media/video/via-camera.c
@@ -1161,16 +1161,6 @@ out:
 	return ret;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int viacam_vidiocgmbuf(struct file *filp, void *priv,
-		struct video_mbuf *mbuf)
-{
-	struct via_camera *cam = priv;
-
-	return videobuf_cgmbuf(&cam->vb_queue, mbuf, 6);
-}
-#endif
-
 /* G/S_PARM */
 
 static int viacam_g_parm(struct file *filp, void *priv,
@@ -1251,9 +1241,6 @@ static const struct v4l2_ioctl_ops viacam_ioctl_ops = {
 	.vidioc_s_parm		= viacam_s_parm,
 	.vidioc_enum_framesizes = viacam_enum_framesizes,
 	.vidioc_enum_frameintervals = viacam_enum_frameintervals,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf		= viacam_vidiocgmbuf,
-#endif
 };
 
 /*----------------------------------------------------------------------------*/
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 8979f91fa8e5..de4fa4eb8844 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -1202,33 +1202,3 @@ int videobuf_mmap_mapper(struct videobuf_queue *q, struct vm_area_struct *vma)
 	return rc;
 }
 EXPORT_SYMBOL_GPL(videobuf_mmap_mapper);
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-int videobuf_cgmbuf(struct videobuf_queue *q,
-		    struct video_mbuf *mbuf, int count)
-{
-	struct v4l2_requestbuffers req;
-	int rc, i;
-
-	MAGIC_CHECK(q->int_ops->magic, MAGIC_QTYPE_OPS);
-
-	memset(&req, 0, sizeof(req));
-	req.type   = q->type;
-	req.count  = count;
-	req.memory = V4L2_MEMORY_MMAP;
-	rc = videobuf_reqbufs(q, &req);
-	if (rc < 0)
-		return rc;
-
-	mbuf->frames = req.count;
-	mbuf->size   = 0;
-	for (i = 0; i < mbuf->frames; i++) {
-		mbuf->offsets[i]  = q->bufs[i]->boff;
-		mbuf->size       += PAGE_ALIGN(q->bufs[i]->bsize);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(videobuf_cgmbuf);
-#endif
-
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index ab684e807034..9d520601c70f 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -570,29 +570,6 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	}
 
 	last = first;
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	if (size != (vma->vm_end - vma->vm_start)) {
-		/* look for last buffer to map */
-		for (last = first + 1; last < VIDEO_MAX_FRAME; last++) {
-			if (NULL == q->bufs[last])
-				continue;
-			if (V4L2_MEMORY_MMAP != q->bufs[last]->memory)
-				continue;
-			if (q->bufs[last]->map) {
-				retval = -EBUSY;
-				goto done;
-			}
-			size += PAGE_ALIGN(q->bufs[last]->bsize);
-			if (size == (vma->vm_end - vma->vm_start))
-				break;
-		}
-		if (VIDEO_MAX_FRAME == last) {
-			dprintk(1, "mmap app bug: size invalid [size=0x%lx]\n",
-					(vma->vm_end - vma->vm_start));
-			goto done;
-		}
-	}
-#endif
 
 	/* create mapping + update buffer list */
 	retval = -ENOMEM;
diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c
index 9797e5a69265..c49c39386bd0 100644
--- a/drivers/media/video/vivi.c
+++ b/drivers/media/video/vivi.c
@@ -870,15 +870,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p)
 				file->f_flags & O_NONBLOCK);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct vivi_dev *dev = video_drvdata(file);
-
-	return videobuf_cgmbuf(&dev->vb_vidq, mbuf, 8);
-}
-#endif
-
 static int vidioc_streamon(struct file *file, void *priv, enum v4l2_buf_type i)
 {
 	struct vivi_dev *dev = video_drvdata(file);
@@ -1105,9 +1096,6 @@ static const struct v4l2_ioctl_ops vivi_ioctl_ops = {
 	.vidioc_queryctrl     = vidioc_queryctrl,
 	.vidioc_g_ctrl        = vidioc_g_ctrl,
 	.vidioc_s_ctrl        = vidioc_s_ctrl,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf          = vidiocgmbuf,
-#endif
 };
 
 static struct video_device vivi_template = {
diff --git a/drivers/staging/cx25821/cx25821-video.c b/drivers/staging/cx25821/cx25821-video.c
index 3c121fcb8731..27406c31d876 100644
--- a/drivers/staging/cx25821/cx25821-video.c
+++ b/drivers/staging/cx25821/cx25821-video.c
@@ -1188,34 +1188,6 @@ int cx25821_vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 	return 0;
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-int cx25821_vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct cx25821_fh *fh = priv;
-	struct videobuf_queue *q;
-	struct v4l2_requestbuffers req;
-	unsigned int i;
-	int err;
-
-	q = get_queue(fh);
-	memset(&req, 0, sizeof(req));
-	req.type = q->type;
-	req.count = 8;
-	req.memory = V4L2_MEMORY_MMAP;
-	err = videobuf_reqbufs(q, &req);
-	if (err < 0)
-		return err;
-
-	mbuf->frames = req.count;
-	mbuf->size = 0;
-	for (i = 0; i < mbuf->frames; i++) {
-		mbuf->offsets[i] = q->bufs[i]->boff;
-		mbuf->size += q->bufs[i]->bsize;
-	}
-	return 0;
-}
-#endif
-
 int cx25821_vidioc_reqbufs(struct file *file, void *priv, struct v4l2_requestbuffers *p)
 {
 	struct cx25821_fh *fh = priv;
@@ -2016,9 +1988,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
        .vidioc_log_status = vidioc_log_status,
        .vidioc_g_priority = cx25821_vidioc_g_priority,
        .vidioc_s_priority = cx25821_vidioc_s_priority,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-       .vidiocgmbuf = cx25821_vidiocgmbuf,
-#endif
 #ifdef TUNER_FLAG
        .vidioc_g_tuner = cx25821_vidioc_g_tuner,
        .vidioc_s_tuner = cx25821_vidioc_s_tuner,
diff --git a/drivers/staging/cx25821/cx25821-video.h b/drivers/staging/cx25821/cx25821-video.h
index 213f37cea348..f4ee8051b8b3 100644
--- a/drivers/staging/cx25821/cx25821-video.h
+++ b/drivers/staging/cx25821/cx25821-video.h
@@ -40,11 +40,6 @@
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-/* Include V4L1 specific functions. Should be removed soon */
-#include <linux/videodev.h>
-#endif
-
 #define TUNER_FLAG
 
 #define VIDEO_DEBUG 0
@@ -134,7 +129,6 @@ extern int cx25821_vidioc_querycap(struct file *file, void *priv,
 			   struct v4l2_capability *cap);
 extern int cx25821_vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 				   struct v4l2_fmtdesc *f);
-extern int cx25821_vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf);
 extern int cx25821_vidioc_reqbufs(struct file *file, void *priv,
 			  struct v4l2_requestbuffers *p);
 extern int cx25821_vidioc_querybuf(struct file *file, void *priv,
diff --git a/drivers/staging/dt3155v4l/dt3155v4l.c b/drivers/staging/dt3155v4l/dt3155v4l.c
index b996697e7eb2..15d7efeed292 100644
--- a/drivers/staging/dt3155v4l/dt3155v4l.c
+++ b/drivers/staging/dt3155v4l/dt3155v4l.c
@@ -876,9 +876,6 @@ static const struct v4l2_ioctl_ops dt3155_ioctl_ops = {
 	.vidioc_s_crop = dt3155_ioc_s_crop,
 	.vidioc_enum_framesizes = dt3155_ioc_enum_framesizes,
 	.vidioc_enum_frameintervals = dt3155_ioc_enum_frameintervals,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf = iocgmbuf,
-#endif
 */
 };
 
diff --git a/drivers/staging/tm6000/tm6000-video.c b/drivers/staging/tm6000/tm6000-video.c
index c5690b2a8924..0dae427a21ec 100644
--- a/drivers/staging/tm6000/tm6000-video.c
+++ b/drivers/staging/tm6000/tm6000-video.c
@@ -986,15 +986,6 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *p)
 				file->f_flags & O_NONBLOCK);
 }
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-static int vidiocgmbuf(struct file *file, void *priv, struct video_mbuf *mbuf)
-{
-	struct tm6000_fh  *fh = priv;
-
-	return videobuf_cgmbuf(&fh->vb_vidq, mbuf, 8);
-}
-#endif
-
 static int vidioc_streamon(struct file *file, void *priv, enum v4l2_buf_type i)
 {
 	struct tm6000_fh  *fh = priv;
@@ -1438,9 +1429,6 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_querybuf          = vidioc_querybuf,
 	.vidioc_qbuf              = vidioc_qbuf,
 	.vidioc_dqbuf             = vidioc_dqbuf,
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	.vidiocgmbuf              = vidiocgmbuf,
-#endif
 };
 
 static struct video_device tm6000_template = {
diff --git a/include/linux/videodev.h b/include/linux/videodev.h
index b19eab140977..8a7aead76a38 100644
--- a/include/linux/videodev.h
+++ b/include/linux/videodev.h
@@ -16,24 +16,7 @@
 #include <linux/ioctl.h>
 #include <linux/videodev2.h>
 
-#if defined(__MIN_V4L1) && defined (__KERNEL__)
-
-/*
- * Used by those V4L2 core functions that need a minimum V4L1 support,
- * in order to allow V4L1 Compatibilty code compilation.
- */
-
-struct video_mbuf
-{
-	int	size;		/* Total memory to map */
-	int	frames;		/* Frames */
-	int	offsets[VIDEO_MAX_FRAME];
-};
-
-#define VIDIOCGMBUF		_IOR('v',20, struct video_mbuf)		/* Memory map buffer info */
-
-#else
-#if defined(CONFIG_VIDEO_V4L1_COMPAT) || !defined (__KERNEL__)
+#if defined(CONFIG_VIDEO_V4L1) || defined(CONFIG_VIDEO_V4L1_MODULE) || !defined(__KERNEL__)
 
 #define VID_TYPE_CAPTURE	1	/* Can capture */
 #define VID_TYPE_TUNER		2	/* Can tune */
@@ -328,8 +311,7 @@ struct video_code
 #define VID_PLAY_RESET			13
 #define VID_PLAY_END_MARK		14
 
-#endif /* CONFIG_VIDEO_V4L1_COMPAT */
-#endif /* __MIN_V4L1 */
+#endif /* CONFIG_VIDEO_V4L1 */
 
 #endif /* __LINUX_VIDEODEV_H */
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 06daa6e8e051..67df37542c68 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -14,12 +14,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/compiler.h> /* need __user */
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-#define __MIN_V4L1
-#include <linux/videodev.h>
-#else
 #include <linux/videodev2.h>
-#endif
 
 struct v4l2_fh;
 
@@ -113,10 +108,6 @@ struct v4l2_ioctl_ops {
 
 
 	int (*vidioc_overlay) (struct file *file, void *fh, unsigned int i);
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-			/* buffer type is struct vidio_mbuf * */
-	int (*vidiocgmbuf)  (struct file *file, void *fh, struct video_mbuf *p);
-#endif
 	int (*vidioc_g_fbuf)   (struct file *file, void *fh,
 				struct v4l2_framebuffer *a);
 	int (*vidioc_s_fbuf)   (struct file *file, void *fh,
@@ -300,22 +291,15 @@ extern void v4l_printk_ioctl(unsigned int cmd);
 extern const char *v4l2_field_names[];
 extern const char *v4l2_type_names[];
 
-/*  Compatibility layer interface  --  v4l1-compat module */
-typedef long (*v4l2_kioctl)(struct file *file,
-			   unsigned int cmd, void *arg);
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-long v4l_compat_translate_ioctl(struct file *file,
-			       int cmd, void *arg, v4l2_kioctl driver_ioctl);
-#else
-#define v4l_compat_translate_ioctl(file, cmd, arg, ioctl) (-EINVAL)
-#endif
-
 #ifdef CONFIG_COMPAT
 /* 32 Bits compatibility layer for 64 bits processors */
 extern long v4l2_compat_ioctl32(struct file *file, unsigned int cmd,
 				unsigned long arg);
 #endif
 
+typedef long (*v4l2_kioctl)(struct file *file,
+		unsigned int cmd, void *arg);
+
 /* Include support for obsoleted stuff */
 extern long video_usercopy(struct file *file, unsigned int cmd,
 				unsigned long arg, v4l2_kioctl func);
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index 1d3835fc26be..90ed895e217d 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -17,10 +17,6 @@
 #define _VIDEOBUF_CORE_H
 
 #include <linux/poll.h>
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-#define __MIN_V4L1
-#include <linux/videodev.h>
-#endif
 #include <linux/videodev2.h>
 
 #define UNSET (-1U)
@@ -212,10 +208,6 @@ int videobuf_qbuf(struct videobuf_queue *q,
 		  struct v4l2_buffer *b);
 int videobuf_dqbuf(struct videobuf_queue *q,
 		   struct v4l2_buffer *b, int nonblocking);
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-int videobuf_cgmbuf(struct videobuf_queue *q,
-		    struct video_mbuf *mbuf, int count);
-#endif
 int videobuf_streamon(struct videobuf_queue *q);
 int videobuf_streamoff(struct videobuf_queue *q);
 
-- 
cgit v1.2.3-71-gd317


From 58c66df3e38ffb1d59cc5162bb9e07c859288034 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 27 Dec 2010 07:38:48 -0300
Subject: [media] Remove VIDEO_V4L1 Kconfig option

There's no sense on keeping VIDEO_V4L1 Kconfig option just because of
two deprecated drivers moved to staging scheduled to die on 2.6.39.

Reviewed-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/Kconfig            | 14 --------------
 drivers/media/video/Kconfig      |  5 -----
 drivers/staging/se401/Kconfig    |  2 +-
 drivers/staging/usbvideo/Kconfig |  2 +-
 include/linux/videodev.h         |  4 ----
 5 files changed, 2 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 9ea1a6d70f0a..147c92b9b4f6 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -40,20 +40,6 @@ config VIDEO_V4L2_COMMON
 	depends on (I2C || I2C=n) && VIDEO_DEV
 	default (I2C || I2C=n) && VIDEO_DEV
 
-config VIDEO_ALLOW_V4L1
-	bool "Enable Video For Linux API 1 (DEPRECATED)"
-	depends on VIDEO_DEV && VIDEO_V4L2_COMMON
-	default VIDEO_DEV && VIDEO_V4L2_COMMON
-	---help---
-	  Enables drivers based on the legacy V4L1 API.
-
-	  This api were developed to be used at Kernel 2.2 and 2.4, but
-	  lacks support for several video standards. There are several
-	  drivers at kernel that still depends on it.
-
-	  If you are unsure as to whether this is required, answer Y.
-
-
 #
 # DVB Core
 #
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index b3bf04368bf2..78b8c5de1f6b 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -7,11 +7,6 @@ config VIDEO_V4L2
 	depends on VIDEO_DEV && VIDEO_V4L2_COMMON
 	default VIDEO_DEV && VIDEO_V4L2_COMMON
 
-config VIDEO_V4L1
-	tristate
-	depends on VIDEO_DEV && VIDEO_V4L2_COMMON && VIDEO_ALLOW_V4L1
-	default VIDEO_DEV && VIDEO_V4L2_COMMON && VIDEO_ALLOW_V4L1
-
 config VIDEOBUF_GEN
 	tristate
 
diff --git a/drivers/staging/se401/Kconfig b/drivers/staging/se401/Kconfig
index 586fc0432a4b..ee7313e66c79 100644
--- a/drivers/staging/se401/Kconfig
+++ b/drivers/staging/se401/Kconfig
@@ -1,6 +1,6 @@
 config USB_SE401
 	tristate "USB SE401 Camera support (DEPRECATED)"
-	depends on VIDEO_V4L1
+	depends on VIDEO_DEV && VIDEO_V4L2_COMMON
 	---help---
 	  Say Y here if you want to connect this type of camera to your
 	  computer's USB port. See <file:Documentation/video4linux/se401.txt>
diff --git a/drivers/staging/usbvideo/Kconfig b/drivers/staging/usbvideo/Kconfig
index aae863e8a902..d9fc867d09eb 100644
--- a/drivers/staging/usbvideo/Kconfig
+++ b/drivers/staging/usbvideo/Kconfig
@@ -3,7 +3,7 @@ config VIDEO_USBVIDEO
 
 config USB_VICAM
 	tristate "USB 3com HomeConnect (aka vicam) support (DEPRECATED)"
-	depends on VIDEO_V4L1 && EXPERIMENTAL
+	depends on VIDEO_DEV && VIDEO_V4L2_COMMON
 	select VIDEO_USBVIDEO
 	---help---
 	  Say Y here if you have 3com homeconnect camera (vicam).
diff --git a/include/linux/videodev.h b/include/linux/videodev.h
index 8a7aead76a38..f11efbef1c05 100644
--- a/include/linux/videodev.h
+++ b/include/linux/videodev.h
@@ -16,8 +16,6 @@
 #include <linux/ioctl.h>
 #include <linux/videodev2.h>
 
-#if defined(CONFIG_VIDEO_V4L1) || defined(CONFIG_VIDEO_V4L1_MODULE) || !defined(__KERNEL__)
-
 #define VID_TYPE_CAPTURE	1	/* Can capture */
 #define VID_TYPE_TUNER		2	/* Can tune */
 #define VID_TYPE_TELETEXT	4	/* Does teletext */
@@ -311,8 +309,6 @@ struct video_code
 #define VID_PLAY_RESET			13
 #define VID_PLAY_END_MARK		14
 
-#endif /* CONFIG_VIDEO_V4L1 */
-
 #endif /* __LINUX_VIDEODEV_H */
 
 /*
-- 
cgit v1.2.3-71-gd317


From 88ae7624a6fe890e5a8ca57b25420f66e1389f8b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 27 Dec 2010 07:47:54 -0300
Subject: [media] V4L1 removal: Remove linux/videodev.h

There's no sense on keeping it on 2.6.38, as nobody is using it
anymore, at the kernel tree, and installing it at the userspace
API.

As two deprecated drivers still need it, move it to their internal
directories.

Reviewed-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/feature-removal-schedule.txt |  17 --
 drivers/media/video/v4l2-compat-ioctl32.c  |   1 -
 drivers/staging/se401/se401.h              |   2 +-
 drivers/staging/se401/videodev.h           | 318 +++++++++++++++++++++++++++++
 drivers/staging/usbvideo/usbvideo.h        |   2 +-
 drivers/staging/usbvideo/vicam.c           |   2 +-
 drivers/staging/usbvideo/videodev.h        | 318 +++++++++++++++++++++++++++++
 fs/compat_ioctl.c                          |   2 +-
 include/linux/Kbuild                       |   1 -
 include/linux/videodev.h                   | 318 -----------------------------
 include/media/ovcamchip.h                  |  90 --------
 11 files changed, 640 insertions(+), 431 deletions(-)
 create mode 100644 drivers/staging/se401/videodev.h
 create mode 100644 drivers/staging/usbvideo/videodev.h
 delete mode 100644 include/linux/videodev.h
 delete mode 100644 include/media/ovcamchip.h

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d66ed2bdf8f1..e348b7e241f1 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -97,23 +97,6 @@ Who:	Pavel Machek <pavel@ucw.cz>
 
 ---------------------------
 
-What:	Video4Linux API 1 ioctls and from Video devices.
-When:	kernel 2.6.39
-Files:	include/linux/videodev.h
-Check:	include/linux/videodev.h
-Why:	V4L1 AP1 was replaced by V4L2 API during migration from 2.4 to 2.6
-	series. The old API have lots of drawbacks and don't provide enough
-	means to work with all video and audio standards. The newer API is
-	already available on the main drivers and should be used instead.
-
-	The userspace libv4l1 library can convert V4L1 calls to V4L2. This
-	replaces the kernel V4L1 compatibility module which was removed in
-	2.6.38. The last V4L1 drivers will either be converted to V4L2 or
-	removed for 2.6.39 at which point the V4L1 API will cease to exist.
-Who:	Mauro Carvalho Chehab <mchehab@infradead.org>
-
----------------------------
-
 What:	Video4Linux obsolete drivers using V4L1 API
 When:	kernel 2.6.39
 Files:	drivers/staging/se401/* drivers/staging/usbvideo/*
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 425ac1e4a00e..65e6f133dd35 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -15,7 +15,6 @@
 
 #include <linux/compat.h>
 #define __OLD_VIDIOC_ /* To allow fixing old calls*/
-#include <linux/videodev.h>
 #include <linux/videodev2.h>
 #include <linux/module.h>
 #include <media/v4l2-ioctl.h>
diff --git a/drivers/staging/se401/se401.h b/drivers/staging/se401/se401.h
index bf7d2e9765b0..2758f4716c3d 100644
--- a/drivers/staging/se401/se401.h
+++ b/drivers/staging/se401/se401.h
@@ -3,7 +3,7 @@
 #define __LINUX_se401_H
 
 #include <linux/uaccess.h>
-#include <linux/videodev.h>
+#include "videodev.h"
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 #include <linux/mutex.h>
diff --git a/drivers/staging/se401/videodev.h b/drivers/staging/se401/videodev.h
new file mode 100644
index 000000000000..f11efbef1c05
--- /dev/null
+++ b/drivers/staging/se401/videodev.h
@@ -0,0 +1,318 @@
+/*
+ *	Video for Linux version 1 - OBSOLETE
+ *
+ *	Header file for v4l1 drivers and applications, for
+ *	Linux kernels 2.2.x or 2.4.x.
+ *
+ *	Provides header for legacy drivers and applications
+ *
+ *	See http://linuxtv.org for more info
+ *
+ */
+#ifndef __LINUX_VIDEODEV_H
+#define __LINUX_VIDEODEV_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/videodev2.h>
+
+#define VID_TYPE_CAPTURE	1	/* Can capture */
+#define VID_TYPE_TUNER		2	/* Can tune */
+#define VID_TYPE_TELETEXT	4	/* Does teletext */
+#define VID_TYPE_OVERLAY	8	/* Overlay onto frame buffer */
+#define VID_TYPE_CHROMAKEY	16	/* Overlay by chromakey */
+#define VID_TYPE_CLIPPING	32	/* Can clip */
+#define VID_TYPE_FRAMERAM	64	/* Uses the frame buffer memory */
+#define VID_TYPE_SCALES		128	/* Scalable */
+#define VID_TYPE_MONOCHROME	256	/* Monochrome only */
+#define VID_TYPE_SUBCAPTURE	512	/* Can capture subareas of the image */
+#define VID_TYPE_MPEG_DECODER	1024	/* Can decode MPEG streams */
+#define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
+#define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
+#define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
+
+struct video_capability
+{
+	char name[32];
+	int type;
+	int channels;	/* Num channels */
+	int audios;	/* Num audio devices */
+	int maxwidth;	/* Supported width */
+	int maxheight;	/* And height */
+	int minwidth;	/* Supported width */
+	int minheight;	/* And height */
+};
+
+
+struct video_channel
+{
+	int channel;
+	char name[32];
+	int tuners;
+	__u32  flags;
+#define VIDEO_VC_TUNER		1	/* Channel has a tuner */
+#define VIDEO_VC_AUDIO		2	/* Channel has audio */
+	__u16  type;
+#define VIDEO_TYPE_TV		1
+#define VIDEO_TYPE_CAMERA	2
+	__u16 norm;			/* Norm set by channel */
+};
+
+struct video_tuner
+{
+	int tuner;
+	char name[32];
+	unsigned long rangelow, rangehigh;	/* Tuner range */
+	__u32 flags;
+#define VIDEO_TUNER_PAL		1
+#define VIDEO_TUNER_NTSC	2
+#define VIDEO_TUNER_SECAM	4
+#define VIDEO_TUNER_LOW		8	/* Uses KHz not MHz */
+#define VIDEO_TUNER_NORM	16	/* Tuner can set norm */
+#define VIDEO_TUNER_STEREO_ON	128	/* Tuner is seeing stereo */
+#define VIDEO_TUNER_RDS_ON      256     /* Tuner is seeing an RDS datastream */
+#define VIDEO_TUNER_MBS_ON      512     /* Tuner is seeing an MBS datastream */
+	__u16 mode;			/* PAL/NTSC/SECAM/OTHER */
+#define VIDEO_MODE_PAL		0
+#define VIDEO_MODE_NTSC		1
+#define VIDEO_MODE_SECAM	2
+#define VIDEO_MODE_AUTO		3
+	__u16 signal;			/* Signal strength 16bit scale */
+};
+
+struct video_picture
+{
+	__u16	brightness;
+	__u16	hue;
+	__u16	colour;
+	__u16	contrast;
+	__u16	whiteness;	/* Black and white only */
+	__u16	depth;		/* Capture depth */
+	__u16   palette;	/* Palette in use */
+#define VIDEO_PALETTE_GREY	1	/* Linear greyscale */
+#define VIDEO_PALETTE_HI240	2	/* High 240 cube (BT848) */
+#define VIDEO_PALETTE_RGB565	3	/* 565 16 bit RGB */
+#define VIDEO_PALETTE_RGB24	4	/* 24bit RGB */
+#define VIDEO_PALETTE_RGB32	5	/* 32bit RGB */
+#define VIDEO_PALETTE_RGB555	6	/* 555 15bit RGB */
+#define VIDEO_PALETTE_YUV422	7	/* YUV422 capture */
+#define VIDEO_PALETTE_YUYV	8
+#define VIDEO_PALETTE_UYVY	9	/* The great thing about standards is ... */
+#define VIDEO_PALETTE_YUV420	10
+#define VIDEO_PALETTE_YUV411	11	/* YUV411 capture */
+#define VIDEO_PALETTE_RAW	12	/* RAW capture (BT848) */
+#define VIDEO_PALETTE_YUV422P	13	/* YUV 4:2:2 Planar */
+#define VIDEO_PALETTE_YUV411P	14	/* YUV 4:1:1 Planar */
+#define VIDEO_PALETTE_YUV420P	15	/* YUV 4:2:0 Planar */
+#define VIDEO_PALETTE_YUV410P	16	/* YUV 4:1:0 Planar */
+#define VIDEO_PALETTE_PLANAR	13	/* start of planar entries */
+#define VIDEO_PALETTE_COMPONENT 7	/* start of component entries */
+};
+
+struct video_audio
+{
+	int	audio;		/* Audio channel */
+	__u16	volume;		/* If settable */
+	__u16	bass, treble;
+	__u32	flags;
+#define VIDEO_AUDIO_MUTE	1
+#define VIDEO_AUDIO_MUTABLE	2
+#define VIDEO_AUDIO_VOLUME	4
+#define VIDEO_AUDIO_BASS	8
+#define VIDEO_AUDIO_TREBLE	16
+#define VIDEO_AUDIO_BALANCE	32
+	char    name[16];
+#define VIDEO_SOUND_MONO	1
+#define VIDEO_SOUND_STEREO	2
+#define VIDEO_SOUND_LANG1	4
+#define VIDEO_SOUND_LANG2	8
+	__u16   mode;
+	__u16	balance;	/* Stereo balance */
+	__u16	step;		/* Step actual volume uses */
+};
+
+struct video_clip
+{
+	__s32	x,y;
+	__s32	width, height;
+	struct	video_clip *next;	/* For user use/driver use only */
+};
+
+struct video_window
+{
+	__u32	x,y;			/* Position of window */
+	__u32	width,height;		/* Its size */
+	__u32	chromakey;
+	__u32	flags;
+	struct	video_clip __user *clips;	/* Set only */
+	int	clipcount;
+#define VIDEO_WINDOW_INTERLACE	1
+#define VIDEO_WINDOW_CHROMAKEY	16	/* Overlay by chromakey */
+#define VIDEO_CLIP_BITMAP	-1
+/* bitmap is 1024x625, a '1' bit represents a clipped pixel */
+#define VIDEO_CLIPMAP_SIZE	(128 * 625)
+};
+
+struct video_capture
+{
+	__u32 	x,y;			/* Offsets into image */
+	__u32	width, height;		/* Area to capture */
+	__u16	decimation;		/* Decimation divider */
+	__u16	flags;			/* Flags for capture */
+#define VIDEO_CAPTURE_ODD		0	/* Temporal */
+#define VIDEO_CAPTURE_EVEN		1
+};
+
+struct video_buffer
+{
+	void	*base;
+	int	height,width;
+	int	depth;
+	int	bytesperline;
+};
+
+struct video_mmap
+{
+	unsigned	int frame;		/* Frame (0 - n) for double buffer */
+	int		height,width;
+	unsigned	int format;		/* should be VIDEO_PALETTE_* */
+};
+
+struct video_key
+{
+	__u8	key[8];
+	__u32	flags;
+};
+
+struct video_mbuf
+{
+	int	size;		/* Total memory to map */
+	int	frames;		/* Frames */
+	int	offsets[VIDEO_MAX_FRAME];
+};
+
+#define 	VIDEO_NO_UNIT	(-1)
+
+struct video_unit
+{
+	int 	video;		/* Video minor */
+	int	vbi;		/* VBI minor */
+	int	radio;		/* Radio minor */
+	int	audio;		/* Audio minor */
+	int	teletext;	/* Teletext minor */
+};
+
+struct vbi_format {
+	__u32	sampling_rate;	/* in Hz */
+	__u32	samples_per_line;
+	__u32	sample_format;	/* VIDEO_PALETTE_RAW only (1 byte) */
+	__s32	start[2];	/* starting line for each frame */
+	__u32	count[2];	/* count of lines for each frame */
+	__u32	flags;
+#define	VBI_UNSYNC	1	/* can distingues between top/bottom field */
+#define	VBI_INTERLACED	2	/* lines are interlaced */
+};
+
+/* video_info is biased towards hardware mpeg encode/decode */
+/* but it could apply generically to any hardware compressor/decompressor */
+struct video_info
+{
+	__u32	frame_count;	/* frames output since decode/encode began */
+	__u32	h_size;		/* current unscaled horizontal size */
+	__u32	v_size;		/* current unscaled veritcal size */
+	__u32	smpte_timecode;	/* current SMPTE timecode (for current GOP) */
+	__u32	picture_type;	/* current picture type */
+	__u32	temporal_reference;	/* current temporal reference */
+	__u8	user_data[256];	/* user data last found in compressed stream */
+	/* user_data[0] contains user data flags, user_data[1] has count */
+};
+
+/* generic structure for setting playback modes */
+struct video_play_mode
+{
+	int	mode;
+	int	p1;
+	int	p2;
+};
+
+/* for loading microcode / fpga programming */
+struct video_code
+{
+	char	loadwhat[16];	/* name or tag of file being passed */
+	int	datasize;
+	__u8	*data;
+};
+
+#define VIDIOCGCAP		_IOR('v',1,struct video_capability)	/* Get capabilities */
+#define VIDIOCGCHAN		_IOWR('v',2,struct video_channel)	/* Get channel info (sources) */
+#define VIDIOCSCHAN		_IOW('v',3,struct video_channel)	/* Set channel 	*/
+#define VIDIOCGTUNER		_IOWR('v',4,struct video_tuner)		/* Get tuner abilities */
+#define VIDIOCSTUNER		_IOW('v',5,struct video_tuner)		/* Tune the tuner for the current channel */
+#define VIDIOCGPICT		_IOR('v',6,struct video_picture)	/* Get picture properties */
+#define VIDIOCSPICT		_IOW('v',7,struct video_picture)	/* Set picture properties */
+#define VIDIOCCAPTURE		_IOW('v',8,int)				/* Start, end capture */
+#define VIDIOCGWIN		_IOR('v',9, struct video_window)	/* Get the video overlay window */
+#define VIDIOCSWIN		_IOW('v',10, struct video_window)	/* Set the video overlay window - passes clip list for hardware smarts , chromakey etc */
+#define VIDIOCGFBUF		_IOR('v',11, struct video_buffer)	/* Get frame buffer */
+#define VIDIOCSFBUF		_IOW('v',12, struct video_buffer)	/* Set frame buffer - root only */
+#define VIDIOCKEY		_IOR('v',13, struct video_key)		/* Video key event - to dev 255 is to all - cuts capture on all DMA windows with this key (0xFFFFFFFF == all) */
+#define VIDIOCGFREQ		_IOR('v',14, unsigned long)		/* Set tuner */
+#define VIDIOCSFREQ		_IOW('v',15, unsigned long)		/* Set tuner */
+#define VIDIOCGAUDIO		_IOR('v',16, struct video_audio)	/* Get audio info */
+#define VIDIOCSAUDIO		_IOW('v',17, struct video_audio)	/* Audio source, mute etc */
+#define VIDIOCSYNC		_IOW('v',18, int)			/* Sync with mmap grabbing */
+#define VIDIOCMCAPTURE		_IOW('v',19, struct video_mmap)		/* Grab frames */
+#define VIDIOCGMBUF		_IOR('v',20, struct video_mbuf)		/* Memory map buffer info */
+#define VIDIOCGUNIT		_IOR('v',21, struct video_unit)		/* Get attached units */
+#define VIDIOCGCAPTURE		_IOR('v',22, struct video_capture)	/* Get subcapture */
+#define VIDIOCSCAPTURE		_IOW('v',23, struct video_capture)	/* Set subcapture */
+#define VIDIOCSPLAYMODE		_IOW('v',24, struct video_play_mode)	/* Set output video mode/feature */
+#define VIDIOCSWRITEMODE	_IOW('v',25, int)			/* Set write mode */
+#define VIDIOCGPLAYINFO		_IOR('v',26, struct video_info)		/* Get current playback info from hardware */
+#define VIDIOCSMICROCODE	_IOW('v',27, struct video_code)		/* Load microcode into hardware */
+#define	VIDIOCGVBIFMT		_IOR('v',28, struct vbi_format)		/* Get VBI information */
+#define	VIDIOCSVBIFMT		_IOW('v',29, struct vbi_format)		/* Set VBI information */
+
+
+#define BASE_VIDIOCPRIVATE	192		/* 192-255 are private */
+
+/* VIDIOCSWRITEMODE */
+#define VID_WRITE_MPEG_AUD		0
+#define VID_WRITE_MPEG_VID		1
+#define VID_WRITE_OSD			2
+#define VID_WRITE_TTX			3
+#define VID_WRITE_CC			4
+#define VID_WRITE_MJPEG			5
+
+/* VIDIOCSPLAYMODE */
+#define VID_PLAY_VID_OUT_MODE		0
+	/* p1: = VIDEO_MODE_PAL, VIDEO_MODE_NTSC, etc ... */
+#define VID_PLAY_GENLOCK		1
+	/* p1: 0 = OFF, 1 = ON */
+	/* p2: GENLOCK FINE DELAY value */
+#define VID_PLAY_NORMAL			2
+#define VID_PLAY_PAUSE			3
+#define VID_PLAY_SINGLE_FRAME		4
+#define VID_PLAY_FAST_FORWARD		5
+#define VID_PLAY_SLOW_MOTION		6
+#define VID_PLAY_IMMEDIATE_NORMAL	7
+#define VID_PLAY_SWITCH_CHANNELS	8
+#define VID_PLAY_FREEZE_FRAME		9
+#define VID_PLAY_STILL_MODE		10
+#define VID_PLAY_MASTER_MODE		11
+	/* p1: see below */
+#define		VID_PLAY_MASTER_NONE	1
+#define		VID_PLAY_MASTER_VIDEO	2
+#define		VID_PLAY_MASTER_AUDIO	3
+#define VID_PLAY_ACTIVE_SCANLINES	12
+	/* p1 = first active; p2 = last active */
+#define VID_PLAY_RESET			13
+#define VID_PLAY_END_MARK		14
+
+#endif /* __LINUX_VIDEODEV_H */
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/drivers/staging/usbvideo/usbvideo.h b/drivers/staging/usbvideo/usbvideo.h
index c66985beb8c9..95638a072b19 100644
--- a/drivers/staging/usbvideo/usbvideo.h
+++ b/drivers/staging/usbvideo/usbvideo.h
@@ -16,7 +16,7 @@
 #ifndef usbvideo_h
 #define	usbvideo_h
 
-#include <linux/videodev.h>
+#include "videodev.h"
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 #include <linux/usb.h>
diff --git a/drivers/staging/usbvideo/vicam.c b/drivers/staging/usbvideo/vicam.c
index dc17cce2fbb6..ecdb121297c9 100644
--- a/drivers/staging/usbvideo/vicam.c
+++ b/drivers/staging/usbvideo/vicam.c
@@ -38,7 +38,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/videodev.h>
+#include "videodev.h"
 #include <linux/usb.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
diff --git a/drivers/staging/usbvideo/videodev.h b/drivers/staging/usbvideo/videodev.h
new file mode 100644
index 000000000000..f11efbef1c05
--- /dev/null
+++ b/drivers/staging/usbvideo/videodev.h
@@ -0,0 +1,318 @@
+/*
+ *	Video for Linux version 1 - OBSOLETE
+ *
+ *	Header file for v4l1 drivers and applications, for
+ *	Linux kernels 2.2.x or 2.4.x.
+ *
+ *	Provides header for legacy drivers and applications
+ *
+ *	See http://linuxtv.org for more info
+ *
+ */
+#ifndef __LINUX_VIDEODEV_H
+#define __LINUX_VIDEODEV_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/videodev2.h>
+
+#define VID_TYPE_CAPTURE	1	/* Can capture */
+#define VID_TYPE_TUNER		2	/* Can tune */
+#define VID_TYPE_TELETEXT	4	/* Does teletext */
+#define VID_TYPE_OVERLAY	8	/* Overlay onto frame buffer */
+#define VID_TYPE_CHROMAKEY	16	/* Overlay by chromakey */
+#define VID_TYPE_CLIPPING	32	/* Can clip */
+#define VID_TYPE_FRAMERAM	64	/* Uses the frame buffer memory */
+#define VID_TYPE_SCALES		128	/* Scalable */
+#define VID_TYPE_MONOCHROME	256	/* Monochrome only */
+#define VID_TYPE_SUBCAPTURE	512	/* Can capture subareas of the image */
+#define VID_TYPE_MPEG_DECODER	1024	/* Can decode MPEG streams */
+#define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
+#define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
+#define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
+
+struct video_capability
+{
+	char name[32];
+	int type;
+	int channels;	/* Num channels */
+	int audios;	/* Num audio devices */
+	int maxwidth;	/* Supported width */
+	int maxheight;	/* And height */
+	int minwidth;	/* Supported width */
+	int minheight;	/* And height */
+};
+
+
+struct video_channel
+{
+	int channel;
+	char name[32];
+	int tuners;
+	__u32  flags;
+#define VIDEO_VC_TUNER		1	/* Channel has a tuner */
+#define VIDEO_VC_AUDIO		2	/* Channel has audio */
+	__u16  type;
+#define VIDEO_TYPE_TV		1
+#define VIDEO_TYPE_CAMERA	2
+	__u16 norm;			/* Norm set by channel */
+};
+
+struct video_tuner
+{
+	int tuner;
+	char name[32];
+	unsigned long rangelow, rangehigh;	/* Tuner range */
+	__u32 flags;
+#define VIDEO_TUNER_PAL		1
+#define VIDEO_TUNER_NTSC	2
+#define VIDEO_TUNER_SECAM	4
+#define VIDEO_TUNER_LOW		8	/* Uses KHz not MHz */
+#define VIDEO_TUNER_NORM	16	/* Tuner can set norm */
+#define VIDEO_TUNER_STEREO_ON	128	/* Tuner is seeing stereo */
+#define VIDEO_TUNER_RDS_ON      256     /* Tuner is seeing an RDS datastream */
+#define VIDEO_TUNER_MBS_ON      512     /* Tuner is seeing an MBS datastream */
+	__u16 mode;			/* PAL/NTSC/SECAM/OTHER */
+#define VIDEO_MODE_PAL		0
+#define VIDEO_MODE_NTSC		1
+#define VIDEO_MODE_SECAM	2
+#define VIDEO_MODE_AUTO		3
+	__u16 signal;			/* Signal strength 16bit scale */
+};
+
+struct video_picture
+{
+	__u16	brightness;
+	__u16	hue;
+	__u16	colour;
+	__u16	contrast;
+	__u16	whiteness;	/* Black and white only */
+	__u16	depth;		/* Capture depth */
+	__u16   palette;	/* Palette in use */
+#define VIDEO_PALETTE_GREY	1	/* Linear greyscale */
+#define VIDEO_PALETTE_HI240	2	/* High 240 cube (BT848) */
+#define VIDEO_PALETTE_RGB565	3	/* 565 16 bit RGB */
+#define VIDEO_PALETTE_RGB24	4	/* 24bit RGB */
+#define VIDEO_PALETTE_RGB32	5	/* 32bit RGB */
+#define VIDEO_PALETTE_RGB555	6	/* 555 15bit RGB */
+#define VIDEO_PALETTE_YUV422	7	/* YUV422 capture */
+#define VIDEO_PALETTE_YUYV	8
+#define VIDEO_PALETTE_UYVY	9	/* The great thing about standards is ... */
+#define VIDEO_PALETTE_YUV420	10
+#define VIDEO_PALETTE_YUV411	11	/* YUV411 capture */
+#define VIDEO_PALETTE_RAW	12	/* RAW capture (BT848) */
+#define VIDEO_PALETTE_YUV422P	13	/* YUV 4:2:2 Planar */
+#define VIDEO_PALETTE_YUV411P	14	/* YUV 4:1:1 Planar */
+#define VIDEO_PALETTE_YUV420P	15	/* YUV 4:2:0 Planar */
+#define VIDEO_PALETTE_YUV410P	16	/* YUV 4:1:0 Planar */
+#define VIDEO_PALETTE_PLANAR	13	/* start of planar entries */
+#define VIDEO_PALETTE_COMPONENT 7	/* start of component entries */
+};
+
+struct video_audio
+{
+	int	audio;		/* Audio channel */
+	__u16	volume;		/* If settable */
+	__u16	bass, treble;
+	__u32	flags;
+#define VIDEO_AUDIO_MUTE	1
+#define VIDEO_AUDIO_MUTABLE	2
+#define VIDEO_AUDIO_VOLUME	4
+#define VIDEO_AUDIO_BASS	8
+#define VIDEO_AUDIO_TREBLE	16
+#define VIDEO_AUDIO_BALANCE	32
+	char    name[16];
+#define VIDEO_SOUND_MONO	1
+#define VIDEO_SOUND_STEREO	2
+#define VIDEO_SOUND_LANG1	4
+#define VIDEO_SOUND_LANG2	8
+	__u16   mode;
+	__u16	balance;	/* Stereo balance */
+	__u16	step;		/* Step actual volume uses */
+};
+
+struct video_clip
+{
+	__s32	x,y;
+	__s32	width, height;
+	struct	video_clip *next;	/* For user use/driver use only */
+};
+
+struct video_window
+{
+	__u32	x,y;			/* Position of window */
+	__u32	width,height;		/* Its size */
+	__u32	chromakey;
+	__u32	flags;
+	struct	video_clip __user *clips;	/* Set only */
+	int	clipcount;
+#define VIDEO_WINDOW_INTERLACE	1
+#define VIDEO_WINDOW_CHROMAKEY	16	/* Overlay by chromakey */
+#define VIDEO_CLIP_BITMAP	-1
+/* bitmap is 1024x625, a '1' bit represents a clipped pixel */
+#define VIDEO_CLIPMAP_SIZE	(128 * 625)
+};
+
+struct video_capture
+{
+	__u32 	x,y;			/* Offsets into image */
+	__u32	width, height;		/* Area to capture */
+	__u16	decimation;		/* Decimation divider */
+	__u16	flags;			/* Flags for capture */
+#define VIDEO_CAPTURE_ODD		0	/* Temporal */
+#define VIDEO_CAPTURE_EVEN		1
+};
+
+struct video_buffer
+{
+	void	*base;
+	int	height,width;
+	int	depth;
+	int	bytesperline;
+};
+
+struct video_mmap
+{
+	unsigned	int frame;		/* Frame (0 - n) for double buffer */
+	int		height,width;
+	unsigned	int format;		/* should be VIDEO_PALETTE_* */
+};
+
+struct video_key
+{
+	__u8	key[8];
+	__u32	flags;
+};
+
+struct video_mbuf
+{
+	int	size;		/* Total memory to map */
+	int	frames;		/* Frames */
+	int	offsets[VIDEO_MAX_FRAME];
+};
+
+#define 	VIDEO_NO_UNIT	(-1)
+
+struct video_unit
+{
+	int 	video;		/* Video minor */
+	int	vbi;		/* VBI minor */
+	int	radio;		/* Radio minor */
+	int	audio;		/* Audio minor */
+	int	teletext;	/* Teletext minor */
+};
+
+struct vbi_format {
+	__u32	sampling_rate;	/* in Hz */
+	__u32	samples_per_line;
+	__u32	sample_format;	/* VIDEO_PALETTE_RAW only (1 byte) */
+	__s32	start[2];	/* starting line for each frame */
+	__u32	count[2];	/* count of lines for each frame */
+	__u32	flags;
+#define	VBI_UNSYNC	1	/* can distingues between top/bottom field */
+#define	VBI_INTERLACED	2	/* lines are interlaced */
+};
+
+/* video_info is biased towards hardware mpeg encode/decode */
+/* but it could apply generically to any hardware compressor/decompressor */
+struct video_info
+{
+	__u32	frame_count;	/* frames output since decode/encode began */
+	__u32	h_size;		/* current unscaled horizontal size */
+	__u32	v_size;		/* current unscaled veritcal size */
+	__u32	smpte_timecode;	/* current SMPTE timecode (for current GOP) */
+	__u32	picture_type;	/* current picture type */
+	__u32	temporal_reference;	/* current temporal reference */
+	__u8	user_data[256];	/* user data last found in compressed stream */
+	/* user_data[0] contains user data flags, user_data[1] has count */
+};
+
+/* generic structure for setting playback modes */
+struct video_play_mode
+{
+	int	mode;
+	int	p1;
+	int	p2;
+};
+
+/* for loading microcode / fpga programming */
+struct video_code
+{
+	char	loadwhat[16];	/* name or tag of file being passed */
+	int	datasize;
+	__u8	*data;
+};
+
+#define VIDIOCGCAP		_IOR('v',1,struct video_capability)	/* Get capabilities */
+#define VIDIOCGCHAN		_IOWR('v',2,struct video_channel)	/* Get channel info (sources) */
+#define VIDIOCSCHAN		_IOW('v',3,struct video_channel)	/* Set channel 	*/
+#define VIDIOCGTUNER		_IOWR('v',4,struct video_tuner)		/* Get tuner abilities */
+#define VIDIOCSTUNER		_IOW('v',5,struct video_tuner)		/* Tune the tuner for the current channel */
+#define VIDIOCGPICT		_IOR('v',6,struct video_picture)	/* Get picture properties */
+#define VIDIOCSPICT		_IOW('v',7,struct video_picture)	/* Set picture properties */
+#define VIDIOCCAPTURE		_IOW('v',8,int)				/* Start, end capture */
+#define VIDIOCGWIN		_IOR('v',9, struct video_window)	/* Get the video overlay window */
+#define VIDIOCSWIN		_IOW('v',10, struct video_window)	/* Set the video overlay window - passes clip list for hardware smarts , chromakey etc */
+#define VIDIOCGFBUF		_IOR('v',11, struct video_buffer)	/* Get frame buffer */
+#define VIDIOCSFBUF		_IOW('v',12, struct video_buffer)	/* Set frame buffer - root only */
+#define VIDIOCKEY		_IOR('v',13, struct video_key)		/* Video key event - to dev 255 is to all - cuts capture on all DMA windows with this key (0xFFFFFFFF == all) */
+#define VIDIOCGFREQ		_IOR('v',14, unsigned long)		/* Set tuner */
+#define VIDIOCSFREQ		_IOW('v',15, unsigned long)		/* Set tuner */
+#define VIDIOCGAUDIO		_IOR('v',16, struct video_audio)	/* Get audio info */
+#define VIDIOCSAUDIO		_IOW('v',17, struct video_audio)	/* Audio source, mute etc */
+#define VIDIOCSYNC		_IOW('v',18, int)			/* Sync with mmap grabbing */
+#define VIDIOCMCAPTURE		_IOW('v',19, struct video_mmap)		/* Grab frames */
+#define VIDIOCGMBUF		_IOR('v',20, struct video_mbuf)		/* Memory map buffer info */
+#define VIDIOCGUNIT		_IOR('v',21, struct video_unit)		/* Get attached units */
+#define VIDIOCGCAPTURE		_IOR('v',22, struct video_capture)	/* Get subcapture */
+#define VIDIOCSCAPTURE		_IOW('v',23, struct video_capture)	/* Set subcapture */
+#define VIDIOCSPLAYMODE		_IOW('v',24, struct video_play_mode)	/* Set output video mode/feature */
+#define VIDIOCSWRITEMODE	_IOW('v',25, int)			/* Set write mode */
+#define VIDIOCGPLAYINFO		_IOR('v',26, struct video_info)		/* Get current playback info from hardware */
+#define VIDIOCSMICROCODE	_IOW('v',27, struct video_code)		/* Load microcode into hardware */
+#define	VIDIOCGVBIFMT		_IOR('v',28, struct vbi_format)		/* Get VBI information */
+#define	VIDIOCSVBIFMT		_IOW('v',29, struct vbi_format)		/* Set VBI information */
+
+
+#define BASE_VIDIOCPRIVATE	192		/* 192-255 are private */
+
+/* VIDIOCSWRITEMODE */
+#define VID_WRITE_MPEG_AUD		0
+#define VID_WRITE_MPEG_VID		1
+#define VID_WRITE_OSD			2
+#define VID_WRITE_TTX			3
+#define VID_WRITE_CC			4
+#define VID_WRITE_MJPEG			5
+
+/* VIDIOCSPLAYMODE */
+#define VID_PLAY_VID_OUT_MODE		0
+	/* p1: = VIDEO_MODE_PAL, VIDEO_MODE_NTSC, etc ... */
+#define VID_PLAY_GENLOCK		1
+	/* p1: 0 = OFF, 1 = ON */
+	/* p2: GENLOCK FINE DELAY value */
+#define VID_PLAY_NORMAL			2
+#define VID_PLAY_PAUSE			3
+#define VID_PLAY_SINGLE_FRAME		4
+#define VID_PLAY_FAST_FORWARD		5
+#define VID_PLAY_SLOW_MOTION		6
+#define VID_PLAY_IMMEDIATE_NORMAL	7
+#define VID_PLAY_SWITCH_CHANNELS	8
+#define VID_PLAY_FREEZE_FRAME		9
+#define VID_PLAY_STILL_MODE		10
+#define VID_PLAY_MASTER_MODE		11
+	/* p1: see below */
+#define		VID_PLAY_MASTER_NONE	1
+#define		VID_PLAY_MASTER_VIDEO	2
+#define		VID_PLAY_MASTER_AUDIO	3
+#define VID_PLAY_ACTIVE_SCANLINES	12
+	/* p1 = first active; p2 = last active */
+#define VID_PLAY_RESET			13
+#define VID_PLAY_END_MARK		14
+
+#endif /* __LINUX_VIDEODEV_H */
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a60579b007b0..175736c282ab 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -42,7 +42,7 @@
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
 #include <linux/fb.h>
-#include <linux/videodev.h>
+#include <linux/videodev2.h>
 #include <linux/netdevice.h>
 #include <linux/raw.h>
 #include <linux/blkdev.h>
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 97319a8fc1e0..a354c199ab98 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -367,7 +367,6 @@ header-y += utime.h
 header-y += utsname.h
 header-y += veth.h
 header-y += vhost.h
-header-y += videodev.h
 header-y += videodev2.h
 header-y += virtio_9p.h
 header-y += virtio_balloon.h
diff --git a/include/linux/videodev.h b/include/linux/videodev.h
deleted file mode 100644
index f11efbef1c05..000000000000
--- a/include/linux/videodev.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- *	Video for Linux version 1 - OBSOLETE
- *
- *	Header file for v4l1 drivers and applications, for
- *	Linux kernels 2.2.x or 2.4.x.
- *
- *	Provides header for legacy drivers and applications
- *
- *	See http://linuxtv.org for more info
- *
- */
-#ifndef __LINUX_VIDEODEV_H
-#define __LINUX_VIDEODEV_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/videodev2.h>
-
-#define VID_TYPE_CAPTURE	1	/* Can capture */
-#define VID_TYPE_TUNER		2	/* Can tune */
-#define VID_TYPE_TELETEXT	4	/* Does teletext */
-#define VID_TYPE_OVERLAY	8	/* Overlay onto frame buffer */
-#define VID_TYPE_CHROMAKEY	16	/* Overlay by chromakey */
-#define VID_TYPE_CLIPPING	32	/* Can clip */
-#define VID_TYPE_FRAMERAM	64	/* Uses the frame buffer memory */
-#define VID_TYPE_SCALES		128	/* Scalable */
-#define VID_TYPE_MONOCHROME	256	/* Monochrome only */
-#define VID_TYPE_SUBCAPTURE	512	/* Can capture subareas of the image */
-#define VID_TYPE_MPEG_DECODER	1024	/* Can decode MPEG streams */
-#define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
-#define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
-#define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
-
-struct video_capability
-{
-	char name[32];
-	int type;
-	int channels;	/* Num channels */
-	int audios;	/* Num audio devices */
-	int maxwidth;	/* Supported width */
-	int maxheight;	/* And height */
-	int minwidth;	/* Supported width */
-	int minheight;	/* And height */
-};
-
-
-struct video_channel
-{
-	int channel;
-	char name[32];
-	int tuners;
-	__u32  flags;
-#define VIDEO_VC_TUNER		1	/* Channel has a tuner */
-#define VIDEO_VC_AUDIO		2	/* Channel has audio */
-	__u16  type;
-#define VIDEO_TYPE_TV		1
-#define VIDEO_TYPE_CAMERA	2
-	__u16 norm;			/* Norm set by channel */
-};
-
-struct video_tuner
-{
-	int tuner;
-	char name[32];
-	unsigned long rangelow, rangehigh;	/* Tuner range */
-	__u32 flags;
-#define VIDEO_TUNER_PAL		1
-#define VIDEO_TUNER_NTSC	2
-#define VIDEO_TUNER_SECAM	4
-#define VIDEO_TUNER_LOW		8	/* Uses KHz not MHz */
-#define VIDEO_TUNER_NORM	16	/* Tuner can set norm */
-#define VIDEO_TUNER_STEREO_ON	128	/* Tuner is seeing stereo */
-#define VIDEO_TUNER_RDS_ON      256     /* Tuner is seeing an RDS datastream */
-#define VIDEO_TUNER_MBS_ON      512     /* Tuner is seeing an MBS datastream */
-	__u16 mode;			/* PAL/NTSC/SECAM/OTHER */
-#define VIDEO_MODE_PAL		0
-#define VIDEO_MODE_NTSC		1
-#define VIDEO_MODE_SECAM	2
-#define VIDEO_MODE_AUTO		3
-	__u16 signal;			/* Signal strength 16bit scale */
-};
-
-struct video_picture
-{
-	__u16	brightness;
-	__u16	hue;
-	__u16	colour;
-	__u16	contrast;
-	__u16	whiteness;	/* Black and white only */
-	__u16	depth;		/* Capture depth */
-	__u16   palette;	/* Palette in use */
-#define VIDEO_PALETTE_GREY	1	/* Linear greyscale */
-#define VIDEO_PALETTE_HI240	2	/* High 240 cube (BT848) */
-#define VIDEO_PALETTE_RGB565	3	/* 565 16 bit RGB */
-#define VIDEO_PALETTE_RGB24	4	/* 24bit RGB */
-#define VIDEO_PALETTE_RGB32	5	/* 32bit RGB */
-#define VIDEO_PALETTE_RGB555	6	/* 555 15bit RGB */
-#define VIDEO_PALETTE_YUV422	7	/* YUV422 capture */
-#define VIDEO_PALETTE_YUYV	8
-#define VIDEO_PALETTE_UYVY	9	/* The great thing about standards is ... */
-#define VIDEO_PALETTE_YUV420	10
-#define VIDEO_PALETTE_YUV411	11	/* YUV411 capture */
-#define VIDEO_PALETTE_RAW	12	/* RAW capture (BT848) */
-#define VIDEO_PALETTE_YUV422P	13	/* YUV 4:2:2 Planar */
-#define VIDEO_PALETTE_YUV411P	14	/* YUV 4:1:1 Planar */
-#define VIDEO_PALETTE_YUV420P	15	/* YUV 4:2:0 Planar */
-#define VIDEO_PALETTE_YUV410P	16	/* YUV 4:1:0 Planar */
-#define VIDEO_PALETTE_PLANAR	13	/* start of planar entries */
-#define VIDEO_PALETTE_COMPONENT 7	/* start of component entries */
-};
-
-struct video_audio
-{
-	int	audio;		/* Audio channel */
-	__u16	volume;		/* If settable */
-	__u16	bass, treble;
-	__u32	flags;
-#define VIDEO_AUDIO_MUTE	1
-#define VIDEO_AUDIO_MUTABLE	2
-#define VIDEO_AUDIO_VOLUME	4
-#define VIDEO_AUDIO_BASS	8
-#define VIDEO_AUDIO_TREBLE	16
-#define VIDEO_AUDIO_BALANCE	32
-	char    name[16];
-#define VIDEO_SOUND_MONO	1
-#define VIDEO_SOUND_STEREO	2
-#define VIDEO_SOUND_LANG1	4
-#define VIDEO_SOUND_LANG2	8
-	__u16   mode;
-	__u16	balance;	/* Stereo balance */
-	__u16	step;		/* Step actual volume uses */
-};
-
-struct video_clip
-{
-	__s32	x,y;
-	__s32	width, height;
-	struct	video_clip *next;	/* For user use/driver use only */
-};
-
-struct video_window
-{
-	__u32	x,y;			/* Position of window */
-	__u32	width,height;		/* Its size */
-	__u32	chromakey;
-	__u32	flags;
-	struct	video_clip __user *clips;	/* Set only */
-	int	clipcount;
-#define VIDEO_WINDOW_INTERLACE	1
-#define VIDEO_WINDOW_CHROMAKEY	16	/* Overlay by chromakey */
-#define VIDEO_CLIP_BITMAP	-1
-/* bitmap is 1024x625, a '1' bit represents a clipped pixel */
-#define VIDEO_CLIPMAP_SIZE	(128 * 625)
-};
-
-struct video_capture
-{
-	__u32 	x,y;			/* Offsets into image */
-	__u32	width, height;		/* Area to capture */
-	__u16	decimation;		/* Decimation divider */
-	__u16	flags;			/* Flags for capture */
-#define VIDEO_CAPTURE_ODD		0	/* Temporal */
-#define VIDEO_CAPTURE_EVEN		1
-};
-
-struct video_buffer
-{
-	void	*base;
-	int	height,width;
-	int	depth;
-	int	bytesperline;
-};
-
-struct video_mmap
-{
-	unsigned	int frame;		/* Frame (0 - n) for double buffer */
-	int		height,width;
-	unsigned	int format;		/* should be VIDEO_PALETTE_* */
-};
-
-struct video_key
-{
-	__u8	key[8];
-	__u32	flags;
-};
-
-struct video_mbuf
-{
-	int	size;		/* Total memory to map */
-	int	frames;		/* Frames */
-	int	offsets[VIDEO_MAX_FRAME];
-};
-
-#define 	VIDEO_NO_UNIT	(-1)
-
-struct video_unit
-{
-	int 	video;		/* Video minor */
-	int	vbi;		/* VBI minor */
-	int	radio;		/* Radio minor */
-	int	audio;		/* Audio minor */
-	int	teletext;	/* Teletext minor */
-};
-
-struct vbi_format {
-	__u32	sampling_rate;	/* in Hz */
-	__u32	samples_per_line;
-	__u32	sample_format;	/* VIDEO_PALETTE_RAW only (1 byte) */
-	__s32	start[2];	/* starting line for each frame */
-	__u32	count[2];	/* count of lines for each frame */
-	__u32	flags;
-#define	VBI_UNSYNC	1	/* can distingues between top/bottom field */
-#define	VBI_INTERLACED	2	/* lines are interlaced */
-};
-
-/* video_info is biased towards hardware mpeg encode/decode */
-/* but it could apply generically to any hardware compressor/decompressor */
-struct video_info
-{
-	__u32	frame_count;	/* frames output since decode/encode began */
-	__u32	h_size;		/* current unscaled horizontal size */
-	__u32	v_size;		/* current unscaled veritcal size */
-	__u32	smpte_timecode;	/* current SMPTE timecode (for current GOP) */
-	__u32	picture_type;	/* current picture type */
-	__u32	temporal_reference;	/* current temporal reference */
-	__u8	user_data[256];	/* user data last found in compressed stream */
-	/* user_data[0] contains user data flags, user_data[1] has count */
-};
-
-/* generic structure for setting playback modes */
-struct video_play_mode
-{
-	int	mode;
-	int	p1;
-	int	p2;
-};
-
-/* for loading microcode / fpga programming */
-struct video_code
-{
-	char	loadwhat[16];	/* name or tag of file being passed */
-	int	datasize;
-	__u8	*data;
-};
-
-#define VIDIOCGCAP		_IOR('v',1,struct video_capability)	/* Get capabilities */
-#define VIDIOCGCHAN		_IOWR('v',2,struct video_channel)	/* Get channel info (sources) */
-#define VIDIOCSCHAN		_IOW('v',3,struct video_channel)	/* Set channel 	*/
-#define VIDIOCGTUNER		_IOWR('v',4,struct video_tuner)		/* Get tuner abilities */
-#define VIDIOCSTUNER		_IOW('v',5,struct video_tuner)		/* Tune the tuner for the current channel */
-#define VIDIOCGPICT		_IOR('v',6,struct video_picture)	/* Get picture properties */
-#define VIDIOCSPICT		_IOW('v',7,struct video_picture)	/* Set picture properties */
-#define VIDIOCCAPTURE		_IOW('v',8,int)				/* Start, end capture */
-#define VIDIOCGWIN		_IOR('v',9, struct video_window)	/* Get the video overlay window */
-#define VIDIOCSWIN		_IOW('v',10, struct video_window)	/* Set the video overlay window - passes clip list for hardware smarts , chromakey etc */
-#define VIDIOCGFBUF		_IOR('v',11, struct video_buffer)	/* Get frame buffer */
-#define VIDIOCSFBUF		_IOW('v',12, struct video_buffer)	/* Set frame buffer - root only */
-#define VIDIOCKEY		_IOR('v',13, struct video_key)		/* Video key event - to dev 255 is to all - cuts capture on all DMA windows with this key (0xFFFFFFFF == all) */
-#define VIDIOCGFREQ		_IOR('v',14, unsigned long)		/* Set tuner */
-#define VIDIOCSFREQ		_IOW('v',15, unsigned long)		/* Set tuner */
-#define VIDIOCGAUDIO		_IOR('v',16, struct video_audio)	/* Get audio info */
-#define VIDIOCSAUDIO		_IOW('v',17, struct video_audio)	/* Audio source, mute etc */
-#define VIDIOCSYNC		_IOW('v',18, int)			/* Sync with mmap grabbing */
-#define VIDIOCMCAPTURE		_IOW('v',19, struct video_mmap)		/* Grab frames */
-#define VIDIOCGMBUF		_IOR('v',20, struct video_mbuf)		/* Memory map buffer info */
-#define VIDIOCGUNIT		_IOR('v',21, struct video_unit)		/* Get attached units */
-#define VIDIOCGCAPTURE		_IOR('v',22, struct video_capture)	/* Get subcapture */
-#define VIDIOCSCAPTURE		_IOW('v',23, struct video_capture)	/* Set subcapture */
-#define VIDIOCSPLAYMODE		_IOW('v',24, struct video_play_mode)	/* Set output video mode/feature */
-#define VIDIOCSWRITEMODE	_IOW('v',25, int)			/* Set write mode */
-#define VIDIOCGPLAYINFO		_IOR('v',26, struct video_info)		/* Get current playback info from hardware */
-#define VIDIOCSMICROCODE	_IOW('v',27, struct video_code)		/* Load microcode into hardware */
-#define	VIDIOCGVBIFMT		_IOR('v',28, struct vbi_format)		/* Get VBI information */
-#define	VIDIOCSVBIFMT		_IOW('v',29, struct vbi_format)		/* Set VBI information */
-
-
-#define BASE_VIDIOCPRIVATE	192		/* 192-255 are private */
-
-/* VIDIOCSWRITEMODE */
-#define VID_WRITE_MPEG_AUD		0
-#define VID_WRITE_MPEG_VID		1
-#define VID_WRITE_OSD			2
-#define VID_WRITE_TTX			3
-#define VID_WRITE_CC			4
-#define VID_WRITE_MJPEG			5
-
-/* VIDIOCSPLAYMODE */
-#define VID_PLAY_VID_OUT_MODE		0
-	/* p1: = VIDEO_MODE_PAL, VIDEO_MODE_NTSC, etc ... */
-#define VID_PLAY_GENLOCK		1
-	/* p1: 0 = OFF, 1 = ON */
-	/* p2: GENLOCK FINE DELAY value */
-#define VID_PLAY_NORMAL			2
-#define VID_PLAY_PAUSE			3
-#define VID_PLAY_SINGLE_FRAME		4
-#define VID_PLAY_FAST_FORWARD		5
-#define VID_PLAY_SLOW_MOTION		6
-#define VID_PLAY_IMMEDIATE_NORMAL	7
-#define VID_PLAY_SWITCH_CHANNELS	8
-#define VID_PLAY_FREEZE_FRAME		9
-#define VID_PLAY_STILL_MODE		10
-#define VID_PLAY_MASTER_MODE		11
-	/* p1: see below */
-#define		VID_PLAY_MASTER_NONE	1
-#define		VID_PLAY_MASTER_VIDEO	2
-#define		VID_PLAY_MASTER_AUDIO	3
-#define VID_PLAY_ACTIVE_SCANLINES	12
-	/* p1 = first active; p2 = last active */
-#define VID_PLAY_RESET			13
-#define VID_PLAY_END_MARK		14
-
-#endif /* __LINUX_VIDEODEV_H */
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/include/media/ovcamchip.h b/include/media/ovcamchip.h
deleted file mode 100644
index 05b9569ef1c8..000000000000
--- a/include/media/ovcamchip.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* OmniVision* camera chip driver API
- *
- * Copyright (c) 1999-2004 Mark McClelland
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version. NO WARRANTY OF ANY KIND is expressed or implied.
- *
- * * OmniVision is a trademark of OmniVision Technologies, Inc. This driver
- * is not sponsored or developed by them.
- */
-
-#ifndef __LINUX_OVCAMCHIP_H
-#define __LINUX_OVCAMCHIP_H
-
-#include <linux/videodev.h>
-#include <media/v4l2-common.h>
-
-/* --------------------------------- */
-/*           ENUMERATIONS            */
-/* --------------------------------- */
-
-/* Controls */
-enum {
-	OVCAMCHIP_CID_CONT,		/* Contrast */
-	OVCAMCHIP_CID_BRIGHT,		/* Brightness */
-	OVCAMCHIP_CID_SAT,		/* Saturation */
-	OVCAMCHIP_CID_HUE,		/* Hue */
-	OVCAMCHIP_CID_EXP,		/* Exposure */
-	OVCAMCHIP_CID_FREQ,		/* Light frequency */
-	OVCAMCHIP_CID_BANDFILT,		/* Banding filter */
-	OVCAMCHIP_CID_AUTOBRIGHT,	/* Auto brightness */
-	OVCAMCHIP_CID_AUTOEXP,		/* Auto exposure */
-	OVCAMCHIP_CID_BACKLIGHT,	/* Back light compensation */
-	OVCAMCHIP_CID_MIRROR,		/* Mirror horizontally */
-};
-
-/* Chip types */
-#define NUM_CC_TYPES	9
-enum {
-	CC_UNKNOWN,
-	CC_OV76BE,
-	CC_OV7610,
-	CC_OV7620,
-	CC_OV7620AE,
-	CC_OV6620,
-	CC_OV6630,
-	CC_OV6630AE,
-	CC_OV6630AF,
-};
-
-/* --------------------------------- */
-/*           I2C ADDRESSES           */
-/* --------------------------------- */
-
-#define OV7xx0_SID   (0x42 >> 1)
-#define OV6xx0_SID   (0xC0 >> 1)
-
-/* --------------------------------- */
-/*                API                */
-/* --------------------------------- */
-
-struct ovcamchip_control {
-	__u32 id;
-	__s32 value;
-};
-
-struct ovcamchip_window {
-	int x;
-	int y;
-	int width;
-	int height;
-	int format;
-	int quarter;		/* Scale width and height down 2x */
-
-	/* This stuff will be removed eventually */
-	int clockdiv;		/* Clock divisor setting */
-};
-
-/* Commands */
-#define OVCAMCHIP_CMD_Q_SUBTYPE     _IOR  (0x88, 0x00, int)
-#define OVCAMCHIP_CMD_INITIALIZE    _IOW  (0x88, 0x01, int)
-/* You must call OVCAMCHIP_CMD_INITIALIZE before any of commands below! */
-#define OVCAMCHIP_CMD_S_CTRL        _IOW  (0x88, 0x02, struct ovcamchip_control)
-#define OVCAMCHIP_CMD_G_CTRL        _IOWR (0x88, 0x03, struct ovcamchip_control)
-#define OVCAMCHIP_CMD_S_MODE        _IOW  (0x88, 0x04, struct ovcamchip_window)
-#define OVCAMCHIP_MAX_CMD           _IO   (0x88, 0x3f)
-
-#endif
-- 
cgit v1.2.3-71-gd317


From 09c730a488c32c2cadb31cdb8dcc4df528441197 Mon Sep 17 00:00:00 2001
From: Sundar Iyer <sundar.iyer@stericsson.com>
Date: Tue, 21 Dec 2010 15:53:31 +0530
Subject: input/tc3589x: add tc3589x keypad support

Add support for the keypad controller module found on the
TC3589X devices. This driver default adds the support for
TC35893 device.

Signed-off-by: Sundar Iyer <sundar.iyer@stericsson.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
[Some minor fixups for compilation]
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
 drivers/input/keyboard/Kconfig          |  10 +
 drivers/input/keyboard/Makefile         |   1 +
 drivers/input/keyboard/tc3589x-keypad.c | 472 ++++++++++++++++++++++++++++++++
 drivers/mfd/tc3589x.c                   |  28 +-
 include/linux/mfd/tc3589x.h             |  52 ++++
 5 files changed, 562 insertions(+), 1 deletion(-)
 create mode 100644 drivers/input/keyboard/tc3589x-keypad.c

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index b8c51b9781db..85af3c3f7bcb 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -443,6 +443,16 @@ config KEYBOARD_OMAP4
 	  To compile this driver as a module, choose M here: the
 	  module will be called omap4-keypad.
 
+config KEYBOARD_TC3589X
+	tristate "TC3589X Keypad support"
+	depends on MFD_TC3589X
+	help
+	  Say Y here if you want to use the keypad controller on
+	  TC35892/3 I/O expander.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called tc3589x-keypad.
+
 config KEYBOARD_TNETV107X
 	tristate "TI TNETV107X keypad support"
 	depends on ARCH_DAVINCI_TNETV107X
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index a34452e8ebe2..4411c70db3b5 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_KEYBOARD_SH_KEYSC)		+= sh_keysc.o
 obj-$(CONFIG_KEYBOARD_STMPE)		+= stmpe-keypad.o
 obj-$(CONFIG_KEYBOARD_STOWAWAY)		+= stowaway.o
 obj-$(CONFIG_KEYBOARD_SUNKBD)		+= sunkbd.o
+obj-$(CONFIG_KEYBOARD_TC3589X)		+= tc3589x-keypad.o
 obj-$(CONFIG_KEYBOARD_TNETV107X)	+= tnetv107x-keypad.o
 obj-$(CONFIG_KEYBOARD_TWL4030)		+= twl4030_keypad.o
 obj-$(CONFIG_KEYBOARD_XTKBD)		+= xtkbd.o
diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c
new file mode 100644
index 000000000000..69dc0cb20a00
--- /dev/null
+++ b/drivers/input/keyboard/tc3589x-keypad.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Jayeeta Banerjee <jayeeta.banerjee@stericsson.com>
+ * Author: Sundar Iyer <sundar.iyer@stericsson.com>
+ *
+ * License Terms: GNU General Public License, version 2
+ *
+ * TC35893 MFD Keypad Controller driver
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <linux/input/matrix_keypad.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/mfd/tc3589x.h>
+
+/* Maximum supported keypad matrix row/columns size */
+#define TC3589x_MAX_KPROW               8
+#define TC3589x_MAX_KPCOL               12
+
+/* keypad related Constants */
+#define TC3589x_MAX_DEBOUNCE_SETTLE     0xFF
+#define DEDICATED_KEY_VAL		0xFF
+
+/* Pull up/down masks */
+#define TC3589x_NO_PULL_MASK		0x0
+#define TC3589x_PULL_DOWN_MASK		0x1
+#define TC3589x_PULL_UP_MASK		0x2
+#define TC3589x_PULLUP_ALL_MASK		0xAA
+#define TC3589x_IO_PULL_VAL(index, mask)	((mask)<<((index)%4)*2))
+
+/* Bit masks for IOCFG register */
+#define IOCFG_BALLCFG		0x01
+#define IOCFG_IG		0x08
+
+#define KP_EVCODE_COL_MASK	0x0F
+#define KP_EVCODE_ROW_MASK	0x70
+#define KP_RELEASE_EVT_MASK	0x80
+
+#define KP_ROW_SHIFT		4
+
+#define KP_NO_VALID_KEY_MASK	0x7F
+
+/* bit masks for RESTCTRL register */
+#define TC3589x_KBDRST		0x2
+#define TC3589x_IRQRST		0x10
+#define TC3589x_RESET_ALL	0x1B
+
+/* KBDMFS register bit mask */
+#define TC3589x_KBDMFS_EN	0x1
+
+/* CLKEN register bitmask */
+#define KPD_CLK_EN		0x1
+
+/* RSTINTCLR register bit mask */
+#define IRQ_CLEAR		0x1
+
+/* bit masks for keyboard interrupts*/
+#define TC3589x_EVT_LOSS_INT	0x8
+#define TC3589x_EVT_INT		0x4
+#define TC3589x_KBD_LOSS_INT	0x2
+#define TC3589x_KBD_INT		0x1
+
+/* bit masks for keyboard interrupt clear*/
+#define TC3589x_EVT_INT_CLR	0x2
+#define TC3589x_KBD_INT_CLR	0x1
+
+#define TC3589x_KBD_KEYMAP_SIZE     64
+
+/**
+ * struct tc_keypad - data structure used by keypad driver
+ * @input:      pointer to input device object
+ * @board:      keypad platform device
+ * @krow:	number of rows
+ * @kcol:	number of coloumns
+ * @keymap:     matrix scan code table for keycodes
+ */
+struct tc_keypad {
+	struct tc3589x *tc3589x;
+	struct input_dev *input;
+	const struct tc3589x_keypad_platform_data *board;
+	unsigned int krow;
+	unsigned int kcol;
+	unsigned short keymap[TC3589x_KBD_KEYMAP_SIZE];
+	bool keypad_stopped;
+};
+
+static int __devinit tc3589x_keypad_init_key_hardware(struct tc_keypad *keypad)
+{
+	int ret;
+	struct tc3589x *tc3589x = keypad->tc3589x;
+	u8 settle_time = keypad->board->settle_time;
+	u8 dbounce_period = keypad->board->debounce_period;
+	u8 rows = keypad->board->krow & 0xf;	/* mask out the nibble */
+	u8 column = keypad->board->kcol & 0xf;	/* mask out the nibble */
+
+	/* validate platform configurations */
+	if (keypad->board->kcol > TC3589x_MAX_KPCOL ||
+	    keypad->board->krow > TC3589x_MAX_KPROW ||
+	    keypad->board->debounce_period > TC3589x_MAX_DEBOUNCE_SETTLE ||
+	    keypad->board->settle_time > TC3589x_MAX_DEBOUNCE_SETTLE)
+		return -EINVAL;
+
+	/* configure KBDSIZE 4 LSbits for cols and 4 MSbits for rows */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_KBDSIZE,
+			(rows << KP_ROW_SHIFT) | column);
+	if (ret < 0)
+		return ret;
+
+	/* configure dedicated key config, no dedicated key selected */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_KBCFG_LSB, DEDICATED_KEY_VAL);
+	if (ret < 0)
+		return ret;
+
+	ret = tc3589x_reg_write(tc3589x, TC3589x_KBCFG_MSB, DEDICATED_KEY_VAL);
+	if (ret < 0)
+		return ret;
+
+	/* Configure settle time */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_KBDSETTLE_REG, settle_time);
+	if (ret < 0)
+		return ret;
+
+	/* Configure debounce time */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_KBDBOUNCE, dbounce_period);
+	if (ret < 0)
+		return ret;
+
+	/* Start of initialise keypad GPIOs */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_IOCFG, 0x0, IOCFG_IG);
+	if (ret < 0)
+		return ret;
+
+	/* Configure pull-up resistors for all row GPIOs */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_IOPULLCFG0_LSB,
+					TC3589x_PULLUP_ALL_MASK);
+	if (ret < 0)
+		return ret;
+
+	ret = tc3589x_reg_write(tc3589x, TC3589x_IOPULLCFG0_MSB,
+					TC3589x_PULLUP_ALL_MASK);
+	if (ret < 0)
+		return ret;
+
+	/* Configure pull-up resistors for all column GPIOs */
+	ret = tc3589x_reg_write(tc3589x, TC3589x_IOPULLCFG1_LSB,
+			TC3589x_PULLUP_ALL_MASK);
+	if (ret < 0)
+		return ret;
+
+	ret = tc3589x_reg_write(tc3589x, TC3589x_IOPULLCFG1_MSB,
+			TC3589x_PULLUP_ALL_MASK);
+	if (ret < 0)
+		return ret;
+
+	ret = tc3589x_reg_write(tc3589x, TC3589x_IOPULLCFG2_LSB,
+			TC3589x_PULLUP_ALL_MASK);
+
+	return ret;
+}
+
+#define TC35893_DATA_REGS		4
+#define TC35893_KEYCODE_FIFO_EMPTY	0x7f
+#define TC35893_KEYCODE_FIFO_CLEAR	0xff
+#define TC35893_KEYPAD_ROW_SHIFT	0x3
+
+static irqreturn_t tc3589x_keypad_irq(int irq, void *dev)
+{
+	struct tc_keypad *keypad = dev;
+	struct tc3589x *tc3589x = keypad->tc3589x;
+	u8 i, row_index, col_index, kbd_code, up;
+	u8 code;
+
+	for (i = 0; i < TC35893_DATA_REGS * 2; i++) {
+		kbd_code = tc3589x_reg_read(tc3589x, TC3589x_EVTCODE_FIFO);
+
+		/* loop till fifo is empty and no more keys are pressed */
+		if (kbd_code == TC35893_KEYCODE_FIFO_EMPTY ||
+				kbd_code == TC35893_KEYCODE_FIFO_CLEAR)
+			continue;
+
+		/* valid key is found */
+		col_index = kbd_code & KP_EVCODE_COL_MASK;
+		row_index = (kbd_code & KP_EVCODE_ROW_MASK) >> KP_ROW_SHIFT;
+		code = MATRIX_SCAN_CODE(row_index, col_index,
+						TC35893_KEYPAD_ROW_SHIFT);
+		up = kbd_code & KP_RELEASE_EVT_MASK;
+
+		input_event(keypad->input, EV_MSC, MSC_SCAN, code);
+		input_report_key(keypad->input, keypad->keymap[code], !up);
+		input_sync(keypad->input);
+	}
+
+	/* clear IRQ */
+	tc3589x_set_bits(tc3589x, TC3589x_KBDIC,
+			0x0, TC3589x_EVT_INT_CLR | TC3589x_KBD_INT_CLR);
+	/* enable IRQ */
+	tc3589x_set_bits(tc3589x, TC3589x_KBDMSK,
+			0x0, TC3589x_EVT_LOSS_INT | TC3589x_EVT_INT);
+
+	return IRQ_HANDLED;
+}
+
+static int tc3589x_keypad_enable(struct tc_keypad *keypad)
+{
+	struct tc3589x *tc3589x = keypad->tc3589x;
+	int ret;
+
+	/* pull the keypad module out of reset */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_RSTCTRL, TC3589x_KBDRST, 0x0);
+	if (ret < 0)
+		return ret;
+
+	/* configure KBDMFS */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_KBDMFS, 0x0, TC3589x_KBDMFS_EN);
+	if (ret < 0)
+		return ret;
+
+	/* enable the keypad clock */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_CLKEN, 0x0, KPD_CLK_EN);
+	if (ret < 0)
+		return ret;
+
+	/* clear pending IRQs */
+	ret =  tc3589x_set_bits(tc3589x, TC3589x_RSTINTCLR, 0x0, 0x1);
+	if (ret < 0)
+		return ret;
+
+	/* enable the IRQs */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_KBDMSK, 0x0,
+					TC3589x_EVT_LOSS_INT | TC3589x_EVT_INT);
+	if (ret < 0)
+		return ret;
+
+	keypad->keypad_stopped = false;
+
+	return ret;
+}
+
+static int tc3589x_keypad_disable(struct tc_keypad *keypad)
+{
+	struct tc3589x *tc3589x = keypad->tc3589x;
+	int ret;
+
+	/* clear IRQ */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_KBDIC,
+			0x0, TC3589x_EVT_INT_CLR | TC3589x_KBD_INT_CLR);
+	if (ret < 0)
+		return ret;
+
+	/* disable all interrupts */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_KBDMSK,
+			~(TC3589x_EVT_LOSS_INT | TC3589x_EVT_INT), 0x0);
+	if (ret < 0)
+		return ret;
+
+	/* disable the keypad module */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_CLKEN, 0x1, 0x0);
+	if (ret < 0)
+		return ret;
+
+	/* put the keypad module into reset */
+	ret = tc3589x_set_bits(tc3589x, TC3589x_RSTCTRL, TC3589x_KBDRST, 0x1);
+
+	keypad->keypad_stopped = true;
+
+	return ret;
+}
+
+static int tc3589x_keypad_open(struct input_dev *input)
+{
+	int error;
+	struct tc_keypad *keypad = input_get_drvdata(input);
+
+	/* enable the keypad module */
+	error = tc3589x_keypad_enable(keypad);
+	if (error < 0) {
+		dev_err(&input->dev, "failed to enable keypad module\n");
+		return error;
+	}
+
+	error = tc3589x_keypad_init_key_hardware(keypad);
+	if (error < 0) {
+		dev_err(&input->dev, "failed to configure keypad module\n");
+		return error;
+	}
+
+	return 0;
+}
+
+static void tc3589x_keypad_close(struct input_dev *input)
+{
+	struct tc_keypad *keypad = input_get_drvdata(input);
+
+	/* disable the keypad module */
+	tc3589x_keypad_disable(keypad);
+}
+
+static int __devinit tc3589x_keypad_probe(struct platform_device *pdev)
+{
+	struct tc3589x *tc3589x = dev_get_drvdata(pdev->dev.parent);
+	struct tc_keypad *keypad;
+	struct input_dev *input;
+	const struct tc3589x_keypad_platform_data *plat;
+	int error, irq;
+
+	plat = tc3589x->pdata->keypad;
+	if (!plat) {
+		dev_err(&pdev->dev, "invalid keypad platform data\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	keypad = kzalloc(sizeof(struct tc_keypad), GFP_KERNEL);
+	input = input_allocate_device();
+	if (!keypad || !input) {
+		dev_err(&pdev->dev, "failed to allocate keypad memory\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	keypad->board = plat;
+	keypad->input = input;
+	keypad->tc3589x = tc3589x;
+
+	input->id.bustype = BUS_I2C;
+	input->name = pdev->name;
+	input->dev.parent = &pdev->dev;
+
+	input->keycode = keypad->keymap;
+	input->keycodesize = sizeof(keypad->keymap[0]);
+	input->keycodemax = ARRAY_SIZE(keypad->keymap);
+
+	input->open = tc3589x_keypad_open;
+	input->close = tc3589x_keypad_close;
+
+	input_set_drvdata(input, keypad);
+
+	input_set_capability(input, EV_MSC, MSC_SCAN);
+
+	__set_bit(EV_KEY, input->evbit);
+	if (!plat->no_autorepeat)
+		__set_bit(EV_REP, input->evbit);
+
+	matrix_keypad_build_keymap(plat->keymap_data, 0x3,
+			input->keycode, input->keybit);
+
+	error = request_threaded_irq(irq, NULL,
+			tc3589x_keypad_irq, plat->irqtype,
+			"tc3589x-keypad", keypad);
+	if (error < 0) {
+		dev_err(&pdev->dev,
+				"Could not allocate irq %d,error %d\n",
+				irq, error);
+		goto err_free_mem;
+	}
+
+	error = input_register_device(input);
+	if (error) {
+		dev_err(&pdev->dev, "Could not register input device\n");
+		goto err_free_irq;
+	}
+
+	/* let platform decide if keypad is a wakeup source or not */
+	device_init_wakeup(&pdev->dev, plat->enable_wakeup);
+	device_set_wakeup_capable(&pdev->dev, plat->enable_wakeup);
+
+	platform_set_drvdata(pdev, keypad);
+
+	return 0;
+
+err_free_irq:
+	free_irq(irq, keypad);
+err_free_mem:
+	input_free_device(input);
+	kfree(keypad);
+	return error;
+}
+
+static int __devexit tc3589x_keypad_remove(struct platform_device *pdev)
+{
+	struct tc_keypad *keypad = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	if (!keypad->keypad_stopped)
+		tc3589x_keypad_disable(keypad);
+
+	free_irq(irq, keypad);
+
+	input_unregister_device(keypad->input);
+
+	kfree(keypad);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tc3589x_keypad_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct tc_keypad *keypad = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	/* keypad is already off; we do nothing */
+	if (keypad->keypad_stopped)
+		return 0;
+
+	/* if device is not a wakeup source, disable it for powersave */
+	if (!device_may_wakeup(&pdev->dev))
+		tc3589x_keypad_disable(keypad);
+	else
+		enable_irq_wake(irq);
+
+	return 0;
+}
+
+static int tc3589x_keypad_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct tc_keypad *keypad = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	if (!keypad->keypad_stopped)
+		return 0;
+
+	/* enable the device to resume normal operations */
+	if (!device_may_wakeup(&pdev->dev))
+		tc3589x_keypad_enable(keypad);
+	else
+		disable_irq_wake(irq);
+
+	return 0;
+}
+
+static const SIMPLE_DEV_PM_OPS(tc3589x_keypad_dev_pm_ops,
+			       tc3589x_keypad_suspend, tc3589x_keypad_resume);
+#endif
+
+static struct platform_driver tc3589x_keypad_driver = {
+	.driver.name  = "tc3589x-keypad",
+	.driver.owner = THIS_MODULE,
+#ifdef CONFIG_PM
+	.driver.pm = &tc3589x_keypad_dev_pm_ops,
+#endif
+	.probe = tc3589x_keypad_probe,
+	.remove = __devexit_p(tc3589x_keypad_remove),
+};
+
+static int __init tc3589x_keypad_init(void)
+{
+	return platform_driver_register(&tc3589x_keypad_driver);
+}
+module_init(tc3589x_keypad_init);
+
+static void __exit tc3589x_keypad_exit(void)
+{
+	return platform_driver_unregister(&tc3589x_keypad_driver);
+}
+module_exit(tc3589x_keypad_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jayeeta Banerjee/Sundar Iyer");
+MODULE_DESCRIPTION("TC35893 Keypad Driver");
+MODULE_ALIAS("platform:tc3589x-keypad")
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 112efd3c4940..729dbeed2ce0 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -132,6 +132,14 @@ static struct resource gpio_resources[] = {
 	},
 };
 
+static struct resource keypad_resources[] = {
+	{
+		.start  = TC3589x_INT_KBDIRQ,
+		.end    = TC3589x_INT_KBDIRQ,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
 static struct mfd_cell tc3589x_dev_gpio[] = {
 	{
 		.name		= "tc3589x-gpio",
@@ -140,6 +148,14 @@ static struct mfd_cell tc3589x_dev_gpio[] = {
 	},
 };
 
+static struct mfd_cell tc3589x_dev_keypad[] = {
+	{
+		.name           = "tc3589x-keypad",
+		.num_resources  = ARRAY_SIZE(keypad_resources),
+		.resources      = &keypad_resources[0],
+	},
+};
+
 static irqreturn_t tc3589x_irq(int irq, void *data)
 {
 	struct tc3589x *tc3589x = data;
@@ -255,8 +271,18 @@ static int __devinit tc3589x_device_init(struct tc3589x *tc3589x)
 		dev_info(tc3589x->dev, "added gpio block\n");
 	}
 
-	return ret;
+	if (blocks & TC3589x_BLOCK_KEYPAD) {
+		ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_dev_keypad,
+				ARRAY_SIZE(tc3589x_dev_keypad), NULL,
+				tc3589x->irq_base);
+		if (ret) {
+			dev_err(tc3589x->dev, "failed to keypad child\n");
+			return ret;
+		}
+		dev_info(tc3589x->dev, "added keypad block\n");
+	}
 
+	return ret;
 }
 
 static int __devinit tc3589x_probe(struct i2c_client *i2c,
diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h
index da00958b12d9..16c76e124f9c 100644
--- a/include/linux/mfd/tc3589x.h
+++ b/include/linux/mfd/tc3589x.h
@@ -20,6 +20,17 @@ enum tx3589x_block {
 #define TC3589x_RSTCTRL_KBDRST	(1 << 1)
 #define TC3589x_RSTCTRL_GPIRST	(1 << 0)
 
+/* Keyboard Configuration Registers */
+#define TC3589x_KBDSETTLE_REG   0x01
+#define TC3589x_KBDBOUNCE       0x02
+#define TC3589x_KBDSIZE         0x03
+#define TC3589x_KBCFG_LSB       0x04
+#define TC3589x_KBCFG_MSB       0x05
+#define TC3589x_KBDIC           0x08
+#define TC3589x_KBDMSK          0x09
+#define TC3589x_EVTCODE_FIFO    0x10
+#define TC3589x_KBDMFS		0x8F
+
 #define TC3589x_IRQST		0x91
 
 #define TC3589x_MANFCODE_MAGIC	0x03
@@ -35,6 +46,14 @@ enum tx3589x_block {
 #define TC3589x_EXTRSTN		0x83
 #define TC3589x_RSTINTCLR	0x84
 
+/* Pull up/down configuration registers */
+#define TC3589x_IOCFG           0xA7
+#define TC3589x_IOPULLCFG0_LSB  0xAA
+#define TC3589x_IOPULLCFG0_MSB  0xAB
+#define TC3589x_IOPULLCFG1_LSB  0xAC
+#define TC3589x_IOPULLCFG1_MSB  0xAD
+#define TC3589x_IOPULLCFG2_LSB  0xAE
+
 #define TC3589x_GPIOIS0		0xC9
 #define TC3589x_GPIOIS1		0xCA
 #define TC3589x_GPIOIS2		0xCB
@@ -112,6 +131,37 @@ extern int tc3589x_block_write(struct tc3589x *tc3589x, u8 reg, u8 length,
 			       const u8 *values);
 extern int tc3589x_set_bits(struct tc3589x *tc3589x, u8 reg, u8 mask, u8 val);
 
+/*
+ * Keypad related platform specific constants
+ * These values may be modified for fine tuning
+ */
+#define TC_KPD_ROWS             0x8
+#define TC_KPD_COLUMNS          0x8
+#define TC_KPD_DEBOUNCE_PERIOD  0xA3
+#define TC_KPD_SETTLE_TIME      0xA3
+
+/**
+ * struct tc35893_platform_data - data structure for platform specific data
+ * @keymap_data:        matrix scan code table for keycodes
+ * @krow:               mask for available rows, value is 0xFF
+ * @kcol:               mask for available columns, value is 0xFF
+ * @debounce_period:    platform specific debounce time
+ * @settle_time:        platform specific settle down time
+ * @irqtype:            type of interrupt, falling or rising edge
+ * @enable_wakeup:      specifies if keypad event can wake up system from sleep
+ * @no_autorepeat:      flag for auto repetition
+ */
+struct tc3589x_keypad_platform_data {
+	const struct matrix_keymap_data *keymap_data;
+	u8                      krow;
+	u8                      kcol;
+	u8                      debounce_period;
+	u8                      settle_time;
+	unsigned long           irqtype;
+	bool                    enable_wakeup;
+	bool                    no_autorepeat;
+};
+
 /**
  * struct tc3589x_gpio_platform_data - TC3589x GPIO platform data
  * @gpio_base: first gpio number assigned to TC3589x.  A maximum of
@@ -130,11 +180,13 @@ struct tc3589x_gpio_platform_data {
  * @block: bitmask of blocks to enable (use TC3589x_BLOCK_*)
  * @irq_base: base IRQ number.  %TC3589x_NR_IRQS irqs will be used.
  * @gpio: GPIO-specific platform data
+ * @keypad: keypad-specific platform data
  */
 struct tc3589x_platform_data {
 	unsigned int block;
 	int irq_base;
 	struct tc3589x_gpio_platform_data *gpio;
+	const struct tc3589x_keypad_platform_data *keypad;
 };
 
 #define TC3589x_NR_GPIOS	24
-- 
cgit v1.2.3-71-gd317


From 9706a36e35c4ce04f28a62cfe1205b4e3b0dd13c Mon Sep 17 00:00:00 2001
From: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Date: Thu, 18 Nov 2010 15:54:59 -0800
Subject: of/flattree: Add non-boottime device tree functions

In preparation for providing run-time handling of device trees, factor
out some of the basic functions so that they take an arbitrary blob,
rather than relying on the single boot-time tree.

V2:
- functions have of_fdt_* names
- removed find_flat_dt_string
- blob argument is first

Signed-off-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/fdt.c       | 133 +++++++++++++++++++++++++++++--------------------
 include/linux/of_fdt.h |  11 ++++
 2 files changed, 90 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 2ebacf14e7de..10eab21076ea 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -22,6 +22,82 @@
 
 #include <asm/page.h>
 
+char *of_fdt_get_string(struct boot_param_header *blob, u32 offset)
+{
+	return ((char *)blob) +
+		be32_to_cpu(blob->off_dt_strings) + offset;
+}
+
+/**
+ * of_fdt_get_property - Given a node in the given flat blob, return
+ * the property ptr
+ */
+void *of_fdt_get_property(struct boot_param_header *blob,
+		       unsigned long node, const char *name,
+		       unsigned long *size)
+{
+	unsigned long p = node;
+
+	do {
+		u32 tag = be32_to_cpup((__be32 *)p);
+		u32 sz, noff;
+		const char *nstr;
+
+		p += 4;
+		if (tag == OF_DT_NOP)
+			continue;
+		if (tag != OF_DT_PROP)
+			return NULL;
+
+		sz = be32_to_cpup((__be32 *)p);
+		noff = be32_to_cpup((__be32 *)(p + 4));
+		p += 8;
+		if (be32_to_cpu(blob->version) < 0x10)
+			p = ALIGN(p, sz >= 8 ? 8 : 4);
+
+		nstr = of_fdt_get_string(blob, noff);
+		if (nstr == NULL) {
+			pr_warning("Can't find property index name !\n");
+			return NULL;
+		}
+		if (strcmp(name, nstr) == 0) {
+			if (size)
+				*size = sz;
+			return (void *)p;
+		}
+		p += sz;
+		p = ALIGN(p, 4);
+	} while (1);
+}
+
+/**
+ * of_fdt_is_compatible - Return true if given node from the given blob has
+ * compat in its compatible list
+ * @blob: A device tree blob
+ * @node: node to test
+ * @compat: compatible string to compare with compatible list.
+ */
+int of_fdt_is_compatible(struct boot_param_header *blob,
+		      unsigned long node, const char *compat)
+{
+	const char *cp;
+	unsigned long cplen, l;
+
+	cp = of_fdt_get_property(blob, node, "compatible", &cplen);
+	if (cp == NULL)
+		return 0;
+	while (cplen > 0) {
+		if (of_compat_cmp(cp, compat, strlen(compat)) == 0)
+			return 1;
+		l = strlen(cp) + 1;
+		cp += l;
+		cplen -= l;
+	}
+
+	return 0;
+}
+
+/* Everything below here references initial_boot_params directly. */
 int __initdata dt_root_addr_cells;
 int __initdata dt_root_size_cells;
 
@@ -29,12 +105,6 @@ struct boot_param_header *initial_boot_params;
 
 #ifdef CONFIG_OF_EARLY_FLATTREE
 
-char *find_flat_dt_string(u32 offset)
-{
-	return ((char *)initial_boot_params) +
-		be32_to_cpu(initial_boot_params->off_dt_strings) + offset;
-}
-
 /**
  * of_scan_flat_dt - scan flattened tree blob and call callback on each.
  * @it: callback function
@@ -123,38 +193,7 @@ unsigned long __init of_get_flat_dt_root(void)
 void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size)
 {
-	unsigned long p = node;
-
-	do {
-		u32 tag = be32_to_cpup((__be32 *)p);
-		u32 sz, noff;
-		const char *nstr;
-
-		p += 4;
-		if (tag == OF_DT_NOP)
-			continue;
-		if (tag != OF_DT_PROP)
-			return NULL;
-
-		sz = be32_to_cpup((__be32 *)p);
-		noff = be32_to_cpup((__be32 *)(p + 4));
-		p += 8;
-		if (be32_to_cpu(initial_boot_params->version) < 0x10)
-			p = ALIGN(p, sz >= 8 ? 8 : 4);
-
-		nstr = find_flat_dt_string(noff);
-		if (nstr == NULL) {
-			pr_warning("Can't find property index name !\n");
-			return NULL;
-		}
-		if (strcmp(name, nstr) == 0) {
-			if (size)
-				*size = sz;
-			return (void *)p;
-		}
-		p += sz;
-		p = ALIGN(p, 4);
-	} while (1);
+	return of_fdt_get_property(initial_boot_params, node, name, size);
 }
 
 /**
@@ -164,21 +203,7 @@ void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
  */
 int __init of_flat_dt_is_compatible(unsigned long node, const char *compat)
 {
-	const char *cp;
-	unsigned long cplen, l;
-
-	cp = of_get_flat_dt_prop(node, "compatible", &cplen);
-	if (cp == NULL)
-		return 0;
-	while (cplen > 0) {
-		if (of_compat_cmp(cp, compat, strlen(compat)) == 0)
-			return 1;
-		l = strlen(cp) + 1;
-		cp += l;
-		cplen -= l;
-	}
-
-	return 0;
+	return of_fdt_is_compatible(initial_boot_params, node, compat);
 }
 
 static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
@@ -303,7 +328,7 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
 		if (be32_to_cpu(initial_boot_params->version) < 0x10)
 			*p = ALIGN(*p, sz >= 8 ? 8 : 4);
 
-		pname = find_flat_dt_string(noff);
+		pname = of_fdt_get_string(initial_boot_params, noff);
 		if (pname == NULL) {
 			pr_info("Can't find property name in list !\n");
 			break;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 7bbf5b328438..70c5b736f0a3 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -58,6 +58,17 @@ struct boot_param_header {
 };
 
 #if defined(CONFIG_OF_FLATTREE)
+
+/* For scanning an arbitrary device-tree at any time */
+extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset);
+extern void *of_fdt_get_property(struct boot_param_header *blob,
+				 unsigned long node,
+				 const char *name,
+				 unsigned long *size);
+extern int of_fdt_is_compatible(struct boot_param_header *blob,
+				unsigned long node,
+				const char *compat);
+
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
 extern int __initdata dt_root_addr_cells;
 extern int __initdata dt_root_size_cells;
-- 
cgit v1.2.3-71-gd317


From fe14042358fac0673d4b6362a73796fd64379938 Mon Sep 17 00:00:00 2001
From: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Date: Thu, 18 Nov 2010 15:55:02 -0800
Subject: of/flattree: Refactor unflatten_device_tree and add
 fdt_unflatten_tree

unflatten_device_tree has two dependencies on things that happen
during boot time.  Firstly, it references the initial device tree
directly. Secondly, it allocates memory using the early boot
allocator.  This patch factors out these dependencies and uses
the new __unflatten_device_tree function to implement a driver-visible
fdt_unflatten_tree function, which can be used to unflatten a
blob after boot time.

V2:
- remove extra __va() call
- make dt_alloc functions return void *.  This doesn't fix the general
  strangeness in this code that constantly casts back and forth between
  unsigned long and __be32 *

Signed-off-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/fdt.c       | 149 ++++++++++++++++++++++++++++++++-----------------
 include/linux/of_fdt.h |   2 +
 2 files changed, 100 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 71edc9cecd62..8a90ee42071a 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -11,10 +11,12 @@
 
 #include <linux/kernel.h>
 #include <linux/initrd.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_PPC
 #include <asm/machdep.h>
@@ -312,6 +314,94 @@ unsigned long unflatten_dt_node(struct boot_param_header *blob,
 	return mem;
 }
 
+/**
+ * __unflatten_device_tree - create tree of device_nodes from flat blob
+ *
+ * unflattens a device-tree, creating the
+ * tree of struct device_node. It also fills the "name" and "type"
+ * pointers of the nodes so the normal device-tree walking functions
+ * can be used.
+ * @blob: The blob to expand
+ * @mynodes: The device_node tree created by the call
+ * @dt_alloc: An allocator that provides a virtual address to memory
+ * for the resulting tree
+ */
+void __unflatten_device_tree(struct boot_param_header *blob,
+			     struct device_node **mynodes,
+			     void * (*dt_alloc)(u64 size, u64 align))
+{
+	unsigned long start, mem, size;
+	struct device_node **allnextp = mynodes;
+
+	pr_debug(" -> unflatten_device_tree()\n");
+
+	if (!blob) {
+		pr_debug("No device tree pointer\n");
+		return;
+	}
+
+	pr_debug("Unflattening device tree:\n");
+	pr_debug("magic: %08x\n", be32_to_cpu(blob->magic));
+	pr_debug("size: %08x\n", be32_to_cpu(blob->totalsize));
+	pr_debug("version: %08x\n", be32_to_cpu(blob->version));
+
+	if (be32_to_cpu(blob->magic) != OF_DT_HEADER) {
+		pr_err("Invalid device tree blob header\n");
+		return;
+	}
+
+	/* First pass, scan for size */
+	start = ((unsigned long)blob) +
+		be32_to_cpu(blob->off_dt_struct);
+	size = unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
+	size = (size | 3) + 1;
+
+	pr_debug("  size is %lx, allocating...\n", size);
+
+	/* Allocate memory for the expanded device tree */
+	mem = (unsigned long)
+		dt_alloc(size + 4, __alignof__(struct device_node));
+
+	((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
+
+	pr_debug("  unflattening %lx...\n", mem);
+
+	/* Second pass, do actual unflattening */
+	start = ((unsigned long)blob) +
+		be32_to_cpu(blob->off_dt_struct);
+	unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
+	if (be32_to_cpup((__be32 *)start) != OF_DT_END)
+		pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
+	if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef)
+		pr_warning("End of tree marker overwritten: %08x\n",
+			   be32_to_cpu(((__be32 *)mem)[size / 4]));
+	*allnextp = NULL;
+
+	pr_debug(" <- unflatten_device_tree()\n");
+}
+
+static void *kernel_tree_alloc(u64 size, u64 align)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+/**
+ * of_fdt_unflatten_tree - create tree of device_nodes from flat blob
+ *
+ * unflattens the device-tree passed by the firmware, creating the
+ * tree of struct device_node. It also fills the "name" and "type"
+ * pointers of the nodes so the normal device-tree walking functions
+ * can be used.
+ */
+void of_fdt_unflatten_tree(unsigned long *blob,
+			struct device_node **mynodes)
+{
+	struct boot_param_header *device_tree =
+		(struct boot_param_header *)blob;
+	__unflatten_device_tree(device_tree, mynodes, &kernel_tree_alloc);
+}
+EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
+
 /* Everything below here references initial_boot_params directly. */
 int __initdata dt_root_addr_cells;
 int __initdata dt_root_size_cells;
@@ -569,6 +659,12 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 	return 1;
 }
 
+static void *__init early_device_tree_alloc(u64 size, u64 align)
+{
+	unsigned long mem = early_init_dt_alloc_memory_arch(size, align);
+	return __va(mem);
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
@@ -579,62 +675,13 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
  */
 void __init unflatten_device_tree(void)
 {
-	unsigned long start, mem, size;
-	struct device_node **allnextp = &allnodes;
-
-	pr_debug(" -> unflatten_device_tree()\n");
-
-	if (!initial_boot_params) {
-		pr_debug("No device tree pointer\n");
-		return;
-	}
-
-	pr_debug("Unflattening device tree:\n");
-	pr_debug("magic: %08x\n", be32_to_cpu(initial_boot_params->magic));
-	pr_debug("size: %08x\n", be32_to_cpu(initial_boot_params->totalsize));
-	pr_debug("version: %08x\n", be32_to_cpu(initial_boot_params->version));
-
-	if (be32_to_cpu(initial_boot_params->magic) != OF_DT_HEADER) {
-		pr_err("Invalid device tree blob header\n");
-		return;
-	}
-
-	/* First pass, scan for size */
-	start = ((unsigned long)initial_boot_params) +
-		be32_to_cpu(initial_boot_params->off_dt_struct);
-	size = unflatten_dt_node(initial_boot_params, 0, &start,
-				 NULL, NULL, 0);
-	size = (size | 3) + 1;
-
-	pr_debug("  size is %lx, allocating...\n", size);
-
-	/* Allocate memory for the expanded device tree */
-	mem = early_init_dt_alloc_memory_arch(size + 4,
-			__alignof__(struct device_node));
-	mem = (unsigned long) __va(mem);
-
-	((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
-
-	pr_debug("  unflattening %lx...\n", mem);
-
-	/* Second pass, do actual unflattening */
-	start = ((unsigned long)initial_boot_params) +
-		be32_to_cpu(initial_boot_params->off_dt_struct);
-	unflatten_dt_node(initial_boot_params, mem, &start,
-			  NULL, &allnextp, 0);
-	if (be32_to_cpup((__be32 *)start) != OF_DT_END)
-		pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
-	if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef)
-		pr_warning("End of tree marker overwritten: %08x\n",
-			   be32_to_cpu(((__be32 *)mem)[size / 4]));
-	*allnextp = NULL;
+	__unflatten_device_tree(initial_boot_params, &allnodes,
+				early_device_tree_alloc);
 
 	/* Get pointer to OF "/chosen" node for use everywhere */
 	of_chosen = of_find_node_by_path("/chosen");
 	if (of_chosen == NULL)
 		of_chosen = of_find_node_by_path("/chosen@0");
-
-	pr_debug(" <- unflatten_device_tree()\n");
 }
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 70c5b736f0a3..9ce5dfd2186a 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -68,6 +68,8 @@ extern void *of_fdt_get_property(struct boot_param_header *blob,
 extern int of_fdt_is_compatible(struct boot_param_header *blob,
 				unsigned long node,
 				const char *compat);
+extern void of_fdt_unflatten_tree(unsigned long *blob,
+			       struct device_node **mynodes);
 
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
 extern int __initdata dt_root_addr_cells;
-- 
cgit v1.2.3-71-gd317


From 3e29027af43728c2a91fe3f735ab2822edaf54a8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 30 Dec 2010 09:25:46 +0000
Subject: dcbnl: add support for ieee8021Qaz attributes

The IEEE8021Qaz is the IEEE standard version of CEE. The
standard has had enough significant changes from the CEE
version that many of the CEE attributes have no meaning
in the new spec or do not easily map to IEEE standards.

Rather then attempt to create a complicated mapping
between CEE and IEEE standards this patch adds a nested
IEEE attribute to the list of DCB attributes. The policy
is,

	[DCB_ATTR_IFNAME]
	[DCB_ATTR_STATE]
	...
	[DCB_ATTR_IEEE]
		[DCB_ATTR_IEEE_ETS]
		[DCB_ATTR_IEEE_PFC]
		[DCB_ATTR_IEEE_APP_TABLE]
			[DCB_ATTR_IEEE_APP]
			...

The following dcbnl_rtnl_ops routines were added to handle
the IEEE standard,

	int (*ieee_getets) (struct net_device *, struct ieee_ets *);
	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
	int (*ieee_setapp) (struct net_device *, struct dcb_app *);

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 106 ++++++++++++++++++++++++++++++++++++++++
 include/net/dcbnl.h   |  11 +++++
 net/dcb/dcbnl.c       | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 8723491f7dfd..287b5618e296 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -22,6 +22,87 @@
 
 #include <linux/types.h>
 
+/* IEEE 802.1Qaz std supported values */
+#define IEEE_8021QAZ_MAX_TCS	8
+
+/* This structure contains the IEEE 802.1Qaz ETS managed object
+ *
+ * @willing: willing bit in ETS configuratin TLV
+ * @ets_cap: indicates supported capacity of ets feature
+ * @cbs: credit based shaper ets algorithm supported
+ * @tc_tx_bw: tc tx bandwidth indexed by traffic class
+ * @tc_rx_bw: tc rx bandwidth indexed by traffic class
+ * @tc_tsa: TSA Assignment table, indexed by traffic class
+ * @prio_tc: priority assignment table mapping 8021Qp to traffic class
+ * @tc_reco_bw: recommended tc bandwidth indexed by traffic class for TLV
+ * @tc_reco_tsa: recommended tc bandwidth indexed by traffic class for TLV
+ * @reco_prio_tc: recommended tc tx bandwidth indexed by traffic class for TLV
+ *
+ * Recommended values are used to set fields in the ETS recommendation TLV
+ * with hardware offloaded LLDP.
+ *
+ * ----
+ *  TSA Assignment 8 bit identifiers
+ *	0	strict priority
+ *	1	credit-based shaper
+ *	2	enhanced transmission selection
+ *	3-254	reserved
+ *	255	vendor specific
+ */
+struct ieee_ets {
+	__u8	willing;
+	__u8	ets_cap;
+	__u8	cbs;
+	__u8	tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_rx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	prio_tc[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	reco_prio_tc[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qaz PFC managed object
+ *
+ * @pfc_cap: Indicates the number of traffic classes on the local device
+ *	     that may simultaneously have PFC enabled.
+ * @pfc_en: bitmap indicating pfc enabled traffic classes
+ * @mbc: enable macsec bypass capability
+ * @delay: the allowance made for a round-trip propagation delay of the
+ *	   link in bits.
+ * @requests: count of the sent pfc frames
+ * @indications: count of the received pfc frames
+ */
+struct ieee_pfc {
+	__u8	pfc_cap;
+	__u8	pfc_en;
+	__u8	mbc;
+	__u16	delay;
+	__u64	requests[IEEE_8021QAZ_MAX_TCS];
+	__u64	indications[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qaz APP managed object
+ *
+ * @selector: protocol identifier type
+ * @protocol: protocol of type indicated
+ * @priority: 3-bit unsigned integer indicating priority
+ *
+ * ----
+ *  Selector field values
+ *	0	Reserved
+ *	1	Ethertype
+ *	2	Well known port number over TCP or SCTP
+ *	3	Well known port number over UDP or DCCP
+ *	4	Well known port number over TCP, SCTP, UDP, or DCCP
+ *	5-7	Reserved
+ */
+struct dcb_app {
+	__u8	selector;
+	__u32	protocol;
+	__u8	priority;
+};
+
 struct dcbmsg {
 	__u8               dcb_family;
 	__u8               cmd;
@@ -50,6 +131,8 @@ struct dcbmsg {
  * @DCB_CMD_SBCN: get backward congestion notification configration.
  * @DCB_CMD_GAPP: get application protocol configuration
  * @DCB_CMD_SAPP: set application protocol configuration
+ * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
+ * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -83,6 +166,9 @@ enum dcbnl_commands {
 	DCB_CMD_GAPP,
 	DCB_CMD_SAPP,
 
+	DCB_CMD_IEEE_SET,
+	DCB_CMD_IEEE_GET,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -102,6 +188,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
  * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
+ * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -119,10 +206,29 @@ enum dcbnl_attrs {
 	DCB_ATTR_BCN,
 	DCB_ATTR_APP,
 
+	/* IEEE std attributes */
+	DCB_ATTR_IEEE,
+
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
 };
 
+enum ieee_attrs {
+	DCB_ATTR_IEEE_UNSPEC,
+	DCB_ATTR_IEEE_ETS,
+	DCB_ATTR_IEEE_PFC,
+	DCB_ATTR_IEEE_APP_TABLE,
+	__DCB_ATTR_IEEE_MAX
+};
+#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
+
+enum ieee_attrs_app {
+	DCB_ATTR_IEEE_APP_UNSPEC,
+	DCB_ATTR_IEEE_APP,
+	__DCB_ATTR_IEEE_APP_MAX
+};
+#define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
+
 /**
  * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index b36ac7e0914d..e2d841e963b3 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -20,11 +20,22 @@
 #ifndef __NET_DCBNL_H__
 #define __NET_DCBNL_H__
 
+#include <linux/dcbnl.h>
+
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
  * the netdevice struct.
  */
 struct dcbnl_rtnl_ops {
+	/* IEEE 802.1Qaz std */
+	int (*ieee_getets) (struct net_device *, struct ieee_ets *);
+	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
+	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
+	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
+	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
+	int (*ieee_setapp) (struct net_device *, struct dcb_app *);
+
+	/* CEE std */
 	u8   (*getstate)(struct net_device *);
 	u8   (*setstate)(struct net_device *, u8);
 	void (*getpermhwaddr)(struct net_device *, u8 *);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 19ac2b985485..2ff908498924 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -66,6 +66,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_PFC_STATE]   = {.type = NLA_U8},
 	[DCB_ATTR_BCN]         = {.type = NLA_NESTED},
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
+	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -167,6 +168,17 @@ static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = {
 	[DCB_APP_ATTR_PRIORITY]     = {.type = NLA_U8},
 };
 
+/* IEEE 802.1Qaz nested attributes. */
+static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
+	[DCB_ATTR_IEEE_ETS]	    = {.len = sizeof(struct ieee_ets)},
+	[DCB_ATTR_IEEE_PFC]	    = {.len = sizeof(struct ieee_pfc)},
+	[DCB_ATTR_IEEE_APP_TABLE]   = {.type = NLA_NESTED},
+};
+
+static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
+	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
+};
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -1118,6 +1130,117 @@ err:
 	return ret;
 }
 
+/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
+ * be completed the entire msg is aborted and error value is returned.
+ * No attempt is made to reconcile the case where only part of the
+ * cmd can be completed.
+ */
+static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+	int err = -EOPNOTSUPP;
+
+	if (!ops)
+		goto err;
+
+	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
+			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+	if (err)
+		goto err;
+
+	if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) {
+		struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]);
+		err = ops->ieee_setets(netdev, ets);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) {
+		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
+		err = ops->ieee_setpfc(netdev, pfc);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) {
+		struct nlattr *attr;
+		int rem;
+
+		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
+			struct dcb_app *app_data;
+			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+				continue;
+			app_data = nla_data(attr);
+			err = ops->ieee_setapp(netdev, app_data);
+			if (err)
+				goto err;
+		}
+	}
+
+err:
+	dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE,
+		    pid, seq, flags);
+	return err;
+}
+
+
+/* Handle IEEE 802.1Qaz GET commands. */
+static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *ieee;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_IEEE_GET;
+
+	NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
+
+	ieee = nla_nest_start(skb, DCB_ATTR_IEEE);
+	if (!ieee)
+		goto nla_put_failure;
+
+	if (ops->ieee_getets) {
+		struct ieee_ets ets;
+		err = ops->ieee_getets(netdev, &ets);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets);
+	}
+
+	if (ops->ieee_getpfc) {
+		struct ieee_pfc pfc;
+		err = ops->ieee_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc);
+	}
+
+	nla_nest_end(skb, ieee);
+	nlmsg_end(skb, nlh);
+
+	return rtnl_unicast(skb, &init_net, pid);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+	kfree_skb(skb);
+	return -1;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1223,6 +1346,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setapp(netdev, tb, pid, nlh->nlmsg_seq,
 		                   nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_IEEE_SET:
+		ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq,
+				 nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_IEEE_GET:
+		ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq,
+				 nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3-71-gd317


From 9ab933ab2cc80f04690d6aa385b1110075c5e507 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 30 Dec 2010 09:26:31 +0000
Subject: dcbnl: add appliction tlv handlers

This patch adds application tlv handlers. Networking stacks
may use the application priority to set the skb priority of
their stack using the negoatiated dcbx priority.

This patch provides the dcb_{get|set}app() routines for the
stack to query these parameters. Notice lower layer drivers
can use the dcbnl_ops routines if additional handling is
needed. Perhaps in the firmware case for example

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h |   4 +-
 include/net/dcbnl.h   |   9 ++++
 net/dcb/dcbnl.c       | 133 ++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 135 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 287b5618e296..775bdb4465bf 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -82,7 +82,9 @@ struct ieee_pfc {
 	__u64	indications[IEEE_8021QAZ_MAX_TCS];
 };
 
-/* This structure contains the IEEE 802.1Qaz APP managed object
+/* This structure contains the IEEE 802.1Qaz APP managed object. This
+ * object is also used for the CEE std as well. There is no difference
+ * between the objects.
  *
  * @selector: protocol identifier type
  * @protocol: protocol of type indicated
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index e2d841e963b3..ab7d623a2793 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -22,6 +22,15 @@
 
 #include <linux/dcbnl.h>
 
+struct dcb_app_type {
+	char		  name[IFNAMSIZ];
+	struct dcb_app	  app;
+	struct list_head  list;
+};
+
+u8 dcb_setapp(struct net_device *, struct dcb_app *);
+u8 dcb_getapp(struct net_device *, struct dcb_app *);
+
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
  * the netdevice struct.
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 2ff908498924..cfd731faf6c6 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -179,6 +179,9 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
 	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
 };
 
+static LIST_HEAD(dcb_app_list);
+static DEFINE_SPINLOCK(dcb_lock);
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -634,12 +637,12 @@ out:
 static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb,
                         u32 pid, u32 seq, u16 flags)
 {
-	int ret = -EINVAL;
+	int err, ret = -EINVAL;
 	u16 id;
 	u8 up, idtype;
 	struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1];
 
-	if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->setapp)
+	if (!tb[DCB_ATTR_APP])
 		goto out;
 
 	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
@@ -663,9 +666,18 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb,
 	id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]);
 	up = nla_get_u8(app_tb[DCB_APP_ATTR_PRIORITY]);
 
-	ret = dcbnl_reply(netdev->dcbnl_ops->setapp(netdev, idtype, id, up),
-	                  RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP,
-	                  pid, seq, flags);
+	if (netdev->dcbnl_ops->setapp) {
+		err = netdev->dcbnl_ops->setapp(netdev, idtype, id, up);
+	} else {
+		struct dcb_app app;
+		app.selector = idtype;
+		app.protocol = id;
+		app.priority = up;
+		err = dcb_setapp(netdev, &app);
+	}
+
+	ret = dcbnl_reply(err, RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP,
+			  pid, seq, flags);
 out:
 	return ret;
 }
@@ -1164,7 +1176,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			goto err;
 	}
 
-	if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) {
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
 		struct nlattr *attr;
 		int rem;
 
@@ -1173,7 +1185,10 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
 				continue;
 			app_data = nla_data(attr);
-			err = ops->ieee_setapp(netdev, app_data);
+			if (ops->ieee_setapp)
+				err = ops->ieee_setapp(netdev, app_data);
+			else
+				err = dcb_setapp(netdev, app_data);
 			if (err)
 				goto err;
 		}
@@ -1193,7 +1208,8 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
 	struct dcbmsg *dcb;
-	struct nlattr *ieee;
+	struct nlattr *ieee, *app;
+	struct dcb_app_type *itr;
 	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
 	int err;
 
@@ -1230,6 +1246,19 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 			NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc);
 	}
 
+	app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE);
+	if (!app)
+		goto nla_put_failure;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0)
+			NLA_PUT(skb, DCB_ATTR_IEEE_APP,
+				sizeof(itr->app), &itr->app);
+	}
+	spin_unlock(&dcb_lock);
+	nla_nest_end(skb, app);
+
 	nla_nest_end(skb, ieee);
 	nlmsg_end(skb, nlh);
 
@@ -1364,8 +1393,93 @@ out:
 	return ret;
 }
 
+/**
+ * dcb_getapp - retrieve the DCBX application user priority
+ *
+ * On success returns a non-zero 802.1p user priority bitmap
+ * otherwise returns 0 as the invalid user priority bitmap to
+ * indicate an error.
+ */
+u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct dcb_app_type *itr;
+	u8 prio = 0;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == app->selector &&
+		    itr->app.protocol == app->protocol &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			prio = itr->app.priority;
+			break;
+		}
+	}
+	spin_unlock(&dcb_lock);
+
+	return prio;
+}
+EXPORT_SYMBOL(dcb_getapp);
+
+/**
+ * ixgbe_dcbnl_setapp - add dcb application data to app list
+ *
+ * Priority 0 is the default priority this removes applications
+ * from the app list if the priority is set to zero.
+ */
+u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
+{
+	struct dcb_app_type *itr;
+
+	spin_lock(&dcb_lock);
+	/* Search for existing match and replace */
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == new->selector &&
+		    itr->app.protocol == new->protocol &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			if (new->priority)
+				itr->app.priority = new->priority;
+			else {
+				list_del(&itr->list);
+				kfree(itr);
+			}
+			goto out;
+		}
+	}
+	/* App type does not exist add new application type */
+	if (new->priority) {
+		struct dcb_app_type *entry;
+		entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC);
+		if (!entry) {
+			spin_unlock(&dcb_lock);
+			return -ENOMEM;
+		}
+
+		memcpy(&entry->app, new, sizeof(*new));
+		strncpy(entry->name, dev->name, IFNAMSIZ);
+		list_add(&entry->list, &dcb_app_list);
+	}
+out:
+	spin_unlock(&dcb_lock);
+	return 0;
+}
+EXPORT_SYMBOL(dcb_setapp);
+
+void dcb_flushapp(void)
+{
+	struct dcb_app_type *app;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(app, &dcb_app_list, list) {
+		list_del(&app->list);
+		kfree(app);
+	}
+	spin_unlock(&dcb_lock);
+}
+
 static int __init dcbnl_init(void)
 {
+	INIT_LIST_HEAD(&dcb_app_list);
+
 	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
 	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
 
@@ -1377,7 +1491,6 @@ static void __exit dcbnl_exit(void)
 {
 	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
 	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+	dcb_flushapp();
 }
 module_exit(dcbnl_exit);
-
-
-- 
cgit v1.2.3-71-gd317


From 6241b6259b16aa390ff4bf50f520685b3801200b Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Thu, 30 Dec 2010 06:26:48 +0000
Subject: dcbnl: adding DCBX engine capability

Adding an optional DCBX capability and a pair for get-set routines for
setting the device DCBX mode. The DCBX capability is a bit field of
supported attributes. The user is expected to set the DCBX mode with a
subset of the advertised attributes.

This patch is dependent on the following patches:
[net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes
[net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers
[net-next-2.6 PATCH 3/3] net_dcb: add application notifiers

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/net/dcbnl.h   |  5 +++++
 net/dcb/dcbnl.c       | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 775bdb4465bf..16eea36d8934 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -135,6 +135,8 @@ struct dcbmsg {
  * @DCB_CMD_SAPP: set application protocol configuration
  * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
  * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
+ * @DCB_CMD_GDCBX: get DCBX engine configuration
+ * @DCB_CMD_SDCBX: set DCBX engine configuration
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -171,6 +173,9 @@ enum dcbnl_commands {
 	DCB_CMD_IEEE_SET,
 	DCB_CMD_IEEE_GET,
 
+	DCB_CMD_GDCBX,
+	DCB_CMD_SDCBX,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -191,6 +196,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
  * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
+ * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -211,6 +217,8 @@ enum dcbnl_attrs {
 	/* IEEE std attributes */
 	DCB_ATTR_IEEE,
 
+	DCB_ATTR_DCBX,
+
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
 };
@@ -370,6 +378,8 @@ enum dcbnl_tc_attrs {
  * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority
  * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion
  *                             Notification
+ * @DCB_CAP_ATTR_DCBX: (NLA_U8) device supports DCBX engine
+ *
  */
 enum dcbnl_cap_attrs {
 	DCB_CAP_ATTR_UNDEFINED,
@@ -381,11 +391,44 @@ enum dcbnl_cap_attrs {
 	DCB_CAP_ATTR_PFC_TCS,
 	DCB_CAP_ATTR_GSP,
 	DCB_CAP_ATTR_BCN,
+	DCB_CAP_ATTR_DCBX,
 
 	__DCB_CAP_ATTR_ENUM_MAX,
 	DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1,
 };
 
+/**
+ * DCBX capability flags
+ *
+ * @DCB_CAP_DCBX_HOST: DCBX negotiation is performed by the host LLDP agent.
+ *                     'set' routines are used to configure the device with
+ *                     the negotiated parameters
+ *
+ * @DCB_CAP_DCBX_LLD_MANAGED: DCBX negotiation is not performed in the host but
+ *                            by another entity
+ *                            'get' routines are used to retrieve the
+ *                            negotiated parameters
+ *                            'set' routines can be used to set the initial
+ *                            negotiation configuration
+ *
+ * @DCB_CAP_DCBX_VER_CEE: for a non-host DCBX engine, indicates the engine
+ *                        supports the CEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_VER_IEEE: for a non-host DCBX engine, indicates the engine
+ *                         supports the IEEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_STATIC: for a non-host DCBX engine, indicates the engine
+ *                       supports static configuration (i.e no actual
+ *                       negotiation is performed negotiated parameters equal
+ *                       the initial configuration)
+ *
+ */
+#define DCB_CAP_DCBX_HOST		0x01
+#define DCB_CAP_DCBX_LLD_MANAGED	0x02
+#define DCB_CAP_DCBX_VER_CEE		0x04
+#define DCB_CAP_DCBX_VER_IEEE		0x08
+#define DCB_CAP_DCBX_STATIC		0x10
+
 /**
  * enum dcbnl_numtcs_attrs - number of traffic classes
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index ab7d623a2793..c65347b3cbbf 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -70,6 +70,11 @@ struct dcbnl_rtnl_ops {
 	void (*setbcnrp)(struct net_device *, int, u8);
 	u8   (*setapp)(struct net_device *, u8, u16, u8);
 	u8   (*getapp)(struct net_device *, u8, u16);
+
+	/* DCBX configuration */
+	u8   (*getdcbx)(struct net_device *);
+	u8   (*setdcbx)(struct net_device *, u8);
+
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 69144125fc4f..8f83ad859d9b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -68,6 +68,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_BCN]         = {.type = NLA_NESTED},
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
+	[DCB_ATTR_DCBX]        = {.type = NLA_U8},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -124,6 +125,7 @@ static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = {
 	[DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8},
 	[DCB_CAP_ATTR_GSP]     = {.type = NLA_U8},
 	[DCB_CAP_ATTR_BCN]     = {.type = NLA_U8},
+	[DCB_CAP_ATTR_DCBX]    = {.type = NLA_U8},
 };
 
 /* DCB capabilities nested attributes. */
@@ -1271,6 +1273,39 @@ nlmsg_failure:
 	return -1;
 }
 
+/* DCBX configuration */
+static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getdcbx)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB,
+			  DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_DCBX] || !netdev->dcbnl_ops->setdcbx)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_DCBX]);
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->setdcbx(netdev, value),
+			  RTM_SETDCB, DCB_CMD_SDCBX, DCB_ATTR_DCBX,
+			  pid, seq, flags);
+
+	return ret;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1384,6 +1419,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq,
 				 nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GDCBX:
+		ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SDCBX:
+		ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3-71-gd317


From ea45fe4e176a42d2396878f530cfdc8265bef37b Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Thu, 30 Dec 2010 06:26:55 +0000
Subject: dcbnl: adding DCBX feature flags get-set

Adding a pair of set-get routines to dcbnl for setting the negotiation
flags of the various DCB features. Conforms to the CEE flavor of DCBX
The user sets these flags (enable, advertise, willing) for each feature
to be used by the DCBX engine. The 'get' routine returns which of the
features is enabled after the negotiation.

This patch is dependent on the following patches:
[net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes
[net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers
[net-next-2.6 PATCH 3/3] net_dcb: add application notifiers

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h |  33 +++++++++++++
 include/net/dcbnl.h   |   3 ++
 net/dcb/dcbnl.c       | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 169 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 16eea36d8934..68cd248f6d3e 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -137,6 +137,8 @@ struct dcbmsg {
  * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
  * @DCB_CMD_GDCBX: get DCBX engine configuration
  * @DCB_CMD_SDCBX: set DCBX engine configuration
+ * @DCB_CMD_GFEATCFG: get DCBX features flags
+ * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -176,6 +178,9 @@ enum dcbnl_commands {
 	DCB_CMD_GDCBX,
 	DCB_CMD_SDCBX,
 
+	DCB_CMD_GFEATCFG,
+	DCB_CMD_SFEATCFG,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -197,6 +202,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
  * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
  * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8)
+ * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -218,6 +224,7 @@ enum dcbnl_attrs {
 	DCB_ATTR_IEEE,
 
 	DCB_ATTR_DCBX,
+	DCB_ATTR_FEATCFG,
 
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
@@ -506,4 +513,30 @@ enum dcbnl_app_attrs {
 	DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1,
 };
 
+/**
+ * enum dcbnl_featcfg_attrs - features conifiguration flags
+ *
+ * @DCB_FEATCFG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_FEATCFG_ATTR_ALL: (NLA_FLAG) all features configuration attributes
+ * @DCB_FEATCFG_ATTR_PG: (NLA_U8) configuration flags for priority groups
+ * @DCB_FEATCFG_ATTR_PFC: (NLA_U8) configuration flags for priority
+ *                                 flow control
+ * @DCB_FEATCFG_ATTR_APP: (NLA_U8) configuration flags for application TLV
+ *
+ */
+#define DCB_FEATCFG_ERROR	0x01	/* error in feature resolution */
+#define DCB_FEATCFG_ENABLE	0x02	/* enable feature */
+#define DCB_FEATCFG_WILLING	0x04	/* feature is willing */
+#define DCB_FEATCFG_ADVERTISE	0x08	/* advertise feature */
+enum dcbnl_featcfg_attrs {
+	DCB_FEATCFG_ATTR_UNDEFINED,
+	DCB_FEATCFG_ATTR_ALL,
+	DCB_FEATCFG_ATTR_PG,
+	DCB_FEATCFG_ATTR_PFC,
+	DCB_FEATCFG_ATTR_APP,
+
+	__DCB_FEATCFG_ATTR_ENUM_MAX,
+	DCB_FEATCFG_ATTR_MAX = __DCB_FEATCFG_ATTR_ENUM_MAX - 1,
+};
+
 #endif /* __LINUX_DCBNL_H__ */
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index c65347b3cbbf..a8e7852b10ab 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -70,11 +70,14 @@ struct dcbnl_rtnl_ops {
 	void (*setbcnrp)(struct net_device *, int, u8);
 	u8   (*setapp)(struct net_device *, u8, u16, u8);
 	u8   (*getapp)(struct net_device *, u8, u16);
+	u8   (*getfeatcfg)(struct net_device *, int, u8 *);
+	u8   (*setfeatcfg)(struct net_device *, int, u8);
 
 	/* DCBX configuration */
 	u8   (*getdcbx)(struct net_device *);
 	u8   (*setdcbx)(struct net_device *, u8);
 
+
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 8f83ad859d9b..075af0a08d84 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -69,6 +69,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
 	[DCB_ATTR_DCBX]        = {.type = NLA_U8},
+	[DCB_ATTR_FEATCFG]     = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -182,6 +183,14 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
 	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
 };
 
+/* DCB number of traffic classes nested attributes. */
+static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
+	[DCB_FEATCFG_ATTR_ALL]      = {.type = NLA_FLAG},
+	[DCB_FEATCFG_ATTR_PG]       = {.type = NLA_U8},
+	[DCB_FEATCFG_ATTR_PFC]      = {.type = NLA_U8},
+	[DCB_FEATCFG_ATTR_APP]      = {.type = NLA_U8},
+};
+
 static LIST_HEAD(dcb_app_list);
 static DEFINE_SPINLOCK(dcb_lock);
 
@@ -1306,6 +1315,122 @@ static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb,
 	return ret;
 }
 
+static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb,
+			    u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->getfeatcfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
+			       dcbnl_featcfg_nest);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GFEATCFG;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG);
+	if (!nest) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (data[DCB_FEATCFG_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value);
+		if (!ret) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				ret = -EINVAL;
+				goto err;
+			}
+		} else
+			goto err;
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return ret;
+}
+
+static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb,
+			    u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1];
+	int ret = -EINVAL;
+	u8 value;
+	int i;
+
+	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->setfeatcfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
+			       dcbnl_featcfg_nest);
+
+	if (ret) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+		if (data[i] == NULL)
+			continue;
+
+		value = nla_get_u8(data[i]);
+
+		ret = netdev->dcbnl_ops->setfeatcfg(netdev, i, value);
+
+		if (ret)
+			goto operr;
+	}
+
+operr:
+	ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SFEATCFG,
+			  DCB_ATTR_FEATCFG, pid, seq, flags);
+
+err:
+	return ret;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1427,6 +1552,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq,
 				    nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GFEATCFG:
+		ret = dcbnl_getfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
+				       nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SFEATCFG:
+		ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
+				       nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3-71-gd317


From a4f740cf33f7f6c164bbde3c0cdbcc77b0c4997c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 30 Oct 2010 11:49:09 -0400
Subject: of/flattree: Add of_flat_dt_match() helper function

This patch adds of_flat_dt_match() which tests a node for
compatibility with a list of values and converts the relevant powerpc
platform code to use it.  This approach simplifies the board support
code a bit.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
---
 arch/powerpc/platforms/40x/ppc40x_simple.c    | 13 +++-------
 arch/powerpc/platforms/512x/mpc5121_generic.c | 13 ++--------
 arch/powerpc/platforms/52xx/lite5200.c        | 16 +++++-------
 arch/powerpc/platforms/52xx/media5200.c       | 13 ++--------
 arch/powerpc/platforms/52xx/mpc5200_simple.c  | 13 ++--------
 arch/powerpc/platforms/83xx/mpc830x_rdb.c     | 13 ++++++----
 arch/powerpc/platforms/83xx/mpc831x_rdb.c     | 11 +++++---
 arch/powerpc/platforms/83xx/mpc837x_rdb.c     | 15 ++++++-----
 arch/powerpc/platforms/85xx/tqm85xx.c         | 20 +++++++--------
 drivers/of/fdt.c                              | 37 +++++++++++++++++++++++++--
 include/linux/of_fdt.h                        |  3 +++
 11 files changed, 89 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c
index 546bbc229d19..2521d93ef136 100644
--- a/arch/powerpc/platforms/40x/ppc40x_simple.c
+++ b/arch/powerpc/platforms/40x/ppc40x_simple.c
@@ -50,7 +50,7 @@ machine_device_initcall(ppc40x_simple, ppc40x_device_probe);
  * Again, if your board needs to do things differently then create a
  * board.c file for it rather than adding it to this list.
  */
-static char *board[] __initdata = {
+static const char *board[] __initdata = {
 	"amcc,acadia",
 	"amcc,haleakala",
 	"amcc,kilauea",
@@ -60,14 +60,9 @@ static char *board[] __initdata = {
 
 static int __init ppc40x_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
-	int i = 0;
-
-	for (i = 0; i < ARRAY_SIZE(board); i++) {
-		if (of_flat_dt_is_compatible(root, board[i])) {
-			ppc_pci_set_flags(PPC_PCI_REASSIGN_ALL_RSRC);
-			return 1;
-		}
+	if (of_flat_dt_match(of_get_flat_dt_root(), board)) {
+		ppc_pci_set_flags(PPC_PCI_REASSIGN_ALL_RSRC);
+		return 1;
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/512x/mpc5121_generic.c b/arch/powerpc/platforms/512x/mpc5121_generic.c
index e487eb06ec6b..926731f1ff01 100644
--- a/arch/powerpc/platforms/512x/mpc5121_generic.c
+++ b/arch/powerpc/platforms/512x/mpc5121_generic.c
@@ -26,7 +26,7 @@
 /*
  * list of supported boards
  */
-static char *board[] __initdata = {
+static const char *board[] __initdata = {
 	"prt,prtlvt",
 	NULL
 };
@@ -36,16 +36,7 @@ static char *board[] __initdata = {
  */
 static int __init mpc5121_generic_probe(void)
 {
-	unsigned long node = of_get_flat_dt_root();
-	int i = 0;
-
-	while (board[i]) {
-		if (of_flat_dt_is_compatible(node, board[i]))
-			break;
-		i++;
-	}
-
-	return board[i] != NULL;
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(mpc5121_generic) {
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
index de55bc0584b5..01ffa64d2aa7 100644
--- a/arch/powerpc/platforms/52xx/lite5200.c
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -172,20 +172,18 @@ static void __init lite5200_setup_arch(void)
 	mpc52xx_setup_pci();
 }
 
+static const char *board[] __initdata = {
+	"fsl,lite5200",
+	"fsl,lite5200b",
+	NULL,
+};
+
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
 static int __init lite5200_probe(void)
 {
-	unsigned long node = of_get_flat_dt_root();
-	const char *model = of_get_flat_dt_prop(node, "model", NULL);
-
-	if (!of_flat_dt_is_compatible(node, "fsl,lite5200") &&
-	    !of_flat_dt_is_compatible(node, "fsl,lite5200b"))
-		return 0;
-	pr_debug("%s board found\n", model ? model : "unknown");
-
-	return 1;
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(lite5200) {
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 0bac3a3dbecf..2c7780cb68e5 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -239,7 +239,7 @@ static void __init media5200_setup_arch(void)
 }
 
 /* list of the supported boards */
-static char *board[] __initdata = {
+static const char *board[] __initdata = {
 	"fsl,media5200",
 	NULL
 };
@@ -249,16 +249,7 @@ static char *board[] __initdata = {
  */
 static int __init media5200_probe(void)
 {
-	unsigned long node = of_get_flat_dt_root();
-	int i = 0;
-
-	while (board[i]) {
-		if (of_flat_dt_is_compatible(node, board[i]))
-			break;
-		i++;
-	}
-
-	return (board[i] != NULL);
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(media5200_platform) {
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
index d45be5b5ad49..e36d6e232ae6 100644
--- a/arch/powerpc/platforms/52xx/mpc5200_simple.c
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -49,7 +49,7 @@ static void __init mpc5200_simple_setup_arch(void)
 }
 
 /* list of the supported boards */
-static char *board[] __initdata = {
+static const char *board[] __initdata = {
 	"intercontrol,digsy-mtc",
 	"manroland,mucmc52",
 	"manroland,uc101",
@@ -66,16 +66,7 @@ static char *board[] __initdata = {
  */
 static int __init mpc5200_simple_probe(void)
 {
-	unsigned long node = of_get_flat_dt_root();
-	int i = 0;
-
-	while (board[i]) {
-		if (of_flat_dt_is_compatible(node, board[i]))
-			break;
-		i++;
-	}
-	
-	return (board[i] != NULL);
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(mpc5200_simple_platform) {
diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
index 846831d495b5..661d354e4ff2 100644
--- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
@@ -57,16 +57,19 @@ static void __init mpc830x_rdb_init_IRQ(void)
 	ipic_set_default_priority();
 }
 
+struct const char *board[] __initdata = {
+	"MPC8308RDB",
+	"fsl,mpc8308rdb",
+	"denx,mpc8308_p1m",
+	NULL
+}
+
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
 static int __init mpc830x_rdb_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
-
-	return of_flat_dt_is_compatible(root, "MPC8308RDB") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8308rdb") ||
-	       of_flat_dt_is_compatible(root, "denx,mpc8308_p1m");
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 static struct of_device_id __initdata of_bus_ids[] = {
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
index ae525e4745d2..b54cd736a895 100644
--- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -60,15 +60,18 @@ static void __init mpc831x_rdb_init_IRQ(void)
 	ipic_set_default_priority();
 }
 
+struct const char *board[] __initdata = {
+	"MPC8313ERDB",
+	"fsl,mpc8315erdb",
+	NULL
+}
+
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
 static int __init mpc831x_rdb_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
-
-	return of_flat_dt_is_compatible(root, "MPC8313ERDB") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8315erdb");
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 static struct of_device_id __initdata of_bus_ids[] = {
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index 910caa6b5810..7bafbf2ec0f9 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -101,17 +101,20 @@ static void __init mpc837x_rdb_init_IRQ(void)
 	ipic_set_default_priority();
 }
 
+static const char *board[] __initdata = {
+	"fsl,mpc8377rdb",
+	"fsl,mpc8378rdb",
+	"fsl,mpc8379rdb",
+	"fsl,mpc8377wlan",
+	NULL
+};
+
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
 static int __init mpc837x_rdb_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
-
-	return of_flat_dt_is_compatible(root, "fsl,mpc8377rdb") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8378rdb") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8379rdb") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8377wlan");
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(mpc837x_rdb) {
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 8f29bbce5360..5e847d0b47c8 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -186,21 +186,21 @@ static int __init declare_of_platform_devices(void)
 }
 machine_device_initcall(tqm85xx, declare_of_platform_devices);
 
+static const char *board[] __initdata = {
+	"tqc,tqm8540",
+	"tqc,tqm8541",
+	"tqc,tqm8548",
+	"tqc,tqm8555",
+	"tqc,tqm8560",
+	NULL
+};
+
 /*
  * Called very early, device-tree isn't unflattened
  */
 static int __init tqm85xx_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
-
-	if ((of_flat_dt_is_compatible(root, "tqc,tqm8540")) ||
-	    (of_flat_dt_is_compatible(root, "tqc,tqm8541")) ||
-	    (of_flat_dt_is_compatible(root, "tqc,tqm8548")) ||
-	    (of_flat_dt_is_compatible(root, "tqc,tqm8555")) ||
-	    (of_flat_dt_is_compatible(root, "tqc,tqm8560")))
-		return 1;
-
-	return 0;
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
 }
 
 define_machine(tqm85xx) {
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 8a90ee42071a..c787c3d95c60 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -78,19 +78,23 @@ void *of_fdt_get_property(struct boot_param_header *blob,
  * @blob: A device tree blob
  * @node: node to test
  * @compat: compatible string to compare with compatible list.
+ *
+ * On match, returns a non-zero value with smaller values returned for more
+ * specific compatible values.
  */
 int of_fdt_is_compatible(struct boot_param_header *blob,
 		      unsigned long node, const char *compat)
 {
 	const char *cp;
-	unsigned long cplen, l;
+	unsigned long cplen, l, score = 0;
 
 	cp = of_fdt_get_property(blob, node, "compatible", &cplen);
 	if (cp == NULL)
 		return 0;
 	while (cplen > 0) {
+		score++;
 		if (of_compat_cmp(cp, compat, strlen(compat)) == 0)
-			return 1;
+			return score;
 		l = strlen(cp) + 1;
 		cp += l;
 		cplen -= l;
@@ -99,6 +103,27 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
 	return 0;
 }
 
+/**
+ * of_fdt_match - Return true if node matches a list of compatible values
+ */
+int of_fdt_match(struct boot_param_header *blob, unsigned long node,
+                 const char **compat)
+{
+	unsigned int tmp, score = 0;
+
+	if (!compat)
+		return 0;
+
+	while (*compat) {
+		tmp = of_fdt_is_compatible(blob, node, *compat);
+		if (tmp && (score == 0 || (tmp < score)))
+			score = tmp;
+		compat++;
+	}
+
+	return score;
+}
+
 static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size,
 				       unsigned long align)
 {
@@ -511,6 +536,14 @@ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat)
 	return of_fdt_is_compatible(initial_boot_params, node, compat);
 }
 
+/**
+ * of_flat_dt_match - Return true if node matches a list of compatible values
+ */
+int __init of_flat_dt_match(unsigned long node, const char **compat)
+{
+	return of_fdt_match(initial_boot_params, node, compat);
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 9ce5dfd2186a..ee96091f7d25 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -68,6 +68,8 @@ extern void *of_fdt_get_property(struct boot_param_header *blob,
 extern int of_fdt_is_compatible(struct boot_param_header *blob,
 				unsigned long node,
 				const char *compat);
+extern int of_fdt_match(struct boot_param_header *blob, unsigned long node,
+			const char **compat);
 extern void of_fdt_unflatten_tree(unsigned long *blob,
 			       struct device_node **mynodes);
 
@@ -84,6 +86,7 @@ extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
+extern int of_flat_dt_match(unsigned long node, const char **matches);
 extern unsigned long of_get_flat_dt_root(void);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
-- 
cgit v1.2.3-71-gd317


From 51f98a8d70583b18cb08b19353aeed5efb0244af Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:16 +0000
Subject: tipc: Remove prototype code for supporting multiple zones

Eliminates routines, data structures, and files that were intended
to allows TIPC to support a network containing multiple zones.
Currently, TIPC supports only networks consisting of a single cluster
within a single zone, so this code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/Kconfig            |  12 ----
 net/tipc/Makefile           |   2 +-
 net/tipc/addr.c             |   4 --
 net/tipc/cluster.c          |  14 +---
 net/tipc/cluster.h          |  12 ++--
 net/tipc/config.c           |  30 ++-------
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/net.c              |  41 +++++++-----
 net/tipc/net.h              |   8 ++-
 net/tipc/node.c             |   7 +-
 net/tipc/node.h             |   1 -
 net/tipc/zone.c             | 159 --------------------------------------------
 net/tipc/zone.h             |  70 -------------------
 15 files changed, 46 insertions(+), 325 deletions(-)
 delete mode 100644 net/tipc/zone.c
 delete mode 100644 net/tipc/zone.h

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 9cde86c32412..fa3aeaafeab1 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -94,7 +94,7 @@
 #define  TIPC_CMD_GET_MAX_PORTS     0x4004    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* tx none, rx unsigned */
@@ -130,7 +130,7 @@
 #define  TIPC_CMD_SET_MAX_PORTS     0x8004    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* tx unsigned, rx none */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index b74f78d0c033..06d32908fcfb 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,18 +29,6 @@ config TIPC_ADVANCED
 	  Saying Y here will open some advanced configuration for TIPC.
 	  Most users do not need to bother; if unsure, just say N.
 
-config TIPC_ZONES
-	int "Maximum number of zones in a network"
-	depends on TIPC_ADVANCED
-	range 1 255
-	default "3"
-	help
-	  Specifies how many zones can be supported in a TIPC network.
-	  Can range from 1 to 255 zones; default is 3.
-
-	  Setting this to a smaller value saves some memory;
-	  setting it to a higher value allows for more zones.
-
 config TIPC_CLUSTERS
 	int "Maximum number of clusters in a zone"
 	depends on TIPC_ADVANCED
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index dceb7027946c..3d936f0560c7 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,6 +8,6 @@ tipc-y	+= addr.o bcast.o bearer.o config.o cluster.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
-	   socket.o user_reg.o zone.o dbg.o eth_media.o
+	   socket.o user_reg.o dbg.o eth_media.o
 
 # End of file
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 886715a75259..3d0f97da5b5f 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,8 +35,6 @@
  */
 
 #include "core.h"
-#include "addr.h"
-#include "zone.h"
 #include "cluster.h"
 
 /**
@@ -61,8 +59,6 @@ int tipc_addr_domain_valid(u32 addr)
 		return 0;
 	if (c > tipc_max_clusters)
 		return 0;
-	if (z > tipc_max_zones)
-		return 0;
 
 	if (n && (!z || !c))
 		return 0;
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 405be87157ba..996b2b67687e 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -47,7 +47,6 @@ u32 tipc_highest_allowed_slave = 0;
 
 struct cluster *tipc_cltr_create(u32 addr)
 {
-	struct _zone *z_ptr;
 	struct cluster *c_ptr;
 	int max_nodes;
 
@@ -75,18 +74,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 	c_ptr->highest_slave = LOWEST_SLAVE - 1;
 	c_ptr->highest_node = 0;
 
-	z_ptr = tipc_zone_find(tipc_zone(addr));
-	if (!z_ptr) {
-		z_ptr = tipc_zone_create(addr);
-	}
-	if (!z_ptr) {
-		kfree(c_ptr->nodes);
-		kfree(c_ptr);
-		return NULL;
-	}
-
-	tipc_zone_attach_cluster(z_ptr, c_ptr);
-	c_ptr->owner = z_ptr;
+	tipc_net.clusters[1] = c_ptr;
 	return c_ptr;
 }
 
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 32636d98c9c6..21493f7beb95 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -38,14 +38,13 @@
 #define _TIPC_CLUSTER_H
 
 #include "addr.h"
-#include "zone.h"
+#include "net.h"
 
 #define LOWEST_SLAVE  2048u
 
 /**
  * struct cluster - TIPC cluster structure
  * @addr: network address of cluster
- * @owner: pointer to zone that cluster belongs to
  * @nodes: array of pointers to all nodes within cluster
  * @highest_node: id of highest numbered node within cluster
  * @highest_slave: (used for secondary node support)
@@ -53,7 +52,6 @@
 
 struct cluster {
 	u32 addr;
-	struct _zone *owner;
 	struct tipc_node **nodes;
 	u32 highest_node;
 	u32 highest_slave;
@@ -82,11 +80,9 @@ void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi)
 
 static inline struct cluster *tipc_cltr_find(u32 addr)
 {
-	struct _zone *z_ptr = tipc_zone_find(addr);
-
-	if (z_ptr)
-		return z_ptr->clusters[1];
-	return NULL;
+	if (!in_own_cluster(addr))
+		return NULL;
+	return tipc_net.clusters[1];
 }
 
 #endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
index bdde39f0436b..8de97ddb0427 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -269,25 +269,6 @@ static struct sk_buff *cfg_set_max_ports(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_zones(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value == tipc_max_zones)
-		return tipc_cfg_reply_none();
-	if (value != delimit(value, 1, 255))
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max zones must be 1-255)");
-	if (tipc_mode == TIPC_NET_MODE)
-		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
-			" (cannot change max zones once TIPC has joined a network)");
-	tipc_max_zones = value;
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_clusters(void)
 {
 	u32 value;
@@ -452,9 +433,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_SUBSCR:
 		rep_tlv_buf = cfg_set_max_subscriptions();
 		break;
-	case TIPC_CMD_SET_MAX_ZONES:
-		rep_tlv_buf = cfg_set_max_zones();
-		break;
 	case TIPC_CMD_SET_MAX_CLUSTERS:
 		rep_tlv_buf = cfg_set_max_clusters();
 		break;
@@ -479,9 +457,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_SUBSCR:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
 		break;
-	case TIPC_CMD_GET_MAX_ZONES:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_zones);
-		break;
 	case TIPC_CMD_GET_MAX_CLUSTERS:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters);
 		break;
@@ -498,6 +473,11 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 		rep_tlv_buf =
 			tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
 		break;
+	case TIPC_CMD_SET_MAX_ZONES:
+	case TIPC_CMD_GET_MAX_ZONES:
+		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+							  " (obsolete command)");
+		break;
 	default:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (unknown command)");
diff --git a/net/tipc/core.c b/net/tipc/core.c
index f5d62c174de2..13946331bd4d 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,10 +48,6 @@
 #include "config.h"
 
 
-#ifndef CONFIG_TIPC_ZONES
-#define CONFIG_TIPC_ZONES 3
-#endif
-
 #ifndef CONFIG_TIPC_CLUSTERS
 #define CONFIG_TIPC_CLUSTERS 1
 #endif
@@ -84,7 +80,6 @@ const char tipc_alphabet[] =
 /* configurable TIPC parameters */
 
 u32 tipc_own_addr;
-int tipc_max_zones;
 int tipc_max_clusters;
 int tipc_max_nodes;
 int tipc_max_slaves;
@@ -209,7 +204,6 @@ static int __init tipc_init(void)
 	tipc_max_publications = 10000;
 	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
-	tipc_max_zones = CONFIG_TIPC_ZONES;
 	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index ca7e171c1043..9403a226a570 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -184,7 +184,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
  */
 
 extern u32 tipc_own_addr;
-extern int tipc_max_zones;
 extern int tipc_max_clusters;
 extern int tipc_max_nodes;
 extern int tipc_max_slaves;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index c2b4b86c2e6a..a25f8bb1e1d9 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -36,7 +36,6 @@
 
 #include "core.h"
 #include "net.h"
-#include "zone.h"
 #include "name_table.h"
 #include "name_distr.h"
 #include "subscr.h"
@@ -111,46 +110,56 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-static struct _zone *tipc_zones[256] = { NULL, };
-struct network tipc_net = { tipc_zones };
+struct network tipc_net;
 
 struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
 {
-	return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref);
+	struct cluster *c_ptr;
+
+	c_ptr = tipc_net.clusters[1];
+	if (!c_ptr)
+		return NULL;
+	return tipc_cltr_select_node(c_ptr, ref);
 }
 
 u32 tipc_net_select_router(u32 addr, u32 ref)
 {
-	return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
+	struct cluster *c_ptr;
+
+	c_ptr = tipc_net.clusters[1];
+	if (!c_ptr)
+		return 0;
+	return tipc_cltr_select_router(c_ptr, ref);
 }
 
 void tipc_net_remove_as_router(u32 router)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
-		if (!tipc_net.zones[z_num])
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		if (!tipc_net.clusters[c_num])
 			continue;
-		tipc_zone_remove_as_router(tipc_net.zones[z_num], router);
+		tipc_cltr_remove_as_router(tipc_net.clusters[c_num], router);
 	}
 }
 
 void tipc_net_send_external_routes(u32 dest)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
-		if (tipc_net.zones[z_num])
-			tipc_zone_send_external_routes(tipc_net.zones[z_num], dest);
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		if (tipc_net.clusters[c_num])
+			tipc_cltr_send_ext_routes(tipc_net.clusters[c_num],
+						  dest);
 	}
 }
 
 static void net_stop(void)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++)
-		tipc_zone_delete(tipc_net.zones[z_num]);
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++)
+		tipc_cltr_delete(tipc_net.clusters[c_num]);
 }
 
 static void net_route_named_msg(struct sk_buff *buf)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index de2b9ad8f646..786c94007470 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,15 +37,17 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-struct _zone;
+struct cluster;
 
 /**
  * struct network - TIPC network structure
- * @zones: array of pointers to all zones within network
+ * @clusters: array of pointers to all clusters within zone
+ * @links: number of (unicast) links to cluster
  */
 
 struct network {
-	struct _zone **zones;
+	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
+	u32 links;
 };
 
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index df71dfc3a9ae..c20bd851a44a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -257,7 +257,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 
 		if (!n_ptr->links[bearer_id]) {
 			n_ptr->links[bearer_id] = l_ptr;
-			tipc_net.zones[tipc_zone(l_ptr->addr)]->links++;
+			tipc_net.links++;
 			n_ptr->link_cnt++;
 			return n_ptr;
 		}
@@ -271,7 +271,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	n_ptr->links[l_ptr->b_ptr->identity] = NULL;
-	tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
+	tipc_net.links--;
 	n_ptr->link_cnt--;
 }
 
@@ -656,8 +656,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 	/* Get space for all unicast links + multicast link */
 
-	payload_size = TLV_SPACE(sizeof(link_info)) *
-		(tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
+	payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1);
 	if (payload_size > 32768u) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/node.h b/net/tipc/node.h
index fff331b2d26c..7bfaf5e8c201 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -38,7 +38,6 @@
 #define _TIPC_NODE_H
 
 #include "node_subscr.h"
-#include "addr.h"
 #include "cluster.h"
 #include "bearer.h"
 
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
deleted file mode 100644
index 1b61ca8c48ef..000000000000
--- a/net/tipc/zone.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * net/tipc/zone.c: TIPC zone management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "zone.h"
-#include "cluster.h"
-#include "node.h"
-
-struct _zone *tipc_zone_create(u32 addr)
-{
-	struct _zone *z_ptr;
-	u32 z_num;
-
-	if (!tipc_addr_domain_valid(addr)) {
-		err("Zone creation failed, invalid domain 0x%x\n", addr);
-		return NULL;
-	}
-
-	z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC);
-	if (!z_ptr) {
-		warn("Zone creation failed, insufficient memory\n");
-		return NULL;
-	}
-
-	z_num = tipc_zone(addr);
-	z_ptr->addr = tipc_addr(z_num, 0, 0);
-	tipc_net.zones[z_num] = z_ptr;
-	return z_ptr;
-}
-
-void tipc_zone_delete(struct _zone *z_ptr)
-{
-	u32 c_num;
-
-	if (!z_ptr)
-		return;
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		tipc_cltr_delete(z_ptr->clusters[c_num]);
-	}
-	kfree(z_ptr);
-}
-
-void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr)
-{
-	u32 c_num = tipc_cluster(c_ptr->addr);
-
-	assert(c_ptr->addr);
-	assert(c_num <= tipc_max_clusters);
-	assert(z_ptr->clusters[c_num] == NULL);
-	z_ptr->clusters[c_num] = c_ptr;
-}
-
-void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (z_ptr->clusters[c_num]) {
-			tipc_cltr_remove_as_router(z_ptr->clusters[c_num],
-						   router);
-		}
-	}
-}
-
-void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (z_ptr->clusters[c_num]) {
-			if (in_own_cluster(z_ptr->addr))
-				continue;
-			tipc_cltr_send_ext_routes(z_ptr->clusters[c_num], dest);
-		}
-	}
-}
-
-struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	u32 c_num;
-
-	if (!z_ptr)
-		return NULL;
-	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
-	if (!c_ptr)
-		return NULL;
-	n_ptr = tipc_cltr_select_node(c_ptr, ref);
-	if (n_ptr)
-		return n_ptr;
-
-	/* Links to any other clusters within this zone ? */
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		c_ptr = z_ptr->clusters[c_num];
-		if (!c_ptr)
-			return NULL;
-		n_ptr = tipc_cltr_select_node(c_ptr, ref);
-		if (n_ptr)
-			return n_ptr;
-	}
-	return NULL;
-}
-
-u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-	u32 c_num;
-	u32 router;
-
-	if (!z_ptr)
-		return 0;
-	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
-	router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0;
-	if (router)
-		return router;
-
-	/* Links to any other clusters within the zone? */
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		c_ptr = z_ptr->clusters[c_num];
-		router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0;
-		if (router)
-			return router;
-	}
-	return 0;
-}
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
deleted file mode 100644
index bd1c20ce9d06..000000000000
--- a/net/tipc/zone.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * net/tipc/zone.h: Include file for TIPC zone management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_ZONE_H
-#define _TIPC_ZONE_H
-
-#include "node_subscr.h"
-#include "net.h"
-
-
-/**
- * struct _zone - TIPC zone structure
- * @addr: network address of zone
- * @clusters: array of pointers to all clusters within zone
- * @links: number of (unicast) links to zone
- */
-
-struct _zone {
-	u32 addr;
-	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
-	u32 links;
-};
-
-struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref);
-u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref);
-void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router);
-void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
-struct _zone *tipc_zone_create(u32 addr);
-void tipc_zone_delete(struct _zone *z_ptr);
-void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr);
-
-static inline struct _zone *tipc_zone_find(u32 addr)
-{
-	return tipc_net.zones[tipc_zone(addr)];
-}
-
-#endif
-- 
cgit v1.2.3-71-gd317


From 08c80e9a031df0a8f0269477a32f5eae47d7a146 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:17 +0000
Subject: tipc: Remove prototype code for supporting slave nodes

Simplifies routines and data structures that were intended to allow
TIPC to support slave nodes (i.e. nodes that did not have links to
all of the other nodes in its cluster, forcing TIPC to route messages
that it could not deliver directly through a non-slave node).

Currently, TIPC supports only networks containing non-slave nodes,
so this code is unnecessary.

Note: The latest edition of the TIPC 2.0 Specification has eliminated
the concept of slave nodes entirely.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/addr.c             |   2 -
 net/tipc/addr.h             |  10 ---
 net/tipc/cluster.c          | 166 ++++++--------------------------------------
 net/tipc/cluster.h          |   7 --
 net/tipc/config.c           |  21 +-----
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/discover.c         |   4 --
 net/tipc/msg.h              |   2 +-
 net/tipc/node.c             |  59 ++++------------
 net/tipc/port.c             |   5 +-
 12 files changed, 40 insertions(+), 247 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index fa3aeaafeab1..dcc2b8796d0a 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -97,7 +97,7 @@
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* obsoleted */
 #define  TIPC_CMD_GET_NETID         0x400B    /* tx none, rx unsigned */
 
 #define  TIPC_CMD_ENABLE_BEARER     0x4101    /* tx bearer_config, rx none */
@@ -133,7 +133,7 @@
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* obsoleted */
 #define  TIPC_CMD_SET_NETID         0x800B    /* tx unsigned, rx none */
 
 /*
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 3d0f97da5b5f..8823e03e52e0 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -53,8 +53,6 @@ int tipc_addr_domain_valid(u32 addr)
 	u32 z = tipc_zone(addr);
 	u32 max_nodes = tipc_max_nodes;
 
-	if (is_slave(addr))
-		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
 	if (n > max_nodes)
 		return 0;
 	if (c > tipc_max_clusters)
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index c1cc5724d8cc..a16c6c87208e 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -57,16 +57,6 @@ static inline int in_own_cluster(u32 addr)
 	return !((addr ^ tipc_own_addr) >> 12);
 }
 
-static inline int is_slave(u32 addr)
-{
-	return addr & 0x800;
-}
-
-static inline int may_route(u32 addr)
-{
-	return(addr ^ tipc_own_addr) >> 11;
-}
-
 /**
  * addr_domain - convert 2-bit scope value to equivalent message lookup domain
  *
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 996b2b67687e..6bc9f07be945 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -43,7 +43,6 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 
 struct tipc_node **tipc_local_nodes = NULL;
 struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
-u32 tipc_highest_allowed_slave = 0;
 
 struct cluster *tipc_cltr_create(u32 addr)
 {
@@ -57,10 +56,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 	}
 
 	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	if (in_own_cluster(addr))
-		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
-	else
-		max_nodes = tipc_max_nodes + 1;
+	max_nodes = tipc_max_nodes + 1;
 
 	c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
 	if (c_ptr->nodes == NULL) {
@@ -71,7 +67,6 @@ struct cluster *tipc_cltr_create(u32 addr)
 
 	if (in_own_cluster(addr))
 		tipc_local_nodes = c_ptr->nodes;
-	c_ptr->highest_slave = LOWEST_SLAVE - 1;
 	c_ptr->highest_node = 0;
 
 	tipc_net.clusters[1] = c_ptr;
@@ -87,9 +82,6 @@ void tipc_cltr_delete(struct cluster *c_ptr)
 	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
 		tipc_node_delete(c_ptr->nodes[n_num]);
 	}
-	for (n_num = LOWEST_SLAVE; n_num <= c_ptr->highest_slave; n_num++) {
-		tipc_node_delete(c_ptr->nodes[n_num]);
-	}
 	kfree(c_ptr->nodes);
 	kfree(c_ptr);
 }
@@ -100,8 +92,6 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 	u32 n_num = tipc_node(n_ptr->addr);
 	u32 max_n_num = tipc_max_nodes;
 
-	if (in_own_cluster(n_ptr->addr))
-		max_n_num = tipc_highest_allowed_slave;
 	assert(n_num > 0);
 	assert(n_num <= max_n_num);
 	assert(c_ptr->nodes[n_num] == NULL);
@@ -237,41 +227,6 @@ void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest,
 	}
 }
 
-void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_slave;
-	u32 n_num;
-	int send = 0;
-
-	assert(!is_slave(dest));
-	assert(in_own_cluster(dest));
-	assert(in_own_cluster(c_ptr->addr));
-	if (highest <= LOWEST_SLAVE)
-		return;
-	buf = tipc_cltr_prepare_routing_msg(highest - LOWEST_SLAVE + 1,
-					    c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, SLAVE_ROUTING_TABLE);
-		for (n_num = LOWEST_SLAVE; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of lost route failed\n");
-	}
-}
-
 void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 {
 	struct sk_buff *buf;
@@ -282,7 +237,6 @@ void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 
 	if (in_own_cluster(c_ptr->addr))
 		return;
-	assert(!is_slave(dest));
 	assert(in_own_cluster(dest));
 	highest = c_ptr->highest_node;
 	buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr);
@@ -306,37 +260,6 @@ void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 	}
 }
 
-void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_node;
-	u32 n_num;
-	int send = 0;
-
-	assert(is_slave(dest));
-	assert(in_own_cluster(c_ptr->addr));
-	buf = tipc_cltr_prepare_routing_msg(highest, c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, LOCAL_ROUTING_TABLE);
-		for (n_num = 1; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of local route failed\n");
-	}
-}
-
 void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 {
 	struct tipc_msg *msg = buf_msg(buf);
@@ -366,8 +289,6 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 	c_num = tipc_cluster(rem_node);
 
 	switch (msg_type(msg)) {
-	case LOCAL_ROUTING_TABLE:
-		assert(is_slave(tipc_own_addr));
 	case EXT_ROUTING_TABLE:
 		for (n_num = 1; n_num < table_size; n_num++) {
 			if (node_table[n_num]) {
@@ -382,29 +303,10 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 		}
 		break;
 	case SLAVE_ROUTING_TABLE:
-		assert(!is_slave(tipc_own_addr));
 		assert(in_own_cluster(c_ptr->addr));
-		for (n_num = 1; n_num < table_size; n_num++) {
-			if (node_table[n_num]) {
-				u32 slave_num = n_num + LOWEST_SLAVE;
-				u32 addr = tipc_addr(z_num, c_num, slave_num);
-				n_ptr = c_ptr->nodes[slave_num];
-				if (!n_ptr) {
-					n_ptr = tipc_node_create(addr);
-				}
-				if (n_ptr)
-					tipc_node_add_router(n_ptr, router);
-			}
-		}
 		break;
 	case ROUTE_ADDITION:
-		if (!is_slave(tipc_own_addr)) {
-			assert(!in_own_cluster(c_ptr->addr) ||
-			       is_slave(rem_node));
-		} else {
-			assert(in_own_cluster(c_ptr->addr) &&
-			       !is_slave(rem_node));
-		}
+		assert(!in_own_cluster(c_ptr->addr));
 		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
 		if (!n_ptr)
 			n_ptr = tipc_node_create(rem_node);
@@ -412,13 +314,7 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 			tipc_node_add_router(n_ptr, router);
 		break;
 	case ROUTE_REMOVAL:
-		if (!is_slave(tipc_own_addr)) {
-			assert(!in_own_cluster(c_ptr->addr) ||
-			       is_slave(rem_node));
-		} else {
-			assert(in_own_cluster(c_ptr->addr) &&
-			       !is_slave(rem_node));
-		}
+		assert(!in_own_cluster(c_ptr->addr));
 		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
 		if (n_ptr)
 			tipc_node_remove_router(n_ptr, router);
@@ -431,22 +327,12 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 
 void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router)
 {
-	u32 start_entry;
-	u32 tstop;
 	u32 n_num;
 
-	if (is_slave(router))
-		return;	/* Slave nodes can not be routers */
-
-	if (in_own_cluster(c_ptr->addr)) {
-		start_entry = LOWEST_SLAVE;
-		tstop = c_ptr->highest_slave;
-	} else {
-		start_entry = 1;
-		tstop = c_ptr->highest_node;
-	}
+	if (in_own_cluster(c_ptr->addr))
+		return;
 
-	for (n_num = start_entry; n_num <= tstop; n_num++) {
+	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
 		if (c_ptr->nodes[n_num]) {
 			tipc_node_remove_router(c_ptr->nodes[n_num], router);
 		}
@@ -466,13 +352,11 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 	u32 tstop;
 
 	assert(lower <= upper);
-	assert(((lower >= 1) && (lower <= tipc_max_nodes)) ||
-	       ((lower >= LOWEST_SLAVE) && (lower <= tipc_highest_allowed_slave)));
-	assert(((upper >= 1) && (upper <= tipc_max_nodes)) ||
-	       ((upper >= LOWEST_SLAVE) && (upper <= tipc_highest_allowed_slave)));
+	assert((lower >= 1) && (lower <= tipc_max_nodes));
+	assert((upper >= 1) && (upper <= tipc_max_nodes));
 	assert(in_own_cluster(c_ptr->addr));
 
-	tstop = is_slave(upper) ? c_ptr->highest_slave : c_ptr->highest_node;
+	tstop = c_ptr->highest_node;
 	if (tstop > upper)
 		tstop = upper;
 	for (n_num = lower; n_num <= tstop; n_num++) {
@@ -498,32 +382,23 @@ void tipc_cltr_broadcast(struct sk_buff *buf)
 	struct cluster *c_ptr;
 	struct tipc_node *n_ptr;
 	u32 n_num;
-	u32 tstart;
-	u32 tstop;
-	u32 node_type;
 
 	if (tipc_mode == TIPC_NET_MODE) {
 		c_ptr = tipc_cltr_find(tipc_own_addr);
 		assert(in_own_cluster(c_ptr->addr));	/* For now */
 
-		/* Send to standard nodes, then repeat loop sending to slaves */
-		tstart = 1;
-		tstop = c_ptr->highest_node;
-		for (node_type = 1; node_type <= 2; node_type++) {
-			for (n_num = tstart; n_num <= tstop; n_num++) {
-				n_ptr = c_ptr->nodes[n_num];
-				if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-					buf_copy = skb_copy(buf, GFP_ATOMIC);
-					if (buf_copy == NULL)
-						goto exit;
-					msg_set_destnode(buf_msg(buf_copy),
-							 n_ptr->addr);
-					tipc_link_send(buf_copy, n_ptr->addr,
-						       n_ptr->addr);
-				}
+		/* Send to nodes */
+		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
+			n_ptr = c_ptr->nodes[n_num];
+			if (n_ptr && tipc_node_has_active_links(n_ptr)) {
+				buf_copy = skb_copy(buf, GFP_ATOMIC);
+				if (buf_copy == NULL)
+					goto exit;
+				msg_set_destnode(buf_msg(buf_copy),
+						 n_ptr->addr);
+				tipc_link_send(buf_copy, n_ptr->addr,
+					       n_ptr->addr);
 			}
-			tstart = LOWEST_SLAVE;
-			tstop = c_ptr->highest_slave;
 		}
 	}
 exit:
@@ -532,7 +407,6 @@ exit:
 
 int tipc_cltr_init(void)
 {
-	tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves;
 	return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM;
 }
 
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 21493f7beb95..aa1fd6ab4d11 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -40,26 +40,21 @@
 #include "addr.h"
 #include "net.h"
 
-#define LOWEST_SLAVE  2048u
-
 /**
  * struct cluster - TIPC cluster structure
  * @addr: network address of cluster
  * @nodes: array of pointers to all nodes within cluster
  * @highest_node: id of highest numbered node within cluster
- * @highest_slave: (used for secondary node support)
  */
 
 struct cluster {
 	u32 addr;
 	struct tipc_node **nodes;
 	u32 highest_node;
-	u32 highest_slave;
 };
 
 
 extern struct tipc_node **tipc_local_nodes;
-extern u32 tipc_highest_allowed_slave;
 extern struct tipc_node_map tipc_cltr_bcast_nodes;
 
 void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
@@ -70,12 +65,10 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf);
 struct cluster *tipc_cltr_create(u32 addr);
 void tipc_cltr_delete(struct cluster *c_ptr);
 void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
-void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
 
 void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
 
 static inline struct cluster *tipc_cltr_find(u32 addr)
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 8de97ddb0427..05dc102300ae 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -301,19 +301,6 @@ static struct sk_buff *cfg_set_max_nodes(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_slaves(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value != 0)
-		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
-						   " (max secondary nodes fixed at 0)");
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_netid(void)
 {
 	u32 value;
@@ -439,9 +426,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_NODES:
 		rep_tlv_buf = cfg_set_max_nodes();
 		break;
-	case TIPC_CMD_SET_MAX_SLAVES:
-		rep_tlv_buf = cfg_set_max_slaves();
-		break;
 	case TIPC_CMD_SET_NETID:
 		rep_tlv_buf = cfg_set_netid();
 		break;
@@ -463,9 +447,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_NODES:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
 		break;
-	case TIPC_CMD_GET_MAX_SLAVES:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_slaves);
-		break;
 	case TIPC_CMD_GET_NETID:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
 		break;
@@ -475,6 +456,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 		break;
 	case TIPC_CMD_SET_MAX_ZONES:
 	case TIPC_CMD_GET_MAX_ZONES:
+	case TIPC_CMD_SET_MAX_SLAVES:
+	case TIPC_CMD_GET_MAX_SLAVES:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (obsolete command)");
 		break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 13946331bd4d..8b7af893971e 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -56,10 +56,6 @@
 #define CONFIG_TIPC_NODES 255
 #endif
 
-#ifndef CONFIG_TIPC_SLAVE_NODES
-#define CONFIG_TIPC_SLAVE_NODES 0
-#endif
-
 #ifndef CONFIG_TIPC_PORTS
 #define CONFIG_TIPC_PORTS 8191
 #endif
@@ -82,7 +78,6 @@ const char tipc_alphabet[] =
 u32 tipc_own_addr;
 int tipc_max_clusters;
 int tipc_max_nodes;
-int tipc_max_slaves;
 int tipc_max_ports;
 int tipc_max_subscriptions;
 int tipc_max_publications;
@@ -206,7 +201,6 @@ static int __init tipc_init(void)
 	tipc_max_ports = CONFIG_TIPC_PORTS;
 	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
-	tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES;
 	tipc_net_id = 4711;
 
 	if ((res = tipc_core_start()))
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 9403a226a570..8313a1689565 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -186,7 +186,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
 extern u32 tipc_own_addr;
 extern int tipc_max_clusters;
 extern int tipc_max_nodes;
-extern int tipc_max_slaves;
 extern int tipc_max_ports;
 extern int tipc_max_subscriptions;
 extern int tipc_max_publications;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index f2ce36baf42e..80799f6ba892 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -149,10 +149,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	}
 	if (!tipc_in_scope(dest, tipc_own_addr))
 		return;
-	if (is_slave(tipc_own_addr) && is_slave(orig))
-		return;
-	if (is_slave(orig) && !in_own_cluster(orig))
-		return;
 	if (in_own_cluster(orig)) {
 		/* Always accept link here */
 		struct sk_buff *rbuf;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index aee53864d7a0..c1b6217838e6 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -850,7 +850,7 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
  * Routing table message types
  */
 #define EXT_ROUTING_TABLE    0
-#define LOCAL_ROUTING_TABLE  1
+#define LOCAL_ROUTING_TABLE  1		/* obsoleted */
 #define SLAVE_ROUTING_TABLE  2
 #define ROUTE_ADDITION       3
 #define ROUTE_REMOVAL        4
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c20bd851a44a..8ffbdb33b2cb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -333,8 +333,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	/* Syncronize broadcast acks */
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
-	if (is_slave(tipc_own_addr))
-		return;
 	if (!in_own_cluster(n_ptr->addr)) {
 		/* Usage case 1 (see above) */
 		c_ptr = tipc_cltr_find(tipc_own_addr);
@@ -347,13 +345,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	}
 
 	c_ptr = n_ptr->owner;
-	if (is_slave(n_ptr->addr)) {
-		/* Usage case 2 (see above) */
-		tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1, tipc_max_nodes);
-		tipc_cltr_send_local_routes(c_ptr, n_ptr->addr);
-		return;
-	}
-
 	if (n_ptr->bclink.supported) {
 		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
@@ -362,9 +353,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 
 	/* Case 3 (see above) */
 	tipc_net_send_external_routes(n_ptr->addr);
-	tipc_cltr_send_slave_routes(c_ptr, n_ptr->addr);
-	tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, LOWEST_SLAVE,
-				  tipc_highest_allowed_slave);
 }
 
 static void node_cleanup_finished(unsigned long node_addr)
@@ -404,33 +392,20 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	}
 
 	/* Update routing tables */
-	if (is_slave(tipc_own_addr)) {
-		tipc_net_remove_as_router(n_ptr->addr);
+	if (!in_own_cluster(n_ptr->addr)) {
+		/* Case 4 (see above) */
+		c_ptr = tipc_cltr_find(tipc_own_addr);
+		tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
+					   tipc_max_nodes);
 	} else {
-		if (!in_own_cluster(n_ptr->addr)) {
-			/* Case 4 (see above) */
-			c_ptr = tipc_cltr_find(tipc_own_addr);
-			tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-						   tipc_max_nodes);
-		} else {
-			/* Case 5 (see above) */
-			c_ptr = tipc_cltr_find(n_ptr->addr);
-			if (is_slave(n_ptr->addr)) {
-				tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-							   tipc_max_nodes);
-			} else {
-				if (n_ptr->bclink.supported) {
-					tipc_nmap_remove(&tipc_cltr_bcast_nodes,
-							 n_ptr->addr);
-					if (n_ptr->addr < tipc_own_addr)
-						tipc_own_tag--;
-				}
-				tipc_net_remove_as_router(n_ptr->addr);
-				tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr,
-							   LOWEST_SLAVE,
-							   tipc_highest_allowed_slave);
-			}
+		/* Case 5 (see above) */
+		c_ptr = tipc_cltr_find(n_ptr->addr);
+		if (n_ptr->bclink.supported) {
+			tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+			if (n_ptr->addr < tipc_own_addr)
+				tipc_own_tag--;
 		}
+		tipc_net_remove_as_router(n_ptr->addr);
 	}
 	if (tipc_node_has_active_routes(n_ptr))
 		return;
@@ -482,7 +457,7 @@ struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
 		return n_ptr;
 
 	/* Cluster local system nodes *must* have direct links */
-	if (!is_slave(addr) && in_own_cluster(addr))
+	if (in_own_cluster(addr))
 		return NULL;
 
 	/* Look for cluster local router with direct link to node */
@@ -490,11 +465,6 @@ struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
 	if (router_addr)
 		return tipc_node_select(router_addr, selector);
 
-	/* Slave nodes can only be accessed within own cluster via a
-	   known router with direct link -- if no router was found,give up */
-	if (is_slave(addr))
-		return NULL;
-
 	/* Inter zone/cluster -- find any direct link to remote cluster */
 	addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
 	n_ptr = tipc_net_select_remote_node(addr, selector);
@@ -603,8 +573,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 		return tipc_cfg_reply_none();
 	}
 
-	/* For now, get space for all other nodes
-	   (will need to modify this when slave nodes are supported */
+	/* For now, get space for all other nodes */
 
 	payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
 	if (payload_size > 32768u) {
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 7873283f4965..c033cb87b964 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1110,10 +1110,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	msg_set_origport(msg, p_ptr->publ.ref);
 	msg_set_transp_seqno(msg, 42);
 	msg_set_type(msg, TIPC_CONN_MSG);
-	if (!may_route(peer->node))
-		msg_set_hdr_sz(msg, SHORT_H_SIZE);
-	else
-		msg_set_hdr_sz(msg, LONG_H_SIZE);
+	msg_set_hdr_sz(msg, SHORT_H_SIZE);
 
 	p_ptr->probing_interval = PROBING_INTERVAL;
 	p_ptr->probing_state = CONFIRMED;
-- 
cgit v1.2.3-71-gd317


From 51a8e4dee7653698ba4c6e7de71053665f075273 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:18 +0000
Subject: tipc: Remove prototype code for supporting inter-cluster routing

Eliminates routines and data structures that were intended to allow
TIPC to route messages to other clusters. Currently, TIPC supports only
networks consisting of a single cluster within a single zone, so this
code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   5 -
 net/tipc/addr.h             |  15 ---
 net/tipc/cluster.c          | 279 +-------------------------------------------
 net/tipc/cluster.h          |   8 --
 net/tipc/link.c             |  13 +--
 net/tipc/msg.h              |   7 +-
 net/tipc/net.c              |  45 -------
 net/tipc/net.h              |   4 -
 net/tipc/node.c             | 166 ++------------------------
 net/tipc/node.h             |  21 ----
 10 files changed, 15 insertions(+), 548 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index dcc2b8796d0a..1f38df1202ba 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -254,11 +254,6 @@ struct tipc_link_create {
 	struct tipc_media_addr peer_addr;
 	char bearer_name[TIPC_MAX_BEARER_NAME];
 };
-
-struct tipc_route_info {
-	__u32 dest;
-	__u32 router;
-};
 #endif
 
 /*
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index a16c6c87208e..2490fadd0caf 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,21 +37,6 @@
 #ifndef _TIPC_ADDR_H
 #define _TIPC_ADDR_H
 
-static inline u32 own_node(void)
-{
-	return tipc_node(tipc_own_addr);
-}
-
-static inline u32 own_cluster(void)
-{
-	return tipc_cluster(tipc_own_addr);
-}
-
-static inline u32 own_zone(void)
-{
-	return tipc_zone(tipc_own_addr);
-}
-
 static inline int in_own_cluster(u32 addr)
 {
 	return !((addr ^ tipc_own_addr) >> 12);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 6bc9f07be945..ba6f5bfa0cdb 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -38,9 +38,6 @@
 #include "cluster.h"
 #include "link.h"
 
-static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
-				u32 lower, u32 upper);
-
 struct tipc_node **tipc_local_nodes = NULL;
 struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
 
@@ -65,8 +62,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 		return NULL;
 	}
 
-	if (in_own_cluster(addr))
-		tipc_local_nodes = c_ptr->nodes;
+	tipc_local_nodes = c_ptr->nodes;
 	c_ptr->highest_node = 0;
 
 	tipc_net.clusters[1] = c_ptr;
@@ -100,278 +96,6 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 		c_ptr->highest_node = n_num;
 }
 
-/**
- * tipc_cltr_select_router - select router to a cluster
- *
- * Uses deterministic and fair algorithm.
- */
-
-u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref)
-{
-	u32 n_num;
-	u32 ulim = c_ptr->highest_node;
-	u32 mask;
-	u32 tstart;
-
-	assert(!in_own_cluster(c_ptr->addr));
-	if (!ulim)
-		return 0;
-
-	/* Start entry must be random */
-	mask = tipc_max_nodes;
-	while (mask > ulim)
-		mask >>= 1;
-	tstart = ref & mask;
-	n_num = tstart;
-
-	/* Lookup upwards with wrap-around */
-	do {
-		if (tipc_node_is_up(c_ptr->nodes[n_num]))
-			break;
-	} while (++n_num <= ulim);
-	if (n_num > ulim) {
-		n_num = 1;
-		do {
-			if (tipc_node_is_up(c_ptr->nodes[n_num]))
-				break;
-		} while (++n_num < tstart);
-		if (n_num == tstart)
-			return 0;
-	}
-	assert(n_num <= ulim);
-	return tipc_node_select_router(c_ptr->nodes[n_num], ref);
-}
-
-/**
- * tipc_cltr_select_node - select destination node within a remote cluster
- *
- * Uses deterministic and fair algorithm.
- */
-
-struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
-{
-	u32 n_num;
-	u32 mask = tipc_max_nodes;
-	u32 start_entry;
-
-	assert(!in_own_cluster(c_ptr->addr));
-	if (!c_ptr->highest_node)
-		return NULL;
-
-	/* Start entry must be random */
-	while (mask > c_ptr->highest_node) {
-		mask >>= 1;
-	}
-	start_entry = (selector & mask) ? selector & mask : 1u;
-	assert(start_entry <= c_ptr->highest_node);
-
-	/* Lookup upwards with wrap-around */
-	for (n_num = start_entry; n_num <= c_ptr->highest_node; n_num++) {
-		if (tipc_node_has_active_links(c_ptr->nodes[n_num]))
-			return c_ptr->nodes[n_num];
-	}
-	for (n_num = 1; n_num < start_entry; n_num++) {
-		if (tipc_node_has_active_links(c_ptr->nodes[n_num]))
-			return c_ptr->nodes[n_num];
-	}
-	return NULL;
-}
-
-/*
- *    Routing table management: See description in node.c
- */
-
-static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
-{
-	u32 size = INT_H_SIZE + data_size;
-	struct sk_buff *buf = tipc_buf_acquire(size);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		memset((char *)msg, 0, size);
-		tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
-	}
-	return buf;
-}
-
-void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest,
-			     u32 lower, u32 upper)
-{
-	struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, dest);
-		msg_set_type(msg, ROUTE_ADDITION);
-		tipc_cltr_multicast(c_ptr, buf, lower, upper);
-	} else {
-		warn("Memory squeeze: broadcast of new route failed\n");
-	}
-}
-
-void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest,
-				u32 lower, u32 upper)
-{
-	struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, dest);
-		msg_set_type(msg, ROUTE_REMOVAL);
-		tipc_cltr_multicast(c_ptr, buf, lower, upper);
-	} else {
-		warn("Memory squeeze: broadcast of lost route failed\n");
-	}
-}
-
-void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_node;
-	u32 n_num;
-	int send = 0;
-
-	if (in_own_cluster(c_ptr->addr))
-		return;
-	assert(in_own_cluster(dest));
-	highest = c_ptr->highest_node;
-	buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, EXT_ROUTING_TABLE);
-		for (n_num = 1; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of external route failed\n");
-	}
-}
-
-void tipc_cltr_recv_routing_table(struct sk_buff *buf)
-{
-	struct tipc_msg *msg = buf_msg(buf);
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	unchar *node_table;
-	u32 table_size;
-	u32 router;
-	u32 rem_node = msg_remote_node(msg);
-	u32 z_num;
-	u32 c_num;
-	u32 n_num;
-
-	c_ptr = tipc_cltr_find(rem_node);
-	if (!c_ptr) {
-		c_ptr = tipc_cltr_create(rem_node);
-		if (!c_ptr) {
-			buf_discard(buf);
-			return;
-		}
-	}
-
-	node_table = buf->data + msg_hdr_sz(msg);
-	table_size = msg_size(msg) - msg_hdr_sz(msg);
-	router = msg_prevnode(msg);
-	z_num = tipc_zone(rem_node);
-	c_num = tipc_cluster(rem_node);
-
-	switch (msg_type(msg)) {
-	case EXT_ROUTING_TABLE:
-		for (n_num = 1; n_num < table_size; n_num++) {
-			if (node_table[n_num]) {
-				u32 addr = tipc_addr(z_num, c_num, n_num);
-				n_ptr = c_ptr->nodes[n_num];
-				if (!n_ptr) {
-					n_ptr = tipc_node_create(addr);
-				}
-				if (n_ptr)
-					tipc_node_add_router(n_ptr, router);
-			}
-		}
-		break;
-	case SLAVE_ROUTING_TABLE:
-		assert(in_own_cluster(c_ptr->addr));
-		break;
-	case ROUTE_ADDITION:
-		assert(!in_own_cluster(c_ptr->addr));
-		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
-		if (!n_ptr)
-			n_ptr = tipc_node_create(rem_node);
-		if (n_ptr)
-			tipc_node_add_router(n_ptr, router);
-		break;
-	case ROUTE_REMOVAL:
-		assert(!in_own_cluster(c_ptr->addr));
-		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
-		if (n_ptr)
-			tipc_node_remove_router(n_ptr, router);
-		break;
-	default:
-		assert(!"Illegal routing manager message received\n");
-	}
-	buf_discard(buf);
-}
-
-void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router)
-{
-	u32 n_num;
-
-	if (in_own_cluster(c_ptr->addr))
-		return;
-
-	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-		if (c_ptr->nodes[n_num]) {
-			tipc_node_remove_router(c_ptr->nodes[n_num], router);
-		}
-	}
-}
-
-/**
- * tipc_cltr_multicast - multicast message to local nodes
- */
-
-static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
-			 u32 lower, u32 upper)
-{
-	struct sk_buff *buf_copy;
-	struct tipc_node *n_ptr;
-	u32 n_num;
-	u32 tstop;
-
-	assert(lower <= upper);
-	assert((lower >= 1) && (lower <= tipc_max_nodes));
-	assert((upper >= 1) && (upper <= tipc_max_nodes));
-	assert(in_own_cluster(c_ptr->addr));
-
-	tstop = c_ptr->highest_node;
-	if (tstop > upper)
-		tstop = upper;
-	for (n_num = lower; n_num <= tstop; n_num++) {
-		n_ptr = c_ptr->nodes[n_num];
-		if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-			buf_copy = skb_copy(buf, GFP_ATOMIC);
-			if (buf_copy == NULL)
-				break;
-			msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
-			tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
-		}
-	}
-	buf_discard(buf);
-}
-
 /**
  * tipc_cltr_broadcast - broadcast message to all nodes within cluster
  */
@@ -385,7 +109,6 @@ void tipc_cltr_broadcast(struct sk_buff *buf)
 
 	if (tipc_mode == TIPC_NET_MODE) {
 		c_ptr = tipc_cltr_find(tipc_own_addr);
-		assert(in_own_cluster(c_ptr->addr));	/* For now */
 
 		/* Send to nodes */
 		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index aa1fd6ab4d11..e4b6e4e27371 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -57,20 +57,12 @@ struct cluster {
 extern struct tipc_node **tipc_local_nodes;
 extern struct tipc_node_map tipc_cltr_bcast_nodes;
 
-void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
-void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest);
-struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector);
-u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref);
-void tipc_cltr_recv_routing_table(struct sk_buff *buf);
 struct cluster *tipc_cltr_create(u32 addr);
 void tipc_cltr_delete(struct cluster *c_ptr);
 void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
 
-void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-
 static inline struct cluster *tipc_cltr_find(u32 addr)
 {
 	if (!in_own_cluster(addr))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf414cf05e72..671ffd3c0e53 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1061,7 +1061,7 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
 	int res = -ELINKCONG;
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(dest, selector);
+	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
@@ -1137,7 +1137,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
 		return tipc_port_recv_msg(buf);
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(destnode, selector);
+	n_ptr = tipc_node_find(destnode);
 	if (likely(n_ptr)) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector];
@@ -1186,7 +1186,7 @@ again:
 			!sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
-	node = tipc_node_select(destaddr, selector);
+	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
 		l_ptr = node->active_links[selector];
@@ -1376,7 +1376,7 @@ error:
 	 * Now we have a buffer chain. Select a link and check
 	 * that packet size is still OK
 	 */
-	node = tipc_node_select(destaddr, sender->publ.ref & 1);
+	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
 		l_ptr = node->active_links[sender->publ.ref & 1];
@@ -1893,7 +1893,7 @@ deliver:
 						continue;
 					case ROUTE_DISTRIBUTOR:
 						tipc_node_unlock(n_ptr);
-						tipc_cltr_recv_routing_table(buf);
+						buf_discard(buf);
 						continue;
 					case NAME_DISTRIBUTOR:
 						tipc_node_unlock(n_ptr);
@@ -2852,7 +2852,6 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
 	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
 	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;
 	l_ptr->queue_limit[CONN_MANAGER] = 1200;
-	l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200;
 	l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
 	l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;
 	/* FRAGMENT and LAST_FRAGMENT packets */
@@ -3154,7 +3153,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 		return MAX_MSG_SIZE;
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(dest, selector);
+	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c1b6217838e6..68b65ef3e74b 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -540,7 +540,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
 #define  MSG_BUNDLER          6
 #define  LINK_PROTOCOL        7
 #define  CONN_MANAGER         8
-#define  ROUTE_DISTRIBUTOR    9
+#define  ROUTE_DISTRIBUTOR    9		/* obsoleted */
 #define  CHANGEOVER_PROTOCOL  10
 #define  NAME_DISTRIBUTOR     11
 #define  MSG_FRAGMENTER       12
@@ -819,11 +819,6 @@ static inline void msg_set_remote_node(struct tipc_msg *m, u32 a)
 	msg_set_word(m, msg_hdr_sz(m)/4, a);
 }
 
-static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
-{
-	msg_data(m)[pos + 4] = 1;
-}
-
 /*
  * Segmentation message types
  */
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a25f8bb1e1d9..3967f1f6d97f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,48 +112,6 @@
 DEFINE_RWLOCK(tipc_net_lock);
 struct network tipc_net;
 
-struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-
-	c_ptr = tipc_net.clusters[1];
-	if (!c_ptr)
-		return NULL;
-	return tipc_cltr_select_node(c_ptr, ref);
-}
-
-u32 tipc_net_select_router(u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-
-	c_ptr = tipc_net.clusters[1];
-	if (!c_ptr)
-		return 0;
-	return tipc_cltr_select_router(c_ptr, ref);
-}
-
-void tipc_net_remove_as_router(u32 router)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (!tipc_net.clusters[c_num])
-			continue;
-		tipc_cltr_remove_as_router(tipc_net.clusters[c_num], router);
-	}
-}
-
-void tipc_net_send_external_routes(u32 dest)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (tipc_net.clusters[c_num])
-			tipc_cltr_send_ext_routes(tipc_net.clusters[c_num],
-						  dest);
-	}
-}
-
 static void net_stop(void)
 {
 	u32 c_num;
@@ -225,9 +183,6 @@ void tipc_net_route_msg(struct sk_buff *buf)
 			return;
 		}
 		switch (msg_user(msg)) {
-		case ROUTE_DISTRIBUTOR:
-			tipc_cltr_recv_routing_table(buf);
-			break;
 		case NAME_DISTRIBUTOR:
 			tipc_named_recv(buf);
 			break;
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 786c94007470..6e402d9b33e0 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -54,11 +54,7 @@ struct network {
 extern struct network tipc_net;
 extern rwlock_t tipc_net_lock;
 
-void tipc_net_remove_as_router(u32 router);
-void tipc_net_send_external_routes(u32 dest);
 void tipc_net_route_msg(struct sk_buff *buf);
-struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref);
-u32 tipc_net_select_router(u32 addr, u32 ref);
 
 int tipc_net_start(u32 addr);
 void tipc_net_stop(void);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8ffbdb33b2cb..c47cc69eb575 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -95,11 +95,10 @@ struct tipc_node *tipc_node_create(u32 addr)
 	}
 
 	n_ptr->addr = addr;
-		spin_lock_init(&n_ptr->lock);
+	spin_lock_init(&n_ptr->lock);
 	INIT_LIST_HEAD(&n_ptr->nsub);
 	n_ptr->owner = c_ptr;
 	tipc_cltr_attach_node(c_ptr, n_ptr);
-	n_ptr->last_router = -1;
 
 	/* Insert node into ordered list */
 	for (curr_node = &tipc_nodes; *curr_node;
@@ -229,14 +228,9 @@ int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
 	return n_ptr->working_links > 1;
 }
 
-static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
-{
-	return n_ptr && (n_ptr->last_router >= 0);
-}
-
 int tipc_node_is_up(struct tipc_node *n_ptr)
 {
-	return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
+	return tipc_node_has_active_links(n_ptr);
 }
 
 struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -323,36 +317,17 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 
 static void node_established_contact(struct tipc_node *n_ptr)
 {
-	struct cluster *c_ptr;
-
 	dbg("node_established_contact:-> %x\n", n_ptr->addr);
-	if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) {
-		tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
-	}
+	tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
 
 	/* Syncronize broadcast acks */
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
-	if (!in_own_cluster(n_ptr->addr)) {
-		/* Usage case 1 (see above) */
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-		if (!c_ptr)
-			c_ptr = tipc_cltr_create(tipc_own_addr);
-		if (c_ptr)
-			tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1,
-						  tipc_max_nodes);
-		return;
-	}
-
-	c_ptr = n_ptr->owner;
 	if (n_ptr->bclink.supported) {
 		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag++;
 	}
-
-	/* Case 3 (see above) */
-	tipc_net_send_external_routes(n_ptr->addr);
 }
 
 static void node_cleanup_finished(unsigned long node_addr)
@@ -371,7 +346,6 @@ static void node_cleanup_finished(unsigned long node_addr)
 
 static void node_lost_contact(struct tipc_node *n_ptr)
 {
-	struct cluster *c_ptr;
 	struct tipc_node_subscr *ns, *tns;
 	char addr_string[16];
 	u32 i;
@@ -392,23 +366,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	}
 
 	/* Update routing tables */
-	if (!in_own_cluster(n_ptr->addr)) {
-		/* Case 4 (see above) */
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-		tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-					   tipc_max_nodes);
-	} else {
-		/* Case 5 (see above) */
-		c_ptr = tipc_cltr_find(n_ptr->addr);
-		if (n_ptr->bclink.supported) {
-			tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
-			if (n_ptr->addr < tipc_own_addr)
-				tipc_own_tag--;
-		}
-		tipc_net_remove_as_router(n_ptr->addr);
+	if (n_ptr->bclink.supported) {
+		tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		if (n_ptr->addr < tipc_own_addr)
+			tipc_own_tag--;
 	}
-	if (tipc_node_has_active_routes(n_ptr))
-		return;
 
 	info("Lost contact with %s\n",
 	     tipc_addr_string_fill(addr_string, n_ptr->addr));
@@ -437,120 +399,6 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
 }
 
-/**
- * tipc_node_select_next_hop - find the next-hop node for a message
- *
- * Called by when cluster local lookup has failed.
- */
-
-struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
-{
-	struct tipc_node *n_ptr;
-	u32 router_addr;
-
-	if (!tipc_addr_domain_valid(addr))
-		return NULL;
-
-	/* Look for direct link to destination processsor */
-	n_ptr = tipc_node_find(addr);
-	if (n_ptr && tipc_node_has_active_links(n_ptr))
-		return n_ptr;
-
-	/* Cluster local system nodes *must* have direct links */
-	if (in_own_cluster(addr))
-		return NULL;
-
-	/* Look for cluster local router with direct link to node */
-	router_addr = tipc_node_select_router(n_ptr, selector);
-	if (router_addr)
-		return tipc_node_select(router_addr, selector);
-
-	/* Inter zone/cluster -- find any direct link to remote cluster */
-	addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	n_ptr = tipc_net_select_remote_node(addr, selector);
-	if (n_ptr && tipc_node_has_active_links(n_ptr))
-		return n_ptr;
-
-	/* Last resort -- look for any router to anywhere in remote zone */
-	router_addr =  tipc_net_select_router(addr, selector);
-	if (router_addr)
-		return tipc_node_select(router_addr, selector);
-
-	return NULL;
-}
-
-/**
- * tipc_node_select_router - select router to reach specified node
- *
- * Uses a deterministic and fair algorithm for selecting router node.
- */
-
-u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref)
-{
-	u32 ulim;
-	u32 mask;
-	u32 start;
-	u32 r;
-
-	if (!n_ptr)
-		return 0;
-
-	if (n_ptr->last_router < 0)
-		return 0;
-	ulim = ((n_ptr->last_router + 1) * 32) - 1;
-
-	/* Start entry must be random */
-	mask = tipc_max_nodes;
-	while (mask > ulim)
-		mask >>= 1;
-	start = ref & mask;
-	r = start;
-
-	/* Lookup upwards with wrap-around */
-	do {
-		if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
-			break;
-	} while (++r <= ulim);
-	if (r > ulim) {
-		r = 1;
-		do {
-			if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
-				break;
-		} while (++r < start);
-		assert(r != start);
-	}
-	assert(r && (r <= ulim));
-	return tipc_addr(own_zone(), own_cluster(), r);
-}
-
-void tipc_node_add_router(struct tipc_node *n_ptr, u32 router)
-{
-	u32 r_num = tipc_node(router);
-
-	n_ptr->routers[r_num / 32] =
-		((1 << (r_num % 32)) | n_ptr->routers[r_num / 32]);
-	n_ptr->last_router = tipc_max_nodes / 32;
-	while ((--n_ptr->last_router >= 0) &&
-	       !n_ptr->routers[n_ptr->last_router]);
-}
-
-void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
-{
-	u32 r_num = tipc_node(router);
-
-	if (n_ptr->last_router < 0)
-		return;		/* No routes */
-
-	n_ptr->routers[r_num / 32] =
-		((~(1 << (r_num % 32))) & (n_ptr->routers[r_num / 32]));
-	n_ptr->last_router = tipc_max_nodes / 32;
-	while ((--n_ptr->last_router >= 0) &&
-	       !n_ptr->routers[n_ptr->last_router]);
-
-	if (!tipc_node_is_up(n_ptr))
-		node_lost_contact(n_ptr);
-}
-
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 domain;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 7bfaf5e8c201..3abaaa24c77d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -54,8 +54,6 @@
  * @cleanup_required: non-zero if cleaning up after a prior loss of contact
  * @link_cnt: number of links to node
  * @permit_changeover: non-zero if node has redundant links to this system
- * @routers: bitmap (used for multicluster communication)
- * @last_router: (used for multicluster communication)
  * @bclink: broadcast-related info
  *    @supported: non-zero if node supports TIPC b'cast capability
  *    @acked: sequence # of last outbound b'cast message acknowledged by node
@@ -80,8 +78,6 @@ struct tipc_node {
 	int working_links;
 	int cleanup_required;
 	int permit_changeover;
-	u32 routers[512/32];
-	int last_router;
 	struct {
 		int supported;
 		u32 acked;
@@ -105,11 +101,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
 void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
 int tipc_node_has_active_links(struct tipc_node *n_ptr);
 int tipc_node_has_redundant_links(struct tipc_node *n_ptr);
-u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref);
-struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector);
 int tipc_node_is_up(struct tipc_node *n_ptr);
-void tipc_node_add_router(struct tipc_node *n_ptr, u32 router);
-void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
 
@@ -117,22 +109,9 @@ static inline struct tipc_node *tipc_node_find(u32 addr)
 {
 	if (likely(in_own_cluster(addr)))
 		return tipc_local_nodes[tipc_node(addr)];
-	else if (tipc_addr_domain_valid(addr)) {
-		struct cluster *c_ptr = tipc_cltr_find(addr);
-
-		if (c_ptr)
-			return c_ptr->nodes[tipc_node(addr)];
-	}
 	return NULL;
 }
 
-static inline struct tipc_node *tipc_node_select(u32 addr, u32 selector)
-{
-	if (likely(in_own_cluster(addr)))
-		return tipc_local_nodes[tipc_node(addr)];
-	return tipc_node_select_next_hop(addr, selector);
-}
-
 static inline void tipc_node_lock(struct tipc_node *n_ptr)
 {
 	spin_lock_bh(&n_ptr->lock);
-- 
cgit v1.2.3-71-gd317


From 8f92df6ad49da958d97e171762d0a97a3dc738f1 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:19 +0000
Subject: tipc: Remove prototype code for supporting multiple clusters

Eliminates routines, data structures, and files that were intended
to allow TIPC to support a network containing multiple clusters.
Currently, TIPC supports only networks consisting of a single cluster
within a single zone, so this code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/Kconfig            |  10 ----
 net/tipc/Makefile           |   2 +-
 net/tipc/addr.c             |   5 +-
 net/tipc/bcast.c            |   9 ++-
 net/tipc/bcast.h            |   1 +
 net/tipc/cluster.c          | 135 --------------------------------------------
 net/tipc/cluster.h          |  73 ------------------------
 net/tipc/config.c           |  21 +------
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/name_distr.c       |  28 +++++++--
 net/tipc/net.c              |  25 ++++++--
 net/tipc/net.h              |   8 ++-
 net/tipc/node.c             |  37 ++++++------
 net/tipc/node.h             |   7 +--
 16 files changed, 83 insertions(+), 289 deletions(-)
 delete mode 100644 net/tipc/cluster.c
 delete mode 100644 net/tipc/cluster.h

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 1f38df1202ba..677aa13dc5d7 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -95,7 +95,7 @@
 #define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
-#define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* obsoleted */
 #define  TIPC_CMD_GET_NETID         0x400B    /* tx none, rx unsigned */
@@ -131,7 +131,7 @@
 #define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
-#define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* obsoleted */
 #define  TIPC_CMD_SET_NETID         0x800B    /* tx unsigned, rx none */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 06d32908fcfb..c02d3e9c156d 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,16 +29,6 @@ config TIPC_ADVANCED
 	  Saying Y here will open some advanced configuration for TIPC.
 	  Most users do not need to bother; if unsure, just say N.
 
-config TIPC_CLUSTERS
-	int "Maximum number of clusters in a zone"
-	depends on TIPC_ADVANCED
-	range 1 1
-	default "1"
-	help
-	  Specifies how many clusters can be supported in a TIPC zone.
-
-	  *** Currently TIPC only supports a single cluster per zone. ***
-
 config TIPC_NODES
 	int "Maximum number of nodes in a cluster"
 	depends on TIPC_ADVANCED
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 3d936f0560c7..849d819bfc74 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_TIPC) := tipc.o
 
-tipc-y	+= addr.o bcast.o bearer.o config.o cluster.o \
+tipc-y	+= addr.o bcast.o bearer.o config.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 8823e03e52e0..483868a75b88 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,7 +35,8 @@
  */
 
 #include "core.h"
-#include "cluster.h"
+#include "node.h"
+#include "addr.h"
 
 /**
  * tipc_addr_domain_valid - validates a network domain address
@@ -55,8 +56,6 @@ int tipc_addr_domain_valid(u32 addr)
 
 	if (n > max_nodes)
 		return 0;
-	if (c > tipc_max_clusters)
-		return 0;
 
 	if (n && (!z || !c))
 		return 0;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 6d828d9eda42..110829eab96a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -111,6 +111,9 @@ static struct bclink *bclink = NULL;
 static struct link *bcl = NULL;
 static DEFINE_SPINLOCK(bc_lock);
 
+/* broadcast-capable node map */
+struct tipc_node_map tipc_bcast_nmap;
+
 const char tipc_bclink_name[] = "broadcast-link";
 
 static void tipc_nmap_diff(struct tipc_node_map *nm_a,
@@ -566,8 +569,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 	if (likely(!msg_non_seq(buf_msg(buf)))) {
 		struct tipc_msg *msg;
 
-		assert(tipc_cltr_bcast_nodes.count != 0);
-		bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count);
+		assert(tipc_bcast_nmap.count != 0);
+		bcbuf_set_acks(buf, tipc_bcast_nmap.count);
 		msg = buf_msg(buf);
 		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
@@ -576,7 +579,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 
 	/* Send buffer over bearers until all targets reached */
 
-	bcbearer->remains = tipc_cltr_bcast_nodes;
+	bcbearer->remains = tipc_bcast_nmap;
 
 	for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
 		struct bearer *p = bcbearer->bpairs[bp_index].primary;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 011c03f0a4ab..51f8c5326ce6 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -51,6 +51,7 @@ struct tipc_node_map {
 	u32 map[MAX_NODES / WSIZE];
 };
 
+extern struct tipc_node_map tipc_bcast_nmap;
 
 #define PLSIZE 32
 
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
deleted file mode 100644
index ba6f5bfa0cdb..000000000000
--- a/net/tipc/cluster.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * net/tipc/cluster.c: TIPC cluster management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "cluster.h"
-#include "link.h"
-
-struct tipc_node **tipc_local_nodes = NULL;
-struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
-
-struct cluster *tipc_cltr_create(u32 addr)
-{
-	struct cluster *c_ptr;
-	int max_nodes;
-
-	c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC);
-	if (c_ptr == NULL) {
-		warn("Cluster creation failure, no memory\n");
-		return NULL;
-	}
-
-	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	max_nodes = tipc_max_nodes + 1;
-
-	c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
-	if (c_ptr->nodes == NULL) {
-		warn("Cluster creation failure, no memory for node area\n");
-		kfree(c_ptr);
-		return NULL;
-	}
-
-	tipc_local_nodes = c_ptr->nodes;
-	c_ptr->highest_node = 0;
-
-	tipc_net.clusters[1] = c_ptr;
-	return c_ptr;
-}
-
-void tipc_cltr_delete(struct cluster *c_ptr)
-{
-	u32 n_num;
-
-	if (!c_ptr)
-		return;
-	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-		tipc_node_delete(c_ptr->nodes[n_num]);
-	}
-	kfree(c_ptr->nodes);
-	kfree(c_ptr);
-}
-
-
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
-{
-	u32 n_num = tipc_node(n_ptr->addr);
-	u32 max_n_num = tipc_max_nodes;
-
-	assert(n_num > 0);
-	assert(n_num <= max_n_num);
-	assert(c_ptr->nodes[n_num] == NULL);
-	c_ptr->nodes[n_num] = n_ptr;
-	if (n_num > c_ptr->highest_node)
-		c_ptr->highest_node = n_num;
-}
-
-/**
- * tipc_cltr_broadcast - broadcast message to all nodes within cluster
- */
-
-void tipc_cltr_broadcast(struct sk_buff *buf)
-{
-	struct sk_buff *buf_copy;
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	u32 n_num;
-
-	if (tipc_mode == TIPC_NET_MODE) {
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-
-		/* Send to nodes */
-		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-			n_ptr = c_ptr->nodes[n_num];
-			if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-				buf_copy = skb_copy(buf, GFP_ATOMIC);
-				if (buf_copy == NULL)
-					goto exit;
-				msg_set_destnode(buf_msg(buf_copy),
-						 n_ptr->addr);
-				tipc_link_send(buf_copy, n_ptr->addr,
-					       n_ptr->addr);
-			}
-		}
-	}
-exit:
-	buf_discard(buf);
-}
-
-int tipc_cltr_init(void)
-{
-	return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM;
-}
-
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
deleted file mode 100644
index e4b6e4e27371..000000000000
--- a/net/tipc/cluster.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * net/tipc/cluster.h: Include file for TIPC cluster management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_CLUSTER_H
-#define _TIPC_CLUSTER_H
-
-#include "addr.h"
-#include "net.h"
-
-/**
- * struct cluster - TIPC cluster structure
- * @addr: network address of cluster
- * @nodes: array of pointers to all nodes within cluster
- * @highest_node: id of highest numbered node within cluster
- */
-
-struct cluster {
-	u32 addr;
-	struct tipc_node **nodes;
-	u32 highest_node;
-};
-
-
-extern struct tipc_node **tipc_local_nodes;
-extern struct tipc_node_map tipc_cltr_bcast_nodes;
-
-struct cluster *tipc_cltr_create(u32 addr);
-void tipc_cltr_delete(struct cluster *c_ptr);
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
-void tipc_cltr_broadcast(struct sk_buff *buf);
-int tipc_cltr_init(void);
-
-static inline struct cluster *tipc_cltr_find(u32 addr)
-{
-	if (!in_own_cluster(addr))
-		return NULL;
-	return tipc_net.clusters[1];
-}
-
-#endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 05dc102300ae..bc512102eebd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -269,19 +269,6 @@ static struct sk_buff *cfg_set_max_ports(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_clusters(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value != delimit(value, 1, 1))
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max clusters fixed at 1)");
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_nodes(void)
 {
 	u32 value;
@@ -420,9 +407,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_SUBSCR:
 		rep_tlv_buf = cfg_set_max_subscriptions();
 		break;
-	case TIPC_CMD_SET_MAX_CLUSTERS:
-		rep_tlv_buf = cfg_set_max_clusters();
-		break;
 	case TIPC_CMD_SET_MAX_NODES:
 		rep_tlv_buf = cfg_set_max_nodes();
 		break;
@@ -441,9 +425,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_SUBSCR:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
 		break;
-	case TIPC_CMD_GET_MAX_CLUSTERS:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters);
-		break;
 	case TIPC_CMD_GET_MAX_NODES:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
 		break;
@@ -458,6 +439,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_ZONES:
 	case TIPC_CMD_SET_MAX_SLAVES:
 	case TIPC_CMD_GET_MAX_SLAVES:
+	case TIPC_CMD_SET_MAX_CLUSTERS:
+	case TIPC_CMD_GET_MAX_CLUSTERS:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (obsolete command)");
 		break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 8b7af893971e..b9a3ef13f7e7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,10 +48,6 @@
 #include "config.h"
 
 
-#ifndef CONFIG_TIPC_CLUSTERS
-#define CONFIG_TIPC_CLUSTERS 1
-#endif
-
 #ifndef CONFIG_TIPC_NODES
 #define CONFIG_TIPC_NODES 255
 #endif
@@ -76,7 +72,6 @@ const char tipc_alphabet[] =
 /* configurable TIPC parameters */
 
 u32 tipc_own_addr;
-int tipc_max_clusters;
 int tipc_max_nodes;
 int tipc_max_ports;
 int tipc_max_subscriptions;
@@ -199,7 +194,6 @@ static int __init tipc_init(void)
 	tipc_max_publications = 10000;
 	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
-	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_net_id = 4711;
 
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8313a1689565..c44f955bd54a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -184,7 +184,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
  */
 
 extern u32 tipc_own_addr;
-extern int tipc_max_clusters;
 extern int tipc_max_nodes;
 extern int tipc_max_ports;
 extern int tipc_max_subscriptions;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 10ff48be3c01..c4583fe888d8 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -35,7 +35,7 @@
  */
 
 #include "core.h"
-#include "cluster.h"
+#include "addr.h"
 #include "link.h"
 #include "name_distr.h"
 
@@ -107,6 +107,26 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 	return buf;
 }
 
+static void named_cluster_distribute(struct sk_buff *buf)
+{
+	struct sk_buff *buf_copy;
+	struct tipc_node *n_ptr;
+	u32 n_num;
+
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
+		n_ptr = tipc_net.nodes[n_num];
+		if (n_ptr && tipc_node_has_active_links(n_ptr)) {
+			buf_copy = skb_copy(buf, GFP_ATOMIC);
+			if (!buf_copy)
+				break;
+			msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
+			tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
+		}
+	}
+
+	buf_discard(buf);
+}
+
 /**
  * tipc_named_publish - tell other nodes about a new publication by this node
  */
@@ -127,8 +147,8 @@ void tipc_named_publish(struct publication *publ)
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	dbg("tipc_named_withdraw: broadcasting publish msg\n");
-	tipc_cltr_broadcast(buf);
+	dbg("tipc_named_publish: broadcasting publish msg\n");
+	named_cluster_distribute(buf);
 }
 
 /**
@@ -152,7 +172,7 @@ void tipc_named_withdraw(struct publication *publ)
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
 	dbg("tipc_named_withdraw: broadcasting withdraw msg\n");
-	tipc_cltr_broadcast(buf);
+	named_cluster_distribute(buf);
 }
 
 /**
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 3967f1f6d97f..3baf55ee0985 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,12 +112,23 @@
 DEFINE_RWLOCK(tipc_net_lock);
 struct network tipc_net;
 
+static int net_start(void)
+{
+	tipc_net.nodes = kcalloc(tipc_max_nodes + 1,
+				 sizeof(*tipc_net.nodes), GFP_ATOMIC);
+	tipc_net.highest_node = 0;
+
+	return tipc_net.nodes ? 0 : -ENOMEM;
+}
+
 static void net_stop(void)
 {
-	u32 c_num;
+	u32 n_num;
 
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++)
-		tipc_cltr_delete(tipc_net.clusters[c_num]);
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++)
+		tipc_node_delete(tipc_net.nodes[n_num]);
+	kfree(tipc_net.nodes);
+	tipc_net.nodes = NULL;
 }
 
 static void net_route_named_msg(struct sk_buff *buf)
@@ -218,10 +229,12 @@ int tipc_net_start(u32 addr)
 	tipc_named_reinit();
 	tipc_port_reinit();
 
-	if ((res = tipc_cltr_init()) ||
-	    (res = tipc_bclink_init())) {
+	res = net_start();
+	if (res)
+		return res;
+	res = tipc_bclink_init();
+	if (res)
 		return res;
-	}
 
 	tipc_k_signal((Handler)tipc_subscr_start, 0);
 	tipc_k_signal((Handler)tipc_cfg_init, 0);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 6e402d9b33e0..4ae59ad04893 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,16 +37,18 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-struct cluster;
+struct tipc_node;
 
 /**
  * struct network - TIPC network structure
- * @clusters: array of pointers to all clusters within zone
+ * @nodes: array of pointers to all nodes within cluster
+ * @highest_node: id of highest numbered node within cluster
  * @links: number of (unicast) links to cluster
  */
 
 struct network {
-	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
+	struct tipc_node **nodes;
+	u32 highest_node;
 	u32 links;
 };
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c47cc69eb575..58e189bf5c96 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -62,9 +62,9 @@ u32 tipc_own_tag = 0;
 
 struct tipc_node *tipc_node_create(u32 addr)
 {
-	struct cluster *c_ptr;
 	struct tipc_node *n_ptr;
 	struct tipc_node **curr_node;
+	u32 n_num;
 
 	spin_lock_bh(&node_create_lock);
 
@@ -84,21 +84,14 @@ struct tipc_node *tipc_node_create(u32 addr)
 		return NULL;
 	}
 
-	c_ptr = tipc_cltr_find(addr);
-	if (!c_ptr) {
-		c_ptr = tipc_cltr_create(addr);
-	}
-	if (!c_ptr) {
-		spin_unlock_bh(&node_create_lock);
-		kfree(n_ptr);
-		return NULL;
-	}
-
 	n_ptr->addr = addr;
 	spin_lock_init(&n_ptr->lock);
 	INIT_LIST_HEAD(&n_ptr->nsub);
-	n_ptr->owner = c_ptr;
-	tipc_cltr_attach_node(c_ptr, n_ptr);
+
+	n_num = tipc_node(addr);
+	tipc_net.nodes[n_num] = n_ptr;
+	if (n_num > tipc_net.highest_node)
+		tipc_net.highest_node = n_num;
 
 	/* Insert node into ordered list */
 	for (curr_node = &tipc_nodes; *curr_node;
@@ -115,11 +108,19 @@ struct tipc_node *tipc_node_create(u32 addr)
 
 void tipc_node_delete(struct tipc_node *n_ptr)
 {
+	u32 n_num;
+
 	if (!n_ptr)
 		return;
 
 	dbg("node %x deleted\n", n_ptr->addr);
+	n_num = tipc_node(n_ptr->addr);
+	tipc_net.nodes[n_num] = NULL;
 	kfree(n_ptr);
+
+	while (!tipc_net.nodes[tipc_net.highest_node])
+		if (--tipc_net.highest_node == 0)
+			break;
 }
 
 
@@ -324,7 +325,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
 	if (n_ptr->bclink.supported) {
-		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		tipc_nmap_add(&tipc_bcast_nmap, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag++;
 	}
@@ -361,13 +362,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 		buf_discard(n_ptr->bclink.defragm);
 		n_ptr->bclink.defragm = NULL;
 	}
-	if (in_own_cluster(n_ptr->addr) && n_ptr->bclink.supported) {
-		tipc_bclink_acknowledge(n_ptr, mod(n_ptr->bclink.acked + 10000));
-	}
 
-	/* Update routing tables */
 	if (n_ptr->bclink.supported) {
-		tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		tipc_bclink_acknowledge(n_ptr,
+					mod(n_ptr->bclink.acked + 10000));
+		tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag--;
 	}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 3abaaa24c77d..206a8efa410e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -38,14 +38,14 @@
 #define _TIPC_NODE_H
 
 #include "node_subscr.h"
-#include "cluster.h"
+#include "addr.h"
+#include "net.h"
 #include "bearer.h"
 
 /**
  * struct tipc_node - TIPC node structure
  * @addr: network address of node
  * @lock: spinlock governing access to structure
- * @owner: pointer to cluster that node belongs to
  * @next: pointer to next node in sorted list of cluster's nodes
  * @nsub: list of "node down" subscriptions monitoring node
  * @active_links: pointers to active links to node
@@ -69,7 +69,6 @@
 struct tipc_node {
 	u32 addr;
 	spinlock_t lock;
-	struct cluster *owner;
 	struct tipc_node *next;
 	struct list_head nsub;
 	struct link *active_links[2];
@@ -108,7 +107,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 static inline struct tipc_node *tipc_node_find(u32 addr)
 {
 	if (likely(in_own_cluster(addr)))
-		return tipc_local_nodes[tipc_node(addr)];
+		return tipc_net.nodes[tipc_node(addr)];
 	return NULL;
 }
 
-- 
cgit v1.2.3-71-gd317


From 0e65967e33be61e5f67727edd4ea829b47676fc0 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:32 +0000
Subject: tipc: cleanup various cosmetic whitespace issues

Cleans up TIPC's source code to eliminate deviations from generally
accepted coding conventions relating to leading/trailing white space
and white space around commas, braces, cases, and sizeof.

These changes are purely cosmetic and do not alter the operation of TIPC
in any way.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc.h        | 18 +++++++-------
 include/linux/tipc_config.h | 59 ++++++++++++++++++++++-----------------------
 net/tipc/Kconfig            |  2 +-
 net/tipc/bcast.c            |  2 +-
 net/tipc/config.c           |  3 +--
 net/tipc/discover.c         |  2 +-
 net/tipc/eth_media.c        |  5 ++--
 net/tipc/link.c             | 31 +++++++++++-------------
 net/tipc/link.h             |  4 +--
 net/tipc/msg.c              | 56 +++++++++++++++++++++---------------------
 net/tipc/msg.h              |  8 +++---
 net/tipc/name_table.c       | 14 +++++------
 net/tipc/name_table.h       |  2 +-
 net/tipc/node.c             |  2 +-
 net/tipc/port.c             | 15 ++++++------
 net/tipc/port.h             |  2 +-
 net/tipc/ref.c              |  6 ++---
 net/tipc/socket.c           | 24 +++++++++---------
 net/tipc/subscr.c           |  2 +-
 19 files changed, 124 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc.h b/include/linux/tipc.h
index d10614b29d59..1eefa3f6d1f4 100644
--- a/include/linux/tipc.h
+++ b/include/linux/tipc.h
@@ -1,6 +1,6 @@
 /*
  * include/linux/tipc.h: Include file for TIPC socket interface
- * 
+ *
  * Copyright (c) 2003-2006, Ericsson AB
  * Copyright (c) 2005, Wind River Systems
  * All rights reserved.
@@ -42,7 +42,7 @@
 /*
  * TIPC addressing primitives
  */
- 
+
 struct tipc_portid {
 	__u32 ref;
 	__u32 node;
@@ -89,7 +89,7 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_TOP_SRV		1	/* topology service name type */
 #define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
 
-/* 
+/*
  * Publication scopes when binding port names and port name sequences
  */
 
@@ -112,7 +112,7 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_HIGH_IMPORTANCE		2
 #define TIPC_CRITICAL_IMPORTANCE	3
 
-/* 
+/*
  * Msg rejection/connection shutdown reasons
  */
 
@@ -127,9 +127,9 @@ static inline unsigned int tipc_node(__u32 addr)
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS     	0x01  	/* filter for port availability */
-#define TIPC_SUB_SERVICE     	0x02  	/* filter for service availability */
-#define TIPC_SUB_CANCEL         0x04    /* cancel a subscription */
+#define TIPC_SUB_PORTS		0x01	/* filter for port availability */
+#define TIPC_SUB_SERVICE	0x02	/* filter for service availability */
+#define TIPC_SUB_CANCEL		0x04	/* cancel a subscription */
 #if 0
 /* The following filter options are not currently implemented */
 #define TIPC_SUB_NO_BIND_EVTS	0x04	/* filter out "publish" events */
@@ -137,12 +137,12 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_SUB_SINGLE_EVT	0x10	/* expire after first event */
 #endif
 
-#define TIPC_WAIT_FOREVER	~0	/* timeout for permanent subscription */
+#define TIPC_WAIT_FOREVER	(~0)	/* timeout for permanent subscription */
 
 struct tipc_subscr {
 	struct tipc_name_seq seq;	/* name sequence of interest */
 	__u32 timeout;			/* subscription duration (in ms) */
-        __u32 filter;   		/* bitmask of filter options */
+	__u32 filter;			/* bitmask of filter options */
 	char usr_handle[8];		/* available for subscriber use */
 };
 
diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 677aa13dc5d7..7d42460a5e3c 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -1,6 +1,6 @@
 /*
  * include/linux/tipc_config.h: Include file for TIPC configuration interface
- * 
+ *
  * Copyright (c) 2003-2006, Ericsson AB
  * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
@@ -54,19 +54,19 @@
  * which specify parameters or results for the operation.
  *
  * For many operations, the request and reply messages have a fixed number
- * of TLVs (usually zero or one); however, some reply messages may return 
+ * of TLVs (usually zero or one); however, some reply messages may return
  * a variable number of TLVs.  A failed request is denoted by the presence
  * of an "error string" TLV in the reply message instead of the TLV(s) the
  * reply should contain if the request succeeds.
  */
- 
-/* 
+
+/*
  * Public commands:
  * May be issued by any process.
- * Accepted by own node, or by remote node only if remote management enabled.                       
+ * Accepted by own node, or by remote node only if remote management enabled.
  */
- 
-#define  TIPC_CMD_NOOP   	    0x0000    /* tx none, rx none */
+
+#define  TIPC_CMD_NOOP              0x0000    /* tx none, rx none */
 #define  TIPC_CMD_GET_NODES         0x0001    /* tx net_addr, rx node_info(s) */
 #define  TIPC_CMD_GET_MEDIA_NAMES   0x0002    /* tx none, rx media_name(s) */
 #define  TIPC_CMD_GET_BEARER_NAMES  0x0003    /* tx none, rx bearer_name(s) */
@@ -83,11 +83,11 @@
 #define  TIPC_CMD_GET_LINK_PEER     0x000D    /* tx link_name, rx ? */
 #endif
 
-/* 
+/*
  * Protected commands:
  * May only be issued by "network administration capable" process.
  * Accepted by own node, or by remote node only if remote management enabled
- * and this node is zone manager.                       
+ * and this node is zone manager.
  */
 
 #define  TIPC_CMD_GET_REMOTE_MNG    0x4003    /* tx none, rx unsigned */
@@ -116,10 +116,10 @@
 #define  TIPC_CMD_UNBLOCK_LINK      0x4106    /* tx link_name, rx none */
 #endif
 
-/* 
+/*
  * Private commands:
  * May only be issued by "network administration capable" process.
- * Accepted by own node only; cannot be used on a remote node.                       
+ * Accepted by own node only; cannot be used on a remote node.
  */
 
 #define  TIPC_CMD_SET_NODE_ADDR     0x8001    /* tx net_addr, rx none */
@@ -156,20 +156,20 @@
 #define TIPC_TLV_ULTRA_STRING	5	/* char[32768] (max) */
 
 #define TIPC_TLV_ERROR_STRING	16	/* char[128] containing "error code" */
-#define TIPC_TLV_NET_ADDR   	17	/* 32-bit integer denoting <Z.C.N> */
+#define TIPC_TLV_NET_ADDR	17	/* 32-bit integer denoting <Z.C.N> */
 #define TIPC_TLV_MEDIA_NAME	18	/* char[TIPC_MAX_MEDIA_NAME] */
 #define TIPC_TLV_BEARER_NAME	19	/* char[TIPC_MAX_BEARER_NAME] */
 #define TIPC_TLV_LINK_NAME	20	/* char[TIPC_MAX_LINK_NAME] */
 #define TIPC_TLV_NODE_INFO	21	/* struct tipc_node_info */
 #define TIPC_TLV_LINK_INFO	22	/* struct tipc_link_info */
-#define TIPC_TLV_BEARER_CONFIG  23	/* struct tipc_bearer_config */
-#define TIPC_TLV_LINK_CONFIG    24	/* struct tipc_link_config */
+#define TIPC_TLV_BEARER_CONFIG	23	/* struct tipc_bearer_config */
+#define TIPC_TLV_LINK_CONFIG	24	/* struct tipc_link_config */
 #define TIPC_TLV_NAME_TBL_QUERY	25	/* struct tipc_name_table_query */
-#define TIPC_TLV_PORT_REF   	26	/* 32-bit port reference */
+#define TIPC_TLV_PORT_REF	26	/* 32-bit port reference */
 
 /*
  * Maximum sizes of TIPC bearer-related names (including terminating NUL)
- */ 
+ */
 
 #define TIPC_MAX_MEDIA_NAME	16	/* format = media */
 #define TIPC_MAX_IF_NAME	16	/* format = interface */
@@ -234,7 +234,7 @@ struct tipc_name_table_query {
 };
 
 /*
- * The error string TLV is a null-terminated string describing the cause 
+ * The error string TLV is a null-terminated string describing the cause
  * of the request failure.  To simplify error processing (and to save space)
  * the first character of the string can be a special error code character
  * (lying by the range 0x80 to 0xFF) which represents a pre-defined reason.
@@ -258,7 +258,7 @@ struct tipc_link_create {
 
 /*
  * A TLV consists of a descriptor, followed by the TLV value.
- * TLV descriptor fields are stored in network byte order; 
+ * TLV descriptor fields are stored in network byte order;
  * TLV values must also be stored in network byte order (where applicable).
  * TLV descriptors must be aligned to addresses which are multiple of 4,
  * so up to 3 bytes of padding may exist at the end of the TLV value area.
@@ -294,7 +294,7 @@ static inline int TLV_OK(const void *tlv, __u16 space)
 
 static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type)
 {
-	return TLV_OK(tlv, space) && 
+	return TLV_OK(tlv, space) &&
 		(ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
 }
 
@@ -313,7 +313,7 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 }
 
 /*
- * A TLV list descriptor simplifies processing of messages 
+ * A TLV list descriptor simplifies processing of messages
  * containing multiple TLVs.
  */
 
@@ -322,15 +322,15 @@ struct tlv_list_desc {
 	__u32 tlv_space;		/* # bytes from curr TLV to list end */
 };
 
-static inline void TLV_LIST_INIT(struct tlv_list_desc *list, 
+static inline void TLV_LIST_INIT(struct tlv_list_desc *list,
 				 void *data, __u32 space)
 {
 	list->tlv_ptr = (struct tlv_desc *)data;
 	list->tlv_space = space;
 }
-	     
+
 static inline int TLV_LIST_EMPTY(struct tlv_list_desc *list)
-{ 
+{
 	return (list->tlv_space == 0);
 }
 
@@ -348,7 +348,7 @@ static inline void TLV_LIST_STEP(struct tlv_list_desc *list)
 {
 	__u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len));
 
-        list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
+	list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
 	list->tlv_space -= tlv_space;
 }
 
@@ -372,15 +372,14 @@ struct tipc_genlmsghdr {
 #define TIPC_GENL_HDRLEN	NLMSG_ALIGN(sizeof(struct tipc_genlmsghdr))
 
 /*
- * Configuration messages exchanged via TIPC sockets use the TIPC configuration 
- * message header, which is defined below.  This structure is analogous 
- * to the Netlink message header, but fields are stored in network byte order 
- * and no padding is permitted between the header and the message data 
+ * Configuration messages exchanged via TIPC sockets use the TIPC configuration
+ * message header, which is defined below.  This structure is analogous
+ * to the Netlink message header, but fields are stored in network byte order
+ * and no padding is permitted between the header and the message data
  * that follows.
  */
 
-struct tipc_cfg_msg_hdr
-{
+struct tipc_cfg_msg_hdr {
 	__be32 tcm_len;		/* Message length (including header) */
 	__be16 tcm_type;	/* Command type */
 	__be16 tcm_flags;	/* Additional flags */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 0e7ce30fd567..0436927369f3 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -50,7 +50,7 @@ config TIPC_PORTS
 	  Specifies how many ports can be supported by a node.
 	  Can range from 127 to 65535 ports; default is 8191.
 
-	  Setting this to a smaller value saves some memory, 
+	  Setting this to a smaller value saves some memory,
 	  setting it to higher allows for more ports.
 
 config TIPC_LOG
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index c0f3b096e7f5..9de32564984a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -428,7 +428,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 	static int rx_count = 0;
 #endif
 	struct tipc_msg *msg = buf_msg(buf);
-	struct tipc_node* node = tipc_node_find(msg_prevnode(msg));
+	struct tipc_node *node = tipc_node_find(msg_prevnode(msg));
 	u32 next_in;
 	u32 seqno;
 	struct sk_buff *deferred;
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 6c67132f8652..e16750dcf3c1 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -322,8 +322,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	} else if (!tipc_remote_management) {
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE);
 		goto exit;
-	}
-	else if (cmd >= 0x4000) {
+	} else if (cmd >= 0x4000) {
 		u32 domain = 0;
 
 		if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) ||
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index be28f5adc770..fa026bd91a68 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -130,7 +130,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	u32 net_id = msg_bc_netid(msg);
 	u32 type = msg_type(msg);
 
-	msg_get_media_addr(msg,&media_addr);
+	msg_get_media_addr(msg, &media_addr);
 	buf_discard(buf);
 
 	if (net_id != tipc_net_id)
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 101d9cb6a559..5dfe66357794 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -144,7 +144,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 
 	/* Find device with specified name */
 
-	for_each_netdev(&init_net, pdev){
+	for_each_netdev(&init_net, pdev) {
 		if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
 			dev = pdev;
 			break;
@@ -155,7 +155,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 
 	/* Find Ethernet bearer for device (or create one) */
 
-	for (;(eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev); eb_ptr++);
+	while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev))
+		eb_ptr++;
 	if (eb_ptr == stop)
 		return -EDQUOT;
 	if (!eb_ptr->dev) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 647f2ecfde50..ef203a1581cd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -977,8 +977,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 				l_ptr->next_out = buf;
 				return res;
 			}
-		}
-		else
+		} else
 			*used_max_pkt = l_ptr->max_pkt;
 	}
 	return tipc_link_send_buf(l_ptr, buf);  /* All other cases */
@@ -1132,10 +1131,10 @@ static int link_send_sections_long(struct port *sender,
 	struct tipc_node *node;
 	struct tipc_msg *hdr = &sender->publ.phdr;
 	u32 dsz = msg_data_sz(hdr);
-	u32 max_pkt,fragm_sz,rest;
+	u32 max_pkt, fragm_sz, rest;
 	struct tipc_msg fragm_hdr;
-	struct sk_buff *buf,*buf_chain,*prev;
-	u32 fragm_crs,fragm_rest,hsz,sect_rest;
+	struct sk_buff *buf, *buf_chain, *prev;
+	u32 fragm_crs, fragm_rest, hsz, sect_rest;
 	const unchar *sect_crs;
 	int curr_sect;
 	u32 fragm_no;
@@ -1212,7 +1211,7 @@ error:
 			/* Initiate new fragment: */
 			if (rest <= fragm_sz) {
 				fragm_sz = rest;
-				msg_set_type(&fragm_hdr,LAST_FRAGMENT);
+				msg_set_type(&fragm_hdr, LAST_FRAGMENT);
 			} else {
 				msg_set_type(&fragm_hdr, FRAGMENT);
 			}
@@ -1229,8 +1228,7 @@ error:
 			fragm_crs = INT_H_SIZE;
 			fragm_rest = fragm_sz;
 		}
-	}
-	while (rest > 0);
+	} while (rest > 0);
 
 	/*
 	 * Now we have a buffer chain. Select a link and check
@@ -1333,7 +1331,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 	buf = l_ptr->proto_msg_queue;
 	if (buf) {
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
-		msg_set_bcast_ack(buf_msg(buf),l_ptr->owner->bclink.last_in);
+		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
 			l_ptr->unacked_window = 0;
 			buf_discard(buf);
@@ -1847,8 +1845,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
 		}
 		prev = crs;
 		crs = crs->next;
-	}
-	while (crs);
+	} while (crs);
 
 	/* Message is a duplicate of an existing message */
 
@@ -2215,11 +2212,11 @@ void tipc_link_changeover(struct link *l_ptr)
 
 		if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) {
 			struct tipc_msg *m = msg_get_wrapped(msg);
-			unchar* pos = (unchar*)m;
+			unchar *pos = (unchar *)m;
 
 			msgcount = msg_msgcnt(msg);
 			while (msgcount--) {
-				msg_set_seqno(m,msg_seqno(msg));
+				msg_set_seqno(m, msg_seqno(msg));
 				tipc_link_tunnel(l_ptr, &tunnel_hdr, m,
 						 msg_link_selector(m));
 				pos += align(msg_size(m));
@@ -2321,7 +2318,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	if (msg_typ == DUPLICATE_MSG) {
 		if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
 			goto exit;
-		*buf = buf_extract(tunnel_buf,INT_H_SIZE);
+		*buf = buf_extract(tunnel_buf, INT_H_SIZE);
 		if (*buf == NULL) {
 			warn("Link changeover error, duplicate msg dropped\n");
 			goto exit;
@@ -2552,8 +2549,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 			/*  Prepare buffer for subsequent fragments. */
 
 			set_long_msg_seqno(pbuf, long_msg_seq_no);
-			set_fragm_size(pbuf,fragm_sz);
-			set_expected_frags(pbuf,exp_fragm_cnt - 1);
+			set_fragm_size(pbuf, fragm_sz);
+			set_expected_frags(pbuf, exp_fragm_cnt - 1);
 		} else {
 			warn("Link unable to reassemble fragmented message\n");
 		}
@@ -2580,7 +2577,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 			*m = buf_msg(pbuf);
 			return 1;
 		}
-		set_expected_frags(pbuf,exp_frags);
+		set_expected_frags(pbuf, exp_frags);
 		return 0;
 	}
 	buf_discard(fbuf);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index eca19c247b79..70967e637027 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -229,8 +229,8 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
 void tipc_link_reset(struct link *l_ptr);
 int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
 int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
-u32 tipc_link_get_max_pkt(u32 dest,u32 selector);
-int tipc_link_send_sections_fast(struct port* sender,
+u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
+int tipc_link_send_sections_fast(struct port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destnode);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 2571ffb4d350..e81d43a8ea3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -91,7 +91,7 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
 
 int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf)
+			    int max_size, int usrmem, struct sk_buff **buf)
 {
 	int dsz, sz, hsz, pos, res, cnt;
 
@@ -161,10 +161,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "LAST:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
 
 		}
-		tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg),
+		tipc_printf(buf, "NO(%u/%u):", msg_long_msgno(msg),
 			    msg_fragm_no(msg));
 		break;
 	case TIPC_LOW_IMPORTANCE:
@@ -190,7 +190,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "DIR:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE %u",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg));
 		}
 		if (msg_routed(msg) && !msg_non_seq(msg))
 			tipc_printf(buf, "ROUT:");
@@ -208,7 +208,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "WDRW:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
 		}
 		if (msg_routed(msg))
 			tipc_printf(buf, "ROUT:");
@@ -227,39 +227,39 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			break;
 		case CONN_ACK:
 			tipc_printf(buf, "CONN_ACK:");
-			tipc_printf(buf, "ACK(%u):",msg_msgcnt(msg));
+			tipc_printf(buf, "ACK(%u):", msg_msgcnt(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		if (msg_routed(msg))
 			tipc_printf(buf, "ROUT:");
 		if (msg_reroute_cnt(msg))
-			tipc_printf(buf, "REROUTED(%u):",msg_reroute_cnt(msg));
+			tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg));
 		break;
 	case LINK_PROTOCOL:
-		tipc_printf(buf, "PROT:TIM(%u):",msg_timestamp(msg));
+		tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg));
 		switch (msg_type(msg)) {
 		case STATE_MSG:
 			tipc_printf(buf, "STATE:");
-			tipc_printf(buf, "%s:",msg_probe(msg) ? "PRB" :"");
-			tipc_printf(buf, "NXS(%u):",msg_next_sent(msg));
-			tipc_printf(buf, "GAP(%u):",msg_seq_gap(msg));
-			tipc_printf(buf, "LSTBC(%u):",msg_last_bcast(msg));
+			tipc_printf(buf, "%s:", msg_probe(msg) ? "PRB" : "");
+			tipc_printf(buf, "NXS(%u):", msg_next_sent(msg));
+			tipc_printf(buf, "GAP(%u):", msg_seq_gap(msg));
+			tipc_printf(buf, "LSTBC(%u):", msg_last_bcast(msg));
 			break;
 		case RESET_MSG:
 			tipc_printf(buf, "RESET:");
 			if (msg_size(msg) != msg_hdr_sz(msg))
-				tipc_printf(buf, "BEAR:%s:",msg_data(msg));
+				tipc_printf(buf, "BEAR:%s:", msg_data(msg));
 			break;
 		case ACTIVATE_MSG:
 			tipc_printf(buf, "ACTIVATE:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
-		tipc_printf(buf, "PLANE(%c):",msg_net_plane(msg));
-		tipc_printf(buf, "SESS(%u):",msg_session(msg));
+		tipc_printf(buf, "PLANE(%c):", msg_net_plane(msg));
+		tipc_printf(buf, "SESS(%u):", msg_session(msg));
 		break;
 	case CHANGEOVER_PROTOCOL:
 		tipc_printf(buf, "TUNL:");
@@ -269,10 +269,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			break;
 		case ORIGINAL_MSG:
 			tipc_printf(buf, "ORIG:");
-			tipc_printf(buf, "EXP(%u)",msg_msgcnt(msg));
+			tipc_printf(buf, "EXP(%u)", msg_msgcnt(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		break;
 	case ROUTE_DISTRIBUTOR:
@@ -280,26 +280,26 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		switch (msg_type(msg)) {
 		case EXT_ROUTING_TABLE:
 			tipc_printf(buf, "EXT_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case LOCAL_ROUTING_TABLE:
 			tipc_printf(buf, "LOCAL_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case SLAVE_ROUTING_TABLE:
 			tipc_printf(buf, "DP_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case ROUTE_ADDITION:
 			tipc_printf(buf, "ADD:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case ROUTE_REMOVAL:
 			tipc_printf(buf, "REMOVE:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		break;
 	case LINK_CONFIG:
@@ -312,7 +312,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "DSC_RESP:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x:",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x:", msg_type(msg));
 			break;
 		}
 		break;
@@ -393,8 +393,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 	}
 
 	if (msg_user(msg) ==  LINK_CONFIG) {
-		u32* raw = (u32*)msg;
-		struct tipc_media_addr* orig = (struct tipc_media_addr*)&raw[5];
+		u32 *raw = (u32 *)msg;
+		struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
 		tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
 		tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
 		tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 68b65ef3e74b..92c4c4fd7b3f 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -174,7 +174,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m)
 	return msg_bits(m, 0, 21, 0xf) << 2;
 }
 
-static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n)
+static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n)
 {
 	msg_set_bits(m, 0, 21, 0xf, n>>2);
 }
@@ -425,7 +425,7 @@ static inline u32 msg_routed(struct tipc_msg *m)
 {
 	if (likely(msg_short(m)))
 		return 0;
-	return(msg_destnode(m) ^ msg_orignode(m)) >> 11;
+	return (msg_destnode(m) ^ msg_orignode(m)) >> 11;
 }
 
 static inline u32 msg_nametype(struct tipc_msg *m)
@@ -863,7 +863,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
 int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect);
 int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf);
+			    int max_size, int usrmem, struct sk_buff **buf);
 
 static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
@@ -872,7 +872,7 @@ static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr
 
 static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
-	memcpy(a, &((int*)m)[5], sizeof(*a));
+	memcpy(a, &((int *)m)[5], sizeof(*a));
 }
 
 #endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ddc2ad4c2d32..46b2f31f96a1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -296,8 +296,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 
 		sseq = &nseq->sseqs[inspos];
 		freesseq = &nseq->sseqs[nseq->first_free];
-		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof (*sseq));
-		memset(sseq, 0, sizeof (*sseq));
+		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq));
+		memset(sseq, 0, sizeof(*sseq));
 		nseq->first_free++;
 		sseq->lower = lower;
 		sseq->upper = upper;
@@ -471,7 +471,7 @@ end_node:
 
 	if (!sseq->zone_list) {
 		free = &nseq->sseqs[nseq->first_free--];
-		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq));
+		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
 		removed_subseq = 1;
 	}
 
@@ -507,7 +507,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s
 
 	while (sseq != &nseq->sseqs[nseq->first_free]) {
 		struct publication *zl = sseq->zone_list;
-		if (zl && tipc_subscr_overlap(s,sseq->lower,sseq->upper)) {
+		if (zl && tipc_subscr_overlap(s, sseq->lower, sseq->upper)) {
 			struct publication *crs = zl;
 			int must_report = 1;
 
@@ -798,7 +798,7 @@ void tipc_nametbl_subscribe(struct subscription *s)
 	if (!seq) {
 		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
 	}
-	if (seq){
+	if (seq) {
 		spin_lock_bh(&seq->lock);
 		tipc_nameseq_subscribe(seq, s);
 		spin_unlock_bh(&seq->lock);
@@ -819,7 +819,7 @@ void tipc_nametbl_unsubscribe(struct subscription *s)
 
 	write_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(s->seq.type);
-	if (seq != NULL){
+	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&s->nameseq_list);
 		spin_unlock_bh(&seq->lock);
@@ -852,7 +852,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
 	}
 
 	do {
-		sprintf (portIdStr, "<%u.%u.%u:%u>",
+		sprintf(portIdStr, "<%u.%u.%u:%u>",
 			 tipc_zone(publ->node), tipc_cluster(publ->node),
 			 tipc_node(publ->node), publ->ref);
 		tipc_printf(buf, "%-26s ", portIdStr);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 139882d4ed00..d228bd682655 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -46,7 +46,7 @@ struct port_list;
  * TIPC name types reserved for internal TIPC use (both current and planned)
  */
 
-#define TIPC_ZM_SRV 3  		/* zone master service name type */
+#define TIPC_ZM_SRV 3		/* zone master service name type */
 
 
 /**
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 126d774883dd..2ed162f91a08 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -334,7 +334,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	/* Clean up broadcast reception remains */
 	n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
 	while (n_ptr->bclink.deferred_head) {
-		struct sk_buff* buf = n_ptr->bclink.deferred_head;
+		struct sk_buff *buf = n_ptr->bclink.deferred_head;
 		n_ptr->bclink.deferred_head = buf->next;
 		buf_discard(buf);
 	}
diff --git a/net/tipc/port.c b/net/tipc/port.c
index db14b7e0afdc..78fcb75bb082 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -54,8 +54,8 @@ static DEFINE_SPINLOCK(queue_lock);
 
 static LIST_HEAD(ports);
 static void port_handle_node_down(unsigned long ref);
-static struct sk_buff* port_build_self_abort_msg(struct port *,u32 err);
-static struct sk_buff* port_build_peer_abort_msg(struct port *,u32 err);
+static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err);
+static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err);
 static void port_timeout(unsigned long ref);
 
 
@@ -155,7 +155,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 
 void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 {
-	struct tipc_msg* msg;
+	struct tipc_msg *msg;
 	struct port_list dports = {0, NULL, };
 	struct port_list *item = dp;
 	int cnt = 0;
@@ -193,7 +193,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 			if ((index == 0) && (cnt != 0)) {
 				item = item->next;
 			}
-			msg_set_destport(buf_msg(b),item->ports[index]);
+			msg_set_destport(buf_msg(b), item->ports[index]);
 			tipc_port_recv_msg(b);
 		}
 	}
@@ -484,7 +484,7 @@ static void port_timeout(unsigned long ref)
 static void port_handle_node_down(unsigned long ref)
 {
 	struct port *p_ptr = tipc_port_lock(ref);
-	struct sk_buff* buf = NULL;
+	struct sk_buff *buf = NULL;
 
 	if (!p_ptr)
 		return;
@@ -620,8 +620,7 @@ static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id)
 			tipc_printf(buf, " via {%u,%u}",
 				    p_ptr->publ.conn_type,
 				    p_ptr->publ.conn_instance);
-	}
-	else if (p_ptr->publ.published) {
+	} else if (p_ptr->publ.published) {
 		tipc_printf(buf, " bound to");
 		list_for_each_entry(publ, &p_ptr->publications, pport_list) {
 			if (publ->lower == publ->upper)
@@ -1099,7 +1098,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	p_ptr->publ.connected = 1;
 	k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
 
-	tipc_nodesub_subscribe(&p_ptr->subscription,peer->node,
+	tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
 			  (void *)(unsigned long)ref,
 			  (net_ev_handler)port_handle_node_down);
 	res = 0;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index a9c528799f2f..8e84b989949c 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -258,7 +258,7 @@ static inline void tipc_port_unlock(struct port *p_ptr)
 	spin_unlock_bh(p_ptr->publ.lock);
 }
 
-static inline struct port* tipc_port_deref(u32 ref)
+static inline struct port *tipc_port_deref(u32 ref)
 {
 	return (struct port *)tipc_ref_deref(ref);
 }
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index ab8ad32d8c20..3974529a66e7 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -178,14 +178,12 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
 		next_plus_upper = entry->ref;
 		tipc_ref_table.first_free = next_plus_upper & index_mask;
 		ref = (next_plus_upper & ~index_mask) + index;
-	}
-	else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
+	} else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
 		index = tipc_ref_table.init_point++;
 		entry = &(tipc_ref_table.entries[index]);
 		spin_lock_init(&entry->lock);
 		ref = tipc_ref_table.start_mask + index;
-	}
-	else {
+	} else {
 		ref = 0;
 	}
 	write_unlock_bh(&ref_table_lock);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1a2eb23c6223..e9fc5df79eb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -375,7 +375,7 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
  *
  * NOTE: This routine doesn't need to take the socket lock since it only
  *       accesses socket information that is unchanging (or which changes in
- * 	 a completely predictable manner).
+ *       a completely predictable manner).
  */
 
 static int get_name(struct socket *sock, struct sockaddr *uaddr,
@@ -570,14 +570,12 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 					     dest->addr.name.domain,
 					     m->msg_iovlen,
 					     m->msg_iov);
-		}
-		else if (dest->addrtype == TIPC_ADDR_ID) {
+		} else if (dest->addrtype == TIPC_ADDR_ID) {
 			res = tipc_send2port(tport->ref,
 					     &dest->addr.id,
 					     m->msg_iovlen,
 					     m->msg_iov);
-		}
-		else if (dest->addrtype == TIPC_ADDR_MCAST) {
+		} else if (dest->addrtype == TIPC_ADDR_MCAST) {
 			if (needs_conn) {
 				res = -EOPNOTSUPP;
 				break;
@@ -812,8 +810,8 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
 		addr->addrtype = TIPC_ADDR_ID;
 		addr->addr.id.ref = msg_origport(msg);
 		addr->addr.id.node = msg_orignode(msg);
-		addr->addr.name.domain = 0;   	/* could leave uninitialized */
-		addr->scope = 0;   		/* could leave uninitialized */
+		addr->addr.name.domain = 0;	/* could leave uninitialized */
+		addr->scope = 0;		/* could leave uninitialized */
 		m->msg_namelen = sizeof(struct sockaddr_tipc);
 	}
 }
@@ -1743,10 +1741,10 @@ static int getsockopt(struct socket *sock,
 		value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
 		/* no need to set "res", since already 0 at this point */
 		break;
-	 case TIPC_NODE_RECVQ_DEPTH:
+	case TIPC_NODE_RECVQ_DEPTH:
 		value = (u32)atomic_read(&tipc_queue_size);
 		break;
-	 case TIPC_SOCK_RECVQ_DEPTH:
+	case TIPC_SOCK_RECVQ_DEPTH:
 		value = skb_queue_len(&sk->sk_receive_queue);
 		break;
 	default:
@@ -1772,7 +1770,7 @@ static int getsockopt(struct socket *sock,
  */
 
 static const struct proto_ops msg_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1793,7 +1791,7 @@ static const struct proto_ops msg_ops = {
 };
 
 static const struct proto_ops packet_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1814,7 +1812,7 @@ static const struct proto_ops packet_ops = {
 };
 
 static const struct proto_ops stream_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1835,7 +1833,7 @@ static const struct proto_ops stream_ops = {
 };
 
 static const struct net_proto_family tipc_family_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.create		= tipc_create
 };
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 510271152165..acc30e1833c9 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -538,7 +538,7 @@ int tipc_subscr_start(void)
 	struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV};
 	int res;
 
-	memset(&topsrv, 0, sizeof (topsrv));
+	memset(&topsrv, 0, sizeof(topsrv));
 	spin_lock_init(&topsrv.lock);
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
-- 
cgit v1.2.3-71-gd317


From 68763c890eb2a60f9b50a061502f94e0cf20fdfe Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Sun, 2 Jan 2011 22:54:09 +0000
Subject: trivial: Fix typo fault in netdevice.h

Signed-off-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc916c5c3279..0f6b1c965815 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -732,7 +732,7 @@ struct xps_dev_maps {
  *	   neither operation.
  *
  * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp);
- *	If device support VLAN receive accleration
+ *	If device support VLAN receive acceleration
  *	(ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called
  *	when vlan groups for the device changes.  Note: grp is NULL
  *	if no vlan's groups are being used.
-- 
cgit v1.2.3-71-gd317


From 410cf2bd3dc6ec1ed9e1b36b25b9d7aa927ed14e Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 13 Dec 2010 14:56:02 +0100
Subject: firewire: use split transaction timeout only for split transactions

Instead of starting the split transaction timeout timer when any request
is submitted, start it only when the destination's ACK_PENDING has been
received.  This prevents us from using a timeout that is too short, and,
if the controller's AT queue is emptying very slowly, from cancelling
a packet that has not yet been sent.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-transaction.c | 45 ++++++++++++++++++++++++++-----------
 include/linux/firewire.h            |  2 +-
 2 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 9e81cc54abd2..d00f8ce902cc 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -72,6 +72,15 @@
 #define PHY_CONFIG_ROOT_ID(node_id)	((((node_id) & 0x3f) << 24) | (1 << 23))
 #define PHY_IDENTIFIER(id)		((id) << 30)
 
+/* returns 0 if the split timeout handler is already running */
+static int try_cancel_split_timeout(struct fw_transaction *t)
+{
+	if (t->is_split_transaction)
+		return del_timer(&t->split_timeout_timer);
+	else
+		return 1;
+}
+
 static int close_transaction(struct fw_transaction *transaction,
 			     struct fw_card *card, int rcode)
 {
@@ -81,7 +90,7 @@ static int close_transaction(struct fw_transaction *transaction,
 	spin_lock_irqsave(&card->lock, flags);
 	list_for_each_entry(t, &card->transaction_list, link) {
 		if (t == transaction) {
-			if (!del_timer(&t->split_timeout_timer)) {
+			if (!try_cancel_split_timeout(t)) {
 				spin_unlock_irqrestore(&card->lock, flags);
 				goto timed_out;
 			}
@@ -141,16 +150,28 @@ static void split_transaction_timeout_callback(unsigned long data)
 	card->tlabel_mask &= ~(1ULL << t->tlabel);
 	spin_unlock_irqrestore(&card->lock, flags);
 
-	card->driver->cancel_packet(card, &t->packet);
-
-	/*
-	 * At this point cancel_packet will never call the transaction
-	 * callback, since we just took the transaction out of the list.
-	 * So do it here.
-	 */
 	t->callback(card, RCODE_CANCELLED, NULL, 0, t->callback_data);
 }
 
+static void start_split_transaction_timeout(struct fw_transaction *t,
+					    struct fw_card *card)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&card->lock, flags);
+
+	if (list_empty(&t->link) || WARN_ON(t->is_split_transaction)) {
+		spin_unlock_irqrestore(&card->lock, flags);
+		return;
+	}
+
+	t->is_split_transaction = true;
+	mod_timer(&t->split_timeout_timer,
+		  jiffies + card->split_timeout_jiffies);
+
+	spin_unlock_irqrestore(&card->lock, flags);
+}
+
 static void transmit_complete_callback(struct fw_packet *packet,
 				       struct fw_card *card, int status)
 {
@@ -162,7 +183,7 @@ static void transmit_complete_callback(struct fw_packet *packet,
 		close_transaction(t, card, RCODE_COMPLETE);
 		break;
 	case ACK_PENDING:
-		t->timestamp = packet->timestamp;
+		start_split_transaction_timeout(t, card);
 		break;
 	case ACK_BUSY_X:
 	case ACK_BUSY_A:
@@ -349,11 +370,9 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 	t->node_id = destination_id;
 	t->tlabel = tlabel;
 	t->card = card;
+	t->is_split_transaction = false;
 	setup_timer(&t->split_timeout_timer,
 		    split_transaction_timeout_callback, (unsigned long)t);
-	/* FIXME: start this timer later, relative to t->timestamp */
-	mod_timer(&t->split_timeout_timer,
-		  jiffies + card->split_timeout_jiffies);
 	t->callback = callback;
 	t->callback_data = callback_data;
 
@@ -926,7 +945,7 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
 	spin_lock_irqsave(&card->lock, flags);
 	list_for_each_entry(t, &card->transaction_list, link) {
 		if (t->node_id == source && t->tlabel == tlabel) {
-			if (!del_timer(&t->split_timeout_timer)) {
+			if (!try_cancel_split_timeout(t)) {
 				spin_unlock_irqrestore(&card->lock, flags);
 				goto timed_out;
 			}
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 1cd637ef62d2..9a3f5f9383f6 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -302,9 +302,9 @@ struct fw_packet {
 struct fw_transaction {
 	int node_id; /* The generation is implied; it is always the current. */
 	int tlabel;
-	int timestamp;
 	struct list_head link;
 	struct fw_card *card;
+	bool is_split_transaction;
 	struct timer_list split_timeout_timer;
 
 	struct fw_packet packet;
-- 
cgit v1.2.3-71-gd317


From 7b26e5ebd8b27b0126a84ae7f9a42aa8293d6c48 Mon Sep 17 00:00:00 2001
From: Francois-Xavier Le Bail <fx.lebail@orange.fr>
Date: Tue, 4 Jan 2011 09:10:20 +0000
Subject: net: typos in comments in include/linux/igmp.h

There are typos in comments in include/linux/igmp.h:

83 #define IGMP_HOST_MEMBERSHIP_QUERY      0x11    /* From RFC1112 */
84 #define IGMP_HOST_MEMBERSHIP_REPORT     0x12    /* Ditto */
[snip]
88 #define IGMPV2_HOST_MEMBERSHIP_REPORT   0x16    /* V2 version of 0x11 */
89 #define IGMP_HOST_LEAVE_MESSAGE         0x17
90 #define IGMPV3_HOST_MEMBERSHIP_REPORT   0x22    /* V3 version of 0x11 */

The line 88 and 90 are about REPORT messages.
The IGMP_HOST_MEMBERSHIP_REPORT (IGMP V1) value is 0x12.
So the comment on line 88 must be /* V2 version of 0x12 */,
and the comment on line 90 must be /* V3 version of 0x12 */.

Signed-off-by: Francois-Xavier Le Bail <fx.lebail@orange.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index c4987f265109..74cfcff0148b 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -85,9 +85,9 @@ struct igmpv3_query {
 #define IGMP_DVMRP			0x13	/* DVMRP routing */
 #define IGMP_PIM			0x14	/* PIM routing */
 #define IGMP_TRACE			0x15
-#define IGMPV2_HOST_MEMBERSHIP_REPORT	0x16	/* V2 version of 0x11 */
+#define IGMPV2_HOST_MEMBERSHIP_REPORT	0x16	/* V2 version of 0x12 */
 #define IGMP_HOST_LEAVE_MESSAGE 	0x17
-#define IGMPV3_HOST_MEMBERSHIP_REPORT	0x22	/* V3 version of 0x11 */
+#define IGMPV3_HOST_MEMBERSHIP_REPORT	0x22	/* V3 version of 0x12 */
 
 #define IGMP_MTRACE_RESP		0x1e
 #define IGMP_MTRACE			0x1f
-- 
cgit v1.2.3-71-gd317


From 8d608aa6295242fe4c4b6105b8c59c6a5b232d89 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 7 Dec 2010 08:57:57 +1000
Subject: vga_switcheroo: add reprobe hook for fbcon to recheck connected
 outputs.

This adds a hook after the mux is switched for the driver to reprobe
the connected outputs.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_dma.c         | 1 +
 drivers/gpu/drm/nouveau/nouveau_state.c | 7 +++++++
 drivers/gpu/drm/radeon/radeon_device.c  | 1 +
 drivers/gpu/vga/vga_switcheroo.c        | 6 ++++++
 include/linux/vga_switcheroo.h          | 1 +
 5 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8cb7f93732c2..ec1f650f6fab 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1231,6 +1231,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	ret = vga_switcheroo_register_client(dev->pdev,
 					     i915_switcheroo_set_state,
+					     NULL,
 					     i915_switcheroo_can_switch);
 	if (ret)
 		goto cleanup_vga_client;
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index db4b410f4a8c..1b87eee22fa9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -605,6 +605,12 @@ static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
 	}
 }
 
+static void nouveau_switcheroo_reprobe(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	nouveau_fbcon_output_poll_changed(dev);
+}
+
 static bool nouveau_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -625,6 +631,7 @@ nouveau_card_init(struct drm_device *dev)
 
 	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
 	vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state,
+				       nouveau_switcheroo_reprobe,
 				       nouveau_switcheroo_can_switch);
 
 	/* Initialise internal driver API hooks */
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 96f0eb484ce6..1a1017f0d9db 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -779,6 +779,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 	vga_switcheroo_register_client(rdev->pdev,
 				       radeon_switcheroo_set_state,
+				       NULL,
 				       radeon_switcheroo_can_switch);
 
 	r = radeon_init(rdev);
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 463691a1650e..2b8e1d25e8d0 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -33,6 +33,7 @@ struct vga_switcheroo_client {
 	struct fb_info *fb_info;
 	int pwr_state;
 	void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state);
+	void (*reprobe)(struct pci_dev *pdev);
 	bool (*can_switch)(struct pci_dev *pdev);
 	int id;
 	bool active;
@@ -103,6 +104,7 @@ static void vga_switcheroo_enable(void)
 
 int vga_switcheroo_register_client(struct pci_dev *pdev,
 				   void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state),
+				   void (*reprobe)(struct pci_dev *pdev),
 				   bool (*can_switch)(struct pci_dev *pdev))
 {
 	int index;
@@ -117,6 +119,7 @@ int vga_switcheroo_register_client(struct pci_dev *pdev,
 	vgasr_priv.clients[index].pwr_state = VGA_SWITCHEROO_ON;
 	vgasr_priv.clients[index].pdev = pdev;
 	vgasr_priv.clients[index].set_gpu_state = set_gpu_state;
+	vgasr_priv.clients[index].reprobe = reprobe;
 	vgasr_priv.clients[index].can_switch = can_switch;
 	vgasr_priv.clients[index].id = -1;
 	if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
@@ -251,6 +254,9 @@ static int vga_switchto(struct vga_switcheroo_client *new_client)
 	if (ret)
 		return ret;
 
+	if (new_client->reprobe)
+		new_client->reprobe(new_client->pdev);
+
 	if (active->pwr_state == VGA_SWITCHEROO_ON)
 		vga_switchoff(active);
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index ae9ab13b963d..f80fa9de5b82 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -33,6 +33,7 @@ struct vga_switcheroo_handler {
 void vga_switcheroo_unregister_client(struct pci_dev *dev);
 int vga_switcheroo_register_client(struct pci_dev *dev,
 				   void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+				   void (*reprobe)(struct pci_dev *dev),
 				   bool (*can_switch)(struct pci_dev *dev));
 
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
-- 
cgit v1.2.3-71-gd317


From 6bf4123760a5aece6e4829ce90b70b6ffd751d65 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 5 Jan 2011 12:50:16 +1100
Subject: sched: Change wait_for_completion_*_timeout() to return a signed long

wait_for_completion_*_timeout() can return:

   0: if the wait timed out
 -ve: if the wait was interrupted
 +ve: if the completion was completed.

As they currently return an 'unsigned long', the last two cases
are not easily distinguished which can easily result in buggy
code, as is the case for the recently added
wait_for_completion_interruptible_timeout() call in
net/sunrpc/cache.c

So change them both to return 'long'.  As MAX_SCHEDULE_TIMEOUT
is LONG_MAX, a large +ve return value should never overflow.

Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: J.  Bruce Fields <bfields@fieldses.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <20110105125016.64ccab0e@notabene.brown>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/completion.h | 8 ++++----
 kernel/sched.c             | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 36d57f74cd01..51494e6b5548 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
 						   unsigned long timeout);
-extern unsigned long wait_for_completion_interruptible_timeout(
-			struct completion *x, unsigned long timeout);
-extern unsigned long wait_for_completion_killable_timeout(
-			struct completion *x, unsigned long timeout);
+extern long wait_for_completion_interruptible_timeout(
+	struct completion *x, unsigned long timeout);
+extern long wait_for_completion_killable_timeout(
+	struct completion *x, unsigned long timeout);
 extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index f2f914e0c47c..114a0deb2b01 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4395,7 +4395,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
 					  unsigned long timeout)
 {
@@ -4428,7 +4428,7 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_killable_timeout(struct completion *x,
 				     unsigned long timeout)
 {
-- 
cgit v1.2.3-71-gd317


From 68b65f7305e54b822b2483c60de7d7b017526a92 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 22 Dec 2010 17:24:39 +0000
Subject: ARM: PL011: Add support for transmit DMA

Add DMA engine support for transmit to the PL011 driver.  Based on a
patch from Linus Walliej, with the following changes:

- remove RX DMA support.  As PL011 doesn't give us receive timeout
  interrupts, we only get notified of received data when the RX DMA
  has completed.  This rather sucks for interactive use of the TTY.

- remove abuse of completions.  Completions are supposed to be for
  events, not to tell what condition buffers are in.  Replace it with
  a simple 'queued' bool.

- fix locking - it is only safe to access the circular buffer with the
  port lock held.

- only map the DMA buffer when required - if we're ever behind an IOMMU
  this helps keep IOMMU usage down, and also ensures that we're legal
  when we change the scatterlist entry length.

- fix XON/XOFF sending - we must send XON/XOFF characters out as soon
  as possible - waiting for up to 4095 characters in the DMA buffer
  to be sent first is not acceptable.

- fix XON/XOFF receive handling - we need to stop DMA when instructed
  to by the TTY layer, and restart it again when instructed to.  There
  is a subtle problem here: we must not completely empty the circular
  buffer with DMA, otherwise we will not be notified of XON.

- change the 'enable_dma' flag into a 'using DMA' flag, and track
  whether we can use TX DMA by whether the channel pointer is non-NULL.
  This gives us more control over whether we use DMA in the driver.

- we don't need to have the TX DMA buffer continually allocated for
  each port - instead, allocate it when the port starts up, and free
  it when it's shut down.  Update the 'using DMA' flag if we get
  the buffer, and adjust the TTY FIFO size appropriately.

- if we're going to use PIO to send characters, use the existing IRQ
  based functionality rather than reimplementing it.  This also ensures
  we call uart_write_wakeup() at the appropriate time, otherwise we'll
  stall.

- use DMA engine helper functions for type safety.

- fix init when built as a module - we can't have to initcall functions,
  so we must settle on one.  This means we can eliminate the deferred
  DMA initialization.

- there is no need to terminate transfers on a failed prep_slave_sg()
  call - nothing has been setup, so nothing needs to be terminated.
  This avoids a potential deadlock in the DMA engine code
  (tasklet->callback->failed prepare->terminate->tasklet_disable
   which then ends up waiting for the tasklet to finish running.)

- Dan says that the submission callback should not return an error:
  | dma_submit_error() is something I should have removed after commit
  | a0587bcf "ioat1: move descriptor allocation from submit to prep" all
  | errors should be notified by prep failing to return a descriptor
  | handle.  Negative dma_cookie_t values are only returned by the
  | dma_async_memcpy* calls which translate a prep failure into -ENOMEM.
  So remove the error handling at that point.  This also solves the
  potential deadlock mentioned in the previous comment.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/amba-pl011.c | 508 +++++++++++++++++++++++++++++++++++++++++++-
 include/linux/amba/serial.h |   7 +
 2 files changed, 513 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index f741a8b51400..ab025dc52fa4 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -7,6 +7,7 @@
  *
  *  Copyright 1999 ARM Limited
  *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -48,6 +49,9 @@
 #include <linux/amba/serial.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
 
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -88,6 +92,14 @@ static struct vendor_data vendor_st = {
 	.oversampling		= true,
 };
 
+/* Deals with DMA transactions */
+struct pl011_dmatx_data {
+	struct dma_chan		*chan;
+	struct scatterlist	sg;
+	char			*buf;
+	bool			queued;
+};
+
 /*
  * We wrap our port structure around the generic uart_port.
  */
@@ -95,6 +107,7 @@ struct uart_amba_port {
 	struct uart_port	port;
 	struct clk		*clk;
 	const struct vendor_data *vendor;
+	unsigned int		dmacr;		/* dma control reg */
 	unsigned int		im;		/* interrupt mask */
 	unsigned int		old_status;
 	unsigned int		fifosize;	/* vendor-specific */
@@ -102,22 +115,500 @@ struct uart_amba_port {
 	unsigned int		lcrh_rx;	/* vendor-specific */
 	bool			autorts;
 	char			type[12];
+#ifdef CONFIG_DMA_ENGINE
+	/* DMA stuff */
+	bool			using_dma;
+	struct pl011_dmatx_data	dmatx;
+#endif
+};
+
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ */
+#ifdef CONFIG_DMA_ENGINE
+
+#define PL011_DMA_BUFFER_SIZE PAGE_SIZE
+
+static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
+{
+	/* DMA is the sole user of the platform data right now */
+	struct amba_pl011_data *plat = uap->port.dev->platform_data;
+	struct dma_slave_config tx_conf = {
+		.dst_addr = uap->port.mapbase + UART01x_DR,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+		.direction = DMA_TO_DEVICE,
+		.dst_maxburst = uap->fifosize >> 1,
+	};
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+
+	/* We need platform data */
+	if (!plat || !plat->dma_filter) {
+		dev_info(uap->port.dev, "no DMA platform data\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, plat->dma_filter, plat->dma_tx_param);
+	if (!chan) {
+		dev_err(uap->port.dev, "no TX DMA channel!\n");
+		return;
+	}
+
+	dmaengine_slave_config(chan, &tx_conf);
+	uap->dmatx.chan = chan;
+
+	dev_info(uap->port.dev, "DMA channel TX %s\n",
+		 dma_chan_name(uap->dmatx.chan));
+}
+
+#ifndef MODULE
+/*
+ * Stack up the UARTs and let the above initcall be done at device
+ * initcall time, because the serial driver is called as an arch
+ * initcall, and at this time the DMA subsystem is not yet registered.
+ * At this point the driver will switch over to using DMA where desired.
+ */
+struct dma_uap {
+	struct list_head node;
+	struct uart_amba_port *uap;
 };
 
+static LIST_HEAD(pl011_dma_uarts);
+
+static int __init pl011_dma_initcall(void)
+{
+	struct list_head *node, *tmp;
+
+	list_for_each_safe(node, tmp, &pl011_dma_uarts) {
+		struct dma_uap *dmau = list_entry(node, struct dma_uap, node);
+		pl011_dma_probe_initcall(dmau->uap);
+		list_del(node);
+		kfree(dmau);
+	}
+	return 0;
+}
+
+device_initcall(pl011_dma_initcall);
+
+static void pl011_dma_probe(struct uart_amba_port *uap)
+{
+	struct dma_uap *dmau = kzalloc(sizeof(struct dma_uap), GFP_KERNEL);
+	if (dmau) {
+		dmau->uap = uap;
+		list_add_tail(&dmau->node, &pl011_dma_uarts);
+	}
+}
+#else
+static void pl011_dma_probe(struct uart_amba_port *uap)
+{
+	pl011_dma_probe_initcall(uap);
+}
+#endif
+
+static void pl011_dma_remove(struct uart_amba_port *uap)
+{
+	/* TODO: remove the initcall if it has not yet executed */
+	if (uap->dmatx.chan)
+		dma_release_channel(uap->dmatx.chan);
+}
+
+
+/* Forward declare this for the refill routine */
+static int pl011_dma_tx_refill(struct uart_amba_port *uap);
+
+/*
+ * The current DMA TX buffer has been sent.
+ * Try to queue up another DMA buffer.
+ */
+static void pl011_dma_tx_callback(void *data)
+{
+	struct uart_amba_port *uap = data;
+	struct pl011_dmatx_data *dmatx = &uap->dmatx;
+	unsigned long flags;
+	u16 dmacr;
+
+	spin_lock_irqsave(&uap->port.lock, flags);
+	if (uap->dmatx.queued)
+		dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1,
+			     DMA_TO_DEVICE);
+
+	dmacr = uap->dmacr;
+	uap->dmacr = dmacr & ~UART011_TXDMAE;
+	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+
+	/*
+	 * If TX DMA was disabled, it means that we've stopped the DMA for
+	 * some reason (eg, XOFF received, or we want to send an X-char.)
+	 *
+	 * Note: we need to be careful here of a potential race between DMA
+	 * and the rest of the driver - if the driver disables TX DMA while
+	 * a TX buffer completing, we must update the tx queued status to
+	 * get further refills (hence we check dmacr).
+	 */
+	if (!(dmacr & UART011_TXDMAE) || uart_tx_stopped(&uap->port) ||
+	    uart_circ_empty(&uap->port.state->xmit)) {
+		uap->dmatx.queued = false;
+		spin_unlock_irqrestore(&uap->port.lock, flags);
+		return;
+	}
+
+	if (pl011_dma_tx_refill(uap) <= 0) {
+		/*
+		 * We didn't queue a DMA buffer for some reason, but we
+		 * have data pending to be sent.  Re-enable the TX IRQ.
+		 */
+		uap->im |= UART011_TXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+	}
+	spin_unlock_irqrestore(&uap->port.lock, flags);
+}
+
+/*
+ * Try to refill the TX DMA buffer.
+ * Locking: called with port lock held and IRQs disabled.
+ * Returns:
+ *   1 if we queued up a TX DMA buffer.
+ *   0 if we didn't want to handle this by DMA
+ *  <0 on error
+ */
+static int pl011_dma_tx_refill(struct uart_amba_port *uap)
+{
+	struct pl011_dmatx_data *dmatx = &uap->dmatx;
+	struct dma_chan *chan = dmatx->chan;
+	struct dma_device *dma_dev = chan->device;
+	struct dma_async_tx_descriptor *desc;
+	struct circ_buf *xmit = &uap->port.state->xmit;
+	unsigned int count;
+
+	/*
+	 * Try to avoid the overhead involved in using DMA if the
+	 * transaction fits in the first half of the FIFO, by using
+	 * the standard interrupt handling.  This ensures that we
+	 * issue a uart_write_wakeup() at the appropriate time.
+	 */
+	count = uart_circ_chars_pending(xmit);
+	if (count < (uap->fifosize >> 1)) {
+		uap->dmatx.queued = false;
+		return 0;
+	}
+
+	/*
+	 * Bodge: don't send the last character by DMA, as this
+	 * will prevent XON from notifying us to restart DMA.
+	 */
+	count -= 1;
+
+	/* Else proceed to copy the TX chars to the DMA buffer and fire DMA */
+	if (count > PL011_DMA_BUFFER_SIZE)
+		count = PL011_DMA_BUFFER_SIZE;
+
+	if (xmit->tail < xmit->head)
+		memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], count);
+	else {
+		size_t first = UART_XMIT_SIZE - xmit->tail;
+		size_t second = xmit->head;
+
+		memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], first);
+		if (second)
+			memcpy(&dmatx->buf[first], &xmit->buf[0], second);
+	}
+
+	dmatx->sg.length = count;
+
+	if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) {
+		uap->dmatx.queued = false;
+		dev_dbg(uap->port.dev, "unable to map TX DMA\n");
+		return -EBUSY;
+	}
+
+	desc = dma_dev->device_prep_slave_sg(chan, &dmatx->sg, 1, DMA_TO_DEVICE,
+					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE);
+		uap->dmatx.queued = false;
+		/*
+		 * If DMA cannot be used right now, we complete this
+		 * transaction via IRQ and let the TTY layer retry.
+		 */
+		dev_dbg(uap->port.dev, "TX DMA busy\n");
+		return -EBUSY;
+	}
+
+	/* Some data to go along to the callback */
+	desc->callback = pl011_dma_tx_callback;
+	desc->callback_param = uap;
+
+	/* All errors should happen at prepare time */
+	dmaengine_submit(desc);
+
+	/* Fire the DMA transaction */
+	dma_dev->device_issue_pending(chan);
+
+	uap->dmacr |= UART011_TXDMAE;
+	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+	uap->dmatx.queued = true;
+
+	/*
+	 * Now we know that DMA will fire, so advance the ring buffer
+	 * with the stuff we just dispatched.
+	 */
+	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
+	uap->port.icount.tx += count;
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&uap->port);
+
+	return 1;
+}
+
+/*
+ * We received a transmit interrupt without a pending X-char but with
+ * pending characters.
+ * Locking: called with port lock held and IRQs disabled.
+ * Returns:
+ *   false if we want to use PIO to transmit
+ *   true if we queued a DMA buffer
+ */
+static bool pl011_dma_tx_irq(struct uart_amba_port *uap)
+{
+	if (!uap->using_dma)
+		return false;
+
+	/*
+	 * If we already have a TX buffer queued, but received a
+	 * TX interrupt, it will be because we've just sent an X-char.
+	 * Ensure the TX DMA is enabled and the TX IRQ is disabled.
+	 */
+	if (uap->dmatx.queued) {
+		uap->dmacr |= UART011_TXDMAE;
+		writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+		uap->im &= ~UART011_TXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+		return true;
+	}
+
+	/*
+	 * We don't have a TX buffer queued, so try to queue one.
+	 * If we succesfully queued a buffer, mask the TX IRQ.
+	 */
+	if (pl011_dma_tx_refill(uap) > 0) {
+		uap->im &= ~UART011_TXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Stop the DMA transmit (eg, due to received XOFF).
+ * Locking: called with port lock held and IRQs disabled.
+ */
+static inline void pl011_dma_tx_stop(struct uart_amba_port *uap)
+{
+	if (uap->dmatx.queued) {
+		uap->dmacr &= ~UART011_TXDMAE;
+		writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+	}
+}
+
+/*
+ * Try to start a DMA transmit, or in the case of an XON/OFF
+ * character queued for send, try to get that character out ASAP.
+ * Locking: called with port lock held and IRQs disabled.
+ * Returns:
+ *   false if we want the TX IRQ to be enabled
+ *   true if we have a buffer queued
+ */
+static inline bool pl011_dma_tx_start(struct uart_amba_port *uap)
+{
+	u16 dmacr;
+
+	if (!uap->using_dma)
+		return false;
+
+	if (!uap->port.x_char) {
+		/* no X-char, try to push chars out in DMA mode */
+		bool ret = true;
+
+		if (!uap->dmatx.queued) {
+			if (pl011_dma_tx_refill(uap) > 0) {
+				uap->im &= ~UART011_TXIM;
+				ret = true;
+			} else {
+				uap->im |= UART011_TXIM;
+				ret = false;
+			}
+			writew(uap->im, uap->port.membase + UART011_IMSC);
+		} else if (!(uap->dmacr & UART011_TXDMAE)) {
+			uap->dmacr |= UART011_TXDMAE;
+			writew(uap->dmacr,
+				       uap->port.membase + UART011_DMACR);
+		}
+		return ret;
+	}
+
+	/*
+	 * We have an X-char to send.  Disable DMA to prevent it loading
+	 * the TX fifo, and then see if we can stuff it into the FIFO.
+	 */
+	dmacr = uap->dmacr;
+	uap->dmacr &= ~UART011_TXDMAE;
+	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+
+	if (readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF) {
+		/*
+		 * No space in the FIFO, so enable the transmit interrupt
+		 * so we know when there is space.  Note that once we've
+		 * loaded the character, we should just re-enable DMA.
+		 */
+		return false;
+	}
+
+	writew(uap->port.x_char, uap->port.membase + UART01x_DR);
+	uap->port.icount.tx++;
+	uap->port.x_char = 0;
+
+	/* Success - restore the DMA state */
+	uap->dmacr = dmacr;
+	writew(dmacr, uap->port.membase + UART011_DMACR);
+
+	return true;
+}
+
+/*
+ * Flush the transmit buffer.
+ * Locking: called with port lock held and IRQs disabled.
+ */
+static void pl011_dma_flush_buffer(struct uart_port *port)
+{
+	struct uart_amba_port *uap = (struct uart_amba_port *)port;
+
+	if (!uap->using_dma)
+		return;
+
+	/* Avoid deadlock with the DMA engine callback */
+	spin_unlock(&uap->port.lock);
+	dmaengine_terminate_all(uap->dmatx.chan);
+	spin_lock(&uap->port.lock);
+	if (uap->dmatx.queued) {
+		dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1,
+			     DMA_TO_DEVICE);
+		uap->dmatx.queued = false;
+		uap->dmacr &= ~UART011_TXDMAE;
+		writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+	}
+}
+
+
+static void pl011_dma_startup(struct uart_amba_port *uap)
+{
+	if (!uap->dmatx.chan)
+		return;
+
+	uap->dmatx.buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
+	if (!uap->dmatx.buf) {
+		dev_err(uap->port.dev, "no memory for DMA TX buffer\n");
+		uap->port.fifosize = uap->fifosize;
+		return;
+	}
+
+	sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE);
+
+	/* The DMA buffer is now the FIFO the TTY subsystem can use */
+	uap->port.fifosize = PL011_DMA_BUFFER_SIZE;
+	uap->using_dma = true;
+
+	/* Turn on DMA error (RX/TX will be enabled on demand) */
+	uap->dmacr |= UART011_DMAONERR;
+	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+}
+
+static void pl011_dma_shutdown(struct uart_amba_port *uap)
+{
+	if (!uap->using_dma)
+		return;
+
+	/* Disable RX and TX DMA */
+	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
+		barrier();
+
+	spin_lock_irq(&uap->port.lock);
+	uap->dmacr &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE);
+	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+	spin_unlock_irq(&uap->port.lock);
+
+	/* In theory, this should already be done by pl011_dma_flush_buffer */
+	dmaengine_terminate_all(uap->dmatx.chan);
+	if (uap->dmatx.queued) {
+		dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1,
+			     DMA_TO_DEVICE);
+		uap->dmatx.queued = false;
+	}
+
+	kfree(uap->dmatx.buf);
+
+	uap->using_dma = false;
+}
+
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void pl011_dma_probe(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_remove(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_startup(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_shutdown(struct uart_amba_port *uap)
+{
+}
+
+static inline bool pl011_dma_tx_irq(struct uart_amba_port *uap)
+{
+	return false;
+}
+
+static inline void pl011_dma_tx_stop(struct uart_amba_port *uap)
+{
+}
+
+static inline bool pl011_dma_tx_start(struct uart_amba_port *uap)
+{
+	return false;
+}
+
+#define pl011_dma_flush_buffer	NULL
+#endif
+
+
 static void pl011_stop_tx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
 
 	uap->im &= ~UART011_TXIM;
 	writew(uap->im, uap->port.membase + UART011_IMSC);
+	pl011_dma_tx_stop(uap);
 }
 
 static void pl011_start_tx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
 
-	uap->im |= UART011_TXIM;
-	writew(uap->im, uap->port.membase + UART011_IMSC);
+	if (!pl011_dma_tx_start(uap)) {
+		uap->im |= UART011_TXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+	}
 }
 
 static void pl011_stop_rx(struct uart_port *port)
@@ -204,6 +695,10 @@ static void pl011_tx_chars(struct uart_amba_port *uap)
 		return;
 	}
 
+	/* If we are using DMA mode, try to send some characters. */
+	if (pl011_dma_tx_irq(uap))
+		return;
+
 	count = uap->fifosize >> 1;
 	do {
 		writew(xmit->buf[xmit->tail], uap->port.membase + UART01x_DR);
@@ -434,6 +929,9 @@ static int pl011_startup(struct uart_port *port)
 	 */
 	uap->old_status = readw(uap->port.membase + UART01x_FR) & UART01x_FR_MODEM_ANY;
 
+	/* Startup DMA */
+	pl011_dma_startup(uap);
+
 	/*
 	 * Finally, enable interrupts
 	 */
@@ -473,6 +971,8 @@ static void pl011_shutdown(struct uart_port *port)
 	writew(0xffff, uap->port.membase + UART011_ICR);
 	spin_unlock_irq(&uap->port.lock);
 
+	pl011_dma_shutdown(uap);
+
 	/*
 	 * Free the interrupt
 	 */
@@ -691,6 +1191,7 @@ static struct uart_ops amba_pl011_pops = {
 	.break_ctl	= pl011_break_ctl,
 	.startup	= pl011_startup,
 	.shutdown	= pl011_shutdown,
+	.flush_buffer	= pl011_dma_flush_buffer,
 	.set_termios	= pl011_set_termios,
 	.type		= pl011_type,
 	.release_port	= pl010_release_port,
@@ -883,6 +1384,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	uap->port.ops = &amba_pl011_pops;
 	uap->port.flags = UPF_BOOT_AUTOCONF;
 	uap->port.line = i;
+	pl011_dma_probe(uap);
 
 	snprintf(uap->type, sizeof(uap->type), "PL011 rev%u", amba_rev(dev));
 
@@ -893,6 +1395,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	if (ret) {
 		amba_set_drvdata(dev, NULL);
 		amba_ports[i] = NULL;
+		pl011_dma_remove(uap);
 		clk_put(uap->clk);
  unmap:
 		iounmap(base);
@@ -916,6 +1419,7 @@ static int pl011_remove(struct amba_device *dev)
 		if (amba_ports[i] == uap)
 			amba_ports[i] = NULL;
 
+	pl011_dma_remove(uap);
 	iounmap(uap->port.membase);
 	clk_put(uap->clk);
 	kfree(uap);
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 6021588ba0a8..577f22eb9225 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -180,6 +180,13 @@ struct amba_device; /* in uncompress this is included but amba/bus.h is not */
 struct amba_pl010_data {
 	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
 };
+
+struct dma_chan;
+struct amba_pl011_data {
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
+};
 #endif
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 38d624361b2a82d6317c379aebf81b1b28210bb0 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 22 Dec 2010 17:59:16 +0000
Subject: ARM: PL011: add DMA burst threshold support for ST variants

ST Micro variants has some specific dma burst threshold compensation,
which allows them to make better use of a DMA controller.  Add support
to set this up.

Based on a patch from Linus Walleij.

Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/amba-pl011.c | 12 ++++++++++++
 include/linux/amba/serial.h | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index ab025dc52fa4..e76d7d000128 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -74,6 +74,7 @@ struct vendor_data {
 	unsigned int		lcrh_tx;
 	unsigned int		lcrh_rx;
 	bool			oversampling;
+	bool			dma_threshold;
 };
 
 static struct vendor_data vendor_arm = {
@@ -82,6 +83,7 @@ static struct vendor_data vendor_arm = {
 	.lcrh_tx		= UART011_LCRH,
 	.lcrh_rx		= UART011_LCRH,
 	.oversampling		= false,
+	.dma_threshold		= false,
 };
 
 static struct vendor_data vendor_st = {
@@ -90,6 +92,7 @@ static struct vendor_data vendor_st = {
 	.lcrh_tx		= ST_UART011_LCRH_TX,
 	.lcrh_rx		= ST_UART011_LCRH_RX,
 	.oversampling		= true,
+	.dma_threshold		= true,
 };
 
 /* Deals with DMA transactions */
@@ -527,6 +530,15 @@ static void pl011_dma_startup(struct uart_amba_port *uap)
 	/* Turn on DMA error (RX/TX will be enabled on demand) */
 	uap->dmacr |= UART011_DMAONERR;
 	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
+
+	/*
+	 * ST Micro variants has some specific dma burst threshold
+	 * compensation. Set this to 16 bytes, so burst will only
+	 * be issued above/below 16 bytes.
+	 */
+	if (uap->vendor->dma_threshold)
+		writew(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16,
+			       uap->port.membase + ST_UART011_DMAWM);
 }
 
 static void pl011_dma_shutdown(struct uart_amba_port *uap)
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 577f22eb9225..5479fdc849e9 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -113,6 +113,21 @@
 #define UART01x_LCRH_PEN	0x02
 #define UART01x_LCRH_BRK	0x01
 
+#define ST_UART011_DMAWM_RX_1	(0 << 3)
+#define ST_UART011_DMAWM_RX_2	(1 << 3)
+#define ST_UART011_DMAWM_RX_4	(2 << 3)
+#define ST_UART011_DMAWM_RX_8	(3 << 3)
+#define ST_UART011_DMAWM_RX_16	(4 << 3)
+#define ST_UART011_DMAWM_RX_32	(5 << 3)
+#define ST_UART011_DMAWM_RX_48	(6 << 3)
+#define ST_UART011_DMAWM_TX_1	0
+#define ST_UART011_DMAWM_TX_2	1
+#define ST_UART011_DMAWM_TX_4	2
+#define ST_UART011_DMAWM_TX_8	3
+#define ST_UART011_DMAWM_TX_16	4
+#define ST_UART011_DMAWM_TX_32	5
+#define ST_UART011_DMAWM_TX_48	6
+
 #define UART010_IIR_RTIS	0x08
 #define UART010_IIR_TIS		0x04
 #define UART010_IIR_RIS		0x02
-- 
cgit v1.2.3-71-gd317


From 3610cda53f247e176bcbb7a7cca64bc53b12acdb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 5 Jan 2011 15:38:53 -0800
Subject: af_unix: Avoid socket->sk NULL OOPS in stream connect security hooks.

unix_release() can asynchornously set socket->sk to NULL, and
it does so without holding the unix_state_lock() on "other"
during stream connects.

However, the reverse mapping, sk->sk_socket, is only transitioned
to NULL under the unix_state_lock().

Therefore make the security hooks follow the reverse mapping instead
of the forward mapping.

Reported-by: Jeremy Fitzhardinge <jeremy@goop.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h   | 15 +++++++--------
 net/unix/af_unix.c         |  2 +-
 security/capability.c      |  2 +-
 security/security.c        |  3 +--
 security/selinux/hooks.c   | 10 +++++-----
 security/smack/smack_lsm.c | 14 +++++++-------
 6 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index fd4d55fb8845..d47a4c24b3e4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -796,8 +796,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @unix_stream_connect:
  *	Check permissions before establishing a Unix domain stream connection
  *	between @sock and @other.
- *	@sock contains the socket structure.
- *	@other contains the peer socket structure.
+ *	@sock contains the sock structure.
+ *	@other contains the peer sock structure.
+ *	@newsk contains the new sock structure.
  *	Return 0 if permission is granted.
  * @unix_may_send:
  *	Check permissions before connecting or sending datagrams from @sock to
@@ -1568,8 +1569,7 @@ struct security_operations {
 	int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
 
 #ifdef CONFIG_SECURITY_NETWORK
-	int (*unix_stream_connect) (struct socket *sock,
-				    struct socket *other, struct sock *newsk);
+	int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk);
 	int (*unix_may_send) (struct socket *sock, struct socket *other);
 
 	int (*socket_create) (int family, int type, int protocol, int kern);
@@ -2525,8 +2525,7 @@ static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32
 
 #ifdef CONFIG_SECURITY_NETWORK
 
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
-				 struct sock *newsk);
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
 int security_unix_may_send(struct socket *sock,  struct socket *other);
 int security_socket_create(int family, int type, int protocol, int kern);
 int security_socket_post_create(struct socket *sock, int family,
@@ -2567,8 +2566,8 @@ void security_tun_dev_post_create(struct sock *sk);
 int security_tun_dev_attach(struct sock *sk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
-static inline int security_unix_stream_connect(struct socket *sock,
-					       struct socket *other,
+static inline int security_unix_stream_connect(struct sock *sock,
+					       struct sock *other,
 					       struct sock *newsk)
 {
 	return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 417d7a6c36cf..dd419d286204 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1157,7 +1157,7 @@ restart:
 		goto restart;
 	}
 
-	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
+	err = security_unix_stream_connect(sk, other, newsk);
 	if (err) {
 		unix_state_unlock(sk);
 		goto out_unlock;
diff --git a/security/capability.c b/security/capability.c
index c773635ca3a0..2a5df2b7da83 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -548,7 +548,7 @@ static int cap_sem_semop(struct sem_array *sma, struct sembuf *sops,
 }
 
 #ifdef CONFIG_SECURITY_NETWORK
-static int cap_unix_stream_connect(struct socket *sock, struct socket *other,
+static int cap_unix_stream_connect(struct sock *sock, struct sock *other,
 				   struct sock *newsk)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index 1b798d3df710..e5fb07a3052d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -977,8 +977,7 @@ EXPORT_SYMBOL(security_inode_getsecctx);
 
 #ifdef CONFIG_SECURITY_NETWORK
 
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
-				 struct sock *newsk)
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk)
 {
 	return security_ops->unix_stream_connect(sock, other, newsk);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c82538a4b1a4..6f637d2678ac 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3921,18 +3921,18 @@ static int selinux_socket_shutdown(struct socket *sock, int how)
 	return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN);
 }
 
-static int selinux_socket_unix_stream_connect(struct socket *sock,
-					      struct socket *other,
+static int selinux_socket_unix_stream_connect(struct sock *sock,
+					      struct sock *other,
 					      struct sock *newsk)
 {
-	struct sk_security_struct *sksec_sock = sock->sk->sk_security;
-	struct sk_security_struct *sksec_other = other->sk->sk_security;
+	struct sk_security_struct *sksec_sock = sock->sk_security;
+	struct sk_security_struct *sksec_other = other->sk_security;
 	struct sk_security_struct *sksec_new = newsk->sk_security;
 	struct common_audit_data ad;
 	int err;
 
 	COMMON_AUDIT_DATA_INIT(&ad, NET);
-	ad.u.net.sk = other->sk;
+	ad.u.net.sk = other;
 
 	err = avc_has_perm(sksec_sock->sid, sksec_other->sid,
 			   sksec_other->sclass,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 489a85afa477..ccb71a044a1a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2408,22 +2408,22 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 
 /**
  * smack_unix_stream_connect - Smack access on UDS
- * @sock: one socket
- * @other: the other socket
+ * @sock: one sock
+ * @other: the other sock
  * @newsk: unused
  *
  * Return 0 if a subject with the smack of sock could access
  * an object with the smack of other, otherwise an error code
  */
-static int smack_unix_stream_connect(struct socket *sock,
-				     struct socket *other, struct sock *newsk)
+static int smack_unix_stream_connect(struct sock *sock,
+				     struct sock *other, struct sock *newsk)
 {
-	struct inode *sp = SOCK_INODE(sock);
-	struct inode *op = SOCK_INODE(other);
+	struct inode *sp = SOCK_INODE(sock->sk_socket);
+	struct inode *op = SOCK_INODE(other->sk_socket);
 	struct smk_audit_info ad;
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
-	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	smk_ad_setfield_u_net_sk(&ad, other);
 	return smk_access(smk_of_inode(sp), smk_of_inode(op),
 				 MAY_READWRITE, &ad);
 }
-- 
cgit v1.2.3-71-gd317


From b27dcfb0670ea7352a67137f4ff7947c2a9f6892 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Wed, 17 Nov 2010 22:56:48 -0500
Subject: [libata] avoid needlessly passing around ptr to SCSI completion func

It's stored in struct scsi_cmnd->scsi_done, making several 'done'
parameters to functions redundant.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-scsi.c           | 60 ++++++++++++++++---------------------
 drivers/scsi/ipr.c                  |  2 +-
 drivers/scsi/libsas/sas_scsi_host.c |  3 +-
 include/linux/libata.h              |  6 ++--
 4 files changed, 29 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 66aa4bee80a6..5defc74973d7 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -346,12 +346,11 @@ struct device_attribute *ata_common_sdev_attrs[] = {
 };
 EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
 
-static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
-				   void (*done)(struct scsi_cmnd *))
+static void ata_scsi_invalid_field(struct scsi_cmnd *cmd)
 {
 	ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0);
 	/* "Invalid field in cbd" */
-	done(cmd);
+	cmd->scsi_done(cmd);
 }
 
 /**
@@ -719,7 +718,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
  *	ata_scsi_qc_new - acquire new ata_queued_cmd reference
  *	@dev: ATA device to which the new command is attached
  *	@cmd: SCSI command that originated this ATA command
- *	@done: SCSI command completion function
  *
  *	Obtain a reference to an unused ata_queued_cmd structure,
  *	which is the basic libata structure representing a single
@@ -736,21 +734,20 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
  *	Command allocated, or %NULL if none available.
  */
 static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
-					      struct scsi_cmnd *cmd,
-					      void (*done)(struct scsi_cmnd *))
+					      struct scsi_cmnd *cmd)
 {
 	struct ata_queued_cmd *qc;
 
 	qc = ata_qc_new_init(dev);
 	if (qc) {
 		qc->scsicmd = cmd;
-		qc->scsidone = done;
+		qc->scsidone = cmd->scsi_done;
 
 		qc->sg = scsi_sglist(cmd);
 		qc->n_elem = scsi_sg_count(cmd);
 	} else {
 		cmd->result = (DID_OK << 16) | (QUEUE_FULL << 1);
-		done(cmd);
+		cmd->scsi_done(cmd);
 	}
 
 	return qc;
@@ -1735,7 +1732,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
  *	ata_scsi_translate - Translate then issue SCSI command to ATA device
  *	@dev: ATA device to which the command is addressed
  *	@cmd: SCSI command to execute
- *	@done: SCSI command completion function
  *	@xlat_func: Actor which translates @cmd to an ATA taskfile
  *
  *	Our ->queuecommand() function has decided that the SCSI
@@ -1759,7 +1755,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
  *	needs to be deferred.
  */
 static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
-			      void (*done)(struct scsi_cmnd *),
 			      ata_xlat_func_t xlat_func)
 {
 	struct ata_port *ap = dev->link->ap;
@@ -1768,7 +1763,7 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
 
 	VPRINTK("ENTER\n");
 
-	qc = ata_scsi_qc_new(dev, cmd, done);
+	qc = ata_scsi_qc_new(dev, cmd);
 	if (!qc)
 		goto err_mem;
 
@@ -1804,14 +1799,14 @@ static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
 
 early_finish:
 	ata_qc_free(qc);
-	qc->scsidone(cmd);
+	cmd->scsi_done(cmd);
 	DPRINTK("EXIT - early finish (good or error)\n");
 	return 0;
 
 err_did:
 	ata_qc_free(qc);
 	cmd->result = (DID_ERROR << 16);
-	qc->scsidone(cmd);
+	cmd->scsi_done(cmd);
 err_mem:
 	DPRINTK("EXIT - internal\n");
 	return 0;
@@ -3116,7 +3111,6 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
 }
 
 static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
-				      void (*done)(struct scsi_cmnd *),
 				      struct ata_device *dev)
 {
 	u8 scsi_op = scmd->cmnd[0];
@@ -3150,9 +3144,9 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
 	}
 
 	if (xlat_func)
-		rc = ata_scsi_translate(dev, scmd, done, xlat_func);
+		rc = ata_scsi_translate(dev, scmd, xlat_func);
 	else
-		ata_scsi_simulate(dev, scmd, done);
+		ata_scsi_simulate(dev, scmd);
 
 	return rc;
 
@@ -3160,7 +3154,7 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
 	DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n",
 		scmd->cmd_len, scsi_op, dev->cdb_len);
 	scmd->result = DID_ERROR << 16;
-	done(scmd);
+	scmd->scsi_done(scmd);
 	return 0;
 }
 
@@ -3199,7 +3193,7 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 
 	dev = ata_scsi_find_dev(ap, scsidev);
 	if (likely(dev))
-		rc = __ata_scsi_queuecmd(cmd, cmd->scsi_done, dev);
+		rc = __ata_scsi_queuecmd(cmd, dev);
 	else {
 		cmd->result = (DID_BAD_TARGET << 16);
 		cmd->scsi_done(cmd);
@@ -3214,7 +3208,6 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
  *	ata_scsi_simulate - simulate SCSI command on ATA device
  *	@dev: the target device
  *	@cmd: SCSI command being sent to device.
- *	@done: SCSI command completion function.
  *
  *	Interprets and directly executes a select list of SCSI commands
  *	that can be handled internally.
@@ -3223,8 +3216,7 @@ int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
  *	spin_lock_irqsave(host lock)
  */
 
-void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
-		      void (*done)(struct scsi_cmnd *))
+void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
 {
 	struct ata_scsi_args args;
 	const u8 *scsicmd = cmd->cmnd;
@@ -3233,17 +3225,17 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 	args.dev = dev;
 	args.id = dev->id;
 	args.cmd = cmd;
-	args.done = done;
+	args.done = cmd->scsi_done;
 
 	switch(scsicmd[0]) {
 	/* TODO: worth improving? */
 	case FORMAT_UNIT:
-		ata_scsi_invalid_field(cmd, done);
+		ata_scsi_invalid_field(cmd);
 		break;
 
 	case INQUIRY:
 		if (scsicmd[1] & 2)	           /* is CmdDt set?  */
-			ata_scsi_invalid_field(cmd, done);
+			ata_scsi_invalid_field(cmd);
 		else if ((scsicmd[1] & 1) == 0)    /* is EVPD clear? */
 			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std);
 		else switch (scsicmd[2]) {
@@ -3269,7 +3261,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2);
 			break;
 		default:
-			ata_scsi_invalid_field(cmd, done);
+			ata_scsi_invalid_field(cmd);
 			break;
 		}
 		break;
@@ -3281,7 +3273,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 
 	case MODE_SELECT:	/* unconditionally return */
 	case MODE_SELECT_10:	/* bad-field-in-cdb */
-		ata_scsi_invalid_field(cmd, done);
+		ata_scsi_invalid_field(cmd);
 		break;
 
 	case READ_CAPACITY:
@@ -3292,7 +3284,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 		if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16)
 			ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
 		else
-			ata_scsi_invalid_field(cmd, done);
+			ata_scsi_invalid_field(cmd);
 		break;
 
 	case REPORT_LUNS:
@@ -3302,7 +3294,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 	case REQUEST_SENSE:
 		ata_scsi_set_sense(cmd, 0, 0, 0);
 		cmd->result = (DRIVER_SENSE << 24);
-		done(cmd);
+		cmd->scsi_done(cmd);
 		break;
 
 	/* if we reach this, then writeback caching is disabled,
@@ -3324,14 +3316,14 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
 		if ((tmp8 == 0x4) && (!scsicmd[3]) && (!scsicmd[4]))
 			ata_scsi_rbuf_fill(&args, ata_scsiop_noop);
 		else
-			ata_scsi_invalid_field(cmd, done);
+			ata_scsi_invalid_field(cmd);
 		break;
 
 	/* all other commands */
 	default:
 		ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, 0x20, 0x0);
 		/* "Invalid command operation code" */
-		done(cmd);
+		cmd->scsi_done(cmd);
 		break;
 	}
 }
@@ -3858,7 +3850,6 @@ EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
 /**
  *	ata_sas_queuecmd - Issue SCSI cdb to libata-managed device
  *	@cmd: SCSI command to be sent
- *	@done: Completion function, called when command is complete
  *	@ap:	ATA port to which the command is being sent
  *
  *	RETURNS:
@@ -3866,18 +3857,17 @@ EXPORT_SYMBOL_GPL(ata_sas_slave_configure);
  *	0 otherwise.
  */
 
-int ata_sas_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *),
-		     struct ata_port *ap)
+int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
 {
 	int rc = 0;
 
 	ata_scsi_dump_cdb(ap, cmd);
 
 	if (likely(ata_dev_enabled(ap->link.device)))
-		rc = __ata_scsi_queuecmd(cmd, done, ap->link.device);
+		rc = __ata_scsi_queuecmd(cmd, ap->link.device);
 	else {
 		cmd->result = (DID_BAD_TARGET << 16);
-		done(cmd);
+		cmd->scsi_done(cmd);
 	}
 	return rc;
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 5bbaee597e88..2c71927aad64 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -5743,7 +5743,7 @@ static int ipr_queuecommand_lck(struct scsi_cmnd *scsi_cmd,
 	}
 
 	if (ipr_is_gata(res) && res->sata_port)
-		return ata_sas_queuecmd(scsi_cmd, done, res->sata_port->ap);
+		return ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
 
 	ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
 	ioarcb = &ipr_cmd->ioarcb;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 29251fabecc6..5815cbeb27a6 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -211,8 +211,7 @@ static int sas_queuecommand_lck(struct scsi_cmnd *cmd,
 			unsigned long flags;
 
 			spin_lock_irqsave(dev->sata_dev.ap->lock, flags);
-			res = ata_sas_queuecmd(cmd, scsi_done,
-					       dev->sata_dev.ap);
+			res = ata_sas_queuecmd(cmd, dev->sata_dev.ap);
 			spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags);
 			goto out;
 		}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d947b1231662..c9c5d7ad1a2b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -996,8 +996,7 @@ extern int ata_sas_port_init(struct ata_port *);
 extern int ata_sas_port_start(struct ata_port *ap);
 extern void ata_sas_port_stop(struct ata_port *ap);
 extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
-extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *),
-			    struct ata_port *ap);
+extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
 extern int sata_scr_valid(struct ata_link *link);
 extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
 extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
@@ -1040,8 +1039,7 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
 					struct ata_taskfile *tf, u16 *id);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active);
-extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
-			      void (*done)(struct scsi_cmnd *));
+extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
 extern int ata_std_bios_param(struct scsi_device *sdev,
 			      struct block_device *bdev,
 			      sector_t capacity, int geom[]);
-- 
cgit v1.2.3-71-gd317


From af5dd83b873efd4e1477f2265b6fa15a825aff26 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 6 Jan 2011 13:04:32 +1000
Subject: vga_switcheroo: fix build with non switcheroo enabled path.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vga_switcheroo.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index f80fa9de5b82..4b9a7f596f92 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -49,6 +49,7 @@ int vga_switcheroo_process_delayed_switch(void);
 static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
 static inline int vga_switcheroo_register_client(struct pci_dev *dev,
 					  void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+					  void (*reprobe)(struct pci_dev *dev),
 					  bool (*can_switch)(struct pci_dev *dev)) { return 0; }
 static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {}
 static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
-- 
cgit v1.2.3-71-gd317


From 238c855805c853eaec95b0bc3065effb64f955a0 Mon Sep 17 00:00:00 2001
From: Henry Ptasinski <henryp@broadcom.com>
Date: Tue, 4 Jan 2011 16:07:14 +0000
Subject: include/linux/if_ether.h: Add #define ETH_P_LINK_CTL for HPNA and
 wlan local tunnel

Ethertype used by HPNA control protocols (LARQ, rate, link, etc) and by
Broadcom wlan drivers for local signalling.

Signed-off-by: Henry Ptasinski <henryp@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index f9c3df03db0f..be69043d2896 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -72,6 +72,7 @@
 #define ETH_P_MPLS_UC	0x8847		/* MPLS Unicast traffic		*/
 #define ETH_P_MPLS_MC	0x8848		/* MPLS Multicast traffic	*/
 #define ETH_P_ATMMPOA	0x884c		/* MultiProtocol Over ATM	*/
+#define ETH_P_LINK_CTL	0x886c		/* HPNA, wlan link local tunnel */
 #define ETH_P_ATMFATE	0x8884		/* Frame-based ATM Transport
 					 * over Ethernet
 					 */
-- 
cgit v1.2.3-71-gd317


From 2ad0d9d413abc3380fc1d89a9da7f8db59d9746b Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 6 Jan 2011 11:41:42 -0800
Subject: net: remove the duplicate #ifdef __KERNEL__

Since we are already in #ifdef __KERNEL__, we don't need to check it
again.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 86b652fabf6e..5f65f14c4f44 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -30,12 +30,10 @@ struct cred;
 #define __sockaddr_check_size(size)	\
 	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
 
-#ifdef __KERNEL__
-# ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_FS
 struct seq_file;
 extern void socket_seq_show(struct seq_file *seq);
-# endif
-#endif /* __KERNEL__ */
+#endif
 
 typedef unsigned short	sa_family_t;
 
@@ -311,7 +309,6 @@ struct ucred {
 /* IPX options */
 #define IPX_TYPE	1
 
-#ifdef __KERNEL__
 extern void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred);
 
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
@@ -333,6 +330,5 @@ struct timespec;
 
 extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			  unsigned int flags, struct timespec *timeout);
-#endif
 #endif /* not kernel and not glibc */
 #endif /* _LINUX_SOCKET_H */
-- 
cgit v1.2.3-71-gd317


From 1f11a034cdc4b45ee56d51b87a9e37cb776fb15b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:26 +0000
Subject: SUNRPC new transport for the NFSv4.1 shared back channel

Move the current sock create and destroy routines into the new transport ops.
Back channel socket will be destroyed by the svc_closs_all call in svc_destroy.

Added check: only TCP supported on shared back channel.

Signed-off-by: Andy Adamson <andros@netapp.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svcsock.h |  1 +
 net/sunrpc/svc.c               |  4 ---
 net/sunrpc/svcsock.c           | 82 +++++++++++++++++++++++++++++++-----------
 3 files changed, 63 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 1b353a76c304..3a45a80760b9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,6 +45,7 @@ int		svc_sock_names(struct svc_serv *serv, char *buf,
 int		svc_addsock(struct svc_serv *serv, const int fd,
 					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
+void		svc_init_bc_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
 void		svc_sock_destroy(struct svc_xprt *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 606d182895a9..261e2d1dff10 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 
-#if defined(CONFIG_NFS_V4_1)
-	svc_sock_destroy(serv->bc_xprt);
-#endif /* CONFIG_NFS_V4_1 */
-
 	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 07919e16be3e..6630f2922f40 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -66,6 +66,13 @@ static void		svc_sock_free(struct svc_xprt *);
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 					  struct net *, struct sockaddr *,
 					  int, int);
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+#endif /* CONFIG_NFS_V4_1 */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
 static struct lock_class_key svc_slock_key[2];
@@ -1184,6 +1191,39 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+
+static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
+				       struct net *net,
+				       struct sockaddr *sa, int salen,
+				       int flags)
+{
+	return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
+}
+
+static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
+{
+}
+
+static struct svc_xprt_ops svc_tcp_bc_ops = {
+	.xpo_create = svc_bc_tcp_create,
+	.xpo_detach = svc_bc_tcp_sock_detach,
+	.xpo_free = svc_bc_sock_free,
+	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+};
+
+static struct svc_xprt_class svc_tcp_bc_class = {
+	.xcl_name = "tcp-bc",
+	.xcl_owner = THIS_MODULE,
+	.xcl_ops = &svc_tcp_bc_ops,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+};
+#endif /* CONFIG_NFS_V4_1 */
+
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1509,41 +1549,43 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	kfree(svsk);
 }
 
+#if defined(CONFIG_NFS_V4_1)
 /*
- * Create a svc_xprt.
- *
- * For internal use only (e.g. nfsv4.1 backchannel).
- * Callers should typically use the xpo_create() method.
+ * Create a back channel svc_xprt which shares the fore channel socket.
  */
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
+					     int protocol,
+					     struct net *net,
+					     struct sockaddr *sin, int len,
+					     int flags)
 {
 	struct svc_sock *svsk;
-	struct svc_xprt *xprt = NULL;
+	struct svc_xprt *xprt;
+
+	if (protocol != IPPROTO_TCP) {
+		printk(KERN_WARNING "svc: only TCP sockets"
+			" supported on shared back channel\n");
+		return ERR_PTR(-EINVAL);
+	}
 
-	dprintk("svc: %s\n", __func__);
 	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 	if (!svsk)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	xprt = &svsk->sk_xprt;
-	if (prot == IPPROTO_TCP)
-		svc_xprt_init(&svc_tcp_class, xprt, serv);
-	else if (prot == IPPROTO_UDP)
-		svc_xprt_init(&svc_udp_class, xprt, serv);
-	else
-		BUG();
-out:
-	dprintk("svc: %s return %p\n", __func__, xprt);
+	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+
+	serv->bc_xprt = xprt;
+
 	return xprt;
 }
-EXPORT_SYMBOL_GPL(svc_sock_create);
 
 /*
- * Destroy a svc_sock.
+ * Free a back channel svc_sock.
  */
-void svc_sock_destroy(struct svc_xprt *xprt)
+static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
 	if (xprt)
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
 }
-EXPORT_SYMBOL_GPL(svc_sock_destroy);
+#endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3-71-gd317


From 16b2d1e1d12de000404d7c845d0db1226511f84d Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:27 +0000
Subject: SUNRPC register and unregister the back channel transport

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svcsock.h |  1 -
 net/sunrpc/svcsock.c           | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 3a45a80760b9..1b353a76c304 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,7 +45,6 @@ int		svc_sock_names(struct svc_serv *serv, char *buf,
 int		svc_addsock(struct svc_serv *serv, const int fd,
 					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
-void		svc_init_bc_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
 void		svc_sock_destroy(struct svc_xprt *);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6630f2922f40..e6b66d81115e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1222,6 +1222,24 @@ static struct svc_xprt_class svc_tcp_bc_class = {
 	.xcl_ops = &svc_tcp_bc_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
+
+static void svc_init_bc_xprt_sock(void)
+{
+	svc_reg_xprt_class(&svc_tcp_bc_class);
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+	svc_unreg_xprt_class(&svc_tcp_bc_class);
+}
+#else /* CONFIG_NFS_V4_1 */
+static void svc_init_bc_xprt_sock(void)
+{
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static struct svc_xprt_ops svc_tcp_ops = {
@@ -1247,12 +1265,14 @@ void svc_init_xprt_sock(void)
 {
 	svc_reg_xprt_class(&svc_tcp_class);
 	svc_reg_xprt_class(&svc_udp_class);
+	svc_init_bc_xprt_sock();
 }
 
 void svc_cleanup_xprt_sock(void)
 {
 	svc_unreg_xprt_class(&svc_tcp_class);
 	svc_unreg_xprt_class(&svc_udp_class);
+	svc_cleanup_bc_xprt_sock();
 }
 
 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
-- 
cgit v1.2.3-71-gd317


From f4eecd5da3422e82e88e36c33cbd2595eebcacb1 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:30 +0000
Subject: NFS implement v4.0 callback_ident

Use the small id to pointer translator service to provide a unique callback
identifier per SETCLIENTID call used to identify the v4.0 callback service
associated with the clientid.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/inode.c            |  1 +
 fs/nfs/internal.h         |  1 +
 fs/nfs/nfs4proc.c         |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 5 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 855add62abc1..bc3a8620e8c3 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
 static LIST_HEAD(nfs_client_list);
 static LIST_HEAD(nfs_volume_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+#ifdef CONFIG_NFS_V4
+static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
+
+/*
+ * Get a unique NFSv4.0 callback identifier which will be used
+ * by the V4.0 callback service to lookup the nfs_client struct
+ */
+static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
+{
+	int ret = 0;
+
+	if (clp->rpc_ops->version != 4 || minorversion != 0)
+		return ret;
+retry:
+	if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+		return -ENOMEM;
+	spin_lock(&nfs_client_lock);
+	ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
+	spin_unlock(&nfs_client_lock);
+	if (ret == -EAGAIN)
+		goto retry;
+	return ret;
+}
+#endif /* CONFIG_NFS_V4 */
 
 /*
  * RPC cruft for NFS
@@ -144,6 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_proto = cl_init->proto;
 
 #ifdef CONFIG_NFS_V4
+	err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
+	if (err)
+		goto error_cleanup;
+
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
@@ -202,10 +230,32 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 }
+
+/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
+void nfs_cleanup_cb_ident_idr(void)
+{
+	idr_destroy(&cb_ident_idr);
+}
+
+/* nfs_client_lock held */
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+	if (clp->cl_cb_ident)
+		idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+}
+
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
 }
+
+void nfs_cleanup_cb_ident_idr(void)
+{
+}
+
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+}
 #endif /* CONFIG_NFS_V4 */
 
 /*
@@ -244,6 +294,7 @@ void nfs_put_client(struct nfs_client *clp)
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
 		list_del(&clp->cl_share_link);
+		nfs_cb_idr_remove_locked(clp);
 		spin_unlock(&nfs_client_lock);
 
 		BUG_ON(!list_empty(&clp->cl_superblocks));
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c73416..c7782b278e8b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1612,6 +1612,7 @@ static void __exit exit_nfs_fs(void)
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 #endif
+	nfs_cleanup_cb_ident_idr();
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
 	nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 435eae3666bd..7c803c916574 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -128,6 +128,7 @@ extern void nfs_umount(const struct nfs_mount_request *info);
 /* client.c */
 extern struct rpc_program nfs_program;
 
+extern void nfs_cleanup_cb_ident_idr(void);
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
 extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 82f3a82b7115..e165c53db08f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3484,6 +3484,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 	struct nfs4_setclientid setclientid = {
 		.sc_verifier = &sc_verifier,
 		.sc_prog = program,
+		.sc_cb_ident = clp->cl_cb_ident,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 452d96436d26..1eaa054a2c7d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -71,6 +71,7 @@ struct nfs_client {
 	 */
 	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
+	u32			cl_cb_ident;	/* v4.0 callback identifier */
 	const struct nfs4_minor_version_ops *cl_mvops;
 #endif /* CONFIG_NFS_V4 */
 
-- 
cgit v1.2.3-71-gd317


From 2c2618c6f29c41a0a966f14f05c8bf45fcabb750 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:31 +0000
Subject: NFS associate sessionid with callback connection

The sessions based callback service is started prior to the CREATE_SESSION call
so that it can handle CB_NULL requests which can be sent before the
CREATE_SESSION call returns and the session ID is known.

Set the callback sessionid after a sucessful CREATE_SESSION.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c               | 31 +++++++++++++++++++++++++++++++
 fs/nfs/callback.h               |  1 +
 fs/nfs/nfs4state.c              |  6 ++++++
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/svcsock.c            |  4 +++-
 5 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 0e9fae831dfa..c0b05497972b 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -136,6 +136,33 @@ out_err:
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ *  * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ *   */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+	struct nfs4_sessionid *bc_sid;
+
+	if (!serv->bc_xprt)
+		return -EINVAL;
+
+	/* on success freed in xprt_free */
+	bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+	if (!bc_sid)
+		return -ENOMEM;
+	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+		NFS4_MAX_SESSIONID_LEN);
+	spin_lock_bh(&serv->sv_cb_lock);
+	serv->bc_xprt->xpt_bc_sid = bc_sid;
+	spin_unlock_bh(&serv->sv_cb_lock);
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for bc_xprt %p\n", __func__,
+		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+		serv->bc_xprt);
+	return 0;
+}
+
 /*
  * The callback service for NFSv4.1 callbacks
  */
@@ -241,6 +268,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
 		struct nfs_callback_data *cb_info)
 {
 }
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd1b8dd..58d61a8ce8b9 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -137,6 +137,7 @@ extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern void nfs_callback_down(int minorversion);
 extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
 					    const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4 */
 /*
  * nfs41: Callbacks are expected to not cause substantial latency,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a3126737..485e95e8fd62 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -192,6 +192,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	status = nfs4_proc_create_session(clp);
 	if (status != 0)
 		goto out;
+	status = nfs4_set_callback_sessionid(clp);
+	if (status != 0) {
+		printk(KERN_WARNING "Sessionid not set. No callback service\n");
+		nfs_callback_down(1);
+		status = 0;
+	}
 	nfs41_setup_state_renewal(clp);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index aea0d438e3c7..357da5e0daa3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -78,6 +78,7 @@ struct svc_xprt {
 	size_t			xpt_remotelen;	/* length of address */
 	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct list_head	xpt_users;	/* callbacks on free */
+	void			*xpt_bc_sid;	/* back channel session ID */
 
 	struct net		*xpt_net;
 };
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e6b66d81115e..db3013e4aa04 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1605,7 +1605,9 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
  */
 static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
-	if (xprt)
+	if (xprt) {
+		kfree(xprt->xpt_bc_sid);
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
+	}
 }
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3-71-gd317


From c36fca52f5e4594ffd0ff175b328966b0d393184 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:32 +0000
Subject: NFS refactor nfs_find_client and reference client across callback
 processing

Fixes a bug where the nfs_client could be freed during callback processing.
Refactor nfs_find_client to use minorversion specific means to locate the
correct nfs_client structure.

In the NFS layer, V4.0 clients are found using the callback_ident field in the
CB_COMPOUND header.  V4.1 clients are found using the sessionID in the
CB_SEQUENCE operation which is also compared against the sessionID associated
with the back channel thread after a successful CREATE_SESSION.

Each of these methods finds the one an only nfs_client associated
with the incoming callback request - so nfs_find_client_next is not needed.

In the RPC layer, the pg_authenticate call needs to find the nfs_client. For
the v4.0 callback service, the callback identifier has not been decoded so a
search by address, version, and minorversion is used.  The sessionid for the
sessions based callback service has (usually) not been set for the
pg_authenticate on a CB_NULL call which can be sent prior to the return
of a CREATE_SESSION call, so the sessionid associated with the back channel
thread is not used to find the client in pg_authenticate for CB_NULL calls.

Pass the referenced nfs_client to each CB_COMPOUND operation being proceesed
via the new cb_process_state structure. The reference is held across
cb_compound processing.

Use the new cb_process_state struct to move the NFS4ERR_RETRY_UNCACHED_REP
processing from process_op into nfs4_callback_sequence where it belongs.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c              |  21 ++++-
 fs/nfs/callback.h              |  28 +++++--
 fs/nfs/callback_proc.c         | 167 ++++++++++++++++------------------------
 fs/nfs/callback_xdr.c          |  39 ++++++----
 fs/nfs/client.c                | 171 +++++++++++++++++++++++++++--------------
 fs/nfs/internal.h              |   7 +-
 include/linux/sunrpc/bc_xprt.h |  13 ++++
 7 files changed, 258 insertions(+), 188 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c0b05497972b..15677e7bede5 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,9 +16,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
-#if defined(CONFIG_NFS_V4_1)
 #include <linux/sunrpc/bc_xprt.h>
-#endif
 
 #include <net/inet_sock.h>
 
@@ -384,6 +382,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
 	return SVC_OK;
 }
 
+/* pg_authenticate method helper */
+static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+{
+	struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+	int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+
+	dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+	if (svc_is_backchannel(rqstp))
+		/* Sessionid (usually) set after CB_NULL ping */
+		return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
+						  is_cb_compound);
+	else
+		/* No callback identifier in pg_authenticate */
+		return nfs4_find_client_no_ident(svc_addr(rqstp));
+}
+
+/* pg_authenticate method for nfsv4 callback threads. */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct nfs_client *clp;
@@ -391,7 +406,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 	int ret = SVC_OK;
 
 	/* Don't talk to strangers */
-	clp = nfs_find_client(svc_addr(rqstp), 4);
+	clp = nfs_cb_find_client(rqstp);
 	if (clp == NULL)
 		return SVC_DROP;
 
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 58d61a8ce8b9..25e8802a51d1 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
 	OP_CB_ILLEGAL = 10044,
 };
 
+struct cb_process_state {
+	__be32			drc_status;
+	struct nfs_client	*clp;
+	struct nfs4_sessionid	*svc_sid; /* v4.1 callback service sessionid */
+};
+
 struct cb_compound_hdr_arg {
 	unsigned int taglen;
 	const char *tag;
 	unsigned int minorversion;
+	unsigned int cb_ident; /* v4.0 callback identifier */
 	unsigned nops;
 };
 
@@ -103,8 +110,9 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 };
 
-extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res);
+extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+				       struct cb_sequenceres *res,
+				       struct cb_process_state *cps);
 
 extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
 					     const nfs4_stateid *stateid);
@@ -118,19 +126,25 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 };
 
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
+					void *dummy,
+					struct cb_process_state *cps);
 
 struct cb_recallslotargs {
 	struct sockaddr	*crsa_addr;
 	uint32_t	crsa_target_max_slots;
 };
-extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					  void *dummy);
+extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
+					 void *dummy,
+					 struct cb_process_state *cps);
 
 #endif /* CONFIG_NFS_V4_1 */
 
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+				    struct cb_getattrres *res,
+				    struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+				   struct cb_process_state *cps);
 
 #ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2950fca0c61b..b70e46da16fc 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -16,26 +16,28 @@
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 #endif
- 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+			     struct cb_getattrres *res,
+			     struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
 
+	res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
+		goto out;
+
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
-		goto out;
 
 	dprintk("NFS: GETATTR callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
-	inode = nfs_delegation_find_inode(clp, &args->fh);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
 	if (inode == NULL)
-		goto out_putclient;
+		goto out;
 	nfsi = NFS_I(inode);
 	rcu_read_lock();
 	delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +57,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 out_iput:
 	rcu_read_unlock();
 	iput(inode);
-out_putclient:
-	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
 	return res->status;
 }
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+			    struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct inode *inode;
 	__be32 res;
 	
-	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
+	res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
 		goto out;
 
 	dprintk("NFS: RECALL callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-
-	do {
-		struct nfs_client *prev = clp;
-
-		inode = nfs_delegation_find_inode(clp, &args->fh);
-		if (inode != NULL) {
-			/* Set up a helper thread to actually return the delegation */
-			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
-				case 0:
-					res = 0;
-					break;
-				case -ENOENT:
-					if (res != 0)
-						res = htonl(NFS4ERR_BAD_STATEID);
-					break;
-				default:
-					res = htonl(NFS4ERR_RESOURCE);
-			}
-			iput(inode);
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+	res = htonl(NFS4ERR_BADHANDLE);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
+	if (inode == NULL)
+		goto out;
+	/* Set up a helper thread to actually return the delegation */
+	switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+	case 0:
+		res = 0;
+		break;
+	case -ENOENT:
+		if (res != 0)
+			res = htonl(NFS4ERR_BAD_STATEID);
+		break;
+	default:
+		res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	return res;
@@ -184,42 +178,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
 	return htonl(NFS4ERR_SEQ_MISORDERED);
 }
 
-/*
- * Returns a pointer to a held 'struct nfs_client' that matches the server's
- * address, major version number, and session ID.  It is the caller's
- * responsibility to release the returned reference.
- *
- * Returns NULL if there are no connections with sessions, or if no session
- * matches the one of interest.
- */
- static struct nfs_client *find_client_with_session(
-	const struct sockaddr *addr, u32 nfsversion,
-	struct nfs4_sessionid *sessionid)
-{
-	struct nfs_client *clp;
-
-	clp = nfs_find_client(addr, 4);
-	if (clp == NULL)
-		return NULL;
-
-	do {
-		struct nfs_client *prev = clp;
-
-		if (clp->cl_session != NULL) {
-			if (memcmp(clp->cl_session->sess_id.data,
-					sessionid->data,
-					NFS4_MAX_SESSIONID_LEN) == 0) {
-				/* Returns a held reference to clp */
-				return clp;
-			}
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
-
-	return NULL;
-}
-
 /*
  * For each referring call triple, check the session's slot table for
  * a match.  If the slot is in use and the sequence numbers match, the
@@ -276,20 +234,28 @@ out:
 }
 
 __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-				struct cb_sequenceres *res)
+			      struct cb_sequenceres *res,
+			      struct cb_process_state *cps)
 {
 	struct nfs_client *clp;
 	int i;
 	__be32 status;
 
+	cps->clp = NULL;
+
 	status = htonl(NFS4ERR_BADSESSION);
-	clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+	/* Incoming session must match the callback session */
+	if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
+		goto out;
+
+	clp = nfs4_find_client_sessionid(args->csa_addr,
+					 &args->csa_sessionid, 1);
 	if (clp == NULL)
 		goto out;
 
 	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	if (status)
-		goto out_putclient;
+		goto out;
 
 	/*
 	 * Check for pending referring calls.  If a match is found, a
@@ -298,7 +264,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	 */
 	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
 		status = htonl(NFS4ERR_DELAY);
-		goto out_putclient;
+		goto out;
 	}
 
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,36 +273,36 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	res->csr_slotid = args->csa_slotid;
 	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	cps->clp = clp; /* put in nfs4_callback_compound */
 
-out_putclient:
-	nfs_put_client(clp);
 out:
 	for (i = 0; i < args->csa_nrclists; i++)
 		kfree(args->csa_rclists[i].rcl_refcalls);
 	kfree(args->csa_rclists);
 
-	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
-		res->csr_status = 0;
-	else
+	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+		cps->drc_status = status;
+		status = 0;
+	} else
 		res->csr_status = status;
+
 	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
 		ntohl(status), ntohl(res->csr_status));
 	return status;
 }
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+			       struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	__be32 status;
 	fmode_t flags = 0;
 
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->craa_addr, 4);
-	if (clp == NULL)
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
 	dprintk("NFS: RECALL_ANY callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
 		     &args->craa_type_mask))
@@ -346,7 +312,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
 		flags |= FMODE_WRITE;
 
 	if (flags)
-		nfs_expire_all_delegation_types(clp, flags);
+		nfs_expire_all_delegation_types(cps->clp, flags);
 	status = htonl(NFS4_OK);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
@@ -354,36 +320,33 @@ out:
 }
 
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+				struct cb_process_state *cps)
 {
-	struct nfs_client *clp;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->crsa_addr, 4);
-	if (clp == NULL)
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
 	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
 		args->crsa_target_max_slots);
 
-	fc_tbl = &clp->cl_session->fc_slot_table;
+	fc_tbl = &cps->clp->cl_session->fc_slot_table;
 
 	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
 	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
 	    args->crsa_target_max_slots < 1)
-		goto out_putclient;
+		goto out;
 
 	status = htonl(NFS4_OK);
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
-		goto out_putclient;
+		goto out;
 
 	fc_tbl->target_max_slots = args->crsa_target_max_slots;
-	nfs41_handle_recall_slot(clp);
-out_putclient:
-	nfs_put_client(clp);	/* balance nfs_find_client */
+	nfs41_handle_recall_slot(cps->clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 05af212f0edf..dbd0d649805c 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,8 +10,10 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/slab.h>
+#include <linux/sunrpc/bc_xprt.h>
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
@@ -33,7 +35,8 @@
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+					struct cb_process_state *);
 typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
 
@@ -160,7 +163,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 	hdr->minorversion = ntohl(*p++);
 	/* Check minor version is zero or one. */
 	if (hdr->minorversion <= 1) {
-		p++;	/* skip callback_ident */
+		hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
 	} else {
 		printk(KERN_WARNING "%s: NFSv4 server callback with "
 			"illegal minor version %u!\n",
@@ -621,7 +624,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
 static __be32 process_op(uint32_t minorversion, int nop,
 		struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
-		struct xdr_stream *xdr_out, void *resp, int* drc_status)
+		struct xdr_stream *xdr_out, void *resp,
+		struct cb_process_state *cps)
 {
 	struct callback_op *op = &callback_ops[0];
 	unsigned int op_nr;
@@ -644,8 +648,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
 	if (status)
 		goto encode_hdr;
 
-	if (*drc_status) {
-		status = *drc_status;
+	if (cps->drc_status) {
+		status = cps->drc_status;
 		goto encode_hdr;
 	}
 
@@ -653,16 +657,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
 		status = op->decode_args(rqstp, xdr_in, argp);
 		if (likely(status == 0))
-			status = op->process_op(argp, resp);
+			status = op->process_op(argp, resp, cps);
 	} else
 		status = htonl(NFS4ERR_RESOURCE);
 
-	/* Only set by OP_CB_SEQUENCE processing */
-	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
-		*drc_status = status;
-		status = 0;
-	}
-
 encode_hdr:
 	res = encode_op_hdr(xdr_out, op_nr, status);
 	if (unlikely(res))
@@ -681,8 +679,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
-	__be32 *p;
-	__be32 status, drc_status = 0;
+	__be32 *p, status;
+	struct cb_process_state cps = {
+		.drc_status = 0,
+		.clp = NULL,
+	};
 	unsigned int nops = 0;
 
 	dprintk("%s: start\n", __func__);
@@ -696,6 +697,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	if (status == __constant_htonl(NFS4ERR_RESOURCE))
 		return rpc_garbage_args;
 
+	if (hdr_arg.minorversion == 0) {
+		cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+		if (!cps.clp)
+			return rpc_drop_reply;
+	} else
+		cps.svc_sid = bc_xprt_sid(rqstp);
+
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
 	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +711,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(hdr_arg.minorversion, nops, rqstp,
-				    &xdr_in, argp, &xdr_out, resp, &drc_status);
+				    &xdr_in, argp, &xdr_out, resp, &cps);
 		nops++;
 	}
 
@@ -716,6 +724,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
+	nfs_put_client(cps.clp);
 	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	return rpc_success;
 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bc3a8620e8c3..11eb9934c747 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -410,70 +410,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
 	return 0;
 }
 
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
+/* Common match routine for v4.0 and v4.1 callback services */
+bool
+nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
+		     u32 minorversion)
 {
-	struct nfs_client *clp;
+	struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 
-	spin_lock(&nfs_client_lock);
-	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
-		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+	/* Don't match clients that failed to initialise */
+	if (!(clp->cl_cons_state == NFS_CS_READY ||
+	    clp->cl_cons_state == NFS_CS_SESSION_INITING))
+		return false;
 
-		/* Don't match clients that failed to initialise properly */
-		if (!(clp->cl_cons_state == NFS_CS_READY ||
-		      clp->cl_cons_state == NFS_CS_SESSION_INITING))
-			continue;
+	/* Match the version and minorversion */
+	if (clp->rpc_ops->version != 4 ||
+	    clp->cl_minorversion != minorversion)
+		return false;
 
-		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->rpc_ops->version != nfsversion)
-			continue;
-
-		/* Match only the IP address, not the port number */
-		if (!nfs_sockaddr_match_ipaddr(addr, clap))
-			continue;
+	/* Match only the IP address, not the port number */
+	if (!nfs_sockaddr_match_ipaddr(addr, clap))
+		return false;
 
-		atomic_inc(&clp->cl_count);
-		spin_unlock(&nfs_client_lock);
-		return clp;
-	}
-	spin_unlock(&nfs_client_lock);
-	return NULL;
-}
-
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
-{
-	struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
-	u32 nfsvers = clp->rpc_ops->version;
-
-	spin_lock(&nfs_client_lock);
-	list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
-		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
-
-		/* Don't match clients that failed to initialise properly */
-		if (clp->cl_cons_state != NFS_CS_READY)
-			continue;
-
-		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->rpc_ops->version != nfsvers)
-			continue;
-
-		/* Match only the IP address, not the port number */
-		if (!nfs_sockaddr_match_ipaddr(sap, clap))
-			continue;
-
-		atomic_inc(&clp->cl_count);
-		spin_unlock(&nfs_client_lock);
-		return clp;
-	}
-	spin_unlock(&nfs_client_lock);
-	return NULL;
+	return true;
 }
 
 /*
@@ -1171,6 +1129,101 @@ error:
 }
 
 #ifdef CONFIG_NFS_V4
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by IP address, protocol version, and minorversion
+ *
+ * Called from the pg_authenticate method. The callback identifier
+ * is not used as it has not been decoded.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_no_ident(const struct sockaddr *addr)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		if (nfs4_cb_match_client(addr, clp, 0) == false)
+			continue;
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
+	}
+	spin_unlock(&nfs_client_lock);
+	return NULL;
+}
+
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by callback identifier
+ */
+struct nfs_client *
+nfs4_find_client_ident(int cb_ident)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = idr_find(&cb_ident_idr, cb_ident);
+	if (clp)
+		atomic_inc(&clp->cl_count);
+	spin_unlock(&nfs_client_lock);
+	return clp;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 callback thread helper
+ * For CB_COMPOUND calls, find a client by IP address, protocol version,
+ * minorversion, and sessionID
+ *
+ * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
+ * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
+ * can arrive before the callback sessionid is set. For CB_NULL calls,
+ * find a client by IP address protocol version, and minorversion.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+			   struct nfs4_sessionid *sid, int is_cb_compound)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		if (nfs4_cb_match_client(addr, clp, 1) == false)
+			continue;
+
+		if (!nfs4_has_session(clp))
+			continue;
+
+		/* Match sessionid unless cb_null call*/
+		if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
+		    sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+			continue;
+
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
+	}
+	spin_unlock(&nfs_client_lock);
+	return NULL;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+			   struct nfs4_sessionid *sid, int is_cb_compound)
+{
+	return NULL;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * Initialize the NFS4 callback service
  */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7c803c916574..bfa3a34af801 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -130,8 +130,11 @@ extern struct rpc_program nfs_program;
 
 extern void nfs_cleanup_cb_ident_idr(void);
 extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
-extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
+extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
+extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+			   int);
 extern struct nfs_server *nfs_create_server(
 					const struct nfs_parsed_mount_data *,
 					struct nfs_fh *);
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 7c91260c44a9..2c60e094fdec 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -47,6 +47,14 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 		return 1;
 	return 0;
 }
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+	if (svc_is_backchannel(rqstp))
+		return (struct nfs4_sessionid *)
+					rqstp->rq_server->bc_xprt->xpt_bc_sid;
+	return NULL;
+}
+
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
@@ -59,6 +67,11 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 	return 0;
 }
 
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+	return NULL;
+}
+
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 }
-- 
cgit v1.2.3-71-gd317


From 42acd021824578fa0eeb6eb58d457c23ec5dc9c0 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:34 +0000
Subject: NFS add session back channel draining

Currently session draining only drains the fore channel.
The back channel processing must also be drained.

Use the back channel highest_slot_used to indicate that a callback is being
processed by the callback thread.  Move the session complete to be per channel.

When the session is draininig, wait for any current back channel processing
to complete and stop all new back channel processing by returning NFS4ERR_DELAY
to the back channel client.

Drain the back channel, then the fore channel.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.h         |  3 ++-
 fs/nfs/callback_proc.c    |  7 +++++++
 fs/nfs/callback_xdr.c     | 35 +++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         | 26 +++++++++++++++++++-------
 fs/nfs/nfs4state.c        | 29 ++++++++++++++++++++++-------
 include/linux/nfs_fs_sb.h |  2 +-
 6 files changed, 86 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 25e8802a51d1..b678e3e15bd9 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -138,6 +138,8 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
 					 void *dummy,
 					 struct cb_process_state *cps);
 
+extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+extern void nfs4_cb_take_slot(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4_1 */
 
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
@@ -145,7 +147,6 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
 				    struct cb_process_state *cps);
 extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
 				   struct cb_process_state *cps);
-
 #ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern void nfs_callback_down(int minorversion);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index b70e46da16fc..c1bead2f3e04 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -253,6 +253,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	if (clp == NULL)
 		goto out;
 
+	/* state manager is resetting the session */
+	if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+		status = NFS4ERR_DELAY;
+		goto out;
+	}
+
 	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	if (status)
 		goto out;
@@ -273,6 +279,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	res->csr_slotid = args->csa_slotid;
 	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	nfs4_cb_take_slot(clp);
 	cps->clp = clp; /* put in nfs4_callback_compound */
 
 out:
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index dbd0d649805c..7a2d6c5864ca 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -596,6 +596,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	return htonl(NFS_OK);
 }
 
+static void nfs4_callback_free_slot(struct nfs4_session *session)
+{
+	struct nfs4_slot_table *tbl = &session->bc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	/*
+	 * Let the state manager know callback processing done.
+	 * A single slot, so highest used slotid is either 0 or -1
+	 */
+	tbl->highest_used_slotid--;
+	nfs4_check_drain_bc_complete(session);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+	if (clp && clp->cl_session)
+		nfs4_callback_free_slot(clp->cl_session);
+}
+
+/* A single slot, so highest used slotid is either 0 or -1 */
+void nfs4_cb_take_slot(struct nfs_client *clp)
+{
+	struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	tbl->highest_used_slotid++;
+	BUG_ON(tbl->highest_used_slotid != 0);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
 #else /* CONFIG_NFS_V4_1 */
 
 static __be32
@@ -604,6 +635,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 }
 
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static __be32
@@ -724,6 +758,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
+	nfs4_cb_free_slot(cps.clp);
 	nfs_put_client(cps.clp);
 	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	return rpc_success;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e165c53db08f..18a4d5a9a4e9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -356,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
 }
 
 /*
- * Signal state manager thread if session is drained
+ * Signal state manager thread if session fore channel is drained
  */
-static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
+static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
 {
 	struct rpc_task *task;
 
@@ -372,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
 	if (ses->fc_slot_table.highest_used_slotid != -1)
 		return;
 
-	dprintk("%s COMPLETE: Session Drained\n", __func__);
-	complete(&ses->complete);
+	dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
+	complete(&ses->fc_slot_table.complete);
+}
+
+/*
+ * Signal state manager thread if session back channel is drained
+ */
+void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
+{
+	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
+	    ses->bc_slot_table.highest_used_slotid != -1)
+		return;
+	dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
+	complete(&ses->bc_slot_table.complete);
 }
 
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -390,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 
 	spin_lock(&tbl->slot_tbl_lock);
 	nfs4_free_slot(tbl, res->sr_slot);
-	nfs41_check_drain_session_complete(res->sr_session);
+	nfs4_check_drain_fc_complete(res->sr_session);
 	spin_unlock(&tbl->slot_tbl_lock);
 	res->sr_slot = NULL;
 }
@@ -4777,17 +4789,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	if (!session)
 		return NULL;
 
-	init_completion(&session->complete);
-
 	tbl = &session->fc_slot_table;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+	init_completion(&tbl->complete);
 
 	tbl = &session->bc_slot_table;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+	init_completion(&tbl->complete);
 
 	session->session_state = 1<<NFS4_SESSION_INITING;
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 485e95e8fd62..6891dedd80f1 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -142,6 +142,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 	return status;
 }
 
+/*
+ * Back channel returns NFS4ERR_DELAY for new requests when
+ * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
+ * is ended.
+ */
 static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 	struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +170,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 	}
 }
 
-static int nfs4_begin_drain_session(struct nfs_client *clp)
+static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
 {
-	struct nfs4_session *ses = clp->cl_session;
-	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
-
 	spin_lock(&tbl->slot_tbl_lock);
-	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
 	if (tbl->highest_used_slotid != -1) {
-		INIT_COMPLETION(ses->complete);
+		INIT_COMPLETION(tbl->complete);
 		spin_unlock(&tbl->slot_tbl_lock);
-		return wait_for_completion_interruptible(&ses->complete);
+		return wait_for_completion_interruptible(&tbl->complete);
 	}
 	spin_unlock(&tbl->slot_tbl_lock);
 	return 0;
 }
 
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+	struct nfs4_session *ses = clp->cl_session;
+	int ret = 0;
+
+	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+	/* back channel */
+	ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+	if (ret)
+		return ret;
+	/* fore channel */
+	return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+}
+
 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 1eaa054a2c7d..e93ada0565fc 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -197,6 +197,7 @@ struct nfs4_slot_table {
 						 * op for dynamic resizing */
 	int		target_max_slots;	/* Set by CB_RECALL_SLOT as
 						 * the new max_slots */
+	struct completion complete;
 };
 
 static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
@@ -213,7 +214,6 @@ struct nfs4_session {
 	unsigned long			session_state;
 	u32				hash_alg;
 	u32				ssv_len;
-	struct completion		complete;
 
 	/* The fore and back channel */
 	struct nfs4_channel_attrs	fc_attrs;
-- 
cgit v1.2.3-71-gd317


From 4a19de0f4b693139bb10b7cc3cfe1f618576ba67 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:35 +0000
Subject: NFS rename client back channel transport field

Differentiate from server backchannel

Signed-off-by: Andy Adamson <andros@netapp.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c              | 12 ++++++------
 include/linux/sunrpc/bc_xprt.h |  4 ++--
 include/linux/sunrpc/svc.h     |  2 +-
 net/sunrpc/svc.c               |  2 +-
 net/sunrpc/svcsock.c           |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 753a9e315518..199016528fcb 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -142,7 +142,7 @@ int nfs4_set_callback_sessionid(struct nfs_client *clp)
 	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
 	struct nfs4_sessionid *bc_sid;
 
-	if (!serv->bc_xprt)
+	if (!serv->sv_bc_xprt)
 		return -EINVAL;
 
 	/* on success freed in xprt_free */
@@ -152,12 +152,12 @@ int nfs4_set_callback_sessionid(struct nfs_client *clp)
 	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
 		NFS4_MAX_SESSIONID_LEN);
 	spin_lock_bh(&serv->sv_cb_lock);
-	serv->bc_xprt->xpt_bc_sid = bc_sid;
+	serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
 	spin_unlock_bh(&serv->sv_cb_lock);
-	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for bc_xprt %p\n", __func__,
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
 		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
 		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
-		serv->bc_xprt);
+		serv->sv_bc_xprt);
 	return 0;
 }
 
@@ -228,8 +228,8 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 	init_waitqueue_head(&serv->sv_cb_waitq);
 	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
 	if (IS_ERR(rqstp)) {
-		svc_xprt_put(serv->bc_xprt);
-		serv->bc_xprt = NULL;
+		svc_xprt_put(serv->sv_bc_xprt);
+		serv->sv_bc_xprt = NULL;
 	}
 out:
 	dprintk("--> %s return %ld\n", __func__,
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 2c60e094fdec..c50b458b8a3f 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -43,7 +43,7 @@ int bc_send(struct rpc_rqst *req);
  */
 static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 {
-	if (rqstp->rq_server->bc_xprt)
+	if (rqstp->rq_server->sv_bc_xprt)
 		return 1;
 	return 0;
 }
@@ -51,7 +51,7 @@ static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
 {
 	if (svc_is_backchannel(rqstp))
 		return (struct nfs4_sessionid *)
-					rqstp->rq_server->bc_xprt->xpt_bc_sid;
+			rqstp->rq_server->sv_bc_xprt->xpt_bc_sid;
 	return NULL;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5a3085b9b394..c81d4d8be3a9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -99,7 +99,7 @@ struct svc_serv {
 	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 						 * entries in the svc_cb_list */
-	struct svc_xprt		*bc_xprt;
+	struct svc_xprt		*sv_bc_xprt;	/* callback on fore channel */
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 261e2d1dff10..0e659c665a8d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1262,7 +1262,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	struct kvec	*resv = &rqstp->rq_res.head[0];
 
 	/* Build the svc_rqst used by the common processing routine */
-	rqstp->rq_xprt = serv->bc_xprt;
+	rqstp->rq_xprt = serv->sv_bc_xprt;
 	rqstp->rq_xid = req->rq_xid;
 	rqstp->rq_prot = req->rq_xprt->prot;
 	rqstp->rq_server = serv;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index db3013e4aa04..d265aa700bb3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1595,7 +1595,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
 	xprt = &svsk->sk_xprt;
 	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
 
-	serv->bc_xprt = xprt;
+	serv->sv_bc_xprt = xprt;
 
 	return xprt;
 }
-- 
cgit v1.2.3-71-gd317


From daaa82d1c72e10dc16cad3a810e225f9188dc7aa Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Thu, 6 Jan 2011 11:36:19 +0000
Subject: pnfs: remove unnecessary field lgp->status

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 3 +--
 include/linux/nfs_xdr.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 18a4d5a9a4e9..28e175e74de2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5326,7 +5326,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			return;
 		}
 	}
-	lgp->status = task->tk_status;
 	dprintk("<-- %s\n", __func__);
 }
 
@@ -5382,7 +5381,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status != 0)
 		goto out;
-	status = lgp->status;
+	status = task->tk_status;
 	if (status != 0)
 		goto out;
 	status = pnfs_layout_process(lgp);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 236e7e4b99a0..8fcc54267bba 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -223,7 +223,6 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct pnfs_layout_segment **lsegpp;
-	int status;
 };
 
 struct nfs4_getdeviceinfo_args {
-- 
cgit v1.2.3-71-gd317


From cf7d63f1f9895713551df2e6d18b006f8af26e91 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Thu, 6 Jan 2011 11:36:25 +0000
Subject: pnfs: serialize LAYOUTGET(openstateid)

We shouldn't send a LAYOUTGET(openstateid) unless all outstanding RPCs
using the previous stateid are completed.  This requires choosing the
stateid to encode earlier, so we can abort if one is not available (we
want to use the open stateid, but a LAYOUTGET is already out using
it), and adding a count of the number of outstanding rpc calls using
layout state (which for now consist solely of LAYOUTGETs).

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  7 ++++++-
 fs/nfs/nfs4xdr.c        |  5 +----
 fs/nfs/pnfs.c           | 24 +++++++++++++++++++-----
 fs/nfs/pnfs.h           |  1 +
 include/linux/nfs_xdr.h |  1 +
 5 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5bee453d36d6..a3549ce72ab2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5304,6 +5304,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
 	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
 				&lgp->res.seq_res, 0, task))
 		return;
+	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+					  NFS_I(lgp->args.inode)->layout,
+					  lgp->args.ctx->state)) {
+		rpc_exit(task, NFS4_OK);
+		return;
+	}
 	rpc_call_start(task);
 }
 
@@ -5338,7 +5344,6 @@ static void nfs4_layoutget_release(void *calldata)
 	struct nfs4_layoutget *lgp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	put_layout_hdr(lgp->args.inode);
 	if (lgp->res.layout.buf != NULL)
 		free_page((unsigned long) lgp->res.layout.buf);
 	put_nfs_open_context(lgp->args.ctx);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e28242360d6..3cbdd0c80a2d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1787,7 +1787,6 @@ encode_layoutget(struct xdr_stream *xdr,
 		      const struct nfs4_layoutget_args *args,
 		      struct compound_hdr *hdr)
 {
-	nfs4_stateid stateid;
 	__be32 *p;
 
 	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
@@ -1798,9 +1797,7 @@ encode_layoutget(struct xdr_stream *xdr,
 	p = xdr_encode_hyper(p, args->range.offset);
 	p = xdr_encode_hyper(p, args->range.length);
 	p = xdr_encode_hyper(p, args->minlength);
-	pnfs_choose_layoutget_stateid(&stateid, NFS_I(args->inode)->layout,
-				args->ctx->state);
-	p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+	p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
 	*p = cpu_to_be32(args->maxcount);
 
 	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 212cbc22c59d..59ed68bf79fa 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -371,6 +371,14 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
 		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
 }
 
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget)
+{
+	return (list_empty(&lo->plh_segs) &&
+		 (atomic_read(&lo->plh_outstanding) > lget));
+}
+
 int
 pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 			      struct nfs4_state *open_state)
@@ -379,7 +387,9 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	if (list_empty(&lo->plh_segs)) {
+	if (pnfs_layoutgets_blocked(lo, 1)) {
+		status = -EAGAIN;
+	} else if (list_empty(&lo->plh_segs)) {
 		int seq;
 
 		do {
@@ -414,10 +424,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
 	BUG_ON(ctx == NULL);
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-	if (lgp == NULL) {
-		put_layout_hdr(lo->plh_inode);
+	if (lgp == NULL)
 		return NULL;
-	}
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.range.iomode = iomode;
@@ -613,10 +621,16 @@ pnfs_update_layout(struct inode *ino,
 	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
 		goto out_unlock;
 
-	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+	if (pnfs_layoutgets_blocked(lo, 0))
+		goto out_unlock;
+	atomic_inc(&lo->plh_outstanding);
+
+	get_layout_hdr_locked(lo);
 	spin_unlock(&ino->i_lock);
 
 	lseg = send_layoutget(lo, ctx, iomode);
+	atomic_dec(&lo->plh_outstanding);
+	put_layout_hdr(ino);
 out:
 	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
 		nfsi->layout->plh_flags, lseg);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 787253e6fca3..698380da24cc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -69,6 +69,7 @@ struct pnfs_layout_hdr {
 	struct list_head	plh_layouts;   /* other client layouts */
 	struct list_head	plh_segs;      /* layout segments list */
 	nfs4_stateid		plh_stateid;
+	atomic_t		plh_outstanding; /* number of RPCs out */
 	unsigned long		plh_flags;
 	struct inode		*plh_inode;
 };
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8fcc54267bba..83d36d3a12e6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -208,6 +208,7 @@ struct nfs4_layoutget_args {
 	struct inode *inode;
 	struct nfs_open_context *ctx;
 	struct nfs4_sequence_args seq_args;
+	nfs4_stateid stateid;
 };
 
 struct nfs4_layoutget_res {
-- 
cgit v1.2.3-71-gd317


From f7e8917a67980924651a9e244510e63ef05c7755 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Thu, 6 Jan 2011 11:36:32 +0000
Subject: pnfs: layout roc code

A layout can request return-on-close.  How this interacts with the
forgetful model of never sending LAYOUTRETURNS is a bit ambiguous.
We forget any layouts marked roc, and wait for them to be completely
forgotten before continuing with the close.  In addition, to compensate
for races with any inflight LAYOUTGETs, and the fact that we do not get
any layout stateid back from the server, we set the barrier to the worst
case scenario of current_seqid + number of outstanding LAYOUTGETS.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 12 +++++++
 fs/nfs/nfs4_fs.h          |  2 +-
 fs/nfs/nfs4proc.c         | 21 ++++++++++--
 fs/nfs/nfs4state.c        |  7 ++--
 fs/nfs/pnfs.c             | 86 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/pnfs.h             | 29 ++++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 7 files changed, 152 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 11eb9934c747..684b67771199 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -244,6 +244,11 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 		idr_remove(&cb_ident_idr, clp->cl_cb_ident);
 }
 
+static void pnfs_init_server(struct nfs_server *server)
+{
+	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+}
+
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
@@ -256,6 +261,11 @@ void nfs_cleanup_cb_ident_idr(void)
 static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 {
 }
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+}
+
 #endif /* CONFIG_NFS_V4 */
 
 /*
@@ -1024,6 +1034,8 @@ static struct nfs_server *nfs_alloc_server(void)
 		return NULL;
 	}
 
+	pnfs_init_server(server);
+
 	return server;
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8f169dc789db..18d64cb5985b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -236,7 +236,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		struct nfs4_fs_locations *fs_locations, struct page *page);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a3549ce72ab2..88f590feeb72 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1839,6 +1839,8 @@ struct nfs4_closedata {
 	struct nfs_closeres res;
 	struct nfs_fattr fattr;
 	unsigned long timestamp;
+	bool roc;
+	u32 roc_barrier;
 };
 
 static void nfs4_free_closedata(void *data)
@@ -1846,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_state_owner *sp = calldata->state->owner;
 
+	if (calldata->roc)
+		pnfs_roc_release(calldata->state->inode);
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
@@ -1878,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	 */
 	switch (task->tk_status) {
 		case 0:
+			if (calldata->roc)
+				pnfs_roc_set_barrier(state->inode,
+						     calldata->roc_barrier);
 			nfs_set_open_stateid(state, &calldata->res.stateid, 0);
 			renew_lease(server, calldata->timestamp);
 			nfs4_close_clear_stateid_flags(state,
@@ -1930,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		return;
 	}
 
-	if (calldata->arg.fmode == 0)
+	if (calldata->arg.fmode == 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+		if (calldata->roc &&
+		    pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
+			rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
+				     task, NULL);
+			return;
+		}
+	}
 
 	nfs_fattr_init(calldata->res.fattr);
 	calldata->timestamp = jiffies;
@@ -1959,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
@@ -1994,6 +2008,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
+	calldata->roc = roc;
 	path_get(path);
 	calldata->path = *path;
 
@@ -2011,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
 out_free_calldata:
 	kfree(calldata);
 out:
+	if (roc)
+		pnfs_roc_release(state->inode);
 	nfs4_put_open_state(state);
 	nfs4_put_state_owner(sp);
 	return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6891dedd80f1..286084f148e3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -606,8 +606,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
 	if (!call_close) {
 		nfs4_put_open_state(state);
 		nfs4_put_state_owner(owner);
-	} else
-		nfs4_do_close(path, state, gfp_mask, wait);
+	} else {
+		bool roc = pnfs_roc(state->inode);
+
+		nfs4_do_close(path, state, gfp_mask, wait, roc);
+	}
 }
 
 void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bf4186b8f2fc..bc4089769735 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -256,6 +256,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
 			spin_unlock(&clp->cl_lock);
 			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
 		}
+		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
 		list_add(&lseg->pls_list, tmp_list);
 		return 1;
 	}
@@ -401,7 +402,8 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
 	if ((stateid) &&
 	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
 		return true;
-	return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+	return lo->plh_block_lgets ||
+		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
 		(list_empty(&lo->plh_segs) &&
 		 (atomic_read(&lo->plh_outstanding) > lget));
 }
@@ -474,6 +476,83 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	return lseg;
 }
 
+bool pnfs_roc(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+	struct pnfs_layout_segment *lseg, *tmp;
+	LIST_HEAD(tmp_list);
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+		goto out_nolayout;
+	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			mark_lseg_invalid(lseg, &tmp_list);
+			found = true;
+		}
+	if (!found)
+		goto out_nolayout;
+	lo->plh_block_lgets++;
+	get_layout_hdr(lo); /* matched in pnfs_roc_release */
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+	return true;
+
+out_nolayout:
+	spin_unlock(&ino->i_lock);
+	return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	lo->plh_block_lgets--;
+	put_layout_hdr_locked(lo);
+	spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if ((int)(barrier - lo->plh_barrier) > 0)
+		lo->plh_barrier = barrier;
+	spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_segment *lseg;
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			found = true;
+			break;
+		}
+	if (!found) {
+		struct pnfs_layout_hdr *lo = nfsi->layout;
+		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+		/* Since close does not return a layout stateid for use as
+		 * a barrier, we choose the worst-case barrier.
+		 */
+		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+	}
+	spin_unlock(&ino->i_lock);
+	return found;
+}
+
 /*
  * Compare two layout segments for sorting into layout cache.
  * We want to preferentially return RW over RO layouts, so ensure those
@@ -732,6 +811,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 
+	if (res->return_on_close) {
+		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+	}
+
 	/* Done processing layoutget. Set the layout stateid */
 	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f91d0d45551c..e2612ea0cbed 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
 
 enum {
 	NFS_LSEG_VALID = 0,	/* cleared when lseg is recalled/returned */
+	NFS_LSEG_ROC,		/* roc bit received from server */
 };
 
 struct pnfs_layout_segment {
@@ -50,6 +51,7 @@ enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
 	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
+	NFS_LAYOUT_ROC,			/* some lseg had roc bit set */
 	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */
 };
 
@@ -72,6 +74,7 @@ struct pnfs_layout_hdr {
 	struct list_head	plh_segs;      /* layout segments list */
 	nfs4_stateid		plh_stateid;
 	atomic_t		plh_outstanding; /* number of RPCs out */
+	unsigned long		plh_block_lgets; /* block LAYOUTGET if >0 */
 	u32			plh_barrier; /* ignore lower seqids */
 	unsigned long		plh_flags;
 	struct inode		*plh_inode;
@@ -162,6 +165,10 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
 int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
 				u32 iomode);
+bool pnfs_roc(struct inode *ino);
+void pnfs_roc_release(struct inode *ino);
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
 
 
 static inline int lo_fail_bit(u32 iomode)
@@ -193,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 	return NULL;
 }
 
+static inline bool
+pnfs_roc(struct inode *ino)
+{
+	return false;
+}
+
+static inline void
+pnfs_roc_release(struct inode *ino)
+{
+}
+
+static inline void
+pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+}
+
+static inline bool
+pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	return false;
+}
+
 static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
 {
 }
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e93ada0565fc..7f20c0b47a91 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -149,6 +149,7 @@ struct nfs_server {
 						   that are supported on this
 						   filesystem */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
+	struct rpc_wait_queue	roc_rpcwaitq;
 #endif
 	void (*destroy)(struct nfs_server *);
 
-- 
cgit v1.2.3-71-gd317


From 24d292b894273495f9664bb495e575f8cb7e8cac Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 24 Dec 2010 01:32:43 +0000
Subject: NFS: Move cl_state_owners and related fields to the nfs_server struct

NFSv4 migration needs to reassociate state owners from the source to
the destination nfs_server data structures.  To make that easier, move
the cl_state_owners field to the nfs_server struct.  cl_openowner_id
and cl_lockowner_id accompany this move, as they are used in
conjunction with cl_state_owners.

The cl_lock field in the parent nfs_client continues to protect all
three of these fields.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |   2 +-
 fs/nfs/nfs4state.c        | 251 +++++++++++++++++++++++++++++++++-------------
 include/linux/nfs_fs_sb.h |   9 +-
 3 files changed, 186 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 18d64cb5985b..7a7474073148 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -110,7 +110,7 @@ struct nfs_unique_id {
 struct nfs4_state_owner {
 	struct nfs_unique_id so_owner_id;
 	struct nfs_server    *so_server;
-	struct rb_node	     so_client_node;
+	struct rb_node	     so_server_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 286084f148e3..2336d532cf66 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
 		put_rpccred(cred);
 }
 
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_renew_cred_server_locked(struct nfs_server *server)
 {
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
-	struct rpc_cred *cred = NULL;
 
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		if (list_empty(&sp->so_states))
 			continue;
 		cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
 	return cred;
 }
 
+/**
+ * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ * Caller must hold clp->cl_lock.
+ */
+struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+{
+	struct rpc_cred *cred = NULL;
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_renew_cred_server_locked(server);
+		if (cred != NULL)
+			break;
+	}
+	rcu_read_unlock();
+	return cred;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -231,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
 
 #endif /* CONFIG_NFS_V4_1 */
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_setclientid_cred_server(struct nfs_server *server)
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
+
+	spin_lock(&clp->cl_lock);
+	pos = rb_first(&server->state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
+		cred = get_rpccred(sp->so_cred);
+	}
+	spin_unlock(&clp->cl_lock);
+	return cred;
+}
+
+/**
+ * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+{
+	struct nfs_server *server;
 	struct rpc_cred *cred;
 
 	spin_lock(&clp->cl_lock);
 	cred = nfs4_get_machine_cred_locked(clp);
+	spin_unlock(&clp->cl_lock);
 	if (cred != NULL)
 		goto out;
-	pos = rb_first(&clp->cl_state_owners);
-	if (pos != NULL) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		cred = get_rpccred(sp->so_cred);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_setclientid_cred_server(server);
+		if (cred != NULL)
+			break;
 	}
+	rcu_read_unlock();
+
 out:
-	spin_unlock(&clp->cl_lock);
 	return cred;
 }
 
-static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
-		__u64 minval, int maxbits)
+static void nfs_alloc_unique_id_locked(struct rb_root *root,
+				       struct nfs_unique_id *new,
+				       __u64 minval, int maxbits)
 {
 	struct rb_node **p, *parent;
 	struct nfs_unique_id *pos;
@@ -307,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs_client *clp = server->nfs_client;
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
 	while (*p != NULL) {
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
 		if (server < sp->so_server) {
 			p = &parent->rb_left;
@@ -340,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 }
 
 static struct nfs4_state_owner *
-nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 {
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct nfs_server *server = new->so_server;
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 	struct nfs4_state_owner *sp;
 
 	while (*p != NULL) {
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
-		if (new->so_server < sp->so_server) {
-			p = &parent->rb_left;
-			continue;
-		}
-		if (new->so_server > sp->so_server) {
-			p = &parent->rb_right;
-			continue;
-		}
 		if (new->so_cred < sp->so_cred)
 			p = &parent->rb_left;
 		else if (new->so_cred > sp->so_cred)
@@ -367,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
 			return sp;
 		}
 	}
-	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
-	rb_link_node(&new->so_client_node, parent, p);
-	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	nfs_alloc_unique_id_locked(&server->openowner_id,
+					&new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_server_node, parent, p);
+	rb_insert_color(&new->so_server_node, &server->state_owners);
 	return new;
 }
 
 static void
-nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node))
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+	struct nfs_server *server = sp->so_server;
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node))
+		rb_erase(&sp->so_server_node, &server->state_owners);
+	nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
 }
 
 /*
@@ -407,23 +455,32 @@ nfs4_alloc_state_owner(void)
 static void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
-		struct nfs_client *clp = sp->so_server->nfs_client;
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		struct nfs_server *server = sp->so_server;
+		struct nfs_client *clp = server->nfs_client;
 
 		spin_lock(&clp->cl_lock);
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-		RB_CLEAR_NODE(&sp->so_client_node);
+		rb_erase(&sp->so_server_node, &server->state_owners);
+		RB_CLEAR_NODE(&sp->so_server_node);
 		spin_unlock(&clp->cl_lock);
 	}
 }
 
-struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+/**
+ * nfs4_get_state_owner - Look up a state owner given a credential
+ * @server: nfs_server to search
+ * @cred: RPC credential to match
+ *
+ * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
+					      struct rpc_cred *cred)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(server, cred);
+	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
 		return sp;
@@ -433,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	new->so_server = server;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_insert_state_owner(clp, new);
+	sp = nfs4_insert_state_owner_locked(new);
 	spin_unlock(&clp->cl_lock);
 	if (sp == new)
 		get_rpccred(cred);
@@ -444,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	return sp;
 }
 
+/**
+ * nfs4_put_state_owner - Release a nfs4_state_owner
+ * @sp: state owner data to release
+ *
+ */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
 	struct nfs_client *clp = sp->so_server->nfs_client;
@@ -451,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	nfs4_remove_state_owner(clp, sp);
+	nfs4_remove_state_owner_locked(sp);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	put_rpccred(cred);
@@ -657,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs_client *clp = state->owner->so_server->nfs_client;
+	struct nfs_server *server = state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
 	if (lsp == NULL)
@@ -681,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 	}
 	spin_lock(&clp->cl_lock);
-	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+	nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
@@ -689,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 
 static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 {
-	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+	struct nfs_server *server = lsp->ls_state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	spin_lock(&clp->cl_lock);
-	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	kfree(lsp);
@@ -1138,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
 	}
 }
 
-static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+static void nfs4_reset_seqids(struct nfs_server *server,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 
-	/* Reset all sequence ids to zero */
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1155,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
 		}
 		spin_unlock(&sp->so_lock);
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_reset_seqids(server, mark_reclaim);
+	rcu_read_unlock();
 }
 
 static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1172,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
 		(void)ops->reclaim_complete(clp);
 }
 
-static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+static void nfs4_clear_reclaim_server(struct nfs_server *server)
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 
-	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
-		return 0;
-
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
-			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
+						&state->flags))
 				continue;
 			nfs4_state_mark_reclaim_nograce(clp, state);
 		}
 		spin_unlock(&sp->so_lock);
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+
+	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+		return 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_clear_reclaim_server(server);
+	rcu_read_unlock();
 
 	nfs_delegation_reap_unclaimed(clp);
 	return 1;
@@ -1262,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 
 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
 {
+	struct nfs4_state_owner *sp;
+	struct nfs_server *server;
 	struct rb_node *pos;
 	int status = 0;
 
 restart:
-	spin_lock(&clp->cl_lock);
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
-			continue;
-		atomic_inc(&sp->so_count);
-		spin_unlock(&clp->cl_lock);
-		status = nfs4_reclaim_open_state(sp, ops);
-		if (status < 0) {
-			set_bit(ops->owner_flag_bit, &sp->so_flags);
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		spin_lock(&clp->cl_lock);
+		for (pos = rb_first(&server->state_owners);
+		     pos != NULL;
+		     pos = rb_next(pos)) {
+			sp = rb_entry(pos,
+				struct nfs4_state_owner, so_server_node);
+			if (!test_and_clear_bit(ops->owner_flag_bit,
+							&sp->so_flags))
+				continue;
+			atomic_inc(&sp->so_count);
+			spin_unlock(&clp->cl_lock);
+			rcu_read_unlock();
+
+			status = nfs4_reclaim_open_state(sp, ops);
+			if (status < 0) {
+				set_bit(ops->owner_flag_bit, &sp->so_flags);
+				nfs4_put_state_owner(sp);
+				return nfs4_recovery_handle_error(clp, status);
+			}
+
 			nfs4_put_state_owner(sp);
-			return nfs4_recovery_handle_error(clp, status);
+			goto restart;
 		}
-		nfs4_put_state_owner(sp);
-		goto restart;
+		spin_unlock(&clp->cl_lock);
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return status;
 }
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7f20c0b47a91..27255ffdbe10 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -47,11 +47,7 @@ struct nfs_client {
 	u64			cl_clientid;	/* constant */
 	unsigned long		cl_state;
 
-	struct rb_root		cl_openowner_id;
-	struct rb_root		cl_lockowner_id;
-
 	struct list_head	cl_delegations;
-	struct rb_root		cl_state_owners;
 	spinlock_t		cl_lock;
 
 	unsigned long		cl_lease_time;
@@ -150,6 +146,11 @@ struct nfs_server {
 						   filesystem */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
 	struct rpc_wait_queue	roc_rpcwaitq;
+
+	/* the following fields are protected by nfs_client->cl_lock */
+	struct rb_root		state_owners;
+	struct rb_root		openowner_id;
+	struct rb_root		lockowner_id;
 #endif
 	void (*destroy)(struct nfs_server *);
 
-- 
cgit v1.2.3-71-gd317


From d3978bb325510f0a26ebd92f211b36c5f98b2306 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 24 Dec 2010 01:33:04 +0000
Subject: NFS: Move cl_delegations to the nfs_server struct

Delegations are per-inode, not per-nfs_client.  When a server file
system is migrated, delegations on the client must be moved from the
source to the destination nfs_server.  Make it easier to manage a
mount point's delegation list across a migration event by moving the
list to the nfs_server struct.

Clean up: I added documenting comments to public functions I changed
in this patch.  For consistency I added comments to all the other
public functions in fs/nfs/delegation.c.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |   2 +-
 fs/nfs/delegation.c       | 337 +++++++++++++++++++++++++++++++++-------------
 fs/nfs/delegation.h       |   1 +
 fs/nfs/nfs4renewd.c       |   2 +-
 include/linux/nfs_fs_sb.h |   2 +-
 5 files changed, 251 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 32b5fbfab35e..192f2f860265 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -172,7 +172,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	if (err)
 		goto error_cleanup;
 
-	INIT_LIST_HEAD(&clp->cl_delegations);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -1040,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
 	/* Zero out the NFS state stuff */
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->master_link);
+	INIT_LIST_HEAD(&server->delegations);
 
 	atomic_set(&server->active, 0);
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 521d71b81825..364e4328f392 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -40,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 }
 
+/**
+ * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
+ * @delegation: delegation to process
+ *
+ */
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
 {
 	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 }
 
+/**
+ * nfs_have_delegation - check if inode has a delegation
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
 int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 	struct nfs_delegation *delegation;
@@ -119,10 +131,15 @@ again:
 	return 0;
 }
 
-/*
- * Set up a delegation on an inode
+/**
+ * nfs_inode_reclaim_delegation - process a delegation reclaim request
+ * @inode: inode to process
+ * @cred: credential to use for request
+ * @res: new delegation state from server
+ *
  */
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+				  struct nfs_openres *res)
 {
 	struct nfs_delegation *delegation;
 	struct rpc_cred *oldcred = NULL;
@@ -177,11 +194,11 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
 
 static struct nfs_delegation *
 nfs_detach_delegation_locked(struct nfs_inode *nfsi,
-			     struct nfs_client *clp)
+			     struct nfs_server *server)
 {
 	struct nfs_delegation *delegation =
 		rcu_dereference_protected(nfsi->delegation,
-					  lockdep_is_held(&clp->cl_lock));
+				lockdep_is_held(&server->nfs_client->cl_lock));
 
 	if (delegation == NULL)
 		goto nomatch;
@@ -198,22 +215,29 @@ nomatch:
 }
 
 static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
-						    struct nfs_client *clp)
+						    struct nfs_server *server)
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_delegation *delegation;
 
 	spin_lock(&clp->cl_lock);
-	delegation = nfs_detach_delegation_locked(nfsi, clp);
+	delegation = nfs_detach_delegation_locked(nfsi, server);
 	spin_unlock(&clp->cl_lock);
 	return delegation;
 }
 
-/*
- * Set up a delegation on an inode
+/**
+ * nfs_inode_set_delegation - set up a delegation on an inode
+ * @inode: inode to which delegation applies
+ * @cred: cred to use for subsequent delegation processing
+ * @res: new delegation state from server
+ *
+ * Returns zero on success, or a negative errno value.
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation, *old_delegation;
 	struct nfs_delegation *freeme = NULL;
@@ -234,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 
 	spin_lock(&clp->cl_lock);
 	old_delegation = rcu_dereference_protected(nfsi->delegation,
-						   lockdep_is_held(&clp->cl_lock));
+					lockdep_is_held(&clp->cl_lock));
 	if (old_delegation != NULL) {
 		if (memcmp(&delegation->stateid, &old_delegation->stateid,
 					sizeof(old_delegation->stateid)) == 0 &&
@@ -253,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			delegation = NULL;
 			goto out;
 		}
-		freeme = nfs_detach_delegation_locked(nfsi, clp);
+		freeme = nfs_detach_delegation_locked(nfsi, server);
 	}
-	list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+	list_add_rcu(&delegation->super_list, &server->delegations);
 	nfsi->delegation_state = delegation->type;
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
@@ -297,67 +321,85 @@ out:
 	return err;
 }
 
-/*
- * Return all delegations that have been marked for return
+/**
+ * nfs_client_return_marked_delegations - return previously marked delegations
+ * @clp: nfs_client to process
+ *
+ * Returns zero on success, or a negative errno value.
  */
 int nfs_client_return_marked_delegations(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
+	struct nfs_server *server;
 	struct inode *inode;
 	int err = 0;
 
 restart:
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
-		if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
-			continue;
-		inode = nfs_delegation_grab_inode(delegation);
-		if (inode == NULL)
-			continue;
-		delegation = nfs_detach_delegation(NFS_I(inode), clp);
-		rcu_read_unlock();
-		if (delegation != NULL) {
-			filemap_flush(inode->i_mapping);
-			err = __nfs_inode_return_delegation(inode, delegation, 0);
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		list_for_each_entry_rcu(delegation, &server->delegations,
+								super_list) {
+			if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
+							&delegation->flags))
+				continue;
+			inode = nfs_delegation_grab_inode(delegation);
+			if (inode == NULL)
+				continue;
+			delegation = nfs_detach_delegation(NFS_I(inode),
+								server);
+			rcu_read_unlock();
+
+			if (delegation != NULL) {
+				filemap_flush(inode->i_mapping);
+				err = __nfs_inode_return_delegation(inode,
+								delegation, 0);
+			}
+			iput(inode);
+			if (!err)
+				goto restart;
+			set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+			return err;
 		}
-		iput(inode);
-		if (!err)
-			goto restart;
-		set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
-		return err;
 	}
 	rcu_read_unlock();
 	return 0;
 }
 
-/*
- * This function returns the delegation without reclaiming opens
- * or protecting against delegation reclaims.
- * It is therefore really only safe to be called from
- * nfs4_clear_inode()
+/**
+ * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * @inode: inode to process
+ *
+ * Does not protect against delegation reclaims, therefore really only safe
+ * to be called from nfs4_clear_inode().
  */
 void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		delegation = nfs_detach_delegation(nfsi, clp);
+		delegation = nfs_detach_delegation(nfsi, server);
 		if (delegation != NULL)
 			nfs_do_return_delegation(inode, delegation, 0);
 	}
 }
 
+/**
+ * nfs_inode_return_delegation - synchronously return a delegation
+ * @inode: inode to process
+ *
+ * Returns zero on success, or a negative errno value.
+ */
 int nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int err = 0;
 
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		delegation = nfs_detach_delegation(nfsi, clp);
+		delegation = nfs_detach_delegation(nfsi, server);
 		if (delegation != NULL) {
 			nfs_wb_all(inode);
 			err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -366,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
 	return err;
 }
 
-static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
 {
+	struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
+
 	set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
 	set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
 }
 
-/*
- * Return all delegations associated to a super block
+/**
+ * nfs_super_return_all_delegations - return delegations for one superblock
+ * @sb: sb to process
+ *
  */
 void nfs_super_return_all_delegations(struct super_block *sb)
 {
-	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_delegation *delegation;
 
 	if (clp == NULL)
 		return;
+
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		spin_lock(&delegation->lock);
-		if (delegation->inode != NULL && delegation->inode->i_sb == sb)
-			set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+		set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
 		spin_unlock(&delegation->lock);
 	}
 	rcu_read_unlock();
+
 	if (nfs_client_return_marked_delegations(clp) != 0)
 		nfs4_schedule_state_manager(clp);
 }
 
-static
-void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+						 fmode_t flags)
 {
 	struct nfs_delegation *delegation;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
 			continue;
 		if (delegation->type & flags)
-			nfs_mark_return_delegation(clp, delegation);
+			nfs_mark_return_delegation(delegation);
 	}
+}
+
+static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+							fmode_t flags)
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_mark_return_all_delegation_types(server, flags);
 	rcu_read_unlock();
 }
 
@@ -420,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
 		nfs4_schedule_state_manager(clp);
 }
 
+/**
+ * nfs_expire_all_delegation_types
+ * @clp: client to process
+ * @flags: delegation types to expire
+ *
+ */
 void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
 {
 	nfs_client_mark_return_all_delegation_types(clp, flags);
 	nfs_delegation_run_state_manager(clp);
 }
 
+/**
+ * nfs_expire_all_delegations
+ * @clp: client to process
+ *
+ */
 void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 	nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
 }
 
-/*
- * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+/**
+ * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
  */
 void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
@@ -441,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
 	nfs_client_mark_return_all_delegations(clp);
 }
 
-static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
+static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
 {
 	struct nfs_delegation *delegation;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
 			continue;
-		nfs_mark_return_delegation(clp, delegation);
+		nfs_mark_return_delegation(delegation);
 	}
-	rcu_read_unlock();
 }
 
+/**
+ * nfs_expire_unreferenced_delegations - Eliminate unused delegations
+ * @clp: nfs_client to process
+ *
+ */
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 {
-	nfs_client_mark_return_unreferenced_delegations(clp);
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_mark_return_unreferenced_delegations(server);
+	rcu_read_unlock();
+
 	nfs_delegation_run_state_manager(clp);
 }
 
-/*
- * Asynchronous delegation recall!
+/**
+ * nfs_async_inode_return_delegation - asynchronously return a delegation
+ * @inode: inode to process
+ * @stateid: state ID information from CB_RECALL arguments
+ *
+ * Returns zero on success, or a negative errno value.
  */
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+int nfs_async_inode_return_delegation(struct inode *inode,
+				      const nfs4_stateid *stateid)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_delegation *delegation;
@@ -475,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
 		rcu_read_unlock();
 		return -ENOENT;
 	}
-
-	nfs_mark_return_delegation(clp, delegation);
+	nfs_mark_return_delegation(delegation);
 	rcu_read_unlock();
+
 	nfs_delegation_run_state_manager(clp);
 	return 0;
 }
 
-/*
- * Retrieve the inode associated with a delegation
- */
-struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
+static struct inode *
+nfs_delegation_find_inode_server(struct nfs_server *server,
+				 const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		spin_lock(&delegation->lock);
 		if (delegation->inode != NULL &&
 		    nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -500,47 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 		if (res != NULL)
 			break;
 	}
+	return res;
+}
+
+/**
+ * nfs_delegation_find_inode - retrieve the inode associated with a delegation
+ * @clp: client state handle
+ * @fhandle: filehandle from a delegation recall
+ *
+ * Returns pointer to inode matching "fhandle," or NULL if a matching inode
+ * cannot be found.
+ */
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
+					const struct nfs_fh *fhandle)
+{
+	struct nfs_server *server;
+	struct inode *res = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		res = nfs_delegation_find_inode_server(server, fhandle);
+		if (res != NULL)
+			break;
+	}
 	rcu_read_unlock();
 	return res;
 }
 
-/*
- * Mark all delegations as needing to be reclaimed
+static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
+{
+	struct nfs_delegation *delegation;
+
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list)
+		set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+}
+
+/**
+ * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
+ * @clp: nfs_client to process
+ *
  */
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation;
+	struct nfs_server *server;
+
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
-		set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_delegation_mark_reclaim_server(server);
 	rcu_read_unlock();
 }
 
-/*
- * Reap all unclaimed delegations after reboot recovery is done
+/**
+ * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
+ * @clp: nfs_client to process
+ *
  */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
+	struct nfs_server *server;
 	struct inode *inode;
+
 restart:
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
-		if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
-			continue;
-		inode = nfs_delegation_grab_inode(delegation);
-		if (inode == NULL)
-			continue;
-		delegation = nfs_detach_delegation(NFS_I(inode), clp);
-		rcu_read_unlock();
-		if (delegation != NULL)
-			nfs_free_delegation(delegation);
-		iput(inode);
-		goto restart;
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		list_for_each_entry_rcu(delegation, &server->delegations,
+								super_list) {
+			if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+						&delegation->flags) == 0)
+				continue;
+			inode = nfs_delegation_grab_inode(delegation);
+			if (inode == NULL)
+				continue;
+			delegation = nfs_detach_delegation(NFS_I(inode),
+								server);
+			rcu_read_unlock();
+
+			if (delegation != NULL)
+				nfs_free_delegation(delegation);
+			iput(inode);
+			goto restart;
+		}
 	}
 	rcu_read_unlock();
 }
 
+/**
+ * nfs_delegations_present - check for existence of delegations
+ * @clp: client state handle
+ *
+ * Returns one if there are any nfs_delegation structures attached
+ * to this nfs_client.
+ */
+int nfs_delegations_present(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+	int ret = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		if (!list_empty(&server->delegations)) {
+			ret = 1;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * nfs4_copy_delegation_stateid - Copy inode's state ID information
+ * @dst: stateid data structure to fill in
+ * @inode: inode to check
+ *
+ * Returns one and fills in "dst->data" * if inode had a delegation,
+ * otherwise zero is returned.
+ */
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2026304bda19..d9322e490c56 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
 void nfs_handle_cb_pathdown(struct nfs_client *clp);
 int nfs_client_return_marked_delegations(struct nfs_client *clp);
+int nfs_delegations_present(struct nfs_client *clp);
 
 void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index cde5650ee5a2..402143d75fc5 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -80,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
 		cred = ops->get_state_renewal_cred_locked(clp);
 		spin_unlock(&clp->cl_lock);
 		if (cred == NULL) {
-			if (list_empty(&clp->cl_delegations)) {
+			if (!nfs_delegations_present(clp)) {
 				set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 				goto out;
 			}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 27255ffdbe10..b197563913bf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -47,7 +47,6 @@ struct nfs_client {
 	u64			cl_clientid;	/* constant */
 	unsigned long		cl_state;
 
-	struct list_head	cl_delegations;
 	spinlock_t		cl_lock;
 
 	unsigned long		cl_lease_time;
@@ -152,6 +151,7 @@ struct nfs_server {
 	struct rb_root		openowner_id;
 	struct rb_root		lockowner_id;
 #endif
+	struct list_head	delegations;
 	void (*destroy)(struct nfs_server *);
 
 	atomic_t active; /* Keep trace of any activity to this server */
-- 
cgit v1.2.3-71-gd317


From d035c36c58dd9183ad6aa7875dea89893faedb55 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 21 Dec 2010 10:45:27 -0500
Subject: NFSv4: Ensure continued open and lockowner name uniqueness

In order to enable migration support, we will want to move some of the
structures that are subject to migration into the struct nfs_server.
In particular, if we are to move the state_owner and state_owner_id to
being a per-filesystem structure, then we should label the resulting
open/lock owners with a per-filesytem label to ensure global uniqueness.

This patch does so by adding the super block s_dev to the open/lock owner
name.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  3 +++
 fs/nfs/nfs4xdr.c        | 14 ++++++++------
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88f590feeb72..f2b92f6a7efb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3779,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
 	arg.lock_owner.id = lsp->ls_id.id;
+	arg.lock_owner.s_dev = server->s_dev;
 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	switch (status) {
 		case 0:
@@ -4024,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
+	p->arg.lock_owner.s_dev = server->s_dev;
 	p->res.lock_seqid = p->arg.lock_seqid;
 	p->lsp = lsp;
 	p->server = server;
@@ -4428,6 +4430,7 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
 		return;
 	args->lock_owner.clientid = server->nfs_client->cl_clientid;
 	args->lock_owner.id = lsp->ls_id.id;
+	args->lock_owner.s_dev = server->s_dev;
 	msg.rpc_argp = args;
 	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3cbdd0c80a2d..8e496887ec61 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -71,8 +71,8 @@ static int nfs4_stat_to_errno(int);
 /* lock,open owner id:
  * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT  >> 2)
  */
-#define open_owner_id_maxsz	(1 + 4)
-#define lock_owner_id_maxsz	(1 + 4)
+#define open_owner_id_maxsz	(1 + 1 + 4)
+#define lock_owner_id_maxsz	(1 + 1 + 4)
 #define decode_lockowner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 #define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
@@ -1088,10 +1088,11 @@ static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lo
 {
 	__be32 *p;
 
-	p = reserve_space(xdr, 28);
+	p = reserve_space(xdr, 32);
 	p = xdr_encode_hyper(p, lowner->clientid);
-	*p++ = cpu_to_be32(16);
+	*p++ = cpu_to_be32(20);
 	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+	*p++ = cpu_to_be32(lowner->s_dev);
 	xdr_encode_hyper(p, lowner->id);
 }
 
@@ -1210,10 +1211,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	*p++ = cpu_to_be32(OP_OPEN);
 	*p = cpu_to_be32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->fmode);
-	p = reserve_space(xdr, 28);
+	p = reserve_space(xdr, 32);
 	p = xdr_encode_hyper(p, arg->clientid);
-	*p++ = cpu_to_be32(16);
+	*p++ = cpu_to_be32(20);
 	p = xdr_encode_opaque_fixed(p, "open id:", 8);
+	*p++ = cpu_to_be32(arg->server->s_dev);
 	xdr_encode_hyper(p, arg->id);
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 83d36d3a12e6..b0068579bec2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -317,6 +317,7 @@ struct nfs_closeres {
 struct nfs_lowner {
 	__u64			clientid;
 	__u64			id;
+	dev_t			s_dev;
 };
 
 struct nfs_lock_args {
-- 
cgit v1.2.3-71-gd317


From ccd35fb9f4da856b105ea0f1e0cab3702e8ae6ba Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:17 +1100
Subject: kernel: kmem_ptr_validate considered harmful

This is a nasty and error prone API. It is no longer used, remove it.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/slab.h |  2 --
 mm/slab.c            | 32 +-------------------------------
 mm/slob.c            |  5 -----
 mm/slub.c            | 40 ----------------------------------------
 mm/util.c            | 21 ---------------------
 5 files changed, 1 insertion(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 59260e21bdf5..fa9086647eb7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -106,8 +106,6 @@ int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
 unsigned int kmem_cache_size(struct kmem_cache *);
 const char *kmem_cache_name(struct kmem_cache *);
-int kern_ptr_validate(const void *ptr, unsigned long size);
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..6107f2380e08 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2781,7 +2781,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
 /*
  * Map pages beginning at addr to the given cache and slab. This is required
  * for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ * virtual address for kfree, ksize, and slab debugging.
  */
 static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
 			   void *addr)
@@ -3660,36 +3660,6 @@ void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
 EXPORT_SYMBOL(kmem_cache_alloc_notrace);
 #endif
 
-/**
- * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
- * @cachep: the cache we're checking against
- * @ptr: pointer to validate
- *
- * This verifies that the untrusted pointer looks sane;
- * it is _not_ a guarantee that the pointer is actually
- * part of the slab cache in question, but it at least
- * validates that the pointer can be dereferenced and
- * looks half-way sane.
- *
- * Currently only used for dentry validation.
- */
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
-{
-	unsigned long size = cachep->buffer_size;
-	struct page *page;
-
-	if (unlikely(!kern_ptr_validate(ptr, size)))
-		goto out;
-	page = virt_to_page(ptr);
-	if (unlikely(!PageSlab(page)))
-		goto out;
-	if (unlikely(page_get_cache(page) != cachep))
-		goto out;
-	return 1;
-out:
-	return 0;
-}
-
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
diff --git a/mm/slob.c b/mm/slob.c
index 617b6d6c42c7..3588eaaef726 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -678,11 +678,6 @@ int kmem_cache_shrink(struct kmem_cache *d)
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
-int kmem_ptr_validate(struct kmem_cache *a, const void *b)
-{
-	return 0;
-}
-
 static unsigned int slob_ready __read_mostly;
 
 int slab_is_available(void)
diff --git a/mm/slub.c b/mm/slub.c
index bec0e355fbad..a2fe1727ed85 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1917,17 +1917,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-/* Figure out on which slab page the object resides */
-static struct page *get_object_page(const void *x)
-{
-	struct page *page = virt_to_head_page(x);
-
-	if (!PageSlab(page))
-		return NULL;
-
-	return page;
-}
-
 /*
  * Object placement in a slab is made very easy because we always start at
  * offset 0. If we tune the size of the object to the alignment then we can
@@ -2385,35 +2374,6 @@ error:
 	return 0;
 }
 
-/*
- * Check if a given pointer is valid
- */
-int kmem_ptr_validate(struct kmem_cache *s, const void *object)
-{
-	struct page *page;
-
-	if (!kern_ptr_validate(object, s->size))
-		return 0;
-
-	page = get_object_page(object);
-
-	if (!page || s != page->slab)
-		/* No slab or wrong slab */
-		return 0;
-
-	if (!check_valid_pointer(s, page, object))
-		return 0;
-
-	/*
-	 * We could also check if the object is on the slabs freelist.
-	 * But this would be too expensive and it seems that the main
-	 * purpose of kmem_ptr_valid() is to check if the object belongs
-	 * to a certain slab.
-	 */
-	return 1;
-}
-EXPORT_SYMBOL(kmem_ptr_validate);
-
 /*
  * Determine the size of a slab object
  */
diff --git a/mm/util.c b/mm/util.c
index 73dac81e9f78..f126975ef23e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,27 +186,6 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
-int kern_ptr_validate(const void *ptr, unsigned long size)
-{
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = sizeof(void *) - 1;
-
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
-		goto out;
-	return 1;
-out:
-	return 0;
-}
-
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
-- 
cgit v1.2.3-71-gd317


From 5eef7fa905c814826f518aca2d414ca77508ce30 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:22 +1100
Subject: fs: dcache documentation cleanup

Remove redundant (and incorrect, since dcache RCU lookup) dentry locking
documentation and point to the canonical document.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/dcache.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6a4aea30aa09..fff975576b5b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -141,22 +141,16 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 };
 
-/* the dentry parameter passed to d_hash and d_compare is the parent
+/*
+ * Locking rules for dentry_operations callbacks are to be found in
+ * Documentation/filesystems/Locking. Keep it updated!
+ *
+ * the dentry parameter passed to d_hash and d_compare is the parent
  * directory of the entries to be compared. It is used in case these
  * functions need any directory specific information for determining
  * equivalency classes.  Using the dentry itself might not work, as it
  * might be a negative dentry which has no information associated with
- * it */
-
-/*
-locking rules:
-		big lock	dcache_lock	d_lock   may block
-d_revalidate:	no		no		no       yes
-d_hash		no		no		no       yes
-d_compare:	no		yes		yes      no
-d_delete:	no		yes		no       no
-d_release:	no		no		no       yes
-d_iput:		no		no		no       yes
+ * it.
  */
 
 /* d_flags entries */
-- 
cgit v1.2.3-71-gd317


From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:23 +1100
Subject: fs: change d_delete semantics

Change d_delete from a dentry deletion notification to a dentry caching
advise, more like ->drop_inode. Require it to be constant and idempotent,
and not take d_lock. This is how all existing filesystems use the callback
anyway.

This makes fine grained dentry locking of dput and dentry lru scanning
much simpler.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/porting |  8 ++++++++
 Documentation/filesystems/vfs.txt | 27 +++++++++++++--------------
 arch/ia64/kernel/perfmon.c        |  2 +-
 drivers/staging/smbfs/dir.c       |  4 ++--
 fs/9p/vfs_dentry.c                |  4 ++--
 fs/afs/dir.c                      |  4 ++--
 fs/btrfs/inode.c                  |  2 +-
 fs/coda/dir.c                     |  4 ++--
 fs/configfs/dir.c                 |  2 +-
 fs/dcache.c                       |  2 --
 fs/gfs2/dentry.c                  |  2 +-
 fs/hostfs/hostfs_kern.c           |  2 +-
 fs/libfs.c                        |  2 +-
 fs/ncpfs/dir.c                    |  4 ++--
 fs/nfs/dir.c                      |  2 +-
 fs/proc/base.c                    |  2 +-
 fs/proc/generic.c                 |  2 +-
 fs/proc/proc_sysctl.c             |  2 +-
 fs/sysfs/dir.c                    |  2 +-
 include/linux/dcache.h            |  6 +++---
 kernel/cgroup.c                   |  2 +-
 net/sunrpc/rpc_pipe.c             |  2 +-
 22 files changed, 47 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index b12c89538680..9e71c9ad3108 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had* *been* enough.  Final unlink() and iput(
 may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
 free the on-disk inode, you may end up doing that while ->write_inode() is writing
 to it.
+
+---
+[mandatory]
+
+	.d_delete() now only advises the dcache as to whether or not to cache
+unreferenced dentries, and is now only called when the dentry refcount goes to
+0. Even on 0 refcount transition, it must be able to tolerate being called 0,
+1, or more times (eg. constant, idempotent).
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 20899e095e7e..95c0a93f056c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -847,9 +847,9 @@ defined:
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash) (struct dentry *, struct qstr *);
-	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
-	int (*d_delete)(struct dentry *);
+	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
@@ -864,9 +864,11 @@ struct dentry_operations {
 
   d_compare: called when a dentry should be compared with another
 
-  d_delete: called when the last reference to a dentry is
-	deleted. This means no-one is using the dentry, however it is
-	still valid and in the dcache
+  d_delete: called when the last reference to a dentry is dropped and the
+	dcache is deciding whether or not to cache it. Return 1 to delete
+	immediately, or 0 to cache the dentry. Default is NULL which means to
+	always cache a reachable dentry. d_delete must be constant and
+	idempotent.
 
   d_release: called when a dentry is really deallocated
 
@@ -910,14 +912,11 @@ manipulate dentries:
 	the usage count)
 
   dput: close a handle for a dentry (decrements the usage count). If
-	the usage count drops to 0, the "d_delete" method is called
-	and the dentry is placed on the unused list if the dentry is
-	still in its parents hash list. Putting the dentry on the
-	unused list just means that if the system needs some RAM, it
-	goes through the unused list of dentries and deallocates them.
-	If the dentry has already been unhashed and the usage count
-	drops to 0, in this case the dentry is deallocated after the
-	"d_delete" method is called
+	the usage count drops to 0, and the dentry is still in its
+	parent's hash, the "d_delete" method is called to check whether
+	it should be cached. If it should not be cached, or if the dentry
+	is not hashed, it is deleted. Otherwise cached dentries are put
+	into an LRU list to be reclaimed on memory shortage.
 
   d_drop: this unhashes a dentry from its parents hash list. A
 	subsequent call to dput() will deallocate the dentry if its
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 39e534f5a3b0..d39d8a53b579 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2185,7 +2185,7 @@ static const struct file_operations pfm_file_ops = {
 };
 
 static int
-pfmfs_delete_dentry(struct dentry *dentry)
+pfmfs_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index f088ea2f6ac9..2270d4822c2f 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -276,7 +276,7 @@ smb_dir_open(struct inode *dir, struct file *file)
 static int smb_lookup_validate(struct dentry *, struct nameidata *);
 static int smb_hash_dentry(struct dentry *, struct qstr *);
 static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
-static int smb_delete_dentry(struct dentry *);
+static int smb_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations smbfs_dentry_operations =
 {
@@ -367,7 +367,7 @@ out:
  * We use this to unhash dentries with bad inodes.
  */
 static int
-smb_delete_dentry(struct dentry * dentry)
+smb_delete_dentry(const struct dentry *dentry)
 {
 	if (dentry->d_inode) {
 		if (is_bad_inode(dentry->d_inode)) {
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f3933..466d2a4fc5cb 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
  *
  */
 
-static int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(const struct dentry *dentry)
 {
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
 									dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
  *
  */
 
-static int v9fs_cached_dentry_delete(struct dentry *dentry)
+static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a86..2c18cde27000 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
-static int afs_d_delete(struct dentry *dentry);
+static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
@@ -730,7 +730,7 @@ out_bad:
  * - called from dput() when d_count is going to 0.
  * - return 1 to request dentry be unhashed, 0 otherwise
  */
-static int afs_d_delete(struct dentry *dentry)
+static int afs_d_delete(const struct dentry *dentry)
 {
 	_enter("%s", dentry->d_name.name);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 72f31ecb5c90..7ce9f8932789 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	return inode;
 }
 
-static int btrfs_dentry_delete(struct dentry *dentry)
+static int btrfs_dentry_delete(const struct dentry *dentry)
 {
 	struct btrfs_root *root;
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b35539601..4cce3b07d9d7 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -47,7 +47,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
 
 /* dentry ops */
 static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
-static int coda_dentry_delete(struct dentry *);
+static int coda_dentry_delete(const struct dentry *);
 
 /* support routines */
 static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -577,7 +577,7 @@ out:
  * This is the callback from dput() when d_count is going to 0.
  * We use this to unhash dentries with bad inodes.
  */
-static int coda_dentry_delete(struct dentry * dentry)
+static int coda_dentry_delete(const struct dentry * dentry)
 {
 	int flags;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 578706969415..20024a9ef5a7 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
  * We _must_ delete our dentries on last dput, as the chain-to-parent
  * behavior is required to clear the parents of default_groups.
  */
-static int configfs_d_delete(struct dentry *dentry)
+static int configfs_d_delete(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index b2cb2662ca00..6ee6bc40cb63 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -453,8 +453,6 @@ static void prune_one_dentry(struct dentry * dentry)
 		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
 			return;
 
-		if (dentry->d_op && dentry->d_op->d_delete)
-			dentry->d_op->d_delete(dentry);
 		dentry_lru_del(dentry);
 		__d_drop(dentry);
 		dentry = d_kill(dentry);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b3858..e80fea2f65ff 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -106,7 +106,7 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
 	return 0;
 }
 
-static int gfs2_dentry_delete(struct dentry *dentry)
+static int gfs2_dentry_delete(const struct dentry *dentry)
 {
 	struct gfs2_inode *ginode;
 
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e6..cfe8bc7de511 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 
 #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
 
-static int hostfs_d_delete(struct dentry *dentry)
+static int hostfs_d_delete(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528ad..b9d25d83e228 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -37,7 +37,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  */
-static int simple_delete_dentry(struct dentry *dentry)
+static int simple_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f22b12e7d337..d6e6453881ce 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -76,7 +76,7 @@ const struct inode_operations ncp_dir_inode_operations =
 static int ncp_lookup_validate(struct dentry *, struct nameidata *);
 static int ncp_hash_dentry(struct dentry *, struct qstr *);
 static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
-static int ncp_delete_dentry(struct dentry *);
+static int ncp_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations ncp_dentry_operations =
 {
@@ -162,7 +162,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
  * Closing files can be safely postponed until iput() - it's done there anyway.
  */
 static int
-ncp_delete_dentry(struct dentry * dentry)
+ncp_delete_dentry(const struct dentry * dentry)
 {
 	struct inode *inode = dentry->d_inode;
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..9184c7c80f78 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1117,7 +1117,7 @@ out_error:
 /*
  * This is called from dput() when d_count is going to 0.
  */
-static int nfs_dentry_delete(struct dentry *dentry)
+static int nfs_dentry_delete(const struct dentry *dentry)
 {
 	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 182845147fe4..d932fdb6a245 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1744,7 +1744,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return 0;
 }
 
-static int pid_delete_dentry(struct dentry * dentry)
+static int pid_delete_dentry(const struct dentry * dentry)
 {
 	/* Is the task we represent dead?
 	 * If so, then don't put the dentry on the lru list,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..1d607be36d95 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
  * smarter: we could keep a "volatile" flag in the 
  * inode to indicate which ones to keep.
  */
-static int proc_delete_dentry(struct dentry * dentry)
+static int proc_delete_dentry(const struct dentry * dentry)
 {
 	return 1;
 }
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906b..a256d770ea18 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -392,7 +392,7 @@ static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
 
-static int proc_sys_delete(struct dentry *dentry)
+static int proc_sys_delete(const struct dentry *dentry)
 {
 	return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b0..27e1102e303e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
 		goto repeat;
 }
 
-static int sysfs_dentry_delete(struct dentry *dentry)
+static int sysfs_dentry_delete(const struct dentry *dentry)
 {
 	struct sysfs_dirent *sd = dentry->d_fsdata;
 	return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index fff975576b5b..cbfc9567e4e9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -133,9 +133,9 @@ enum dentry_d_lock_class
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash) (struct dentry *, struct qstr *);
-	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
-	int (*d_delete)(struct dentry *);
+	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 163c890f436d..746055b214d7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2198,7 +2198,7 @@ static inline struct cftype *__file_cft(struct file *file)
 	return __d_cft(file->f_dentry);
 }
 
-static int cgroup_delete_dentry(struct dentry *dentry)
+static int cgroup_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 10a17a37ec4e..a0dc1a86fcea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -430,7 +430,7 @@ void rpc_put_mount(void)
 }
 EXPORT_SYMBOL_GPL(rpc_put_mount);
 
-static int rpc_delete_dentry(struct dentry *dentry)
+static int rpc_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
-- 
cgit v1.2.3-71-gd317


From fb2d5b86aff355a27ebfc132d3c99f4a940cc3fe Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:26 +1100
Subject: fs: name case update method

smpfs and ncpfs want to update a live dentry name in-place. Rather than
have them open code the locking, provide a documented dcache API.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 drivers/staging/smbfs/cache.c |  4 ++--
 fs/dcache.c                   | 27 +++++++++++++++++++++++++++
 fs/ncpfs/dir.c                | 35 ++++++-----------------------------
 include/linux/dcache.h        |  2 ++
 4 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index dbb98658148b..dbd2e1df3ba9 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -145,8 +145,8 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 			goto end_advance;
 	} else {
 		hashed = 1;
-		memcpy((char *) newdent->d_name.name, qname->name,
-		       newdent->d_name.len);
+		/* dir i_mutex is locked because we're in readdir */
+		dentry_update_name_case(newdent, qname);
 	}
 
 	if (!newdent->d_inode) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 6ee6bc40cb63..814e5f491e9c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1589,6 +1589,33 @@ void d_rehash(struct dentry * entry)
 }
 EXPORT_SYMBOL(d_rehash);
 
+/**
+ * dentry_update_name_case - update case insensitive dentry with a new name
+ * @dentry: dentry to be updated
+ * @name: new name
+ *
+ * Update a case insensitive dentry with new case of name.
+ *
+ * dentry must have been returned by d_lookup with name @name. Old and new
+ * name lengths must match (ie. no d_compare which allows mismatched name
+ * lengths).
+ *
+ * Parent inode i_mutex must be held over d_lookup and into this call (to
+ * keep renames and concurrent inserts, and readdir(2) away).
+ */
+void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
+{
+	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
+
+	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
+	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(dentry_update_name_case);
+
 /*
  * When switching names, the actual string doesn't strictly have to
  * be preserved in the target - because we're dropping the target
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index d6e6453881ce..e80ea4e37c48 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -611,35 +611,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 			shrink_dcache_parent(newdent);
 
 		/*
-		 * It is not as dangerous as it looks.  NetWare's OS2 namespace is
-		 * case preserving yet case insensitive.  So we update dentry's name
-		 * as received from server.  We found dentry via d_lookup with our
-		 * hash, so we know that hash does not change, and so replacing name
-		 * should be reasonably safe.
+		 * NetWare's OS2 namespace is case preserving yet case
+		 * insensitive.  So we update dentry's name as received from
+		 * server. Parent dir's i_mutex is locked because we're in
+		 * readdir.
 		 */
-		if (qname.len == newdent->d_name.len &&
-		    memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
-			struct inode *inode = newdent->d_inode;
-
-			/*
-			 * Inside ncpfs all uses of d_name are either for debugging,
-			 * or on functions which acquire inode mutex (mknod, creat,
-			 * lookup).  So grab i_mutex here, to be sure.  d_path
-			 * uses dcache_lock when generating path, so we should too.
-			 * And finally d_compare is protected by dentry's d_lock, so
-			 * here we go.
-			 */
-			if (inode)
-				mutex_lock(&inode->i_mutex);
-			spin_lock(&dcache_lock);
-			spin_lock(&newdent->d_lock);
-			memcpy((char *) newdent->d_name.name, qname.name,
-								newdent->d_name.len);
-			spin_unlock(&newdent->d_lock);
-			spin_unlock(&dcache_lock);
-			if (inode)
-				mutex_unlock(&inode->i_mutex);
-		}
+		dentry_update_name_case(newdent, &qname);
 	}
 
 	if (!newdent->d_inode) {
@@ -657,7 +634,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	} else {
 		struct inode *inode = newdent->d_inode;
 
-		mutex_lock(&inode->i_mutex);
+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
 		ncp_update_inode2(inode, entry);
 		mutex_unlock(&inode->i_mutex);
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index cbfc9567e4e9..6cdf4995c90a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -290,6 +290,8 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in
 	return res;
 }
 
+extern void dentry_update_name_case(struct dentry *, struct qstr *);
+
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
-- 
cgit v1.2.3-71-gd317


From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:27 +1100
Subject: fs: change d_compare for rcu-walk

Change d_compare so it may be called from lock-free RCU lookups. This
does put significant restrictions on what may be done from the callback,
however there don't seem to have been any problems with in-tree fses.
If some strange use case pops up that _really_ cannot cope with the
rcu-walk rules, we can just add new rcu-unaware callbacks, which would
cause name lookup to drop out of rcu-walk mode.

For in-tree filesystems, this is just a mechanical change.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/Locking |  4 +-
 Documentation/filesystems/porting |  7 +++
 Documentation/filesystems/vfs.txt | 26 +++++++++--
 drivers/staging/smbfs/dir.c       | 16 ++++---
 fs/adfs/dir.c                     |  8 ++--
 fs/affs/namei.c                   | 46 ++++++++++++--------
 fs/cifs/dir.c                     | 12 ++---
 fs/dcache.c                       |  4 +-
 fs/fat/namei_msdos.c              | 14 +++---
 fs/fat/namei_vfat.c               | 33 ++++++++------
 fs/hfs/hfs_fs.h                   |  5 ++-
 fs/hfs/string.c                   | 14 +++---
 fs/hfsplus/hfsplus_fs.h           |  5 ++-
 fs/hfsplus/unicode.c              | 15 ++++---
 fs/hpfs/dentry.c                  | 22 ++++++----
 fs/isofs/inode.c                  | 92 ++++++++++++++++++++-------------------
 fs/isofs/namei.c                  |  3 +-
 fs/jfs/namei.c                    | 11 +++--
 fs/ncpfs/dir.c                    | 25 ++++++-----
 fs/ncpfs/ncplib_kernel.h          |  8 ++--
 fs/proc/proc_sysctl.c             | 13 +++---
 include/linux/dcache.h            | 12 +++--
 include/linux/ncp_fs.h            |  4 +-
 23 files changed, 242 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 33fa3e5d38fd..9a76f8d8bf95 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -11,7 +11,9 @@ be able to use diff(1).
 prototypes:
 	int (*d_revalidate)(struct dentry *, int);
 	int (*d_hash) (struct dentry *, struct qstr *);
-	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
+	int (*d_compare)(const struct dentry *, const struct inode *,
+			const struct dentry *, const struct inode *,
+			unsigned int, const char *, const struct qstr *);
 	int (*d_delete)(struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 9e71c9ad3108..d44511e20828 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -326,3 +326,10 @@ to it.
 unreferenced dentries, and is now only called when the dentry refcount goes to
 0. Even on 0 refcount transition, it must be able to tolerate being called 0,
 1, or more times (eg. constant, idempotent).
+
+---
+[mandatory]
+
+	.d_compare() calling convention and locking rules are significantly
+changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+look at examples of other filesystems) for guidance.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 95c0a93f056c..250681b8c7cc 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -848,7 +848,9 @@ defined:
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
 	int (*d_hash)(struct dentry *, struct qstr *);
-	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_compare)(const struct dentry *, const struct inode *,
+			const struct dentry *, const struct inode *,
+			unsigned int, const char *, const struct qstr *);
 	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
@@ -860,9 +862,27 @@ struct dentry_operations {
 	dcache. Most filesystems leave this as NULL, because all their
 	dentries in the dcache are valid
 
-  d_hash: called when the VFS adds a dentry to the hash table
+  d_hash: called when the VFS adds a dentry to the hash table. The first
+	dentry passed to d_hash is the parent directory that the name is
+	to be hashed into.
 
-  d_compare: called when a dentry should be compared with another
+  d_compare: called to compare a dentry name with a given name. The first
+	dentry is the parent of the dentry to be compared, the second is
+	the parent's inode, then the dentry and inode (may be NULL) of the
+	child dentry. len and name string are properties of the dentry to be
+	compared. qstr is the name to compare it with.
+
+	Must be constant and idempotent, and should not take locks if
+	possible, and should not or store into the dentry or inodes.
+	Should not dereference pointers outside the dentry or inodes without
+	lots of care (eg.  d_parent, d_inode, d_name should not be used).
+
+	However, our vfsmount is pinned, and RCU held, so the dentries and
+	inodes won't disappear, neither will our sb or filesystem module.
+	->i_sb and ->d_sb may be used.
+
+	It is a tricky calling convention because it needs to be called under
+	"rcu-walk", ie. without any locks or references on things.
 
   d_delete: called when the last reference to a dentry is dropped and the
 	dcache is deciding whether or not to cache it. Return 1 to delete
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index 2270d4822c2f..bd63229f43f2 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -275,7 +275,10 @@ smb_dir_open(struct inode *dir, struct file *file)
  */
 static int smb_lookup_validate(struct dentry *, struct nameidata *);
 static int smb_hash_dentry(struct dentry *, struct qstr *);
-static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int smb_compare_dentry(const struct dentry *,
+		const struct inode *,
+		const struct dentry *, const struct inode *,
+		unsigned int, const char *, const struct qstr *);
 static int smb_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations smbfs_dentry_operations =
@@ -347,14 +350,17 @@ smb_hash_dentry(struct dentry *dir, struct qstr *this)
 }
 
 static int
-smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
+smb_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	int i, result = 1;
 
-	if (a->len != b->len)
+	if (len != name->len)
 		goto out;
-	for (i=0; i < a->len; i++) {
-		if (tolower(a->name[i]) != tolower(b->name[i]))
+	for (i=0; i < len; i++) {
+		if (tolower(str[i]) != tolower(name->name[i]))
 			goto out;
 	}
 	result = 0;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4de744..c8ed66162bd4 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -237,17 +237,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
  * requirements of the underlying filesystem.
  */
 static int
-adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name)
+adfs_compare(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	int i;
 
-	if (entry->len != name->len)
+	if (len != name->len)
 		return 1;
 
 	for (i = 0; i < name->len; i++) {
 		char a, b;
 
-		a = entry->name[i];
+		a = str[i];
 		b = name->name[i];
 
 		if (a >= 'A' && a <= 'Z')
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0bc07a..547d5deb0d42 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -14,10 +14,16 @@ typedef int (*toupper_t)(int);
 
 static int	 affs_toupper(int ch);
 static int	 affs_hash_dentry(struct dentry *, struct qstr *);
-static int       affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int       affs_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 static int	 affs_intl_toupper(int ch);
 static int	 affs_intl_hash_dentry(struct dentry *, struct qstr *);
-static int       affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int       affs_intl_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 
 const struct dentry_operations affs_dentry_operations = {
 	.d_hash		= affs_hash_dentry,
@@ -88,29 +94,29 @@ affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr)
 	return __affs_hash_dentry(dentry, qstr, affs_intl_toupper);
 }
 
-static inline int
-__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper)
+static inline int __affs_compare_dentry(unsigned int len,
+		const char *str, const struct qstr *name, toupper_t toupper)
 {
-	const u8 *aname = a->name;
-	const u8 *bname = b->name;
-	int len;
+	const u8 *aname = str;
+	const u8 *bname = name->name;
 
-	/* 'a' is the qstr of an already existing dentry, so the name
-	 * must be valid. 'b' must be validated first.
+	/*
+	 * 'str' is the name of an already existing dentry, so the name
+	 * must be valid. 'name' must be validated first.
 	 */
 
-	if (affs_check_name(b->name,b->len))
+	if (affs_check_name(name->name, name->len))
 		return 1;
 
-	/* If the names are longer than the allowed 30 chars,
+	/*
+	 * If the names are longer than the allowed 30 chars,
 	 * the excess is ignored, so their length may differ.
 	 */
-	len = a->len;
 	if (len >= 30) {
-		if (b->len < 30)
+		if (name->len < 30)
 			return 1;
 		len = 30;
-	} else if (len != b->len)
+	} else if (len != name->len)
 		return 1;
 
 	for (; len > 0; len--)
@@ -121,14 +127,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
 }
 
 static int
-affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(dentry, a, b, affs_toupper);
+	return __affs_compare_dentry(len, str, name, affs_toupper);
 }
 static int
-affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(dentry, a, b, affs_intl_toupper);
+	return __affs_compare_dentry(len, str, name, affs_intl_toupper);
 }
 
 /*
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 521d841b1fd1..c60133f0d8e4 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -715,13 +715,15 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
 	return 0;
 }
 
-static int cifs_ci_compare(struct dentry *dentry, struct qstr *a,
-			   struct qstr *b)
+static int cifs_ci_compare(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+	struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
 
-	if ((a->len == b->len) &&
-	    (nls_strnicmp(codepage, a->name, b->name, a->len) == 0))
+	if ((name->len == len) &&
+	    (nls_strnicmp(codepage, name->name, str, len) == 0))
 		return 0;
 	return 1;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 814e5f491e9c..7075555fbb04 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1437,7 +1437,9 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 		 */
 		qstr = &dentry->d_name;
 		if (parent->d_op && parent->d_op->d_compare) {
-			if (parent->d_op->d_compare(parent, qstr, name))
+			if (parent->d_op->d_compare(parent, parent->d_inode,
+						dentry, dentry->d_inode,
+						qstr->len, qstr->name, name))
 				goto next;
 		} else {
 			if (qstr->len != len)
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3345aabd1dd7..99d3c7ac973c 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -164,16 +164,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
  * Compare two msdos names. If either of the names are invalid,
  * we fall back to doing the standard name comparison.
  */
-static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
+	struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
 	unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
 	int error;
 
-	error = msdos_format_name(a->name, a->len, a_msdos_name, options);
+	error = msdos_format_name(name->name, name->len, a_msdos_name, options);
 	if (error)
 		goto old_compare;
-	error = msdos_format_name(b->name, b->len, b_msdos_name, options);
+	error = msdos_format_name(str, len, b_msdos_name, options);
 	if (error)
 		goto old_compare;
 	error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +184,8 @@ out:
 
 old_compare:
 	error = 1;
-	if (a->len == b->len)
-		error = memcmp(a->name, b->name, a->len);
+	if (name->len == len)
+		error = memcmp(name->name, str, len);
 	goto out;
 }
 
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b936703b8924..95e00ab84c3f 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -85,15 +85,18 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 }
 
 /* returns the length of a struct qstr, ignoring trailing dots */
-static unsigned int vfat_striptail_len(struct qstr *qstr)
+static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
 {
-	unsigned int len = qstr->len;
-
-	while (len && qstr->name[len - 1] == '.')
+	while (len && name[len - 1] == '.')
 		len--;
 	return len;
 }
 
+static unsigned int vfat_striptail_len(const struct qstr *qstr)
+{
+	return __vfat_striptail_len(qstr->len, qstr->name);
+}
+
 /*
  * Compute the hash for the vfat name corresponding to the dentry.
  * Note: if the name is invalid, we leave the hash code unchanged so
@@ -133,16 +136,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
 /*
  * Case insensitive compare of two vfat names.
  */
-static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+	struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
 	unsigned int alen, blen;
 
 	/* A filename cannot end in '.' or we treat it like it has none */
-	alen = vfat_striptail_len(a);
-	blen = vfat_striptail_len(b);
+	alen = vfat_striptail_len(name);
+	blen = __vfat_striptail_len(len, str);
 	if (alen == blen) {
-		if (nls_strnicmp(t, a->name, b->name, alen) == 0)
+		if (nls_strnicmp(t, name->name, str, alen) == 0)
 			return 0;
 	}
 	return 1;
@@ -151,15 +156,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
 /*
  * Case sensitive compare of two vfat names.
  */
-static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	unsigned int alen, blen;
 
 	/* A filename cannot end in '.' or we treat it like it has none */
-	alen = vfat_striptail_len(a);
-	blen = vfat_striptail_len(b);
+	alen = vfat_striptail_len(name);
+	blen = __vfat_striptail_len(len, str);
 	if (alen == blen) {
-		if (strncmp(a->name, b->name, alen) == 0)
+		if (strncmp(name->name, str, alen) == 0)
 			return 0;
 	}
 	return 1;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb81e849..8cd876f0e961 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -216,7 +216,10 @@ extern const struct dentry_operations hfs_dentry_operations;
 extern int hfs_hash_dentry(struct dentry *, struct qstr *);
 extern int hfs_strcmp(const unsigned char *, unsigned int,
 		      const unsigned char *, unsigned int);
-extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+extern int hfs_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 
 /* trans.c */
 extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af79428..aaf90d0d6940 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -92,21 +92,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
  * Test for equality of two strings in the HFS filename character ordering.
  * return 1 on failure and 0 on success
  */
-int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	const unsigned char *n1, *n2;
-	int len;
 
-	len = s1->len;
 	if (len >= HFS_NAMELEN) {
-		if (s2->len < HFS_NAMELEN)
+		if (name->len < HFS_NAMELEN)
 			return 1;
 		len = HFS_NAMELEN;
-	} else if (len != s2->len)
+	} else if (len != name->len)
 		return 1;
 
-	n1 = s1->name;
-	n2 = s2->name;
+	n1 = str;
+	n2 = name->name;
 	while (len--) {
 		if (caseorder[*n1++] != caseorder[*n2++])
 			return 1;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index cb3653efb57a..7aa96eefe483 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -380,7 +380,10 @@ int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *)
 int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
 int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
 int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
-int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2);
+int hfsplus_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 
 /* wrapper.c */
 int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index b66d67de882c..b178c997efa8 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -363,9 +363,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
  * Composed unicode characters are decomposed and case-folding is performed
  * if the appropriate bits are (un)set on the superblock.
  */
-int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+int hfsplus_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	struct super_block *sb = dentry->d_sb;
+	struct super_block *sb = parent->d_sb;
 	int casefold, decompose, size;
 	int dsize1, dsize2, len1, len2;
 	const u16 *dstr1, *dstr2;
@@ -375,10 +378,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
 
 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
-	astr1 = s1->name;
-	len1 = s1->len;
-	astr2 = s2->name;
-	len2 = s2->len;
+	astr1 = str;
+	len1 = len;
+	astr2 = name->name;
+	len2 = name->len;
 	dsize1 = dsize2 = 0;
 	dstr1 = dstr2 = NULL;
 
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36b3d5f..dd9b1e74a734 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -34,19 +34,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
 	return 0;
 }
 
-static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int hpfs_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	unsigned al=a->len;
-	unsigned bl=b->len;
-	hpfs_adjust_length(a->name, &al);
+	unsigned al = len;
+	unsigned bl = name->len;
+
+	hpfs_adjust_length(str, &al);
 	/*hpfs_adjust_length(b->name, &bl);*/
-	/* 'a' is the qstr of an already existing dentry, so the name
-	 * must be valid. 'b' must be validated first.
+
+	/*
+	 * 'str' is the nane of an already existing dentry, so the name
+	 * must be valid. 'name' must be validated first.
 	 */
 
-	if (hpfs_chk_name(b->name, &bl))
+	if (hpfs_chk_name(name->name, &bl))
 		return 1;
-	if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0))
+	if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
 		return 1;
 	return 0;
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bfdeb82a53be..7b0fbc61af81 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -28,14 +28,26 @@
 
 static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
 static int isofs_hash(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_dentry_cmpi(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 
 #ifdef CONFIG_JOLIET
 static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr);
 static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_dentry_cmpi_ms(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp_ms(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 #endif
 
 static void isofs_put_super(struct super_block *sb)
@@ -206,49 +218,31 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
 }
 
 /*
- * Case insensitive compare of two isofs names.
+ * Compare of two isofs names.
  */
-static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
-				struct qstr *b, int ms)
+static int isofs_dentry_cmp_common(
+		unsigned int len, const char *str,
+		const struct qstr *name, int ms, int ci)
 {
 	int alen, blen;
 
 	/* A filename cannot end in '.' or we treat it like it has none */
-	alen = a->len;
-	blen = b->len;
+	alen = name->len;
+	blen = len;
 	if (ms) {
-		while (alen && a->name[alen-1] == '.')
+		while (alen && name->name[alen-1] == '.')
 			alen--;
-		while (blen && b->name[blen-1] == '.')
+		while (blen && str[blen-1] == '.')
 			blen--;
 	}
 	if (alen == blen) {
-		if (strnicmp(a->name, b->name, alen) == 0)
-			return 0;
-	}
-	return 1;
-}
-
-/*
- * Case sensitive compare of two isofs names.
- */
-static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a,
-					struct qstr *b, int ms)
-{
-	int alen, blen;
-
-	/* A filename cannot end in '.' or we treat it like it has none */
-	alen = a->len;
-	blen = b->len;
-	if (ms) {
-		while (alen && a->name[alen-1] == '.')
-			alen--;
-		while (blen && b->name[blen-1] == '.')
-			blen--;
-	}
-	if (alen == blen) {
-		if (strncmp(a->name, b->name, alen) == 0)
-			return 0;
+		if (ci) {
+			if (strnicmp(name->name, str, alen) == 0)
+				return 0;
+		} else {
+			if (strncmp(name->name, str, alen) == 0)
+				return 0;
+		}
 	}
 	return 1;
 }
@@ -266,15 +260,19 @@ isofs_hashi(struct dentry *dentry, struct qstr *qstr)
 }
 
 static int
-isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return isofs_dentry_cmp_common(dentry, a, b, 0);
+	return isofs_dentry_cmp_common(len, str, name, 0, 0);
 }
 
 static int
-isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return isofs_dentry_cmpi_common(dentry, a, b, 0);
+	return isofs_dentry_cmp_common(len, str, name, 0, 1);
 }
 
 #ifdef CONFIG_JOLIET
@@ -291,15 +289,19 @@ isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr)
 }
 
 static int
-isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return isofs_dentry_cmp_common(dentry, a, b, 1);
+	return isofs_dentry_cmp_common(len, str, name, 1, 0);
 }
 
 static int
-isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return isofs_dentry_cmpi_common(dentry, a, b, 1);
+	return isofs_dentry_cmp_common(len, str, name, 1, 1);
 }
 #endif
 
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abfd4280..715f7d318046 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
 
 	qstr.name = compare;
 	qstr.len = dlen;
-	return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr);
+	return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
+			dentry->d_name.len, dentry->d_name.name, &qstr);
 }
 
 /*
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 2da1546161fa..92129016cd79 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1587,14 +1587,17 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
 	return 0;
 }
 
-static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b)
+static int jfs_ci_compare(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	int i, result = 1;
 
-	if (a->len != b->len)
+	if (len != name->len)
 		goto out;
-	for (i=0; i < a->len; i++) {
-		if (tolower(a->name[i]) != tolower(b->name[i]))
+	for (i=0; i < len; i++) {
+		if (tolower(str[i]) != tolower(name->name[i]))
 			goto out;
 	}
 	result = 0;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index e80ea4e37c48..3bcc68aed416 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -75,7 +75,9 @@ const struct inode_operations ncp_dir_inode_operations =
  */
 static int ncp_lookup_validate(struct dentry *, struct nameidata *);
 static int ncp_hash_dentry(struct dentry *, struct qstr *);
-static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
+static int ncp_compare_dentry(const struct dentry *, const struct inode *,
+		const struct dentry *, const struct inode *,
+		unsigned int, const char *, const struct qstr *);
 static int ncp_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations ncp_dentry_operations =
@@ -113,10 +115,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
 
 #define ncp_preserve_case(i)	(ncp_namespace(i) != NW_NS_DOS)
 
-static inline int ncp_case_sensitive(struct dentry *dentry)
+static inline int ncp_case_sensitive(const struct inode *i)
 {
 #ifdef CONFIG_NCPFS_NFS_NS
-	return ncp_namespace(dentry->d_inode) == NW_NS_NFS;
+	return ncp_namespace(i) == NW_NS_NFS;
 #else
 	return 0;
 #endif /* CONFIG_NCPFS_NFS_NS */
@@ -129,12 +131,13 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
 static int 
 ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
 {
-	if (!ncp_case_sensitive(dentry)) {
+	if (!ncp_case_sensitive(dentry->d_inode)) {
+		struct super_block *sb = dentry->d_sb;
 		struct nls_table *t;
 		unsigned long hash;
 		int i;
 
-		t = NCP_IO_TABLE(dentry);
+		t = NCP_IO_TABLE(sb);
 		hash = init_name_hash();
 		for (i=0; i<this->len ; i++)
 			hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -145,15 +148,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
 }
 
 static int
-ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	if (a->len != b->len)
+	if (len != name->len)
 		return 1;
 
-	if (ncp_case_sensitive(dentry))
-		return strncmp(a->name, b->name, a->len);
+	if (ncp_case_sensitive(pinode))
+		return strncmp(str, name->name, len);
 
-	return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len);
+	return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
 }
 
 /*
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca634ce..244d1b73fda7 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
 				const unsigned char *, unsigned int, int);
 
 #define NCP_ESC			':'
-#define NCP_IO_TABLE(dentry)	(NCP_SERVER((dentry)->d_inode)->nls_io)
+#define NCP_IO_TABLE(sb)	(NCP_SBP(sb)->nls_io)
 #define ncp_tolower(t, c)	nls_tolower(t, c)
 #define ncp_toupper(t, c)	nls_toupper(t, c)
 #define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
 int ncp__vol2io(unsigned char *, unsigned int *,
 				const unsigned char *, unsigned int, int);
 
-#define NCP_IO_TABLE(dentry)	NULL
+#define NCP_IO_TABLE(sb)	NULL
 #define ncp_tolower(t, c)	tolower(c)
 #define ncp_toupper(t, c)	toupper(c)
 #define ncp_io2vol(S,m,i,n,k,U)	ncp__io2vol(m,i,n,k,U)
 #define ncp_vol2io(S,m,i,n,k,U)	ncp__vol2io(m,i,n,k,U)
 
 
-static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1,
-		const unsigned char *s2, int len)
+static inline int ncp_strnicmp(const struct nls_table *t,
+		const unsigned char *s1, const unsigned char *s2, int len)
 {
 	while (len--) {
 		if (tolower(*s1++) != tolower(*s2++))
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a256d770ea18..ae4b0fd9033f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -397,15 +397,16 @@ static int proc_sys_delete(const struct dentry *dentry)
 	return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
 
-static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
-			    struct qstr *name)
+static int proc_sys_compare(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	struct dentry *dentry = container_of(qstr, struct dentry, d_name);
-	if (qstr->len != name->len)
+	if (name->len != len)
 		return 1;
-	if (memcmp(qstr->name, name->name, name->len))
+	if (memcmp(name->name, str, len))
 		return 1;
-	return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
+	return !sysctl_is_seen(PROC_I(inode)->sysctl);
 }
 
 static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6cdf4995c90a..75a072bf2a34 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -134,7 +134,9 @@ enum dentry_d_lock_class
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
 	int (*d_hash)(struct dentry *, struct qstr *);
-	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_compare)(const struct dentry *, const struct inode *,
+			const struct dentry *, const struct inode *,
+			unsigned int, const char *, const struct qstr *);
 	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
@@ -145,12 +147,8 @@ struct dentry_operations {
  * Locking rules for dentry_operations callbacks are to be found in
  * Documentation/filesystems/Locking. Keep it updated!
  *
- * the dentry parameter passed to d_hash and d_compare is the parent
- * directory of the entries to be compared. It is used in case these
- * functions need any directory specific information for determining
- * equivalency classes.  Using the dentry itself might not work, as it
- * might be a negative dentry which has no information associated with
- * it.
+ * FUrther descriptions are found in Documentation/filesystems/vfs.txt.
+ * Keep it updated too!
  */
 
 /* d_flags entries */
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index ef663061d5ac..1c27f201c856 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -184,13 +184,13 @@ struct ncp_entry_info {
 	__u8			file_handle[6];
 };
 
-static inline struct ncp_server *NCP_SBP(struct super_block *sb)
+static inline struct ncp_server *NCP_SBP(const struct super_block *sb)
 {
 	return sb->s_fs_info;
 }
 
 #define NCP_SERVER(inode)	NCP_SBP((inode)->i_sb)
-static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode)
+static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode)
 {
 	return container_of(inode, struct ncp_inode_info, vfs_inode);
 }
-- 
cgit v1.2.3-71-gd317


From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:28 +1100
Subject: fs: change d_hash for rcu-walk

Change d_hash so it may be called from lock-free RCU lookups. See similar
patch for d_compare for details.

For in-tree filesystems, this is just a mechanical change.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/Locking |  5 +++--
 Documentation/filesystems/porting |  7 +++++++
 Documentation/filesystems/vfs.txt |  8 ++++++--
 drivers/staging/smbfs/cache.c     |  2 +-
 drivers/staging/smbfs/dir.c       |  6 ++++--
 fs/adfs/dir.c                     |  3 ++-
 fs/affs/namei.c                   | 20 ++++++++++++--------
 fs/cifs/dir.c                     |  5 +++--
 fs/cifs/readdir.c                 |  2 +-
 fs/dcache.c                       |  2 +-
 fs/ecryptfs/inode.c               |  4 ++--
 fs/fat/namei_msdos.c              |  3 ++-
 fs/fat/namei_vfat.c               |  8 +++++---
 fs/gfs2/dentry.c                  |  3 ++-
 fs/hfs/hfs_fs.h                   |  3 ++-
 fs/hfs/string.c                   |  3 ++-
 fs/hfsplus/hfsplus_fs.h           |  3 ++-
 fs/hfsplus/unicode.c              |  3 ++-
 fs/hpfs/dentry.c                  |  3 ++-
 fs/isofs/inode.c                  | 28 ++++++++++++++++++----------
 fs/jfs/namei.c                    |  3 ++-
 fs/namei.c                        |  5 +++--
 fs/ncpfs/dir.c                    | 10 ++++++----
 fs/sysv/namei.c                   |  3 ++-
 include/linux/dcache.h            |  3 ++-
 25 files changed, 94 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 9a76f8d8bf95..a15ee207b449 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -10,7 +10,8 @@ be able to use diff(1).
 --------------------------- dentry_operations --------------------------
 prototypes:
 	int (*d_revalidate)(struct dentry *, int);
-	int (*d_hash) (struct dentry *, struct qstr *);
+	int (*d_hash)(const struct dentry *, const struct inode *,
+			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
 			const struct dentry *, const struct inode *,
 			unsigned int, const char *, const struct qstr *);
@@ -22,7 +23,7 @@ prototypes:
 locking rules:
 		dcache_lock	rename_lock	->d_lock	may block
 d_revalidate:	no		no		no		yes
-d_hash		no		no		no		yes
+d_hash		no		no		no		no
 d_compare:	no		yes		no		no 
 d_delete:	yes		no		yes		no
 d_release:	no		no		no		yes
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index d44511e20828..9fd31940a8ef 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -333,3 +333,10 @@ unreferenced dentries, and is now only called when the dentry refcount goes to
 	.d_compare() calling convention and locking rules are significantly
 changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
 look at examples of other filesystems) for guidance.
+
+---
+[mandatory]
+
+	.d_hash() calling convention and locking rules are significantly
+changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+look at examples of other filesystems) for guidance.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 250681b8c7cc..69b10ff5ec81 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -847,7 +847,8 @@ defined:
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_hash)(const struct dentry *, const struct inode *,
+			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
 			const struct dentry *, const struct inode *,
 			unsigned int, const char *, const struct qstr *);
@@ -864,7 +865,10 @@ struct dentry_operations {
 
   d_hash: called when the VFS adds a dentry to the hash table. The first
 	dentry passed to d_hash is the parent directory that the name is
-	to be hashed into.
+	to be hashed into. The inode is the dentry's inode.
+
+	Same locking and synchronisation rules as d_compare regarding
+	what is safe to dereference etc.
 
   d_compare: called to compare a dentry name with a given name. The first
 	dentry is the parent of the dentry to be compared, the second is
diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index dbd2e1df3ba9..0beded260b00 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -134,7 +134,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	qname->hash = full_name_hash(qname->name, qname->len);
 
 	if (dentry->d_op && dentry->d_op->d_hash)
-		if (dentry->d_op->d_hash(dentry, qname) != 0)
+		if (dentry->d_op->d_hash(dentry, inode, qname) != 0)
 			goto end_advance;
 
 	newdent = d_lookup(dentry, qname);
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index bd63229f43f2..5f79799d5d4a 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -274,7 +274,8 @@ smb_dir_open(struct inode *dir, struct file *file)
  * Dentry operations routines
  */
 static int smb_lookup_validate(struct dentry *, struct nameidata *);
-static int smb_hash_dentry(struct dentry *, struct qstr *);
+static int smb_hash_dentry(const struct dentry *, const struct inode *,
+		struct qstr *);
 static int smb_compare_dentry(const struct dentry *,
 		const struct inode *,
 		const struct dentry *, const struct inode *,
@@ -336,7 +337,8 @@ smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
 }
 
 static int 
-smb_hash_dentry(struct dentry *dir, struct qstr *this)
+smb_hash_dentry(const struct dentry *dir, const struct inode *inode,
+		struct qstr *this)
 {
 	unsigned long hash;
 	int i;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index c8ed66162bd4..a11e5e102716 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
 };
 
 static int
-adfs_hash(struct dentry *parent, struct qstr *qstr)
+adfs_hash(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr)
 {
 	const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
 	const unsigned char *name;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 547d5deb0d42..5aca08c21100 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,13 +13,15 @@
 typedef int (*toupper_t)(int);
 
 static int	 affs_toupper(int ch);
-static int	 affs_hash_dentry(struct dentry *, struct qstr *);
+static int	 affs_hash_dentry(const struct dentry *,
+		const struct inode *, struct qstr *);
 static int       affs_compare_dentry(const struct dentry *parent,
 		const struct inode *pinode,
 		const struct dentry *dentry, const struct inode *inode,
 		unsigned int len, const char *str, const struct qstr *name);
 static int	 affs_intl_toupper(int ch);
-static int	 affs_intl_hash_dentry(struct dentry *, struct qstr *);
+static int	 affs_intl_hash_dentry(const struct dentry *,
+		const struct inode *, struct qstr *);
 static int       affs_intl_compare_dentry(const struct dentry *parent,
 		const struct inode *pinode,
 		const struct dentry *dentry, const struct inode *inode,
@@ -64,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
 {
 	const u8 *name = qstr->name;
 	unsigned long hash;
 	int i;
 
-	i = affs_check_name(qstr->name,qstr->len);
+	i = affs_check_name(qstr->name, qstr->len);
 	if (i)
 		return i;
 
@@ -84,14 +86,16 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
 }
 
 static int
-affs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
-	return __affs_hash_dentry(dentry, qstr, affs_toupper);
+	return __affs_hash_dentry(qstr, affs_toupper);
 }
 static int
-affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
-	return __affs_hash_dentry(dentry, qstr, affs_intl_toupper);
+	return __affs_hash_dentry(qstr, affs_intl_toupper);
 }
 
 static inline int __affs_compare_dentry(unsigned int len,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index c60133f0d8e4..88bfe686ac00 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -700,9 +700,10 @@ const struct dentry_operations cifs_dentry_ops = {
 /* d_delete:       cifs_d_delete,      */ /* not needed except for debugging */
 };
 
-static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
+static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *q)
 {
-	struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+	struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
 	unsigned long hash;
 	int i;
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a73eb9f4bdaf..ee463aeca0b0 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 	cFYI(1, "For %s", name->name);
 
 	if (parent->d_op && parent->d_op->d_hash)
-		parent->d_op->d_hash(parent, name);
+		parent->d_op->d_hash(parent, parent->d_inode, name);
 	else
 		name->hash = full_name_hash(name->name, name->len);
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 7075555fbb04..6f59f375ed9d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1478,7 +1478,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
 	 */
 	name->hash = full_name_hash(name->name, name->len);
 	if (dir->d_op && dir->d_op->d_hash) {
-		if (dir->d_op->d_hash(dir, name) < 0)
+		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
 			goto out;
 	}
 	dentry = d_lookup(dir, name);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 9d1a22d62765..a1ed7a7cb173 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	lower_name.hash = ecryptfs_dentry->d_name.hash;
 	if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
 		rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
-						    &lower_name);
+				lower_dir_dentry->d_inode, &lower_name);
 		if (rc < 0)
 			goto out_d_drop;
 	}
@@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
 	if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
 		rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
-						    &lower_name);
+				lower_dir_dentry->d_inode, &lower_name);
 		if (rc < 0)
 			goto out_d_drop;
 	}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 99d3c7ac973c..3b3e072d8982 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
  * that the existing dentry can be used. The msdos fs routines will
  * return ENOENT or EINVAL as appropriate.
  */
-static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
+static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
+	       struct qstr *qstr)
 {
 	struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
 	unsigned char msdos_name[MSDOS_NAME];
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 95e00ab84c3f..4fc06278db48 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -103,7 +103,8 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr)
  * that the existing dentry can be used. The vfat fs routines will
  * return ENOENT or EINVAL as appropriate.
  */
-static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
 	return 0;
@@ -115,9 +116,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
  * that the existing dentry can be used. The vfat fs routines will
  * return ENOENT or EINVAL as appropriate.
  */
-static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
-	struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+	struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
 	const unsigned char *name;
 	unsigned int len;
 	unsigned long hash;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index e80fea2f65ff..50497f65763b 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -100,7 +100,8 @@ fail:
 	return 0;
 }
 
-static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
+static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *str)
 {
 	str->hash = gfs2_disk_hash(str->name, str->len);
 	return 0;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 8cd876f0e961..ad97c2d58287 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,7 +213,8 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
 /* string.c */
 extern const struct dentry_operations hfs_dentry_operations;
 
-extern int hfs_hash_dentry(struct dentry *, struct qstr *);
+extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
+		struct qstr *);
 extern int hfs_strcmp(const unsigned char *, unsigned int,
 		      const unsigned char *, unsigned int);
 extern int hfs_compare_dentry(const struct dentry *parent,
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index aaf90d0d6940..495a976a3cc9 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
 /*
  * Hash a string to an integer in a case-independent way
  */
-int hfs_hash_dentry(struct dentry *dentry, struct qstr *this)
+int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *this)
 {
 	const unsigned char *name = this->name;
 	unsigned int hash, len = this->len;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 7aa96eefe483..a5308f491e3e 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -379,7 +379,8 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist
 int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
 int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
 int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
+int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *str);
 int hfsplus_compare_dentry(const struct dentry *parent,
 		const struct inode *pinode,
 		const struct dentry *dentry, const struct inode *inode,
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index b178c997efa8..d800aa0f2c80 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -320,7 +320,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
  * Composed unicode characters are decomposed and case-folding is performed
  * if the appropriate bits are (un)set on the superblock.
  */
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
+int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *str)
 {
 	struct super_block *sb = dentry->d_sb;
 	const char *astr;
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index dd9b1e74a734..35526df1fd32 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
  * Note: the dentry argument is the parent dentry.
  */
 
-static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	unsigned long	 hash;
 	int		 i;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 7b0fbc61af81..d204ee4235fd 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,8 +26,10 @@
 
 #define BEQUIET
 
-static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash(struct dentry *parent, struct qstr *qstr);
+static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr);
+static int isofs_hash(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr);
 static int isofs_dentry_cmpi(const struct dentry *parent,
 		const struct inode *pinode,
 		const struct dentry *dentry, const struct inode *inode,
@@ -38,8 +40,10 @@ static int isofs_dentry_cmp(const struct dentry *parent,
 		unsigned int len, const char *str, const struct qstr *name);
 
 #ifdef CONFIG_JOLIET
-static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr);
+static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr);
+static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr);
 static int isofs_dentry_cmpi_ms(const struct dentry *parent,
 		const struct inode *pinode,
 		const struct dentry *dentry, const struct inode *inode,
@@ -172,7 +176,7 @@ struct iso9660_options{
  * Compute the hash for the isofs name corresponding to the dentry.
  */
 static int
-isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
 {
 	const char *name;
 	int len;
@@ -193,7 +197,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
  * Compute the hash for the isofs name corresponding to the dentry.
  */
 static int
-isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
 {
 	const char *name;
 	int len;
@@ -248,13 +252,15 @@ static int isofs_dentry_cmp_common(
 }
 
 static int
-isofs_hash(struct dentry *dentry, struct qstr *qstr)
+isofs_hash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	return isofs_hash_common(dentry, qstr, 0);
 }
 
 static int
-isofs_hashi(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	return isofs_hashi_common(dentry, qstr, 0);
 }
@@ -277,13 +283,15 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
 
 #ifdef CONFIG_JOLIET
 static int
-isofs_hash_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	return isofs_hash_common(dentry, qstr, 1);
 }
 
 static int
-isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	return isofs_hashi_common(dentry, qstr, 1);
 }
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 92129016cd79..57f90dad8919 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1574,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
 	.llseek		= generic_file_llseek,
 };
 
-static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
+static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
+		struct qstr *this)
 {
 	unsigned long hash;
 	int i;
diff --git a/fs/namei.c b/fs/namei.c
index 4ff7ca530533..f3b5ca404659 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -731,7 +731,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 	 * to use its own hash..
 	 */
 	if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
-		int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
+		int err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
+				nd->path.dentry->d_inode, name);
 		if (err < 0)
 			return err;
 	}
@@ -1134,7 +1135,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 * to use its own hash..
 	 */
 	if (base->d_op && base->d_op->d_hash) {
-		err = base->d_op->d_hash(base, name);
+		err = base->d_op->d_hash(base, inode, name);
 		dentry = ERR_PTR(err);
 		if (err < 0)
 			goto out;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 3bcc68aed416..bbbf7922f422 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -74,7 +74,8 @@ const struct inode_operations ncp_dir_inode_operations =
  * Dentry operations routines
  */
 static int ncp_lookup_validate(struct dentry *, struct nameidata *);
-static int ncp_hash_dentry(struct dentry *, struct qstr *);
+static int ncp_hash_dentry(const struct dentry *, const struct inode *,
+		struct qstr *);
 static int ncp_compare_dentry(const struct dentry *, const struct inode *,
 		const struct dentry *, const struct inode *,
 		unsigned int, const char *, const struct qstr *);
@@ -129,9 +130,10 @@ static inline int ncp_case_sensitive(const struct inode *i)
  * is case-sensitive.
  */
 static int 
-ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
+ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *this)
 {
-	if (!ncp_case_sensitive(dentry->d_inode)) {
+	if (!ncp_case_sensitive(inode)) {
 		struct super_block *sb = dentry->d_sb;
 		struct nls_table *t;
 		unsigned long hash;
@@ -597,7 +599,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	qname.hash = full_name_hash(qname.name, qname.len);
 
 	if (dentry->d_op && dentry->d_op->d_hash)
-		if (dentry->d_op->d_hash(dentry, &qname) != 0)
+		if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
 			goto end_advance;
 
 	newdent = d_lookup(dentry, &qname);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d11cd0..7507aeb4c90e 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
 	return err;
 }
 
-static int sysv_hash(struct dentry *dentry, struct qstr *qstr)
+static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
 	/* Truncate the name in place, avoids having to define a compare
 	   function. */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 75a072bf2a34..1149e706f04d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -133,7 +133,8 @@ enum dentry_d_lock_class
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_hash)(const struct dentry *, const struct inode *,
+			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
 			const struct dentry *, const struct inode *,
 			unsigned int, const char *, const struct qstr *);
-- 
cgit v1.2.3-71-gd317


From ec2447c278ee973d35f38e53ca16ba7f965ae33d Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:29 +1100
Subject: hostfs: simplify locking

Remove dcache_lock locking from hostfs filesystem, and move it into dcache
helpers. All that is required is a coherent path name. Protection from
concurrent modification of the namespace after path name generation is not
provided in current code, because dcache_lock is dropped before the path is
used.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/dcache.c             | 15 +++++++++++++--
 fs/hostfs/hostfs_kern.c | 24 ++++++++++--------------
 include/linux/dcache.h  |  2 +-
 3 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 6f59f375ed9d..61beb40dd6bf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2171,7 +2171,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
 /*
  * Write full pathname from the root of the filesystem into the buffer.
  */
-char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
+static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
 	char *end = buf + buflen;
 	char *retval;
@@ -2198,7 +2198,18 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
-EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+	char *retval;
+
+	spin_lock(&dcache_lock);
+	retval = __dentry_path(dentry, buf, buflen);
+	spin_unlock(&dcache_lock);
+
+	return retval;
+}
+EXPORT_SYMBOL(dentry_path_raw);
 
 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cfe8bc7de511..39dc505ed273 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
 
 static char *__dentry_name(struct dentry *dentry, char *name)
 {
-	char *p = __dentry_path(dentry, name, PATH_MAX);
+	char *p = dentry_path_raw(dentry, name, PATH_MAX);
 	char *root;
 	size_t len;
 
-	spin_unlock(&dcache_lock);
-
 	root = dentry->d_sb->s_fs_info;
 	len = strlen(root);
 	if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
 	if (!name)
 		return NULL;
 
-	spin_lock(&dcache_lock);
 	return __dentry_name(dentry, name); /* will unlock */
 }
 
 static char *inode_name(struct inode *ino)
 {
 	struct dentry *dentry;
-	char *name = __getname();
-	if (!name)
-		return NULL;
+	char *name;
 
-	spin_lock(&dcache_lock);
-	if (list_empty(&ino->i_dentry)) {
-		spin_unlock(&dcache_lock);
-		__putname(name);
+	dentry = d_find_alias(ino);
+	if (!dentry)
 		return NULL;
-	}
-	dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
-	return __dentry_name(dentry, name); /* will unlock */
+
+	name = dentry_name(dentry);
+
+	dput(dentry);
+
+	return name;
 }
 
 static char *follow_link(char *link)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1149e706f04d..cea27dfca532 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -311,7 +311,7 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 extern char *__d_path(const struct path *path, struct path *root, char *, int);
 extern char *d_path(const struct path *, char *, int);
 extern char *d_path_with_unreachable(const struct path *, char *, int);
-extern char *__dentry_path(struct dentry *, char *, int);
+extern char *dentry_path_raw(struct dentry *, char *, int);
 extern char *dentry_path(struct dentry *, char *, int);
 
 /* Allocation counts.. */
-- 
cgit v1.2.3-71-gd317


From 789680d1ee9311cdf095241dc02bd9784d799cd1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:30 +1100
Subject: fs: dcache scale hash

Add a new lock, dcache_hash_lock, to protect the dcache hash table from
concurrent modification. d_hash is also protected by d_lock.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/dcache.c            | 73 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/dcache.h | 35 ++----------------------
 2 files changed, 64 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 61beb40dd6bf..1e124d4673c6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,10 +35,24 @@
 #include <linux/hardirq.h>
 #include "internal.h"
 
+/*
+ * Usage:
+ * dcache_hash_lock protects dcache hash table, s_anon lists
+ *
+ * Ordering:
+ * dcache_lock
+ *   dentry->d_lock
+ *     dcache_hash_lock
+ *
+ * if (dentry1 < dentry2)
+ *   dentry1->d_lock
+ *     dentry2->d_lock
+ */
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
@@ -196,6 +210,42 @@ static struct dentry *d_kill(struct dentry *dentry)
 	return parent;
 }
 
+/**
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
+ * be found through a VFS lookup any more. Note that this is different from
+ * deleting the dentry - d_delete will try to mark the dentry negative if
+ * possible, giving a successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
+ * reason (NFS timeouts or autofs deletes).
+ *
+ * __d_drop requires dentry->d_lock.
+ */
+void __d_drop(struct dentry *dentry)
+{
+	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
+		dentry->d_flags |= DCACHE_UNHASHED;
+		spin_lock(&dcache_hash_lock);
+		hlist_del_rcu(&dentry->d_hash);
+		spin_unlock(&dcache_hash_lock);
+	}
+}
+EXPORT_SYMBOL(__d_drop);
+
+void d_drop(struct dentry *dentry)
+{
+	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_lock);
+}
+EXPORT_SYMBOL(d_drop);
+
 /* 
  * This is dput
  *
@@ -1199,7 +1249,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	tmp->d_flags |= DCACHE_DISCONNECTED;
 	tmp->d_flags &= ~DCACHE_UNHASHED;
 	list_add(&tmp->d_alias, &inode->i_dentry);
+	spin_lock(&dcache_hash_lock);
 	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
+	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&tmp->d_lock);
 
 	spin_unlock(&dcache_lock);
@@ -1585,7 +1637,9 @@ void d_rehash(struct dentry * entry)
 {
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
+	spin_lock(&dcache_hash_lock);
 	_d_rehash(entry);
+	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -1692,8 +1746,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
  */
 static void d_move_locked(struct dentry * dentry, struct dentry * target)
 {
-	struct hlist_head *list;
-
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
@@ -1710,14 +1762,11 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
-	if (d_unhashed(dentry))
-		goto already_unhashed;
-
-	hlist_del_rcu(&dentry->d_hash);
-
-already_unhashed:
-	list = d_hash(target->d_parent, target->d_name.hash);
-	__d_rehash(dentry, list);
+	spin_lock(&dcache_hash_lock);
+	if (!d_unhashed(dentry))
+		hlist_del_rcu(&dentry->d_hash);
+	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
+	spin_unlock(&dcache_hash_lock);
 
 	/* Unhash the target: dput() will then get rid of it */
 	__d_drop(target);
@@ -1914,7 +1963,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 found_lock:
 	spin_lock(&actual->d_lock);
 found:
+	spin_lock(&dcache_hash_lock);
 	_d_rehash(actual);
+	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&actual->d_lock);
 	spin_unlock(&dcache_lock);
 out_nolock:
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index cea27dfca532..2feb624b67f1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -184,39 +184,6 @@ struct dentry_operations {
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
-/**
- * d_drop - drop a dentry
- * @dentry: dentry to drop
- *
- * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
- * be found through a VFS lookup any more. Note that this is different from
- * deleting the dentry - d_delete will try to mark the dentry negative if
- * possible, giving a successful _negative_ lookup, while d_drop will
- * just make the cache lookup fail.
- *
- * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
- * reason (NFS timeouts or autofs deletes).
- *
- * __d_drop requires dentry->d_lock.
- */
-
-static inline void __d_drop(struct dentry *dentry)
-{
-	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
-		dentry->d_flags |= DCACHE_UNHASHED;
-		hlist_del_rcu(&dentry->d_hash);
-	}
-}
-
-static inline void d_drop(struct dentry *dentry)
-{
-	spin_lock(&dcache_lock);
-	spin_lock(&dentry->d_lock);
- 	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
-}
-
 static inline int dname_external(struct dentry *dentry)
 {
 	return dentry->d_name.name != dentry->d_iname;
@@ -228,6 +195,8 @@ static inline int dname_external(struct dentry *dentry)
 extern void d_instantiate(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
+extern void __d_drop(struct dentry *dentry);
+extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
 
 /* allocate/de-allocate */
-- 
cgit v1.2.3-71-gd317


From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:32 +1100
Subject: fs: dcache scale dentry refcount

Make d_count non-atomic and protect it with d_lock. This allows us to ensure a
0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when
we start protecting many other dentry members with d_lock.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/powerpc/platforms/cell/spufs/inode.c |   2 +-
 drivers/infiniband/hw/ipath/ipath_fs.c    |   2 +-
 drivers/infiniband/hw/qib/qib_fs.c        |   2 +-
 fs/autofs4/expire.c                       |   8 +--
 fs/autofs4/root.c                         |   6 +-
 fs/ceph/dir.c                             |   4 +-
 fs/ceph/inode.c                           |   4 +-
 fs/ceph/mds_client.c                      |   2 +-
 fs/coda/dir.c                             |   2 +-
 fs/configfs/dir.c                         |   3 +-
 fs/configfs/inode.c                       |   2 +-
 fs/dcache.c                               | 106 +++++++++++++++++++++++-------
 fs/ecryptfs/inode.c                       |   2 +-
 fs/locks.c                                |   2 +-
 fs/namei.c                                |   2 +-
 fs/nfs/dir.c                              |   6 +-
 fs/nfs/unlink.c                           |   2 +-
 fs/nfsd/vfs.c                             |   5 +-
 fs/nilfs2/super.c                         |   2 +-
 include/linux/dcache.h                    |  29 ++++----
 kernel/cgroup.c                           |   2 -
 21 files changed, 126 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 3532b92de983..29a406a77547 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -162,7 +162,7 @@ static void spufs_prune_dir(struct dentry *dir)
 		spin_lock(&dcache_lock);
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry)) && dentry->d_inode) {
-			dget_locked(dentry);
+			dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			simple_unlink(dir->d_inode, dentry);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 8c8afc716b98..18aee04a8411 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -280,7 +280,7 @@ static int remove_file(struct dentry *parent, char *name)
 	spin_lock(&dcache_lock);
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
-		dget_locked(tmp);
+		dget_locked_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
 		spin_unlock(&dcache_lock);
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f99bddc01716..fe4b242f0094 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -456,7 +456,7 @@ static int remove_file(struct dentry *parent, char *name)
 	spin_lock(&dcache_lock);
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
-		dget_locked(tmp);
+		dget_locked_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
 		spin_unlock(&dcache_lock);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c9417fb1..413b5642e6cf 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -198,7 +198,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			else
 				ino_count++;
 
-			if (atomic_read(&p->d_count) > ino_count) {
+			if (p->d_count > ino_count) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
@@ -347,7 +347,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 2;
-			if (atomic_read(&dentry->d_count) > ino_count)
+			if (dentry->d_count > ino_count)
 				goto next;
 
 			/* Can we umount this guy */
@@ -369,7 +369,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		if (!exp_leaves) {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (atomic_read(&dentry->d_count) > ino_count)
+			if (dentry->d_count > ino_count)
 				goto next;
 
 			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +383,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		} else {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (atomic_read(&dentry->d_count) > ino_count)
+			if (dentry->d_count > ino_count)
 				goto next;
 
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d34896cfb19f..7922509b5b2b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -436,7 +436,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 		spin_lock(&active->d_lock);
 
 		/* Already gone? */
-		if (atomic_read(&active->d_count) == 0)
+		if (active->d_count == 0)
 			goto next;
 
 		qstr = &active->d_name;
@@ -452,7 +452,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			goto next;
 
 		if (d_unhashed(active)) {
-			dget(active);
+			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
 			spin_unlock(&dcache_lock);
@@ -507,7 +507,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			goto next;
 
 		if (d_unhashed(expiring)) {
-			dget(expiring);
+			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
 			spin_unlock(&dcache_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d902948a90d8..3ecf915a4550 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -150,7 +150,9 @@ more:
 		di = ceph_dentry(dentry);
 	}
 
-	atomic_inc(&dentry->d_count);
+	spin_lock(&dentry->d_lock);
+	dentry->d_count++;
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bf1286588f26..bb68c799074d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -879,8 +879,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	} else if (realdn) {
 		dout("dn %p (%d) spliced with %p (%d) "
 		     "inode %p ino %llx.%llx\n",
-		     dn, atomic_read(&dn->d_count),
-		     realdn, atomic_read(&realdn->d_count),
+		     dn, dn->d_count,
+		     realdn, realdn->d_count,
 		     realdn->d_inode, ceph_vinop(realdn->d_inode));
 		dput(dn);
 		dn = realdn;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800eaa81d0..a50fca1e03be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1486,7 +1486,7 @@ retry:
 	*base = ceph_ino(temp->d_inode);
 	*plen = len;
 	dout("build_path on %p %d built %llx '%.*s'\n",
-	     dentry, atomic_read(&dentry->d_count), *base, len, path);
+	     dentry, dentry->d_count, *base, len, path);
 	return path;
 }
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 4cce3b07d9d7..9e37e8bc9b89 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -559,7 +559,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
 	if (cii->c_flags & C_FLUSH) 
 		coda_flag_inode_children(inode, C_FLUSH);
 
-	if (atomic_read(&de->d_count) > 1)
+	if (de->d_count > 1)
 		/* pretend it's valid, but don't change the flags */
 		goto out;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 20024a9ef5a7..e9acea440ffc 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -394,8 +394,7 @@ static void remove_dir(struct dentry * d)
 	if (d->d_inode)
 		simple_rmdir(parent->d_inode,d);
 
-	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
-		 atomic_read(&d->d_count));
+	pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
 
 	dput(parent);
 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d78ed8..79b37765d8ff 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -253,7 +253,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 		spin_lock(&dcache_lock);
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry) && dentry->d_inode)) {
-			dget_locked(dentry);
+			dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 3d3c843c36ed..81e91502b294 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -45,6 +45,7 @@
  *   - d_flags
  *   - d_name
  *   - d_lru
+ *   - d_count
  *
  * Ordering:
  * dcache_lock
@@ -125,6 +126,7 @@ static void __d_free(struct rcu_head *head)
  */
 static void d_free(struct dentry *dentry)
 {
+	BUG_ON(dentry->d_count);
 	this_cpu_dec(nr_dentry);
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
@@ -222,8 +224,11 @@ static struct dentry *d_kill(struct dentry *dentry)
 	struct dentry *parent;
 
 	list_del(&dentry->d_u.d_child);
-	/*drops the locks, at that point nobody can reach this dentry */
 	dentry_iput(dentry);
+	/*
+	 * dentry_iput drops the locks, at which point nobody (except
+	 * transient RCU lookups) can reach this dentry.
+	 */
 	if (IS_ROOT(dentry))
 		parent = NULL;
 	else
@@ -303,13 +308,23 @@ void dput(struct dentry *dentry)
 		return;
 
 repeat:
-	if (atomic_read(&dentry->d_count) == 1)
+	if (dentry->d_count == 1)
 		might_sleep();
-	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
-		return;
-
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count)) {
+	if (dentry->d_count == 1) {
+		if (!spin_trylock(&dcache_lock)) {
+			/*
+			 * Something of a livelock possibility we could avoid
+			 * by taking dcache_lock and trying again, but we
+			 * want to reduce dcache_lock anyway so this will
+			 * get improved.
+			 */
+			spin_unlock(&dentry->d_lock);
+			goto repeat;
+		}
+	}
+	dentry->d_count--;
+	if (dentry->d_count) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 		return;
@@ -389,7 +404,7 @@ int d_invalidate(struct dentry * dentry)
 	 * working directory or similar).
 	 */
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count) > 1) {
+	if (dentry->d_count > 1) {
 		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
@@ -404,29 +419,61 @@ int d_invalidate(struct dentry * dentry)
 }
 EXPORT_SYMBOL(d_invalidate);
 
-/* This should be called _only_ with dcache_lock held */
+/* This must be called with dcache_lock and d_lock held */
 static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
 {
-	atomic_inc(&dentry->d_count);
+	dentry->d_count++;
 	dentry_lru_del(dentry);
 	return dentry;
 }
 
+/* This should be called _only_ with dcache_lock held */
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
-	atomic_inc(&dentry->d_count);
 	spin_lock(&dentry->d_lock);
-	dentry_lru_del(dentry);
+	__dget_locked_dlock(dentry);
 	spin_unlock(&dentry->d_lock);
 	return dentry;
 }
 
+struct dentry * dget_locked_dlock(struct dentry *dentry)
+{
+	return __dget_locked_dlock(dentry);
+}
+
 struct dentry * dget_locked(struct dentry *dentry)
 {
 	return __dget_locked(dentry);
 }
 EXPORT_SYMBOL(dget_locked);
 
+struct dentry *dget_parent(struct dentry *dentry)
+{
+	struct dentry *ret;
+
+repeat:
+	spin_lock(&dentry->d_lock);
+	ret = dentry->d_parent;
+	if (!ret)
+		goto out;
+	if (dentry == ret) {
+		ret->d_count++;
+		goto out;
+	}
+	if (!spin_trylock(&ret->d_lock)) {
+		spin_unlock(&dentry->d_lock);
+		cpu_relax();
+		goto repeat;
+	}
+	BUG_ON(!ret->d_count);
+	ret->d_count++;
+	spin_unlock(&ret->d_lock);
+out:
+	spin_unlock(&dentry->d_lock);
+	return ret;
+}
+EXPORT_SYMBOL(dget_parent);
+
 /**
  * d_find_alias - grab a hashed alias of inode
  * @inode: inode in question
@@ -495,7 +542,7 @@ restart:
 	spin_lock(&dcache_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
-		if (!atomic_read(&dentry->d_count)) {
+		if (!dentry->d_count) {
 			__dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
@@ -530,7 +577,10 @@ static void prune_one_dentry(struct dentry * dentry)
 	 */
 	while (dentry) {
 		spin_lock(&dcache_lock);
-		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_count--;
+		if (dentry->d_count) {
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			return;
 		}
@@ -562,7 +612,7 @@ static void shrink_dentry_list(struct list_head *list)
 		 * the LRU because of laziness during lookup.  Do not free
 		 * it - just keep it off the LRU list.
 		 */
-		if (atomic_read(&dentry->d_count)) {
+		if (dentry->d_count) {
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
@@ -783,7 +833,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 		do {
 			struct inode *inode;
 
-			if (atomic_read(&dentry->d_count) != 0) {
+			if (dentry->d_count != 0) {
 				printk(KERN_ERR
 				       "BUG: Dentry %p{i=%lx,n=%s}"
 				       " still in use (%d)"
@@ -792,7 +842,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				       dentry->d_inode ?
 				       dentry->d_inode->i_ino : 0UL,
 				       dentry->d_name.name,
-				       atomic_read(&dentry->d_count),
+				       dentry->d_count,
 				       dentry->d_sb->s_type->name,
 				       dentry->d_sb->s_id);
 				BUG();
@@ -802,7 +852,9 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				parent = NULL;
 			else {
 				parent = dentry->d_parent;
-				atomic_dec(&parent->d_count);
+				spin_lock(&parent->d_lock);
+				parent->d_count--;
+				spin_unlock(&parent->d_lock);
 			}
 
 			list_del(&dentry->d_u.d_child);
@@ -853,7 +905,9 @@ void shrink_dcache_for_umount(struct super_block *sb)
 
 	dentry = sb->s_root;
 	sb->s_root = NULL;
-	atomic_dec(&dentry->d_count);
+	spin_lock(&dentry->d_lock);
+	dentry->d_count--;
+	spin_unlock(&dentry->d_lock);
 	shrink_dcache_for_umount_subtree(dentry);
 
 	while (!hlist_empty(&sb->s_anon)) {
@@ -950,7 +1004,7 @@ resume:
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
-		if (!atomic_read(&dentry->d_count)) {
+		if (!dentry->d_count) {
 			dentry_lru_move_tail(dentry);
 			found++;
 		} else {
@@ -1068,7 +1122,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	memcpy(dname, name->name, name->len);
 	dname[name->len] = 0;
 
-	atomic_set(&dentry->d_count, 1);
+	dentry->d_count = 1;
 	dentry->d_flags = DCACHE_UNHASHED;
 	spin_lock_init(&dentry->d_lock);
 	dentry->d_inode = NULL;
@@ -1556,7 +1610,7 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 				goto next;
 		}
 
-		atomic_inc(&dentry->d_count);
+		dentry->d_count++;
 		found = dentry;
 		spin_unlock(&dentry->d_lock);
 		break;
@@ -1653,7 +1707,7 @@ void d_delete(struct dentry * dentry)
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	isdir = S_ISDIR(dentry->d_inode->i_mode);
-	if (atomic_read(&dentry->d_count) == 1) {
+	if (dentry->d_count == 1) {
 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
 		dentry_iput(dentry);
 		fsnotify_nameremove(dentry, isdir);
@@ -2494,11 +2548,15 @@ resume:
 			this_parent = dentry;
 			goto repeat;
 		}
-		atomic_dec(&dentry->d_count);
+		spin_lock(&dentry->d_lock);
+		dentry->d_count--;
+		spin_unlock(&dentry->d_lock);
 	}
 	if (this_parent != root) {
 		next = this_parent->d_u.d_child.next;
-		atomic_dec(&this_parent->d_count);
+		spin_lock(&this_parent->d_lock);
+		this_parent->d_count--;
+		spin_unlock(&this_parent->d_lock);
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a1ed7a7cb173..5e5c7ec1fc98 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 				   ecryptfs_dentry->d_parent));
 	lower_inode = lower_dentry->d_inode;
 	fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
-	BUG_ON(!atomic_read(&lower_dentry->d_count));
+	BUG_ON(!lower_dentry->d_count);
 	ecryptfs_set_dentry_private(ecryptfs_dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
 						     GFP_KERNEL));
diff --git a/fs/locks.c b/fs/locks.c
index 8729347bcd1a..08415b2a6d36 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
 			goto out;
 		if ((arg == F_WRLCK)
-		    && ((atomic_read(&dentry->d_count) > 1)
+		    && ((dentry->d_count > 1)
 			|| (atomic_read(&inode->i_count) > 1)))
 			goto out;
 	}
diff --git a/fs/namei.c b/fs/namei.c
index f3b5ca404659..cbfa5fb31072 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2133,7 +2133,7 @@ void dentry_unhash(struct dentry *dentry)
 	shrink_dcache_parent(dentry);
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count) == 2)
+	if (dentry->d_count == 2)
 		__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9184c7c80f78..12de824edb5c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1720,7 +1720,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
-	if (atomic_read(&dentry->d_count) > 1) {
+	if (dentry->d_count > 1) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 		/* Start asynchronous writeout of the inode */
@@ -1868,7 +1868,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
 		 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
-		 atomic_read(&new_dentry->d_count));
+		 new_dentry->d_count);
 
 	/*
 	 * For non-directories, check whether the target is busy and if so,
@@ -1886,7 +1886,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			rehash = new_dentry;
 		}
 
-		if (atomic_read(&new_dentry->d_count) > 2) {
+		if (new_dentry->d_count > 2) {
 			int err;
 
 			/* copy the target dentry's name */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7bdec8531400..8fe9eb47a97f 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
-		atomic_read(&dentry->d_count));
+		dentry->d_count);
 	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 	/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff04..3a359023c9f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 		goto out_dput_new;
 
 	if (svc_msnfs(ffhp) &&
-		((atomic_read(&odentry->d_count) > 1)
-		 || (atomic_read(&ndentry->d_count) > 1))) {
+		((odentry->d_count > 1) || (ndentry->d_count > 1))) {
 			host_err = -EPERM;
 			goto out_dput_new;
 	}
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	if (type != S_IFDIR) { /* It's UNLINK */
 #ifdef MSNFS
 		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
-			(atomic_read(&rdentry->d_count) > 1)) {
+			(rdentry->d_count > 1)) {
 			host_err = -EPERM;
 		} else
 #endif
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41ec9d3..d36fc7ee615f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -838,7 +838,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
 
 static int nilfs_tree_was_touched(struct dentry *root_dentry)
 {
-	return atomic_read(&root_dentry->d_count) > 1;
+	return root_dentry->d_count > 1;
 }
 
 /**
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2feb624b67f1..b0ade2d46805 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -87,7 +87,7 @@ full_name_hash(const unsigned char *name, unsigned int len)
 #endif
 
 struct dentry {
-	atomic_t d_count;
+	unsigned int d_count;		/* protected by d_lock */
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
 	int d_mounted;
@@ -297,17 +297,28 @@ extern char *dentry_path(struct dentry *, char *, int);
  *	needs and they take necessary precautions) you should hold dcache_lock
  *	and call dget_locked() instead of dget().
  */
- 
+static inline struct dentry *dget_dlock(struct dentry *dentry)
+{
+	if (dentry) {
+		BUG_ON(!dentry->d_count);
+		dentry->d_count++;
+	}
+	return dentry;
+}
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
-		BUG_ON(!atomic_read(&dentry->d_count));
-		atomic_inc(&dentry->d_count);
+		spin_lock(&dentry->d_lock);
+		dget_dlock(dentry);
+		spin_unlock(&dentry->d_lock);
 	}
 	return dentry;
 }
 
 extern struct dentry * dget_locked(struct dentry *);
+extern struct dentry * dget_locked_dlock(struct dentry *);
+
+extern struct dentry *dget_parent(struct dentry *dentry);
 
 /**
  *	d_unhashed -	is dentry hashed
@@ -338,16 +349,6 @@ static inline void dont_mount(struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 }
 
-static inline struct dentry *dget_parent(struct dentry *dentry)
-{
-	struct dentry *ret;
-
-	spin_lock(&dentry->d_lock);
-	ret = dget(dentry->d_parent);
-	spin_unlock(&dentry->d_lock);
-	return ret;
-}
-
 extern void dput(struct dentry *);
 
 static inline int d_mountpoint(struct dentry *dentry)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 746055b214d7..eb7af39350c6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3655,9 +3655,7 @@ again:
 	list_del(&cgrp->sibling);
 	cgroup_unlock_hierarchy(cgrp->root);
 
-	spin_lock(&cgrp->dentry->d_lock);
 	d = dget(cgrp->dentry);
-	spin_unlock(&d->d_lock);
 
 	cgroup_d_remove_dir(d);
 	dput(d);
-- 
cgit v1.2.3-71-gd317


From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:34 +1100
Subject: fs: dcache scale subdirs

Protect d_subdirs and d_child with d_lock, except in filesystems that aren't
using dcache_lock for these anyway (eg. using i_mutex).

Note: if we change the locking rule in future so that ->d_child protection is
provided only with ->d_parent->d_lock, it may allow us to reduce some locking.
But it would be an exception to an otherwise regular locking scheme, so we'd
have to see some good results. Probably not worthwhile.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 drivers/staging/smbfs/cache.c |   4 +
 drivers/usb/core/inode.c      |   8 +-
 fs/autofs4/autofs_i.h         |  11 ++
 fs/autofs4/expire.c           | 127 ++++++++++-----------
 fs/autofs4/root.c             |  18 ++-
 fs/ceph/dir.c                 |   6 +-
 fs/ceph/inode.c               |   8 +-
 fs/coda/cache.c               |   2 +
 fs/dcache.c                   | 248 +++++++++++++++++++++++++++++-------------
 fs/libfs.c                    |  24 +++-
 fs/ncpfs/dir.c                |   3 +
 fs/ncpfs/ncplib_kernel.h      |   4 +
 fs/notify/fsnotify.c          |   4 +-
 include/linux/dcache.h        |   1 +
 kernel/cgroup.c               |  19 +++-
 security/selinux/selinuxfs.c  |  12 +-
 16 files changed, 339 insertions(+), 160 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index 0beded260b00..920434b6c071 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -63,6 +63,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
 	struct dentry *dentry;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
 		dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -70,6 +71,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
 		smb_age_dentry(server, dentry);
 		next = next->next;
 	}
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -97,6 +99,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 
 	/* If a pointer is invalid, we search the dentry. */
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
 		dent = list_entry(next, struct dentry, d_u.d_child);
@@ -111,6 +114,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	}
 	dent = NULL;
 out_unlock:
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return dent;
 }
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index e3ab4437ea96..89a0e8366585 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -344,18 +344,20 @@ static int usbfs_empty (struct dentry *dentry)
 	struct list_head *list;
 
 	spin_lock(&dcache_lock);
-
+	spin_lock(&dentry->d_lock);
 	list_for_each(list, &dentry->d_subdirs) {
 		struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
-		spin_lock(&de->d_lock);
+
+		spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED);
 		if (usbfs_positive(de)) {
 			spin_unlock(&de->d_lock);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			return 0;
 		}
 		spin_unlock(&de->d_lock);
 	}
-
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	return 1;
 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3912dcf047e5..9d2ae9b30d9f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -254,6 +254,17 @@ static inline int simple_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
+static inline void __autofs4_add_expiring(struct dentry *dentry)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	if (ino) {
+		if (list_empty(&ino->expiring))
+			list_add(&ino->expiring, &sbi->expiring_list);
+	}
+	return;
+}
+
 static inline void autofs4_add_expiring(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index ee6402050f13..968c1434af62 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
 }
 
 /*
- * Calculate next entry in top down tree traversal.
- * From next_mnt in namespace.c - elegant.
+ * Calculate and dget next entry in top down tree traversal.
  */
-static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+static struct dentry *get_next_positive_dentry(struct dentry *prev,
+						struct dentry *root)
 {
-	struct list_head *next = p->d_subdirs.next;
+	struct list_head *next;
+	struct dentry *p, *ret;
+
+	if (prev == NULL)
+		return dget(prev);
 
+	spin_lock(&dcache_lock);
+relock:
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_subdirs.next;
 	if (next == &p->d_subdirs) {
 		while (1) {
-			if (p == root)
+			struct dentry *parent;
+
+			if (p == root) {
+				spin_unlock(&p->d_lock);
+				spin_unlock(&dcache_lock);
+				dput(prev);
 				return NULL;
+			}
+
+			parent = p->d_parent;
+			if (!spin_trylock(&parent->d_lock)) {
+				spin_unlock(&p->d_lock);
+				cpu_relax();
+				goto relock;
+			}
+			spin_unlock(&p->d_lock);
 			next = p->d_u.d_child.next;
-			if (next != &p->d_parent->d_subdirs)
+			p = parent;
+			if (next != &parent->d_subdirs)
 				break;
-			p = p->d_parent;
 		}
 	}
-	return list_entry(next, struct dentry, d_u.d_child);
+	ret = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(ret)) {
+		spin_unlock(&ret->d_lock);
+		p = ret;
+		goto again;
+	}
+	dget_dlock(ret);
+	spin_unlock(&ret->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&dcache_lock);
+
+	dput(prev);
+
+	return ret;
 }
 
 /*
@@ -158,22 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	if (!simple_positive(top))
 		return 1;
 
-	spin_lock(&dcache_lock);
-	for (p = top; p; p = next_dentry(p, top)) {
-		spin_lock(&p->d_lock);
-		/* Negative dentry - give up */
-		if (!simple_positive(p)) {
-			spin_unlock(&p->d_lock);
-			continue;
-		}
-
+	p = NULL;
+	while ((p = get_next_positive_dentry(p, top))) {
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
 
-		p = dget_dlock(p);
-		spin_unlock(&p->d_lock);
-		spin_unlock(&dcache_lock);
-
 		/*
 		 * Is someone visiting anywhere in the subtree ?
 		 * If there's no mount we need to check the usage
@@ -208,10 +237,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 				return 1;
 			}
 		}
-		dput(p);
-		spin_lock(&dcache_lock);
 	}
-	spin_unlock(&dcache_lock);
 
 	/* Timeout of a tree mount is ultimately determined by its top dentry */
 	if (!autofs4_can_expire(top, timeout, do_now))
@@ -230,36 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 	DPRINTK("parent %p %.*s",
 		parent, (int)parent->d_name.len, parent->d_name.name);
 
-	spin_lock(&dcache_lock);
-	for (p = parent; p; p = next_dentry(p, parent)) {
-		spin_lock(&p->d_lock);
-		/* Negative dentry - give up */
-		if (!simple_positive(p)) {
-			spin_unlock(&p->d_lock);
-			continue;
-		}
-
+	p = NULL;
+	while ((p = get_next_positive_dentry(p, parent))) {
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
 
-		p = dget_dlock(p);
-		spin_unlock(&p->d_lock);
-		spin_unlock(&dcache_lock);
-
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, p))
-				goto cont;
+				continue;
 
 			/* Can we expire this guy */
 			if (autofs4_can_expire(p, timeout, do_now))
 				return p;
 		}
-cont:
-		dput(p);
-		spin_lock(&dcache_lock);
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 }
 
@@ -310,8 +321,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
+	struct dentry *dentry;
 	struct dentry *expired = NULL;
-	struct list_head *next;
 	int do_now = how & AUTOFS_EXP_IMMEDIATE;
 	int exp_leaves = how & AUTOFS_EXP_LEAVES;
 	struct autofs_info *ino;
@@ -323,26 +334,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	now = jiffies;
 	timeout = sbi->exp_timeout;
 
-	spin_lock(&dcache_lock);
-	next = root->d_subdirs.next;
-
-	/* On exit from the loop expire is set to a dgot dentry
-	 * to expire or it's NULL */
-	while ( next != &root->d_subdirs ) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
-		/* Negative dentry - give up */
-		spin_lock(&dentry->d_lock);
-		if (!simple_positive(dentry)) {
-			next = next->next;
-			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-
-		dentry = dget_dlock(dentry);
-		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
-
+	dentry = NULL;
+	while ((dentry = get_next_positive_dentry(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 
@@ -405,11 +398,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		}
 next:
 		spin_unlock(&sbi->fs_lock);
-		dput(dentry);
-		spin_lock(&dcache_lock);
-		next = next->next;
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 
 found:
@@ -420,7 +409,11 @@ found:
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
+	spin_lock(&expired->d_parent->d_lock);
+	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+	spin_unlock(&expired->d_lock);
+	spin_unlock(&expired->d_parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return expired;
 }
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 7922509b5b2b..7a9ed6b88291 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -143,10 +143,13 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * it.
 	 */
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 		return -ENOENT;
 	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
 out:
@@ -253,7 +256,9 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	lookup_type = autofs4_need_mount(nd->flags);
 	spin_lock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
+		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 		spin_unlock(&sbi->fs_lock);
 		goto follow;
@@ -266,6 +271,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	 */
 	if (ino->flags & AUTOFS_INF_PENDING ||
 	    (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
+		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 		spin_unlock(&sbi->fs_lock);
 
@@ -275,6 +281,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 		goto follow;
 	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	spin_unlock(&sbi->fs_lock);
 follow:
@@ -347,10 +354,12 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	/* Check for a non-mountpoint directory with no contents */
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
+		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
 
 		/* The daemon never causes a mount to trigger */
@@ -367,6 +376,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 		return status;
 	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 
 	return 1;
@@ -776,12 +786,16 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 		return -EACCES;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&sbi->lookup_lock);
+	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&sbi->lookup_lock);
 		spin_unlock(&dcache_lock);
 		return -ENOTEMPTY;
 	}
-	autofs4_add_expiring(dentry);
-	spin_lock(&dentry->d_lock);
+	__autofs4_add_expiring(dentry);
+	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 571f270dca0f..2c924e8d85fe 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -113,6 +113,7 @@ static int __dcache_readdir(struct file *filp,
 	     last);
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 
 	/* start at beginning? */
 	if (filp->f_pos == 2 || last == NULL ||
@@ -136,7 +137,7 @@ more:
 			fi->at_end = 1;
 			goto out_unlock;
 		}
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 		if (!d_unhashed(dentry) && dentry->d_inode &&
 		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
 		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -154,6 +155,7 @@ more:
 
 	dget_dlock(dentry);
 	spin_unlock(&dentry->d_lock);
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
@@ -188,10 +190,12 @@ more:
 	}
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	p = p->prev;	/* advance to next dentry */
 	goto more;
 
 out_unlock:
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 out:
 	if (last)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bb68c799074d..2c6944473366 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -842,11 +842,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 	spin_unlock(&inode->i_lock);
 
 	spin_lock(&dcache_lock);
-	spin_lock(&dn->d_lock);
+	spin_lock(&dir->d_lock);
+	spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&dn->d_u.d_child, &dir->d_subdirs);
 	dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
 	     dn->d_u.d_child.prev, dn->d_u.d_child.next);
 	spin_unlock(&dn->d_lock);
+	spin_unlock(&dir->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -1232,9 +1234,11 @@ retry_lookup:
 		} else {
 			/* reorder parent's d_subdirs */
 			spin_lock(&dcache_lock);
-			spin_lock(&dn->d_lock);
+			spin_lock(&parent->d_lock);
+			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
 			list_move(&dn->d_u.d_child, &parent->d_subdirs);
 			spin_unlock(&dn->d_lock);
+			spin_unlock(&parent->d_lock);
 			spin_unlock(&dcache_lock);
 		}
 
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08e70cf..859393fca2b7 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -94,6 +94,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
 	struct dentry *de;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	list_for_each(child, &parent->d_subdirs)
 	{
 		de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,6 +103,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
 			continue;
 		coda_flag_inode(de->d_inode, flag);
 	}
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return; 
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index ee127f9ab274..a661247a20d5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -47,6 +47,8 @@
  *   - d_lru
  *   - d_count
  *   - d_unhashed()
+ *   - d_parent and d_subdirs
+ *   - childrens' d_child and d_parent
  *
  * Ordering:
  * dcache_lock
@@ -223,24 +225,22 @@ static void dentry_lru_move_tail(struct dentry *dentry)
  *
  * If this is the root of the dentry tree, return NULL.
  *
- * dcache_lock and d_lock must be held by caller, are dropped by d_kill.
+ * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and
+ * are dropped by d_kill.
  */
-static struct dentry *d_kill(struct dentry *dentry)
+static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
+	__releases(parent->d_lock)
 	__releases(dcache_lock)
 {
-	struct dentry *parent;
-
 	list_del(&dentry->d_u.d_child);
+	if (parent)
+		spin_unlock(&parent->d_lock);
 	dentry_iput(dentry);
 	/*
 	 * dentry_iput drops the locks, at which point nobody (except
 	 * transient RCU lookups) can reach this dentry.
 	 */
-	if (IS_ROOT(dentry))
-		parent = NULL;
-	else
-		parent = dentry->d_parent;
 	d_free(dentry);
 	return parent;
 }
@@ -312,6 +312,7 @@ EXPORT_SYMBOL(d_drop);
 
 void dput(struct dentry *dentry)
 {
+	struct dentry *parent;
 	if (!dentry)
 		return;
 
@@ -319,6 +320,10 @@ repeat:
 	if (dentry->d_count == 1)
 		might_sleep();
 	spin_lock(&dentry->d_lock);
+	if (IS_ROOT(dentry))
+		parent = NULL;
+	else
+		parent = dentry->d_parent;
 	if (dentry->d_count == 1) {
 		if (!spin_trylock(&dcache_lock)) {
 			/*
@@ -330,10 +335,17 @@ repeat:
 			spin_unlock(&dentry->d_lock);
 			goto repeat;
 		}
+		if (parent && !spin_trylock(&parent->d_lock)) {
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_lock);
+			goto repeat;
+		}
 	}
 	dentry->d_count--;
 	if (dentry->d_count) {
 		spin_unlock(&dentry->d_lock);
+		if (parent)
+			spin_unlock(&parent->d_lock);
 		spin_unlock(&dcache_lock);
 		return;
 	}
@@ -355,6 +367,8 @@ repeat:
 	dentry_lru_add(dentry);
 
  	spin_unlock(&dentry->d_lock);
+	if (parent)
+		spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return;
 
@@ -363,7 +377,7 @@ unhash_it:
 kill_it:
 	/* if dentry was on the d_lru list delete it from there */
 	dentry_lru_del(dentry);
-	dentry = d_kill(dentry);
+	dentry = d_kill(dentry, parent);
 	if (dentry)
 		goto repeat;
 }
@@ -584,12 +598,13 @@ EXPORT_SYMBOL(d_prune_aliases);
  * quadratic behavior of shrink_dcache_parent(), but is also expected
  * to be beneficial in reducing dentry cache fragmentation.
  */
-static void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
+	__releases(parent->d_lock)
 	__releases(dcache_lock)
 {
 	__d_drop(dentry);
-	dentry = d_kill(dentry);
+	dentry = d_kill(dentry, parent);
 
 	/*
 	 * Prune ancestors.  Locking is simpler than in dput(),
@@ -597,9 +612,20 @@ static void prune_one_dentry(struct dentry * dentry)
 	 */
 	while (dentry) {
 		spin_lock(&dcache_lock);
+again:
 		spin_lock(&dentry->d_lock);
+		if (IS_ROOT(dentry))
+			parent = NULL;
+		else
+			parent = dentry->d_parent;
+		if (parent && !spin_trylock(&parent->d_lock)) {
+			spin_unlock(&dentry->d_lock);
+			goto again;
+		}
 		dentry->d_count--;
 		if (dentry->d_count) {
+			if (parent)
+				spin_unlock(&parent->d_lock);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			return;
@@ -607,7 +633,7 @@ static void prune_one_dentry(struct dentry * dentry)
 
 		dentry_lru_del(dentry);
 		__d_drop(dentry);
-		dentry = d_kill(dentry);
+		dentry = d_kill(dentry, parent);
 	}
 }
 
@@ -616,29 +642,40 @@ static void shrink_dentry_list(struct list_head *list)
 	struct dentry *dentry;
 
 	while (!list_empty(list)) {
+		struct dentry *parent;
+
 		dentry = list_entry(list->prev, struct dentry, d_lru);
 
 		if (!spin_trylock(&dentry->d_lock)) {
+relock:
 			spin_unlock(&dcache_lru_lock);
 			cpu_relax();
 			spin_lock(&dcache_lru_lock);
 			continue;
 		}
 
-		__dentry_lru_del(dentry);
-
 		/*
 		 * We found an inuse dentry which was not removed from
 		 * the LRU because of laziness during lookup.  Do not free
 		 * it - just keep it off the LRU list.
 		 */
 		if (dentry->d_count) {
+			__dentry_lru_del(dentry);
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
+		if (IS_ROOT(dentry))
+			parent = NULL;
+		else
+			parent = dentry->d_parent;
+		if (parent && !spin_trylock(&parent->d_lock)) {
+			spin_unlock(&dentry->d_lock);
+			goto relock;
+		}
+		__dentry_lru_del(dentry);
 		spin_unlock(&dcache_lru_lock);
 
-		prune_one_dentry(dentry);
+		prune_one_dentry(dentry, parent);
 		/* dcache_lock and dentry->d_lock dropped */
 		spin_lock(&dcache_lock);
 		spin_lock(&dcache_lru_lock);
@@ -833,14 +870,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			/* this is a branch with children - detach all of them
 			 * from the system in one go */
 			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
 			list_for_each_entry(loop, &dentry->d_subdirs,
 					    d_u.d_child) {
-				spin_lock(&loop->d_lock);
+				spin_lock_nested(&loop->d_lock,
+						DENTRY_D_LOCK_NESTED);
 				dentry_lru_del(loop);
 				__d_drop(loop);
 				spin_unlock(&loop->d_lock);
-				cond_resched_lock(&dcache_lock);
 			}
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 
 			/* move to the first child */
@@ -868,16 +907,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				BUG();
 			}
 
-			if (IS_ROOT(dentry))
+			if (IS_ROOT(dentry)) {
 				parent = NULL;
-			else {
+				list_del(&dentry->d_u.d_child);
+			} else {
 				parent = dentry->d_parent;
 				spin_lock(&parent->d_lock);
 				parent->d_count--;
+				list_del(&dentry->d_u.d_child);
 				spin_unlock(&parent->d_lock);
 			}
 
-			list_del(&dentry->d_u.d_child);
 			detached++;
 
 			inode = dentry->d_inode;
@@ -958,6 +998,7 @@ int have_submounts(struct dentry *parent)
 	spin_lock(&dcache_lock);
 	if (d_mountpoint(parent))
 		goto positive;
+	spin_lock(&this_parent->d_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
 resume:
@@ -965,22 +1006,34 @@ resume:
 		struct list_head *tmp = next;
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
+
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 		/* Have we found a mount point ? */
-		if (d_mountpoint(dentry))
+		if (d_mountpoint(dentry)) {
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&this_parent->d_lock);
 			goto positive;
+		}
 		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 			goto repeat;
 		}
+		spin_unlock(&dentry->d_lock);
 	}
 	/*
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
 		next = this_parent->d_u.d_child.next;
+		spin_unlock(&this_parent->d_lock);
 		this_parent = this_parent->d_parent;
+		spin_lock(&this_parent->d_lock);
 		goto resume;
 	}
+	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return 0; /* No mount points found in tree */
 positive:
@@ -1010,6 +1063,7 @@ static int select_parent(struct dentry * parent)
 	int found = 0;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&this_parent->d_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
 resume:
@@ -1017,8 +1071,9 @@ resume:
 		struct list_head *tmp = next;
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
+		BUG_ON(this_parent == dentry);
 
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
 		/* 
 		 * move only zero ref count dentries to the end 
@@ -1031,33 +1086,44 @@ resume:
 			dentry_lru_del(dentry);
 		}
 
-		spin_unlock(&dentry->d_lock);
-
 		/*
 		 * We can return to the caller if we have found some (this
 		 * ensures forward progress). We'll be coming back to find
 		 * the rest.
 		 */
-		if (found && need_resched())
+		if (found && need_resched()) {
+			spin_unlock(&dentry->d_lock);
 			goto out;
+		}
 
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
 		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 			goto repeat;
 		}
+
+		spin_unlock(&dentry->d_lock);
 	}
 	/*
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
+		struct dentry *tmp;
 		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
+		tmp = this_parent->d_parent;
+		spin_unlock(&this_parent->d_lock);
+		BUG_ON(tmp == this_parent);
+		this_parent = tmp;
+		spin_lock(&this_parent->d_lock);
 		goto resume;
 	}
 out:
+	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return found;
 }
@@ -1155,18 +1221,19 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);
+	INIT_LIST_HEAD(&dentry->d_u.d_child);
 
 	if (parent) {
-		dentry->d_parent = dget(parent);
+		spin_lock(&dcache_lock);
+		spin_lock(&parent->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+		dentry->d_parent = dget_dlock(parent);
 		dentry->d_sb = parent->d_sb;
-	} else {
-		INIT_LIST_HEAD(&dentry->d_u.d_child);
-	}
-
-	spin_lock(&dcache_lock);
-	if (parent)
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
-	spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&parent->d_lock);
+		spin_unlock(&dcache_lock);
+	}
 
 	this_cpu_inc(nr_dentry);
 
@@ -1684,13 +1751,18 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
 	struct dentry *child;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dparent->d_lock);
 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
 		if (dentry == child) {
-			__dget_locked(dentry);
+			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+			__dget_locked_dlock(dentry);
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dparent->d_lock);
 			spin_unlock(&dcache_lock);
 			return 1;
 		}
 	}
+	spin_unlock(&dparent->d_lock);
 	spin_unlock(&dcache_lock);
 
 	return 0;
@@ -1802,17 +1874,6 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 }
 EXPORT_SYMBOL(dentry_update_name_case);
 
-/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch.
- *
- * Note that we have to be a lot more careful about getting the hash
- * switched - we have to switch the hash value properly even if it
- * then no longer matches the actual (corrupted) string of the target.
- * The hash value has to match the hash queue that the dentry is on..
- */
 static void switch_names(struct dentry *dentry, struct dentry *target)
 {
 	if (dname_external(target)) {
@@ -1854,18 +1915,53 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
 	swap(dentry->d_name.len, target->d_name.len);
 }
 
+static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
+{
+	/*
+	 * XXXX: do we really need to take target->d_lock?
+	 */
+	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
+		spin_lock(&target->d_parent->d_lock);
+	else {
+		if (d_ancestor(dentry->d_parent, target->d_parent)) {
+			spin_lock(&dentry->d_parent->d_lock);
+			spin_lock_nested(&target->d_parent->d_lock,
+						DENTRY_D_LOCK_NESTED);
+		} else {
+			spin_lock(&target->d_parent->d_lock);
+			spin_lock_nested(&dentry->d_parent->d_lock,
+						DENTRY_D_LOCK_NESTED);
+		}
+	}
+	if (target < dentry) {
+		spin_lock_nested(&target->d_lock, 2);
+		spin_lock_nested(&dentry->d_lock, 3);
+	} else {
+		spin_lock_nested(&dentry->d_lock, 2);
+		spin_lock_nested(&target->d_lock, 3);
+	}
+}
+
+static void dentry_unlock_parents_for_move(struct dentry *dentry,
+					struct dentry *target)
+{
+	if (target->d_parent != dentry->d_parent)
+		spin_unlock(&dentry->d_parent->d_lock);
+	if (target->d_parent != target)
+		spin_unlock(&target->d_parent->d_lock);
+}
+
 /*
- * We cannibalize "target" when moving dentry on top of it,
- * because it's going to be thrown away anyway. We could be more
- * polite about it, though.
- *
- * This forceful removal will result in ugly /proc output if
- * somebody holds a file open that got deleted due to a rename.
- * We could be nicer about the deleted file, and let it show
- * up under the name it had before it was deleted rather than
- * under the original name of the file that was moved on top of it.
+ * When switching names, the actual string doesn't strictly have to
+ * be preserved in the target - because we're dropping the target
+ * anyway. As such, we can just do a simple memcpy() to copy over
+ * the new name before we switch.
+ *
+ * Note that we have to be a lot more careful about getting the hash
+ * switched - we have to switch the hash value properly even if it
+ * then no longer matches the actual (corrupted) string of the target.
+ * The hash value has to match the hash queue that the dentry is on..
  */
- 
 /*
  * d_move_locked - move a dentry
  * @dentry: entry to move
@@ -1879,20 +1975,12 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
+	BUG_ON(d_ancestor(dentry, target));
+	BUG_ON(d_ancestor(target, dentry));
+
 	write_seqlock(&rename_lock);
-	/*
-	 * XXXX: do we really need to take target->d_lock?
-	 */
-	if (d_ancestor(dentry, target)) {
-		spin_lock(&dentry->d_lock);
-		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
-	} else if (d_ancestor(target, dentry) || target < dentry) {
-		spin_lock(&target->d_lock);
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-	} else {
-		spin_lock(&dentry->d_lock);
-		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
-	}
+
+	dentry_lock_for_move(dentry, target);
 
 	/* Move the dentry to the target hash queue, if on different bucket */
 	spin_lock(&dcache_hash_lock);
@@ -1924,6 +2012,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
 	}
 
 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+
+	dentry_unlock_parents_for_move(dentry, target);
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -2013,17 +2103,20 @@ out_err:
 /*
  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
  * named dentry in place of the dentry to be replaced.
+ * returns with anon->d_lock held!
  */
 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 {
 	struct dentry *dparent, *aparent;
 
-	switch_names(dentry, anon);
-	swap(dentry->d_name.hash, anon->d_name.hash);
+	dentry_lock_for_move(anon, dentry);
 
 	dparent = dentry->d_parent;
 	aparent = anon->d_parent;
 
+	switch_names(dentry, anon);
+	swap(dentry->d_name.hash, anon->d_name.hash);
+
 	dentry->d_parent = (aparent == anon) ? dentry : aparent;
 	list_del(&dentry->d_u.d_child);
 	if (!IS_ROOT(dentry))
@@ -2038,6 +2131,10 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 	else
 		INIT_LIST_HEAD(&anon->d_u.d_child);
 
+	dentry_unlock_parents_for_move(anon, dentry);
+	spin_unlock(&dentry->d_lock);
+
+	/* anon->d_lock still locked, returns locked */
 	anon->d_flags &= ~DCACHE_DISCONNECTED;
 }
 
@@ -2073,7 +2170,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 			/* Is this an anonymous mountpoint that we could splice
 			 * into our tree? */
 			if (IS_ROOT(alias)) {
-				spin_lock(&alias->d_lock);
 				__d_materialise_dentry(dentry, alias);
 				__d_drop(alias);
 				goto found;
@@ -2558,6 +2654,7 @@ void d_genocide(struct dentry *root)
 	struct list_head *next;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&this_parent->d_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
 resume:
@@ -2571,8 +2668,10 @@ resume:
 			continue;
 		}
 		if (!list_empty(&dentry->d_subdirs)) {
-			spin_unlock(&dentry->d_lock);
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 			goto repeat;
 		}
 		dentry->d_count--;
@@ -2580,12 +2679,13 @@ resume:
 	}
 	if (this_parent != root) {
 		next = this_parent->d_u.d_child.next;
-		spin_lock(&this_parent->d_lock);
 		this_parent->d_count--;
 		spin_unlock(&this_parent->d_lock);
 		this_parent = this_parent->d_parent;
+		spin_lock(&this_parent->d_lock);
 		goto resume;
 	}
+	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 433e7139c23a..cc4794914b52 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -81,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 {
-	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
+	struct dentry *dentry = file->f_path.dentry;
+	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -89,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+			mutex_unlock(&dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
@@ -100,22 +101,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			loff_t n = file->f_pos - 2;
 
 			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+			/* d_lock not required for cursor */
 			list_del(&cursor->d_u.d_child);
-			p = file->f_path.dentry->d_subdirs.next;
-			while (n && p != &file->f_path.dentry->d_subdirs) {
+			p = dentry->d_subdirs.next;
+			while (n && p != &dentry->d_subdirs) {
 				struct dentry *next;
 				next = list_entry(p, struct dentry, d_u.d_child);
-				spin_lock(&next->d_lock);
+				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 				if (simple_positive(next))
 					n--;
 				spin_unlock(&next->d_lock);
 				p = p->next;
 			}
 			list_add_tail(&cursor->d_u.d_child, p);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 		}
 	}
-	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 	return offset;
 }
 
@@ -156,6 +160,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			/* fallthrough */
 		default:
 			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
 			if (filp->f_pos == 2)
 				list_move(q, &dentry->d_subdirs);
 
@@ -169,6 +174,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 				}
 
 				spin_unlock(&next->d_lock);
+				spin_unlock(&dentry->d_lock);
 				spin_unlock(&dcache_lock);
 				if (filldir(dirent, next->d_name.name, 
 					    next->d_name.len, filp->f_pos, 
@@ -176,11 +182,15 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 					    dt_type(next->d_inode)) < 0)
 					return 0;
 				spin_lock(&dcache_lock);
+				spin_lock(&dentry->d_lock);
+				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 				/* next is still alive */
 				list_move(q, p);
+				spin_unlock(&next->d_lock);
 				p = q;
 				filp->f_pos++;
 			}
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 	}
 	return 0;
@@ -276,6 +286,7 @@ int simple_empty(struct dentry *dentry)
 	int ret = 0;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
 		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
 		if (simple_positive(child)) {
@@ -286,6 +297,7 @@ int simple_empty(struct dentry *dentry)
 	}
 	ret = 1;
 out:
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	return ret;
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index bbbf7922f422..102278ed38bd 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -392,6 +392,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 
 	/* If a pointer is invalid, we search the dentry. */
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
 		dent = list_entry(next, struct dentry, d_u.d_child);
@@ -400,11 +401,13 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 				dget_locked(dent);
 			else
 				dent = NULL;
+			spin_unlock(&parent->d_lock);
 			spin_unlock(&dcache_lock);
 			goto out;
 		}
 		next = next->next;
 	}
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 	return NULL;
 
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 244d1b73fda7..c4b718ff9a6b 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -194,6 +194,7 @@ ncp_renew_dentries(struct dentry *parent)
 	struct dentry *dentry;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
 		dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,6 +206,7 @@ ncp_renew_dentries(struct dentry *parent)
 
 		next = next->next;
 	}
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -216,6 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 	struct dentry *dentry;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
 		dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,6 +226,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 		ncp_age_dentry(server, dentry);
 		next = next->next;
 	}
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 20dc218707ca..aa4f25e803f6 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -68,17 +68,19 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 		/* run all of the children of the original inode and fix their
 		 * d_flags to indicate parental interest (their parent is the
 		 * original inode) */
+		spin_lock(&alias->d_lock);
 		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
 			if (!child->d_inode)
 				continue;
 
-			spin_lock(&child->d_lock);
+			spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
 			if (watched)
 				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
 			else
 				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
 			spin_unlock(&child->d_lock);
 		}
+		spin_unlock(&alias->d_lock);
 	}
 	spin_unlock(&dcache_lock);
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b0ade2d46805..ddf4f55624f7 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -305,6 +305,7 @@ static inline struct dentry *dget_dlock(struct dentry *dentry)
 	}
 	return dentry;
 }
+
 static inline struct dentry *dget(struct dentry *dentry)
 {
 	if (dentry) {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index eb7af39350c6..7b4705b51d4a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -877,23 +877,31 @@ static void cgroup_clear_directory(struct dentry *dentry)
 
 	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
 	spin_lock(&dcache_lock);
+	spin_lock(&dentry->d_lock);
 	node = dentry->d_subdirs.next;
 	while (node != &dentry->d_subdirs) {
 		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
 		list_del_init(node);
 		if (d->d_inode) {
 			/* This should never be called on a cgroup
 			 * directory with child cgroups */
 			BUG_ON(d->d_inode->i_mode & S_IFDIR);
-			d = dget_locked(d);
+			dget_locked_dlock(d);
+			spin_unlock(&d->d_lock);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			d_delete(d);
 			simple_unlink(dentry->d_inode, d);
 			dput(d);
 			spin_lock(&dcache_lock);
-		}
+			spin_lock(&dentry->d_lock);
+		} else
+			spin_unlock(&d->d_lock);
 		node = dentry->d_subdirs.next;
 	}
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
@@ -902,10 +910,17 @@ static void cgroup_clear_directory(struct dentry *dentry)
  */
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
+	struct dentry *parent;
+
 	cgroup_clear_directory(dentry);
 
 	spin_lock(&dcache_lock);
+	parent = dentry->d_parent;
+	spin_lock(&parent->d_lock);
+	spin_lock(&dentry->d_lock);
 	list_del_init(&dentry->d_u.d_child);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_lock);
 	remove_dir(dentry);
 }
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 073fd5b0a53a..017ec096446e 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1146,22 +1146,30 @@ static void sel_remove_entries(struct dentry *de)
 	struct list_head *node;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&de->d_lock);
 	node = de->d_subdirs.next;
 	while (node != &de->d_subdirs) {
 		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+
+		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
 		list_del_init(node);
 
 		if (d->d_inode) {
-			d = dget_locked(d);
+			dget_locked_dlock(d);
+			spin_unlock(&de->d_lock);
+			spin_unlock(&d->d_lock);
 			spin_unlock(&dcache_lock);
 			d_delete(d);
 			simple_unlink(de->d_inode, d);
 			dput(d);
 			spin_lock(&dcache_lock);
-		}
+			spin_lock(&de->d_lock);
+		} else
+			spin_unlock(&d->d_lock);
 		node = de->d_subdirs.next;
 	}
 
+	spin_unlock(&de->d_lock);
 	spin_unlock(&dcache_lock);
 }
 
-- 
cgit v1.2.3-71-gd317


From b23fb0a60379a95e10c671f646b259ea2558421e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:35 +1100
Subject: fs: scale inode alias list

Add a new lock, dcache_inode_lock, to protect the inode's i_dentry list
from concurrent modification. d_alias is also protected by d_lock.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/9p/vfs_inode.c      |  2 ++
 fs/affs/amigaffs.c     |  2 ++
 fs/cifs/inode.c        |  3 +++
 fs/dcache.c            | 66 ++++++++++++++++++++++++++++++++++++++++++++------
 fs/exportfs/expfs.c    |  4 +++
 fs/nfs/getroot.c       |  4 +++
 fs/notify/fsnotify.c   |  2 ++
 fs/ocfs2/dcache.c      |  3 ++-
 include/linux/dcache.h |  1 +
 9 files changed, 78 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 34bf71b56542..47dfd5d29a6b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -271,9 +271,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
 	struct dentry *dentry;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	/* Directory should have only one entry. */
 	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
 	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	return dentry;
 }
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a30f7a3..2321cc92d44f 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -129,6 +129,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 	struct list_head *head, *next;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	head = &inode->i_dentry;
 	next = head->next;
 	while (next != head) {
@@ -139,6 +140,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 		}
 		next = next->next;
 	}
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 589f3e3f6e00..003698365ece 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -810,12 +810,15 @@ inode_has_hashed_dentries(struct inode *inode)
 	struct dentry *dentry;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
+			spin_unlock(&dcache_inode_lock);
 			spin_unlock(&dcache_lock);
 			return true;
 		}
 	}
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	return false;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index a661247a20d5..de38680ee0ed 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -37,6 +37,8 @@
 
 /*
  * Usage:
+ * dcache_inode_lock protects:
+ *   - i_dentry, d_alias, d_inode
  * dcache_hash_lock protects:
  *   - the dcache hash table, s_anon lists
  * dcache_lru_lock protects:
@@ -49,12 +51,14 @@
  *   - d_unhashed()
  *   - d_parent and d_subdirs
  *   - childrens' d_child and d_parent
+ *   - d_alias, d_inode
  *
  * Ordering:
  * dcache_lock
- *   dentry->d_lock
- *     dcache_lru_lock
- *     dcache_hash_lock
+ *   dcache_inode_lock
+ *     dentry->d_lock
+ *       dcache_lru_lock
+ *       dcache_hash_lock
  *
  * If there is an ancestor relationship:
  * dentry->d_parent->...->d_parent->d_lock
@@ -70,11 +74,13 @@
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
+EXPORT_SYMBOL(dcache_inode_lock);
 EXPORT_SYMBOL(dcache_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
@@ -154,6 +160,7 @@ static void d_free(struct dentry *dentry)
  */
 static void dentry_iput(struct dentry * dentry)
 	__releases(dentry->d_lock)
+	__releases(dcache_inode_lock)
 	__releases(dcache_lock)
 {
 	struct inode *inode = dentry->d_inode;
@@ -161,6 +168,7 @@ static void dentry_iput(struct dentry * dentry)
 		dentry->d_inode = NULL;
 		list_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 		if (!inode->i_nlink)
 			fsnotify_inoderemove(inode);
@@ -170,6 +178,7 @@ static void dentry_iput(struct dentry * dentry)
 			iput(inode);
 	} else {
 		spin_unlock(&dentry->d_lock);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 	}
 }
@@ -231,6 +240,7 @@ static void dentry_lru_move_tail(struct dentry *dentry)
 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
 	__releases(parent->d_lock)
+	__releases(dcache_inode_lock)
 	__releases(dcache_lock)
 {
 	list_del(&dentry->d_u.d_child);
@@ -332,13 +342,18 @@ repeat:
 			 * want to reduce dcache_lock anyway so this will
 			 * get improved.
 			 */
+drop1:
 			spin_unlock(&dentry->d_lock);
 			goto repeat;
 		}
-		if (parent && !spin_trylock(&parent->d_lock)) {
-			spin_unlock(&dentry->d_lock);
+		if (!spin_trylock(&dcache_inode_lock)) {
+drop2:
 			spin_unlock(&dcache_lock);
-			goto repeat;
+			goto drop1;
+		}
+		if (parent && !spin_trylock(&parent->d_lock)) {
+			spin_unlock(&dcache_inode_lock);
+			goto drop2;
 		}
 	}
 	dentry->d_count--;
@@ -369,6 +384,7 @@ repeat:
  	spin_unlock(&dentry->d_lock);
 	if (parent)
 		spin_unlock(&parent->d_lock);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	return;
 
@@ -558,7 +574,9 @@ struct dentry *d_find_alias(struct inode *inode)
 
 	if (!list_empty(&inode->i_dentry)) {
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
 		de = __d_find_alias(inode, 0);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 	}
 	return de;
@@ -574,18 +592,21 @@ void d_prune_aliases(struct inode *inode)
 	struct dentry *dentry;
 restart:
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_count) {
 			__dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_inode_lock);
 			spin_unlock(&dcache_lock);
 			dput(dentry);
 			goto restart;
 		}
 		spin_unlock(&dentry->d_lock);
 	}
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(d_prune_aliases);
@@ -601,6 +622,7 @@ EXPORT_SYMBOL(d_prune_aliases);
 static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
 	__releases(parent->d_lock)
+	__releases(dcache_inode_lock)
 	__releases(dcache_lock)
 {
 	__d_drop(dentry);
@@ -612,6 +634,7 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
 	 */
 	while (dentry) {
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
 again:
 		spin_lock(&dentry->d_lock);
 		if (IS_ROOT(dentry))
@@ -627,6 +650,7 @@ again:
 			if (parent)
 				spin_unlock(&parent->d_lock);
 			spin_unlock(&dentry->d_lock);
+			spin_unlock(&dcache_inode_lock);
 			spin_unlock(&dcache_lock);
 			return;
 		}
@@ -676,8 +700,9 @@ relock:
 		spin_unlock(&dcache_lru_lock);
 
 		prune_one_dentry(dentry, parent);
-		/* dcache_lock and dentry->d_lock dropped */
+		/* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
 		spin_lock(&dcache_lru_lock);
 	}
 }
@@ -699,6 +724,7 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 	int cnt = *count;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 relock:
 	spin_lock(&dcache_lru_lock);
 	while (!list_empty(&sb->s_dentry_lru)) {
@@ -737,8 +763,8 @@ relock:
 	if (!list_empty(&referenced))
 		list_splice(&referenced, &sb->s_dentry_lru);
 	spin_unlock(&dcache_lru_lock);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
-
 }
 
 /**
@@ -832,12 +858,14 @@ void shrink_dcache_sb(struct super_block *sb)
 	LIST_HEAD(tmp);
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	spin_lock(&dcache_lru_lock);
 	while (!list_empty(&sb->s_dentry_lru)) {
 		list_splice_init(&sb->s_dentry_lru, &tmp);
 		shrink_dentry_list(&tmp);
 	}
 	spin_unlock(&dcache_lru_lock);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1255,9 +1283,11 @@ EXPORT_SYMBOL(d_alloc_name);
 /* the caller must hold dcache_lock */
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
+	spin_lock(&dentry->d_lock);
 	if (inode)
 		list_add(&dentry->d_alias, &inode->i_dentry);
 	dentry->d_inode = inode;
+	spin_unlock(&dentry->d_lock);
 	fsnotify_d_instantiate(dentry, inode);
 }
 
@@ -1280,7 +1310,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 {
 	BUG_ON(!list_empty(&entry->d_alias));
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	__d_instantiate(entry, inode);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 }
@@ -1341,7 +1373,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 	BUG_ON(!list_empty(&entry->d_alias));
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	result = __d_instantiate_unique(entry, inode);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 
 	if (!result) {
@@ -1432,8 +1466,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	tmp->d_parent = tmp; /* make sure dput doesn't croak */
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	res = __d_find_alias(inode, 0);
 	if (res) {
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 		dput(tmp);
 		goto out_iput;
@@ -1450,6 +1486,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
 	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&tmp->d_lock);
+	spin_unlock(&dcache_inode_lock);
 
 	spin_unlock(&dcache_lock);
 	return tmp;
@@ -1482,9 +1519,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 
 	if (inode && S_ISDIR(inode->i_mode)) {
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
 		new = __d_find_alias(inode, 1);
 		if (new) {
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+			spin_unlock(&dcache_inode_lock);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(new, inode);
 			d_move(new, dentry);
@@ -1492,6 +1531,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 		} else {
 			/* already taking dcache_lock, so d_add() by hand */
 			__d_instantiate(dentry, inode);
+			spin_unlock(&dcache_inode_lock);
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(dentry, inode);
 			d_rehash(dentry);
@@ -1566,8 +1606,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 * already has a dentry.
 	 */
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
 		__d_instantiate(found, inode);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 		security_d_instantiate(found, inode);
 		return found;
@@ -1579,6 +1621,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 */
 	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 	dget_locked(new);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(found, inode);
 	d_move(new, found);
@@ -1797,6 +1840,7 @@ void d_delete(struct dentry * dentry)
 	 * Are we the only user?
 	 */
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	spin_lock(&dentry->d_lock);
 	isdir = S_ISDIR(dentry->d_inode->i_mode);
 	if (dentry->d_count == 1) {
@@ -1810,6 +1854,7 @@ void d_delete(struct dentry * dentry)
 		__d_drop(dentry);
 
 	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 
 	fsnotify_nameremove(dentry, isdir);
@@ -2067,6 +2112,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  */
 static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
 	__releases(dcache_lock)
+	__releases(dcache_inode_lock)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
 	struct dentry *ret;
@@ -2092,6 +2138,7 @@ out_unalias:
 	d_move_locked(alias, dentry);
 	ret = alias;
 out_err:
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	if (m2)
 		mutex_unlock(m2);
@@ -2153,6 +2200,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 	BUG_ON(!d_unhashed(dentry));
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 
 	if (!inode) {
 		actual = dentry;
@@ -2196,6 +2244,7 @@ found:
 	_d_rehash(actual);
 	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&actual->d_lock);
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 out_nolock:
 	if (actual == dentry) {
@@ -2207,6 +2256,7 @@ out_nolock:
 	return actual;
 
 shouldnt_be_hashed:
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 	BUG();
 }
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b304056f10..84b8c460a781 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -48,8 +48,10 @@ find_acceptable_alias(struct dentry *result,
 		return result;
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
 		dget_locked(dentry);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 		if (toput)
 			dput(toput);
@@ -58,8 +60,10 @@ find_acceptable_alias(struct dentry *result,
 			return dentry;
 		}
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
 		toput = dentry;
 	}
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 
 	if (toput)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814ce162..850f67d5f0ac 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -64,7 +64,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		 * Oops, since the test for IS_ROOT() will fail.
 		 */
 		spin_lock(&dcache_lock);
+		spin_lock(&dcache_inode_lock);
+		spin_lock(&sb->s_root->d_lock);
 		list_del_init(&sb->s_root->d_alias);
+		spin_unlock(&sb->s_root->d_lock);
+		spin_unlock(&dcache_inode_lock);
 		spin_unlock(&dcache_lock);
 	}
 	return 0;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index aa4f25e803f6..ae769fc9b66c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -60,6 +60,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	watched = fsnotify_inode_watches_children(inode);
 
 	spin_lock(&dcache_lock);
+	spin_lock(&dcache_inode_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -82,6 +83,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 		}
 		spin_unlock(&alias->d_lock);
 	}
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 }
 
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 35e5f5a9ef59..c31b5c647ac7 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -170,7 +170,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	struct dentry *dentry = NULL;
 
 	spin_lock(&dcache_lock);
-
+	spin_lock(&dcache_inode_lock);
 	list_for_each(p, &inode->i_dentry) {
 		dentry = list_entry(p, struct dentry, d_alias);
 
@@ -188,6 +188,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 		dentry = NULL;
 	}
 
+	spin_unlock(&dcache_inode_lock);
 	spin_unlock(&dcache_lock);
 
 	return dentry;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ddf4f55624f7..bda5ec0b077d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -181,6 +181,7 @@ struct dentry_operations {
 
 #define DCACHE_CANT_MOUNT	0x0100
 
+extern spinlock_t dcache_inode_lock;
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
-- 
cgit v1.2.3-71-gd317


From 949854d02455080d20cd3e1db28a3a18daf7599d Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:37 +1100
Subject: fs: Use rename lock and RCU for multi-step operations

The remaining usages for dcache_lock is to allow atomic, multi-step read-side
operations over the directory tree by excluding modifications to the tree.
Also, to walk in the leaf->root direction in the tree where we don't have
a natural d_lock ordering.

This could be accomplished by taking every d_lock, but this would mean a
huge number of locks and actually gets very tricky.

Solve this instead by using the rename seqlock for multi-step read-side
operations, retry in case of a rename so we don't walk up the wrong parent.
Concurrent dentry insertions are not serialised against.  Concurrent deletes
are tricky when walking up the directory: our parent might have been deleted
when dropping locks so also need to check and retry for that.

We can also use the rename lock in cases where livelock is a worry (and it
is introduced in subsequent patch).

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 drivers/staging/pohmelfs/path_entry.c |  15 +++-
 fs/autofs4/waitq.c                    |  18 ++++-
 fs/dcache.c                           | 134 ++++++++++++++++++++++++++++------
 fs/nfs/namespace.c                    |  14 +++-
 include/linux/dcache.h                |   1 +
 5 files changed, 152 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index 8ec83d2dffb7..bbe42f42ca8f 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -83,10 +83,11 @@ out:
 int pohmelfs_path_length(struct pohmelfs_inode *pi)
 {
 	struct dentry *d, *root, *first;
-	int len = 1; /* Root slash */
+	int len;
+	unsigned seq;
 
-	first = d = d_find_alias(&pi->vfs_inode);
-	if (!d) {
+	first = d_find_alias(&pi->vfs_inode);
+	if (!first) {
 		dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode);
 		return -ENOENT;
 	}
@@ -95,6 +96,11 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
 	root = dget(current->fs->root.dentry);
 	spin_unlock(&current->fs->lock);
 
+rename_retry:
+	len = 1; /* Root slash */
+	d = first;
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
 	spin_lock(&dcache_lock);
 
 	if (!IS_ROOT(d) && d_unhashed(d))
@@ -105,6 +111,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
 		d = d->d_parent;
 	}
 	spin_unlock(&dcache_lock);
+	rcu_read_unlock();
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 
 	dput(root);
 	dput(first);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375386f8..4be8f778a418 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,25 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 {
 	struct dentry *root = sbi->sb->s_root;
 	struct dentry *tmp;
-	char *buf = *name;
+	char *buf;
 	char *p;
-	int len = 0;
-
+	int len;
+	unsigned seq;
+
+rename_retry:
+	buf = *name;
+	len = 0;
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
 	spin_lock(&dcache_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
 		spin_unlock(&dcache_lock);
+		rcu_read_unlock();
+		if (read_seqretry(&rename_lock, seq))
+			goto rename_retry;
 		return 0;
 	}
 
@@ -209,6 +218,9 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
 	spin_unlock(&dcache_lock);
+	rcu_read_unlock();
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 
 	return len;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index a09f0771fd27..a9bc4ecc21e1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -80,6 +80,7 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
+EXPORT_SYMBOL(rename_lock);
 EXPORT_SYMBOL(dcache_inode_lock);
 EXPORT_SYMBOL(dcache_lock);
 
@@ -243,6 +244,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(dcache_inode_lock)
 	__releases(dcache_lock)
 {
+	dentry->d_parent = NULL;
 	list_del(&dentry->d_u.d_child);
 	if (parent)
 		spin_unlock(&parent->d_lock);
@@ -1017,11 +1019,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
  * Return true if the parent or its subdirectories contain
  * a mount point
  */
- 
 int have_submounts(struct dentry *parent)
 {
-	struct dentry *this_parent = parent;
+	struct dentry *this_parent;
 	struct list_head *next;
+	unsigned seq;
+
+rename_retry:
+	this_parent = parent;
+	seq = read_seqbegin(&rename_lock);
 
 	spin_lock(&dcache_lock);
 	if (d_mountpoint(parent))
@@ -1055,17 +1061,37 @@ resume:
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_u.d_child.next;
+		struct dentry *tmp;
+		struct dentry *child;
+
+		tmp = this_parent->d_parent;
+		rcu_read_lock();
 		spin_unlock(&this_parent->d_lock);
-		this_parent = this_parent->d_parent;
+		child = this_parent;
+		this_parent = tmp;
 		spin_lock(&this_parent->d_lock);
+		/* might go back up the wrong parent if we have had a rename
+		 * or deletion */
+		if (this_parent != child->d_parent ||
+				read_seqretry(&rename_lock, seq)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			goto rename_retry;
+		}
+		rcu_read_unlock();
+		next = child->d_u.d_child.next;
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 	return 0; /* No mount points found in tree */
 positive:
 	spin_unlock(&dcache_lock);
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 	return 1;
 }
 EXPORT_SYMBOL(have_submounts);
@@ -1086,10 +1112,15 @@ EXPORT_SYMBOL(have_submounts);
  */
 static int select_parent(struct dentry * parent)
 {
-	struct dentry *this_parent = parent;
+	struct dentry *this_parent;
 	struct list_head *next;
+	unsigned seq;
 	int found = 0;
 
+rename_retry:
+	this_parent = parent;
+	seq = read_seqbegin(&rename_lock);
+
 	spin_lock(&dcache_lock);
 	spin_lock(&this_parent->d_lock);
 repeat:
@@ -1099,7 +1130,6 @@ resume:
 		struct list_head *tmp = next;
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
-		BUG_ON(this_parent == dentry);
 
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
@@ -1142,17 +1172,32 @@ resume:
 	 */
 	if (this_parent != parent) {
 		struct dentry *tmp;
-		next = this_parent->d_u.d_child.next;
+		struct dentry *child;
+
 		tmp = this_parent->d_parent;
+		rcu_read_lock();
 		spin_unlock(&this_parent->d_lock);
-		BUG_ON(tmp == this_parent);
+		child = this_parent;
 		this_parent = tmp;
 		spin_lock(&this_parent->d_lock);
+		/* might go back up the wrong parent if we have had a rename
+		 * or deletion */
+		if (this_parent != child->d_parent ||
+				read_seqretry(&rename_lock, seq)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			goto rename_retry;
+		}
+		rcu_read_unlock();
+		next = child->d_u.d_child.next;
 		goto resume;
 	}
 out:
 	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 	return found;
 }
 
@@ -1654,7 +1699,7 @@ EXPORT_SYMBOL(d_add_ci);
 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 {
 	struct dentry * dentry = NULL;
-	unsigned long seq;
+	unsigned seq;
 
         do {
                 seq = read_seqbegin(&rename_lock);
@@ -2290,7 +2335,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
- * Caller holds the dcache_lock.
+ * Caller holds the rename_lock.
  *
  * If path is not reachable from the supplied root, then the value of
  * root is changed (without modifying refcounts).
@@ -2377,7 +2422,9 @@ char *__d_path(const struct path *path, struct path *root,
 
 	prepend(&res, &buflen, "\0", 1);
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	error = prepend_path(path, root, &res, &buflen);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 
 	if (error)
@@ -2441,10 +2488,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	tmp = root;
 	error = path_with_deleted(path, &tmp, &res, &buflen);
 	if (error)
 		res = ERR_PTR(error);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 	path_put(&root);
 	return res;
@@ -2472,10 +2521,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	tmp = root;
 	error = path_with_deleted(path, &tmp, &res, &buflen);
 	if (!error && !path_equal(&tmp, &root))
 		error = prepend_unreachable(&res, &buflen);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 	path_put(&root);
 	if (error)
@@ -2544,7 +2595,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
 	char *retval;
 
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	retval = __dentry_path(dentry, buf, buflen);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 
 	return retval;
@@ -2557,6 +2610,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 	char *retval;
 
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	if (d_unlinked(dentry)) {
 		p = buf + buflen;
 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2564,6 +2618,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 		buflen++;
 	}
 	retval = __dentry_path(dentry, buf, buflen);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 	if (!IS_ERR(retval) && p)
 		*p = '/';	/* restore '/' overriden with '\0' */
@@ -2604,6 +2659,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 
 	error = -ENOENT;
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		struct path tmp = root;
@@ -2612,6 +2668,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 
 		prepend(&cwd, &buflen, "\0", 1);
 		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
+		write_sequnlock(&rename_lock);
 		spin_unlock(&dcache_lock);
 
 		if (error)
@@ -2631,8 +2688,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 			if (copy_to_user(buf, cwd, len))
 				error = -EFAULT;
 		}
-	} else
+	} else {
+		write_sequnlock(&rename_lock);
 		spin_unlock(&dcache_lock);
+	}
 
 out:
 	path_put(&pwd);
@@ -2660,25 +2719,25 @@ out:
 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 {
 	int result;
-	unsigned long seq;
+	unsigned seq;
 
 	if (new_dentry == old_dentry)
 		return 1;
 
-	/*
-	 * Need rcu_readlock to protect against the d_parent trashing
-	 * due to d_move
-	 */
-	rcu_read_lock();
 	do {
 		/* for restarting inner loop in case of seq retry */
 		seq = read_seqbegin(&rename_lock);
+		/*
+		 * Need rcu_readlock to protect against the d_parent trashing
+		 * due to d_move
+		 */
+		rcu_read_lock();
 		if (d_ancestor(old_dentry, new_dentry))
 			result = 1;
 		else
 			result = 0;
+		rcu_read_unlock();
 	} while (read_seqretry(&rename_lock, seq));
-	rcu_read_unlock();
 
 	return result;
 }
@@ -2710,9 +2769,13 @@ EXPORT_SYMBOL(path_is_under);
 
 void d_genocide(struct dentry *root)
 {
-	struct dentry *this_parent = root;
+	struct dentry *this_parent;
 	struct list_head *next;
+	unsigned seq;
 
+rename_retry:
+	this_parent = root;
+	seq = read_seqbegin(&rename_lock);
 	spin_lock(&dcache_lock);
 	spin_lock(&this_parent->d_lock);
 repeat:
@@ -2722,6 +2785,7 @@ resume:
 		struct list_head *tmp = next;
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
+
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 		if (d_unhashed(dentry) || !dentry->d_inode) {
 			spin_unlock(&dentry->d_lock);
@@ -2734,19 +2798,43 @@ resume:
 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 			goto repeat;
 		}
-		dentry->d_count--;
+		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
+			dentry->d_flags |= DCACHE_GENOCIDE;
+			dentry->d_count--;
+		}
 		spin_unlock(&dentry->d_lock);
 	}
 	if (this_parent != root) {
-		next = this_parent->d_u.d_child.next;
-		this_parent->d_count--;
+		struct dentry *tmp;
+		struct dentry *child;
+
+		tmp = this_parent->d_parent;
+		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
+			this_parent->d_flags |= DCACHE_GENOCIDE;
+			this_parent->d_count--;
+		}
+		rcu_read_lock();
 		spin_unlock(&this_parent->d_lock);
-		this_parent = this_parent->d_parent;
+		child = this_parent;
+		this_parent = tmp;
 		spin_lock(&this_parent->d_lock);
+		/* might go back up the wrong parent if we have had a rename
+		 * or deletion */
+		if (this_parent != child->d_parent ||
+				read_seqretry(&rename_lock, seq)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_unlock(&dcache_lock);
+			rcu_read_unlock();
+			goto rename_retry;
+		}
+		rcu_read_unlock();
+		next = child->d_u.d_child.next;
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
 	spin_unlock(&dcache_lock);
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 }
 
 /**
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa3673cf3..78c0ebb0b07c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,11 +49,17 @@ char *nfs_path(const char *base,
 	       const struct dentry *dentry,
 	       char *buffer, ssize_t buflen)
 {
-	char *end = buffer+buflen;
+	char *end;
 	int namelen;
+	unsigned seq;
 
+rename_retry:
+	end = buffer+buflen;
 	*--end = '\0';
 	buflen--;
+
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
 	spin_lock(&dcache_lock);
 	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
@@ -66,6 +72,9 @@ char *nfs_path(const char *base,
 		dentry = dentry->d_parent;
 	}
 	spin_unlock(&dcache_lock);
+	rcu_read_unlock();
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 	if (*end != '/') {
 		if (--buflen < 0)
 			goto Elong;
@@ -83,6 +92,9 @@ char *nfs_path(const char *base,
 	return end;
 Elong_unlock:
 	spin_unlock(&dcache_lock);
+	rcu_read_unlock();
+	if (read_seqretry(&rename_lock, seq))
+		goto rename_retry;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bda5ec0b077d..c963ebada922 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,6 +180,7 @@ struct dentry_operations {
 #define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
 
 #define DCACHE_CANT_MOUNT	0x0100
+#define DCACHE_GENOCIDE		0x0200
 
 extern spinlock_t dcache_inode_lock;
 extern spinlock_t dcache_lock;
-- 
cgit v1.2.3-71-gd317


From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:38 +1100
Subject: fs: dcache remove dcache_lock

dcache_lock no longer protects anything. remove it.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/Locking            |  16 +--
 Documentation/filesystems/dentry-locking.txt |  40 ++++---
 Documentation/filesystems/porting            |   8 +-
 arch/powerpc/platforms/cell/spufs/inode.c    |   5 +-
 drivers/infiniband/hw/ipath/ipath_fs.c       |   6 +-
 drivers/infiniband/hw/qib/qib_fs.c           |   3 -
 drivers/staging/pohmelfs/path_entry.c        |   2 -
 drivers/staging/smbfs/cache.c                |   4 -
 drivers/usb/core/inode.c                     |   3 -
 fs/9p/vfs_inode.c                            |   2 -
 fs/affs/amigaffs.c                           |   2 -
 fs/autofs4/autofs_i.h                        |   3 +
 fs/autofs4/expire.c                          |  10 +-
 fs/autofs4/root.c                            |  44 ++++----
 fs/autofs4/waitq.c                           |   7 +-
 fs/ceph/dir.c                                |   6 +-
 fs/ceph/inode.c                              |   4 -
 fs/cifs/inode.c                              |   3 -
 fs/coda/cache.c                              |   2 -
 fs/configfs/configfs_internal.h              |   2 -
 fs/configfs/inode.c                          |   6 +-
 fs/dcache.c                                  | 160 ++++-----------------------
 fs/exportfs/expfs.c                          |   4 -
 fs/libfs.c                                   |   8 --
 fs/namei.c                                   |   9 +-
 fs/ncpfs/dir.c                               |   3 -
 fs/ncpfs/ncplib_kernel.h                     |   4 -
 fs/nfs/dir.c                                 |   3 -
 fs/nfs/getroot.c                             |   2 -
 fs/nfs/namespace.c                           |   3 -
 fs/notify/fsnotify.c                         |   2 -
 fs/ocfs2/dcache.c                            |   2 -
 include/linux/dcache.h                       |   5 +-
 include/linux/fs.h                           |   6 +-
 include/linux/fsnotify.h                     |   2 -
 include/linux/fsnotify_backend.h             |  11 +-
 include/linux/namei.h                        |   1 -
 kernel/cgroup.c                              |   6 -
 mm/filemap.c                                 |   3 -
 security/selinux/selinuxfs.c                 |   4 -
 40 files changed, 109 insertions(+), 307 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index a15ee207b449..bdad6414dfa0 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -21,14 +21,14 @@ prototypes:
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
 
 locking rules:
-		dcache_lock	rename_lock	->d_lock	may block
-d_revalidate:	no		no		no		yes
-d_hash		no		no		no		no
-d_compare:	no		yes		no		no 
-d_delete:	yes		no		yes		no
-d_release:	no		no		no		yes
-d_iput:		no		no		no		yes
-d_dname:	no		no		no		no
+		rename_lock	->d_lock	may block
+d_revalidate:	no		no		yes
+d_hash		no		no		no
+d_compare:	yes		no		no
+d_delete:	no		yes		no
+d_release:	no		no		yes
+d_iput:		no		no		yes
+d_dname:	no		no		no
 
 --------------------------- inode_operations --------------------------- 
 prototypes:
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt
index 79334ed5daa7..30b6a40f5650 100644
--- a/Documentation/filesystems/dentry-locking.txt
+++ b/Documentation/filesystems/dentry-locking.txt
@@ -31,6 +31,7 @@ significant change is the way d_lookup traverses the hash chain, it
 doesn't acquire the dcache_lock for this and rely on RCU to ensure
 that the dentry has not been *freed*.
 
+dcache_lock no longer exists, dentry locking is explained in fs/dcache.c
 
 Dcache locking details
 ======================
@@ -50,14 +51,12 @@ Safe lock-free look-up of dcache hash table
 
 Dcache is a complex data structure with the hash table entries also
 linked together in other lists. In 2.4 kernel, dcache_lock protected
-all the lists. We applied RCU only on hash chain walking. The rest of
-the lists are still protected by dcache_lock.  Some of the important
-changes are :
+all the lists. RCU dentry hash walking works like this:
 
 1. The deletion from hash chain is done using hlist_del_rcu() macro
    which doesn't initialize next pointer of the deleted dentry and
    this allows us to walk safely lock-free while a deletion is
-   happening.
+   happening. This is a standard hlist_rcu iteration.
 
 2. Insertion of a dentry into the hash table is done using
    hlist_add_head_rcu() which take care of ordering the writes - the
@@ -66,19 +65,18 @@ changes are :
    which has since been replaced by hlist_for_each_entry_rcu(), while
    walking the hash chain. The only requirement is that all
    initialization to the dentry must be done before
-   hlist_add_head_rcu() since we don't have dcache_lock protection
-   while traversing the hash chain. This isn't different from the
-   existing code.
-
-3. The dentry looked up without holding dcache_lock by cannot be
-   returned for walking if it is unhashed. It then may have a NULL
-   d_inode or other bogosity since RCU doesn't protect the other
-   fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
-   indicate unhashed dentries and use this in conjunction with a
-   per-dentry lock (d_lock). Once looked up without the dcache_lock,
-   we acquire the per-dentry lock (d_lock) and check if the dentry is
-   unhashed. If so, the look-up is failed. If not, the reference count
-   of the dentry is increased and the dentry is returned.
+   hlist_add_head_rcu() since we don't have lock protection
+   while traversing the hash chain.
+
+3. The dentry looked up without holding locks cannot be returned for
+   walking if it is unhashed. It then may have a NULL d_inode or other
+   bogosity since RCU doesn't protect the other fields in the dentry. We
+   therefore use a flag DCACHE_UNHASHED to indicate unhashed dentries
+   and use this in conjunction with a per-dentry lock (d_lock). Once
+   looked up without locks, we acquire the per-dentry lock (d_lock) and
+   check if the dentry is unhashed. If so, the look-up is failed. If not,
+   the reference count of the dentry is increased and the dentry is
+   returned.
 
 4. Once a dentry is looked up, it must be ensured during the path walk
    for that component it doesn't go away. In pre-2.5.10 code, this was
@@ -86,10 +84,10 @@ changes are :
    In some sense, dcache_rcu path walking looks like the pre-2.5.10
    version.
 
-5. All dentry hash chain updates must take the dcache_lock as well as
-   the per-dentry lock in that order. dput() does this to ensure that
-   a dentry that has just been looked up in another CPU doesn't get
-   deleted before dget() can be done on it.
+5. All dentry hash chain updates must take the per-dentry lock (see
+   fs/dcache.c). This excludes dput() to ensure that a dentry that has
+   been looked up concurrently does not get deleted before dget() can
+   take a ref.
 
 6. There are several ways to do reference counting of RCU protected
    objects. One such example is in ipv4 route cache where deferred
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 9fd31940a8ef..1eb76959d096 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -216,7 +216,6 @@ had ->revalidate()) add calls in ->follow_link()/->readlink().
 ->d_parent changes are not protected by BKL anymore.  Read access is safe
 if at least one of the following is true:
 	* filesystem has no cross-directory rename()
-	* dcache_lock is held
 	* we know that parent had been locked (e.g. we are looking at
 ->d_parent of ->lookup() argument).
 	* we are called from ->rename().
@@ -340,3 +339,10 @@ look at examples of other filesystems) for guidance.
 	.d_hash() calling convention and locking rules are significantly
 changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
 look at examples of other filesystems) for guidance.
+
+---
+[mandatory]
+	dcache_lock is gone, replaced by fine grained locks. See fs/dcache.c
+for details of what locks to replace dcache_lock with in order to protect
+particular things. Most of the time, a filesystem only needs ->d_lock, which
+protects *all* the dcache state of a given dentry.
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5aef1a7f5e4b..2662b50ea8d4 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -159,21 +159,18 @@ static void spufs_prune_dir(struct dentry *dir)
 
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
-		spin_lock(&dcache_lock);
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry)) && dentry->d_inode) {
 			dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			simple_unlink(dir->d_inode, dentry);
-			/* XXX: what is dcache_lock protecting here? Other
+			/* XXX: what was dcache_lock protecting here? Other
 			 * filesystems (IB, configfs) release dcache_lock
 			 * before unlink */
-			spin_unlock(&dcache_lock);
 			dput(dentry);
 		} else {
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 		}
 	}
 	shrink_dcache_parent(dir);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 18aee04a8411..925e88227ded 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -277,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name)
 		goto bail;
 	}
 
-	spin_lock(&dcache_lock);
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
 		dget_locked_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
-		spin_unlock(&dcache_lock);
 		simple_unlink(parent->d_inode, tmp);
-	} else {
+	} else
 		spin_unlock(&tmp->d_lock);
-		spin_unlock(&dcache_lock);
-	}
 
 	ret = 0;
 bail:
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index fe4b242f0094..49af4a6538ba 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -453,17 +453,14 @@ static int remove_file(struct dentry *parent, char *name)
 		goto bail;
 	}
 
-	spin_lock(&dcache_lock);
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
 		dget_locked_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
-		spin_unlock(&dcache_lock);
 		simple_unlink(parent->d_inode, tmp);
 	} else {
 		spin_unlock(&tmp->d_lock);
-		spin_unlock(&dcache_lock);
 	}
 
 	ret = 0;
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index bbe42f42ca8f..400a9fc386ad 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -101,7 +101,6 @@ rename_retry:
 	d = first;
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&dcache_lock);
 
 	if (!IS_ROOT(d) && d_unhashed(d))
 		len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */
@@ -110,7 +109,6 @@ rename_retry:
 		len += d->d_name.len + 1; /* Plus slash */
 		d = d->d_parent;
 	}
-	spin_unlock(&dcache_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index 920434b6c071..75dfd403fb90 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -62,7 +62,6 @@ smb_invalidate_dircache_entries(struct dentry *parent)
 	struct list_head *next;
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
@@ -72,7 +71,6 @@ smb_invalidate_dircache_entries(struct dentry *parent)
 		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /*
@@ -98,7 +96,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	}
 
 	/* If a pointer is invalid, we search the dentry. */
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
@@ -115,7 +112,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	dent = NULL;
 out_unlock:
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 	return dent;
 }
 
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 89a0e8366585..1b125c224dcf 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -343,7 +343,6 @@ static int usbfs_empty (struct dentry *dentry)
 {
 	struct list_head *list;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	list_for_each(list, &dentry->d_subdirs) {
 		struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
@@ -352,13 +351,11 @@ static int usbfs_empty (struct dentry *dentry)
 		if (usbfs_positive(de)) {
 			spin_unlock(&de->d_lock);
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 			return 0;
 		}
 		spin_unlock(&de->d_lock);
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 	return 1;
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 47dfd5d29a6b..1073bca8488c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -270,13 +270,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	/* Directory should have only one entry. */
 	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
 	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	return dentry;
 }
 
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 2321cc92d44f..600101a21ba2 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 	void *data = dentry->d_fsdata;
 	struct list_head *head, *next;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	head = &inode->i_dentry;
 	next = head->next;
@@ -141,7 +140,6 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 		next = next->next;
 	}
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 }
 
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 9d2ae9b30d9f..0fffe1c24cec 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
 #include <linux/auto_fs4.h>
 #include <linux/auto_dev-ioctl.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 #include <linux/list.h>
 
 /* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do {							\
 		current->pid, __func__, ##args);	\
 } while (0)
 
+extern spinlock_t autofs4_lock;
+
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 968c1434af62..2f7951d67d16 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -102,7 +102,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
 	if (prev == NULL)
 		return dget(prev);
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 relock:
 	p = prev;
 	spin_lock(&p->d_lock);
@@ -114,7 +114,7 @@ again:
 
 			if (p == root) {
 				spin_unlock(&p->d_lock);
-				spin_unlock(&dcache_lock);
+				spin_unlock(&autofs4_lock);
 				dput(prev);
 				return NULL;
 			}
@@ -144,7 +144,7 @@ again:
 	dget_dlock(ret);
 	spin_unlock(&ret->d_lock);
 	spin_unlock(&p->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	dput(prev);
 
@@ -408,13 +408,13 @@ found:
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 	return expired;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 7a9ed6b88291..10ca68a96dc7 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
 
 #include "autofs_i.h"
 
+DEFINE_SPINLOCK(autofs4_lock);
+
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,15 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 		return -ENOENT;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -255,11 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	/* We trigger a mount for almost all flags */
 	lookup_type = autofs4_need_mount(nd->flags);
 	spin_lock(&sbi->fs_lock);
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&dentry->d_lock);
 	if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 		spin_unlock(&sbi->fs_lock);
 		goto follow;
 	}
@@ -272,7 +274,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (ino->flags & AUTOFS_INF_PENDING ||
 	    (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 		spin_unlock(&sbi->fs_lock);
 
 		status = try_to_fill_dentry(dentry, nd->flags);
@@ -282,7 +284,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		goto follow;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 	spin_unlock(&sbi->fs_lock);
 follow:
 	/*
@@ -353,14 +355,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		return 0;
 
 	/* Check for a non-mountpoint directory with no contents */
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&dentry->d_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 
 		/* The daemon never causes a mount to trigger */
 		if (oz_mode)
@@ -377,7 +379,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		return status;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return 1;
 }
@@ -432,7 +434,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -465,14 +467,14 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -487,7 +489,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -520,14 +522,14 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -763,12 +765,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return 0;
 }
@@ -785,20 +787,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
 	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	if (atomic_dec_and_test(&ino->count)) {
 		p_ino = autofs4_dentry_ino(dentry->d_parent);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 4be8f778a418..c5f8459c905e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -194,14 +194,15 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
 rename_retry:
 	buf = *name;
 	len = 0;
+
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
-		spin_unlock(&dcache_lock);
+		spin_unlock(&autofs4_lock);
 		rcu_read_unlock();
 		if (read_seqretry(&rename_lock, seq))
 			goto rename_retry;
@@ -217,7 +218,7 @@ rename_retry:
 		p -= tmp->d_name.len;
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2c924e8d85fe..58abc3da6111 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -112,7 +112,6 @@ static int __dcache_readdir(struct file *filp,
 	dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
 	     last);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 
 	/* start at beginning? */
@@ -156,7 +155,6 @@ more:
 	dget_dlock(dentry);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -182,21 +180,19 @@ more:
 
 	filp->f_pos++;
 
-	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */
+	/* make sure a dentry wasn't dropped while we didn't have parent lock */
 	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
 		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
 		err = -EAGAIN;
 		goto out;
 	}
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	p = p->prev;	/* advance to next dentry */
 	goto more;
 
 out_unlock:
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 out:
 	if (last)
 		dput(last);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2c6944473366..2a48cafc174b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -841,7 +841,6 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 	di->offset = ceph_inode(inode)->i_max_offset++;
 	spin_unlock(&inode->i_lock);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dir->d_lock);
 	spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&dn->d_u.d_child, &dir->d_subdirs);
@@ -849,7 +848,6 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 	     dn->d_u.d_child.prev, dn->d_u.d_child.next);
 	spin_unlock(&dn->d_lock);
 	spin_unlock(&dir->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /*
@@ -1233,13 +1231,11 @@ retry_lookup:
 			goto retry_lookup;
 		} else {
 			/* reorder parent's d_subdirs */
-			spin_lock(&dcache_lock);
 			spin_lock(&parent->d_lock);
 			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
 			list_move(&dn->d_u.d_child, &parent->d_subdirs);
 			spin_unlock(&dn->d_lock);
 			spin_unlock(&parent->d_lock);
-			spin_unlock(&dcache_lock);
 		}
 
 		di = dn->d_fsdata;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 003698365ece..99b9a2cc14b7 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -809,17 +809,14 @@ inode_has_hashed_dentries(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
 			spin_unlock(&dcache_inode_lock);
-			spin_unlock(&dcache_lock);
 			return true;
 		}
 	}
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	return false;
 }
 
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 859393fca2b7..5525e1c660fd 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,6 @@ static void coda_flag_children(struct dentry *parent, int flag)
 	struct list_head *child;
 	struct dentry *de;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	list_for_each(child, &parent->d_subdirs)
 	{
@@ -104,7 +103,6 @@ static void coda_flag_children(struct dentry *parent, int flag)
 		coda_flag_inode(de->d_inode, flag);
 	}
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 	return; 
 }
 
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index e58b4c30e216..026cf68553a4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
 {
 	struct config_item * item = NULL;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_unhashed(dentry)) {
 		struct configfs_dirent * sd = dentry->d_fsdata;
@@ -131,7 +130,6 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
 			item = config_item_get(sd->s_element);
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 
 	return item;
 }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 79b37765d8ff..fb3a55fff82a 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 	struct dentry * dentry = sd->s_dentry;
 
 	if (dentry) {
-		spin_lock(&dcache_lock);
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry) && dentry->d_inode)) {
 			dget_locked_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 			simple_unlink(parent->d_inode, dentry);
-		} else {
+		} else
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
-		}
 	}
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index a9bc4ecc21e1..0dbae053b664 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -54,11 +54,10 @@
  *   - d_alias, d_inode
  *
  * Ordering:
- * dcache_lock
- *   dcache_inode_lock
- *     dentry->d_lock
- *       dcache_lru_lock
- *       dcache_hash_lock
+ * dcache_inode_lock
+ *   dentry->d_lock
+ *     dcache_lru_lock
+ *     dcache_hash_lock
  *
  * If there is an ancestor relationship:
  * dentry->d_parent->...->d_parent->d_lock
@@ -77,12 +76,10 @@ EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
 EXPORT_SYMBOL(dcache_inode_lock);
-EXPORT_SYMBOL(dcache_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
@@ -139,7 +136,7 @@ static void __d_free(struct rcu_head *head)
 }
 
 /*
- * no dcache_lock, please.
+ * no locks, please.
  */
 static void d_free(struct dentry *dentry)
 {
@@ -162,7 +159,6 @@ static void d_free(struct dentry *dentry)
 static void dentry_iput(struct dentry * dentry)
 	__releases(dentry->d_lock)
 	__releases(dcache_inode_lock)
-	__releases(dcache_lock)
 {
 	struct inode *inode = dentry->d_inode;
 	if (inode) {
@@ -170,7 +166,6 @@ static void dentry_iput(struct dentry * dentry)
 		list_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 		if (!inode->i_nlink)
 			fsnotify_inoderemove(inode);
 		if (dentry->d_op && dentry->d_op->d_iput)
@@ -180,7 +175,6 @@ static void dentry_iput(struct dentry * dentry)
 	} else {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 	}
 }
 
@@ -235,14 +229,13 @@ static void dentry_lru_move_tail(struct dentry *dentry)
  *
  * If this is the root of the dentry tree, return NULL.
  *
- * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and
- * are dropped by d_kill.
+ * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
+ * d_kill.
  */
 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
 	__releases(parent->d_lock)
 	__releases(dcache_inode_lock)
-	__releases(dcache_lock)
 {
 	dentry->d_parent = NULL;
 	list_del(&dentry->d_u.d_child);
@@ -285,11 +278,9 @@ EXPORT_SYMBOL(__d_drop);
 
 void d_drop(struct dentry *dentry)
 {
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(d_drop);
 
@@ -337,21 +328,10 @@ repeat:
 	else
 		parent = dentry->d_parent;
 	if (dentry->d_count == 1) {
-		if (!spin_trylock(&dcache_lock)) {
-			/*
-			 * Something of a livelock possibility we could avoid
-			 * by taking dcache_lock and trying again, but we
-			 * want to reduce dcache_lock anyway so this will
-			 * get improved.
-			 */
-drop1:
-			spin_unlock(&dentry->d_lock);
-			goto repeat;
-		}
 		if (!spin_trylock(&dcache_inode_lock)) {
 drop2:
-			spin_unlock(&dcache_lock);
-			goto drop1;
+			spin_unlock(&dentry->d_lock);
+			goto repeat;
 		}
 		if (parent && !spin_trylock(&parent->d_lock)) {
 			spin_unlock(&dcache_inode_lock);
@@ -363,7 +343,6 @@ drop2:
 		spin_unlock(&dentry->d_lock);
 		if (parent)
 			spin_unlock(&parent->d_lock);
-		spin_unlock(&dcache_lock);
 		return;
 	}
 
@@ -387,7 +366,6 @@ drop2:
 	if (parent)
 		spin_unlock(&parent->d_lock);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	return;
 
 unhash_it:
@@ -418,11 +396,9 @@ int d_invalidate(struct dentry * dentry)
 	/*
 	 * If it's already been dropped, return OK.
 	 */
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (d_unhashed(dentry)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
 		return 0;
 	}
 	/*
@@ -431,9 +407,7 @@ int d_invalidate(struct dentry * dentry)
 	 */
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
 		shrink_dcache_parent(dentry);
-		spin_lock(&dcache_lock);
 		spin_lock(&dentry->d_lock);
 	}
 
@@ -450,19 +424,17 @@ int d_invalidate(struct dentry * dentry)
 	if (dentry->d_count > 1) {
 		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 			return -EBUSY;
 		}
 	}
 
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 	return 0;
 }
 EXPORT_SYMBOL(d_invalidate);
 
-/* This must be called with dcache_lock and d_lock held */
+/* This must be called with d_lock held */
 static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
 {
 	dentry->d_count++;
@@ -470,7 +442,7 @@ static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
 	return dentry;
 }
 
-/* This should be called _only_ with dcache_lock held */
+/* This must be called with d_lock held */
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	spin_lock(&dentry->d_lock);
@@ -575,11 +547,9 @@ struct dentry *d_find_alias(struct inode *inode)
 	struct dentry *de = NULL;
 
 	if (!list_empty(&inode->i_dentry)) {
-		spin_lock(&dcache_lock);
 		spin_lock(&dcache_inode_lock);
 		de = __d_find_alias(inode, 0);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 	}
 	return de;
 }
@@ -593,7 +563,6 @@ void d_prune_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
 restart:
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
@@ -602,14 +571,12 @@ restart:
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_inode_lock);
-			spin_unlock(&dcache_lock);
 			dput(dentry);
 			goto restart;
 		}
 		spin_unlock(&dentry->d_lock);
 	}
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(d_prune_aliases);
 
@@ -625,17 +592,14 @@ static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
 	__releases(parent->d_lock)
 	__releases(dcache_inode_lock)
-	__releases(dcache_lock)
 {
 	__d_drop(dentry);
 	dentry = d_kill(dentry, parent);
 
 	/*
-	 * Prune ancestors.  Locking is simpler than in dput(),
-	 * because dcache_lock needs to be taken anyway.
+	 * Prune ancestors.
 	 */
 	while (dentry) {
-		spin_lock(&dcache_lock);
 		spin_lock(&dcache_inode_lock);
 again:
 		spin_lock(&dentry->d_lock);
@@ -653,7 +617,6 @@ again:
 				spin_unlock(&parent->d_lock);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_inode_lock);
-			spin_unlock(&dcache_lock);
 			return;
 		}
 
@@ -702,8 +665,7 @@ relock:
 		spin_unlock(&dcache_lru_lock);
 
 		prune_one_dentry(dentry, parent);
-		/* dcache_lock, dcache_inode_lock and dentry->d_lock dropped */
-		spin_lock(&dcache_lock);
+		/* dcache_inode_lock and dentry->d_lock dropped */
 		spin_lock(&dcache_inode_lock);
 		spin_lock(&dcache_lru_lock);
 	}
@@ -725,7 +687,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 	LIST_HEAD(tmp);
 	int cnt = *count;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 relock:
 	spin_lock(&dcache_lru_lock);
@@ -766,7 +727,6 @@ relock:
 		list_splice(&referenced, &sb->s_dentry_lru);
 	spin_unlock(&dcache_lru_lock);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /**
@@ -788,7 +748,6 @@ static void prune_dcache(int count)
 
 	if (unused == 0 || count == 0)
 		return;
-	spin_lock(&dcache_lock);
 	if (count >= unused)
 		prune_ratio = 1;
 	else
@@ -825,11 +784,9 @@ static void prune_dcache(int count)
 		if (down_read_trylock(&sb->s_umount)) {
 			if ((sb->s_root != NULL) &&
 			    (!list_empty(&sb->s_dentry_lru))) {
-				spin_unlock(&dcache_lock);
 				__shrink_dcache_sb(sb, &w_count,
 						DCACHE_REFERENCED);
 				pruned -= w_count;
-				spin_lock(&dcache_lock);
 			}
 			up_read(&sb->s_umount);
 		}
@@ -845,7 +802,6 @@ static void prune_dcache(int count)
 	if (p)
 		__put_super(p);
 	spin_unlock(&sb_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /**
@@ -859,7 +815,6 @@ void shrink_dcache_sb(struct super_block *sb)
 {
 	LIST_HEAD(tmp);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	spin_lock(&dcache_lru_lock);
 	while (!list_empty(&sb->s_dentry_lru)) {
@@ -868,7 +823,6 @@ void shrink_dcache_sb(struct super_block *sb)
 	}
 	spin_unlock(&dcache_lru_lock);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
@@ -885,12 +839,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 	BUG_ON(!IS_ROOT(dentry));
 
 	/* detach this root from the system */
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	dentry_lru_del(dentry);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 
 	for (;;) {
 		/* descend to the first leaf in the current subtree */
@@ -899,7 +851,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
 			/* this is a branch with children - detach all of them
 			 * from the system in one go */
-			spin_lock(&dcache_lock);
 			spin_lock(&dentry->d_lock);
 			list_for_each_entry(loop, &dentry->d_subdirs,
 					    d_u.d_child) {
@@ -910,7 +861,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				spin_unlock(&loop->d_lock);
 			}
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 
 			/* move to the first child */
 			dentry = list_entry(dentry->d_subdirs.next,
@@ -977,8 +927,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
 /*
  * destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock, and only need dcache_lock when
- *   removing the dentry from the system lists and hashes because:
+ * - we don't need to use dentry->d_lock because:
  *   - the superblock is detached from all mountings and open files, so the
  *     dentry trees will not be rearranged by the VFS
  *   - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -1029,7 +978,6 @@ rename_retry:
 	this_parent = parent;
 	seq = read_seqbegin(&rename_lock);
 
-	spin_lock(&dcache_lock);
 	if (d_mountpoint(parent))
 		goto positive;
 	spin_lock(&this_parent->d_lock);
@@ -1075,7 +1023,6 @@ resume:
 		if (this_parent != child->d_parent ||
 				read_seqretry(&rename_lock, seq)) {
 			spin_unlock(&this_parent->d_lock);
-			spin_unlock(&dcache_lock);
 			rcu_read_unlock();
 			goto rename_retry;
 		}
@@ -1084,12 +1031,10 @@ resume:
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
-	spin_unlock(&dcache_lock);
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
 	return 0; /* No mount points found in tree */
 positive:
-	spin_unlock(&dcache_lock);
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
 	return 1;
@@ -1121,7 +1066,6 @@ rename_retry:
 	this_parent = parent;
 	seq = read_seqbegin(&rename_lock);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&this_parent->d_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -1185,7 +1129,6 @@ resume:
 		if (this_parent != child->d_parent ||
 				read_seqretry(&rename_lock, seq)) {
 			spin_unlock(&this_parent->d_lock);
-			spin_unlock(&dcache_lock);
 			rcu_read_unlock();
 			goto rename_retry;
 		}
@@ -1195,7 +1138,6 @@ resume:
 	}
 out:
 	spin_unlock(&this_parent->d_lock);
-	spin_unlock(&dcache_lock);
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
 	return found;
@@ -1297,7 +1239,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	INIT_LIST_HEAD(&dentry->d_u.d_child);
 
 	if (parent) {
-		spin_lock(&dcache_lock);
 		spin_lock(&parent->d_lock);
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 		dentry->d_parent = dget_dlock(parent);
@@ -1305,7 +1246,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&parent->d_lock);
-		spin_unlock(&dcache_lock);
 	}
 
 	this_cpu_inc(nr_dentry);
@@ -1325,7 +1265,6 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 }
 EXPORT_SYMBOL(d_alloc_name);
 
-/* the caller must hold dcache_lock */
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
 	spin_lock(&dentry->d_lock);
@@ -1354,11 +1293,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
 	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	__d_instantiate(entry, inode);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 }
 EXPORT_SYMBOL(d_instantiate);
@@ -1422,11 +1359,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 	BUG_ON(!list_empty(&entry->d_alias));
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	result = __d_instantiate_unique(entry, inode);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 
 	if (!result) {
 		security_d_instantiate(entry, inode);
@@ -1515,12 +1450,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	}
 	tmp->d_parent = tmp; /* make sure dput doesn't croak */
 
-	spin_lock(&dcache_lock);
+
 	spin_lock(&dcache_inode_lock);
 	res = __d_find_alias(inode, 0);
 	if (res) {
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 		dput(tmp);
 		goto out_iput;
 	}
@@ -1538,7 +1472,6 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	spin_unlock(&tmp->d_lock);
 	spin_unlock(&dcache_inode_lock);
 
-	spin_unlock(&dcache_lock);
 	return tmp;
 
  out_iput:
@@ -1568,21 +1501,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	struct dentry *new = NULL;
 
 	if (inode && S_ISDIR(inode->i_mode)) {
-		spin_lock(&dcache_lock);
 		spin_lock(&dcache_inode_lock);
 		new = __d_find_alias(inode, 1);
 		if (new) {
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
 			spin_unlock(&dcache_inode_lock);
-			spin_unlock(&dcache_lock);
 			security_d_instantiate(new, inode);
 			d_move(new, dentry);
 			iput(inode);
 		} else {
-			/* already taking dcache_lock, so d_add() by hand */
+			/* already taking dcache_inode_lock, so d_add() by hand */
 			__d_instantiate(dentry, inode);
 			spin_unlock(&dcache_inode_lock);
-			spin_unlock(&dcache_lock);
 			security_d_instantiate(dentry, inode);
 			d_rehash(dentry);
 		}
@@ -1655,12 +1585,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
 	 */
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
 		__d_instantiate(found, inode);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 		security_d_instantiate(found, inode);
 		return found;
 	}
@@ -1672,7 +1600,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 	dget_locked(new);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	security_d_instantiate(found, inode);
 	d_move(new, found);
 	iput(inode);
@@ -1843,7 +1770,6 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
 {
 	struct dentry *child;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dparent->d_lock);
 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
 		if (dentry == child) {
@@ -1851,12 +1777,10 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
 			__dget_locked_dlock(dentry);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dparent->d_lock);
-			spin_unlock(&dcache_lock);
 			return 1;
 		}
 	}
 	spin_unlock(&dparent->d_lock);
-	spin_unlock(&dcache_lock);
 
 	return 0;
 }
@@ -1889,7 +1813,6 @@ void d_delete(struct dentry * dentry)
 	/*
 	 * Are we the only user?
 	 */
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	spin_lock(&dentry->d_lock);
 	isdir = S_ISDIR(dentry->d_inode->i_mode);
@@ -1905,7 +1828,6 @@ void d_delete(struct dentry * dentry)
 
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 
 	fsnotify_nameremove(dentry, isdir);
 }
@@ -1932,13 +1854,11 @@ static void _d_rehash(struct dentry * entry)
  
 void d_rehash(struct dentry * entry)
 {
-	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
 	spin_lock(&dcache_hash_lock);
 	_d_rehash(entry);
 	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&entry->d_lock);
-	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(d_rehash);
 
@@ -1961,11 +1881,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(dentry_update_name_case);
 
@@ -2058,14 +1976,14 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
  * The hash value has to match the hash queue that the dentry is on..
  */
 /*
- * d_move_locked - move a dentry
+ * d_move - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way.
  */
-static void d_move_locked(struct dentry * dentry, struct dentry * target)
+void d_move(struct dentry * dentry, struct dentry * target)
 {
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2114,22 +2032,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
 }
-
-/**
- * d_move - move a dentry
- * @dentry: entry to move
- * @target: new dentry
- *
- * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
- */
-
-void d_move(struct dentry * dentry, struct dentry * target)
-{
-	spin_lock(&dcache_lock);
-	d_move_locked(dentry, target);
-	spin_unlock(&dcache_lock);
-}
 EXPORT_SYMBOL(d_move);
 
 /**
@@ -2155,13 +2057,12 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
  */
 static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
-	__releases(dcache_lock)
 	__releases(dcache_inode_lock)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
@@ -2185,11 +2086,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
 		goto out_err;
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
-	d_move_locked(alias, dentry);
+	d_move(alias, dentry);
 	ret = alias;
 out_err:
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	if (m2)
 		mutex_unlock(m2);
 	if (m1)
@@ -2249,7 +2149,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 
 	BUG_ON(!d_unhashed(dentry));
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 
 	if (!inode) {
@@ -2295,7 +2194,6 @@ found:
 	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&actual->d_lock);
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 out_nolock:
 	if (actual == dentry) {
 		security_d_instantiate(dentry, inode);
@@ -2307,7 +2205,6 @@ out_nolock:
 
 shouldnt_be_hashed:
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 	BUG();
 }
 EXPORT_SYMBOL_GPL(d_materialise_unique);
@@ -2421,11 +2318,9 @@ char *__d_path(const struct path *path, struct path *root,
 	int error;
 
 	prepend(&res, &buflen, "\0", 1);
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	error = prepend_path(path, root, &res, &buflen);
 	write_sequnlock(&rename_lock);
-	spin_unlock(&dcache_lock);
 
 	if (error)
 		return ERR_PTR(error);
@@ -2487,14 +2382,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
 	get_fs_root(current->fs, &root);
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	tmp = root;
 	error = path_with_deleted(path, &tmp, &res, &buflen);
 	if (error)
 		res = ERR_PTR(error);
 	write_sequnlock(&rename_lock);
-	spin_unlock(&dcache_lock);
 	path_put(&root);
 	return res;
 }
@@ -2520,14 +2413,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
 	get_fs_root(current->fs, &root);
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	tmp = root;
 	error = path_with_deleted(path, &tmp, &res, &buflen);
 	if (!error && !path_equal(&tmp, &root))
 		error = prepend_unreachable(&res, &buflen);
 	write_sequnlock(&rename_lock);
-	spin_unlock(&dcache_lock);
 	path_put(&root);
 	if (error)
 		res =  ERR_PTR(error);
@@ -2594,11 +2485,9 @@ char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
 {
 	char *retval;
 
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	retval = __dentry_path(dentry, buf, buflen);
 	write_sequnlock(&rename_lock);
-	spin_unlock(&dcache_lock);
 
 	return retval;
 }
@@ -2609,7 +2498,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 	char *p = NULL;
 	char *retval;
 
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	if (d_unlinked(dentry)) {
 		p = buf + buflen;
@@ -2619,12 +2507,10 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 	}
 	retval = __dentry_path(dentry, buf, buflen);
 	write_sequnlock(&rename_lock);
-	spin_unlock(&dcache_lock);
 	if (!IS_ERR(retval) && p)
 		*p = '/';	/* restore '/' overriden with '\0' */
 	return retval;
 Elong:
-	spin_unlock(&dcache_lock);
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
@@ -2658,7 +2544,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	get_fs_root_and_pwd(current->fs, &root, &pwd);
 
 	error = -ENOENT;
-	spin_lock(&dcache_lock);
 	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
@@ -2669,7 +2554,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 		prepend(&cwd, &buflen, "\0", 1);
 		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
 		write_sequnlock(&rename_lock);
-		spin_unlock(&dcache_lock);
 
 		if (error)
 			goto out;
@@ -2690,7 +2574,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 		}
 	} else {
 		write_sequnlock(&rename_lock);
-		spin_unlock(&dcache_lock);
 	}
 
 out:
@@ -2776,7 +2659,6 @@ void d_genocide(struct dentry *root)
 rename_retry:
 	this_parent = root;
 	seq = read_seqbegin(&rename_lock);
-	spin_lock(&dcache_lock);
 	spin_lock(&this_parent->d_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -2823,7 +2705,6 @@ resume:
 		if (this_parent != child->d_parent ||
 				read_seqretry(&rename_lock, seq)) {
 			spin_unlock(&this_parent->d_lock);
-			spin_unlock(&dcache_lock);
 			rcu_read_unlock();
 			goto rename_retry;
 		}
@@ -2832,7 +2713,6 @@ resume:
 		goto resume;
 	}
 	spin_unlock(&this_parent->d_lock);
-	spin_unlock(&dcache_lock);
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
 }
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 84b8c460a781..53a5c08fb63c 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -47,24 +47,20 @@ find_acceptable_alias(struct dentry *result,
 	if (acceptable(context, result))
 		return result;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
 		dget_locked(dentry);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 		if (toput)
 			dput(toput);
 		if (dentry != result && acceptable(context, dentry)) {
 			dput(result);
 			return dentry;
 		}
-		spin_lock(&dcache_lock);
 		spin_lock(&dcache_inode_lock);
 		toput = dentry;
 	}
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 
 	if (toput)
 		dput(toput);
diff --git a/fs/libfs.c b/fs/libfs.c
index cc4794914b52..28b36663c44e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -100,7 +100,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			struct dentry *cursor = file->private_data;
 			loff_t n = file->f_pos - 2;
 
-			spin_lock(&dcache_lock);
 			spin_lock(&dentry->d_lock);
 			/* d_lock not required for cursor */
 			list_del(&cursor->d_u.d_child);
@@ -116,7 +115,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
 			}
 			list_add_tail(&cursor->d_u.d_child, p);
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -159,7 +157,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			i++;
 			/* fallthrough */
 		default:
-			spin_lock(&dcache_lock);
 			spin_lock(&dentry->d_lock);
 			if (filp->f_pos == 2)
 				list_move(q, &dentry->d_subdirs);
@@ -175,13 +172,11 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
 				spin_unlock(&next->d_lock);
 				spin_unlock(&dentry->d_lock);
-				spin_unlock(&dcache_lock);
 				if (filldir(dirent, next->d_name.name, 
 					    next->d_name.len, filp->f_pos, 
 					    next->d_inode->i_ino, 
 					    dt_type(next->d_inode)) < 0)
 					return 0;
-				spin_lock(&dcache_lock);
 				spin_lock(&dentry->d_lock);
 				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 				/* next is still alive */
@@ -191,7 +186,6 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 				filp->f_pos++;
 			}
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 	}
 	return 0;
 }
@@ -285,7 +279,6 @@ int simple_empty(struct dentry *dentry)
 	struct dentry *child;
 	int ret = 0;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
 		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
@@ -298,7 +291,6 @@ int simple_empty(struct dentry *dentry)
 	ret = 1;
 out:
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 	return ret;
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index cbfa5fb31072..5642bc2be418 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -612,8 +612,8 @@ int follow_up(struct path *path)
 	return 1;
 }
 
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
+/*
+ * serialization is taken care of in namespace.c
  */
 static int __follow_mount(struct path *path)
 {
@@ -645,9 +645,6 @@ static void follow_mount(struct path *path)
 	}
 }
 
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
- */
 int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
@@ -2131,12 +2128,10 @@ void dentry_unhash(struct dentry *dentry)
 {
 	dget(dentry);
 	shrink_dcache_parent(dentry);
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (dentry->d_count == 2)
 		__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 102278ed38bd..de15c533311c 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -391,7 +391,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 	}
 
 	/* If a pointer is invalid, we search the dentry. */
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
@@ -402,13 +401,11 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 			else
 				dent = NULL;
 			spin_unlock(&parent->d_lock);
-			spin_unlock(&dcache_lock);
 			goto out;
 		}
 		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 	return NULL;
 
 out:
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index c4b718ff9a6b..1220df75ff22 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -193,7 +193,6 @@ ncp_renew_dentries(struct dentry *parent)
 	struct list_head *next;
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
@@ -207,7 +206,6 @@ ncp_renew_dentries(struct dentry *parent)
 		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 static inline void
@@ -217,7 +215,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 	struct list_head *next;
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&parent->d_lock);
 	next = parent->d_subdirs.next;
 	while (next != &parent->d_subdirs) {
@@ -227,7 +224,6 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 		next = next->next;
 	}
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 struct ncp_cache_head {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 12de824edb5c..eb77471b8823 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1718,11 +1718,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (dentry->d_count > 1) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(dentry->d_inode, 0);
 		error = nfs_sillyrename(dir, dentry);
@@ -1733,7 +1731,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		need_rehash = 1;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 	error = nfs_safe_remove(dentry);
 	if (!error || error == -ENOENT) {
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 850f67d5f0ac..b3e36c3430de 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,13 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		 * This again causes shrink_dcache_for_umount_subtree() to
 		 * Oops, since the test for IS_ROOT() will fail.
 		 */
-		spin_lock(&dcache_lock);
 		spin_lock(&dcache_inode_lock);
 		spin_lock(&sb->s_root->d_lock);
 		list_del_init(&sb->s_root->d_alias);
 		spin_unlock(&sb->s_root->d_lock);
 		spin_unlock(&dcache_inode_lock);
-		spin_unlock(&dcache_lock);
 	}
 	return 0;
 }
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 78c0ebb0b07c..74aaf3963c10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -60,7 +60,6 @@ rename_retry:
 
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&dcache_lock);
 	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
@@ -71,7 +70,6 @@ rename_retry:
 		*--end = '/';
 		dentry = dentry->d_parent;
 	}
-	spin_unlock(&dcache_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
@@ -91,7 +89,6 @@ rename_retry:
 	memcpy(end, base, namelen);
 	return end;
 Elong_unlock:
-	spin_unlock(&dcache_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ae769fc9b66c..9be6ec1f36d8 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	/* determine if the children should tell inode about their events */
 	watched = fsnotify_inode_watches_children(inode);
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
@@ -84,7 +83,6 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 		spin_unlock(&alias->d_lock);
 	}
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /* Notify this dentry's parent about a child's events. */
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index c31b5c647ac7..b7de749bdd12 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,7 +169,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	struct list_head *p;
 	struct dentry *dentry = NULL;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&dcache_inode_lock);
 	list_for_each(p, &inode->i_dentry) {
 		dentry = list_entry(p, struct dentry, d_alias);
@@ -189,7 +188,6 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	}
 
 	spin_unlock(&dcache_inode_lock);
-	spin_unlock(&dcache_lock);
 
 	return dentry;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c963ebada922..a2ceb94b0e38 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -183,7 +183,6 @@ struct dentry_operations {
 #define DCACHE_GENOCIDE		0x0200
 
 extern spinlock_t dcache_inode_lock;
-extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)
@@ -296,8 +295,8 @@ extern char *dentry_path(struct dentry *, char *, int);
  *	destroyed when it has references. dget() should never be
  *	called for dentries with zero reference counter. For these cases
  *	(preferably none, functions in dcache.c are sufficient for normal
- *	needs and they take necessary precautions) you should hold dcache_lock
- *	and call dget_locked() instead of dget().
+ *	needs and they take necessary precautions) you should hold d_lock
+ *	and call dget_dlock() instead of dget().
  */
 static inline struct dentry *dget_dlock(struct dentry *dentry)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 090f0eacde29..296cf2fde945 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1378,7 +1378,7 @@ struct super_block {
 #else
 	struct list_head	s_files;
 #endif
-	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+	/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */
 
@@ -2446,6 +2446,10 @@ static inline ino_t parent_ino(struct dentry *dentry)
 {
 	ino_t res;
 
+	/*
+	 * Don't strictly need d_lock here? If the parent ino could change
+	 * then surely we'd have a deeper race in the caller?
+	 */
 	spin_lock(&dentry->d_lock);
 	res = dentry->d_parent->d_inode->i_ino;
 	spin_unlock(&dentry->d_lock);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b10bcdeaef76..2a53f10712b3 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,7 +17,6 @@
 
 /*
  * fsnotify_d_instantiate - instantiate a dentry for inode
- * Called with dcache_lock held.
  */
 static inline void fsnotify_d_instantiate(struct dentry *dentry,
 					  struct inode *inode)
@@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask)
 
 /*
  * fsnotify_d_move - dentry has been moved
- * Called with dcache_lock and dentry->d_lock held.
  */
 static inline void fsnotify_d_move(struct dentry *dentry)
 {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7380763595d3..69ad89b50489 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
 {
 	struct dentry *parent;
 
-	assert_spin_locked(&dcache_lock);
 	assert_spin_locked(&dentry->d_lock);
 
+	/*
+	 * Serialisation of setting PARENT_WATCHED on the dentries is provided
+	 * by d_lock. If inotify_inode_watched changes after we have taken
+	 * d_lock, the following __fsnotify_update_child_dentry_flags call will
+	 * find our entry, so it will spin until we complete here, and update
+	 * us with the new state.
+	 */
 	parent = dentry->d_parent;
 	if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
 		dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
@@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
 
 /*
  * fsnotify_d_instantiate - instantiate a dentry for inode
- * Called with dcache_lock held.
  */
 static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
 {
 	if (!inode)
 		return;
 
-	assert_spin_locked(&dcache_lock);
-
 	spin_lock(&dentry->d_lock);
 	__fsnotify_update_dcache_flags(dentry);
 	spin_unlock(&dentry->d_lock);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d93642..aec730b53935 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -41,7 +41,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - require a directory
  *  - ending slashes ok even for nonexistent files
  *  - internal "there are more path components" flag
- *  - locked when lookup done with dcache_lock held
  *  - dentry cache is untrusted; force a real lookup
  */
 #define LOOKUP_FOLLOW		 1
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7b4705b51d4a..1864cb6a6a59 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -876,7 +876,6 @@ static void cgroup_clear_directory(struct dentry *dentry)
 	struct list_head *node;
 
 	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
-	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	node = dentry->d_subdirs.next;
 	while (node != &dentry->d_subdirs) {
@@ -891,18 +890,15 @@ static void cgroup_clear_directory(struct dentry *dentry)
 			dget_locked_dlock(d);
 			spin_unlock(&d->d_lock);
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
 			d_delete(d);
 			simple_unlink(dentry->d_inode, d);
 			dput(d);
-			spin_lock(&dcache_lock);
 			spin_lock(&dentry->d_lock);
 		} else
 			spin_unlock(&d->d_lock);
 		node = dentry->d_subdirs.next;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 /*
@@ -914,14 +910,12 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 
 	cgroup_clear_directory(dentry);
 
-	spin_lock(&dcache_lock);
 	parent = dentry->d_parent;
 	spin_lock(&parent->d_lock);
 	spin_lock(&dentry->d_lock);
 	list_del_init(&dentry->d_u.d_child);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&parent->d_lock);
-	spin_unlock(&dcache_lock);
 	remove_dir(dentry);
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 6b9aee20f242..ca389394fa2a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -102,9 +102,6 @@
  *    ->inode_lock		(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
- *  ->task->proc_lock
- *    ->dcache_lock		(proc_pid_lookup)
- *
  *  (code doesn't rely on that order, so you could switch it around)
  *  ->tasklist_lock             (memory_failure, collect_procs_ao)
  *    ->i_mmap_lock
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 017ec096446e..2285d693f296 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1145,7 +1145,6 @@ static void sel_remove_entries(struct dentry *de)
 {
 	struct list_head *node;
 
-	spin_lock(&dcache_lock);
 	spin_lock(&de->d_lock);
 	node = de->d_subdirs.next;
 	while (node != &de->d_subdirs) {
@@ -1158,11 +1157,9 @@ static void sel_remove_entries(struct dentry *de)
 			dget_locked_dlock(d);
 			spin_unlock(&de->d_lock);
 			spin_unlock(&d->d_lock);
-			spin_unlock(&dcache_lock);
 			d_delete(d);
 			simple_unlink(de->d_inode, d);
 			dput(d);
-			spin_lock(&dcache_lock);
 			spin_lock(&de->d_lock);
 		} else
 			spin_unlock(&d->d_lock);
@@ -1170,7 +1167,6 @@ static void sel_remove_entries(struct dentry *de)
 	}
 
 	spin_unlock(&de->d_lock);
-	spin_unlock(&dcache_lock);
 }
 
 #define BOOL_DIR_NAME "booleans"
-- 
cgit v1.2.3-71-gd317


From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:43 +1100
Subject: fs: dcache rationalise dget variants

dget_locked was a shortcut to avoid the lazy lru manipulation when we already
held dcache_lock (lru manipulation was relatively cheap at that point).
However, how that the lru lock is an innermost one, we never hold it at any
caller, so the lock cost can now be avoided. We already have well working lazy
dcache LRU, so it should be fine to defer LRU manipulations to scan time.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/powerpc/platforms/cell/spufs/inode.c |  2 +-
 drivers/infiniband/hw/ipath/ipath_fs.c    |  2 +-
 drivers/infiniband/hw/qib/qib_fs.c        |  2 +-
 drivers/staging/smbfs/cache.c             |  2 +-
 fs/configfs/inode.c                       |  2 +-
 fs/dcache.c                               | 36 ++++++++++---------------------
 fs/exportfs/expfs.c                       |  2 +-
 fs/ncpfs/dir.c                            |  2 +-
 fs/ocfs2/dcache.c                         |  2 +-
 include/linux/dcache.h                    | 15 +++----------
 kernel/cgroup.c                           |  2 +-
 security/selinux/selinuxfs.c              |  2 +-
 12 files changed, 24 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 2662b50ea8d4..03185de7cd4a 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -161,7 +161,7 @@ static void spufs_prune_dir(struct dentry *dir)
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry)) && dentry->d_inode) {
-			dget_locked_dlock(dentry);
+			dget_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			simple_unlink(dir->d_inode, dentry);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 925e88227ded..31ae1b108aea 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -279,7 +279,7 @@ static int remove_file(struct dentry *parent, char *name)
 
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
-		dget_locked_dlock(tmp);
+		dget_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
 		simple_unlink(parent->d_inode, tmp);
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 49af4a6538ba..df7fa251dcdc 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name)
 
 	spin_lock(&tmp->d_lock);
 	if (!(d_unhashed(tmp) && tmp->d_inode)) {
-		dget_locked_dlock(tmp);
+		dget_dlock(tmp);
 		__d_drop(tmp);
 		spin_unlock(&tmp->d_lock);
 		simple_unlink(parent->d_inode, tmp);
diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index 75dfd403fb90..f2a1323ca827 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -102,7 +102,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
-				dget_locked(dent);
+				dget(dent);
 			else
 				dent = NULL;
 			goto out_unlock;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index fb3a55fff82a..c83f4768eeaa 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -252,7 +252,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 	if (dentry) {
 		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry) && dentry->d_inode)) {
-			dget_locked_dlock(dentry);
+			dget_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			simple_unlink(parent->d_inode, dentry);
diff --git a/fs/dcache.c b/fs/dcache.c
index 01f016799fd4..b4d2e28eef5b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -429,32 +429,17 @@ int d_invalidate(struct dentry * dentry)
 EXPORT_SYMBOL(d_invalidate);
 
 /* This must be called with d_lock held */
-static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
+static inline void __dget_dlock(struct dentry *dentry)
 {
 	dentry->d_count++;
-	dentry_lru_del(dentry);
-	return dentry;
 }
 
-/* This must be called with d_lock held */
-static inline struct dentry * __dget_locked(struct dentry *dentry)
+static inline void __dget(struct dentry *dentry)
 {
 	spin_lock(&dentry->d_lock);
-	__dget_locked_dlock(dentry);
+	__dget_dlock(dentry);
 	spin_unlock(&dentry->d_lock);
-	return dentry;
-}
-
-struct dentry * dget_locked_dlock(struct dentry *dentry)
-{
-	return __dget_locked_dlock(dentry);
-}
-
-struct dentry * dget_locked(struct dentry *dentry)
-{
-	return __dget_locked(dentry);
 }
-EXPORT_SYMBOL(dget_locked);
 
 struct dentry *dget_parent(struct dentry *dentry)
 {
@@ -512,7 +497,7 @@ again:
 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
 				discon_alias = alias;
 			} else if (!want_discon) {
-				__dget_locked_dlock(alias);
+				__dget_dlock(alias);
 				spin_unlock(&alias->d_lock);
 				return alias;
 			}
@@ -525,7 +510,7 @@ again:
 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 			if (IS_ROOT(alias) &&
 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
-				__dget_locked_dlock(alias);
+				__dget_dlock(alias);
 				spin_unlock(&alias->d_lock);
 				return alias;
 			}
@@ -561,7 +546,7 @@ restart:
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_count) {
-			__dget_locked_dlock(dentry);
+			__dget_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_inode_lock);
@@ -1257,7 +1242,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 		 * don't need child lock because it is not subject
 		 * to concurrency here
 		 */
-		dentry->d_parent = dget_dlock(parent);
+		__dget_dlock(parent);
+		dentry->d_parent = parent;
 		dentry->d_sb = parent->d_sb;
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 		spin_unlock(&parent->d_lock);
@@ -1360,7 +1346,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 			continue;
 		if (memcmp(qstr->name, name, len))
 			continue;
-		dget_locked(alias);
+		__dget(alias);
 		return alias;
 	}
 
@@ -1613,7 +1599,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 * reference to it, move it in place and use it.
 	 */
 	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	dget_locked(new);
+	__dget(new);
 	spin_unlock(&dcache_inode_lock);
 	security_d_instantiate(found, inode);
 	d_move(new, found);
@@ -1789,7 +1775,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
 		if (dentry == child) {
 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-			__dget_locked_dlock(dentry);
+			__dget_dlock(dentry);
 			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dparent->d_lock);
 			return 1;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 53a5c08fb63c..f06a940065f6 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -49,7 +49,7 @@ find_acceptable_alias(struct dentry *result,
 
 	spin_lock(&dcache_inode_lock);
 	list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
-		dget_locked(dentry);
+		dget(dentry);
 		spin_unlock(&dcache_inode_lock);
 		if (toput)
 			dput(toput);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index de15c533311c..0ba3cdc95a44 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -397,7 +397,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 		dent = list_entry(next, struct dentry, d_u.d_child);
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
-				dget_locked(dent);
+				dget(dent);
 			else
 				dent = NULL;
 			spin_unlock(&parent->d_lock);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b7de749bdd12..4d54c60ceee4 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -178,7 +178,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 			mlog(0, "dentry found: %.*s\n",
 			     dentry->d_name.len, dentry->d_name.name);
 
-			dget_locked_dlock(dentry);
+			dget_dlock(dentry);
 			spin_unlock(&dentry->d_lock);
 			break;
 		}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a2ceb94b0e38..ca648685f0cc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -287,23 +287,17 @@ extern char *dentry_path(struct dentry *, char *, int);
 /* Allocation counts.. */
 
 /**
- *	dget, dget_locked	-	get a reference to a dentry
+ *	dget, dget_dlock -	get a reference to a dentry
  *	@dentry: dentry to get a reference to
  *
  *	Given a dentry or %NULL pointer increment the reference count
  *	if appropriate and return the dentry. A dentry will not be 
- *	destroyed when it has references. dget() should never be
- *	called for dentries with zero reference counter. For these cases
- *	(preferably none, functions in dcache.c are sufficient for normal
- *	needs and they take necessary precautions) you should hold d_lock
- *	and call dget_dlock() instead of dget().
+ *	destroyed when it has references.
  */
 static inline struct dentry *dget_dlock(struct dentry *dentry)
 {
-	if (dentry) {
-		BUG_ON(!dentry->d_count);
+	if (dentry)
 		dentry->d_count++;
-	}
 	return dentry;
 }
 
@@ -317,9 +311,6 @@ static inline struct dentry *dget(struct dentry *dentry)
 	return dentry;
 }
 
-extern struct dentry * dget_locked(struct dentry *);
-extern struct dentry * dget_locked_dlock(struct dentry *);
-
 extern struct dentry *dget_parent(struct dentry *dentry);
 
 /**
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1864cb6a6a59..9f41470c3949 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -887,7 +887,7 @@ static void cgroup_clear_directory(struct dentry *dentry)
 			/* This should never be called on a cgroup
 			 * directory with child cgroups */
 			BUG_ON(d->d_inode->i_mode & S_IFDIR);
-			dget_locked_dlock(d);
+			dget_dlock(d);
 			spin_unlock(&d->d_lock);
 			spin_unlock(&dentry->d_lock);
 			d_delete(d);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 2285d693f296..43deac219491 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1154,7 +1154,7 @@ static void sel_remove_entries(struct dentry *de)
 		list_del_init(node);
 
 		if (d->d_inode) {
-			dget_locked_dlock(d);
+			dget_dlock(d);
 			spin_unlock(&de->d_lock);
 			spin_unlock(&d->d_lock);
 			d_delete(d);
-- 
cgit v1.2.3-71-gd317


From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:49 +1100
Subject: fs: icache RCU free inodes

RCU free the struct inode. This will allow:

- Subsequent store-free path walking patch. The inode must be consulted for
  permissions when walking, so an RCU inode reference is a must.
- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
  to take i_lock no longer need to take sb_inode_list_lock to walk the list in
  the first place. This will simplify and optimize locking.
- Could remove some nested trylock loops in dcache code
- Could potentially simplify things a bit in VM land. Do not need to take the
  page lock to follow page->mapping.

The downsides of this is the performance cost of using RCU. In a simple
creat/unlink microbenchmark, performance drops by about 10% due to inability to
reuse cache-hot slab objects. As iterations increase and RCU freeing starts
kicking over, this increases to about 20%.

In cases where inode lifetimes are longer (ie. many inodes may be allocated
during the average life span of a single inode), a lot of this cache reuse is
not applicable, so the regression caused by this patch is smaller.

The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU,
however this adds some complexity to list walking and store-free path walking,
so I prefer to implement this at a later date, if it is shown to be a win in
real situations. I haven't found a regression in any non-micro benchmark so I
doubt it will be a problem.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/porting         | 14 ++++++++++++++
 arch/powerpc/platforms/cell/spufs/inode.c | 10 ++++++++--
 drivers/staging/pohmelfs/inode.c          |  9 ++++++++-
 drivers/staging/smbfs/inode.c             |  9 ++++++++-
 fs/9p/vfs_inode.c                         |  9 ++++++++-
 fs/adfs/super.c                           |  9 ++++++++-
 fs/affs/super.c                           |  9 ++++++++-
 fs/afs/super.c                            | 10 +++++++++-
 fs/befs/linuxvfs.c                        | 10 ++++++++--
 fs/bfs/inode.c                            |  9 ++++++++-
 fs/block_dev.c                            |  9 ++++++++-
 fs/btrfs/inode.c                          |  9 ++++++++-
 fs/ceph/inode.c                           | 11 ++++++++++-
 fs/cifs/cifsfs.c                          |  9 ++++++++-
 fs/coda/inode.c                           |  9 ++++++++-
 fs/ecryptfs/super.c                       | 12 +++++++++++-
 fs/efs/super.c                            |  9 ++++++++-
 fs/exofs/super.c                          |  9 ++++++++-
 fs/ext2/super.c                           |  9 ++++++++-
 fs/ext3/super.c                           |  9 ++++++++-
 fs/ext4/super.c                           |  9 ++++++++-
 fs/fat/inode.c                            |  9 ++++++++-
 fs/freevxfs/vxfs_inode.c                  |  9 ++++++++-
 fs/fuse/inode.c                           |  9 ++++++++-
 fs/gfs2/super.c                           |  9 ++++++++-
 fs/hfs/super.c                            |  9 ++++++++-
 fs/hfsplus/super.c                        | 10 +++++++++-
 fs/hostfs/hostfs_kern.c                   |  9 ++++++++-
 fs/hpfs/super.c                           |  9 ++++++++-
 fs/hppfs/hppfs.c                          |  9 ++++++++-
 fs/hugetlbfs/inode.c                      |  9 ++++++++-
 fs/inode.c                                | 10 +++++++++-
 fs/isofs/inode.c                          |  9 ++++++++-
 fs/jffs2/super.c                          |  9 ++++++++-
 fs/jfs/super.c                            | 10 +++++++++-
 fs/logfs/inode.c                          |  9 ++++++++-
 fs/minix/inode.c                          |  9 ++++++++-
 fs/ncpfs/inode.c                          |  9 ++++++++-
 fs/nfs/inode.c                            |  9 ++++++++-
 fs/nilfs2/super.c                         | 10 +++++++++-
 fs/ntfs/inode.c                           |  9 ++++++++-
 fs/ocfs2/dlmfs/dlmfs.c                    |  9 ++++++++-
 fs/ocfs2/super.c                          |  9 ++++++++-
 fs/openpromfs/inode.c                     |  9 ++++++++-
 fs/proc/inode.c                           |  9 ++++++++-
 fs/qnx4/inode.c                           |  9 ++++++++-
 fs/reiserfs/super.c                       |  9 ++++++++-
 fs/romfs/super.c                          |  9 ++++++++-
 fs/squashfs/super.c                       |  9 ++++++++-
 fs/sysv/inode.c                           |  9 ++++++++-
 fs/ubifs/super.c                          | 10 +++++++++-
 fs/udf/super.c                            |  9 ++++++++-
 fs/ufs/super.c                            |  9 ++++++++-
 fs/xfs/xfs_iget.c                         | 13 ++++++++++++-
 include/linux/fs.h                        |  5 ++++-
 include/linux/net.h                       |  1 -
 ipc/mqueue.c                              |  9 ++++++++-
 mm/shmem.c                                |  9 ++++++++-
 net/socket.c                              | 16 ++++++++--------
 net/sunrpc/rpc_pipe.c                     | 10 +++++++++-
 60 files changed, 490 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 1eb76959d096..ccf0ce7866b9 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -346,3 +346,17 @@ look at examples of other filesystems) for guidance.
 for details of what locks to replace dcache_lock with in order to protect
 particular things. Most of the time, a filesystem only needs ->d_lock, which
 protects *all* the dcache state of a given dentry.
+
+--
+[mandatory]
+
+	Filesystems must RCU-free their inodes, if they can have been accessed
+via rcu-walk path walk (basically, if the file can have had a path name in the
+vfs namespace).
+
+	i_dentry and i_rcu share storage in a union, and the vfs expects
+i_dentry to be reinitialized before it is freed, so an:
+
+  INIT_LIST_HEAD(&inode->i_dentry);
+
+must be done in the RCU callback.
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 03185de7cd4a..856e9c398068 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void
-spufs_destroy_inode(struct inode *inode)
+static void spufs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
 }
 
+static void spufs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
 static void
 spufs_init_once(void *p)
 {
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 61685ccceda8..cc8d2840f9b6 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = {
 	.set_page_dirty 	= __set_page_dirty_nobuffers,
 };
 
+static void pohmelfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode));
+}
+
 /*
  * ->detroy_inode() callback. Deletes inode from the caches
  *  and frees private data.
@@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode)
 
 	dprintk("%s: pi: %p, inode: %p, ino: %llu.\n",
 		__func__, pi, &pi->vfs_inode, pi->ino);
-	kmem_cache_free(pohmelfs_inode_cache, pi);
 	atomic_long_dec(&psb->total_inodes);
+	call_rcu(&inode->i_rcu, pohmelfs_i_callback);
 }
 
 /*
diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c
index 540a984bb516..244319dc9702 100644
--- a/drivers/staging/smbfs/inode.c
+++ b/drivers/staging/smbfs/inode.c
@@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void smb_destroy_inode(struct inode *inode)
+static void smb_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
+static void smb_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, smb_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 1073bca8488c..f6f9081e6d2c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
  *
  */
 
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
 }
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, v9fs_i_callback);
+}
 #endif
 
 /**
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42d..47dffc513a26 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
+static void adfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cbd..4c18fcfb0a19 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
 	return &i->vfs_inode;
 }
 
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
+static void affs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece4..f901a9d7c111 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	return &vnode->vfs_inode;
 }
 
+static void afs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(afs_inode_cachep, vnode);
+}
+
 /*
  * destroy an AFS inode struct
  */
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
 
 	ASSERTCMP(vnode->server, ==, NULL);
 
-	kmem_cache_free(afs_inode_cachep, vnode);
+	call_rcu(&inode->i_rcu, afs_i_callback);
 	atomic_dec(&afs_count_active_inodes);
 }
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c6..de93581b79a2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
         return &bi->vfs_inode;
 }
 
-static void
-befs_destroy_inode(struct inode *inode)
+static void befs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
+static void befs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, befs_i_callback);
+}
+
 static void init_once(void *foo)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49bb..a8e37f81d097 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
 	return &bi->vfs_inode;
 }
 
-static void bfs_destroy_inode(struct inode *inode)
+static void bfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
+static void bfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, bfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct bfs_inode_info *bi = foo;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd689..771f23527010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct bdev_inode *bdi = BDEV_I(inode);
 
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
+static void bdev_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7ce9f8932789..f9d2994a42a2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+static void btrfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
 void btrfs_destroy_inode(struct inode *inode)
 {
 	struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
 	inode_tree_del(inode);
 	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
-	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+	call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
 
 int btrfs_drop_inode(struct inode *inode)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2a48cafc174b..47f8c8baf3b5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	return &ci->vfs_inode;
 }
 
+static void ceph_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ceph_inode_cachep, ci);
+}
+
 void ceph_destroy_inode(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
 	if (ci->i_xattrs.prealloc_blob)
 		ceph_buffer_put(ci->i_xattrs.prealloc_blob);
 
-	kmem_cache_free(ceph_inode_cachep, ci);
+	call_rcu(&inode->i_rcu, ceph_i_callback);
 }
 
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3936aa7f2c22..223717dcc401 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb)
 	return &cifs_inode->vfs_inode;
 }
 
+static void cifs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+}
+
 static void
 cifs_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+	call_rcu(&inode->i_rcu, cifs_i_callback);
 }
 
 static void
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5ea57c8c7f97..50dc7d189f56 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void coda_destroy_inode(struct inode *inode)
+static void coda_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
+static void coda_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, coda_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2720178b7718..3042fe123a34 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -62,6 +62,16 @@ out:
 	return inode;
 }
 
+static void ecryptfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ecryptfs_inode_info *inode_info;
+	inode_info = ecryptfs_inode_to_private(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
 /**
  * ecryptfs_destroy_inode
  * @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 		}
 	}
 	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
-	kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+	call_rcu(&inode->i_rcu, ecryptfs_i_callback);
 }
 
 /**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652cc..0f31acb0131c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void efs_destroy_inode(struct inode *inode)
+static void efs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
+static void efs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, efs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e0456..8c6c4669b381 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
+static void exofs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
 /*
  * Remove an inode from the cache
  */
 static void exofs_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+	call_rcu(&inode->i_rcu, exofs_i_callback);
 }
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d78..e0c6380ff992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
+static void ext2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f0..77ce1616f725 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
+static void ext3_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
 static void ext3_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode)
 				false);
 		dump_stack();
 	}
-	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+	call_rcu(&inode->i_rcu, ext3_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb15c9c0be74..cd37f9d5e447 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode)
 	return drop;
 }
 
+static void ext4_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
 static void ext4_destroy_inode(struct inode *inode)
 {
 	ext4_ioend_wait(inode);
@@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode)
 				true);
 		dump_stack();
 	}
-	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+	call_rcu(&inode->i_rcu, ext4_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c30..8cccfebee180 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
+static void fat_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079d..2ba6719ac612 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 	return ip;
 }
 
+static void vxfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(vxfs_inode_cachep, inode->i_private);
+}
+
 /**
  * vxfs_evict_inode - remove inode from main memory
  * @ip:		inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
 {
 	truncate_inode_pages(&ip->i_data, 0);
 	end_writeback(ip);
-	kmem_cache_free(vxfs_inode_cachep, ip->i_private);
+	call_rcu(&ip->i_rcu, vxfs_i_callback);
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a92..44e0a6c57e87 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+static void fuse_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(fuse_inode_cachep, inode);
+}
+
 static void fuse_destroy_inode(struct inode *inode)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode)
 	BUG_ON(!list_empty(&fi->queued_writes));
 	if (fi->forget_req)
 		fuse_request_free(fi->forget_req);
-	kmem_cache_free(fuse_inode_cachep, inode);
+	call_rcu(&inode->i_rcu, fuse_i_callback);
 }
 
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430b..16c2ecac7eb7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	return &ip->i_inode;
 }
 
-static void gfs2_destroy_inode(struct inode *inode)
+static void gfs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(gfs2_inode_cachep, inode);
 }
 
+static void gfs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, gfs2_i_callback);
+}
+
 const struct super_operations gfs2_super_ops = {
 	.alloc_inode		= gfs2_alloc_inode,
 	.destroy_inode		= gfs2_destroy_inode,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb8..ef4ee5716abe 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 	return i ? &i->vfs_inode : NULL;
 }
 
-static void hfs_destroy_inode(struct inode *inode)
+static void hfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
 }
 
+static void hfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hfs_i_callback);
+}
+
 static const struct super_operations hfs_super_operations = {
 	.alloc_inode	= hfs_alloc_inode,
 	.destroy_inode	= hfs_destroy_inode,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 52cc746d3ba3..182e83a9079e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -488,11 +488,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 	return i ? &i->vfs_inode : NULL;
 }
 
-static void hfsplus_destroy_inode(struct inode *inode)
+static void hfsplus_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
 }
 
+static void hfsplus_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hfsplus_i_callback);
+}
+
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
 
 static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 39dc505ed273..861113fcfc88 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -247,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
 	}
 }
 
-static void hostfs_destroy_inode(struct inode *inode)
+static void hostfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kfree(HOSTFS_I(inode));
 }
 
+static void hostfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hostfs_i_callback);
+}
+
 static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	const char *root_path = vfs->mnt_sb->s_fs_info;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3a..49935ba78db8 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void hpfs_destroy_inode(struct inode *inode)
+static void hpfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
+static void hpfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hpfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713fc..87ed48e0343d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
 	mntput(ino->i_sb->s_fs_info);
 }
 
-static void hppfs_destroy_inode(struct inode *inode)
+static void hppfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kfree(HPPFS_I(inode));
 }
 
+static void hppfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hppfs_i_callback);
+}
+
 static const struct super_operations hppfs_sbops = {
 	.alloc_inode	= hppfs_alloc_inode,
 	.destroy_inode	= hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a5fe68189eed..9885082b470f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 	return &p->vfs_inode;
 }
 
+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
 	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
-	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+	call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
 }
 
 static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index 5a0a898f55d1..6751dfe8cc06 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -272,6 +272,13 @@ void __destroy_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(__destroy_inode);
 
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(inode_cachep, inode);
+}
+
 static void destroy_inode(struct inode *inode)
 {
 	BUG_ON(!list_empty(&inode->i_lru));
@@ -279,7 +286,7 @@ static void destroy_inode(struct inode *inode)
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-		kmem_cache_free(inode_cachep, (inode));
+		call_rcu(&inode->i_rcu, i_callback);
 }
 
 /*
@@ -432,6 +439,7 @@ void end_writeback(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
+	/* don't need i_lock here, no concurrent mods to i_state */
 	inode->i_state = I_FREEING | I_CLEAR;
 }
 EXPORT_SYMBOL(end_writeback);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d204ee4235fd..d8f3a652243d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -81,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void isofs_destroy_inode(struct inode *inode)
+static void isofs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
+static void isofs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, isofs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct iso_inode_info *ei = foo;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a4..853b8e300084 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
 	return &f->vfs_inode;
 }
 
-static void jffs2_destroy_inode(struct inode *inode)
+static void jffs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
+static void jffs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, jffs2_i_callback);
+}
+
 static void jffs2_i_init_once(void *foo)
 {
 	struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3bf..b715b0f7bdfd 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
 	return &jfs_inode->vfs_inode;
 }
 
+static void jfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(jfs_inode_cachep, ji);
+}
+
 static void jfs_destroy_inode(struct inode *inode)
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
 		ji->active_ag = -1;
 	}
 	spin_unlock_irq(&ji->ag_lock);
-	kmem_cache_free(jfs_inode_cachep, ji);
+	call_rcu(&inode->i_rcu, jfs_i_callback);
 }
 
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098f..03b8c240aeda 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
 	return __logfs_iget(sb, ino);
 }
 
+static void logfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
+}
+
 static void __logfs_destroy_inode(struct inode *inode)
 {
 	struct logfs_inode *li = logfs_inode(inode);
 
 	BUG_ON(li->li_block);
 	list_del(&li->li_freeing_list);
-	kmem_cache_free(logfs_inode_cache, li);
+	call_rcu(&inode->i_rcu, logfs_i_callback);
 }
 
 static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a34..ae0b83f476a6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void minix_destroy_inode(struct inode *inode)
+static void minix_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
+static void minix_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, minix_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8fb93b604e73..60047dbeb38d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -58,11 +58,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ncp_destroy_inode(struct inode *inode)
+static void ncp_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
+static void ncp_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ncp_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c73416..017daa3bed38 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
 	return &nfsi->vfs_inode;
 }
 
-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
 }
 
+void nfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
 static inline void nfs4_init_once(struct nfs_inode *nfsi)
 {
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index d36fc7ee615f..e2dcc9c733f7 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
 	return &ii->vfs_inode;
 }
 
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
 
+	INIT_LIST_HEAD(&inode->i_dentry);
+
 	if (mdi) {
 		kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
 		kfree(mdi);
@@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
 }
 
+void nilfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
 static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
 {
 	struct the_nilfs *nilfs = sbi->s_nilfs;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc7..a627ed82c0a3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 	return NULL;
 }
 
+static void ntfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+}
+
 void ntfs_destroy_big_inode(struct inode *inode)
 {
 	ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
 	BUG_ON(ni->page);
 	if (!atomic_dec_and_test(&ni->count))
 		BUG();
-	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+	call_rcu(&inode->i_rcu, ntfs_i_callback);
 }
 
 static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19ed..8c5c0eddc365 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
 	return &ip->ip_vfs_inode;
 }
 
-static void dlmfs_destroy_inode(struct inode *inode)
+static void dlmfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
 }
 
+static void dlmfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, dlmfs_i_callback);
+}
+
 static void dlmfs_evict_inode(struct inode *inode)
 {
 	int status;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cfeab7ce3697..17ff46fa8a10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
-static void ocfs2_destroy_inode(struct inode *inode)
+static void ocfs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
 }
 
+static void ocfs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ocfs2_i_callback);
+}
+
 static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
 						unsigned int cbits)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911e61f348fc..a2a5bff774e3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
-static void openprom_destroy_inode(struct inode *inode)
+static void openprom_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(op_inode_cachep, OP_I(inode));
 }
 
+static void openprom_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, openprom_i_callback);
+}
+
 static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
 {
 	struct inode *inode;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3ddb6068177c..6bcb926b101b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
+static void proc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa3..e63b4171d583 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void qnx4_destroy_inode(struct inode *inode)
+static void qnx4_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
+static void qnx4_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, qnx4_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b243117b8752..2575682a9ead 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void reiserfs_destroy_inode(struct inode *inode)
+static void reiserfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
+static void reiserfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, reiserfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55cd..2305e3121cb1 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
 /*
  * return a spent inode to the slab cache
  */
-static void romfs_destroy_inode(struct inode *inode)
+static void romfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
+static void romfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, romfs_i_callback);
+}
+
 /*
  * get filesystem statistics
  */
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c1..20700b9f2b4c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
 }
 
 
-static void squashfs_destroy_inode(struct inode *inode)
+static void squashfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
 }
 
+static void squashfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, squashfs_i_callback);
+}
+
 
 static struct file_system_type squashfs_fs_type = {
 	.owner = THIS_MODULE,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e6..0630eb969a28 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 	return &si->vfs_inode;
 }
 
-static void sysv_destroy_inode(struct inode *inode)
+static void sysv_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
+static void sysv_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, sysv_i_callback);
+}
+
 static void init_once(void *p)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e3..6e11c2975dcf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
 	return &ui->vfs_inode;
 };
 
+static void ubifs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ubifs_inode_slab, ui);
+}
+
 static void ubifs_destroy_inode(struct inode *inode)
 {
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
 	kfree(ui->data);
-	kmem_cache_free(ubifs_inode_slab, inode);
+	call_rcu(&inode->i_rcu, ubifs_i_callback);
 }
 
 /*
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4a5c7c61836a..b539d53320fb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void udf_destroy_inode(struct inode *inode)
+static void udf_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
+static void udf_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, udf_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56da..2c61ac5d4e48 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ufs_destroy_inode(struct inode *inode)
+static void ufs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
+static void ufs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ufs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8e..d7de5a3f7867 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
 	return ip;
 }
 
+STATIC void
+xfs_inode_free_callback(
+	struct rcu_head		*head)
+{
+	struct inode		*inode = container_of(head, struct inode, i_rcu);
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
 void
 xfs_inode_free(
 	struct xfs_inode	*ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
 
-	kmem_zone_free(xfs_inode_zone, ip);
+	call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 296cf2fde945..1ff4d0a33b25 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -737,7 +737,10 @@ struct inode {
 	struct list_head	i_wb_list;	/* backing dev IO list */
 	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
-	struct list_head	i_dentry;
+	union {
+		struct list_head	i_dentry;
+		struct rcu_head		i_rcu;
+	};
 	unsigned long		i_ino;
 	atomic_t		i_count;
 	unsigned int		i_nlink;
diff --git a/include/linux/net.h b/include/linux/net.h
index 16faa130088c..06bde4908473 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -120,7 +120,6 @@ enum sock_shutdown_cmd {
 struct socket_wq {
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
-	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
 /**
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 035f4399edbc..14fb6d67e6a3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -237,11 +237,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void mqueue_destroy_inode(struct inode *inode)
+static void mqueue_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
 }
 
+static void mqueue_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, mqueue_i_callback);
+}
+
 static void mqueue_evict_inode(struct inode *inode)
 {
 	struct mqueue_inode_info *info;
diff --git a/mm/shmem.c b/mm/shmem.c
index 47fdeeb9d636..5ee67c990602 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 	return &p->vfs_inode;
 }
 
+static void shmem_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+}
+
 static void shmem_destroy_inode(struct inode *inode)
 {
 	if ((inode->i_mode & S_IFMT) == S_IFREG) {
 		/* only struct inode is valid if it's an inline symlink */
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	}
-	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+	call_rcu(&inode->i_rcu, shmem_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/net/socket.c b/net/socket.c
index 088fb3fd45e0..97fff3a4e72f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -262,20 +262,20 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 }
 
 
-static void wq_free_rcu(struct rcu_head *head)
+static void sock_free_rcu(struct rcu_head *head)
 {
-	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct socket_alloc *ei = container_of(inode, struct socket_alloc,
+								vfs_inode);
 
-	kfree(wq);
+	kfree(ei->socket.wq);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void sock_destroy_inode(struct inode *inode)
 {
-	struct socket_alloc *ei;
-
-	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
-	kmem_cache_free(sock_inode_cachep, ei);
+	call_rcu(&inode->i_rcu, sock_free_rcu);
 }
 
 static void init_once(void *foo)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a0dc1a86fcea..2899fe27f880 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb)
 }
 
 static void
-rpc_destroy_inode(struct inode *inode)
+rpc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
 }
 
+static void
+rpc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, rpc_i_callback);
+}
+
 static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
-- 
cgit v1.2.3-71-gd317


From ff0c7d15f9787b7e8c601533c015295cc68329f8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:50 +1100
Subject: fs: avoid inode RCU freeing for pseudo fs

Pseudo filesystems that don't put inode on RCU list or reachable by
rcu-walk dentries do not need to RCU free their inodes.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/inode.c          |  6 ++++++
 fs/pipe.c           |  6 +++++-
 include/linux/fs.h  |  1 +
 include/linux/net.h |  1 +
 net/socket.c        | 17 +++++++++--------
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 6751dfe8cc06..da85e56378f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -257,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+void free_inode_nonrcu(struct inode *inode)
+{
+	kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
diff --git a/fs/pipe.c b/fs/pipe.c
index 04629f36e397..ae3592dcc88c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1253,6 +1253,10 @@ out:
 	return ret;
 }
 
+static const struct super_operations pipefs_ops = {
+	.destroy_inode = free_inode_nonrcu,
+};
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
@@ -1262,7 +1266,7 @@ out:
 static struct dentry *pipefs_mount(struct file_system_type *fs_type,
 			 int flags, const char *dev_name, void *data)
 {
-	return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
+	return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
 }
 
 static struct file_system_type pipe_fs_type = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ff4d0a33b25..ea202fff44f8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2233,6 +2233,7 @@ extern void iget_failed(struct inode *);
 extern void end_writeback(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
+extern void free_inode_nonrcu(struct inode *inode);
 extern int should_remove_suid(struct dentry *);
 extern int file_remove_suid(struct file *);
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 06bde4908473..16faa130088c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -120,6 +120,7 @@ enum sock_shutdown_cmd {
 struct socket_wq {
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
+	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
 /**
diff --git a/net/socket.c b/net/socket.c
index 97fff3a4e72f..817dc92e9ef8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -262,20 +262,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 }
 
 
-static void sock_free_rcu(struct rcu_head *head)
+
+static void wq_free_rcu(struct rcu_head *head)
 {
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-	struct socket_alloc *ei = container_of(inode, struct socket_alloc,
-								vfs_inode);
+	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
 
-	kfree(ei->socket.wq);
-	INIT_LIST_HEAD(&inode->i_dentry);
-	kmem_cache_free(sock_inode_cachep, ei);
+	kfree(wq);
 }
 
 static void sock_destroy_inode(struct inode *inode)
 {
-	call_rcu(&inode->i_rcu, sock_free_rcu);
+	struct socket_alloc *ei;
+
+	ei = container_of(inode, struct socket_alloc, vfs_inode);
+	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void init_once(void *foo)
-- 
cgit v1.2.3-71-gd317


From 3c22cd5709e8143444a6d08682a87f4c57902df3 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:51 +1100
Subject: kernel: optimise seqlock

Add branch annotations for seqlock read fastpath, and introduce
__read_seqcount_begin and __read_seqcount_end functions, that can avoid the
smp_rmb() if used carefully. These will be used by store-free path walking
algorithm performance is critical and seqlocks are in use.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/seqlock.h | 80 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 632205ccc25d..e98cd2e57194 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -107,7 +107,7 @@ static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
 {
 	smp_rmb();
 
-	return (sl->sequence != start);
+	return unlikely(sl->sequence != start);
 }
 
 
@@ -125,14 +125,25 @@ typedef struct seqcount {
 #define SEQCNT_ZERO { 0 }
 #define seqcount_init(x)	do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
 
-/* Start of read using pointer to a sequence counter only.  */
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
+/**
+ * __read_seqcount_begin - begin a seq-read critical section (without barrier)
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
+ * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
+ * provided before actually loading any of the variables that are to be
+ * protected in this critical section.
+ *
+ * Use carefully, only in critical code, and comment how the barrier is
+ * provided.
+ */
+static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 {
 	unsigned ret;
 
 repeat:
 	ret = s->sequence;
-	smp_rmb();
 	if (unlikely(ret & 1)) {
 		cpu_relax();
 		goto repeat;
@@ -140,14 +151,56 @@ repeat:
 	return ret;
 }
 
-/*
- * Test if reader processed invalid data because sequence number has changed.
+/**
+ * read_seqcount_begin - begin a seq-read critical section
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * read_seqcount_begin opens a read critical section of the given seqcount.
+ * Validity of the critical section is tested by checking read_seqcount_retry
+ * function.
+ */
+static inline unsigned read_seqcount_begin(const seqcount_t *s)
+{
+	unsigned ret = __read_seqcount_begin(s);
+	smp_rmb();
+	return ret;
+}
+
+/**
+ * __read_seqcount_retry - end a seq-read critical section (without barrier)
+ * @s: pointer to seqcount_t
+ * @start: count, from read_seqcount_begin
+ * Returns: 1 if retry is required, else 0
+ *
+ * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
+ * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
+ * provided before actually loading any of the variables that are to be
+ * protected in this critical section.
+ *
+ * Use carefully, only in critical code, and comment how the barrier is
+ * provided.
+ */
+static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
+{
+	return unlikely(s->sequence != start);
+}
+
+/**
+ * read_seqcount_retry - end a seq-read critical section
+ * @s: pointer to seqcount_t
+ * @start: count, from read_seqcount_begin
+ * Returns: 1 if retry is required, else 0
+ *
+ * read_seqcount_retry closes a read critical section of the given seqcount.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
  */
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
 
-	return s->sequence != start;
+	return __read_seqcount_retry(s, start);
 }
 
 
@@ -167,6 +220,19 @@ static inline void write_seqcount_end(seqcount_t *s)
 	s->sequence++;
 }
 
+/**
+ * write_seqcount_barrier - invalidate in-progress read-side seq operations
+ * @s: pointer to seqcount_t
+ *
+ * After write_seqcount_barrier, no read-side seq operations will complete
+ * successfully and see data older than this.
+ */
+static inline void write_seqcount_barrier(seqcount_t *s)
+{
+	smp_wmb();
+	s->sequence+=2;
+}
+
 /*
  * Possible sw/hw IRQ protected versions of the interfaces.
  */
-- 
cgit v1.2.3-71-gd317


From 31e6b01f4183ff419a6d1f86177cbf4662347cec Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:52 +1100
Subject: fs: rcu-walk for path lookup

Perform common cases of path lookups without any stores or locking in the
ancestor dentry elements. This is called rcu-walk, as opposed to the current
algorithm which is a refcount based walk, or ref-walk.

This results in far fewer atomic operations on every path element,
significantly improving path lookup performance. It also avoids cacheline
bouncing on common dentries, significantly improving scalability.

The overall design is like this:
* LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk.
* Take the RCU lock for the entire path walk, starting with the acquiring
  of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are
  not required for dentry persistence.
* synchronize_rcu is called when unregistering a filesystem, so we can
  access d_ops and i_ops during rcu-walk.
* Similarly take the vfsmount lock for the entire path walk. So now mnt
  refcounts are not required for persistence. Also we are free to perform mount
  lookups, and to assume dentry mount points and mount roots are stable up and
  down the path.
* Have a per-dentry seqlock to protect the dentry name, parent, and inode,
  so we can load this tuple atomically, and also check whether any of its
  members have changed.
* Dentry lookups (based on parent, candidate string tuple) recheck the parent
  sequence after the child is found in case anything changed in the parent
  during the path walk.
* inode is also RCU protected so we can load d_inode and use the inode for
  limited things.
* i_mode, i_uid, i_gid can be tested for exec permissions during path walk.
* i_op can be loaded.

When we reach the destination dentry, we lock it, recheck lookup sequence,
and increment its refcount and mountpoint refcount. RCU and vfsmount locks
are dropped. This is termed "dropping rcu-walk". If the dentry refcount does
not match, we can not drop rcu-walk gracefully at the current point in the
lokup, so instead return -ECHILD (for want of a better errno). This signals the
path walking code to re-do the entire lookup with a ref-walk.

Aside from the final dentry, there are other situations that may be encounted
where we cannot continue rcu-walk. In that case, we drop rcu-walk (ie. take
a reference on the last good dentry) and continue with a ref-walk. Again, if
we can drop rcu-walk gracefully, we return -ECHILD and do the whole lookup
using ref-walk. But it is very important that we can continue with ref-walk
for most cases, particularly to avoid the overhead of double lookups, and to
gain the scalability advantages on common path elements (like cwd and root).

The cases where rcu-walk cannot continue are:
* NULL dentry (ie. any uncached path element)
* parent with d_inode->i_op->permission or ACLs
* dentries with d_revalidate
* Following links

In future patches, permission checks and d_revalidate become rcu-walk aware. It
may be possible eventually to make following links rcu-walk aware.

Uncached path elements will always require dropping to ref-walk mode, at the
very least because i_mutex needs to be grabbed, and objects allocated.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/dentry-locking.txt | 172 -------
 Documentation/filesystems/path-lookup.txt    | 345 +++++++++++++
 fs/dcache.c                                  | 203 +++++++-
 fs/filesystems.c                             |   3 +
 fs/namei.c                                   | 743 ++++++++++++++++++++++-----
 fs/proc/proc_sysctl.c                        |   4 +
 include/linux/dcache.h                       |  32 +-
 include/linux/namei.h                        |  15 +-
 include/linux/security.h                     |   8 +-
 security/security.c                          |   9 +
 10 files changed, 1194 insertions(+), 340 deletions(-)
 delete mode 100644 Documentation/filesystems/dentry-locking.txt
 create mode 100644 Documentation/filesystems/path-lookup.txt

(limited to 'include/linux')

diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt
deleted file mode 100644
index 30b6a40f5650..000000000000
--- a/Documentation/filesystems/dentry-locking.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-RCU-based dcache locking model
-==============================
-
-On many workloads, the most common operation on dcache is to look up a
-dentry, given a parent dentry and the name of the child. Typically,
-for every open(), stat() etc., the dentry corresponding to the
-pathname will be looked up by walking the tree starting with the first
-component of the pathname and using that dentry along with the next
-component to look up the next level and so on. Since it is a frequent
-operation for workloads like multiuser environments and web servers,
-it is important to optimize this path.
-
-Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
-every component during path look-up. Since 2.5.10 onwards, fast-walk
-algorithm changed this by holding the dcache_lock at the beginning and
-walking as many cached path component dentries as possible. This
-significantly decreases the number of acquisition of
-dcache_lock. However it also increases the lock hold time
-significantly and affects performance in large SMP machines. Since
-2.5.62 kernel, dcache has been using a new locking model that uses RCU
-to make dcache look-up lock-free.
-
-The current dcache locking model is not very different from the
-existing dcache locking model. Prior to 2.5.62 kernel, dcache_lock
-protected the hash chain, d_child, d_alias, d_lru lists as well as
-d_inode and several other things like mount look-up. RCU-based changes
-affect only the way the hash chain is protected. For everything else
-the dcache_lock must be taken for both traversing as well as
-updating. The hash chain updates too take the dcache_lock.  The
-significant change is the way d_lookup traverses the hash chain, it
-doesn't acquire the dcache_lock for this and rely on RCU to ensure
-that the dentry has not been *freed*.
-
-dcache_lock no longer exists, dentry locking is explained in fs/dcache.c
-
-Dcache locking details
-======================
-
-For many multi-user workloads, open() and stat() on files are very
-frequently occurring operations. Both involve walking of path names to
-find the dentry corresponding to the concerned file. In 2.4 kernel,
-dcache_lock was held during look-up of each path component. Contention
-and cache-line bouncing of this global lock caused significant
-scalability problems. With the introduction of RCU in Linux kernel,
-this was worked around by making the look-up of path components during
-path walking lock-free.
-
-
-Safe lock-free look-up of dcache hash table
-===========================================
-
-Dcache is a complex data structure with the hash table entries also
-linked together in other lists. In 2.4 kernel, dcache_lock protected
-all the lists. RCU dentry hash walking works like this:
-
-1. The deletion from hash chain is done using hlist_del_rcu() macro
-   which doesn't initialize next pointer of the deleted dentry and
-   this allows us to walk safely lock-free while a deletion is
-   happening. This is a standard hlist_rcu iteration.
-
-2. Insertion of a dentry into the hash table is done using
-   hlist_add_head_rcu() which take care of ordering the writes - the
-   writes to the dentry must be visible before the dentry is
-   inserted. This works in conjunction with hlist_for_each_rcu(),
-   which has since been replaced by hlist_for_each_entry_rcu(), while
-   walking the hash chain. The only requirement is that all
-   initialization to the dentry must be done before
-   hlist_add_head_rcu() since we don't have lock protection
-   while traversing the hash chain.
-
-3. The dentry looked up without holding locks cannot be returned for
-   walking if it is unhashed. It then may have a NULL d_inode or other
-   bogosity since RCU doesn't protect the other fields in the dentry. We
-   therefore use a flag DCACHE_UNHASHED to indicate unhashed dentries
-   and use this in conjunction with a per-dentry lock (d_lock). Once
-   looked up without locks, we acquire the per-dentry lock (d_lock) and
-   check if the dentry is unhashed. If so, the look-up is failed. If not,
-   the reference count of the dentry is increased and the dentry is
-   returned.
-
-4. Once a dentry is looked up, it must be ensured during the path walk
-   for that component it doesn't go away. In pre-2.5.10 code, this was
-   done holding a reference to the dentry. dcache_rcu does the same.
-   In some sense, dcache_rcu path walking looks like the pre-2.5.10
-   version.
-
-5. All dentry hash chain updates must take the per-dentry lock (see
-   fs/dcache.c). This excludes dput() to ensure that a dentry that has
-   been looked up concurrently does not get deleted before dget() can
-   take a ref.
-
-6. There are several ways to do reference counting of RCU protected
-   objects. One such example is in ipv4 route cache where deferred
-   freeing (using call_rcu()) is done as soon as the reference count
-   goes to zero. This cannot be done in the case of dentries because
-   tearing down of dentries require blocking (dentry_iput()) which
-   isn't supported from RCU callbacks. Instead, tearing down of
-   dentries happen synchronously in dput(), but actual freeing happens
-   later when RCU grace period is over. This allows safe lock-free
-   walking of the hash chains, but a matched dentry may have been
-   partially torn down. The checking of DCACHE_UNHASHED flag with
-   d_lock held detects such dentries and prevents them from being
-   returned from look-up.
-
-
-Maintaining POSIX rename semantics
-==================================
-
-Since look-up of dentries is lock-free, it can race against a
-concurrent rename operation. For example, during rename of file A to
-B, look-up of either A or B must succeed.  So, if look-up of B happens
-after A has been removed from the hash chain but not added to the new
-hash chain, it may fail.  Also, a comparison while the name is being
-written concurrently by a rename may result in false positive matches
-violating rename semantics.  Issues related to race with rename are
-handled as described below :
-
-1. Look-up can be done in two ways - d_lookup() which is safe from
-   simultaneous renames and __d_lookup() which is not.  If
-   __d_lookup() fails, it must be followed up by a d_lookup() to
-   correctly determine whether a dentry is in the hash table or
-   not. d_lookup() protects look-ups using a sequence lock
-   (rename_lock).
-
-2. The name associated with a dentry (d_name) may be changed if a
-   rename is allowed to happen simultaneously. To avoid memcmp() in
-   __d_lookup() go out of bounds due to a rename and false positive
-   comparison, the name comparison is done while holding the
-   per-dentry lock. This prevents concurrent renames during this
-   operation.
-
-3. Hash table walking during look-up may move to a different bucket as
-   the current dentry is moved to a different bucket due to rename.
-   But we use hlists in dcache hash table and they are
-   null-terminated.  So, even if a dentry moves to a different bucket,
-   hash chain walk will terminate. [with a list_head list, it may not
-   since termination is when the list_head in the original bucket is
-   reached].  Since we redo the d_parent check and compare name while
-   holding d_lock, lock-free look-up will not race against d_move().
-
-4. There can be a theoretical race when a dentry keeps coming back to
-   original bucket due to double moves. Due to this look-up may
-   consider that it has never moved and can end up in a infinite loop.
-   But this is not any worse that theoretical livelocks we already
-   have in the kernel.
-
-
-Important guidelines for filesystem developers related to dcache_rcu
-====================================================================
-
-1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
-   don't change. Only dcache internal implementation changes. However
-   filesystems *must not* delete from the dentry hash chains directly
-   using the list macros like allowed earlier. They must use dcache
-   APIs like d_drop() or __d_drop() depending on the situation.
-
-2. d_flags is now protected by a per-dentry lock (d_lock). All access
-   to d_flags must be protected by it.
-
-3. For a hashed dentry, checking of d_count needs to be protected by
-   d_lock.
-
-
-Papers and other documentation on dcache locking
-================================================
-
-1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
-
-2. http://lse.sourceforge.net/locking/dcache/dcache.html
-
-
-
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt
new file mode 100644
index 000000000000..09b2878724a1
--- /dev/null
+++ b/Documentation/filesystems/path-lookup.txt
@@ -0,0 +1,345 @@
+Path walking and name lookup locking
+====================================
+
+Path resolution is the finding a dentry corresponding to a path name string, by
+performing a path walk. Typically, for every open(), stat() etc., the path name
+will be resolved. Paths are resolved by walking the namespace tree, starting
+with the first component of the pathname (eg. root or cwd) with a known dentry,
+then finding the child of that dentry, which is named the next component in the
+path string. Then repeating the lookup from the child dentry and finding its
+child with the next element, and so on.
+
+Since it is a frequent operation for workloads like multiuser environments and
+web servers, it is important to optimize this code.
+
+Path walking synchronisation history:
+Prior to 2.5.10, dcache_lock was acquired in d_lookup (dcache hash lookup) and
+thus in every component during path look-up. Since 2.5.10 onwards, fast-walk
+algorithm changed this by holding the dcache_lock at the beginning and walking
+as many cached path component dentries as possible. This significantly
+decreases the number of acquisition of dcache_lock. However it also increases
+the lock hold time significantly and affects performance in large SMP machines.
+Since 2.5.62 kernel, dcache has been using a new locking model that uses RCU to
+make dcache look-up lock-free.
+
+All the above algorithms required taking a lock and reference count on the
+dentry that was looked up, so that may be used as the basis for walking the
+next path element. This is inefficient and unscalable. It is inefficient
+because of the locks and atomic operations required for every dentry element
+slows things down. It is not scalable because many parallel applications that
+are path-walk intensive tend to do path lookups starting from a common dentry
+(usually, the root "/" or current working directory). So contention on these
+common path elements causes lock and cacheline queueing.
+
+Since 2.6.38, RCU is used to make a significant part of the entire path walk
+(including dcache look-up) completely "store-free" (so, no locks, atomics, or
+even stores into cachelines of common dentries). This is known as "rcu-walk"
+path walking.
+
+Path walking overview
+=====================
+
+A name string specifies a start (root directory, cwd, fd-relative) and a
+sequence of elements (directory entry names), which together refer to a path in
+the namespace. A path is represented as a (dentry, vfsmount) tuple. The name
+elements are sub-strings, seperated by '/'.
+
+Name lookups will want to find a particular path that a name string refers to
+(usually the final element, or parent of final element). This is done by taking
+the path given by the name's starting point (which we know in advance -- eg.
+current->fs->cwd or current->fs->root) as the first parent of the lookup. Then
+iteratively for each subsequent name element, look up the child of the current
+parent with the given name and if it is not the desired entry, make it the
+parent for the next lookup.
+
+A parent, of course, must be a directory, and we must have appropriate
+permissions on the parent inode to be able to walk into it.
+
+Turning the child into a parent for the next lookup requires more checks and
+procedures. Symlinks essentially substitute the symlink name for the target
+name in the name string, and require some recursive path walking.  Mount points
+must be followed into (thus changing the vfsmount that subsequent path elements
+refer to), switching from the mount point path to the root of the particular
+mounted vfsmount. These behaviours are variously modified depending on the
+exact path walking flags.
+
+Path walking then must, broadly, do several particular things:
+- find the start point of the walk;
+- perform permissions and validity checks on inodes;
+- perform dcache hash name lookups on (parent, name element) tuples;
+- traverse mount points;
+- traverse symlinks;
+- lookup and create missing parts of the path on demand.
+
+Safe store-free look-up of dcache hash table
+============================================
+
+Dcache name lookup
+------------------
+In order to lookup a dcache (parent, name) tuple, we take a hash on the tuple
+and use that to select a bucket in the dcache-hash table. The list of entries
+in that bucket is then walked, and we do a full comparison of each entry
+against our (parent, name) tuple.
+
+The hash lists are RCU protected, so list walking is not serialised with
+concurrent updates (insertion, deletion from the hash). This is a standard RCU
+list application with the exception of renames, which will be covered below.
+
+Parent and name members of a dentry, as well as its membership in the dcache
+hash, and its inode are protected by the per-dentry d_lock spinlock. A
+reference is taken on the dentry (while the fields are verified under d_lock),
+and this stabilises its d_inode pointer and actual inode. This gives a stable
+point to perform the next step of our path walk against.
+
+These members are also protected by d_seq seqlock, although this offers
+read-only protection and no durability of results, so care must be taken when
+using d_seq for synchronisation (see seqcount based lookups, below).
+
+Renames
+-------
+Back to the rename case. In usual RCU protected lists, the only operations that
+will happen to an object is insertion, and then eventually removal from the
+list. The object will not be reused until an RCU grace period is complete.
+This ensures the RCU list traversal primitives can run over the object without
+problems (see RCU documentation for how this works).
+
+However when a dentry is renamed, its hash value can change, requiring it to be
+moved to a new hash list. Allocating and inserting a new alias would be
+expensive and also problematic for directory dentries. Latency would be far to
+high to wait for a grace period after removing the dentry and before inserting
+it in the new hash bucket. So what is done is to insert the dentry into the
+new list immediately.
+
+However, when the dentry's list pointers are updated to point to objects in the
+new list before waiting for a grace period, this can result in a concurrent RCU
+lookup of the old list veering off into the new (incorrect) list and missing
+the remaining dentries on the list.
+
+There is no fundamental problem with walking down the wrong list, because the
+dentry comparisons will never match. However it is fatal to miss a matching
+dentry. So a seqlock is used to detect when a rename has occurred, and so the
+lookup can be retried.
+
+         1      2      3
+        +---+  +---+  +---+
+hlist-->| N-+->| N-+->| N-+->
+head <--+-P |<-+-P |<-+-P |
+        +---+  +---+  +---+
+
+Rename of dentry 2 may require it deleted from the above list, and inserted
+into a new list. Deleting 2 gives the following list.
+
+         1             3
+        +---+         +---+     (don't worry, the longer pointers do not
+hlist-->| N-+-------->| N-+->    impose a measurable performance overhead
+head <--+-P |<--------+-P |      on modern CPUs)
+        +---+         +---+
+          ^      2      ^
+          |    +---+    |
+          |    | N-+----+
+          +----+-P |
+               +---+
+
+This is a standard RCU-list deletion, which leaves the deleted object's
+pointers intact, so a concurrent list walker that is currently looking at
+object 2 will correctly continue to object 3 when it is time to traverse the
+next object.
+
+However, when inserting object 2 onto a new list, we end up with this:
+
+         1             3
+        +---+         +---+
+hlist-->| N-+-------->| N-+->
+head <--+-P |<--------+-P |
+        +---+         +---+
+                 2
+               +---+
+               | N-+---->
+          <----+-P |
+               +---+
+
+Because we didn't wait for a grace period, there may be a concurrent lookup
+still at 2. Now when it follows 2's 'next' pointer, it will walk off into
+another list without ever having checked object 3.
+
+A related, but distinctly different, issue is that of rename atomicity versus
+lookup operations. If a file is renamed from 'A' to 'B', a lookup must only
+find either 'A' or 'B'. So if a lookup of 'A' returns NULL, a subsequent lookup
+of 'B' must succeed (note the reverse is not true).
+
+Between deleting the dentry from the old hash list, and inserting it on the new
+hash list, a lookup may find neither 'A' nor 'B' matching the dentry. The same
+rename seqlock is also used to cover this race in much the same way, by
+retrying a negative lookup result if a rename was in progress.
+
+Seqcount based lookups
+----------------------
+In refcount based dcache lookups, d_lock is used to serialise access to
+the dentry, stabilising it while comparing its name and parent and then
+taking a reference count (the reference count then gives a stable place to
+start the next part of the path walk from).
+
+As explained above, we would like to do path walking without taking locks or
+reference counts on intermediate dentries along the path. To do this, a per
+dentry seqlock (d_seq) is used to take a "coherent snapshot" of what the dentry
+looks like (its name, parent, and inode). That snapshot is then used to start
+the next part of the path walk. When loading the coherent snapshot under d_seq,
+care must be taken to load the members up-front, and use those pointers rather
+than reloading from the dentry later on (otherwise we'd have interesting things
+like d_inode going NULL underneath us, if the name was unlinked).
+
+Also important is to avoid performing any destructive operations (pretty much:
+no non-atomic stores to shared data), and to recheck the seqcount when we are
+"done" with the operation. Retry or abort if the seqcount does not match.
+Avoiding destructive or changing operations means we can easily unwind from
+failure.
+
+What this means is that a caller, provided they are holding RCU lock to
+protect the dentry object from disappearing, can perform a seqcount based
+lookup which does not increment the refcount on the dentry or write to
+it in any way. This returned dentry can be used for subsequent operations,
+provided that d_seq is rechecked after that operation is complete.
+
+Inodes are also rcu freed, so the seqcount lookup dentry's inode may also be
+queried for permissions.
+
+With this two parts of the puzzle, we can do path lookups without taking
+locks or refcounts on dentry elements.
+
+RCU-walk path walking design
+============================
+
+Path walking code now has two distinct modes, ref-walk and rcu-walk. ref-walk
+is the traditional[*] way of performing dcache lookups using d_lock to
+serialise concurrent modifications to the dentry and take a reference count on
+it. ref-walk is simple and obvious, and may sleep, take locks, etc while path
+walking is operating on each dentry. rcu-walk uses seqcount based dentry
+lookups, and can perform lookup of intermediate elements without any stores to
+shared data in the dentry or inode. rcu-walk can not be applied to all cases,
+eg. if the filesystem must sleep or perform non trivial operations, rcu-walk
+must be switched to ref-walk mode.
+
+[*] RCU is still used for the dentry hash lookup in ref-walk, but not the full
+    path walk.
+
+Where ref-walk uses a stable, refcounted ``parent'' to walk the remaining
+path string, rcu-walk uses a d_seq protected snapshot. When looking up a
+child of this parent snapshot, we open d_seq critical section on the child
+before closing d_seq critical section on the parent. This gives an interlocking
+ladder of snapshots to walk down.
+
+
+     proc 101
+      /----------------\
+     / comm:    "vi"    \
+    /  fs.root: dentry0  \
+    \  fs.cwd:  dentry2  /
+     \                  /
+      \----------------/
+
+So when vi wants to open("/home/npiggin/test.c", O_RDWR), then it will
+start from current->fs->root, which is a pinned dentry. Alternatively,
+"./test.c" would start from cwd; both names refer to the same path in
+the context of proc101.
+
+     dentry 0
+    +---------------------+   rcu-walk begins here, we note d_seq, check the
+    | name:    "/"        |   inode's permission, and then look up the next
+    | inode:   10         |   path element which is "home"...
+    | children:"home", ...|
+    +---------------------+
+              |
+     dentry 1 V
+    +---------------------+   ... which brings us here. We find dentry1 via
+    | name:    "home"     |   hash lookup, then note d_seq and compare name
+    | inode:   678        |   string and parent pointer. When we have a match,
+    | children:"npiggin"  |   we now recheck the d_seq of dentry0. Then we
+    +---------------------+   check inode and look up the next element.
+              |
+     dentry2  V
+    +---------------------+   Note: if dentry0 is now modified, lookup is
+    | name:    "npiggin"  |   not necessarily invalid, so we need only keep a
+    | inode:   543        |   parent for d_seq verification, and grandparents
+    | children:"a.c", ... |   can be forgotten.
+    +---------------------+
+              |
+     dentry3  V
+    +---------------------+   At this point we have our destination dentry.
+    | name:    "a.c"      |   We now take its d_lock, verify d_seq of this
+    | inode:   14221      |   dentry. If that checks out, we can increment
+    | children:NULL       |   its refcount because we're holding d_lock.
+    +---------------------+
+
+Taking a refcount on a dentry from rcu-walk mode, by taking its d_lock,
+re-checking its d_seq, and then incrementing its refcount is called
+"dropping rcu" or dropping from rcu-walk into ref-walk mode.
+
+It is, in some sense, a bit of a house of cards. If the seqcount check of the
+parent snapshot fails, the house comes down, because we had closed the d_seq
+section on the grandparent, so we have nothing left to stand on. In that case,
+the path walk must be fully restarted (which we do in ref-walk mode, to avoid
+live locks). It is costly to have a full restart, but fortunately they are
+quite rare.
+
+When we reach a point where sleeping is required, or a filesystem callout
+requires ref-walk, then instead of restarting the walk, we attempt to drop rcu
+at the last known good dentry we have. Avoiding a full restart in ref-walk in
+these cases is fundamental for performance and scalability because blocking
+operations such as creates and unlinks are not uncommon.
+
+The detailed design for rcu-walk is like this:
+* LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk.
+* Take the RCU lock for the entire path walk, starting with the acquiring
+  of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are
+  not required for dentry persistence.
+* synchronize_rcu is called when unregistering a filesystem, so we can
+  access d_ops and i_ops during rcu-walk.
+* Similarly take the vfsmount lock for the entire path walk. So now mnt
+  refcounts are not required for persistence. Also we are free to perform mount
+  lookups, and to assume dentry mount points and mount roots are stable up and
+  down the path.
+* Have a per-dentry seqlock to protect the dentry name, parent, and inode,
+  so we can load this tuple atomically, and also check whether any of its
+  members have changed.
+* Dentry lookups (based on parent, candidate string tuple) recheck the parent
+  sequence after the child is found in case anything changed in the parent
+  during the path walk.
+* inode is also RCU protected so we can load d_inode and use the inode for
+  limited things.
+* i_mode, i_uid, i_gid can be tested for exec permissions during path walk.
+* i_op can be loaded.
+* When the destination dentry is reached, drop rcu there (ie. take d_lock,
+  verify d_seq, increment refcount).
+* If seqlock verification fails anywhere along the path, do a full restart
+  of the path lookup in ref-walk mode. -ECHILD tends to be used (for want of
+  a better errno) to signal an rcu-walk failure.
+
+The cases where rcu-walk cannot continue are:
+* NULL dentry (ie. any uncached path element)
+* parent with d_inode->i_op->permission or ACLs
+* dentries with d_revalidate
+* Following links
+
+In future patches, permission checks and d_revalidate become rcu-walk aware. It
+may be possible eventually to make following links rcu-walk aware.
+
+Uncached path elements will always require dropping to ref-walk mode, at the
+very least because i_mutex needs to be grabbed, and objects allocated.
+
+Final note:
+"store-free" path walking is not strictly store free. We take vfsmount lock
+and refcounts (both of which can be made per-cpu), and we also store to the
+stack (which is essentially CPU-local), and we also have to take locks and
+refcount on final dentry.
+
+The point is that shared data, where practically possible, is not locked
+or stored into. The result is massive improvements in performance and
+scalability of path resolution.
+
+
+Papers and other documentation on dcache locking
+================================================
+
+1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
+
+2. http://lse.sourceforge.net/locking/dcache/dcache.html
diff --git a/fs/dcache.c b/fs/dcache.c
index dc0551c9755d..187fea040108 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -152,9 +152,23 @@ static void d_free(struct dentry *dentry)
 		call_rcu(&dentry->d_u.d_rcu, __d_free);
 }
 
+/**
+ * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * After this call, in-progress rcu-walk path lookup will fail. This
+ * should be called after unhashing, and after changing d_inode (if
+ * the dentry has not already been unhashed).
+ */
+static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+{
+	assert_spin_locked(&dentry->d_lock);
+	/* Go through a barrier */
+	write_seqcount_barrier(&dentry->d_seq);
+}
+
 /*
  * Release the dentry's inode, using the filesystem
- * d_iput() operation if defined.
+ * d_iput() operation if defined. Dentry has no refcount
+ * and is unhashed.
  */
 static void dentry_iput(struct dentry * dentry)
 	__releases(dentry->d_lock)
@@ -178,6 +192,28 @@ static void dentry_iput(struct dentry * dentry)
 	}
 }
 
+/*
+ * Release the dentry's inode, using the filesystem
+ * d_iput() operation if defined. dentry remains in-use.
+ */
+static void dentry_unlink_inode(struct dentry * dentry)
+	__releases(dentry->d_lock)
+	__releases(dcache_inode_lock)
+{
+	struct inode *inode = dentry->d_inode;
+	dentry->d_inode = NULL;
+	list_del_init(&dentry->d_alias);
+	dentry_rcuwalk_barrier(dentry);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_inode_lock);
+	if (!inode->i_nlink)
+		fsnotify_inoderemove(inode);
+	if (dentry->d_op && dentry->d_op->d_iput)
+		dentry->d_op->d_iput(dentry, inode);
+	else
+		iput(inode);
+}
+
 /*
  * dentry_lru_(add|del|move_tail) must be called with d_lock held.
  */
@@ -272,6 +308,7 @@ void __d_drop(struct dentry *dentry)
 		spin_lock(&dcache_hash_lock);
 		hlist_del_rcu(&dentry->d_hash);
 		spin_unlock(&dcache_hash_lock);
+		dentry_rcuwalk_barrier(dentry);
 	}
 }
 EXPORT_SYMBOL(__d_drop);
@@ -309,6 +346,7 @@ relock:
 		spin_unlock(&dcache_inode_lock);
 		goto relock;
 	}
+
 	if (ref)
 		dentry->d_count--;
 	/* if dentry was on the d_lru list delete it from there */
@@ -1221,6 +1259,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_count = 1;
 	dentry->d_flags = DCACHE_UNHASHED;
 	spin_lock_init(&dentry->d_lock);
+	seqcount_init(&dentry->d_seq);
 	dentry->d_inode = NULL;
 	dentry->d_parent = NULL;
 	dentry->d_sb = NULL;
@@ -1269,6 +1308,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	if (inode)
 		list_add(&dentry->d_alias, &inode->i_dentry);
 	dentry->d_inode = inode;
+	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
 	fsnotify_d_instantiate(dentry, inode);
 }
@@ -1610,6 +1650,111 @@ err_out:
 }
 EXPORT_SYMBOL(d_add_ci);
 
+/**
+ * __d_lookup_rcu - search for a dentry (racy, store-free)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * @seq: returns d_seq value at the point where the dentry was found
+ * @inode: returns dentry->d_inode when the inode was found valid.
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup_rcu is the dcache lookup function for rcu-walk name
+ * resolution (store-free path walking) design described in
+ * Documentation/filesystems/path-lookup.txt.
+ *
+ * This is not to be used outside core vfs.
+ *
+ * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
+ * held, and rcu_read_lock held. The returned dentry must not be stored into
+ * without taking d_lock and checking d_seq sequence count against @seq
+ * returned here.
+ *
+ * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * function.
+ *
+ * Alternatively, __d_lookup_rcu may be called again to look up the child of
+ * the returned dentry, so long as its parent's seqlock is checked after the
+ * child is looked up. Thus, an interlocking stepping of sequence lock checks
+ * is formed, giving integrity down the path walk.
+ */
+struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+				unsigned *seq, struct inode **inode)
+{
+	unsigned int len = name->len;
+	unsigned int hash = name->hash;
+	const unsigned char *str = name->name;
+	struct hlist_head *head = d_hash(parent, hash);
+	struct hlist_node *node;
+	struct dentry *dentry;
+
+	/*
+	 * Note: There is significant duplication with __d_lookup_rcu which is
+	 * required to prevent single threaded performance regressions
+	 * especially on architectures where smp_rmb (in seqcounts) are costly.
+	 * Keep the two functions in sync.
+	 */
+
+	/*
+	 * The hash list is protected using RCU.
+	 *
+	 * Carefully use d_seq when comparing a candidate dentry, to avoid
+	 * races with d_move().
+	 *
+	 * It is possible that concurrent renames can mess up our list
+	 * walk here and result in missing our dentry, resulting in the
+	 * false-negative result. d_lookup() protects against concurrent
+	 * renames using rename_lock seqlock.
+	 *
+	 * See Documentation/vfs/dcache-locking.txt for more details.
+	 */
+	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
+		struct inode *i;
+		const char *tname;
+		int tlen;
+
+		if (dentry->d_name.hash != hash)
+			continue;
+
+seqretry:
+		*seq = read_seqcount_begin(&dentry->d_seq);
+		if (dentry->d_parent != parent)
+			continue;
+		if (d_unhashed(dentry))
+			continue;
+		tlen = dentry->d_name.len;
+		tname = dentry->d_name.name;
+		i = dentry->d_inode;
+		/*
+		 * This seqcount check is required to ensure name and
+		 * len are loaded atomically, so as not to walk off the
+		 * edge of memory when walking. If we could load this
+		 * atomically some other way, we could drop this check.
+		 */
+		if (read_seqcount_retry(&dentry->d_seq, *seq))
+			goto seqretry;
+		if (parent->d_op && parent->d_op->d_compare) {
+			if (parent->d_op->d_compare(parent, *inode,
+						dentry, i,
+						tlen, tname, name))
+				continue;
+		} else {
+			if (tlen != len)
+				continue;
+			if (memcmp(tname, str, tlen))
+				continue;
+		}
+		/*
+		 * No extra seqcount check is required after the name
+		 * compare. The caller must perform a seqcount check in
+		 * order to do anything useful with the returned dentry
+		 * anyway.
+		 */
+		*inode = i;
+		return dentry;
+	}
+	return NULL;
+}
+
 /**
  * d_lookup - search for a dentry
  * @parent: parent dentry
@@ -1621,9 +1766,9 @@ EXPORT_SYMBOL(d_add_ci);
  * dentry is returned. The caller must use dput to free the entry when it has
  * finished using it. %NULL is returned if the dentry does not exist.
  */
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
 {
-	struct dentry * dentry = NULL;
+	struct dentry *dentry;
 	unsigned seq;
 
         do {
@@ -1636,7 +1781,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 }
 EXPORT_SYMBOL(d_lookup);
 
-/*
+/**
  * __d_lookup - search for a dentry (racy)
  * @parent: parent dentry
  * @name: qstr of name we wish to find
@@ -1651,16 +1796,23 @@ EXPORT_SYMBOL(d_lookup);
  *
  * __d_lookup callers must be commented.
  */
-struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
 	struct hlist_head *head = d_hash(parent,hash);
-	struct dentry *found = NULL;
 	struct hlist_node *node;
+	struct dentry *found = NULL;
 	struct dentry *dentry;
 
+	/*
+	 * Note: There is significant duplication with __d_lookup_rcu which is
+	 * required to prevent single threaded performance regressions
+	 * especially on architectures where smp_rmb (in seqcounts) are costly.
+	 * Keep the two functions in sync.
+	 */
+
 	/*
 	 * The hash list is protected using RCU.
 	 *
@@ -1677,24 +1829,15 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 	rcu_read_lock();
 	
 	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
-		struct qstr *qstr;
+		const char *tname;
+		int tlen;
 
 		if (dentry->d_name.hash != hash)
 			continue;
-		if (dentry->d_parent != parent)
-			continue;
 
 		spin_lock(&dentry->d_lock);
-
-		/*
-		 * Recheck the dentry after taking the lock - d_move may have
-		 * changed things. Don't bother checking the hash because
-		 * we're about to compare the whole name anyway.
-		 */
 		if (dentry->d_parent != parent)
 			goto next;
-
-		/* non-existing due to RCU? */
 		if (d_unhashed(dentry))
 			goto next;
 
@@ -1702,16 +1845,17 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 		 * It is safe to compare names since d_move() cannot
 		 * change the qstr (protected by d_lock).
 		 */
-		qstr = &dentry->d_name;
+		tlen = dentry->d_name.len;
+		tname = dentry->d_name.name;
 		if (parent->d_op && parent->d_op->d_compare) {
 			if (parent->d_op->d_compare(parent, parent->d_inode,
 						dentry, dentry->d_inode,
-						qstr->len, qstr->name, name))
+						tlen, tname, name))
 				goto next;
 		} else {
-			if (qstr->len != len)
+			if (tlen != len)
 				goto next;
-			if (memcmp(qstr->name, str, len))
+			if (memcmp(tname, str, tlen))
 				goto next;
 		}
 
@@ -1821,7 +1965,7 @@ again:
 			goto again;
 		}
 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
-		dentry_iput(dentry);
+		dentry_unlink_inode(dentry);
 		fsnotify_nameremove(dentry, isdir);
 		return;
 	}
@@ -1884,7 +2028,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
 	spin_lock(&dentry->d_lock);
+	write_seqcount_begin(&dentry->d_seq);
 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
+	write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 }
 EXPORT_SYMBOL(dentry_update_name_case);
@@ -1997,6 +2143,9 @@ void d_move(struct dentry * dentry, struct dentry * target)
 
 	dentry_lock_for_move(dentry, target);
 
+	write_seqcount_begin(&dentry->d_seq);
+	write_seqcount_begin(&target->d_seq);
+
 	/* Move the dentry to the target hash queue, if on different bucket */
 	spin_lock(&dcache_hash_lock);
 	if (!d_unhashed(dentry))
@@ -2005,6 +2154,7 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	spin_unlock(&dcache_hash_lock);
 
 	/* Unhash the target: dput() will then get rid of it */
+	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
 	__d_drop(target);
 
 	list_del(&dentry->d_u.d_child);
@@ -2028,6 +2178,9 @@ void d_move(struct dentry * dentry, struct dentry * target)
 
 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 
+	write_seqcount_end(&target->d_seq);
+	write_seqcount_end(&dentry->d_seq);
+
 	dentry_unlock_parents_for_move(dentry, target);
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
@@ -2110,6 +2263,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 
 	dentry_lock_for_move(anon, dentry);
 
+	write_seqcount_begin(&dentry->d_seq);
+	write_seqcount_begin(&anon->d_seq);
+
 	dparent = dentry->d_parent;
 	aparent = anon->d_parent;
 
@@ -2130,6 +2286,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 	else
 		INIT_LIST_HEAD(&anon->d_u.d_child);
 
+	write_seqcount_end(&dentry->d_seq);
+	write_seqcount_end(&anon->d_seq);
+
 	dentry_unlock_parents_for_move(anon, dentry);
 	spin_unlock(&dentry->d_lock);
 
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492d8eef..751d6b255a12 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
 		tmp = &(*tmp)->next;
 	}
 	write_unlock(&file_systems_lock);
+
+	synchronize_rcu();
+
 	return -EINVAL;
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index 5642bc2be418..8d3f15b3a541 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
 /*
  * This does basic POSIX ACL permission checking
  */
-static int acl_permission_check(struct inode *inode, int mask,
-		int (*check_acl)(struct inode *inode, int mask))
+static inline int __acl_permission_check(struct inode *inode, int mask,
+		int (*check_acl)(struct inode *inode, int mask), int rcu)
 {
 	umode_t			mode = inode->i_mode;
 
@@ -180,9 +180,13 @@ static int acl_permission_check(struct inode *inode, int mask,
 		mode >>= 6;
 	else {
 		if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
-			int error = check_acl(inode, mask);
-			if (error != -EAGAIN)
-				return error;
+			if (rcu) {
+				return -ECHILD;
+			} else {
+				int error = check_acl(inode, mask);
+				if (error != -EAGAIN)
+					return error;
+			}
 		}
 
 		if (in_group_p(inode->i_gid))
@@ -197,6 +201,12 @@ static int acl_permission_check(struct inode *inode, int mask,
 	return -EACCES;
 }
 
+static inline int acl_permission_check(struct inode *inode, int mask,
+		int (*check_acl)(struct inode *inode, int mask))
+{
+	return __acl_permission_check(inode, mask, check_acl, 0);
+}
+
 /**
  * generic_permission  -  check for access rights on a Posix-like filesystem
  * @inode:	inode to check access rights for
@@ -374,6 +384,173 @@ void path_put(struct path *path)
 }
 EXPORT_SYMBOL(path_put);
 
+/**
+ * nameidata_drop_rcu - drop this nameidata out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * Path walking has 2 modes, rcu-walk and ref-walk (see
+ * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
+ * to drop out of rcu-walk mode and take normal reference counts on dentries
+ * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
+ * refcounts at the last known good point before rcu-walk got stuck, so
+ * ref-walk may continue from there. If this is not successful (eg. a seqcount
+ * has changed), then failure is returned and path walk restarts from the
+ * beginning in ref-walk mode.
+ *
+ * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
+ * ref-walk. Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu(struct nameidata *nd)
+{
+	struct fs_struct *fs = current->fs;
+	struct dentry *dentry = nd->path.dentry;
+
+	BUG_ON(!(nd->flags & LOOKUP_RCU));
+	if (nd->root.mnt) {
+		spin_lock(&fs->lock);
+		if (nd->root.mnt != fs->root.mnt ||
+				nd->root.dentry != fs->root.dentry)
+			goto err_root;
+	}
+	spin_lock(&dentry->d_lock);
+	if (!__d_rcu_to_refcount(dentry, nd->seq))
+		goto err;
+	BUG_ON(nd->inode != dentry->d_inode);
+	spin_unlock(&dentry->d_lock);
+	if (nd->root.mnt) {
+		path_get(&nd->root);
+		spin_unlock(&fs->lock);
+	}
+	mntget(nd->path.mnt);
+
+	rcu_read_unlock();
+	br_read_unlock(vfsmount_lock);
+	nd->flags &= ~LOOKUP_RCU;
+	return 0;
+err:
+	spin_unlock(&dentry->d_lock);
+err_root:
+	if (nd->root.mnt)
+		spin_unlock(&fs->lock);
+	return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.  */
+static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
+{
+	if (nd->flags & LOOKUP_RCU)
+		return nameidata_drop_rcu(nd);
+	return 0;
+}
+
+/**
+ * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @dentry: dentry to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
+ * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
+ * @nd. Must be called from rcu-walk context.
+ */
+static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
+{
+	struct fs_struct *fs = current->fs;
+	struct dentry *parent = nd->path.dentry;
+
+	BUG_ON(!(nd->flags & LOOKUP_RCU));
+	if (nd->root.mnt) {
+		spin_lock(&fs->lock);
+		if (nd->root.mnt != fs->root.mnt ||
+				nd->root.dentry != fs->root.dentry)
+			goto err_root;
+	}
+	spin_lock(&parent->d_lock);
+	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	if (!__d_rcu_to_refcount(dentry, nd->seq))
+		goto err;
+	/*
+	 * If the sequence check on the child dentry passed, then the child has
+	 * not been removed from its parent. This means the parent dentry must
+	 * be valid and able to take a reference at this point.
+	 */
+	BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
+	BUG_ON(!parent->d_count);
+	parent->d_count++;
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&parent->d_lock);
+	if (nd->root.mnt) {
+		path_get(&nd->root);
+		spin_unlock(&fs->lock);
+	}
+	mntget(nd->path.mnt);
+
+	rcu_read_unlock();
+	br_read_unlock(vfsmount_lock);
+	nd->flags &= ~LOOKUP_RCU;
+	return 0;
+err:
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&parent->d_lock);
+err_root:
+	if (nd->root.mnt)
+		spin_unlock(&fs->lock);
+	return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.  */
+static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
+{
+	if (nd->flags & LOOKUP_RCU)
+		return nameidata_dentry_drop_rcu(nd, dentry);
+	return 0;
+}
+
+/**
+ * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
+ * nd->path should be the final element of the lookup, so nd->root is discarded.
+ * Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu_last(struct nameidata *nd)
+{
+	struct dentry *dentry = nd->path.dentry;
+
+	BUG_ON(!(nd->flags & LOOKUP_RCU));
+	nd->flags &= ~LOOKUP_RCU;
+	nd->root.mnt = NULL;
+	spin_lock(&dentry->d_lock);
+	if (!__d_rcu_to_refcount(dentry, nd->seq))
+		goto err_unlock;
+	BUG_ON(nd->inode != dentry->d_inode);
+	spin_unlock(&dentry->d_lock);
+
+	mntget(nd->path.mnt);
+
+	rcu_read_unlock();
+	br_read_unlock(vfsmount_lock);
+
+	return 0;
+
+err_unlock:
+	spin_unlock(&dentry->d_lock);
+	rcu_read_unlock();
+	br_read_unlock(vfsmount_lock);
+	return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing.  */
+static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
+{
+	if (likely(nd->flags & LOOKUP_RCU))
+		return nameidata_drop_rcu_last(nd);
+	return 0;
+}
+
 /**
  * release_open_intent - free up open intent resources
  * @nd: pointer to nameidata
@@ -459,26 +636,40 @@ force_reval_path(struct path *path, struct nameidata *nd)
  * short-cut DAC fails, then call ->permission() to do more
  * complete permission check.
  */
-static int exec_permission(struct inode *inode)
+static inline int __exec_permission(struct inode *inode, int rcu)
 {
 	int ret;
 
 	if (inode->i_op->permission) {
+		if (rcu)
+			return -ECHILD;
 		ret = inode->i_op->permission(inode, MAY_EXEC);
 		if (!ret)
 			goto ok;
 		return ret;
 	}
-	ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
+	ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu);
 	if (!ret)
 		goto ok;
+	if (rcu && ret == -ECHILD)
+		return ret;
 
 	if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
 		goto ok;
 
 	return ret;
 ok:
-	return security_inode_permission(inode, MAY_EXEC);
+	return security_inode_exec_permission(inode, rcu);
+}
+
+static int exec_permission(struct inode *inode)
+{
+	return __exec_permission(inode, 0);
+}
+
+static int exec_permission_rcu(struct inode *inode)
+{
+	return __exec_permission(inode, 1);
 }
 
 static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +680,20 @@ static __always_inline void set_root(struct nameidata *nd)
 
 static int link_path_walk(const char *, struct nameidata *);
 
+static __always_inline void set_root_rcu(struct nameidata *nd)
+{
+	if (!nd->root.mnt) {
+		struct fs_struct *fs = current->fs;
+		spin_lock(&fs->lock);
+		nd->root = fs->root;
+		spin_unlock(&fs->lock);
+	}
+}
+
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
+	int ret;
+
 	if (IS_ERR(link))
 		goto fail;
 
@@ -500,8 +703,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		nd->path = nd->root;
 		path_get(&nd->root);
 	}
+	nd->inode = nd->path.dentry->d_inode;
 
-	return link_path_walk(link, nd);
+	ret = link_path_walk(link, nd);
+	return ret;
 fail:
 	path_put(&nd->path);
 	return PTR_ERR(link);
@@ -516,11 +721,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
 
 static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
 {
-	dput(nd->path.dentry);
-	if (nd->path.mnt != path->mnt) {
-		mntput(nd->path.mnt);
-		nd->path.mnt = path->mnt;
+	if (!(nd->flags & LOOKUP_RCU)) {
+		dput(nd->path.dentry);
+		if (nd->path.mnt != path->mnt)
+			mntput(nd->path.mnt);
 	}
+	nd->path.mnt = path->mnt;
 	nd->path.dentry = path->dentry;
 }
 
@@ -535,9 +741,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
 
 	if (path->mnt != nd->path.mnt) {
 		path_to_nameidata(path, nd);
+		nd->inode = nd->path.dentry->d_inode;
 		dget(dentry);
 	}
 	mntget(path->mnt);
+
 	nd->last_type = LAST_BIND;
 	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
 	error = PTR_ERR(*p);
@@ -591,6 +799,20 @@ loop:
 	return err;
 }
 
+static int follow_up_rcu(struct path *path)
+{
+	struct vfsmount *parent;
+	struct dentry *mountpoint;
+
+	parent = path->mnt->mnt_parent;
+	if (parent == path->mnt)
+		return 0;
+	mountpoint = path->mnt->mnt_mountpoint;
+	path->dentry = mountpoint;
+	path->mnt = parent;
+	return 1;
+}
+
 int follow_up(struct path *path)
 {
 	struct vfsmount *parent;
@@ -615,6 +837,21 @@ int follow_up(struct path *path)
 /*
  * serialization is taken care of in namespace.c
  */
+static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
+				struct inode **inode)
+{
+	while (d_mountpoint(path->dentry)) {
+		struct vfsmount *mounted;
+		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+		if (!mounted)
+			return;
+		path->mnt = mounted;
+		path->dentry = mounted->mnt_root;
+		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+		*inode = path->dentry->d_inode;
+	}
+}
+
 static int __follow_mount(struct path *path)
 {
 	int res = 0;
@@ -660,7 +897,42 @@ int follow_down(struct path *path)
 	return 0;
 }
 
-static __always_inline void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot_rcu(struct nameidata *nd)
+{
+	struct inode *inode = nd->inode;
+
+	set_root_rcu(nd);
+
+	while(1) {
+		if (nd->path.dentry == nd->root.dentry &&
+		    nd->path.mnt == nd->root.mnt) {
+			break;
+		}
+		if (nd->path.dentry != nd->path.mnt->mnt_root) {
+			struct dentry *old = nd->path.dentry;
+			struct dentry *parent = old->d_parent;
+			unsigned seq;
+
+			seq = read_seqcount_begin(&parent->d_seq);
+			if (read_seqcount_retry(&old->d_seq, nd->seq))
+				return -ECHILD;
+			inode = parent->d_inode;
+			nd->path.dentry = parent;
+			nd->seq = seq;
+			break;
+		}
+		if (!follow_up_rcu(&nd->path))
+			break;
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		inode = nd->path.dentry->d_inode;
+	}
+	__follow_mount_rcu(nd, &nd->path, &inode);
+	nd->inode = inode;
+
+	return 0;
+}
+
+static void follow_dotdot(struct nameidata *nd)
 {
 	set_root(nd);
 
@@ -681,6 +953,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 			break;
 	}
 	follow_mount(&nd->path);
+	nd->inode = nd->path.dentry->d_inode;
 }
 
 /*
@@ -718,18 +991,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
  *  It _is_ time-critical.
  */
 static int do_lookup(struct nameidata *nd, struct qstr *name,
-		     struct path *path)
+			struct path *path, struct inode **inode)
 {
 	struct vfsmount *mnt = nd->path.mnt;
-	struct dentry *dentry, *parent;
+	struct dentry *dentry, *parent = nd->path.dentry;
 	struct inode *dir;
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
-	if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
-		int err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
-				nd->path.dentry->d_inode, name);
+	if (parent->d_op && parent->d_op->d_hash) {
+		int err = parent->d_op->d_hash(parent, nd->inode, name);
 		if (err < 0)
 			return err;
 	}
@@ -739,21 +1011,48 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 	 * of a false negative due to a concurrent rename, we're going to
 	 * do the non-racy lookup, below.
 	 */
-	dentry = __d_lookup(nd->path.dentry, name);
-	if (!dentry)
-		goto need_lookup;
+	if (nd->flags & LOOKUP_RCU) {
+		unsigned seq;
+
+		*inode = nd->inode;
+		dentry = __d_lookup_rcu(parent, name, &seq, inode);
+		if (!dentry) {
+			if (nameidata_drop_rcu(nd))
+				return -ECHILD;
+			goto need_lookup;
+		}
+		/* Memory barrier in read_seqcount_begin of child is enough */
+		if (__read_seqcount_retry(&parent->d_seq, nd->seq))
+			return -ECHILD;
+
+		nd->seq = seq;
+		if (dentry->d_op && dentry->d_op->d_revalidate) {
+			/* We commonly drop rcu-walk here */
+			if (nameidata_dentry_drop_rcu(nd, dentry))
+				return -ECHILD;
+			goto need_revalidate;
+		}
+		path->mnt = mnt;
+		path->dentry = dentry;
+		__follow_mount_rcu(nd, path, inode);
+	} else {
+		dentry = __d_lookup(parent, name);
+		if (!dentry)
+			goto need_lookup;
 found:
-	if (dentry->d_op && dentry->d_op->d_revalidate)
-		goto need_revalidate;
+		if (dentry->d_op && dentry->d_op->d_revalidate)
+			goto need_revalidate;
 done:
-	path->mnt = mnt;
-	path->dentry = dentry;
-	__follow_mount(path);
+		path->mnt = mnt;
+		path->dentry = dentry;
+		__follow_mount(path);
+		*inode = path->dentry->d_inode;
+	}
 	return 0;
 
 need_lookup:
-	parent = nd->path.dentry;
 	dir = parent->d_inode;
+	BUG_ON(nd->inode != dir);
 
 	mutex_lock(&dir->i_mutex);
 	/*
@@ -815,7 +1114,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
 static int link_path_walk(const char *name, struct nameidata *nd)
 {
 	struct path next;
-	struct inode *inode;
 	int err;
 	unsigned int lookup_flags = nd->flags;
 	
@@ -824,18 +1122,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 	if (!*name)
 		goto return_reval;
 
-	inode = nd->path.dentry->d_inode;
 	if (nd->depth)
 		lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
 
 	/* At this point we know we have a real path component. */
 	for(;;) {
+		struct inode *inode;
 		unsigned long hash;
 		struct qstr this;
 		unsigned int c;
 
 		nd->flags |= LOOKUP_CONTINUE;
-		err = exec_permission(inode);
+		if (nd->flags & LOOKUP_RCU) {
+			err = exec_permission_rcu(nd->inode);
+			if (err == -ECHILD) {
+				if (nameidata_drop_rcu(nd))
+					return -ECHILD;
+				goto exec_again;
+			}
+		} else {
+exec_again:
+			err = exec_permission(nd->inode);
+		}
  		if (err)
 			break;
 
@@ -866,37 +1174,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 		if (this.name[0] == '.') switch (this.len) {
 			default:
 				break;
-			case 2:	
+			case 2:
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(nd);
-				inode = nd->path.dentry->d_inode;
+				if (nd->flags & LOOKUP_RCU) {
+					if (follow_dotdot_rcu(nd))
+						return -ECHILD;
+				} else
+					follow_dotdot(nd);
 				/* fallthrough */
 			case 1:
 				continue;
 		}
 		/* This does the actual lookups.. */
-		err = do_lookup(nd, &this, &next);
+		err = do_lookup(nd, &this, &next, &inode);
 		if (err)
 			break;
-
 		err = -ENOENT;
-		inode = next.dentry->d_inode;
 		if (!inode)
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
+			/* We commonly drop rcu-walk here */
+			if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+				return -ECHILD;
+			BUG_ON(inode != next.dentry->d_inode);
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
+			nd->inode = nd->path.dentry->d_inode;
 			err = -ENOENT;
-			inode = nd->path.dentry->d_inode;
-			if (!inode)
+			if (!nd->inode)
 				break;
-		} else
+		} else {
 			path_to_nameidata(&next, nd);
+			nd->inode = inode;
+		}
 		err = -ENOTDIR; 
-		if (!inode->i_op->lookup)
+		if (!nd->inode->i_op->lookup)
 			break;
 		continue;
 		/* here ends the main loop */
@@ -911,32 +1226,39 @@ last_component:
 		if (this.name[0] == '.') switch (this.len) {
 			default:
 				break;
-			case 2:	
+			case 2:
 				if (this.name[1] != '.')
 					break;
-				follow_dotdot(nd);
-				inode = nd->path.dentry->d_inode;
+				if (nd->flags & LOOKUP_RCU) {
+					if (follow_dotdot_rcu(nd))
+						return -ECHILD;
+				} else
+					follow_dotdot(nd);
 				/* fallthrough */
 			case 1:
 				goto return_reval;
 		}
-		err = do_lookup(nd, &this, &next);
+		err = do_lookup(nd, &this, &next, &inode);
 		if (err)
 			break;
-		inode = next.dentry->d_inode;
 		if (follow_on_final(inode, lookup_flags)) {
+			if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+				return -ECHILD;
+			BUG_ON(inode != next.dentry->d_inode);
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;
-			inode = nd->path.dentry->d_inode;
-		} else
+			nd->inode = nd->path.dentry->d_inode;
+		} else {
 			path_to_nameidata(&next, nd);
+			nd->inode = inode;
+		}
 		err = -ENOENT;
-		if (!inode)
+		if (!nd->inode)
 			break;
 		if (lookup_flags & LOOKUP_DIRECTORY) {
 			err = -ENOTDIR; 
-			if (!inode->i_op->lookup)
+			if (!nd->inode->i_op->lookup)
 				break;
 		}
 		goto return_base;
@@ -958,6 +1280,8 @@ return_reval:
 		 */
 		if (nd->path.dentry && nd->path.dentry->d_sb &&
 		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
+			if (nameidata_drop_rcu_maybe(nd))
+				return -ECHILD;
 			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
 			if (!nd->path.dentry->d_op->d_revalidate(
@@ -965,16 +1289,34 @@ return_reval:
 				break;
 		}
 return_base:
+		if (nameidata_drop_rcu_last_maybe(nd))
+			return -ECHILD;
 		return 0;
 out_dput:
-		path_put_conditional(&next, nd);
+		if (!(nd->flags & LOOKUP_RCU))
+			path_put_conditional(&next, nd);
 		break;
 	}
-	path_put(&nd->path);
+	if (!(nd->flags & LOOKUP_RCU))
+		path_put(&nd->path);
 return_err:
 	return err;
 }
 
+static inline int path_walk_rcu(const char *name, struct nameidata *nd)
+{
+	current->total_link_count = 0;
+
+	return link_path_walk(name, nd);
+}
+
+static inline int path_walk_simple(const char *name, struct nameidata *nd)
+{
+	current->total_link_count = 0;
+
+	return link_path_walk(name, nd);
+}
+
 static int path_walk(const char *name, struct nameidata *nd)
 {
 	struct path save = nd->path;
@@ -1000,6 +1342,88 @@ static int path_walk(const char *name, struct nameidata *nd)
 	return result;
 }
 
+static void path_finish_rcu(struct nameidata *nd)
+{
+	if (nd->flags & LOOKUP_RCU) {
+		/* RCU dangling. Cancel it. */
+		nd->flags &= ~LOOKUP_RCU;
+		nd->root.mnt = NULL;
+		rcu_read_unlock();
+		br_read_unlock(vfsmount_lock);
+	}
+	if (nd->file)
+		fput(nd->file);
+}
+
+static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+{
+	int retval = 0;
+	int fput_needed;
+	struct file *file;
+
+	nd->last_type = LAST_ROOT; /* if there are only slashes... */
+	nd->flags = flags | LOOKUP_RCU;
+	nd->depth = 0;
+	nd->root.mnt = NULL;
+	nd->file = NULL;
+
+	if (*name=='/') {
+		struct fs_struct *fs = current->fs;
+
+		br_read_lock(vfsmount_lock);
+		rcu_read_lock();
+
+		spin_lock(&fs->lock);
+		nd->root = fs->root;
+		nd->path = nd->root;
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		spin_unlock(&fs->lock);
+
+	} else if (dfd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
+
+		br_read_lock(vfsmount_lock);
+		rcu_read_lock();
+
+		spin_lock(&fs->lock);
+		nd->path = fs->pwd;
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		spin_unlock(&fs->lock);
+	} else {
+		struct dentry *dentry;
+
+		file = fget_light(dfd, &fput_needed);
+		retval = -EBADF;
+		if (!file)
+			goto out_fail;
+
+		dentry = file->f_path.dentry;
+
+		retval = -ENOTDIR;
+		if (!S_ISDIR(dentry->d_inode->i_mode))
+			goto fput_fail;
+
+		retval = file_permission(file, MAY_EXEC);
+		if (retval)
+			goto fput_fail;
+
+		nd->path = file->f_path;
+		if (fput_needed)
+			nd->file = file;
+
+		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		br_read_lock(vfsmount_lock);
+		rcu_read_lock();
+	}
+	nd->inode = nd->path.dentry->d_inode;
+	return 0;
+
+fput_fail:
+	fput_light(file, fput_needed);
+out_fail:
+	return retval;
+}
+
 static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
 {
 	int retval = 0;
@@ -1040,6 +1464,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
 
 		fput_light(file, fput_needed);
 	}
+	nd->inode = nd->path.dentry->d_inode;
 	return 0;
 
 fput_fail:
@@ -1052,16 +1477,53 @@ out_fail:
 static int do_path_lookup(int dfd, const char *name,
 				unsigned int flags, struct nameidata *nd)
 {
-	int retval = path_init(dfd, name, flags, nd);
-	if (!retval)
-		retval = path_walk(name, nd);
-	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
-				nd->path.dentry->d_inode))
-		audit_inode(name, nd->path.dentry);
+	int retval;
+
+	/*
+	 * Path walking is largely split up into 2 different synchronisation
+	 * schemes, rcu-walk and ref-walk (explained in
+	 * Documentation/filesystems/path-lookup.txt). These share much of the
+	 * path walk code, but some things particularly setup, cleanup, and
+	 * following mounts are sufficiently divergent that functions are
+	 * duplicated. Typically there is a function foo(), and its RCU
+	 * analogue, foo_rcu().
+	 *
+	 * -ECHILD is the error number of choice (just to avoid clashes) that
+	 * is returned if some aspect of an rcu-walk fails. Such an error must
+	 * be handled by restarting a traditional ref-walk (which will always
+	 * be able to complete).
+	 */
+	retval = path_init_rcu(dfd, name, flags, nd);
+	if (unlikely(retval))
+		return retval;
+	retval = path_walk_rcu(name, nd);
+	path_finish_rcu(nd);
 	if (nd->root.mnt) {
 		path_put(&nd->root);
 		nd->root.mnt = NULL;
 	}
+
+	if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
+		/* slower, locked walk */
+		if (retval == -ESTALE)
+			flags |= LOOKUP_REVAL;
+		retval = path_init(dfd, name, flags, nd);
+		if (unlikely(retval))
+			return retval;
+		retval = path_walk(name, nd);
+		if (nd->root.mnt) {
+			path_put(&nd->root);
+			nd->root.mnt = NULL;
+		}
+	}
+
+	if (likely(!retval)) {
+		if (unlikely(!audit_dummy_context())) {
+			if (nd->path.dentry && nd->inode)
+				audit_inode(name, nd->path.dentry);
+		}
+	}
+
 	return retval;
 }
 
@@ -1104,10 +1566,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	path_get(&nd->path);
 	nd->root = nd->path;
 	path_get(&nd->root);
+	nd->inode = nd->path.dentry->d_inode;
 
 	retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
-				nd->path.dentry->d_inode))
+				nd->inode))
 		audit_inode(name, nd->path.dentry);
 
 	path_put(&nd->root);
@@ -1488,6 +1951,7 @@ out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	dput(nd->path.dentry);
 	nd->path.dentry = path->dentry;
+
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
@@ -1582,6 +2046,9 @@ exit:
 	return ERR_PTR(error);
 }
 
+/*
+ * Handle O_CREAT case for do_filp_open
+ */
 static struct file *do_last(struct nameidata *nd, struct path *path,
 			    int open_flag, int acc_mode,
 			    int mode, const char *pathname)
@@ -1603,42 +2070,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		}
 		/* fallthrough */
 	case LAST_ROOT:
-		if (open_flag & O_CREAT)
-			goto exit;
-		/* fallthrough */
+		goto exit;
 	case LAST_BIND:
 		audit_inode(pathname, dir);
 		goto ok;
 	}
 
 	/* trailing slashes? */
-	if (nd->last.name[nd->last.len]) {
-		if (open_flag & O_CREAT)
-			goto exit;
-		nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
-	}
-
-	/* just plain open? */
-	if (!(open_flag & O_CREAT)) {
-		error = do_lookup(nd, &nd->last, path);
-		if (error)
-			goto exit;
-		error = -ENOENT;
-		if (!path->dentry->d_inode)
-			goto exit_dput;
-		if (path->dentry->d_inode->i_op->follow_link)
-			return NULL;
-		error = -ENOTDIR;
-		if (nd->flags & LOOKUP_DIRECTORY) {
-			if (!path->dentry->d_inode->i_op->lookup)
-				goto exit_dput;
-		}
-		path_to_nameidata(path, nd);
-		audit_inode(pathname, nd->path.dentry);
-		goto ok;
-	}
+	if (nd->last.name[nd->last.len])
+		goto exit;
 
-	/* OK, it's O_CREAT */
 	mutex_lock(&dir->d_inode->i_mutex);
 
 	path->dentry = lookup_hash(nd);
@@ -1709,8 +2150,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		return NULL;
 
 	path_to_nameidata(path, nd);
+	nd->inode = path->dentry->d_inode;
 	error = -EISDIR;
-	if (S_ISDIR(path->dentry->d_inode->i_mode))
+	if (S_ISDIR(nd->inode->i_mode))
 		goto exit;
 ok:
 	filp = finish_open(nd, open_flag, acc_mode);
@@ -1741,7 +2183,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	struct path path;
 	int count = 0;
 	int flag = open_to_namei_flags(open_flag);
-	int force_reval = 0;
+	int flags;
 
 	if (!(open_flag & O_CREAT))
 		mode = 0;
@@ -1770,54 +2212,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (open_flag & O_APPEND)
 		acc_mode |= MAY_APPEND;
 
-	/* find the parent */
-reval:
-	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
+	flags = LOOKUP_OPEN;
+	if (open_flag & O_CREAT) {
+		flags |= LOOKUP_CREATE;
+		if (open_flag & O_EXCL)
+			flags |= LOOKUP_EXCL;
+	}
+	if (open_flag & O_DIRECTORY)
+		flags |= LOOKUP_DIRECTORY;
+	if (!(open_flag & O_NOFOLLOW))
+		flags |= LOOKUP_FOLLOW;
+
+	filp = get_empty_filp();
+	if (!filp)
+		return ERR_PTR(-ENFILE);
+
+	filp->f_flags = open_flag;
+	nd.intent.open.file = filp;
+	nd.intent.open.flags = flag;
+	nd.intent.open.create_mode = mode;
+
+	if (open_flag & O_CREAT)
+		goto creat;
+
+	/* !O_CREAT, simple open */
+	error = do_path_lookup(dfd, pathname, flags, &nd);
+	if (unlikely(error))
+		goto out_filp;
+	error = -ELOOP;
+	if (!(nd.flags & LOOKUP_FOLLOW)) {
+		if (nd.inode->i_op->follow_link)
+			goto out_path;
+	}
+	error = -ENOTDIR;
+	if (nd.flags & LOOKUP_DIRECTORY) {
+		if (!nd.inode->i_op->lookup)
+			goto out_path;
+	}
+	audit_inode(pathname, nd.path.dentry);
+	filp = finish_open(&nd, open_flag, acc_mode);
+	return filp;
+
+creat:
+	/* OK, have to create the file. Find the parent. */
+	error = path_init_rcu(dfd, pathname,
+			LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
 	if (error)
-		return ERR_PTR(error);
-	if (force_reval)
-		nd.flags |= LOOKUP_REVAL;
+		goto out_filp;
+	error = path_walk_rcu(pathname, &nd);
+	path_finish_rcu(&nd);
+	if (unlikely(error == -ECHILD || error == -ESTALE)) {
+		/* slower, locked walk */
+		if (error == -ESTALE) {
+reval:
+			flags |= LOOKUP_REVAL;
+		}
+		error = path_init(dfd, pathname,
+				LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
+		if (error)
+			goto out_filp;
 
-	current->total_link_count = 0;
-	error = link_path_walk(pathname, &nd);
-	if (error) {
-		filp = ERR_PTR(error);
-		goto out;
+		error = path_walk_simple(pathname, &nd);
 	}
-	if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
+	if (unlikely(error))
+		goto out_filp;
+	if (unlikely(!audit_dummy_context()))
 		audit_inode(pathname, nd.path.dentry);
 
 	/*
 	 * We have the parent and last component.
 	 */
-
-	error = -ENFILE;
-	filp = get_empty_filp();
-	if (filp == NULL)
-		goto exit_parent;
-	nd.intent.open.file = filp;
-	filp->f_flags = open_flag;
-	nd.intent.open.flags = flag;
-	nd.intent.open.create_mode = mode;
-	nd.flags &= ~LOOKUP_PARENT;
-	nd.flags |= LOOKUP_OPEN;
-	if (open_flag & O_CREAT) {
-		nd.flags |= LOOKUP_CREATE;
-		if (open_flag & O_EXCL)
-			nd.flags |= LOOKUP_EXCL;
-	}
-	if (open_flag & O_DIRECTORY)
-		nd.flags |= LOOKUP_DIRECTORY;
-	if (!(open_flag & O_NOFOLLOW))
-		nd.flags |= LOOKUP_FOLLOW;
+	nd.flags = flags;
 	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
 	while (unlikely(!filp)) { /* trailing symlink */
 		struct path holder;
-		struct inode *inode = path.dentry->d_inode;
 		void *cookie;
 		error = -ELOOP;
 		/* S_ISDIR part is a temporary automount kludge */
-		if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
+		if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
 			goto exit_dput;
 		if (count++ == 32)
 			goto exit_dput;
@@ -1838,36 +2310,33 @@ reval:
 			goto exit_dput;
 		error = __do_follow_link(&path, &nd, &cookie);
 		if (unlikely(error)) {
+			if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
+				nd.inode->i_op->put_link(path.dentry, &nd, cookie);
 			/* nd.path had been dropped */
-			if (!IS_ERR(cookie) && inode->i_op->put_link)
-				inode->i_op->put_link(path.dentry, &nd, cookie);
-			path_put(&path);
-			release_open_intent(&nd);
-			filp = ERR_PTR(error);
-			goto out;
+			nd.path = path;
+			goto out_path;
 		}
 		holder = path;
 		nd.flags &= ~LOOKUP_PARENT;
 		filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
-		if (inode->i_op->put_link)
-			inode->i_op->put_link(holder.dentry, &nd, cookie);
+		if (nd.inode->i_op->put_link)
+			nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
 		path_put(&holder);
 	}
 out:
 	if (nd.root.mnt)
 		path_put(&nd.root);
-	if (filp == ERR_PTR(-ESTALE) && !force_reval) {
-		force_reval = 1;
+	if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
 		goto reval;
-	}
 	return filp;
 
 exit_dput:
 	path_put_conditional(&path, &nd);
+out_path:
+	path_put(&nd.path);
+out_filp:
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
-exit_parent:
-	path_put(&nd.path);
 	filp = ERR_PTR(error);
 	goto out;
 }
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index ae4b0fd9033f..998e3a715bcc 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -402,6 +402,10 @@ static int proc_sys_compare(const struct dentry *parent,
 		const struct dentry *dentry, const struct inode *inode,
 		unsigned int len, const char *str, const struct qstr *name)
 {
+	/* Although proc doesn't have negative dentries, rcu-walk means
+	 * that inode here can be NULL */
+	if (!inode)
+		return 0;
 	if (name->len != len)
 		return 1;
 	if (memcmp(name->name, str, len))
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ca648685f0cc..c2e7390289cc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/spinlock.h>
+#include <linux/seqlock.h>
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 
@@ -90,6 +91,7 @@ struct dentry {
 	unsigned int d_count;		/* protected by d_lock */
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
+	seqcount_t d_seq;		/* per dentry seqlock */
 	int d_mounted;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
@@ -266,9 +268,33 @@ extern void d_move(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 
 /* appendix may either be NULL or be used for transname suffixes */
-extern struct dentry * d_lookup(struct dentry *, struct qstr *);
-extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
-extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *);
+extern struct dentry *d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
+extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+				unsigned *seq, struct inode **inode);
+
+/**
+ * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
+ * @dentry: dentry to take a ref on
+ * @seq: seqcount to verify against
+ * @Returns: 0 on failure, else 1.
+ *
+ * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
+ * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
+ */
+static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
+{
+	int ret = 0;
+
+	assert_spin_locked(&dentry->d_lock);
+	if (!read_seqcount_retry(&dentry->d_seq, seq)) {
+		ret = 1;
+		dentry->d_count++;
+	}
+
+	return ret;
+}
 
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index aec730b53935..18d06add0a40 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,10 @@ struct nameidata {
 	struct path	path;
 	struct qstr	last;
 	struct path	root;
+	struct file	*file;
+	struct inode	*inode; /* path.dentry.d_inode */
 	unsigned int	flags;
+	unsigned	seq;
 	int		last_type;
 	unsigned	depth;
 	char *saved_names[MAX_NESTED_LINKS + 1];
@@ -43,11 +46,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - internal "there are more path components" flag
  *  - dentry cache is untrusted; force a real lookup
  */
-#define LOOKUP_FOLLOW		 1
-#define LOOKUP_DIRECTORY	 2
-#define LOOKUP_CONTINUE		 4
-#define LOOKUP_PARENT		16
-#define LOOKUP_REVAL		64
+#define LOOKUP_FOLLOW		0x0001
+#define LOOKUP_DIRECTORY	0x0002
+#define LOOKUP_CONTINUE		0x0004
+
+#define LOOKUP_PARENT		0x0010
+#define LOOKUP_REVAL		0x0020
+#define LOOKUP_RCU		0x0040
 /*
  * Intent data
  */
diff --git a/include/linux/security.h b/include/linux/security.h
index fd4d55fb8845..ed95401970c7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	called when the actual read/write operations are performed.
  *	@inode contains the inode structure to check.
  *	@mask contains the permission mask.
- *	@nd contains the nameidata (may be NULL).
  *	Return 0 if permission is granted.
  * @inode_setattr:
  *	Check permission before setting file attributes.  Note that the kernel
@@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 int security_inode_readlink(struct dentry *dentry);
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
+int security_inode_exec_permission(struct inode *inode, unsigned int flags);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
@@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask)
 	return 0;
 }
 
+static inline int security_inode_exec_permission(struct inode *inode,
+						  unsigned int flags)
+{
+	return 0;
+}
+
 static inline int security_inode_setattr(struct dentry *dentry,
 					  struct iattr *attr)
 {
diff --git a/security/security.c b/security/security.c
index 1b798d3df710..c645e263ca8d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -513,6 +513,15 @@ int security_inode_permission(struct inode *inode, int mask)
 	return security_ops->inode_permission(inode, mask);
 }
 
+int security_inode_exec_permission(struct inode *inode, unsigned int flags)
+{
+	if (unlikely(IS_PRIVATE(inode)))
+		return 0;
+	if (flags)
+		return -ECHILD;
+	return security_ops->inode_permission(inode, MAY_EXEC);
+}
+
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	if (unlikely(IS_PRIVATE(dentry->d_inode)))
-- 
cgit v1.2.3-71-gd317


From c28cc36469554dc55540f059fbdc7fa22a2c31fc Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:53 +1100
Subject: fs: fs_struct use seqlock

Use a seqlock in the fs_struct to enable us to take an atomic copy of the
complete cwd and root paths. Use this in the RCU lookup path to avoid a
thread-shared spinlock in RCU lookup operations.

Multi-threaded apps may now perform path lookups with scalability matching
multi-process apps. Operations such as stat(2) become very scalable for
multi-threaded workload.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/fs_struct.c            | 10 ++++++++++
 fs/namei.c                | 34 +++++++++++++++++++++-------------
 include/linux/fs_struct.h |  3 +++
 3 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9cf5f3d..60b8531f41c5 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,9 +14,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 	struct path old_root;
 
 	spin_lock(&fs->lock);
+	write_seqcount_begin(&fs->seq);
 	old_root = fs->root;
 	fs->root = *path;
 	path_get(path);
+	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 	if (old_root.dentry)
 		path_put(&old_root);
@@ -31,9 +33,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 	struct path old_pwd;
 
 	spin_lock(&fs->lock);
+	write_seqcount_begin(&fs->seq);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
 	path_get(path);
+	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
@@ -52,6 +56,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 		fs = p->fs;
 		if (fs) {
 			spin_lock(&fs->lock);
+			write_seqcount_begin(&fs->seq);
 			if (fs->root.dentry == old_root->dentry
 			    && fs->root.mnt == old_root->mnt) {
 				path_get(new_root);
@@ -64,6 +69,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 				fs->pwd = *new_root;
 				count++;
 			}
+			write_seqcount_end(&fs->seq);
 			spin_unlock(&fs->lock);
 		}
 		task_unlock(p);
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
 		int kill;
 		task_lock(tsk);
 		spin_lock(&fs->lock);
+		write_seqcount_begin(&fs->seq);
 		tsk->fs = NULL;
 		kill = !--fs->users;
+		write_seqcount_end(&fs->seq);
 		spin_unlock(&fs->lock);
 		task_unlock(tsk);
 		if (kill)
@@ -105,6 +113,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 		fs->users = 1;
 		fs->in_exec = 0;
 		spin_lock_init(&fs->lock);
+		seqcount_init(&fs->seq);
 		fs->umask = old->umask;
 		get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
 	}
@@ -144,6 +153,7 @@ EXPORT_SYMBOL(current_umask);
 struct fs_struct init_fs = {
 	.users		= 1,
 	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
+	.seq		= SEQCNT_ZERO,
 	.umask		= 0022,
 };
 
diff --git a/fs/namei.c b/fs/namei.c
index 8d3f15b3a541..c731b50a6184 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -684,9 +684,12 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
 {
 	if (!nd->root.mnt) {
 		struct fs_struct *fs = current->fs;
-		spin_lock(&fs->lock);
-		nd->root = fs->root;
-		spin_unlock(&fs->lock);
+		unsigned seq;
+
+		do {
+			seq = read_seqcount_begin(&fs->seq);
+			nd->root = fs->root;
+		} while (read_seqcount_retry(&fs->seq, seq));
 	}
 }
 
@@ -1369,26 +1372,31 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n
 
 	if (*name=='/') {
 		struct fs_struct *fs = current->fs;
+		unsigned seq;
 
 		br_read_lock(vfsmount_lock);
 		rcu_read_lock();
 
-		spin_lock(&fs->lock);
-		nd->root = fs->root;
-		nd->path = nd->root;
-		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-		spin_unlock(&fs->lock);
+		do {
+			seq = read_seqcount_begin(&fs->seq);
+			nd->root = fs->root;
+			nd->path = nd->root;
+			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+		} while (read_seqcount_retry(&fs->seq, seq));
 
 	} else if (dfd == AT_FDCWD) {
 		struct fs_struct *fs = current->fs;
+		unsigned seq;
 
 		br_read_lock(vfsmount_lock);
 		rcu_read_lock();
 
-		spin_lock(&fs->lock);
-		nd->path = fs->pwd;
-		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-		spin_unlock(&fs->lock);
+		do {
+			seq = read_seqcount_begin(&fs->seq);
+			nd->path = fs->pwd;
+			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+		} while (read_seqcount_retry(&fs->seq, seq));
+
 	} else {
 		struct dentry *dentry;
 
@@ -1411,7 +1419,7 @@ static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct n
 		if (fput_needed)
 			nd->file = file;
 
-		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+		nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
 		br_read_lock(vfsmount_lock);
 		rcu_read_lock();
 	}
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index a42b5bf02f8b..003dc0fd7347 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -2,10 +2,13 @@
 #define _LINUX_FS_STRUCT_H
 
 #include <linux/path.h>
+#include <linux/spinlock.h>
+#include <linux/seqlock.h>
 
 struct fs_struct {
 	int users;
 	spinlock_t lock;
+	seqcount_t seq;
 	int umask;
 	int in_exec;
 	struct path root, pwd;
-- 
cgit v1.2.3-71-gd317


From 5f57cbcc02cf18f6b22ef4066bb10afeb8f930ff Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:54 +1100
Subject: fs: dcache remove d_mounted

Rather than keep a d_mounted count in the dentry, set a dentry flag instead.
The flag can be cleared by checking the hash table to see if there are any
mounts left, which is not time critical because it is performed at detach time.

The mounted state of a dentry is only used to speculatively take a look in the
mount hash table if it is set -- before following the mount, vfsmount lock is
taken and mount re-checked without races.

This saves 4 bytes on 32-bit, nothing on 64-bit but it does provide a hole I
might use later (and some configs have larger than 32-bit spinlocks which might
make use of the hole).

Autofs4 conversion and changelog by Ian Kent <raven@themaw.net>:
In autofs4, when expring direct (or offset) mounts we need to ensure that we
block user path walks into the autofs mount, which is covered by another mount.
To do this we clear the mounted status so that follows stop before walking into
the mount and are essentially blocked until the expire is completed. The
automount daemon still finds the correct dentry for the umount due to the
follow mount logic in fs/autofs4/root.c:autofs4_follow_link(), which is set as
an inode operation for direct and offset mounts only and is called following
the lookup that stopped at the covered mount.

At the end of the expire the covering mount probably has gone away so the
mounted status need not be restored. But we need to check this and only restore
the mounted status if the expire failed.

XXX: autofs may not work right if we have other mounts go over the top of it?

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/autofs4/expire.c    | 13 +++++++++++--
 fs/dcache.c            |  1 -
 fs/namespace.c         | 29 ++++++++++++++++++++++++++---
 include/linux/dcache.h | 42 +++++++++++++++++++++---------------------
 4 files changed, 58 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 2f7951d67d16..cc1d01365905 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -295,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 		struct autofs_info *ino = autofs4_dentry_ino(root);
 		if (d_mountpoint(root)) {
 			ino->flags |= AUTOFS_INF_MOUNTPOINT;
-			root->d_mounted--;
+			spin_lock(&root->d_lock);
+			root->d_flags &= ~DCACHE_MOUNTED;
+			spin_unlock(&root->d_lock);
 		}
 		ino->flags |= AUTOFS_INF_EXPIRING;
 		init_completion(&ino->expire_complete);
@@ -503,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 
 		spin_lock(&sbi->fs_lock);
 		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
-			sb->s_root->d_mounted++;
+			spin_lock(&sb->s_root->d_lock);
+			/*
+			 * If we haven't been expired away, then reset
+			 * mounted status.
+			 */
+			if (mnt->mnt_parent != mnt)
+				sb->s_root->d_flags |= DCACHE_MOUNTED;
+			spin_unlock(&sb->s_root->d_lock);
 			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
 		}
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/dcache.c b/fs/dcache.c
index 187fea040108..1d5cf511e1c7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1265,7 +1265,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_sb = NULL;
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
-	dentry->d_mounted = 0;
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dbfc072ec70..39a7d507fba0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -491,6 +491,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 	}
 }
 
+/*
+ * Clear dentry's mounted state if it has no remaining mounts.
+ * vfsmount_lock must be held for write.
+ */
+static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
+{
+	unsigned u;
+
+	for (u = 0; u < HASH_SIZE; u++) {
+		struct vfsmount *p;
+
+		list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
+			if (p->mnt_mountpoint == dentry)
+				return;
+		}
+	}
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags &= ~DCACHE_MOUNTED;
+	spin_unlock(&dentry->d_lock);
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -502,7 +523,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
 	mnt->mnt_mountpoint = mnt->mnt_root;
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
-	old_path->dentry->d_mounted--;
+	dentry_reset_mounted(old_path->mnt, old_path->dentry);
 }
 
 /*
@@ -513,7 +534,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 {
 	child_mnt->mnt_parent = mntget(mnt);
 	child_mnt->mnt_mountpoint = dget(dentry);
-	dentry->d_mounted++;
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_MOUNTED;
+	spin_unlock(&dentry->d_lock);
 }
 
 /*
@@ -1073,7 +1096,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 		list_del_init(&p->mnt_child);
 		if (p->mnt_parent != p) {
 			p->mnt_parent->mnt_ghosts++;
-			p->mnt_mountpoint->d_mounted--;
+			dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c2e7390289cc..e4414693065e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -92,7 +92,6 @@ struct dentry {
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
 	seqcount_t d_seq;		/* per dentry seqlock */
-	int d_mounted;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
 	/*
@@ -156,33 +155,34 @@ struct dentry_operations {
 
 /* d_flags entries */
 #define DCACHE_AUTOFS_PENDING 0x0001    /* autofs: "under construction" */
-#define DCACHE_NFSFS_RENAMED  0x0002    /* this dentry has been "silly
-					 * renamed" and has to be
-					 * deleted on the last dput()
-					 */
-#define	DCACHE_DISCONNECTED 0x0004
-     /* This dentry is possibly not currently connected to the dcache tree,
-      * in which case its parent will either be itself, or will have this
-      * flag as well.  nfsd will not use a dentry with this bit set, but will
-      * first endeavour to clear the bit either by discovering that it is
-      * connected, or by performing lookup operations.   Any filesystem which
-      * supports nfsd_operations MUST have a lookup function which, if it finds
-      * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move
-      * that dentry into place and return that dentry rather than the passed one,
-      * typically using d_splice_alias.
-      */
+#define DCACHE_NFSFS_RENAMED  0x0002
+     /* this dentry has been "silly renamed" and has to be deleted on the last
+      * dput() */
+
+#define	DCACHE_DISCONNECTED	0x0004
+     /* This dentry is possibly not currently connected to the dcache tree, in
+      * which case its parent will either be itself, or will have this flag as
+      * well.  nfsd will not use a dentry with this bit set, but will first
+      * endeavour to clear the bit either by discovering that it is connected,
+      * or by performing lookup operations.   Any filesystem which supports
+      * nfsd_operations MUST have a lookup function which, if it finds a
+      * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
+      * dentry into place and return that dentry rather than the passed one,
+      * typically using d_splice_alias. */
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
-
-#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched by inotify */
+#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
+     /* Parent inode is watched by inotify */
 
 #define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
-
-#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
+     /* Parent inode is watched by some fsnotify listener */
 
 #define DCACHE_CANT_MOUNT	0x0100
 #define DCACHE_GENOCIDE		0x0200
+#define DCACHE_MOUNTED		0x0400	/* is a mountpoint */
+
 
 extern spinlock_t dcache_inode_lock;
 extern seqlock_t rename_lock;
@@ -372,7 +372,7 @@ extern void dput(struct dentry *);
 
 static inline int d_mountpoint(struct dentry *dentry)
 {
-	return dentry->d_mounted;
+	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
 extern struct vfsmount *lookup_mnt(struct path *);
-- 
cgit v1.2.3-71-gd317


From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:55 +1100
Subject: fs: dcache reduce branches in lookup path

Reduce some branches and memory accesses in dcache lookup by adding dentry
flags to indicate common d_ops are set, rather than having to check them.
This saves a pointer memory access (dentry->d_op) in common path lookup
situations, and saves another pointer load and branch in cases where we
have d_op but not the particular operation.

Patched with:

git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/ia64/kernel/perfmon.c    |  2 +-
 drivers/staging/autofs/root.c |  2 +-
 drivers/staging/smbfs/dir.c   |  8 ++++----
 fs/9p/vfs_inode.c             | 26 +++++++++++++-------------
 fs/adfs/dir.c                 |  2 +-
 fs/adfs/super.c               |  2 +-
 fs/affs/namei.c               |  2 +-
 fs/affs/super.c               |  2 +-
 fs/afs/dir.c                  |  2 +-
 fs/anon_inodes.c              |  2 +-
 fs/autofs4/inode.c            |  2 +-
 fs/autofs4/root.c             | 10 +++++-----
 fs/btrfs/export.c             |  4 ++--
 fs/btrfs/inode.c              |  2 +-
 fs/ceph/dir.c                 |  6 +++---
 fs/cifs/dir.c                 | 16 ++++++++--------
 fs/cifs/inode.c               |  8 ++++----
 fs/cifs/link.c                |  4 ++--
 fs/cifs/readdir.c             |  4 ++--
 fs/coda/dir.c                 |  2 +-
 fs/configfs/dir.c             |  8 ++++----
 fs/dcache.c                   | 30 ++++++++++++++++++++++++++----
 fs/ecryptfs/inode.c           |  2 +-
 fs/ecryptfs/main.c            |  4 ++--
 fs/fat/inode.c                |  4 ++--
 fs/fat/namei_msdos.c          |  6 +++---
 fs/fat/namei_vfat.c           |  8 ++++----
 fs/fuse/dir.c                 |  2 +-
 fs/fuse/inode.c               |  4 ++--
 fs/gfs2/export.c              |  4 ++--
 fs/gfs2/ops_fstype.c          |  2 +-
 fs/gfs2/ops_inode.c           |  2 +-
 fs/hfs/dir.c                  |  2 +-
 fs/hfs/super.c                |  2 +-
 fs/hfsplus/dir.c              |  2 +-
 fs/hfsplus/super.c            |  2 +-
 fs/hostfs/hostfs_kern.c       |  2 +-
 fs/hpfs/dentry.c              |  2 +-
 fs/isofs/inode.c              |  2 +-
 fs/isofs/namei.c              |  2 +-
 fs/jfs/namei.c                |  4 ++--
 fs/jfs/super.c                |  2 +-
 fs/libfs.c                    |  2 +-
 fs/minix/namei.c              |  2 +-
 fs/namei.c                    | 31 ++++++++++++++++++++-----------
 fs/ncpfs/dir.c                |  4 ++--
 fs/ncpfs/inode.c              |  2 +-
 fs/nfs/dir.c                  |  6 +++---
 fs/nfs/getroot.c              |  4 ++--
 fs/ocfs2/export.c             |  4 ++--
 fs/ocfs2/namei.c              | 10 +++++-----
 fs/pipe.c                     |  2 +-
 fs/proc/base.c                | 12 ++++++------
 fs/proc/generic.c             |  2 +-
 fs/proc/proc_sysctl.c         |  4 ++--
 fs/reiserfs/xattr.c           |  2 +-
 fs/sysfs/dir.c                |  2 +-
 fs/sysv/namei.c               |  2 +-
 fs/sysv/super.c               |  2 +-
 include/linux/dcache.h        |  6 ++++++
 kernel/cgroup.c               |  2 +-
 net/socket.c                  |  2 +-
 net/sunrpc/rpc_pipe.c         |  2 +-
 63 files changed, 174 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d39d8a53b579..5a24f40bb48e 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx)
 	}
 	path.mnt = mntget(pfmfs_mnt);
 
-	path.dentry->d_op = &pfmfs_dentry_operations;
+	d_set_d_op(path.dentry, &pfmfs_dentry_operations);
 	d_add(path.dentry, inode);
 
 	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c
index 0fdec4befd84..b09adb57971f 100644
--- a/drivers/staging/autofs/root.c
+++ b/drivers/staging/autofs/root.c
@@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 	 *
 	 * We need to do this before we release the directory semaphore.
 	 */
-	dentry->d_op = &autofs_dentry_operations;
+	d_set_d_op(dentry, &autofs_dentry_operations);
 	dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 	d_add(dentry, NULL);
 
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index 5f79799d5d4a..78f09412740c 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry)
 	struct smb_sb_info *server = server_from_dentry(dentry);
 
 	if (server->mnt->flags & SMB_MOUNT_CASE)
-		dentry->d_op = &smbfs_dentry_operations_case;
+		d_set_d_op(dentry, &smbfs_dentry_operations_case);
 	else
-		dentry->d_op = &smbfs_dentry_operations;
+		d_set_d_op(dentry, &smbfs_dentry_operations);
 	dentry->d_time = jiffies;
 }
 
@@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	add_entry:
 			server = server_from_dentry(dentry);
 			if (server->mnt->flags & SMB_MOUNT_CASE)
-				dentry->d_op = &smbfs_dentry_operations_case;
+				d_set_d_op(dentry, &smbfs_dentry_operations_case);
 			else
-				dentry->d_op = &smbfs_dentry_operations;
+				d_set_d_op(dentry, &smbfs_dentry_operations);
 
 			d_add(dentry, inode);
 			smb_renew_times(dentry);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f6f9081e6d2c..df8bbb358d54 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
@@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 inst_out:
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_add(dentry, inode);
 	return NULL;
@@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 					err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 
@@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		ihold(old_dentry->d_inode);
 	}
 
-	dentry->d_op = old_dentry->d_op;
+	d_set_d_op(dentry, old_dentry->d_op);
 	d_instantiate(dentry, old_dentry->d_inode);
 
 	return err;
@@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index a11e5e102716..bf7693c384f9 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	struct object_info obj;
 	int error;
 
-	dentry->d_op = &adfs_dentry_operations;	
+	d_set_d_op(dentry, &adfs_dentry_operations);
 	lock_kernel();
 	error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
 	if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 47dffc513a26..a4041b52fbca 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 		adfs_error(sb, "get root inode failed\n");
 		goto error;
 	} else
-		sb->s_root->d_op = &adfs_dentry_operations;
+		d_set_d_op(sb->s_root, &adfs_dentry_operations);
 	unlock_kernel();
 	return 0;
 
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 5aca08c21100..944a4042fb65 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
 	}
-	dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
+	d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
 	d_add(dentry, inode);
 	return NULL;
 }
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4c18fcfb0a19..d39081bbe7ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -482,7 +482,7 @@ got_root:
 		printk(KERN_ERR "AFFS: Get root inode failed\n");
 		goto out_error;
 	}
-	sb->s_root->d_op = &affs_dentry_operations;
+	d_set_d_op(sb->s_root, &affs_dentry_operations);
 
 	pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
 	return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2c18cde27000..b8bb7e7148d0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 success:
-	dentry->d_op = &afs_fs_dentry_operations;
+	d_set_d_op(dentry, &afs_fs_dentry_operations);
 
 	d_add(dentry, inode);
 	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 57ce55b2564c..aca8806fa206 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
 	 */
 	ihold(anon_inode_inode);
 
-	path.dentry->d_op = &anon_inodefs_dentry_operations;
+	d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa706..a7bdb9dcac84 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		goto fail_iput;
 	pipe = NULL;
 
-	root->d_op = &autofs4_sb_dentry_operations;
+	d_set_d_op(root, &autofs4_sb_dentry_operations);
 	root->d_fsdata = ino;
 
 	/* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 10ca68a96dc7..bfe3f2eb684d 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		 * we check for the hashed dentry and return the newly
 		 * hashed dentry.
 		 */
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 
 		/*
 		 * And we need to ensure that the same dentry is used for
@@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir,
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 	else
-		dentry->d_op = &autofs4_dentry_operations;
+		d_set_d_op(dentry, &autofs4_dentry_operations);
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
@@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 	else
-		dentry->d_op = &autofs4_dentry_operations;
+		d_set_d_op(dentry, &autofs4_dentry_operations);
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 659f532d26a0..0ccf9a8afcdf 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
-		dentry->d_op = &btrfs_dentry_operations;
+		d_set_d_op(dentry, &btrfs_dentry_operations);
 	return dentry;
 fail:
 	srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	key.offset = 0;
 	dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
 	if (!IS_ERR(dentry))
-		dentry->d_op = &btrfs_dentry_operations;
+		d_set_d_op(dentry, &btrfs_dentry_operations);
 	return dentry;
 fail:
 	btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f9d2994a42a2..63e4546b478a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	int index;
 	int ret;
 
-	dentry->d_op = &btrfs_dentry_operations;
+	d_set_d_op(dentry, &btrfs_dentry_operations);
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 58abc3da6111..cc01cf826769 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry)
 
 	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
 	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
-		dentry->d_op = &ceph_dentry_ops;
+		d_set_d_op(dentry, &ceph_dentry_ops);
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
-		dentry->d_op = &ceph_snapdir_dentry_ops;
+		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
 	else
-		dentry->d_op = &ceph_snap_dentry_ops;
+		d_set_d_op(dentry, &ceph_snap_dentry_ops);
 
 	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
 	if (!di)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 88bfe686ac00..e3b10ca6d453 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
 			      struct inode *newinode)
 {
 	if (tcon->nocase)
-		direntry->d_op = &cifs_ci_dentry_ops;
+		d_set_d_op(direntry, &cifs_ci_dentry_ops);
 	else
-		direntry->d_op = &cifs_dentry_ops;
+		d_set_d_op(direntry, &cifs_dentry_ops);
 	d_instantiate(direntry, newinode);
 }
 
@@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 		rc = cifs_get_inode_info_unix(&newinode, full_path,
 						inode->i_sb, xid);
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 
 		if (rc == 0)
 			d_instantiate(direntry, newinode);
@@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
 	if ((rc == 0) && (newInode != NULL)) {
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_add(direntry, newInode);
 		if (posix_open) {
 			filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		rc = 0;
 		direntry->d_time = jiffies;
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_add(direntry, NULL);
 	/*	if it was once a directory (but how can we tell?) we could do
 		shrink_dcache_parent(direntry); */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 99b9a2cc14b7..2a239d878e85 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	to set uid/gid */
 			inc_nlink(inode);
 			if (pTcon->nocase)
-				direntry->d_op = &cifs_ci_dentry_ops;
+				d_set_d_op(direntry, &cifs_ci_dentry_ops);
 			else
-				direntry->d_op = &cifs_dentry_ops;
+				d_set_d_op(direntry, &cifs_dentry_ops);
 
 			cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
 			cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1363,9 +1363,9 @@ mkdir_get_info:
 						 inode->i_sb, xid, NULL);
 
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_instantiate(direntry, newinode);
 		 /* setting nlink not necessary except in cases where we
 		  * failed to get it from the server or was set bogus */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7b..fe2f6a93c49e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 			      rc);
 		} else {
 			if (pTcon->nocase)
-				direntry->d_op = &cifs_ci_dentry_ops;
+				d_set_d_op(direntry, &cifs_ci_dentry_ops);
 			else
-				direntry->d_op = &cifs_dentry_ops;
+				d_set_d_op(direntry, &cifs_dentry_ops);
 			d_instantiate(direntry, newinode);
 		}
 	}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ee463aeca0b0..ec5b68e3b928 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 	}
 
 	if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
-		dentry->d_op = &cifs_ci_dentry_ops;
+		d_set_d_op(dentry, &cifs_ci_dentry_ops);
 	else
-		dentry->d_op = &cifs_dentry_ops;
+		d_set_d_op(dentry, &cifs_dentry_ops);
 
 	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9e37e8bc9b89..aa40c811f8d2 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
 		return ERR_PTR(error);
 
 exit:
-	entry->d_op = &coda_dentry_operations;
+	d_set_d_op(entry, &coda_dentry_operations);
 
 	if (inode && (type & CODA_NOCACHE))
 		coda_flag_inode(inode, C_VATTR | C_PURGE);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e9acea440ffc..36637a8c1ed3 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
 		return error;
 	}
 
-	dentry->d_op = &configfs_dentry_ops;
+	d_set_d_op(dentry, &configfs_dentry_ops);
 	d_rehash(dentry);
 
 	return 0;
@@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
 		 */
 		if (dentry->d_name.len > NAME_MAX)
 			return ERR_PTR(-ENAMETOOLONG);
-		dentry->d_op = &configfs_dentry_ops;
+		d_set_d_op(dentry, &configfs_dentry_ops);
 		d_add(dentry, NULL);
 		return NULL;
 	}
@@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group,
 	ret = -ENOMEM;
 	child = d_alloc(parent, &name);
 	if (child) {
-		child->d_op = &configfs_dentry_ops;
+		d_set_d_op(child, &configfs_dentry_ops);
 		d_add(child, NULL);
 
 		ret = configfs_attach_group(&parent_group->cg_item,
@@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	err = -ENOMEM;
 	dentry = d_alloc(configfs_sb->s_root, &name);
 	if (dentry) {
-		dentry->d_op = &configfs_dentry_ops;
+		d_set_d_op(dentry, &configfs_dentry_ops);
 		d_add(dentry, NULL);
 
 		err = configfs_attach_group(sd->s_element, &group->cg_item,
diff --git a/fs/dcache.c b/fs/dcache.c
index 1d5cf511e1c7..f9693da3efbd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -398,7 +398,7 @@ repeat:
 		return;
 	}
 
-	if (dentry->d_op && dentry->d_op->d_delete) {
+	if (dentry->d_flags & DCACHE_OP_DELETE) {
 		if (dentry->d_op->d_delete(dentry))
 			goto kill_it;
 	}
@@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 }
 EXPORT_SYMBOL(d_alloc_name);
 
+void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+	BUG_ON(dentry->d_op);
+	BUG_ON(dentry->d_flags & (DCACHE_OP_HASH	|
+				DCACHE_OP_COMPARE	|
+				DCACHE_OP_REVALIDATE	|
+				DCACHE_OP_DELETE ));
+	dentry->d_op = op;
+	if (!op)
+		return;
+	if (op->d_hash)
+		dentry->d_flags |= DCACHE_OP_HASH;
+	if (op->d_compare)
+		dentry->d_flags |= DCACHE_OP_COMPARE;
+	if (op->d_revalidate)
+		dentry->d_flags |= DCACHE_OP_REVALIDATE;
+	if (op->d_delete)
+		dentry->d_flags |= DCACHE_OP_DELETE;
+
+}
+EXPORT_SYMBOL(d_set_d_op);
+
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
 	spin_lock(&dentry->d_lock);
@@ -1731,7 +1753,7 @@ seqretry:
 		 */
 		if (read_seqcount_retry(&dentry->d_seq, *seq))
 			goto seqretry;
-		if (parent->d_op && parent->d_op->d_compare) {
+		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			if (parent->d_op->d_compare(parent, *inode,
 						dentry, i,
 						tlen, tname, name))
@@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 		 */
 		tlen = dentry->d_name.len;
 		tname = dentry->d_name.name;
-		if (parent->d_op && parent->d_op->d_compare) {
+		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			if (parent->d_op->d_compare(parent, parent->d_inode,
 						dentry, dentry->d_inode,
 						tlen, tname, name))
@@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
 	 * routine may choose to leave the hash value unchanged.
 	 */
 	name->hash = full_name_hash(name->name, name->len);
-	if (dir->d_op && dir->d_op->d_hash) {
+	if (dir->d_flags & DCACHE_OP_HASH) {
 		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
 			goto out;
 	}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5e5c7ec1fc98..f91b35db4c6e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	struct qstr lower_name;
 	int rc = 0;
 
-	ecryptfs_dentry->d_op = &ecryptfs_dops;
+	d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
 	if ((ecryptfs_dentry->d_name.len == 1
 	     && !strcmp(ecryptfs_dentry->d_name.name, "."))
 	    || (ecryptfs_dentry->d_name.len == 2
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a9dbd62518e6..351038675376 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 	if (special_file(lower_inode->i_mode))
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
-	dentry->d_op = &ecryptfs_dops;
+	d_set_d_op(dentry, &ecryptfs_dops);
 	fsstack_copy_attr_all(inode, lower_inode);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
@@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		deactivate_locked_super(s);
 		goto out;
 	}
-	s->s_root->d_op = &ecryptfs_dops;
+	d_set_d_op(s->s_root, &ecryptfs_dops);
 	s->s_root->d_sb = s;
 	s->s_root->d_parent = s->s_root;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8cccfebee180..206351af7c58 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
 	 */
 	result = d_obtain_alias(inode);
 	if (!IS_ERR(result))
-		result->d_op = sb->s_root->d_op;
+		d_set_d_op(result, sb->s_root->d_op);
 	return result;
 }
 
@@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(inode);
 	if (!IS_ERR(parent))
-		parent->d_op = sb->s_root->d_op;
+		d_set_d_op(parent, sb->s_root->d_op);
 out:
 	unlock_super(sb);
 
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3b3e072d8982..35ffe43afa4b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
 	}
 out:
 	unlock_super(sb);
-	dentry->d_op = &msdos_dentry_operations;
+	d_set_d_op(dentry, &msdos_dentry_operations);
 	dentry = d_splice_alias(inode, dentry);
 	if (dentry)
-		dentry->d_op = &msdos_dentry_operations;
+		d_set_d_op(dentry, &msdos_dentry_operations);
 	return dentry;
 
 error:
@@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	sb->s_flags |= MS_NOATIME;
-	sb->s_root->d_op = &msdos_dentry_operations;
+	d_set_d_op(sb->s_root, &msdos_dentry_operations);
 	unlock_super(sb);
 	return 0;
 }
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 4fc06278db48..3be5ed7d859f 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
 
 out:
 	unlock_super(sb);
-	dentry->d_op = sb->s_root->d_op;
+	d_set_d_op(dentry, sb->s_root->d_op);
 	dentry->d_time = dentry->d_parent->d_inode->i_version;
 	dentry = d_splice_alias(inode, dentry);
 	if (dentry) {
-		dentry->d_op = sb->s_root->d_op;
+		d_set_d_op(dentry, sb->s_root->d_op);
 		dentry->d_time = dentry->d_parent->d_inode->i_version;
 	}
 	return dentry;
@@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (MSDOS_SB(sb)->options.name_check != 's')
-		sb->s_root->d_op = &vfat_ci_dentry_ops;
+		d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
 	else
-		sb->s_root->d_op = &vfat_dentry_ops;
+		d_set_d_op(sb->s_root, &vfat_dentry_ops);
 
 	unlock_super(sb);
 	return 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482d..c9a8a426a395 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	}
 
 	entry = newent ? newent : entry;
-	entry->d_op = &fuse_dentry_operations;
+	d_set_d_op(entry, &fuse_dentry_operations);
 	if (outarg_valid)
 		fuse_change_entry_timeout(entry, &outarg);
 	else
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 44e0a6c57e87..a8b31da19b93 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
 
 	entry = d_obtain_alias(inode);
 	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
-		entry->d_op = &fuse_dentry_operations;
+		d_set_d_op(entry, &fuse_dentry_operations);
 		fuse_invalidate_entry_cache(entry);
 	}
 
@@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(inode);
 	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
-		parent->d_op = &fuse_dentry_operations;
+		d_set_d_op(parent, &fuse_dentry_operations);
 		fuse_invalidate_entry_cache(parent);
 	}
 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5ab3839dfcb9..97012ecff560 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 
 	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
 	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
+		d_set_d_op(dentry, &gfs2_dops);
 	return dentry;
 }
 
@@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 out_inode:
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
+		d_set_d_op(dentry, &gfs2_dops);
 	return dentry;
 }
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..2aeabd4218cc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
 		iput(inode);
 		return -ENOMEM;
 	}
-	dentry->d_op = &gfs2_dops;
+	d_set_d_op(dentry, &gfs2_dops);
 	*dptr = dentry;
 	return 0;
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7502c2..f28f89796f4d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct inode *inode = NULL;
 
-	dentry->d_op = &gfs2_dops;
+	d_set_d_op(dentry, &gfs2_dops);
 
 	inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 	if (inode && IS_ERR(inode))
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41b..ea4aefe7c652 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct inode *inode = NULL;
 	int res;
 
-	dentry->d_op = &hfs_dentry_operations;
+	d_set_d_op(dentry, &hfs_dentry_operations);
 
 	hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
 	hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ef4ee5716abe..0bef62aa4f42 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sb->s_root)
 		goto bail_iput;
 
-	sb->s_root->d_op = &hfs_dentry_operations;
+	d_set_d_op(sb->s_root, &hfs_dentry_operations);
 
 	/* everything's okay */
 	return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 9d59c0571f59..ccab87145f7a 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 
 	sb = dir->i_sb;
 
-	dentry->d_op = &hfsplus_dentry_operations;
+	d_set_d_op(dentry, &hfsplus_dentry_operations);
 	dentry->d_fsdata = NULL;
 	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 182e83a9079e..ddf712e4700e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		err = -ENOMEM;
 		goto cleanup;
 	}
-	sb->s_root->d_op = &hfsplus_dentry_operations;
+	d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
 
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 861113fcfc88..0bc81cf256b8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 		goto out_put;
 
 	d_add(dentry, inode);
-	dentry->d_op = &hostfs_dentry_ops;
+	d_set_d_op(dentry, &hostfs_dentry_ops);
 	return NULL;
 
  out_put:
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 35526df1fd32..32c13a94e1e9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
 
 void hpfs_set_dentry_operations(struct dentry *dentry)
 {
-	dentry->d_op = &hpfs_dentry_operations;
+	d_set_d_op(dentry, &hpfs_dentry_operations);
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d8f3a652243d..844a7903c72f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -949,7 +949,7 @@ root_found:
 		table += 2;
 	if (opt.check == 'r')
 		table++;
-	s->s_root->d_op = &isofs_dentry_ops[table];
+	d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
 
 	kfree(opt.iocharset);
 
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 715f7d318046..679a849c3b27 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
 	struct inode *inode;
 	struct page *page;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 
 	page = alloc_page(GFP_USER);
 	if (!page)
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 57f90dad8919..a151cbdec626 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 	jfs_info("jfs_lookup: name = %s", name);
 
 	if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
-		dentry->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(dentry, &jfs_ci_dentry_operations);
 
 	if ((name[0] == '.') && (len == 1))
 		inum = dip->i_ino;
@@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 	dentry = d_splice_alias(ip, dentry);
 
 	if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
-		dentry->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(dentry, &jfs_ci_dentry_operations);
 
 	return dentry;
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b715b0f7bdfd..3150d766e0d4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_no_root;
 
 	if (sbi->mntflag & JFS_OS2)
-		sb->s_root->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
 
 	/* logical blocks are represented by 40 bits in pxd_t, etc. */
 	sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index 28b36663c44e..889311e3d06b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
-	dentry->d_op = &simple_dentry_operations;
+	d_set_d_op(dentry, &simple_dentry_operations);
 	d_add(dentry, NULL);
 	return NULL;
 }
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3accef..1b9e07728a9f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
 	struct inode * inode = NULL;
 	ino_t ino;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 
 	if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
 		return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index c731b50a6184..90bd2873e117 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return dentry;
 }
 
+static inline int need_reval_dot(struct dentry *dentry)
+{
+	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+		return 0;
+
+	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
+		return 0;
+
+	return 1;
+}
+
 /*
  * force_reval_path - force revalidation of a dentry
  *
@@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd)
 
 	/*
 	 * only check on filesystems where it's possible for the dentry to
-	 * become stale. It's assumed that if this flag is set then the
-	 * d_revalidate op will also be defined.
+	 * become stale.
 	 */
-	if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
+	if (!need_reval_dot(dentry))
 		return 0;
 
 	status = dentry->d_op->d_revalidate(dentry, nd);
@@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
-	if (parent->d_op && parent->d_op->d_hash) {
+	if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
 		int err = parent->d_op->d_hash(parent, nd->inode, name);
 		if (err < 0)
 			return err;
@@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 
 		nd->seq = seq;
-		if (dentry->d_op && dentry->d_op->d_revalidate) {
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
 			/* We commonly drop rcu-walk here */
 			if (nameidata_dentry_drop_rcu(nd, dentry))
 				return -ECHILD;
@@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 		if (!dentry)
 			goto need_lookup;
 found:
-		if (dentry->d_op && dentry->d_op->d_revalidate)
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE)
 			goto need_revalidate;
 done:
 		path->mnt = mnt;
@@ -1281,8 +1291,7 @@ return_reval:
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->path.dentry && nd->path.dentry->d_sb &&
-		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
+		if (need_reval_dot(nd->path.dentry)) {
 			if (nameidata_drop_rcu_maybe(nd))
 				return -ECHILD;
 			err = -ESTALE;
@@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
-	if (base->d_op && base->d_op->d_hash) {
+	if (base->d_flags & DCACHE_OP_HASH) {
 		err = base->d_op->d_hash(base, inode, name);
 		dentry = ERR_PTR(err);
 		if (err < 0)
@@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 */
 	dentry = d_lookup(base, name);
 
-	if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+	if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
 		dentry = do_revalidate(dentry, nd);
 
 	if (!dentry)
@@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		follow_dotdot(nd);
 		dir = nd->path.dentry;
 	case LAST_DOT:
-		if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
+		if (need_reval_dot(dir)) {
 			if (!dir->d_op->d_revalidate(dir, nd)) {
 				error = -ESTALE;
 				goto exit;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 0ba3cdc95a44..4b9cbb28d7fa 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 		entry->ino = iunique(dir->i_sb, 2);
 		inode = ncp_iget(dir->i_sb, entry);
 		if (inode) {
-			newdent->d_op = &ncp_dentry_operations;
+			d_set_d_op(newdent, &ncp_dentry_operations);
 			d_instantiate(newdent, inode);
 			if (!hashed)
 				d_rehash(newdent);
@@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
 	if (inode) {
 		ncp_new_dentry(dentry);
 add_entry:
-		dentry->d_op = &ncp_dentry_operations;
+		d_set_d_op(dentry, &ncp_dentry_operations);
 		d_add(dentry, inode);
 		error = 0;
 	}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 60047dbeb38d..0c75a5f3cafd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_root = d_alloc_root(root_inode);
         if (!sb->s_root)
 		goto out_no_root;
-	sb->s_root->d_op = &ncp_root_dentry_operations;
+	d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
 	return 0;
 
 out_no_root:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eb77471b8823..37e0a8bb077e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 	if (dentry == NULL)
 		return;
 
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
 	if (IS_ERR(inode))
 		goto out;
@@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
 		goto out;
 
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 
 	/*
 	 * If we're doing an exclusive create, optimize away the lookup
@@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 		res = ERR_PTR(-ENAMETOOLONG);
 		goto out;
 	}
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 
 	/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
 	 * the dentry. */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b3e36c3430de..c3a5a1126833 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 	security_d_instantiate(ret, inode);
 
 	if (ret->d_op == NULL)
-		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+		d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
 out:
 	nfs_free_fattr(fsinfo.fattr);
 	return ret;
@@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 	security_d_instantiate(ret, inode);
 
 	if (ret->d_op == NULL)
-		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+		d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
 
 out:
 	nfs_free_fattr(fattr);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af3..6adafa576065 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
 
 	result = d_obtain_alias(inode);
 	if (!IS_ERR(result))
-		result->d_op = &ocfs2_dentry_ops;
+		d_set_d_op(result, &ocfs2_dentry_ops);
 	else
 		mlog_errno(PTR_ERR(result));
 
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
 	if (!IS_ERR(parent))
-		parent->d_op = &ocfs2_dentry_ops;
+		d_set_d_op(parent, &ocfs2_dentry_ops);
 
 bail_unlock:
 	ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36f..d14cad6e2e41 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	spin_unlock(&oi->ip_lock);
 
 bail_add:
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	ret = d_splice_alias(inode, dentry);
 
 	if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
 		mlog_errno(status);
 		goto leave;
 	}
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	}
 
 	ihold(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	d_instantiate(dentry, inode);
 
 out_commit:
@@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir,
 		mlog_errno(status);
 		goto bail;
 	}
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
 		goto out_commit;
 	}
 
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	d_instantiate(dentry, inode);
 	status = 0;
 out_commit:
diff --git a/fs/pipe.c b/fs/pipe.c
index ae3592dcc88c..01a786567810 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
 
-	path.dentry->d_op = &pipefs_dentry_operations;
+	d_set_d_op(path.dentry, &pipefs_dentry_operations);
 	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d932fdb6a245..85f0a80912aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
 	ei->op.proc_get_link = proc_fd_link;
-	dentry->d_op = &tid_fd_dentry_operations;
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (tid_fd_revalidate(dentry, NULL))
@@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 	ei->fd = fd;
 	inode->i_mode = S_IFREG | S_IRUSR;
 	inode->i_fop = &proc_fdinfo_file_operations;
-	dentry->d_op = &tid_fd_dentry_operations;
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
 	if (p->fop)
 		inode->i_fop = p->fop;
 	ei->op = p->op;
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (pid_revalidate(dentry, NULL))
@@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
 	if (p->fop)
 		inode->i_fop = p->fop;
 	ei->op = p->op;
-	dentry->d_op = &proc_base_dentry_operations;
+	d_set_d_op(dentry, &proc_base_dentry_operations);
 	d_add(dentry, inode);
 	error = NULL;
 out:
@@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
 	inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
 		ARRAY_SIZE(tgid_base_stuff));
 
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
 	inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
 		ARRAY_SIZE(tid_base_stuff));
 
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 1d607be36d95..f766be29d2c7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 out_unlock:
 
 	if (inode) {
-		dentry->d_op = &proc_dentry_operations;
+		d_set_d_op(dentry, &proc_dentry_operations);
 		d_add(dentry, inode);
 		return NULL;
 	}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 998e3a715bcc..35efd85a4d32 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	err = NULL;
-	dentry->d_op = &proc_sys_dentry_operations;
+	d_set_d_op(dentry, &proc_sys_dentry_operations);
 	d_add(dentry, inode);
 
 out:
@@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
 				dput(child);
 				return -ENOMEM;
 			} else {
-				child->d_op = &proc_sys_dentry_operations;
+				d_set_d_op(child, &proc_sys_dentry_operations);
 				d_add(child, inode);
 			}
 		} else {
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7a..e0f0d7ea10a1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		dentry->d_op = &xattr_lookup_poison_ops;
+		d_set_d_op(dentry, &xattr_lookup_poison_ops);
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 27e1102e303e..3e076caa8daf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	/* instantiate and hash dentry */
 	ret = d_find_alias(inode);
 	if (!ret) {
-		dentry->d_op = &sysfs_dentry_ops;
+		d_set_d_op(dentry, &sysfs_dentry_ops);
 		dentry->d_fsdata = sysfs_get(sd);
 		d_add(dentry, inode);
 	} else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 7507aeb4c90e..b5e68da2db32 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
 	struct inode * inode = NULL;
 	ino_t ino;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 	if (dentry->d_name.len > SYSV_NAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 	ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c10..76712aefc4ab 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 	if (sbi->s_forced_ro)
 		sb->s_flags |= MS_RDONLY;
 	if (sbi->s_truncate)
-		sb->s_root->d_op = &sysv_dentry_operations;
+		d_set_d_op(sb->s_root, &sysv_dentry_operations);
 	return 1;
 }
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e4414693065e..f4b40a751f09 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -183,6 +183,11 @@ struct dentry_operations {
 #define DCACHE_GENOCIDE		0x0200
 #define DCACHE_MOUNTED		0x0400	/* is a mountpoint */
 
+#define DCACHE_OP_HASH		0x1000
+#define DCACHE_OP_COMPARE	0x2000
+#define DCACHE_OP_REVALIDATE	0x4000
+#define DCACHE_OP_DELETE	0x8000
+
 
 extern spinlock_t dcache_inode_lock;
 extern seqlock_t rename_lock;
@@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
+extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
 
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9f41470c3949..51cddc11cd85 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir,
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
-	dentry->d_op = &cgroup_dentry_operations;
+	d_set_d_op(dentry, &cgroup_dentry_operations);
 	d_add(dentry, NULL);
 	return NULL;
 }
diff --git a/net/socket.c b/net/socket.c
index 817dc92e9ef8..991e266bc7ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	}
 	path.mnt = mntget(sock_mnt);
 
-	path.dentry->d_op = &sockfs_dentry_operations;
+	d_set_d_op(path.dentry, &sockfs_dentry_operations);
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 2899fe27f880..09f01f41e55a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent,
 		}
 	}
 	if (!dentry->d_inode)
-		dentry->d_op = &rpc_dentry_operations;
+		d_set_d_op(dentry, &rpc_dentry_operations);
 out_err:
 	return dentry;
 }
-- 
cgit v1.2.3-71-gd317


From 44a7d7a878c9cbb74f236ea755b25b6b2e26a9a9 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:56 +1100
Subject: fs: cache optimise dentry and inode for rcu-walk

Put dentry and inode fields into top of data structure.  This allows RCU path
traversal to perform an RCU dentry lookup in a path walk by touching only the
first 56 bytes of the dentry.

We also fit in 8 bytes of inline name in the first 64 bytes, so for short
names, only 64 bytes needs to be touched to perform the lookup. We should
get rid of the hash->prev pointer from the first 64 bytes, and fit 16 bytes
of name in there, which will take care of 81% rather than 32% of the kernel
tree.

inode is also rearranged so that RCU lookup will only touch a single cacheline
in the inode, plus one in the i_ops structure.

This is important for directory component lookups in RCU path walking. In the
kernel source, directory names average is around 6 chars, so this works.

When we reach the last element of the lookup, we need to lock it and take its
refcount which requires another cacheline access.

Align dentry and inode operations structs, so members will be at predictable
offsets and we can group common operations into head of structure.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/dcache.c            |  2 --
 include/linux/dcache.h | 36 +++++++++++++++++++-----------------
 include/linux/fs.h     | 42 +++++++++++++++++++++++-------------------
 3 files changed, 42 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index f9693da3efbd..07d1f6862dc7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -83,8 +83,6 @@ EXPORT_SYMBOL(dcache_inode_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f4b40a751f09..b1aeda077258 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -82,25 +82,33 @@ full_name_hash(const unsigned char *name, unsigned int len)
  * large memory footprint increase).
  */
 #ifdef CONFIG_64BIT
-#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
+# define DNAME_INLINE_LEN 32 /* 192 bytes */
 #else
-#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
+# ifdef CONFIG_SMP
+#  define DNAME_INLINE_LEN 36 /* 128 bytes */
+# else
+#  define DNAME_INLINE_LEN 40 /* 128 bytes */
+# endif
 #endif
 
 struct dentry {
-	unsigned int d_count;		/* protected by d_lock */
+	/* RCU lookup touched fields */
 	unsigned int d_flags;		/* protected by d_lock */
-	spinlock_t d_lock;		/* per dentry lock */
 	seqcount_t d_seq;		/* per dentry seqlock */
-	struct inode *d_inode;		/* Where the name belongs to - NULL is
-					 * negative */
-	/*
-	 * The next three fields are touched by __d_lookup.  Place them here
-	 * so they all fit in a cache line.
-	 */
 	struct hlist_node d_hash;	/* lookup hash list */
 	struct dentry *d_parent;	/* parent directory */
 	struct qstr d_name;
+	struct inode *d_inode;		/* Where the name belongs to - NULL is
+					 * negative */
+	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+
+	/* Ref lookup also touches following */
+	unsigned int d_count;		/* protected by d_lock */
+	spinlock_t d_lock;		/* per dentry lock */
+	const struct dentry_operations *d_op;
+	struct super_block *d_sb;	/* The root of the dentry tree */
+	unsigned long d_time;		/* used by d_revalidate */
+	void *d_fsdata;			/* fs-specific data */
 
 	struct list_head d_lru;		/* LRU list */
 	/*
@@ -112,12 +120,6 @@ struct dentry {
 	} d_u;
 	struct list_head d_subdirs;	/* our children */
 	struct list_head d_alias;	/* inode alias list */
-	unsigned long d_time;		/* used by d_revalidate */
-	const struct dentry_operations *d_op;
-	struct super_block *d_sb;	/* The root of the dentry tree */
-	void *d_fsdata;			/* fs-specific data */
-
-	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
 /*
@@ -143,7 +145,7 @@ struct dentry_operations {
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
-};
+} ____cacheline_aligned;
 
 /*
  * Locking rules for dentry_operations callbacks are to be found in
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea202fff44f8..a04aa96acb71 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -733,6 +733,20 @@ struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
 
 struct inode {
+	/* RCU path lookup touches following: */
+	umode_t			i_mode;
+	uid_t			i_uid;
+	gid_t			i_gid;
+	const struct inode_operations	*i_op;
+	struct super_block	*i_sb;
+
+	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
+	unsigned int		i_flags;
+	struct mutex		i_mutex;
+
+	unsigned long		i_state;
+	unsigned long		dirtied_when;	/* jiffies of first dirtying */
+
 	struct hlist_node	i_hash;
 	struct list_head	i_wb_list;	/* backing dev IO list */
 	struct list_head	i_lru;		/* inode LRU list */
@@ -744,8 +758,6 @@ struct inode {
 	unsigned long		i_ino;
 	atomic_t		i_count;
 	unsigned int		i_nlink;
-	uid_t			i_uid;
-	gid_t			i_gid;
 	dev_t			i_rdev;
 	unsigned int		i_blkbits;
 	u64			i_version;
@@ -758,13 +770,8 @@ struct inode {
 	struct timespec		i_ctime;
 	blkcnt_t		i_blocks;
 	unsigned short          i_bytes;
-	umode_t			i_mode;
-	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
-	struct mutex		i_mutex;
 	struct rw_semaphore	i_alloc_sem;
-	const struct inode_operations	*i_op;
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
-	struct super_block	*i_sb;
 	struct file_lock	*i_flock;
 	struct address_space	*i_mapping;
 	struct address_space	i_data;
@@ -785,11 +792,6 @@ struct inode {
 	struct hlist_head	i_fsnotify_marks;
 #endif
 
-	unsigned long		i_state;
-	unsigned long		dirtied_when;	/* jiffies of first dirtying */
-
-	unsigned int		i_flags;
-
 #ifdef CONFIG_IMA
 	/* protected by i_lock */
 	unsigned int		i_readcount; /* struct files open RO */
@@ -1549,8 +1551,15 @@ struct file_operations {
 };
 
 struct inode_operations {
-	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
 	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+	void * (*follow_link) (struct dentry *, struct nameidata *);
+	int (*permission) (struct inode *, int);
+	int (*check_acl)(struct inode *, int);
+
+	int (*readlink) (struct dentry *, char __user *,int);
+	void (*put_link) (struct dentry *, struct nameidata *, void *);
+
+	int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
@@ -1559,12 +1568,7 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,int,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
-	int (*readlink) (struct dentry *, char __user *,int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int);
-	int (*check_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1576,7 +1580,7 @@ struct inode_operations {
 			  loff_t len);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
-};
+} ____cacheline_aligned;
 
 struct seq_file;
 
-- 
cgit v1.2.3-71-gd317


From 34286d6662308d82aed891852d04c7c3a2649b16 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:57 +1100
Subject: fs: rcu-walk aware d_revalidate method

Require filesystems be aware of .d_revalidate being called in rcu-walk
mode (nd->flags & LOOKUP_RCU). For now do a simple push down, returning
-ECHILD from all implementations.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/Locking         | 18 +++++------
 Documentation/filesystems/path-lookup.txt |  5 ++-
 Documentation/filesystems/porting         | 20 ++++++++++++
 Documentation/filesystems/vfs.txt         |  9 ++++++
 drivers/staging/autofs/root.c             |  5 ++-
 drivers/staging/smbfs/dir.c               | 16 ++++++---
 fs/afs/dir.c                              |  4 +++
 fs/autofs4/root.c                         | 13 ++++++--
 fs/ceph/dir.c                             |  7 +++-
 fs/cifs/dir.c                             |  3 ++
 fs/coda/dir.c                             |  7 +++-
 fs/ecryptfs/dentry.c                      |  9 ++++--
 fs/fat/namei_vfat.c                       |  6 ++++
 fs/fuse/dir.c                             |  6 +++-
 fs/gfs2/dentry.c                          | 17 +++++++---
 fs/hfs/sysdep.c                           |  7 +++-
 fs/jfs/namei.c                            |  2 ++
 fs/namei.c                                | 54 ++++++++++++++++++++-----------
 fs/ncpfs/dir.c                            |  4 +++
 fs/ncpfs/inode.c                          |  1 +
 fs/nfs/dir.c                              |  8 +++--
 fs/ocfs2/dcache.c                         | 10 ++++--
 fs/proc/base.c                            | 33 +++++++++++++++----
 fs/proc/proc_sysctl.c                     |  3 ++
 fs/reiserfs/xattr.c                       |  2 ++
 fs/sysfs/dir.c                            |  6 +++-
 include/linux/dcache.h                    |  1 -
 27 files changed, 215 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index bdad6414dfa0..e90ffe61eb65 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -9,7 +9,7 @@ be able to use diff(1).
 
 --------------------------- dentry_operations --------------------------
 prototypes:
-	int (*d_revalidate)(struct dentry *, int);
+	int (*d_revalidate)(struct dentry *, struct nameidata *);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
@@ -21,14 +21,14 @@ prototypes:
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
 
 locking rules:
-		rename_lock	->d_lock	may block
-d_revalidate:	no		no		yes
-d_hash		no		no		no
-d_compare:	yes		no		no
-d_delete:	no		yes		no
-d_release:	no		no		yes
-d_iput:		no		no		yes
-d_dname:	no		no		no
+		rename_lock	->d_lock	may block	rcu-walk
+d_revalidate:	no		no		yes (ref-walk)	maybe
+d_hash		no		no		no		maybe
+d_compare:	yes		no		no		maybe
+d_delete:	no		yes		no		no
+d_release:	no		no		yes		no
+d_iput:		no		no		yes		no
+d_dname:	no		no		no		no
 
 --------------------------- inode_operations --------------------------- 
 prototypes:
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt
index 09b2878724a1..8789d1810bed 100644
--- a/Documentation/filesystems/path-lookup.txt
+++ b/Documentation/filesystems/path-lookup.txt
@@ -317,11 +317,10 @@ The detailed design for rcu-walk is like this:
 The cases where rcu-walk cannot continue are:
 * NULL dentry (ie. any uncached path element)
 * parent with d_inode->i_op->permission or ACLs
-* dentries with d_revalidate
 * Following links
 
-In future patches, permission checks and d_revalidate become rcu-walk aware. It
-may be possible eventually to make following links rcu-walk aware.
+In future patches, permission checks become rcu-walk aware. It may be possible
+eventually to make following links rcu-walk aware.
 
 Uncached path elements will always require dropping to ref-walk mode, at the
 very least because i_mutex needs to be grabbed, and objects allocated.
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index ccf0ce7866b9..cd9756a2709d 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -360,3 +360,23 @@ i_dentry to be reinitialized before it is freed, so an:
   INIT_LIST_HEAD(&inode->i_dentry);
 
 must be done in the RCU callback.
+
+--
+[recommended]
+	vfs now tries to do path walking in "rcu-walk mode", which avoids
+atomic operations and scalability hazards on dentries and inodes (see
+Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above)
+are examples of the changes required to support this. For more complex
+filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
+no changes are required to the filesystem. However, this is costly and loses
+the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
+are rcu-walk aware, shown below. Filesystems should take advantage of this
+where possible.
+
+--
+[mandatory]
+	d_revalidate is a callback that is made on every path element (if
+the filesystem provides it), which requires dropping out of rcu-walk mode. This
+may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
+returned if the filesystem cannot handle rcu-walk. See
+Documentation/filesystems/vfs.txt for more details.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 69b10ff5ec81..c936b4912383 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -863,6 +863,15 @@ struct dentry_operations {
 	dcache. Most filesystems leave this as NULL, because all their
 	dentries in the dcache are valid
 
+	d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU).
+	If in rcu-walk mode, the filesystem must revalidate the dentry without
+	blocking or storing to the dentry, d_parent and d_inode should not be
+	used without care (because they can go NULL), instead nd->inode should
+	be used.
+
+	If a situation is encountered that rcu-walk cannot handle, return
+	-ECHILD and it will be called again in ref-walk mode.
+
   d_hash: called when the VFS adds a dentry to the hash table. The first
 	dentry passed to d_hash is the parent directory that the name is
 	to be hashed into. The inode is the dentry's inode.
diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c
index b09adb57971f..bf0e9755da67 100644
--- a/drivers/staging/autofs/root.c
+++ b/drivers/staging/autofs/root.c
@@ -154,13 +154,16 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
  * yet completely filled in, and revalidate has to delay such
  * lookups..
  */
-static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
+static int autofs_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode * dir;
 	struct autofs_sb_info *sbi;
 	struct autofs_dir_ent *ent;
 	int res;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	lock_kernel();
 	dir = dentry->d_parent->d_inode;
 	sbi = autofs_sbi(dir->i_sb);
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index 78f09412740c..dd612f50749f 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -14,6 +14,7 @@
 #include <linux/ctype.h>
 #include <linux/net.h>
 #include <linux/sched.h>
+#include <linux/namei.h>
 
 #include "smb_fs.h"
 #include "smb_mount.h"
@@ -301,13 +302,20 @@ static const struct dentry_operations smbfs_dentry_operations_case =
  * This is the callback when the dcache has a lookup hit.
  */
 static int
-smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
+smb_lookup_validate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	struct inode * inode = dentry->d_inode;
-	unsigned long age = jiffies - dentry->d_time;
+	struct smb_sb_info *server;
+	struct inode *inode;
+	unsigned long age;
 	int valid;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	server = server_from_dentry(dentry);
+	inode = dentry->d_inode;
+	age = jiffies - dentry->d_time;
+
 	/*
 	 * The default validation is based on dentry age:
 	 * we believe in dentries for a few seconds.  (But each
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b8bb7e7148d0..34a3263d60a4 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	void *dir_version;
 	int ret;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	vnode = AFS_FS_I(dentry->d_inode);
 
 	if (dentry->d_inode)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bfe3f2eb684d..651e4ef563b1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -315,12 +315,19 @@ out_error:
  */
 static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *dir = dentry->d_parent->d_inode;
-	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
-	int oz_mode = autofs4_oz_mode(sbi);
+	struct inode *dir;
+	struct autofs_sb_info *sbi;
+	int oz_mode;
 	int flags = nd ? nd->flags : 0;
 	int status = 1;
 
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	dir = dentry->d_parent->d_inode;
+	sbi = autofs4_sbi(dir->i_sb);
+	oz_mode = autofs4_oz_mode(sbi);
+
 	/* Pending dentry */
 	spin_lock(&sbi->fs_lock);
 	if (autofs4_ispending(dentry)) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index cc01cf826769..fa7ca04ee816 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -990,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
  */
 static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *dir = dentry->d_parent->d_inode;
+	struct inode *dir;
+
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	dir = dentry->d_parent->d_inode;
 
 	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
 	     dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e3b10ca6d453..db2a58c00f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -656,6 +656,9 @@ lookup_out:
 static int
 cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	if (direntry->d_inode) {
 		if (cifs_revalidate_dentry(direntry))
 			return 0;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index aa40c811f8d2..619a8303766e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -18,6 +18,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
+#include <linux/namei.h>
 
 #include <asm/uaccess.h>
 
@@ -541,9 +542,13 @@ out:
 /* called when a cache lookup succeeds */
 static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
 {
-	struct inode *inode = de->d_inode;
+	struct inode *inode;
 	struct coda_inode_info *cii;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = de->d_inode;
 	if (!inode || coda_isroot(inode))
 		goto out;
 	if (is_bad_inode(inode))
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803f7f79..6fc4f319b550 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,12 +44,17 @@
  */
 static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
 	struct dentry *dentry_save;
 	struct vfsmount *vfsmount_save;
 	int rc = 1;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
 	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
 		goto out;
 	dentry_save = nd->path.dentry;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 3be5ed7d859f..e3ffc5e12332 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 
 static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	/* This is not negative dentry. Always valid. */
 	if (dentry->d_inode)
 		return 1;
@@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	/*
 	 * This is not negative dentry. Always valid.
 	 *
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9a8a426a395..07f4b5e675fc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
  */
 static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 {
-	struct inode *inode = entry->d_inode;
+	struct inode *inode;
+
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
 
+	inode = entry->d_inode;
 	if (inode && is_bad_inode(inode))
 		return 0;
 	else if (fuse_dentry_time(entry) < get_jiffies_64()) {
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 50497f65763b..4a456338b873 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -11,6 +11,7 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/namei.h>
 #include <linux/crc32.h>
 
 #include "gfs2.h"
@@ -34,15 +35,23 @@
 
 static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *parent = dget_parent(dentry);
-	struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
-	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
-	struct inode *inode = dentry->d_inode;
+	struct dentry *parent;
+	struct gfs2_sbd *sdp;
+	struct gfs2_inode *dip;
+	struct inode *inode;
 	struct gfs2_holder d_gh;
 	struct gfs2_inode *ip = NULL;
 	int error;
 	int had_lock = 0;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	parent = dget_parent(dentry);
+	sdp = GFS2_SB(parent->d_inode);
+	dip = GFS2_I(parent->d_inode);
+	inode = dentry->d_inode;
+
 	if (inode) {
 		if (is_bad_inode(inode))
 			goto invalid;
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 7478f5c219aa..19cf291eb91f 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -8,15 +8,20 @@
  * This file contains the code to do various system dependent things.
  */
 
+#include <linux/namei.h>
 #include "hfs_fs.h"
 
 /* dentry case-handling: just lowercase everything */
 
 static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode;
 	int diff;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
 	if(!inode)
 		return 1;
 
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a151cbdec626..4414e3a42264 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1608,6 +1608,8 @@ out:
 
 static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
 	/*
 	 * This is not negative dentry. Always valid.
 	 *
diff --git a/fs/namei.c b/fs/namei.c
index 90bd2873e117..6e275363e89d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -563,10 +563,26 @@ void release_open_intent(struct nameidata *nd)
 		fput(nd->intent.open.file);
 }
 
+static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	int status;
+
+	status = dentry->d_op->d_revalidate(dentry, nd);
+	if (status == -ECHILD) {
+		if (nameidata_dentry_drop_rcu(nd, dentry))
+			return status;
+		status = dentry->d_op->d_revalidate(dentry, nd);
+	}
+
+	return status;
+}
+
 static inline struct dentry *
 do_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	int status = dentry->d_op->d_revalidate(dentry, nd);
+	int status;
+
+	status = d_revalidate(dentry, nd);
 	if (unlikely(status <= 0)) {
 		/*
 		 * The dentry failed validation.
@@ -574,14 +590,20 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
 		 * the dentry otherwise d_revalidate is asking us
 		 * to return a fail status.
 		 */
-		if (!status) {
+		if (status < 0) {
+			/* If we're in rcu-walk, we don't have a ref */
+			if (!(nd->flags & LOOKUP_RCU))
+				dput(dentry);
+			dentry = ERR_PTR(status);
+
+		} else {
+			/* Don't d_invalidate in rcu-walk mode */
+			if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
+				return ERR_PTR(-ECHILD);
 			if (!d_invalidate(dentry)) {
 				dput(dentry);
 				dentry = NULL;
 			}
-		} else {
-			dput(dentry);
-			dentry = ERR_PTR(status);
 		}
 	}
 	return dentry;
@@ -626,7 +648,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
 	if (!need_reval_dot(dentry))
 		return 0;
 
-	status = dentry->d_op->d_revalidate(dentry, nd);
+	status = d_revalidate(dentry, nd);
 	if (status > 0)
 		return 0;
 
@@ -1039,12 +1061,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 
 		nd->seq = seq;
-		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
-			/* We commonly drop rcu-walk here */
-			if (nameidata_dentry_drop_rcu(nd, dentry))
-				return -ECHILD;
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE)
 			goto need_revalidate;
-		}
 		path->mnt = mnt;
 		path->dentry = dentry;
 		__follow_mount_rcu(nd, path, inode);
@@ -1292,12 +1310,11 @@ return_reval:
 		 * We may need to check the cached dentry for staleness.
 		 */
 		if (need_reval_dot(nd->path.dentry)) {
-			if (nameidata_drop_rcu_maybe(nd))
-				return -ECHILD;
-			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
-			if (!nd->path.dentry->d_op->d_revalidate(
-					nd->path.dentry, nd))
+			err = d_revalidate(nd->path.dentry, nd);
+			if (!err)
+				err = -ESTALE;
+			if (err < 0)
 				break;
 		}
 return_base:
@@ -2080,10 +2097,11 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		dir = nd->path.dentry;
 	case LAST_DOT:
 		if (need_reval_dot(dir)) {
-			if (!dir->d_op->d_revalidate(dir, nd)) {
+			error = d_revalidate(nd->path.dentry, nd);
+			if (!error)
 				error = -ESTALE;
+			if (error < 0)
 				goto exit;
-			}
 		}
 		/* fallthrough */
 	case LAST_ROOT:
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 4b9cbb28d7fa..28f136d4aaec 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
 
@@ -308,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
 	int res, val = 0, len;
 	__u8 __name[NCP_MAXPATHLEN + 1];
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
 
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0c75a5f3cafd..9531c052d7a4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -29,6 +29,7 @@
 #include <linux/vfs.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/namei.h>
 
 #include <linux/ncp_fs.h>
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 37e0a8bb077e..58beace14b19 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -938,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
  * component of the path.
  * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
  */
-static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask)
+static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
+						unsigned int mask)
 {
 	if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
 		return 0;
@@ -1018,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
  * If the parent directory is seen to have changed, we throw out the
  * cached dentry and do a new lookup.
  */
-static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
+static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *dir;
 	struct inode *inode;
@@ -1027,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	struct nfs_fattr *fattr = NULL;
 	int error;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
 	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 4d54c60ceee4..0310b16a7238 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
 				   struct nameidata *nd)
 {
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode;
 	int ret = 0;    /* if all else fails, just return false */
-	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+	struct ocfs2_super *osb;
+
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	osb = OCFS2_SB(dentry->d_sb);
 
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 		   dentry->d_name.len, dentry->d_name.name);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 85f0a80912aa..dc5b2fcadc3b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1719,10 +1719,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
  */
 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *task = get_proc_task(inode);
+	struct inode *inode;
+	struct task_struct *task;
 	const struct cred *cred;
 
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+
 	if (task) {
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
@@ -1888,12 +1894,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *task = get_proc_task(inode);
-	int fd = proc_fd(inode);
+	struct inode *inode;
+	struct task_struct *task;
+	int fd;
 	struct files_struct *files;
 	const struct cred *cred;
 
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	fd = proc_fd(inode);
+
 	if (task) {
 		files = get_files_struct(task);
 		if (files) {
@@ -2563,8 +2576,14 @@ static const struct pid_entry proc_base_stuff[] = {
  */
 static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *task = get_proc_task(inode);
+	struct inode *inode;
+	struct task_struct *task;
+
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
 	if (task) {
 		put_task_struct(task);
 		return 1;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 35efd85a4d32..c9097f43b425 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
+#include <linux/namei.h>
 #include "internal.h"
 
 static const struct dentry_operations proc_sys_dentry_operations;
@@ -389,6 +390,8 @@ static const struct inode_operations proc_sys_dir_operations = {
 
 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
 	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
 
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e0f0d7ea10a1..9ea22a56cdf1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -972,6 +972,8 @@ int reiserfs_permission(struct inode *inode, int mask)
 
 static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
 	return -EPERM;
 }
 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3e076caa8daf..ea9120a830d8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -239,9 +239,13 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
 
 static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct sysfs_dirent *sd = dentry->d_fsdata;
+	struct sysfs_dirent *sd;
 	int is_dir;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	sd = dentry->d_fsdata;
 	mutex_lock(&sysfs_mutex);
 
 	/* The sysfs dirent has been deleted */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b1aeda077258..8b2064d02928 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -190,7 +190,6 @@ struct dentry_operations {
 #define DCACHE_OP_REVALIDATE	0x4000
 #define DCACHE_OP_DELETE	0x8000
 
-
 extern spinlock_t dcache_inode_lock;
 extern seqlock_t rename_lock;
 
-- 
cgit v1.2.3-71-gd317


From b74c79e99389cd79b31fcc08f82c24e492e63c7e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:58 +1100
Subject: fs: provide rcu-walk aware permission i_ops

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/Locking         |  6 +--
 Documentation/filesystems/path-lookup.txt | 44 ++++++++++++++++--
 Documentation/filesystems/porting         |  5 +++
 Documentation/filesystems/vfs.txt         | 10 ++++-
 drivers/staging/smbfs/file.c              |  5 ++-
 fs/9p/acl.c                               |  5 ++-
 fs/9p/acl.h                               |  2 +-
 fs/afs/internal.h                         |  2 +-
 fs/afs/security.c                         |  7 ++-
 fs/bad_inode.c                            |  5 ++-
 fs/btrfs/acl.c                            |  6 ++-
 fs/btrfs/ctree.h                          |  2 +-
 fs/btrfs/inode.c                          |  7 ++-
 fs/ceph/inode.c                           | 11 +++--
 fs/ceph/super.h                           |  2 +-
 fs/cifs/cifsfs.c                          |  7 ++-
 fs/coda/dir.c                             |  5 ++-
 fs/coda/pioctl.c                          |  6 ++-
 fs/ecryptfs/inode.c                       |  4 +-
 fs/ext2/acl.c                             |  8 +++-
 fs/ext2/acl.h                             |  2 +-
 fs/ext3/acl.c                             |  8 +++-
 fs/ext3/acl.h                             |  2 +-
 fs/ext4/acl.c                             |  8 +++-
 fs/ext4/acl.h                             |  2 +-
 fs/fuse/dir.c                             | 10 +++--
 fs/generic_acl.c                          |  8 +++-
 fs/gfs2/acl.c                             |  5 ++-
 fs/gfs2/acl.h                             |  2 +-
 fs/gfs2/file.c                            |  2 +-
 fs/gfs2/inode.c                           |  4 +-
 fs/gfs2/inode.h                           |  2 +-
 fs/gfs2/ops_inode.c                       | 18 +++++---
 fs/hostfs/hostfs_kern.c                   |  7 ++-
 fs/hpfs/namei.c                           |  2 +-
 fs/jffs2/acl.c                            |  5 ++-
 fs/jffs2/acl.h                            |  2 +-
 fs/jfs/acl.c                              |  8 +++-
 fs/jfs/jfs_acl.h                          |  2 +-
 fs/logfs/dir.c                            |  6 ++-
 fs/namei.c                                | 75 ++++++++++++-------------------
 fs/nfs/dir.c                              |  7 ++-
 fs/nilfs2/inode.c                         | 10 +++--
 fs/nilfs2/nilfs.h                         |  2 +-
 fs/ocfs2/acl.c                            |  8 +++-
 fs/ocfs2/acl.h                            |  2 +-
 fs/ocfs2/file.c                           |  7 ++-
 fs/ocfs2/file.h                           |  2 +-
 fs/proc/base.c                            |  6 ++-
 fs/proc/proc_sysctl.c                     |  5 ++-
 fs/reiserfs/xattr.c                       | 14 ++++--
 fs/sysfs/inode.c                          | 11 +++--
 fs/sysfs/sysfs.h                          |  2 +-
 fs/xfs/linux-2.6/xfs_acl.c                |  8 +++-
 fs/xfs/xfs_acl.h                          |  2 +-
 include/linux/coda_linux.h                |  2 +-
 include/linux/fs.h                        | 10 +++--
 include/linux/generic_acl.h               |  2 +-
 include/linux/nfs_fs.h                    |  2 +-
 include/linux/reiserfs_xattr.h            |  2 +-
 60 files changed, 287 insertions(+), 146 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index e90ffe61eb65..977d8919cc69 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -47,8 +47,8 @@ ata *);
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, struct nameidata *);
-	int (*check_acl)(struct inode *, int);
+	int (*permission) (struct inode *, int, unsigned int);
+	int (*check_acl)(struct inode *, int, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -76,7 +76,7 @@ follow_link:	no
 put_link:	no
 truncate:	yes		(see below)
 setattr:	yes
-permission:	no
+permission:	no (may not block if called in rcu-walk mode)
 check_acl:	no
 getattr:	no
 setxattr:	yes
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt
index 8789d1810bed..eb59c8b44be9 100644
--- a/Documentation/filesystems/path-lookup.txt
+++ b/Documentation/filesystems/path-lookup.txt
@@ -316,11 +316,9 @@ The detailed design for rcu-walk is like this:
 
 The cases where rcu-walk cannot continue are:
 * NULL dentry (ie. any uncached path element)
-* parent with d_inode->i_op->permission or ACLs
 * Following links
 
-In future patches, permission checks become rcu-walk aware. It may be possible
-eventually to make following links rcu-walk aware.
+It may be possible eventually to make following links rcu-walk aware.
 
 Uncached path elements will always require dropping to ref-walk mode, at the
 very least because i_mutex needs to be grabbed, and objects allocated.
@@ -336,9 +334,49 @@ or stored into. The result is massive improvements in performance and
 scalability of path resolution.
 
 
+Interesting statistics
+======================
+
+The following table gives rcu lookup statistics for a few simple workloads
+(2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to
+drop rcu that fail due to d_seq failure and requiring the entire path lookup
+again. Other cases are successful rcu-drops that are required before the final
+element, nodentry for missing dentry, revalidate for filesystem revalidate
+routine requiring rcu drop, permission for permission check requiring drop,
+and link for symlink traversal requiring drop.
+
+     rcu-lookups     restart  nodentry          link  revalidate  permission
+bootup     47121           0      4624          1010       10283        7852
+dbench  25386793           0   6778659(26.7%)     55         549        1156
+kbuild   2696672          10     64442(2.3%)  108764(4.0%)     1        1590
+git diff   39605           0        28             2           0         106
+vfstest 24185492        4945    708725(2.9%) 1076136(4.4%)     0        2651
+
+What this shows is that failed rcu-walk lookups, ie. ones that are restarted
+entirely with ref-walk, are quite rare. Even the "vfstest" case which
+specifically has concurrent renames/mkdir/rmdir/ creat/unlink/etc to excercise
+such races is not showing a huge amount of restarts.
+
+Dropping from rcu-walk to ref-walk mean that we have encountered a dentry where
+the reference count needs to be taken for some reason. This is either because
+we have reached the target of the path walk, or because we have encountered a
+condition that can't be resolved in rcu-walk mode.  Ideally, we drop rcu-walk
+only when we have reached the target dentry, so the other statistics show where
+this does not happen.
+
+Note that a graceful drop from rcu-walk mode due to something such as the
+dentry not existing (which can be common) is not necessarily a failure of
+rcu-walk scheme, because some elements of the path may have been walked in
+rcu-walk mode. The further we get from common path elements (such as cwd or
+root), the less contended the dentry is likely to be. The closer we are to
+common path elements, the more likely they will exist in dentry cache.
+
+
 Papers and other documentation on dcache locking
 ================================================
 
 1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
 
 2. http://lse.sourceforge.net/locking/dcache/dcache.html
+
+
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cd9756a2709d..07a32b42cf9c 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -379,4 +379,9 @@ where possible.
 the filesystem provides it), which requires dropping out of rcu-walk mode. This
 may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
 returned if the filesystem cannot handle rcu-walk. See
+Documentation/filesystems/vfs.txt for more details.
+
+	permission and check_acl are inode permission checks that are called
+on many or all directory inodes on the way down a path walk (to check for
+exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See
 Documentation/filesystems/vfs.txt for more details.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index c936b4912383..fbb324e2bd43 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -325,7 +325,8 @@ struct inode_operations {
         void * (*follow_link) (struct dentry *, struct nameidata *);
         void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*permission) (struct inode *, int, unsigned int);
+	int (*check_acl)(struct inode *, int, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -414,6 +415,13 @@ otherwise noted.
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
+	May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk
+	mode, the filesystem must check the permission without blocking or
+	storing to the inode.
+
+	If a situation is encountered that rcu-walk cannot handle, return
+	-ECHILD and it will be called again in ref-walk mode.
+
   setattr: called by the VFS to set attributes for a file. This method
   	is called by chmod(2) and related system calls.
 
diff --git a/drivers/staging/smbfs/file.c b/drivers/staging/smbfs/file.c
index 5dcd19c60eb9..31372e7b12de 100644
--- a/drivers/staging/smbfs/file.c
+++ b/drivers/staging/smbfs/file.c
@@ -407,11 +407,14 @@ smb_file_release(struct inode *inode, struct file * file)
  * privileges, so we need our own check for this.
  */
 static int
-smb_file_permission(struct inode *inode, int mask)
+smb_file_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	int mode = inode->i_mode;
 	int error = 0;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	VERBOSE("mode=%x, mask=%x\n", mode, mask);
 
 	/* Look at user permissions */
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 12d602351dbe..6e58c4ca1e6e 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
 	return acl;
 }
 
-int v9fs_check_acl(struct inode *inode, int mask)
+int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	struct v9fs_session_info *v9ses;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	v9ses = v9fs_inode2v9ses(inode);
 	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
 		/*
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 59e18c2e8c7e..7ef3ac9f6d95 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
 
 #ifdef CONFIG_9P_FS_POSIX_ACL
 extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern int v9fs_check_acl(struct inode *inode, int mask);
+extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			       struct posix_acl *, struct posix_acl *);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index cca8eef736fc..6d4bc1c8ff60 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
 extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int);
+extern int afs_permission(struct inode *, int, unsigned int);
 
 /*
  * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index bb4ed144d0e4..f44b9d355377 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
  * - AFS ACLs are attached to directories only, and a file is controlled by its
  *   parent directory's ACL
  */
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_access_t uninitialized_var(access);
 	struct key *key;
 	int ret;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	_enter("{{%x:%u},%lx},%x,",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
 	}
 
 	key_put(key);
-	ret = generic_permission(inode, mask, NULL);
+	ret = generic_permission(inode, mask, flags, NULL);
 	_leave(" = %d", ret);
 	return ret;
 
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f024d8aaddef..9ad2369d9e35 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
 	return -EIO;
 }
 
-static int bad_inode_permission(struct inode *inode, int mask)
+static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	return -EIO;
 }
 
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b6..cb518a4b917c 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,13 +185,15 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	return ret;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask)
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	int error = -EAGAIN;
 
-	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af52f6d7a4d8..a142d204b526 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
 
 /* acl.c */
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
 #else
 #define btrfs_check_acl NULL
 #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 63e4546b478a..5cf0db0081f9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7211,11 +7211,14 @@ static int btrfs_set_page_dirty(struct page *page)
 	return __set_page_dirty_nobuffers(page);
 }
 
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
 		return -EACCES;
-	return generic_permission(inode, mask, btrfs_check_acl);
+	return generic_permission(inode, mask, flags, btrfs_check_acl);
 }
 
 static const struct inode_operations btrfs_dir_inode_operations = {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 47f8c8baf3b5..e61de4f7b99d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1781,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
  * Check inode permissions.  We verify we have a valid value for
  * the AUTH cap, then call the generic handler.
  */
-int ceph_permission(struct inode *inode, int mask)
+int ceph_permission(struct inode *inode, int mask, unsigned int flags)
 {
-	int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
+	int err;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
 
 	if (!err)
-		err = generic_permission(inode, mask, NULL);
+		err = generic_permission(inode, mask, flags, NULL);
 	return err;
 }
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7f01728a4657..4553d8829edb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
 extern void ceph_queue_writeback(struct inode *inode);
 
 extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
 extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
 extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
 			struct kstat *stat);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 223717dcc401..8e21e0fe65d5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int cifs_permission(struct inode *inode, int mask)
+static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	struct cifs_sb_info *cifs_sb;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	cifs_sb = CIFS_SB(inode->i_sb);
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
 		on the client (above and beyond ACL on servers) for
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */
-		return generic_permission(inode, mask, NULL);
+		return generic_permission(inode, mask, flags, NULL);
 }
 
 static struct kmem_cache *cifs_inode_cachep;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 619a8303766e..29badd91360f 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -135,10 +135,13 @@ exit:
 }
 
 
-int coda_permission(struct inode *inode, int mask)
+int coda_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	int error;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
  
 	if (!mask)
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2fd89b5c5c7b..741f0bd03918 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
 #include <linux/coda_psdev.h>
 
 /* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask);
+static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
 static long coda_pioctl(struct file *filp, unsigned int cmd,
 			unsigned long user_data);
 
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
 };
 
 /* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask)
+static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 	return (mask & MAY_EXEC) ? -EACCES : 0;
 }
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f91b35db4c6e..337352a94751 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 }
 
 static int
-ecryptfs_permission(struct inode *inode, int mask)
+ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 	return inode_permission(ecryptfs_inode_to_lower(inode), mask);
 }
 
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc0431bada..dd9bb3f0c8d7 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,14 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }
 
 int
-ext2_check_acl(struct inode *inode, int mask)
+ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb9ac44..c939b7b12099 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext2_check_acl (struct inode *, int);
+extern int ext2_check_acl (struct inode *, int, unsigned int);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe212183..9e49da8302d3 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,14 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 }
 
 int
-ext3_check_acl(struct inode *inode, int mask)
+ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 597334626de9..5faf8048e906 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext3_check_acl (struct inode *, int);
+extern int ext3_check_acl (struct inode *, int, unsigned int);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ead..373dcaeedba9 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 }
 
 int
-ext4_check_acl(struct inode *inode, int mask)
+ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac4..dec821168fd4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext4_check_acl(struct inode *, int);
+extern int ext4_check_acl(struct inode *, int, unsigned int);
 extern int ext4_acl_chmod(struct inode *);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 07f4b5e675fc..f738599fd8cd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -985,12 +985,15 @@ static int fuse_access(struct inode *inode, int mask)
  * access request is sent.  Execute permission is still checked
  * locally based on file mode.
  */
-static int fuse_permission(struct inode *inode, int mask)
+static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	bool refreshed = false;
 	int err = 0;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 
@@ -1005,7 +1008,7 @@ static int fuse_permission(struct inode *inode, int mask)
 	}
 
 	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-		err = generic_permission(inode, mask, NULL);
+		err = generic_permission(inode, mask, flags, NULL);
 
 		/* If permission is denied, try to refresh file
 		   attributes.  This is also needed, because the root
@@ -1013,7 +1016,8 @@ static int fuse_permission(struct inode *inode, int mask)
 		if (err == -EACCES && !refreshed) {
 			err = fuse_do_getattr(inode, NULL, NULL);
 			if (!err)
-				err = generic_permission(inode, mask, NULL);
+				err = generic_permission(inode, mask,
+							flags, NULL);
 		}
 
 		/* Note: the opposite of the above test does not
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a5a693..628004282130 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,10 +190,14 @@ generic_acl_chmod(struct inode *inode)
 }
 
 int
-generic_check_acl(struct inode *inode, int mask)
+generic_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
 	if (acl) {
 		int error = posix_acl_permission(inode, acl, mask);
 		posix_acl_release(acl);
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 48171f4c943d..7118f1a780a9 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
  * Returns: errno
  */
 
-int gfs2_check_acl(struct inode *inode, int mask)
+int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	int error;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b522b0cb39ea..a93907c8159b 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
 #define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
 #define GFS2_ACL_MAX_ENTRIES		25
 
-extern int gfs2_check_acl(struct inode *inode, int mask);
+extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
 extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
 extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
 extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa996471ec5c..fca6689e12e6 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	    !capable(CAP_LINUX_IMMUTABLE))
 		goto out;
 	if (!IS_IMMUTABLE(inode)) {
-		error = gfs2_permission(inode, MAY_WRITE);
+		error = gfs2_permission(inode, MAY_WRITE, 0);
 		if (error)
 			goto out;
 	}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e1213f7f9217..6163be9686be 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -509,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 	}
 
 	if (!is_root) {
-		error = gfs2_permission(dir, MAY_EXEC);
+		error = gfs2_permission(dir, MAY_EXEC, 0);
 		if (error)
 			goto out;
 	}
@@ -539,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 {
 	int error;
 
-	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
 	if (error)
 		return error;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d8499fadcc53..732a183efdb3 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -113,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
 				  const struct qstr *name,
 				  unsigned int mode, dev_t dev);
-extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
 extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f28f89796f4d..5c63a06fcd7c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (error)
 		goto out_child;
 
-	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
+	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
 	if (error)
 		goto out_gunlock;
 
@@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (IS_APPEND(&dip->i_inode))
 		return -EPERM;
 
-	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
 	if (error)
 		return error;
 
@@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			}
 		}
 	} else {
-		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
+		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
 		if (error)
 			goto out_gunlock;
 
@@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	/* Check out the dir to be renamed */
 
 	if (dir_rename) {
-		error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
 		if (error)
 			goto out_gunlock;
 	}
@@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
  * Returns: errno
  */
 
-int gfs2_permission(struct inode *inode, int mask)
+int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_inode *ip;
 	struct gfs2_holder i_gh;
 	int error;
 	int unlock = 0;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	ip = GFS2_I(inode);
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 		if (error)
@@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask)
 	if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
 		error = -EACCES;
 	else
-		error = generic_permission(inode, mask, gfs2_check_acl);
+		error = generic_permission(inode, mask, flags, gfs2_check_acl);
 	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
 
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 0bc81cf256b8..d3244d949a4e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -749,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	return err;
 }
 
-int hostfs_permission(struct inode *ino, int desired)
+int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
 {
 	char *name;
 	int r = 0, w = 0, x = 0, err;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	if (desired & MAY_READ) r = 1;
 	if (desired & MAY_WRITE) w = 1;
 	if (desired & MAY_EXEC) x = 1;
@@ -768,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired)
 		err = access_file(name, r, w, x);
 	__putname(name);
 	if (!err)
-		err = generic_permission(ino, desired, NULL);
+		err = generic_permission(ino, desired, flags, NULL);
 	return err;
 }
 
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 11c2b4080f65..f4ad9e31ddc4 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -419,7 +419,7 @@ again:
 			unlock_kernel();
 			return -ENOSPC;
 		}
-		if (generic_permission(inode, MAY_WRITE, NULL) ||
+		if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
 		    !S_ISREG(inode->i_mode) ||
 		    get_write_access(inode)) {
 			d_rehash(dentry);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 54a92fd02bbd..95b79672150a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	return rc;
 }
 
-int jffs2_check_acl(struct inode *inode, int mask)
+int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	int rc;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 5e42de8d9541..3119f59253d3 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-extern int jffs2_check_acl(struct inode *, int);
+extern int jffs2_check_acl(struct inode *, int, unsigned int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 1057a4998e4e..e5de9422fa32 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,10 +114,14 @@ out:
 	return rc;
 }
 
-int jfs_check_acl(struct inode *inode, int mask)
+int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
+	struct posix_acl *acl;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl) {
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 54e07559878d..f9285c4900fa 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-int jfs_check_acl(struct inode *, int);
+int jfs_check_acl(struct inode *, int, unsigned int flags);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_acl_chmod(struct inode *inode);
 
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 409dfd65e9a1..f9ddf0c388c8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
 	return __logfs_create(dir, dentry, inode, target, destlen);
 }
 
-static int logfs_permission(struct inode *inode, int mask)
+static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
-	return generic_permission(inode, mask, NULL);
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+	return generic_permission(inode, mask, flags, NULL);
 }
 
 static int logfs_link(struct dentry *old_dentry, struct inode *dir,
diff --git a/fs/namei.c b/fs/namei.c
index 6e275363e89d..4e957bf744ae 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
 /*
  * This does basic POSIX ACL permission checking
  */
-static inline int __acl_permission_check(struct inode *inode, int mask,
-		int (*check_acl)(struct inode *inode, int mask), int rcu)
+static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
+		int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
 {
 	umode_t			mode = inode->i_mode;
 
@@ -180,13 +180,9 @@ static inline int __acl_permission_check(struct inode *inode, int mask,
 		mode >>= 6;
 	else {
 		if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
-			if (rcu) {
-				return -ECHILD;
-			} else {
-				int error = check_acl(inode, mask);
-				if (error != -EAGAIN)
-					return error;
-			}
+			int error = check_acl(inode, mask, flags);
+			if (error != -EAGAIN)
+				return error;
 		}
 
 		if (in_group_p(inode->i_gid))
@@ -201,32 +197,31 @@ static inline int __acl_permission_check(struct inode *inode, int mask,
 	return -EACCES;
 }
 
-static inline int acl_permission_check(struct inode *inode, int mask,
-		int (*check_acl)(struct inode *inode, int mask))
-{
-	return __acl_permission_check(inode, mask, check_acl, 0);
-}
-
 /**
- * generic_permission  -  check for access rights on a Posix-like filesystem
+ * generic_permission -  check for access rights on a Posix-like filesystem
  * @inode:	inode to check access rights for
  * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
  * @check_acl:	optional callback to check for Posix ACLs
+ * @flags	IPERM_FLAG_ flags.
  *
  * Used to check for read/write/execute permissions on a file.
  * We use "fsuid" for this, letting us set arbitrary permissions
  * for filesystem access without changing the "normal" uids which
- * are used for other things..
+ * are used for other things.
+ *
+ * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
+ * request cannot be satisfied (eg. requires blocking or too much complexity).
+ * It would then be called again in ref-walk mode.
  */
-int generic_permission(struct inode *inode, int mask,
-		int (*check_acl)(struct inode *inode, int mask))
+int generic_permission(struct inode *inode, int mask, unsigned int flags,
+	int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
 {
 	int ret;
 
 	/*
 	 * Do the basic POSIX ACL permission checks.
 	 */
-	ret = acl_permission_check(inode, mask, check_acl);
+	ret = acl_permission_check(inode, mask, flags, check_acl);
 	if (ret != -EACCES)
 		return ret;
 
@@ -281,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
 	}
 
 	if (inode->i_op->permission)
-		retval = inode->i_op->permission(inode, mask);
+		retval = inode->i_op->permission(inode, mask, 0);
 	else
-		retval = generic_permission(inode, mask, inode->i_op->check_acl);
+		retval = generic_permission(inode, mask, 0,
+				inode->i_op->check_acl);
 
 	if (retval)
 		return retval;
@@ -668,22 +664,19 @@ force_reval_path(struct path *path, struct nameidata *nd)
  * short-cut DAC fails, then call ->permission() to do more
  * complete permission check.
  */
-static inline int __exec_permission(struct inode *inode, int rcu)
+static inline int exec_permission(struct inode *inode, unsigned int flags)
 {
 	int ret;
 
 	if (inode->i_op->permission) {
-		if (rcu)
-			return -ECHILD;
-		ret = inode->i_op->permission(inode, MAY_EXEC);
-		if (!ret)
-			goto ok;
-		return ret;
+		ret = inode->i_op->permission(inode, MAY_EXEC, flags);
+	} else {
+		ret = acl_permission_check(inode, MAY_EXEC, flags,
+				inode->i_op->check_acl);
 	}
-	ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu);
-	if (!ret)
+	if (likely(!ret))
 		goto ok;
-	if (rcu && ret == -ECHILD)
+	if (ret == -ECHILD)
 		return ret;
 
 	if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
@@ -691,17 +684,7 @@ static inline int __exec_permission(struct inode *inode, int rcu)
 
 	return ret;
 ok:
-	return security_inode_exec_permission(inode, rcu);
-}
-
-static int exec_permission(struct inode *inode)
-{
-	return __exec_permission(inode, 0);
-}
-
-static int exec_permission_rcu(struct inode *inode)
-{
-	return __exec_permission(inode, 1);
+	return security_inode_exec_permission(inode, flags);
 }
 
 static __always_inline void set_root(struct nameidata *nd)
@@ -1165,7 +1148,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
 		nd->flags |= LOOKUP_CONTINUE;
 		if (nd->flags & LOOKUP_RCU) {
-			err = exec_permission_rcu(nd->inode);
+			err = exec_permission(nd->inode, IPERM_FLAG_RCU);
 			if (err == -ECHILD) {
 				if (nameidata_drop_rcu(nd))
 					return -ECHILD;
@@ -1173,7 +1156,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			}
 		} else {
 exec_again:
-			err = exec_permission(nd->inode);
+			err = exec_permission(nd->inode, 0);
 		}
  		if (err)
 			break;
@@ -1620,7 +1603,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	struct dentry *dentry;
 	int err;
 
-	err = exec_permission(inode);
+	err = exec_permission(inode, 0);
 	if (err)
 		return ERR_PTR(err);
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58beace14b19..d33da530097a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2189,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
 	return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
 }
 
-int nfs_permission(struct inode *inode, int mask)
+int nfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	struct rpc_cred *cred;
 	int res = 0;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
 
 	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2241,7 +2244,7 @@ out:
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
-		res = generic_permission(inode, mask, NULL);
+		res = generic_permission(inode, mask, flags, NULL);
 	goto out;
 }
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 71d4bc8464e0..77b48c8fab17 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -785,15 +785,19 @@ out_err:
 	return err;
 }
 
-int nilfs_permission(struct inode *inode, int mask)
+int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
-	struct nilfs_root *root = NILFS_I(inode)->i_root;
+	struct nilfs_root *root;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 
+	root = NILFS_I(inode)->i_root;
 	if ((mask & MAY_WRITE) && root &&
 	    root->cno != NILFS_CPTREE_CURRENT_CNO)
 		return -EROFS; /* snapshot is not writable */
 
-	return generic_permission(inode, mask, NULL);
+	return generic_permission(inode, mask, flags, NULL);
 }
 
 int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f7560da5a567..0ca98823db59 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -256,7 +256,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
 extern void nilfs_truncate(struct inode *);
 extern void nilfs_evict_inode(struct inode *);
 extern int nilfs_setattr(struct dentry *, struct iattr *);
-int nilfs_permission(struct inode *inode, int mask);
+int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
 extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
 				  struct buffer_head **);
 extern int nilfs_inode_dirty(struct inode *);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 391915093fe1..704f6b1742f3 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle,
 	return ret;
 }
 
-int ocfs2_check_acl(struct inode *inode, int mask)
+int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb;
 	struct buffer_head *di_bh = NULL;
 	struct posix_acl *acl;
 	int ret = -EAGAIN;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	osb = OCFS2_SB(inode->i_sb);
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return ret;
 
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 5c5d31f05853..4fe7c9cf4bfb 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
 	__le32 e_id;
 };
 
-extern int ocfs2_check_acl(struct inode *, int);
+extern int ocfs2_check_acl(struct inode *, int, unsigned int);
 extern int ocfs2_acl_chmod(struct inode *);
 extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
 			  struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f6cba566429d..bdadbae09094 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1307,10 +1307,13 @@ bail:
 	return err;
 }
 
-int ocfs2_permission(struct inode *inode, int mask)
+int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	int ret;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	mlog_entry_void();
 
 	ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask)
 		goto out;
 	}
 
-	ret = generic_permission(inode, mask, ocfs2_check_acl);
+	ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
 
 	ocfs2_inode_unlock(inode, 0);
 out:
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..f5afbbef6703 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		  struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask);
+int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
 
 int ocfs2_should_update_atime(struct inode *inode,
 			      struct vfsmount *vfsmnt);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc5b2fcadc3b..b953d41d9abf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2114,11 +2114,13 @@ static const struct file_operations proc_fd_operations = {
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
  */
-static int proc_fd_permission(struct inode *inode, int mask)
+static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	int rv;
 
-	rv = generic_permission(inode, mask, NULL);
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+	rv = generic_permission(inode, mask, flags, NULL);
 	if (rv == 0)
 		return 0;
 	if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c9097f43b425..09a1f92a34ef 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -295,7 +295,7 @@ out:
 	return ret;
 }
 
-static int proc_sys_permission(struct inode *inode, int mask)
+static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
 {
 	/*
 	 * sysctl entries that are not writeable,
@@ -305,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask)
 	struct ctl_table *table;
 	int error;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	/* Executable files are not allowed under /proc/sys/ */
 	if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
 		return -EACCES;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 9ea22a56cdf1..3cfb2e933644 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -870,11 +870,14 @@ out:
 	return err;
 }
 
-static int reiserfs_check_acl(struct inode *inode, int mask)
+static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	int error = -EAGAIN; /* do regular unix permission checks by default */
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
 
 	if (acl) {
@@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s)
 	return 0;
 }
 
-int reiserfs_permission(struct inode *inode, int mask)
+int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 	/*
 	 * We don't do permission checks on the internal objects.
 	 * Permissions are determined by the "owning" object.
@@ -965,9 +970,10 @@ int reiserfs_permission(struct inode *inode, int mask)
 	 * Stat data v1 doesn't support ACLs.
 	 */
 	if (get_inode_sd_version(inode) != STAT_DATA_V1)
-		return generic_permission(inode, mask, reiserfs_check_acl);
+		return generic_permission(inode, mask, flags,
+					reiserfs_check_acl);
 #endif
-	return generic_permission(inode, mask, NULL);
+	return generic_permission(inode, mask, flags, NULL);
 }
 
 static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd8ba33..30ac27345586 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -348,13 +348,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
 		return -ENOENT;
 }
 
-int sysfs_permission(struct inode *inode, int mask)
+int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
-	struct sysfs_dirent *sd = inode->i_private;
+	struct sysfs_dirent *sd;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	sd = inode->i_private;
 
 	mutex_lock(&sysfs_mutex);
 	sysfs_refresh_inode(sd, inode);
 	mutex_unlock(&sysfs_mutex);
 
-	return generic_permission(inode, mask, NULL);
+	return generic_permission(inode, mask, flags, NULL);
 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d9be60a2e956..ffaaa816bfba 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -200,7 +200,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
 struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
 void sysfs_evict_inode(struct inode *inode);
 int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
-int sysfs_permission(struct inode *inode, int mask);
+int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
 int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..4b11eaf6a580 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,16 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }
 
 int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip;
 	struct posix_acl *acl;
 	int error = -EAGAIN;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	ip = XFS_I(inode);
 	trace_xfs_check_acl(ip);
 
 	/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d7..11dd72070cbb 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
 #define SGI_ACL_DEFAULT_SIZE	(sizeof(SGI_ACL_DEFAULT)-1)
 
 #ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask);
+extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
 extern int xfs_acl_chmod(struct inode *inode);
diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index 2e914d0771b9..4ccc59c1ea82 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations;
 /* operations shared over more than one file */
 int coda_open(struct inode *i, struct file *f);
 int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask);
+int coda_permission(struct inode *inode, int mask, unsigned int flags);
 int coda_revalidate_inode(struct dentry *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a04aa96acb71..d5a4d42f655b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1550,11 +1550,13 @@ struct file_operations {
 	int (*setlease)(struct file *, long, struct file_lock **);
 };
 
+#define IPERM_FLAG_RCU	0x0001
+
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
 	void * (*follow_link) (struct dentry *, struct nameidata *);
-	int (*permission) (struct inode *, int);
-	int (*check_acl)(struct inode *, int);
+	int (*permission) (struct inode *, int, unsigned int);
+	int (*check_acl)(struct inode *, int, unsigned int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -2165,8 +2167,8 @@ extern sector_t bmap(struct inode *, sector_t);
 #endif
 extern int notify_change(struct dentry *, struct iattr *);
 extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int,
-		int (*check_acl)(struct inode *, int));
+extern int generic_permission(struct inode *, int, unsigned int,
+		int (*check_acl)(struct inode *, int, unsigned int));
 
 static inline bool execute_ok(struct inode *inode)
 {
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 574bea4013b6..0437e377b555 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler;
 
 int generic_acl_init(struct inode *, struct inode *);
 int generic_acl_chmod(struct inode *);
-int generic_check_acl(struct inode *inode, int mask);
+int generic_check_acl(struct inode *inode, int mask, unsigned int flags);
 
 #endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 29d504d5d1c3..0779bb8f95be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int nfs_permission(struct inode *, int);
+extern int nfs_permission(struct inode *, int, unsigned int);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index b2cf2089769b..3b94c91f20a6 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
 int reiserfs_lookup_privroot(struct super_block *sb);
 int reiserfs_delete_xattrs(struct inode *inode);
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
-int reiserfs_permission(struct inode *inode, int mask);
+int reiserfs_permission(struct inode *inode, int mask, unsigned int flags);
 
 #ifdef CONFIG_REISERFS_FS_XATTR
 #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
-- 
cgit v1.2.3-71-gd317


From 1e1743ebe35ec7e3c1fa732408358fbc614cbbe5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:59 +1100
Subject: fs: provide simple rcu-walk generic_check_acl implementation

This simple implementation just checks for no ACLs on the inode, and
if so, then the rcu-walk may proceed, otherwise fail it.

This could easily be extended to put acls under RCU and check them
under seqlock, if need be. But this implementation is enough to show
the rcu-walk aware permissions code for path lookups is working, and
will handle cases where there are no ACLs or ACLs in just the final
element.

This patch implicity converts tmpfs to rcu-aware permission check.
Subsequent patches onvert ext*, xfs, and, btrfs. Each of these uses
acl/permission code in a different way, so convert them all to provide
templates and proof of concept.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/generic_acl.c          | 22 ++++++++++++----------
 include/linux/posix_acl.h | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 628004282130..06c48a891832 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -192,16 +192,18 @@ generic_acl_chmod(struct inode *inode)
 int
 generic_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
-	struct posix_acl *acl;
-
-	if (flags & IPERM_FLAG_RCU)
-		return -ECHILD;
-
-	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
-	if (acl) {
-		int error = posix_acl_permission(inode, acl, mask);
-		posix_acl_release(acl);
-		return error;
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			return -ECHILD;
+	} else {
+		struct posix_acl *acl;
+
+		acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+		if (acl) {
+			int error = posix_acl_permission(inode, acl, mask);
+			posix_acl_release(acl);
+			return error;
+		}
 	}
 	return -EAGAIN;
 }
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 67608161df6b..d68283a898bb 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -108,6 +108,25 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
 	return acl;
 }
 
+static inline int negative_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl **p, *acl;
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		p = &inode->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		p = &inode->i_default_acl;
+		break;
+	default:
+		BUG();
+	}
+	acl = ACCESS_ONCE(*p);
+	if (acl)
+		return 0;
+	return 1;
+}
+
 static inline void set_cached_acl(struct inode *inode,
 				  int type,
 				  struct posix_acl *acl)
-- 
cgit v1.2.3-71-gd317


From 4e35e6070b1ceed89c3bba2af4216c286fb1dafd Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:03 +1100
Subject: kernel: add bl_list

Introduce a type of hlist that can support the use of the lowest bit in the
hlist_head. This will be subsequently used to implement per-bucket bit spinlock
for inode and dentry hashes, and may be useful in other cases such as network
hashes.

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/list_bl.h    | 144 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rculist_bl.h | 127 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 271 insertions(+)
 create mode 100644 include/linux/list_bl.h
 create mode 100644 include/linux/rculist_bl.h

(limited to 'include/linux')

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
new file mode 100644
index 000000000000..9ee97e7f2be4
--- /dev/null
+++ b/include/linux/list_bl.h
@@ -0,0 +1,144 @@
+#ifndef _LINUX_LIST_BL_H
+#define _LINUX_LIST_BL_H
+
+#include <linux/list.h>
+
+/*
+ * Special version of lists, where head of the list has a lock in the lowest
+ * bit. This is useful for scalable hash tables without increasing memory
+ * footprint overhead.
+ *
+ * For modification operations, the 0 bit of hlist_bl_head->first
+ * pointer must be set.
+ *
+ * With some small modifications, this can easily be adapted to store several
+ * arbitrary bits (not just a single lock bit), if the need arises to store
+ * some fast and compact auxiliary data.
+ */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#define LIST_BL_LOCKMASK	1UL
+#else
+#define LIST_BL_LOCKMASK	0UL
+#endif
+
+#ifdef CONFIG_DEBUG_LIST
+#define LIST_BL_BUG_ON(x) BUG_ON(x)
+#else
+#define LIST_BL_BUG_ON(x)
+#endif
+
+
+struct hlist_bl_head {
+	struct hlist_bl_node *first;
+};
+
+struct hlist_bl_node {
+	struct hlist_bl_node *next, **pprev;
+};
+#define INIT_HLIST_BL_HEAD(ptr) \
+	((ptr)->first = NULL)
+
+static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)
+
+static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
+{
+	return !h->pprev;
+}
+
+static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
+{
+	return (struct hlist_bl_node *)
+		((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_set_first(struct hlist_bl_head *h,
+					struct hlist_bl_node *n)
+{
+	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+	LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+	h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
+}
+
+static inline int hlist_bl_empty(const struct hlist_bl_head *h)
+{
+	return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_add_head(struct hlist_bl_node *n,
+					struct hlist_bl_head *h)
+{
+	struct hlist_bl_node *first = hlist_bl_first(h);
+
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	n->pprev = &h->first;
+	hlist_bl_set_first(h, n);
+}
+
+static inline void __hlist_bl_del(struct hlist_bl_node *n)
+{
+	struct hlist_bl_node *next = n->next;
+	struct hlist_bl_node **pprev = n->pprev;
+
+	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+
+	/* pprev may be `first`, so be careful not to lose the lock bit */
+	*pprev = (struct hlist_bl_node *)
+			((unsigned long)next |
+			 ((unsigned long)*pprev & LIST_BL_LOCKMASK));
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_bl_del(struct hlist_bl_node *n)
+{
+	__hlist_bl_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_bl_del_init(struct hlist_bl_node *n)
+{
+	if (!hlist_bl_unhashed(n)) {
+		__hlist_bl_del(n);
+		INIT_HLIST_BL_NODE(n);
+	}
+}
+
+/**
+ * hlist_bl_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_bl_for_each_entry(tpos, pos, head, member)		\
+	for (pos = hlist_bl_first(head);				\
+	     pos &&							\
+		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member)	 \
+	for (pos = hlist_bl_first(head);				 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
new file mode 100644
index 000000000000..b872b493724d
--- /dev/null
+++ b/include/linux/rculist_bl.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_RCULIST_BL_H
+#define _LINUX_RCULIST_BL_H
+
+/*
+ * RCU-protected bl list version. See include/linux/list_bl.h.
+ */
+#include <linux/list_bl.h>
+#include <linux/rcupdate.h>
+
+static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
+					struct hlist_bl_node *n)
+{
+	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+	LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+	rcu_assign_pointer(h->first,
+		(struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
+}
+
+static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
+{
+	return (struct hlist_bl_node *)
+		((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
+}
+
+/**
+ * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_bl_unhashed() on the node returns true after this. It is
+ * useful for RCU based read lockfree traversal if the writer side
+ * must know if the list entry is still hashed or already unhashed.
+ *
+ * In particular, it means that we can not poison the forward pointers
+ * that may still be used for walking the hash list and we can only
+ * zero the pprev pointer so list_unhashed() will return true after
+ * this.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another
+ * list-mutation primitive, such as hlist_bl_add_head_rcu() or
+ * hlist_bl_del_rcu(), running on this same list.  However, it is
+ * perfectly legal to run concurrently with the _rcu list-traversal
+ * primitives, such as hlist_bl_for_each_entry_rcu().
+ */
+static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n)
+{
+	if (!hlist_bl_unhashed(n)) {
+		__hlist_bl_del(n);
+		n->pprev = NULL;
+	}
+}
+
+/**
+ * hlist_bl_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_bl_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
+ * or hlist_bl_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_bl_for_each_entry().
+ */
+static inline void hlist_bl_del_rcu(struct hlist_bl_node *n)
+{
+	__hlist_bl_del(n);
+	n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_bl_add_head_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist_bl,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
+ * or hlist_bl_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.  Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
+					struct hlist_bl_head *h)
+{
+	struct hlist_bl_node *first;
+
+	/* don't need hlist_bl_first_rcu because we're under lock */
+	first = hlist_bl_first(h);
+
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	n->pprev = &h->first;
+
+	/* need _rcu because we can have concurrent lock free readers */
+	hlist_bl_set_first_rcu(h, n);
+}
+/**
+ * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_bl_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_bl_node within the struct.
+ *
+ */
+#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member)		\
+	for (pos = hlist_bl_first_rcu(head);				\
+		pos &&							\
+		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference_raw(pos->next))
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 626d607435617cc0f033522083e2bb195b81813c Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:04 +1100
Subject: bit_spinlock: add required includes

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/bit_spinlock.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 7113a32a86ea..e612575a2596 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -1,6 +1,10 @@
 #ifndef __LINUX_BIT_SPINLOCK_H
 #define __LINUX_BIT_SPINLOCK_H
 
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <asm/atomic.h>
+
 /*
  *  bit-based spin_lock()
  *
-- 
cgit v1.2.3-71-gd317


From ceb5bdc2d246f6d81cf61ed70f325308a11821d2 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:05 +1100
Subject: fs: dcache per-bucket dcache hash locking

We can turn the dcache hash locking from a global dcache_hash_lock into
per-bucket locking.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/dcache.c            | 133 ++++++++++++++++++++++++++++++-------------------
 fs/super.c             |   3 +-
 include/linux/dcache.h |   3 +-
 include/linux/fs.h     |   3 +-
 4 files changed, 89 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 07d1f6862dc7..9f04e1ba75b7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,14 +33,18 @@
 #include <linux/bootmem.h>
 #include <linux/fs_struct.h>
 #include <linux/hardirq.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rculist_bl.h>
 #include "internal.h"
 
 /*
  * Usage:
  * dcache_inode_lock protects:
  *   - i_dentry, d_alias, d_inode
- * dcache_hash_lock protects:
- *   - the dcache hash table, s_anon lists
+ * dcache_hash_bucket lock protects:
+ *   - the dcache hash table
+ * s_anon bl list spinlock protects:
+ *   - the s_anon list (see __d_drop)
  * dcache_lru_lock protects:
  *   - the dcache lru lists and counters
  * d_lock protects:
@@ -57,7 +61,8 @@
  * dcache_inode_lock
  *   dentry->d_lock
  *     dcache_lru_lock
- *     dcache_hash_lock
+ *     dcache_hash_bucket lock
+ *     s_anon lock
  *
  * If there is an ancestor relationship:
  * dentry->d_parent->...->d_parent->d_lock
@@ -74,7 +79,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
@@ -96,7 +100,29 @@ static struct kmem_cache *dentry_cache __read_mostly;
 
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
-static struct hlist_head *dentry_hashtable __read_mostly;
+
+struct dcache_hash_bucket {
+	struct hlist_bl_head head;
+};
+static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+
+static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
+					unsigned long hash)
+{
+	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+	return dentry_hashtable + (hash & D_HASHMASK);
+}
+
+static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
+{
+	bit_spin_lock(0, (unsigned long *)&b->head.first);
+}
+
+static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
+{
+	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
+}
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -144,7 +170,7 @@ static void d_free(struct dentry *dentry)
 		dentry->d_op->d_release(dentry);
 
 	/* if dentry was never inserted into hash, immediate free is OK */
-	if (hlist_unhashed(&dentry->d_hash))
+	if (hlist_bl_unhashed(&dentry->d_hash))
 		__d_free(&dentry->d_u.d_rcu);
 	else
 		call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -302,11 +328,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 void __d_drop(struct dentry *dentry)
 {
 	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
-		dentry->d_flags |= DCACHE_UNHASHED;
-		spin_lock(&dcache_hash_lock);
-		hlist_del_rcu(&dentry->d_hash);
-		spin_unlock(&dcache_hash_lock);
-		dentry_rcuwalk_barrier(dentry);
+		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
+			bit_spin_lock(0,
+				(unsigned long *)&dentry->d_sb->s_anon.first);
+			dentry->d_flags |= DCACHE_UNHASHED;
+			hlist_bl_del_init(&dentry->d_hash);
+			__bit_spin_unlock(0,
+				(unsigned long *)&dentry->d_sb->s_anon.first);
+		} else {
+			struct dcache_hash_bucket *b;
+			b = d_hash(dentry->d_parent, dentry->d_name.hash);
+			spin_lock_bucket(b);
+			/*
+			 * We may not actually need to put DCACHE_UNHASHED
+			 * manipulations under the hash lock, but follow
+			 * the principle of least surprise.
+			 */
+			dentry->d_flags |= DCACHE_UNHASHED;
+			hlist_bl_del_rcu(&dentry->d_hash);
+			spin_unlock_bucket(b);
+			dentry_rcuwalk_barrier(dentry);
+		}
 	}
 }
 EXPORT_SYMBOL(__d_drop);
@@ -961,8 +1003,8 @@ void shrink_dcache_for_umount(struct super_block *sb)
 	spin_unlock(&dentry->d_lock);
 	shrink_dcache_for_umount_subtree(dentry);
 
-	while (!hlist_empty(&sb->s_anon)) {
-		dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+	while (!hlist_bl_empty(&sb->s_anon)) {
+		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 		shrink_dcache_for_umount_subtree(dentry);
 	}
 }
@@ -1263,7 +1305,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_sb = NULL;
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
-	INIT_HLIST_NODE(&dentry->d_hash);
+	INIT_HLIST_BL_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
 	INIT_LIST_HEAD(&dentry->d_alias);
@@ -1459,14 +1501,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
 }
 EXPORT_SYMBOL(d_alloc_root);
 
-static inline struct hlist_head *d_hash(struct dentry *parent,
-					unsigned long hash)
-{
-	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
-	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
-	return dentry_hashtable + (hash & D_HASHMASK);
-}
-
 /**
  * d_obtain_alias - find or allocate a dentry for a given inode
  * @inode: inode to allocate the dentry for
@@ -1521,11 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	tmp->d_sb = inode->i_sb;
 	tmp->d_inode = inode;
 	tmp->d_flags |= DCACHE_DISCONNECTED;
-	tmp->d_flags &= ~DCACHE_UNHASHED;
 	list_add(&tmp->d_alias, &inode->i_dentry);
-	spin_lock(&dcache_hash_lock);
-	hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
-	spin_unlock(&dcache_hash_lock);
+	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
+	tmp->d_flags &= ~DCACHE_UNHASHED;
+	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
+	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
 	spin_unlock(&tmp->d_lock);
 	spin_unlock(&dcache_inode_lock);
 
@@ -1567,7 +1601,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 			d_move(new, dentry);
 			iput(inode);
 		} else {
-			/* already taking dcache_inode_lock, so d_add() by hand */
+			/* already got dcache_inode_lock, so d_add() by hand */
 			__d_instantiate(dentry, inode);
 			spin_unlock(&dcache_inode_lock);
 			security_d_instantiate(dentry, inode);
@@ -1702,8 +1736,8 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
-	struct hlist_head *head = d_hash(parent, hash);
-	struct hlist_node *node;
+	struct dcache_hash_bucket *b = d_hash(parent, hash);
+	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
 	/*
@@ -1726,7 +1760,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 *
 	 * See Documentation/vfs/dcache-locking.txt for more details.
 	 */
-	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
+	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1820,8 +1854,8 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
-	struct hlist_head *head = d_hash(parent,hash);
-	struct hlist_node *node;
+	struct dcache_hash_bucket *b = d_hash(parent, hash);
+	struct hlist_bl_node *node;
 	struct dentry *found = NULL;
 	struct dentry *dentry;
 
@@ -1847,7 +1881,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	 */
 	rcu_read_lock();
 	
-	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
+	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
 		const char *tname;
 		int tlen;
 
@@ -1998,11 +2032,13 @@ again:
 }
 EXPORT_SYMBOL(d_delete);
 
-static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
 {
-
+	BUG_ON(!d_unhashed(entry));
+	spin_lock_bucket(b);
  	entry->d_flags &= ~DCACHE_UNHASHED;
- 	hlist_add_head_rcu(&entry->d_hash, list);
+	hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
+	spin_unlock_bucket(b);
 }
 
 static void _d_rehash(struct dentry * entry)
@@ -2020,9 +2056,7 @@ static void _d_rehash(struct dentry * entry)
 void d_rehash(struct dentry * entry)
 {
 	spin_lock(&entry->d_lock);
-	spin_lock(&dcache_hash_lock);
 	_d_rehash(entry);
-	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&entry->d_lock);
 }
 EXPORT_SYMBOL(d_rehash);
@@ -2165,15 +2199,16 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	write_seqcount_begin(&dentry->d_seq);
 	write_seqcount_begin(&target->d_seq);
 
-	/* Move the dentry to the target hash queue, if on different bucket */
-	spin_lock(&dcache_hash_lock);
-	if (!d_unhashed(dentry))
-		hlist_del_rcu(&dentry->d_hash);
+	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
+
+	/*
+	 * Move the dentry to the target hash queue. Don't bother checking
+	 * for the same hash queue because of how unlikely it is.
+	 */
+	__d_drop(dentry);
 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
-	spin_unlock(&dcache_hash_lock);
 
 	/* Unhash the target: dput() will then get rid of it */
-	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
 	__d_drop(target);
 
 	list_del(&dentry->d_u.d_child);
@@ -2369,9 +2404,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 
 	spin_lock(&actual->d_lock);
 found:
-	spin_lock(&dcache_hash_lock);
 	_d_rehash(actual);
-	spin_unlock(&dcache_hash_lock);
 	spin_unlock(&actual->d_lock);
 	spin_unlock(&dcache_inode_lock);
 out_nolock:
@@ -2953,7 +2986,7 @@ static void __init dcache_init_early(void)
 
 	dentry_hashtable =
 		alloc_large_system_hash("Dentry cache",
-					sizeof(struct hlist_head),
+					sizeof(struct dcache_hash_bucket),
 					dhash_entries,
 					13,
 					HASH_EARLY,
@@ -2962,7 +2995,7 @@ static void __init dcache_init_early(void)
 					0);
 
 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
-		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
 }
 
 static void __init dcache_init(void)
@@ -2985,7 +3018,7 @@ static void __init dcache_init(void)
 
 	dentry_hashtable =
 		alloc_large_system_hash("Dentry cache",
-					sizeof(struct hlist_head),
+					sizeof(struct dcache_hash_bucket),
 					dhash_entries,
 					13,
 					0,
@@ -2994,7 +3027,7 @@ static void __init dcache_init(void)
 					0);
 
 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
-		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
 }
 
 /* SLAB cache for __getname() consumers */
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9a..968ba013011a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
 #include <linux/idr.h>
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
+#include <linux/rculist_bl.h>
 #include "internal.h"
 
 
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_files);
 #endif
 		INIT_LIST_HEAD(&s->s_instances);
-		INIT_HLIST_HEAD(&s->s_anon);
+		INIT_HLIST_BL_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
 		init_rwsem(&s->s_umount);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8b2064d02928..5f0392ef759b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -4,6 +4,7 @@
 #include <asm/atomic.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
+#include <linux/rculist_bl.h>
 #include <linux/spinlock.h>
 #include <linux/seqlock.h>
 #include <linux/cache.h>
@@ -95,7 +96,7 @@ struct dentry {
 	/* RCU lookup touched fields */
 	unsigned int d_flags;		/* protected by d_lock */
 	seqcount_t d_seq;		/* per dentry seqlock */
-	struct hlist_node d_hash;	/* lookup hash list */
+	struct hlist_bl_node d_hash;	/* lookup hash list */
 	struct dentry *d_parent;	/* parent directory */
 	struct qstr d_name;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d5a4d42f655b..baf3e556ff0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -392,6 +392,7 @@ struct inodes_stat_t {
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fiemap.h>
+#include <linux/rculist_bl.h>
 
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
@@ -1377,7 +1378,7 @@ struct super_block {
 	const struct xattr_handler **s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
-	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
+	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 #ifdef CONFIG_SMP
 	struct list_head __percpu *s_files;
 #else
-- 
cgit v1.2.3-71-gd317


From 873feea09ebc980cbd3631b767356ce1eee65ec1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:06 +1100
Subject: fs: dcache per-inode inode alias locking

dcache_inode_lock can be replaced with per-inode locking. Use existing
inode->i_lock for this. This is slightly non-trivial because we sometimes
need to find the inode from the dentry, which requires d_inode to be
stabilised (either with refcount or d_lock).

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/9p/vfs_inode.c      |  4 +--
 fs/affs/amigaffs.c     |  4 +--
 fs/cifs/inode.c        |  6 ++--
 fs/dcache.c            | 88 +++++++++++++++++++++++++++-----------------------
 fs/exportfs/expfs.c    | 12 ++++---
 fs/nfs/getroot.c       |  4 +--
 fs/notify/fsnotify.c   |  4 +--
 fs/ocfs2/dcache.c      |  4 +--
 include/linux/dcache.h |  1 -
 9 files changed, 67 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index df8bbb358d54..59782981b225 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -277,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	/* Directory should have only one entry. */
 	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
 	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 	return dentry;
 }
 
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 600101a21ba2..3a4557e8325c 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 	void *data = dentry->d_fsdata;
 	struct list_head *head, *next;
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	head = &inode->i_dentry;
 	next = head->next;
 	while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 		}
 		next = next->next;
 	}
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 }
 
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2a239d878e85..a853a89857a5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -809,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
-			spin_unlock(&dcache_inode_lock);
+			spin_unlock(&inode->i_lock);
 			return true;
 		}
 	}
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 	return false;
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 9f04e1ba75b7..09ec945f3c98 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -39,8 +39,8 @@
 
 /*
  * Usage:
- * dcache_inode_lock protects:
- *   - i_dentry, d_alias, d_inode
+ * dcache->d_inode->i_lock protects:
+ *   - i_dentry, d_alias, d_inode of aliases
  * dcache_hash_bucket lock protects:
  *   - the dcache hash table
  * s_anon bl list spinlock protects:
@@ -58,7 +58,7 @@
  *   - d_alias, d_inode
  *
  * Ordering:
- * dcache_inode_lock
+ * dentry->d_inode->i_lock
  *   dentry->d_lock
  *     dcache_lru_lock
  *     dcache_hash_bucket lock
@@ -78,12 +78,10 @@
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_inode_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
-EXPORT_SYMBOL(dcache_inode_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
@@ -196,14 +194,14 @@ static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
  */
 static void dentry_iput(struct dentry * dentry)
 	__releases(dentry->d_lock)
-	__releases(dcache_inode_lock)
+	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
 	if (inode) {
 		dentry->d_inode = NULL;
 		list_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&inode->i_lock);
 		if (!inode->i_nlink)
 			fsnotify_inoderemove(inode);
 		if (dentry->d_op && dentry->d_op->d_iput)
@@ -212,7 +210,6 @@ static void dentry_iput(struct dentry * dentry)
 			iput(inode);
 	} else {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&dcache_inode_lock);
 	}
 }
 
@@ -222,14 +219,14 @@ static void dentry_iput(struct dentry * dentry)
  */
 static void dentry_unlink_inode(struct dentry * dentry)
 	__releases(dentry->d_lock)
-	__releases(dcache_inode_lock)
+	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
 	dentry->d_inode = NULL;
 	list_del_init(&dentry->d_alias);
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 	if (!inode->i_nlink)
 		fsnotify_inoderemove(inode);
 	if (dentry->d_op && dentry->d_op->d_iput)
@@ -295,7 +292,7 @@ static void dentry_lru_move_tail(struct dentry *dentry)
 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	__releases(dentry->d_lock)
 	__releases(parent->d_lock)
-	__releases(dcache_inode_lock)
+	__releases(dentry->d_inode->i_lock)
 {
 	dentry->d_parent = NULL;
 	list_del(&dentry->d_u.d_child);
@@ -370,9 +367,11 @@ EXPORT_SYMBOL(d_drop);
 static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 	__releases(dentry->d_lock)
 {
+	struct inode *inode;
 	struct dentry *parent;
 
-	if (!spin_trylock(&dcache_inode_lock)) {
+	inode = dentry->d_inode;
+	if (inode && !spin_trylock(&inode->i_lock)) {
 relock:
 		spin_unlock(&dentry->d_lock);
 		cpu_relax();
@@ -383,7 +382,8 @@ relock:
 	else
 		parent = dentry->d_parent;
 	if (parent && !spin_trylock(&parent->d_lock)) {
-		spin_unlock(&dcache_inode_lock);
+		if (inode)
+			spin_unlock(&inode->i_lock);
 		goto relock;
 	}
 
@@ -618,9 +618,9 @@ struct dentry *d_find_alias(struct inode *inode)
 	struct dentry *de = NULL;
 
 	if (!list_empty(&inode->i_dentry)) {
-		spin_lock(&dcache_inode_lock);
+		spin_lock(&inode->i_lock);
 		de = __d_find_alias(inode, 0);
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&inode->i_lock);
 	}
 	return de;
 }
@@ -634,20 +634,20 @@ void d_prune_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
 restart:
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_count) {
 			__dget_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_inode_lock);
+			spin_unlock(&inode->i_lock);
 			dput(dentry);
 			goto restart;
 		}
 		spin_unlock(&dentry->d_lock);
 	}
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(d_prune_aliases);
 
@@ -1392,9 +1392,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
 	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_inode_lock);
+	if (inode)
+		spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
-	spin_unlock(&dcache_inode_lock);
+	if (inode)
+		spin_unlock(&inode->i_lock);
 	security_d_instantiate(entry, inode);
 }
 EXPORT_SYMBOL(d_instantiate);
@@ -1458,9 +1460,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 	BUG_ON(!list_empty(&entry->d_alias));
 
-	spin_lock(&dcache_inode_lock);
+	if (inode)
+		spin_lock(&inode->i_lock);
 	result = __d_instantiate_unique(entry, inode);
-	spin_unlock(&dcache_inode_lock);
+	if (inode)
+		spin_unlock(&inode->i_lock);
 
 	if (!result) {
 		security_d_instantiate(entry, inode);
@@ -1542,10 +1546,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	tmp->d_parent = tmp; /* make sure dput doesn't croak */
 
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	res = __d_find_alias(inode, 0);
 	if (res) {
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&inode->i_lock);
 		dput(tmp);
 		goto out_iput;
 	}
@@ -1561,7 +1565,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
 	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
 	spin_unlock(&tmp->d_lock);
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 
 	return tmp;
 
@@ -1592,18 +1596,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	struct dentry *new = NULL;
 
 	if (inode && S_ISDIR(inode->i_mode)) {
-		spin_lock(&dcache_inode_lock);
+		spin_lock(&inode->i_lock);
 		new = __d_find_alias(inode, 1);
 		if (new) {
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
-			spin_unlock(&dcache_inode_lock);
+			spin_unlock(&inode->i_lock);
 			security_d_instantiate(new, inode);
 			d_move(new, dentry);
 			iput(inode);
 		} else {
-			/* already got dcache_inode_lock, so d_add() by hand */
+			/* already taking inode->i_lock, so d_add() by hand */
 			__d_instantiate(dentry, inode);
-			spin_unlock(&dcache_inode_lock);
+			spin_unlock(&inode->i_lock);
 			security_d_instantiate(dentry, inode);
 			d_rehash(dentry);
 		}
@@ -1676,10 +1680,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
 	 */
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
 		__d_instantiate(found, inode);
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&inode->i_lock);
 		security_d_instantiate(found, inode);
 		return found;
 	}
@@ -1690,7 +1694,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 */
 	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 	__dget(new);
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 	security_d_instantiate(found, inode);
 	d_move(new, found);
 	iput(inode);
@@ -2004,15 +2008,17 @@ EXPORT_SYMBOL(d_validate);
  
 void d_delete(struct dentry * dentry)
 {
+	struct inode *inode;
 	int isdir = 0;
 	/*
 	 * Are we the only user?
 	 */
 again:
 	spin_lock(&dentry->d_lock);
-	isdir = S_ISDIR(dentry->d_inode->i_mode);
+	inode = dentry->d_inode;
+	isdir = S_ISDIR(inode->i_mode);
 	if (dentry->d_count == 1) {
-		if (!spin_trylock(&dcache_inode_lock)) {
+		if (inode && !spin_trylock(&inode->i_lock)) {
 			spin_unlock(&dentry->d_lock);
 			cpu_relax();
 			goto again;
@@ -2266,13 +2272,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the dcache_inode_lock
+ * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
  */
-static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
-	__releases(dcache_inode_lock)
+static struct dentry *__d_unalias(struct inode *inode,
+		struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
 	struct dentry *ret;
@@ -2298,7 +2304,7 @@ out_unalias:
 	d_move(alias, dentry);
 	ret = alias;
 out_err:
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 	if (m2)
 		mutex_unlock(m2);
 	if (m1)
@@ -2371,7 +2377,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 		goto out_nolock;
 	}
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *alias;
@@ -2388,7 +2394,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 				goto found;
 			}
 			/* Nope, but we must(!) avoid directory aliasing */
-			actual = __d_unalias(dentry, alias);
+			actual = __d_unalias(inode, dentry, alias);
 			if (IS_ERR(actual))
 				dput(alias);
 			goto out_nolock;
@@ -2406,7 +2412,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 found:
 	_d_rehash(actual);
 	spin_unlock(&actual->d_lock);
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 out_nolock:
 	if (actual == dentry) {
 		security_d_instantiate(dentry, inode);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index f06a940065f6..4b6825740dd5 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
 		void *context)
 {
 	struct dentry *dentry, *toput = NULL;
+	struct inode *inode;
 
 	if (acceptable(context, result))
 		return result;
 
-	spin_lock(&dcache_inode_lock);
-	list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
+	inode = result->d_inode;
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		dget(dentry);
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&inode->i_lock);
 		if (toput)
 			dput(toput);
 		if (dentry != result && acceptable(context, dentry)) {
 			dput(result);
 			return dentry;
 		}
-		spin_lock(&dcache_inode_lock);
+		spin_lock(&inode->i_lock);
 		toput = dentry;
 	}
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 
 	if (toput)
 		dput(toput);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index c3a5a1126833..5596c6a2881e 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,11 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		 * This again causes shrink_dcache_for_umount_subtree() to
 		 * Oops, since the test for IS_ROOT() will fail.
 		 */
-		spin_lock(&dcache_inode_lock);
+		spin_lock(&sb->s_root->d_inode->i_lock);
 		spin_lock(&sb->s_root->d_lock);
 		list_del_init(&sb->s_root->d_alias);
 		spin_unlock(&sb->s_root->d_lock);
-		spin_unlock(&dcache_inode_lock);
+		spin_unlock(&sb->s_root->d_inode->i_lock);
 	}
 	return 0;
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9be6ec1f36d8..79b47cbb5cd8 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	/* determine if the children should tell inode about their events */
 	watched = fsnotify_inode_watches_children(inode);
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -82,7 +82,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 		}
 		spin_unlock(&alias->d_lock);
 	}
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 }
 
 /* Notify this dentry's parent about a child's events. */
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 0310b16a7238..6d80ecc7834f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -175,7 +175,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	struct list_head *p;
 	struct dentry *dentry = NULL;
 
-	spin_lock(&dcache_inode_lock);
+	spin_lock(&inode->i_lock);
 	list_for_each(p, &inode->i_dentry) {
 		dentry = list_entry(p, struct dentry, d_alias);
 
@@ -193,7 +193,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 		dentry = NULL;
 	}
 
-	spin_unlock(&dcache_inode_lock);
+	spin_unlock(&inode->i_lock);
 
 	return dentry;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 5f0392ef759b..d719e4de8046 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -191,7 +191,6 @@ struct dentry_operations {
 #define DCACHE_OP_REVALIDATE	0x4000
 #define DCACHE_OP_DELETE	0x8000
 
-extern spinlock_t dcache_inode_lock;
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)
-- 
cgit v1.2.3-71-gd317


From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:07 +1100
Subject: fs: improve scalability of pseudo filesystems

Regardless of how much we possibly try to scale dcache, there is likely
always going to be some fundamental contention when adding or removing children
under the same parent. Pseudo filesystems do not seem need to have connected
dentries because by definition they are disconnected.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/anon_inodes.c       |  2 +-
 fs/dcache.c            | 12 ++++++++++++
 fs/pipe.c              |  2 +-
 include/linux/dcache.h |  1 +
 net/socket.c           |  2 +-
 5 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index aca8806fa206..9d92b33da8a0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0;
-	path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
 	if (!path.dentry)
 		goto err_module;
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 09ec945f3c98..9e6e6db76869 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
+{
+	struct dentry *dentry = d_alloc(NULL, name);
+	if (dentry) {
+		dentry->d_sb = sb;
+		dentry->d_parent = dentry;
+		dentry->d_flags |= DCACHE_DISCONNECTED;
+	}
+	return dentry;
+}
+EXPORT_SYMBOL(d_alloc_pseudo);
+
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 {
 	struct qstr q;
diff --git a/fs/pipe.c b/fs/pipe.c
index 01a786567810..cfe3a7f2ee21 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags)
 		goto err;
 
 	err = -ENOMEM;
-	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
 	if (!path.dentry)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d719e4de8046..c0a2ca97c72f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
 
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern struct dentry * d_obtain_alias(struct inode *);
diff --git a/net/socket.c b/net/socket.c
index 991e266bc7ae..0ee74c325320 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
 	if (unlikely(!path.dentry)) {
 		put_unused_fd(fd);
 		return -ENOMEM;
-- 
cgit v1.2.3-71-gd317


From 9d55c369bb5e695e629bc35cba2ef607755b3bee Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:09 +1100
Subject: fs: implement faster dentry memcmp

The standard memcmp function on a Westmere system shows up hot in
profiles in the `git diff` workload (both parallel and single threaded),
and it is likely due to the costs associated with trapping into
microcode, and little opportunity to improve memory access (dentry
name is not likely to take up more than a cacheline).

So replace it with an open-coded byte comparison. This increases code
size by 8 bytes in the critical __d_lookup_rcu function, but the
speedup is huge, averaging 10 runs of each:

git diff st   user   sys   elapsed  CPU
before        1.15   2.57  3.82      97.1
after         1.14   2.35  3.61      96.8

git diff mt   user   sys   elapsed  CPU
before        1.27   3.85  1.46     349
after         1.26   3.54  1.43     333

Elapsed time for single threaded git diff at 95.0% confidence:
        -0.21  +/- 0.01
        -5.45% +/- 0.24%

It's -0.66% +/- 0.06% elapsed time on my Opteron, so rep cmp costs on the
fam10h seem to be relatively smaller, but there is still a win.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/dcache.c            | 12 +++---------
 include/linux/dcache.h | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 2a4ce7dc2306..5699d4c027cb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1454,9 +1454,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 			continue;
 		if (alias->d_parent != entry->d_parent)
 			continue;
-		if (qstr->len != len)
-			continue;
-		if (memcmp(qstr->name, name, len))
+		if (dentry_cmp(qstr->name, qstr->len, name, len))
 			continue;
 		__dget(alias);
 		return alias;
@@ -1810,9 +1808,7 @@ seqretry:
 						tlen, tname, name))
 				continue;
 		} else {
-			if (tlen != len)
-				continue;
-			if (memcmp(tname, str, tlen))
+			if (dentry_cmp(tname, tlen, str, len))
 				continue;
 		}
 		/*
@@ -1925,9 +1921,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 						tlen, tname, name))
 				goto next;
 		} else {
-			if (tlen != len)
-				goto next;
-			if (memcmp(tname, str, tlen))
+			if (dentry_cmp(tname, tlen, str, len))
 				goto next;
 		}
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c0a2ca97c72f..bd07758943e0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -47,6 +47,27 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;
 
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * The strings are both count bytes long, and count is non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+				const unsigned char *ct, size_t tcount)
+{
+	int ret;
+	if (scount != tcount)
+		return 1;
+	do {
+		ret = (*cs != *ct);
+		if (ret)
+			break;
+		cs++;
+		ct++;
+		tcount--;
+	} while (tcount);
+	return ret;
+}
+
 /* Name hashing routines. Initial hash value */
 /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
 #define init_name_hash()		0
-- 
cgit v1.2.3-71-gd317


From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:11 +1100
Subject: fs: scale mntget/mntput

The problem that this patch aims to fix is vfsmount refcounting scalability.
We need to take a reference on the vfsmount for every successful path lookup,
which often go to the same mount point.

The fundamental difficulty is that a "simple" reference count can never be made
scalable, because any time a reference is dropped, we must check whether that
was the last reference. To do that requires communication with all other CPUs
that may have taken a reference count.

We can make refcounts more scalable in a couple of ways, involving keeping
distributed counters, and checking for the global-zero condition less
frequently.

- check the global sum once every interval (this will delay zero detection
  for some interval, so it's probably a showstopper for vfsmounts).

- keep a local count and only taking the global sum when local reaches 0 (this
  is difficult for vfsmounts, because we can't hold preempt off for the life of
  a reference, so a counter would need to be per-thread or tied strongly to a
  particular CPU which requires more locking).

- keep a local difference of increments and decrements, which allows us to sum
  the total difference and hence find the refcount when summing all CPUs. Then,
  keep a single integer "long" refcount for slow and long lasting references,
  and only take the global sum of local counters when the long refcount is 0.

This last scheme is what I implemented here. Attached mounts and process root
and working directory references are "long" references, and everything else is
a short reference.

This allows scalable vfsmount references during path walking over mounted
subtrees and unattached (lazy umounted) mounts with processes still running
in them.

This results in one fewer atomic op in the fastpath: mntget is now just a
per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock
and non-atomic decrement in the common case. However code is otherwise bigger
and heavier, so single threaded performance is basically a wash.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/ia64/kernel/perfmon.c |   2 +-
 drivers/mtd/mtdchar.c      |   2 +-
 fs/anon_inodes.c           |   2 +-
 fs/fs_struct.c             |  26 +++--
 fs/internal.h              |   1 +
 fs/namei.c                 |  24 +++++
 fs/namespace.c             | 242 +++++++++++++++++++++++++++++++++++++--------
 fs/pipe.c                  |   2 +-
 fs/pnode.c                 |   4 +-
 fs/super.c                 |   2 +-
 include/linux/mount.h      |  53 ++++------
 include/linux/path.h       |   2 +
 net/socket.c               |  19 +++-
 13 files changed, 283 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a24f40bb48e..f099b82703d8 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pfm: will go nicely and kill the special-casing in procfs.
  */
-static struct vfsmount *pfmfs_mnt;
+static struct vfsmount *pfmfs_mnt __read_mostly;
 
 static int __init
 init_pfm_fs(void)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4759d827e8c7..f511dd15fd31 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1201,7 +1201,7 @@ err_unregister_chdev:
 static void __exit cleanup_mtdchar(void)
 {
 	unregister_mtd_user(&mtdchar_notifier);
-	mntput(mtd_inode_mnt);
+	mntput_long(mtd_inode_mnt);
 	unregister_filesystem(&mtd_inodefs_type);
 	__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9d92b33da8a0..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
 	return 0;
 
 err_mntput:
-	mntput(anon_inode_mnt);
+	mntput_long(anon_inode_mnt);
 err_unregister_filesystem:
 	unregister_filesystem(&anon_inode_fs_type);
 err_exit:
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 60b8531f41c5..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 	write_seqcount_begin(&fs->seq);
 	old_root = fs->root;
 	fs->root = *path;
-	path_get(path);
+	path_get_long(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 	if (old_root.dentry)
-		path_put(&old_root);
+		path_put_long(&old_root);
 }
 
 /*
@@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 	write_seqcount_begin(&fs->seq);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
-	path_get(path);
+	path_get_long(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
-		path_put(&old_pwd);
+		path_put_long(&old_pwd);
 }
 
 void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 			write_seqcount_begin(&fs->seq);
 			if (fs->root.dentry == old_root->dentry
 			    && fs->root.mnt == old_root->mnt) {
-				path_get(new_root);
+				path_get_long(new_root);
 				fs->root = *new_root;
 				count++;
 			}
 			if (fs->pwd.dentry == old_root->dentry
 			    && fs->pwd.mnt == old_root->mnt) {
-				path_get(new_root);
+				path_get_long(new_root);
 				fs->pwd = *new_root;
 				count++;
 			}
@@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 	while (count--)
-		path_put(old_root);
+		path_put_long(old_root);
 }
 
 void free_fs_struct(struct fs_struct *fs)
 {
-	path_put(&fs->root);
-	path_put(&fs->pwd);
+	path_put_long(&fs->root);
+	path_put_long(&fs->pwd);
 	kmem_cache_free(fs_cachep, fs);
 }
 
@@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 		spin_lock_init(&fs->lock);
 		seqcount_init(&fs->seq);
 		fs->umask = old->umask;
-		get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
+
+		spin_lock(&old->lock);
+		fs->root = old->root;
+		path_get_long(&fs->root);
+		fs->pwd = old->pwd;
+		path_get_long(&fs->pwd);
+		spin_unlock(&old->lock);
 	}
 	return fs;
 }
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..9687c2ee2735 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
 
 extern void free_vfsmnt(struct vfsmount *);
 extern struct vfsmount *alloc_vfsmnt(const char *);
+extern unsigned int mnt_get_count(struct vfsmount *mnt);
 extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
 				struct vfsmount *);
diff --git a/fs/namei.c b/fs/namei.c
index 4e957bf744ae..19433cdba011 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -367,6 +367,18 @@ void path_get(struct path *path)
 }
 EXPORT_SYMBOL(path_get);
 
+/**
+ * path_get_long - get a long reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get_long(struct path *path)
+{
+	mntget_long(path->mnt);
+	dget(path->dentry);
+}
+
 /**
  * path_put - put a reference to a path
  * @path: path to put the reference to
@@ -380,6 +392,18 @@ void path_put(struct path *path)
 }
 EXPORT_SYMBOL(path_put);
 
+/**
+ * path_put_long - put a long reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put_long(struct path *path)
+{
+	dput(path->dentry);
+	mntput_long(path->mnt);
+}
+
 /**
  * nameidata_drop_rcu - drop this nameidata out of rcu-walk
  * @nd: nameidata pathwalk data to drop
diff --git a/fs/namespace.c b/fs/namespace.c
index 03b82350f020..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
 	mnt->mnt_group_id = 0;
 }
 
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_add_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
+#else
+	preempt_disable();
+	mnt->mnt_count += n;
+	preempt_enable();
+#endif
+}
+
+static inline void mnt_set_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	this_cpu_write(mnt->mnt_pcp->mnt_count, n);
+#else
+	mnt->mnt_count = n;
+#endif
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_inc_count(struct vfsmount *mnt)
+{
+	mnt_add_count(mnt, 1);
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_dec_count(struct vfsmount *mnt)
+{
+	mnt_add_count(mnt, -1);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+unsigned int mnt_get_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	unsigned int count = atomic_read(&mnt->mnt_longrefs);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
+	}
+
+	return count;
+#else
+	return mnt->mnt_count;
+#endif
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
 	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 				goto out_free_id;
 		}
 
-		atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+		if (!mnt->mnt_pcp)
+			goto out_free_devname;
+
+		atomic_set(&mnt->mnt_longrefs, 1);
+#else
+		mnt->mnt_count = 1;
+		mnt->mnt_writers = 0;
+#endif
+
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
-#endif
-#ifdef CONFIG_SMP
-		mnt->mnt_writers = alloc_percpu(int);
-		if (!mnt->mnt_writers)
-			goto out_free_devname;
-#else
-		mnt->mnt_writers = 0;
 #endif
 	}
 	return mnt;
@@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 static inline void mnt_inc_writers(struct vfsmount *mnt)
 {
 #ifdef CONFIG_SMP
-	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
 #else
 	mnt->mnt_writers++;
 #endif
@@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
 static inline void mnt_dec_writers(struct vfsmount *mnt)
 {
 #ifdef CONFIG_SMP
-	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
 #else
 	mnt->mnt_writers--;
 #endif
@@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
 	}
 
 	return count;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
-	free_percpu(mnt->mnt_writers);
+	free_percpu(mnt->mnt_pcp);
 #endif
 	kmem_cache_free(mnt_cache, mnt);
 }
@@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 	return NULL;
 }
 
-static inline void __mntput(struct vfsmount *mnt)
+static inline void mntfree(struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
+
 	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
@@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt)
 	 * to make r/w->r/o transitions.
 	 */
 	/*
-	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
-	 * provides barriers, so mnt_get_writers() below is safe.  AV
+	 * The locking used to deal with mnt_count decrement provides barriers,
+	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
 	fsnotify_vfsmount_delete(mnt);
@@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt)
 	deactivate_super(sb);
 }
 
-void mntput_no_expire(struct vfsmount *mnt)
-{
-repeat:
-	if (atomic_add_unless(&mnt->mnt_count, -1, 1))
-		return;
+#ifdef CONFIG_SMP
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+	if (!longrefs) {
+put_again:
+		br_read_lock(vfsmount_lock);
+		if (likely(atomic_read(&mnt->mnt_longrefs))) {
+			mnt_dec_count(mnt);
+			br_read_unlock(vfsmount_lock);
+			return;
+		}
+		br_read_unlock(vfsmount_lock);
+	} else {
+		BUG_ON(!atomic_read(&mnt->mnt_longrefs));
+		if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
+			return;
+	}
+
 	br_write_lock(vfsmount_lock);
-	if (!atomic_dec_and_test(&mnt->mnt_count)) {
+	if (!longrefs)
+		mnt_dec_count(mnt);
+	else
+		atomic_dec(&mnt->mnt_longrefs);
+	if (mnt_get_count(mnt)) {
 		br_write_unlock(vfsmount_lock);
 		return;
 	}
-	if (likely(!mnt->mnt_pinned)) {
+	if (unlikely(mnt->mnt_pinned)) {
+		mnt_add_count(mnt, mnt->mnt_pinned + 1);
+		mnt->mnt_pinned = 0;
 		br_write_unlock(vfsmount_lock);
-		__mntput(mnt);
+		acct_auto_close_mnt(mnt);
+		goto put_again;
+	}
+	br_write_unlock(vfsmount_lock);
+	mntfree(mnt);
+}
+#else
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+put_again:
+	mnt_dec_count(mnt);
+	if (likely(mnt_get_count(mnt)))
 		return;
+	br_write_lock(vfsmount_lock);
+	if (unlikely(mnt->mnt_pinned)) {
+		mnt_add_count(mnt, mnt->mnt_pinned + 1);
+		mnt->mnt_pinned = 0;
+		br_write_unlock(vfsmount_lock);
+		acct_auto_close_mnt(mnt);
+		goto put_again;
 	}
-	atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
-	mnt->mnt_pinned = 0;
 	br_write_unlock(vfsmount_lock);
-	acct_auto_close_mnt(mnt);
-	goto repeat;
+	mntfree(mnt);
+}
+#endif
+
+static void mntput_no_expire(struct vfsmount *mnt)
+{
+	__mntput(mnt, 0);
+}
+
+void mntput(struct vfsmount *mnt)
+{
+	if (mnt) {
+		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		__mntput(mnt, 0);
+	}
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+	if (mnt)
+		mnt_inc_count(mnt);
+	return mnt;
+}
+EXPORT_SYMBOL(mntget);
+
+void mntput_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	if (mnt) {
+		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		__mntput(mnt, 1);
+	}
+#else
+	mntput(mnt);
+#endif
 }
-EXPORT_SYMBOL(mntput_no_expire);
+EXPORT_SYMBOL(mntput_long);
+
+struct vfsmount *mntget_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	if (mnt)
+		atomic_inc(&mnt->mnt_longrefs);
+	return mnt;
+#else
+	return mntget(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntget_long);
 
 void mnt_pin(struct vfsmount *mnt)
 {
@@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
 	mnt->mnt_pinned++;
 	br_write_unlock(vfsmount_lock);
 }
-
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *mnt)
 {
 	br_write_lock(vfsmount_lock);
 	if (mnt->mnt_pinned) {
-		atomic_inc(&mnt->mnt_count);
+		mnt_inc_count(mnt);
 		mnt->mnt_pinned--;
 	}
 	br_write_unlock(vfsmount_lock);
 }
-
 EXPORT_SYMBOL(mnt_unpin);
 
 static inline void mangle(struct seq_file *m, const char *s)
@@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
 	int minimum_refs = 0;
 	struct vfsmount *p;
 
-	br_read_lock(vfsmount_lock);
+	/* write lock needed for mnt_get_count */
+	br_write_lock(vfsmount_lock);
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		actual_refs += atomic_read(&p->mnt_count);
+		actual_refs += mnt_get_count(p);
 		minimum_refs += 2;
 	}
-	br_read_unlock(vfsmount_lock);
+	br_write_unlock(vfsmount_lock);
 
 	if (actual_refs > minimum_refs)
 		return 0;
@@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
 	down_read(&namespace_sem);
-	br_read_lock(vfsmount_lock);
+	br_write_lock(vfsmount_lock);
 	if (propagate_mount_busy(mnt, 2))
 		ret = 0;
-	br_read_unlock(vfsmount_lock);
+	br_write_unlock(vfsmount_lock);
 	up_read(&namespace_sem);
 	return ret;
 }
@@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head)
 			dput(dentry);
 			mntput(m);
 		}
-		mntput(mnt);
+		mntput_long(mnt);
 	}
 }
 
@@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		    flags & (MNT_FORCE | MNT_DETACH))
 			return -EINVAL;
 
-		if (atomic_read(&mnt->mnt_count) != 2)
+		/*
+		 * probably don't strictly need the lock here if we examined
+		 * all race cases, but it's a slowpath.
+		 */
+		br_write_lock(vfsmount_lock);
+		if (mnt_get_count(mnt) != 2) {
+			br_write_lock(vfsmount_lock);
 			return -EBUSY;
+		}
+		br_write_unlock(vfsmount_lock);
 
 		if (!xchg(&mnt->mnt_expiry_mark, 1))
 			return -EAGAIN;
@@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 
 unlock:
 	up_write(&namespace_sem);
-	mntput(newmnt);
+	mntput_long(newmnt);
 	return err;
 }
 
@@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		if (fs) {
 			if (p == fs->root.mnt) {
 				rootmnt = p;
-				fs->root.mnt = mntget(q);
+				fs->root.mnt = mntget_long(q);
 			}
 			if (p == fs->pwd.mnt) {
 				pwdmnt = p;
-				fs->pwd.mnt = mntget(q);
+				fs->pwd.mnt = mntget_long(q);
 			}
 		}
 		p = next_mnt(p, mnt_ns->root);
@@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	up_write(&namespace_sem);
 
 	if (rootmnt)
-		mntput(rootmnt);
+		mntput_long(rootmnt);
 	if (pwdmnt)
-		mntput(pwdmnt);
+		mntput_long(pwdmnt);
 
 	return new_ns;
 }
@@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	br_write_unlock(vfsmount_lock);
 	chroot_fs_refs(&root, &new);
+
 	error = 0;
 	path_put(&root_parent);
 	path_put(&parent_path);
@@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void)
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
+
 	ns = create_mnt_ns(mnt);
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
diff --git a/fs/pipe.c b/fs/pipe.c
index cfe3a7f2ee21..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
 static void __exit exit_pipe_fs(void)
 {
 	unregister_filesystem(&pipe_fs_type);
-	mntput(pipe_mnt);
+	mntput_long(pipe_mnt);
 }
 
 fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..d42514e32380 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
  */
 static inline int do_refcount_check(struct vfsmount *mnt, int count)
 {
-	int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
+	int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
 	return (mycount > count);
 }
 
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
  * Check if any of these mounts that **do not have submounts**
  * have more references than 'refcnt'. If so return busy.
  *
- * vfsmount lock must be held for read or write
+ * vfsmount lock must be held for write
  */
 int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
 {
diff --git a/fs/super.c b/fs/super.c
index 968ba013011a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 	return mnt;
 
  err:
-	mntput(mnt);
+	mntput_long(mnt);
 	return ERR_PTR(err);
 }
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
 #include <linux/list.h>
 #include <linux/nodemask.h>
 #include <linux/spinlock.h>
+#include <linux/seqlock.h>
 #include <asm/atomic.h>
 
 struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
 
 #define MNT_INTERNAL	0x4000
 
+struct mnt_pcp {
+	int mnt_count;
+	int mnt_writers;
+};
+
 struct vfsmount {
 	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
 	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
+#ifdef CONFIG_SMP
+	struct mnt_pcp __percpu *mnt_pcp;
+	atomic_t mnt_longrefs;
+#else
+	int mnt_count;
+	int mnt_writers;
+#endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
 	int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
-	/*
-	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
-	 * to let these frequently modified fields in a separate cache line
-	 * (so that reads of mnt_flags wont ping-pong on SMP machines)
-	 */
-	atomic_t mnt_count;
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-#ifdef CONFIG_SMP
-	int __percpu *mnt_writers;
-#else
-	int mnt_writers;
-#endif
 };
 
-static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
-	return mnt->mnt_writers;
-#else
-	return &mnt->mnt_writers;
-#endif
-}
-
-static inline struct vfsmount *mntget(struct vfsmount *mnt)
-{
-	if (mnt)
-		atomic_inc(&mnt->mnt_count);
-	return mnt;
-}
-
 struct file; /* forward dec */
 
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
 extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
-extern void mntput_no_expire(struct vfsmount *mnt);
+extern void mntput(struct vfsmount *mnt);
+extern struct vfsmount *mntget(struct vfsmount *mnt);
+extern void mntput_long(struct vfsmount *mnt);
+extern struct vfsmount *mntget_long(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
-static inline void mntput(struct vfsmount *mnt)
-{
-	if (mnt) {
-		mnt->mnt_expiry_mark = 0;
-		mntput_no_expire(mnt);
-	}
-}
-
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
 
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..a581e8c06533 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,7 +10,9 @@ struct path {
 };
 
 extern void path_get(struct path *);
+extern void path_get_long(struct path *);
 extern void path_put(struct path *);
+extern void path_put_long(struct path *);
 
 static inline int path_equal(const struct path *path1, const struct path *path2)
 {
diff --git a/net/socket.c b/net/socket.c
index 0ee74c325320..815bba3d2fe0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister);
 
 static int __init sock_init(void)
 {
+	int err;
+
 	/*
 	 *      Initialize sock SLAB cache.
 	 */
@@ -2406,8 +2408,15 @@ static int __init sock_init(void)
 	 */
 
 	init_inodecache();
-	register_filesystem(&sock_fs_type);
+
+	err = register_filesystem(&sock_fs_type);
+	if (err)
+		goto out_fs;
 	sock_mnt = kern_mount(&sock_fs_type);
+	if (IS_ERR(sock_mnt)) {
+		err = PTR_ERR(sock_mnt);
+		goto out_mount;
+	}
 
 	/* The real protocol initialization is performed in later initcalls.
 	 */
@@ -2420,7 +2429,13 @@ static int __init sock_init(void)
 	skb_timestamping_init();
 #endif
 
-	return 0;
+out:
+	return err;
+
+out_mount:
+	unregister_filesystem(&sock_fs_type);
+out_fs:
+	goto out;
 }
 
 core_initcall(sock_init);	/* early initcall */
-- 
cgit v1.2.3-71-gd317


From ca86828ccd3128513f6d4e200b437deac95408db Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 6 Jan 2011 17:51:07 +0100
Subject: x86, AMD, PCI: Add AMD northbridge PCI device id for CPU families 12h
 and 14h

This patch adds the PCI northbridge device id for AMD CPU
families 12h and 14h. Both families have implemented the same
PCI northbridge device.

There are some future use cases that use this PCI device and
we would like to clarify its naming.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: xen-devel@lists.xensource.com <xen-devel@lists.xensource.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <JBeulich@novell.com>
LKML-Reference: <20110106165107.GL4739@erda.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cb845c16ad7d..dd7d4e20d39b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
 #define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
 #define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
 #define PCI_DEVICE_ID_AMD_15H_NB_MISC	0x1603
+#define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI		0x2020
-- 
cgit v1.2.3-71-gd317


From 07e06b011db2b3300f6c975ebf293fc4c8c59942 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Fri, 7 Jan 2011 15:17:36 +0800
Subject: sched: Consolidate the name of root_task_group and init_task_group

root_task_group is the leftover of USER_SCHED, now it's always
same to init_task_group.
But as Mike suggested, root_task_group is maybe the suitable name
to keep for a tree.
So in this patch:
  init_task_group      --> root_task_group
  init_task_group_load --> root_task_group_load
  INIT_TASK_GROUP_LOAD --> ROOT_TASK_GROUP_LOAD

Suggested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20110107071736.GA32635@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  2 +-
 kernel/sched.c           | 42 ++++++++++++++++++++----------------------
 kernel/sched_autogroup.c |  6 +++---
 3 files changed, 24 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777cd01e240e..341acbbc434a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2511,7 +2511,7 @@ extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_CGROUP_SCHED
 
-extern struct task_group init_task_group;
+extern struct task_group root_task_group;
 
 extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
diff --git a/kernel/sched.c b/kernel/sched.c
index 04949089e760..54b58ec99fd4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -278,14 +278,12 @@ struct task_group {
 #endif
 };
 
-#define root_task_group init_task_group
-
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
+# define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
 
 /*
  * A weight of 0 or 1 can cause arithmetics problems.
@@ -298,13 +296,13 @@ static DEFINE_SPINLOCK(task_group_lock);
 #define MIN_SHARES	2
 #define MAX_SHARES	(1UL << 18)
 
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
 #endif
 
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
-struct task_group init_task_group;
+struct task_group root_task_group;
 
 #endif	/* CONFIG_CGROUP_SCHED */
 
@@ -7848,7 +7846,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 	cfs_rq->tg = tg;
 
 	tg->se[cpu] = se;
-	/* se could be NULL for init_task_group */
+	/* se could be NULL for root_task_group */
 	if (!se)
 		return;
 
@@ -7908,18 +7906,18 @@ void __init sched_init(void)
 		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		init_task_group.se = (struct sched_entity **)ptr;
+		root_task_group.se = (struct sched_entity **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-		init_task_group.cfs_rq = (struct cfs_rq **)ptr;
+		root_task_group.cfs_rq = (struct cfs_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
-		init_task_group.rt_se = (struct sched_rt_entity **)ptr;
+		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-		init_task_group.rt_rq = (struct rt_rq **)ptr;
+		root_task_group.rt_rq = (struct rt_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
 #endif /* CONFIG_RT_GROUP_SCHED */
@@ -7939,13 +7937,13 @@ void __init sched_init(void)
 			global_rt_period(), global_rt_runtime());
 
 #ifdef CONFIG_RT_GROUP_SCHED
-	init_rt_bandwidth(&init_task_group.rt_bandwidth,
+	init_rt_bandwidth(&root_task_group.rt_bandwidth,
 			global_rt_period(), global_rt_runtime());
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
-	list_add(&init_task_group.list, &task_groups);
-	INIT_LIST_HEAD(&init_task_group.children);
+	list_add(&root_task_group.list, &task_groups);
+	INIT_LIST_HEAD(&root_task_group.children);
 	autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
@@ -7960,34 +7958,34 @@ void __init sched_init(void)
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		init_task_group.shares = init_task_group_load;
+		root_task_group.shares = root_task_group_load;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 		/*
-		 * How much cpu bandwidth does init_task_group get?
+		 * How much cpu bandwidth does root_task_group get?
 		 *
 		 * In case of task-groups formed thr' the cgroup filesystem, it
 		 * gets 100% of the cpu resources in the system. This overall
 		 * system cpu resource is divided among the tasks of
-		 * init_task_group and its child task-groups in a fair manner,
+		 * root_task_group and its child task-groups in a fair manner,
 		 * based on each entity's (task or task-group's) weight
 		 * (se->load.weight).
 		 *
-		 * In other words, if init_task_group has 10 tasks of weight
+		 * In other words, if root_task_group has 10 tasks of weight
 		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 		 * then A0's share of the cpu resource is:
 		 *
 		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 		 *
-		 * We achieve this by letting init_task_group's tasks sit
-		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
+		 * We achieve this by letting root_task_group's tasks sit
+		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
 		 */
-		init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
+		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
-		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
+		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8812,7 +8810,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
 
 	if (!cgrp->parent) {
 		/* This is early initialization for the top cgroup */
-		return &init_task_group.css;
+		return &root_task_group.css;
 	}
 
 	parent = cgroup_tg(cgrp->parent);
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index c80fedcd476b..e011e53433a6 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -11,8 +11,8 @@ static atomic_t autogroup_seq_nr;
 
 static void autogroup_init(struct task_struct *init_task)
 {
-	autogroup_default.tg = &init_task_group;
-	init_task_group.autogroup = &autogroup_default;
+	autogroup_default.tg = &root_task_group;
+	root_task_group.autogroup = &autogroup_default;
 	kref_init(&autogroup_default.kref);
 	init_rwsem(&autogroup_default.lock);
 	init_task->signal->autogroup = &autogroup_default;
@@ -63,7 +63,7 @@ static inline struct autogroup *autogroup_create(void)
 	if (!ag)
 		goto out_fail;
 
-	tg = sched_create_group(&init_task_group);
+	tg = sched_create_group(&root_task_group);
 
 	if (IS_ERR(tg))
 		goto out_free;
-- 
cgit v1.2.3-71-gd317


From c97415a72521071c235e0879f9a600014afd87b1 Mon Sep 17 00:00:00 2001
From: Stefan Achatz <erazor_de@users.sourceforge.net>
Date: Fri, 26 Nov 2010 19:57:29 +0000
Subject: sysfs: Introducing binary attributes for struct class

Added dev_bin_attrs to struct class similar to existing dev_attrs.

Signed-off-by: Stefan Achatz <erazor_de@users.sourceforge.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/base/core.c    | 41 +++++++++++++++++++++++++++++++++++++++--
 include/linux/device.h |  1 +
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6ed645411c40..761359261589 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -338,6 +338,35 @@ static void device_remove_attributes(struct device *dev,
 			device_remove_file(dev, &attrs[i]);
 }
 
+static int device_add_bin_attributes(struct device *dev,
+				     struct bin_attribute *attrs)
+{
+	int error = 0;
+	int i;
+
+	if (attrs) {
+		for (i = 0; attr_name(attrs[i]); i++) {
+			error = device_create_bin_file(dev, &attrs[i]);
+			if (error)
+				break;
+		}
+		if (error)
+			while (--i >= 0)
+				device_remove_bin_file(dev, &attrs[i]);
+	}
+	return error;
+}
+
+static void device_remove_bin_attributes(struct device *dev,
+					 struct bin_attribute *attrs)
+{
+	int i;
+
+	if (attrs)
+		for (i = 0; attr_name(attrs[i]); i++)
+			device_remove_bin_file(dev, &attrs[i]);
+}
+
 static int device_add_groups(struct device *dev,
 			     const struct attribute_group **groups)
 {
@@ -378,12 +407,15 @@ static int device_add_attrs(struct device *dev)
 		error = device_add_attributes(dev, class->dev_attrs);
 		if (error)
 			return error;
+		error = device_add_bin_attributes(dev, class->dev_bin_attrs);
+		if (error)
+			goto err_remove_class_attrs;
 	}
 
 	if (type) {
 		error = device_add_groups(dev, type->groups);
 		if (error)
-			goto err_remove_class_attrs;
+			goto err_remove_class_bin_attrs;
 	}
 
 	error = device_add_groups(dev, dev->groups);
@@ -395,6 +427,9 @@ static int device_add_attrs(struct device *dev)
  err_remove_type_groups:
 	if (type)
 		device_remove_groups(dev, type->groups);
+ err_remove_class_bin_attrs:
+	if (class)
+		device_remove_bin_attributes(dev, class->dev_bin_attrs);
  err_remove_class_attrs:
 	if (class)
 		device_remove_attributes(dev, class->dev_attrs);
@@ -412,8 +447,10 @@ static void device_remove_attrs(struct device *dev)
 	if (type)
 		device_remove_groups(dev, type->groups);
 
-	if (class)
+	if (class) {
 		device_remove_attributes(dev, class->dev_attrs);
+		device_remove_bin_attributes(dev, class->dev_bin_attrs);
+	}
 }
 
 
diff --git a/include/linux/device.h b/include/linux/device.h
index dd4895313468..032bdb5406a2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -197,6 +197,7 @@ struct class {
 
 	struct class_attribute		*class_attrs;
 	struct device_attribute		*dev_attrs;
+	struct bin_attribute		*dev_bin_attrs;
 	struct kobject			*dev_kobj;
 
 	int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env);
-- 
cgit v1.2.3-71-gd317


From ec6e7c3ae39cb1dc279b5274aaaadd09ff8e224b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 6 Jan 2011 13:45:32 -0500
Subject: tracing/trivial: Add missing comma in TRACE_EVENT comment

Add missing comma within the TRACE_EVENT() example in tracepoint.h.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
LKML-Reference: <20110106184532.GA2526@Krystal>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d3e4f87e95c0..899103c8f6f2 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -326,7 +326,7 @@ do_trace:								\
  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
  *		__entry->next_pid	= next->pid;
  *		__entry->next_prio	= next->prio;
- *	)
+ *	),
  *
  *	*
  *	* Formatted output of a trace record via TP_printk().
-- 
cgit v1.2.3-71-gd317


From bd1c8b22b7b81c6f6c4f5c19cb2387da3d02fb0f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 4 Jan 2011 16:06:09 +0800
Subject: tracepoint: Add __rcu annotation

Add __rcu annotation to :
	(struct tracepoint)->funcs

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4D22D4F1.50505@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 899103c8f6f2..c6814616653b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -32,7 +32,7 @@ struct tracepoint {
 	int state;			/* State. */
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
-	struct tracepoint_func *funcs;
+	struct tracepoint_func __rcu *funcs;
 } __attribute__((aligned(32)));		/*
 					 * Aligned on 32 bytes because it is
 					 * globally visible and gcc happily
-- 
cgit v1.2.3-71-gd317


From 2d75af2f2a7a6103a6d539a492fe81deacabde44 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Fri, 7 Jan 2011 13:36:58 -0500
Subject: dynamic debug: Fix build issue with older gcc

On older gcc (3.3) dynamic debug fails to compile:

include/net/inet_connection_sock.h: In function `inet_csk_reset_xmit_timer':
include/net/inet_connection_sock.h:236: error: duplicate label declaration `do_printk'
include/net/inet_connection_sock.h:219: error: this is a previous declaration
include/net/inet_connection_sock.h:236: error: duplicate label declaration `out'
include/net/inet_connection_sock.h:219: error: this is a previous declaration
include/net/inet_connection_sock.h:236: error: duplicate label `do_printk'
include/net/inet_connection_sock.h:236: error: duplicate label `out'

Fix, by reverting the usage of JUMP_LABEL() in dynamic debug for now.

Cc: <stable@kernel.org>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/dynamic_debug.h | 18 ++++--------------
 lib/dynamic_debug.c           |  9 ++++-----
 2 files changed, 8 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a90b3892074a..1c70028f81f9 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -44,34 +44,24 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 extern int ddebug_remove_module(const char *mod_name);
 
 #define dynamic_pr_debug(fmt, ...) do {					\
-	__label__ do_printk;						\
-	__label__ out;							\
 	static struct _ddebug descriptor				\
 	__used								\
 	__attribute__((section("__verbose"), aligned(8))) =		\
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,		\
 		_DPRINTK_FLAGS_DEFAULT };				\
-	JUMP_LABEL(&descriptor.enabled, do_printk);			\
-	goto out;							\
-do_printk:								\
-	printk(KERN_DEBUG pr_fmt(fmt),	##__VA_ARGS__);			\
-out:	;								\
+	if (unlikely(descriptor.enabled))				\
+		printk(KERN_DEBUG pr_fmt(fmt),	##__VA_ARGS__);		\
 	} while (0)
 
 
 #define dynamic_dev_dbg(dev, fmt, ...) do {				\
-	__label__ do_printk;						\
-	__label__ out;							\
 	static struct _ddebug descriptor				\
 	__used								\
 	__attribute__((section("__verbose"), aligned(8))) =		\
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,		\
 		_DPRINTK_FLAGS_DEFAULT };				\
-	JUMP_LABEL(&descriptor.enabled, do_printk);			\
-	goto out;							\
-do_printk:								\
-	dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);		\
-out:	;								\
+	if (unlikely(descriptor.enabled))				\
+		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
 	} while (0)
 
 #else
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3094318bfea7..b335acb43be2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -141,11 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
 			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
-			if (newflags) {
-				jump_label_enable(&dp->enabled);
-			} else {
-				jump_label_disable(&dp->enabled);
-			}
+			if (newflags)
+				dp->enabled = 1;
+			else
+				dp->enabled = 0;
 			if (verbose)
 				printk(KERN_INFO
 					"ddebug: changed %s:%d [%s]%s %s\n",
-- 
cgit v1.2.3-71-gd317


From 26daa1ed40c6b31b4220581431982814c47c608a Mon Sep 17 00:00:00 2001
From: Jennifer Li <Jennifer.li@o2micro.com>
Date: Wed, 17 Nov 2010 23:01:59 -0500
Subject: mmc: sdhci: Disable ADMA on some O2Micro SD/MMC parts.

This patch disables the broken ADMA on selected O2Micro devices.

Signed-off-by: Jennifer Li <Jennifer.li@o2micro.com>
Reviewed-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-pci.c | 112 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h      |   5 ++
 2 files changed, 117 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 3d9c2460d437..831cf91b644a 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -176,6 +176,74 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 };
 
+/* O2Micro extra registers */
+#define O2_SD_LOCK_WP		0xD3
+#define O2_SD_MULTI_VCC3V	0xEE
+#define O2_SD_CLKREQ		0xEC
+#define O2_SD_CAPS		0xE0
+#define O2_SD_ADMA1		0xE2
+#define O2_SD_ADMA2		0xE7
+#define O2_SD_INF_MOD		0xF1
+
+static int o2_probe(struct sdhci_pci_chip *chip)
+{
+	int ret;
+	u8 scratch;
+
+	switch (chip->pdev->device) {
+	case PCI_DEVICE_ID_O2_8220:
+	case PCI_DEVICE_ID_O2_8221:
+	case PCI_DEVICE_ID_O2_8320:
+	case PCI_DEVICE_ID_O2_8321:
+		/* This extra setup is required due to broken ADMA. */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch &= 0x7f;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+
+		/* Set Multi 3 to VCC3V# */
+		pci_write_config_byte(chip->pdev, O2_SD_MULTI_VCC3V, 0x08);
+
+		/* Disable CLK_REQ# support after media DET */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x20;
+		pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch);
+
+		/* Choose capabilities, enable SDMA.  We have to write 0x01
+		 * to the capabilities register first to unlock it.
+		 */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x01;
+		pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch);
+		pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73);
+
+		/* Disable ADMA1/2 */
+		pci_write_config_byte(chip->pdev, O2_SD_ADMA1, 0x39);
+		pci_write_config_byte(chip->pdev, O2_SD_ADMA2, 0x08);
+
+		/* Disable the infinite transfer mode */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x08;
+		pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch);
+
+		/* Lock WP */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x80;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+	}
+
+	return 0;
+}
+
 static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
 {
 	u8 scratch;
@@ -339,6 +407,10 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
 	return 0;
 }
 
+static const struct sdhci_pci_fixes sdhci_o2 = {
+	.probe		= o2_probe,
+};
+
 static const struct sdhci_pci_fixes sdhci_jmicron = {
 	.probe		= jmicron_probe,
 
@@ -589,6 +661,46 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8120,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8220,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8221,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8320,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8321,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
 	{	/* Generic SD host controller */
 		PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
 	},
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cb845c16ad7d..7a58875cdad5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1650,6 +1650,11 @@
 #define PCI_DEVICE_ID_O2_6836		0x6836
 #define PCI_DEVICE_ID_O2_6812		0x6872
 #define PCI_DEVICE_ID_O2_6933		0x6933
+#define PCI_DEVICE_ID_O2_8120		0x8120
+#define PCI_DEVICE_ID_O2_8220		0x8220
+#define PCI_DEVICE_ID_O2_8221		0x8221
+#define PCI_DEVICE_ID_O2_8320		0x8320
+#define PCI_DEVICE_ID_O2_8321		0x8321
 
 #define PCI_VENDOR_ID_3DFX		0x121a
 #define PCI_DEVICE_ID_3DFX_VOODOO	0x0001
-- 
cgit v1.2.3-71-gd317


From 04566831a703ae3ef4b49a2deae261c9ed26e020 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 8 Nov 2010 21:36:50 -0500
Subject: mmc: Aggressive clock gating framework

This patch modifies the MMC core code to optionally call the set_ios()
operation on the driver with the clock frequency set to 0 (gate) after
a grace period of at least 8 MCLK cycles, then restore it (ungate)
before any new request. This gives the driver the option to shut down
the MCI clock to the MMC/SD card when the clock frequency is 0, i.e.
the core has stated that the MCI clock does not need to be generated.

It is inspired by existing clock gating code found in the OMAP and
Atmel drivers and brings this up to the host abstraction.  Gating is
performed before and after any MMC request.

This patchset implements this for the MMCI/PL180 MMC/SD host controller,
but it should be simple to switch OMAP/Atmel over to using this instead.

mmc_set_{gated,ungated}() add variable protection to the state holders
for the clock gating code.  This is particularly important when ordinary
.set_ios() calls would race with the .set_ios() call resulting from a
delayed gate operation.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Reviewed-by: Chris Ball <cjb@laptop.org>
Tested-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/Kconfig   |  11 +++
 drivers/mmc/core/core.c    |  62 +++++++++++++-
 drivers/mmc/core/core.h    |   3 +
 drivers/mmc/core/debugfs.c |   5 ++
 drivers/mmc/core/host.c    | 205 ++++++++++++++++++++++++++++++++++++++++++++-
 drivers/mmc/core/host.h    |  21 +++++
 include/linux/mmc/host.h   |  10 +++
 7 files changed, 315 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index bb22ffd76ef8..ef103871517f 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -16,3 +16,14 @@ config MMC_UNSAFE_RESUME
 
 	  This option sets a default which can be overridden by the
 	  module parameter "removable=0" or "removable=1".
+
+config MMC_CLKGATE
+	bool "MMC host clock gating (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  This will attempt to aggressively gate the clock to the MMC card.
+	  This is done to save power due to gating off the logic and bus
+	  noise when the MMC card is not in use. Your host driver has to
+	  support handling this in order for it to be of any use.
+
+	  If unsure, say N.
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index a3a780faf85a..722af2dce3bb 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -130,6 +130,8 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 
 		if (mrq->done)
 			mrq->done(mrq);
+
+		mmc_host_clk_gate(host);
 	}
 }
 
@@ -190,6 +192,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 			mrq->stop->mrq = mrq;
 		}
 	}
+	mmc_host_clk_ungate(host);
 	host->ops->request(host, mrq);
 }
 
@@ -296,7 +299,7 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
 
 		timeout_us = data->timeout_ns / 1000;
 		timeout_us += data->timeout_clks * 1000 /
-			(card->host->ios.clock / 1000);
+			(mmc_host_clk_rate(card->host) / 1000);
 
 		if (data->flags & MMC_DATA_WRITE)
 			/*
@@ -614,6 +617,8 @@ static inline void mmc_set_ios(struct mmc_host *host)
 		 ios->power_mode, ios->chip_select, ios->vdd,
 		 ios->bus_width, ios->timing);
 
+	if (ios->clock > 0)
+		mmc_set_ungated(host);
 	host->ops->set_ios(host, ios);
 }
 
@@ -641,6 +646,61 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz)
 	mmc_set_ios(host);
 }
 
+#ifdef CONFIG_MMC_CLKGATE
+/*
+ * This gates the clock by setting it to 0 Hz.
+ */
+void mmc_gate_clock(struct mmc_host *host)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clk_old = host->ios.clock;
+	host->ios.clock = 0;
+	host->clk_gated = true;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+	mmc_set_ios(host);
+}
+
+/*
+ * This restores the clock from gating by using the cached
+ * clock value.
+ */
+void mmc_ungate_clock(struct mmc_host *host)
+{
+	/*
+	 * We should previously have gated the clock, so the clock shall
+	 * be 0 here! The clock may however be 0 during initialization,
+	 * when some request operations are performed before setting
+	 * the frequency. When ungate is requested in that situation
+	 * we just ignore the call.
+	 */
+	if (host->clk_old) {
+		BUG_ON(host->ios.clock);
+		/* This call will also set host->clk_gated to false */
+		mmc_set_clock(host, host->clk_old);
+	}
+}
+
+void mmc_set_ungated(struct mmc_host *host)
+{
+	unsigned long flags;
+
+	/*
+	 * We've been given a new frequency while the clock is gated,
+	 * so make sure we regard this as ungating it.
+	 */
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clk_gated = false;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+}
+
+#else
+void mmc_set_ungated(struct mmc_host *host)
+{
+}
+#endif
+
 /*
  * Change the bus mode (open drain/push-pull) of a host.
  */
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 77240cd11bcf..026c975b99a9 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -33,6 +33,9 @@ void mmc_init_erase(struct mmc_card *card);
 
 void mmc_set_chip_select(struct mmc_host *host, int mode);
 void mmc_set_clock(struct mmc_host *host, unsigned int hz);
+void mmc_gate_clock(struct mmc_host *host);
+void mmc_ungate_clock(struct mmc_host *host);
+void mmc_set_ungated(struct mmc_host *host);
 void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode);
 void mmc_set_bus_width(struct mmc_host *host, unsigned int width);
 void mmc_set_bus_width_ddr(struct mmc_host *host, unsigned int width,
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index eed1405fd742..998797ed67a6 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -183,6 +183,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
 			&mmc_clock_fops))
 		goto err_node;
 
+#ifdef CONFIG_MMC_CLKGATE
+	if (!debugfs_create_u32("clk_delay", (S_IRUSR | S_IWUSR),
+				root, &host->clk_delay))
+		goto err_node;
+#endif
 	return;
 
 err_node:
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 10b8af27e03a..92e33703e437 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -3,6 +3,7 @@
  *
  *  Copyright (C) 2003 Russell King, All Rights Reserved.
  *  Copyright (C) 2007-2008 Pierre Ossman
+ *  Copyright (C) 2010 Linus Walleij
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -20,6 +21,7 @@
 #include <linux/suspend.h>
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
 
 #include "core.h"
 #include "host.h"
@@ -50,6 +52,204 @@ void mmc_unregister_host_class(void)
 static DEFINE_IDR(mmc_host_idr);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
+#ifdef CONFIG_MMC_CLKGATE
+
+/*
+ * Enabling clock gating will make the core call out to the host
+ * once up and once down when it performs a request or card operation
+ * intermingled in any fashion. The driver will see this through
+ * set_ios() operations with ios.clock field set to 0 to gate (disable)
+ * the block clock, and to the old frequency to enable it again.
+ */
+static void mmc_host_clk_gate_delayed(struct mmc_host *host)
+{
+	unsigned long tick_ns;
+	unsigned long freq = host->ios.clock;
+	unsigned long flags;
+
+	if (!freq) {
+		pr_debug("%s: frequency set to 0 in disable function, "
+			 "this means the clock is already disabled.\n",
+			 mmc_hostname(host));
+		return;
+	}
+	/*
+	 * New requests may have appeared while we were scheduling,
+	 * then there is no reason to delay the check before
+	 * clk_disable().
+	 */
+	spin_lock_irqsave(&host->clk_lock, flags);
+
+	/*
+	 * Delay n bus cycles (at least 8 from MMC spec) before attempting
+	 * to disable the MCI block clock. The reference count may have
+	 * gone up again after this delay due to rescheduling!
+	 */
+	if (!host->clk_requests) {
+		spin_unlock_irqrestore(&host->clk_lock, flags);
+		tick_ns = DIV_ROUND_UP(1000000000, freq);
+		ndelay(host->clk_delay * tick_ns);
+	} else {
+		/* New users appeared while waiting for this work */
+		spin_unlock_irqrestore(&host->clk_lock, flags);
+		return;
+	}
+	mutex_lock(&host->clk_gate_mutex);
+	spin_lock_irqsave(&host->clk_lock, flags);
+	if (!host->clk_requests) {
+		spin_unlock_irqrestore(&host->clk_lock, flags);
+		/* This will set host->ios.clock to 0 */
+		mmc_gate_clock(host);
+		spin_lock_irqsave(&host->clk_lock, flags);
+		pr_debug("%s: gated MCI clock\n", mmc_hostname(host));
+	}
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+	mutex_unlock(&host->clk_gate_mutex);
+}
+
+/*
+ * Internal work. Work to disable the clock at some later point.
+ */
+static void mmc_host_clk_gate_work(struct work_struct *work)
+{
+	struct mmc_host *host = container_of(work, struct mmc_host,
+					      clk_gate_work);
+
+	mmc_host_clk_gate_delayed(host);
+}
+
+/**
+ *	mmc_host_clk_ungate - ungate hardware MCI clocks
+ *	@host: host to ungate.
+ *
+ *	Makes sure the host ios.clock is restored to a non-zero value
+ *	past this call.	Increase clock reference count and ungate clock
+ *	if we're the first user.
+ */
+void mmc_host_clk_ungate(struct mmc_host *host)
+{
+	unsigned long flags;
+
+	mutex_lock(&host->clk_gate_mutex);
+	spin_lock_irqsave(&host->clk_lock, flags);
+	if (host->clk_gated) {
+		spin_unlock_irqrestore(&host->clk_lock, flags);
+		mmc_ungate_clock(host);
+		spin_lock_irqsave(&host->clk_lock, flags);
+		pr_debug("%s: ungated MCI clock\n", mmc_hostname(host));
+	}
+	host->clk_requests++;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+	mutex_unlock(&host->clk_gate_mutex);
+}
+
+/**
+ *	mmc_host_may_gate_card - check if this card may be gated
+ *	@card: card to check.
+ */
+static bool mmc_host_may_gate_card(struct mmc_card *card)
+{
+	/* If there is no card we may gate it */
+	if (!card)
+		return true;
+	/*
+	 * Don't gate SDIO cards! These need to be clocked at all times
+	 * since they may be independent systems generating interrupts
+	 * and other events. The clock requests counter from the core will
+	 * go down to zero since the core does not need it, but we will not
+	 * gate the clock, because there is somebody out there that may still
+	 * be using it.
+	 */
+	if (mmc_card_sdio(card))
+		return false;
+
+	return true;
+}
+
+/**
+ *	mmc_host_clk_gate - gate off hardware MCI clocks
+ *	@host: host to gate.
+ *
+ *	Calls the host driver with ios.clock set to zero as often as possible
+ *	in order to gate off hardware MCI clocks. Decrease clock reference
+ *	count and schedule disabling of clock.
+ */
+void mmc_host_clk_gate(struct mmc_host *host)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clk_requests--;
+	if (mmc_host_may_gate_card(host->card) &&
+	    !host->clk_requests)
+		schedule_work(&host->clk_gate_work);
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+}
+
+/**
+ *	mmc_host_clk_rate - get current clock frequency setting
+ *	@host: host to get the clock frequency for.
+ *
+ *	Returns current clock frequency regardless of gating.
+ */
+unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+	unsigned long freq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	if (host->clk_gated)
+		freq = host->clk_old;
+	else
+		freq = host->ios.clock;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+	return freq;
+}
+
+/**
+ *	mmc_host_clk_init - set up clock gating code
+ *	@host: host with potential clock to control
+ */
+static inline void mmc_host_clk_init(struct mmc_host *host)
+{
+	host->clk_requests = 0;
+	/* Hold MCI clock for 8 cycles by default */
+	host->clk_delay = 8;
+	host->clk_gated = false;
+	INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+	spin_lock_init(&host->clk_lock);
+	mutex_init(&host->clk_gate_mutex);
+}
+
+/**
+ *	mmc_host_clk_exit - shut down clock gating code
+ *	@host: host with potential clock to control
+ */
+static inline void mmc_host_clk_exit(struct mmc_host *host)
+{
+	/*
+	 * Wait for any outstanding gate and then make sure we're
+	 * ungated before exiting.
+	 */
+	if (cancel_work_sync(&host->clk_gate_work))
+		mmc_host_clk_gate_delayed(host);
+	if (host->clk_gated)
+		mmc_host_clk_ungate(host);
+	BUG_ON(host->clk_requests > 0);
+}
+
+#else
+
+static inline void mmc_host_clk_init(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_exit(struct mmc_host *host)
+{
+}
+
+#endif
+
 /**
  *	mmc_alloc_host - initialise the per-host structure.
  *	@extra: sizeof private data structure
@@ -82,6 +282,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	host->class_dev.class = &mmc_host_class;
 	device_initialize(&host->class_dev);
 
+	mmc_host_clk_init(host);
+
 	spin_lock_init(&host->lock);
 	init_waitqueue_head(&host->wq);
 	INIT_DELAYED_WORK(&host->detect, mmc_rescan);
@@ -163,6 +365,8 @@ void mmc_remove_host(struct mmc_host *host)
 	device_del(&host->class_dev);
 
 	led_trigger_unregister_simple(host->led);
+
+	mmc_host_clk_exit(host);
 }
 
 EXPORT_SYMBOL(mmc_remove_host);
@@ -183,4 +387,3 @@ void mmc_free_host(struct mmc_host *host)
 }
 
 EXPORT_SYMBOL(mmc_free_host);
-
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 8c87e1109a34..de199f911928 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -10,10 +10,31 @@
  */
 #ifndef _MMC_CORE_HOST_H
 #define _MMC_CORE_HOST_H
+#include <linux/mmc/host.h>
 
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
 
+#ifdef CONFIG_MMC_CLKGATE
+void mmc_host_clk_ungate(struct mmc_host *host);
+void mmc_host_clk_gate(struct mmc_host *host);
+unsigned int mmc_host_clk_rate(struct mmc_host *host);
+
+#else
+static inline void mmc_host_clk_ungate(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_gate(struct mmc_host *host)
+{
+}
+
+static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+	return host->ios.clock;
+}
+#endif
+
 void mmc_host_deeper_disable(struct work_struct *work);
 
 #endif
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 30f6fad99a58..381c77fd4dca 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -172,6 +172,16 @@ struct mmc_host {
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
+#ifdef CONFIG_MMC_CLKGATE
+	int			clk_requests;	/* internal reference counter */
+	unsigned int		clk_delay;	/* number of MCI clk hold cycles */
+	bool			clk_gated;	/* clock gated */
+	struct work_struct	clk_gate_work; /* delayed clock gate */
+	unsigned int		clk_old;	/* old clock value cache */
+	spinlock_t		clk_lock;	/* lock for clk fields */
+	struct mutex		clk_gate_mutex;	/* mutex for clock gating */
+#endif
+
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
 	unsigned short		max_segs;	/* see blk_queue_max_segments */
-- 
cgit v1.2.3-71-gd317


From 8f230f454fe04ba326ffaead3a6b88dcf44eaf4b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 8 Dec 2010 10:04:30 +0100
Subject: mmc: Add support for JMicron 388 SD/MMC controller

JMicron 388 SD/MMC combo controller supports the 1.8V low-voltage for
SD, but MMC doesn't work with the low-voltage, resulting in an error
at probing.

This patch adds the support for multiple voltage mask per device type,
so that SD works with 1.8V while MMC forces 3.3V.  Here new ocr_avail_*
fields for each device are introduced, so that the actual OCR mask is
switched dynamically.

Also, the restriction of low-voltage in core/sd.c is removed when the
bit is allowed explicitly via ocr_avail_sd mask.

This patch was rewritten from scratch based on Aries' original code.

Signed-off-by: Aries Lee <arieslee@jmicron.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c       |  2 ++
 drivers/mmc/core/sd.c        |  5 ++++-
 drivers/mmc/core/sdio.c      |  2 ++
 drivers/mmc/host/sdhci-pci.c | 47 ++++++++++++++++++++++++++++++++++++++------
 drivers/mmc/host/sdhci.c     | 23 +++++++++++++++++-----
 include/linux/mmc/host.h     |  3 +++
 include/linux/mmc/sdhci.h    |  4 ++++
 include/linux/pci_ids.h      |  2 ++
 8 files changed, 76 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 77f93c3b8808..76bb621e9aa9 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -745,6 +745,8 @@ int mmc_attach_mmc(struct mmc_host *host, u32 ocr)
 	WARN_ON(!host->claimed);
 
 	mmc_attach_bus_ops(host);
+	if (host->ocr_avail_mmc)
+		host->ocr_avail = host->ocr_avail_mmc;
 
 	/*
 	 * We need to get OCR a different way for SPI.
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 49da4dffd28e..de062ebd8b26 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -772,6 +772,8 @@ int mmc_attach_sd(struct mmc_host *host, u32 ocr)
 	WARN_ON(!host->claimed);
 
 	mmc_sd_attach_bus_ops(host);
+	if (host->ocr_avail_sd)
+		host->ocr_avail = host->ocr_avail_sd;
 
 	/*
 	 * We need to get OCR a different way for SPI.
@@ -795,7 +797,8 @@ int mmc_attach_sd(struct mmc_host *host, u32 ocr)
 		ocr &= ~0x7F;
 	}
 
-	if (ocr & MMC_VDD_165_195) {
+	if ((ocr & MMC_VDD_165_195) &&
+	    !(host->ocr_avail_sd & MMC_VDD_165_195)) {
 		printk(KERN_WARNING "%s: SD card claims to support the "
 		       "incompletely defined 'low voltage range'. This "
 		       "will be ignored.\n", mmc_hostname(host));
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index efef5f94ac42..c18810ab6465 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -700,6 +700,8 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 	WARN_ON(!host->claimed);
 
 	mmc_attach_bus(host, &mmc_sdio_ops);
+	if (host->ocr_avail_sdio)
+		host->ocr_avail = host->ocr_avail_sdio;
 
 	/*
 	 * Sanity check the voltages that the card claims to
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 831cf91b644a..d2638ffc4ed2 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -272,6 +272,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
 static int jmicron_probe(struct sdhci_pci_chip *chip)
 {
 	int ret;
+	u16 mmcdev = 0;
 
 	if (chip->pdev->revision == 0) {
 		chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
@@ -293,12 +294,17 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
 	 * 2. The MMC interface has a lower subfunction number
 	 *    than the SD interface.
 	 */
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD)
+		mmcdev = PCI_DEVICE_ID_JMICRON_JMB38X_MMC;
+	else if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD)
+		mmcdev = PCI_DEVICE_ID_JMICRON_JMB388_ESD;
+
+	if (mmcdev) {
 		struct pci_dev *sd_dev;
 
 		sd_dev = NULL;
 		while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON,
-			PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) {
+						mmcdev, sd_dev)) != NULL) {
 			if ((PCI_SLOT(chip->pdev->devfn) ==
 				PCI_SLOT(sd_dev->devfn)) &&
 				(chip->pdev->bus == sd_dev->bus))
@@ -358,11 +364,21 @@ static int jmicron_probe_slot(struct sdhci_pci_slot *slot)
 			slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
 	}
 
+	/* JM388 MMC doesn't support 1.8V while SD supports it */
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
+		slot->host->ocr_avail_sd = MMC_VDD_32_33 | MMC_VDD_33_34 |
+			MMC_VDD_29_30 | MMC_VDD_30_31 |
+			MMC_VDD_165_195; /* allow 1.8V */
+		slot->host->ocr_avail_mmc = MMC_VDD_32_33 | MMC_VDD_33_34 |
+			MMC_VDD_29_30 | MMC_VDD_30_31; /* no 1.8V for MMC */
+	}
+
 	/*
 	 * The secondary interface requires a bit set to get the
 	 * interrupts.
 	 */
-	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
 		jmicron_enable_mmc(slot->host, 1);
 
 	return 0;
@@ -373,7 +389,8 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
 	if (dead)
 		return;
 
-	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
 		jmicron_enable_mmc(slot->host, 0);
 }
 
@@ -381,7 +398,8 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state)
 {
 	int i;
 
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
 		for (i = 0;i < chip->num_slots;i++)
 			jmicron_enable_mmc(chip->slots[i]->host, 0);
 	}
@@ -393,7 +411,8 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
 {
 	int ret, i;
 
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
 		for (i = 0;i < chip->num_slots;i++)
 			jmicron_enable_mmc(chip->slots[i]->host, 1);
 	}
@@ -581,6 +600,22 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.driver_data	= (kernel_ulong_t)&sdhci_jmicron,
 	},
 
+	{
+		.vendor		= PCI_VENDOR_ID_JMICRON,
+		.device		= PCI_DEVICE_ID_JMICRON_JMB388_SD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_jmicron,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_JMICRON,
+		.device		= PCI_DEVICE_ID_JMICRON_JMB388_ESD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_jmicron,
+	},
+
 	{
 		.vendor		= PCI_VENDOR_ID_SYSKONNECT,
 		.device		= 0x8000,
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 8a74fcbfe13b..55698864c2cd 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1739,7 +1739,7 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host);
 int sdhci_add_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
-	unsigned int caps;
+	unsigned int caps, ocr_avail;
 	int ret;
 
 	WARN_ON(host == NULL);
@@ -1893,13 +1893,26 @@ int sdhci_add_host(struct sdhci_host *host)
 	    mmc_card_is_removable(mmc))
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
-	mmc->ocr_avail = 0;
+	ocr_avail = 0;
 	if (caps & SDHCI_CAN_VDD_330)
-		mmc->ocr_avail |= MMC_VDD_32_33|MMC_VDD_33_34;
+		ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34;
 	if (caps & SDHCI_CAN_VDD_300)
-		mmc->ocr_avail |= MMC_VDD_29_30|MMC_VDD_30_31;
+		ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31;
 	if (caps & SDHCI_CAN_VDD_180)
-		mmc->ocr_avail |= MMC_VDD_165_195;
+		ocr_avail |= MMC_VDD_165_195;
+
+	mmc->ocr_avail = ocr_avail;
+	mmc->ocr_avail_sdio = ocr_avail;
+	if (host->ocr_avail_sdio)
+		mmc->ocr_avail_sdio &= host->ocr_avail_sdio;
+	mmc->ocr_avail_sd = ocr_avail;
+	if (host->ocr_avail_sd)
+		mmc->ocr_avail_sd &= host->ocr_avail_sd;
+	else /* normal SD controllers don't support 1.8V */
+		mmc->ocr_avail_sd &= ~MMC_VDD_165_195;
+	mmc->ocr_avail_mmc = ocr_avail;
+	if (host->ocr_avail_mmc)
+		mmc->ocr_avail_mmc &= host->ocr_avail_mmc;
 
 	if (mmc->ocr_avail == 0) {
 		printk(KERN_ERR "%s: Hardware doesn't report any "
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 381c77fd4dca..4a9d9d2589c7 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -131,6 +131,9 @@ struct mmc_host {
 	unsigned int		f_max;
 	unsigned int		f_init;
 	u32			ocr_avail;
+	u32			ocr_avail_sdio;	/* SDIO-specific OCR */
+	u32			ocr_avail_sd;	/* SD-specific OCR */
+	u32			ocr_avail_mmc;	/* MMC-specific OCR */
 	struct notifier_block	pm_notify;
 
 #define MMC_VDD_165_195		0x00000080	/* VDD voltage 1.65 - 1.95 */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 1fdc673f2396..0d953f513d81 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -139,6 +139,10 @@ struct sdhci_host {
 
 	unsigned int caps;	/* Alternative capabilities */
 
+	unsigned int            ocr_avail_sdio;	/* OCR bit masks */
+	unsigned int            ocr_avail_sd;
+	unsigned int            ocr_avail_mmc;
+
 	unsigned long private[0] ____cacheline_aligned;
 };
 #endif /* __SDHCI_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7a58875cdad5..2f17b4ccbb58 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2368,6 +2368,8 @@
 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD	0x2381
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382
 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS	0x2383
+#define PCI_DEVICE_ID_JMICRON_JMB388_SD	0x2391
+#define PCI_DEVICE_ID_JMICRON_JMB388_ESD 0x2392
 
 #define PCI_VENDOR_ID_KORENIX		0x1982
 #define PCI_DEVICE_ID_KORENIX_JETCARDF0	0x1600
-- 
cgit v1.2.3-71-gd317


From 080bc9774b6f1e3866747b18631bad26f47c22ce Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Sun, 28 Nov 2010 07:21:29 +0200
Subject: mmc: sdio: don't reinitialize nonremovable powered-resumed cards

Upon system resume, SDIO core must reinitialize cards that were
powered off during suspend.

If the card had its power kept during suspend (and thus it is
'powered-resumed'), SDIO core performs only a limited reinitializing,
mainly needed to make sure that the card wasn't removed/replaced.

If a __nonremovable__ card is powered-resumed, we can safely skip the
reinitializing phase.

Note: 9b966aa (mmc: sdio: fully reconfigure oldcard on resume) removed
the bus width reconfiguration since mmc_sdio_init_card already does it.
It is brought back now in case mmc_sdio_init_card is skipped.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 16 ++++++++++++++--
 include/linux/mmc/host.h |  5 +++++
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index c18810ab6465..82f4b9008987 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -627,15 +627,27 @@ static int mmc_sdio_suspend(struct mmc_host *host)
 
 static int mmc_sdio_resume(struct mmc_host *host)
 {
-	int i, err;
+	int i, err = 0;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
 	/* Basic card reinitialization. */
 	mmc_claim_host(host);
-	err = mmc_sdio_init_card(host, host->ocr, host->card,
+
+	/* No need to reinitialize powered-resumed nonremovable cards */
+	if (mmc_card_is_removable(host) || !mmc_card_is_powered_resumed(host))
+		err = mmc_sdio_init_card(host, host->ocr, host->card,
 				 (host->pm_flags & MMC_PM_KEEP_POWER));
+	else if (mmc_card_is_powered_resumed(host)) {
+		/* We may have switched to 1-bit mode during suspend */
+		err = sdio_enable_4bit_bus(host->card);
+		if (err > 0) {
+			mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+			err = 0;
+		}
+	}
+
 	if (!err && host->sdio_irqs)
 		mmc_signal_sdio_irq(host);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4a9d9d2589c7..3a85e73a38a9 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -320,5 +320,10 @@ static inline int mmc_card_is_removable(struct mmc_host *host)
 	return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable;
 }
 
+static inline int mmc_card_is_powered_resumed(struct mmc_host *host)
+{
+	return host->pm_flags & MMC_PM_KEEP_POWER;
+}
+
 #endif
 
-- 
cgit v1.2.3-71-gd317


From a081748735c5feb96b1365e78a5ff0fb6ca7e3a4 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 8 Jan 2011 21:42:42 -0700
Subject: of/flattree: forward declare struct device_node in of_fdt.h

This patch forward declares struct device_node to fix a compile error
when of_fdt.h is included, but of.h is not.  Alternately, including
linux/of.h could have been added to of_fdt.h, but that pulls in a lot
of unnecessary declarations when only working with the flattened
form.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_fdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ee96091f7d25..0ef22a1f129e 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -59,6 +59,8 @@ struct boot_param_header {
 
 #if defined(CONFIG_OF_FLATTREE)
 
+struct device_node;
+
 /* For scanning an arbitrary device-tree at any time */
 extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset);
 extern void *of_fdt_get_property(struct boot_param_header *blob,
-- 
cgit v1.2.3-71-gd317


From 22113efd00491310da802f3b1a9a66cfcf415fac Mon Sep 17 00:00:00 2001
From: Aries Lee <arieslee@jmicron.com>
Date: Wed, 15 Dec 2010 08:14:24 +0100
Subject: mmc: Test bus-width for old MMC devices

Some old MMC devices fail with the 4/8 bits the driver tries to use
exclusively.  This patch adds a test for the given bus setup and falls
back to the lower bit mode (until 1-bit mode) when the test fails.

[Major rework and refactoring by tiwai]
[Quirk addition and many fixes by prakity]

Signed-off-by: Aries Lee <arieslee@jmicron.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Philip Rakity <prakity@marvell.com>
Tested-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c     |  76 +++++++++++++++++++++-------------
 drivers/mmc/core/mmc_ops.c | 101 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/core/mmc_ops.h |   1 +
 drivers/mmc/host/sdhci.c   |   7 +++-
 drivers/mmc/host/sdhci.h   |   1 +
 include/linux/mmc/host.h   |   1 +
 include/linux/mmc/mmc.h    |   2 +
 7 files changed, 159 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 76bb621e9aa9..1d8409fcf155 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -534,39 +534,57 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
 	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
-		unsigned ext_csd_bit, bus_width;
-
-		if (host->caps & MMC_CAP_8_BIT_DATA) {
-			if (ddr)
-				ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_8;
-			else
-				ext_csd_bit = EXT_CSD_BUS_WIDTH_8;
-			bus_width = MMC_BUS_WIDTH_8;
-		} else {
-			if (ddr)
-				ext_csd_bit = EXT_CSD_DDR_BUS_WIDTH_4;
-			else
-				ext_csd_bit = EXT_CSD_BUS_WIDTH_4;
-			bus_width = MMC_BUS_WIDTH_4;
+		static unsigned ext_csd_bits[][2] = {
+			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
+			{ EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
+			{ EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
+		};
+		static unsigned bus_widths[] = {
+			MMC_BUS_WIDTH_8,
+			MMC_BUS_WIDTH_4,
+			MMC_BUS_WIDTH_1
+		};
+		unsigned idx, bus_width = 0;
+
+		if (host->caps & MMC_CAP_8_BIT_DATA)
+			idx = 0;
+		else
+			idx = 1;
+		for (; idx < ARRAY_SIZE(bus_widths); idx++) {
+			bus_width = bus_widths[idx];
+			if (bus_width == MMC_BUS_WIDTH_1)
+				ddr = 0; /* no DDR for 1-bit width */
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					 EXT_CSD_BUS_WIDTH,
+					 ext_csd_bits[idx][0]);
+			if (!err) {
+				/*
+				 * If controller can't handle bus width test,
+				 * use the highest bus width to maintain
+				 * compatibility with previous MMC behavior.
+				 */
+				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
+					break;
+				mmc_set_bus_width_ddr(card->host,
+						      bus_width, MMC_SDR_MODE);
+				err = mmc_bus_test(card, bus_width);
+				if (!err)
+					break;
+			}
 		}
 
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				 EXT_CSD_BUS_WIDTH, ext_csd_bit);
-
-		if (err && err != -EBADMSG)
-			goto free_card;
-
+		if (!err && ddr) {
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					EXT_CSD_BUS_WIDTH,
+					ext_csd_bits[idx][1]);
+		}
 		if (err) {
 			printk(KERN_WARNING "%s: switch to bus width %d ddr %d "
-			       "failed\n", mmc_hostname(card->host),
-			       1 << bus_width, ddr);
-			err = 0;
-		} else {
-			if (ddr)
-				mmc_card_set_ddr_mode(card);
-			else
-				ddr = MMC_SDR_MODE;
-
+				"failed\n", mmc_hostname(card->host),
+				1 << bus_width, ddr);
+			goto free_card;
+		} else if (ddr) {
+			mmc_card_set_ddr_mode(card);
 			mmc_set_bus_width_ddr(card->host, bus_width, ddr);
 		}
 	}
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 326447c9ede8..60842f878ded 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -462,3 +462,104 @@ int mmc_send_status(struct mmc_card *card, u32 *status)
 	return 0;
 }
 
+static int
+mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode,
+		  u8 len)
+{
+	struct mmc_request mrq;
+	struct mmc_command cmd;
+	struct mmc_data data;
+	struct scatterlist sg;
+	u8 *data_buf;
+	u8 *test_buf;
+	int i, err;
+	static u8 testdata_8bit[8] = { 0x55, 0xaa, 0, 0, 0, 0, 0, 0 };
+	static u8 testdata_4bit[4] = { 0x5a, 0, 0, 0 };
+
+	/* dma onto stack is unsafe/nonportable, but callers to this
+	 * routine normally provide temporary on-stack buffers ...
+	 */
+	data_buf = kmalloc(len, GFP_KERNEL);
+	if (!data_buf)
+		return -ENOMEM;
+
+	if (len == 8)
+		test_buf = testdata_8bit;
+	else if (len == 4)
+		test_buf = testdata_4bit;
+	else {
+		printk(KERN_ERR "%s: Invalid bus_width %d\n",
+		       mmc_hostname(host), len);
+		kfree(data_buf);
+		return -EINVAL;
+	}
+
+	if (opcode == MMC_BUS_TEST_W)
+		memcpy(data_buf, test_buf, len);
+
+	memset(&mrq, 0, sizeof(struct mmc_request));
+	memset(&cmd, 0, sizeof(struct mmc_command));
+	memset(&data, 0, sizeof(struct mmc_data));
+
+	mrq.cmd = &cmd;
+	mrq.data = &data;
+	cmd.opcode = opcode;
+	cmd.arg = 0;
+
+	/* NOTE HACK:  the MMC_RSP_SPI_R1 is always correct here, but we
+	 * rely on callers to never use this with "native" calls for reading
+	 * CSD or CID.  Native versions of those commands use the R2 type,
+	 * not R1 plus a data block.
+	 */
+	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+
+	data.blksz = len;
+	data.blocks = 1;
+	if (opcode == MMC_BUS_TEST_R)
+		data.flags = MMC_DATA_READ;
+	else
+		data.flags = MMC_DATA_WRITE;
+
+	data.sg = &sg;
+	data.sg_len = 1;
+	sg_init_one(&sg, data_buf, len);
+	mmc_wait_for_req(host, &mrq);
+	err = 0;
+	if (opcode == MMC_BUS_TEST_R) {
+		for (i = 0; i < len / 4; i++)
+			if ((test_buf[i] ^ data_buf[i]) != 0xff) {
+				err = -EIO;
+				break;
+			}
+	}
+	kfree(data_buf);
+
+	if (cmd.error)
+		return cmd.error;
+	if (data.error)
+		return data.error;
+
+	return err;
+}
+
+int mmc_bus_test(struct mmc_card *card, u8 bus_width)
+{
+	int err, width;
+
+	if (bus_width == MMC_BUS_WIDTH_8)
+		width = 8;
+	else if (bus_width == MMC_BUS_WIDTH_4)
+		width = 4;
+	else if (bus_width == MMC_BUS_WIDTH_1)
+		return 0; /* no need for test */
+	else
+		return -EINVAL;
+
+	/*
+	 * Ignore errors from BUS_TEST_W.  BUS_TEST_R will fail if there
+	 * is a problem.  This improves chances that the test will work.
+	 */
+	mmc_send_bus_test(card, card->host, MMC_BUS_TEST_W, width);
+	err = mmc_send_bus_test(card, card->host, MMC_BUS_TEST_R, width);
+	return err;
+}
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 653eb8e84178..e6d44b8a18db 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -26,6 +26,7 @@ int mmc_send_cid(struct mmc_host *host, u32 *cid);
 int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
 int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
 int mmc_card_sleepawake(struct mmc_host *host, int sleep);
+int mmc_bus_test(struct mmc_card *card, u8 bus_width);
 
 #endif
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 55698864c2cd..d5febe584b05 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -23,6 +23,7 @@
 
 #include <linux/leds.h>
 
+#include <linux/mmc/mmc.h>
 #include <linux/mmc/host.h>
 
 #include "sdhci.h"
@@ -1521,7 +1522,11 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 
 	if (intmask & SDHCI_INT_DATA_TIMEOUT)
 		host->data->error = -ETIMEDOUT;
-	else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT))
+	else if (intmask & SDHCI_INT_DATA_END_BIT)
+		host->data->error = -EILSEQ;
+	else if ((intmask & SDHCI_INT_DATA_CRC) &&
+		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
+			!= MMC_BUS_TEST_R)
 		host->data->error = -EILSEQ;
 	else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		printk(KERN_ERR "%s: ADMA error\n", mmc_hostname(host->mmc));
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 1efe7dc5255b..6e0969e40650 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -52,6 +52,7 @@
 #define  SDHCI_CMD_RESP_SHORT_BUSY 0x03
 
 #define SDHCI_MAKE_CMD(c, f) (((c & 0xff) << 8) | (f & 0xff))
+#define SDHCI_GET_CMD(c) ((c>>8) & 0x3f)
 
 #define SDHCI_RESPONSE		0x10
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 3a85e73a38a9..bcb793ec7374 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -172,6 +172,7 @@ struct mmc_host {
 #define MMC_CAP_1_2V_DDR	(1 << 12)	/* can support */
 						/* DDR mode at 1.2V */
 #define MMC_CAP_POWER_OFF_CARD	(1 << 13)	/* Can power off after boot */
+#define MMC_CAP_BUS_WIDTH_TEST	(1 << 14)	/* CMD14/CMD19 bus width ok */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 956fbd877692..612301f85d14 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -40,7 +40,9 @@
 #define MMC_READ_DAT_UNTIL_STOP  11   /* adtc [31:0] dadr        R1  */
 #define MMC_STOP_TRANSMISSION    12   /* ac                      R1b */
 #define MMC_SEND_STATUS          13   /* ac   [31:16] RCA        R1  */
+#define MMC_BUS_TEST_R           14   /* adtc                    R1  */
 #define MMC_GO_INACTIVE_STATE    15   /* ac   [31:16] RCA            */
+#define MMC_BUS_TEST_W           19   /* adtc                    R1  */
 #define MMC_SPI_READ_OCR         58   /* spi                  spi_R3 */
 #define MMC_SPI_CRC_ON_OFF       59   /* spi  [0:0] flag      spi_R1 */
 
-- 
cgit v1.2.3-71-gd317


From 30652aa36b58d57fcc1a0acce51e391bbb6edf5e Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Sat, 1 Jan 2011 18:37:32 -0600
Subject: mmc: sdhci: add quirk for max len ADMA descriptors

Some controllers misparse segment length 0 as being 0, not 65536. Add
a quirk to deal with it.

Signed-off-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 10 +++++++---
 include/linux/mmc/sdhci.h |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index d5febe584b05..9e15f41f87be 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1949,10 +1949,14 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * of bytes. When doing hardware scatter/gather, each entry cannot
 	 * be larger than 64 KiB though.
 	 */
-	if (host->flags & SDHCI_USE_ADMA)
-		mmc->max_seg_size = 65536;
-	else
+	if (host->flags & SDHCI_USE_ADMA) {
+		if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
+			mmc->max_seg_size = 65535;
+		else
+			mmc->max_seg_size = 65536;
+	} else {
 		mmc->max_seg_size = mmc->max_req_size;
+	}
 
 	/*
 	 * Maximum block size. This varies from controller to controller and
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 0d953f513d81..83bd9f76709a 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -83,6 +83,8 @@ struct sdhci_host {
 #define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12		(1<<28)
 /* Controller doesn't have HISPD bit field in HI-SPEED SD card */
 #define SDHCI_QUIRK_NO_HISPD_BIT			(1<<29)
+/* Controller treats ADMA descriptors with length 0000h incorrectly */
+#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC		(1<<30)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
-- 
cgit v1.2.3-71-gd317


From f95f3850f7a9e1d49ebc5b6e72e7cc3ec3685b0b Mon Sep 17 00:00:00 2001
From: Will Newton <will.newton@gmail.com>
Date: Sun, 2 Jan 2011 01:11:59 -0500
Subject: mmc: dw_mmc: Add Synopsys DesignWare mmc host driver.

This adds the mmc host driver for the Synopsys DesignWare mmc
host controller, found in a number of embedded SoC designs.

Signed-off-by: Will Newton <will.newton@imgtec.com>
Reviewed-by: Matt Fleming <matt@console-pimps.org>
Reviewed-by: Chris Ball <cjb@laptop.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/Kconfig   |   16 +
 drivers/mmc/host/Makefile  |    1 +
 drivers/mmc/host/dw_mmc.c  | 1796 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/dw_mmc.h  |  168 +++++
 include/linux/mmc/dw_mmc.h |  217 ++++++
 5 files changed, 2198 insertions(+)
 create mode 100644 drivers/mmc/host/dw_mmc.c
 create mode 100644 drivers/mmc/host/dw_mmc.h
 create mode 100644 include/linux/mmc/dw_mmc.h

(limited to 'include/linux')

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 9f47d38dcc7f..5ec02a536c5d 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -479,6 +479,22 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND
 	help
 	  If you say yes here SD-Cards may work on the EZkit.
 
+config MMC_DW
+	tristate "Synopsys DesignWare Memory Card Interface"
+	depends on ARM
+	help
+	  This selects support for the Synopsys DesignWare Mobile Storage IP
+	  block, this provides host support for SD and MMC interfaces, in both
+	  PIO and external DMA modes.
+
+config MMC_DW_IDMAC
+	bool "Internal DMAC interface"
+	depends on MMC_DW
+	help
+	  This selects support for the internal DMAC block within the Synopsys
+	  Designware Mobile Storage IP block. This disables the external DMA
+	  interface.
+
 config MMC_SH_MMCIF
 	tristate "SuperH Internal MMCIF support"
 	depends on MMC_BLOCK && (SUPERH || ARCH_SHMOBILE)
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6d1ff9e27368..e834fb223e9a 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)	+= via-sdmmc.o
 obj-$(CONFIG_SDH_BFIN)		+= bfin_sdh.o
+obj-$(CONFIG_MMC_DW)		+= dw_mmc.o
 obj-$(CONFIG_MMC_SH_MMCIF)	+= sh_mmcif.o
 obj-$(CONFIG_MMC_JZ4740)	+= jz4740_mmc.o
 obj-$(CONFIG_MMC_USHC)		+= ushc.o
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
new file mode 100644
index 000000000000..2fcc82577c1b
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.c
@@ -0,0 +1,1796 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ *  (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/dw_mmc.h>
+#include <linux/bitops.h>
+
+#include "dw_mmc.h"
+
+/* Common flag combinations */
+#define DW_MCI_DATA_ERROR_FLAGS	(SDMMC_INT_DTO | SDMMC_INT_DCRC | \
+				 SDMMC_INT_HTO | SDMMC_INT_SBE  | \
+				 SDMMC_INT_EBE)
+#define DW_MCI_CMD_ERROR_FLAGS	(SDMMC_INT_RTO | SDMMC_INT_RCRC | \
+				 SDMMC_INT_RESP_ERR)
+#define DW_MCI_ERROR_FLAGS	(DW_MCI_DATA_ERROR_FLAGS | \
+				 DW_MCI_CMD_ERROR_FLAGS  | SDMMC_INT_HLE)
+#define DW_MCI_SEND_STATUS	1
+#define DW_MCI_RECV_STATUS	2
+#define DW_MCI_DMA_THRESHOLD	16
+
+#ifdef CONFIG_MMC_DW_IDMAC
+struct idmac_desc {
+	u32		des0;	/* Control Descriptor */
+#define IDMAC_DES0_DIC	BIT(1)
+#define IDMAC_DES0_LD	BIT(2)
+#define IDMAC_DES0_FD	BIT(3)
+#define IDMAC_DES0_CH	BIT(4)
+#define IDMAC_DES0_ER	BIT(5)
+#define IDMAC_DES0_CES	BIT(30)
+#define IDMAC_DES0_OWN	BIT(31)
+
+	u32		des1;	/* Buffer sizes */
+#define IDMAC_SET_BUFFER1_SIZE(d, s) \
+	((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff))
+
+	u32		des2;	/* buffer 1 physical address */
+
+	u32		des3;	/* buffer 2 physical address */
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+/**
+ * struct dw_mci_slot - MMC slot state
+ * @mmc: The mmc_host representing this slot.
+ * @host: The MMC controller this slot is using.
+ * @ctype: Card type for this slot.
+ * @mrq: mmc_request currently being processed or waiting to be
+ *	processed, or NULL when the slot is idle.
+ * @queue_node: List node for placing this node in the @queue list of
+ *	&struct dw_mci.
+ * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @flags: Random state bits associated with the slot.
+ * @id: Number of this slot.
+ * @last_detect_state: Most recently observed card detect state.
+ */
+struct dw_mci_slot {
+	struct mmc_host		*mmc;
+	struct dw_mci		*host;
+
+	u32			ctype;
+
+	struct mmc_request	*mrq;
+	struct list_head	queue_node;
+
+	unsigned int		clock;
+	unsigned long		flags;
+#define DW_MMC_CARD_PRESENT	0
+#define DW_MMC_CARD_NEED_INIT	1
+	int			id;
+	int			last_detect_state;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+static int dw_mci_req_show(struct seq_file *s, void *v)
+{
+	struct dw_mci_slot *slot = s->private;
+	struct mmc_request *mrq;
+	struct mmc_command *cmd;
+	struct mmc_command *stop;
+	struct mmc_data	*data;
+
+	/* Make sure we get a consistent snapshot */
+	spin_lock_bh(&slot->host->lock);
+	mrq = slot->mrq;
+
+	if (mrq) {
+		cmd = mrq->cmd;
+		data = mrq->data;
+		stop = mrq->stop;
+
+		if (cmd)
+			seq_printf(s,
+				   "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+				   cmd->opcode, cmd->arg, cmd->flags,
+				   cmd->resp[0], cmd->resp[1], cmd->resp[2],
+				   cmd->resp[2], cmd->error);
+		if (data)
+			seq_printf(s, "DATA %u / %u * %u flg %x err %d\n",
+				   data->bytes_xfered, data->blocks,
+				   data->blksz, data->flags, data->error);
+		if (stop)
+			seq_printf(s,
+				   "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+				   stop->opcode, stop->arg, stop->flags,
+				   stop->resp[0], stop->resp[1], stop->resp[2],
+				   stop->resp[2], stop->error);
+	}
+
+	spin_unlock_bh(&slot->host->lock);
+
+	return 0;
+}
+
+static int dw_mci_req_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_mci_req_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_req_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dw_mci_req_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int dw_mci_regs_show(struct seq_file *s, void *v)
+{
+	seq_printf(s, "STATUS:\t0x%08x\n", SDMMC_STATUS);
+	seq_printf(s, "RINTSTS:\t0x%08x\n", SDMMC_RINTSTS);
+	seq_printf(s, "CMD:\t0x%08x\n", SDMMC_CMD);
+	seq_printf(s, "CTRL:\t0x%08x\n", SDMMC_CTRL);
+	seq_printf(s, "INTMASK:\t0x%08x\n", SDMMC_INTMASK);
+	seq_printf(s, "CLKENA:\t0x%08x\n", SDMMC_CLKENA);
+
+	return 0;
+}
+
+static int dw_mci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_mci_regs_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dw_mci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
+{
+	struct mmc_host	*mmc = slot->mmc;
+	struct dw_mci *host = slot->host;
+	struct dentry *root;
+	struct dentry *node;
+
+	root = mmc->debugfs_root;
+	if (!root)
+		return;
+
+	node = debugfs_create_file("regs", S_IRUSR, root, host,
+				   &dw_mci_regs_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_file("req", S_IRUSR, root, slot,
+				   &dw_mci_req_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_x32("pending_events", S_IRUSR, root,
+				  (u32 *)&host->pending_events);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_x32("completed_events", S_IRUSR, root,
+				  (u32 *)&host->completed_events);
+	if (!node)
+		goto err;
+
+	return;
+
+err:
+	dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n");
+}
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+static void dw_mci_set_timeout(struct dw_mci *host)
+{
+	/* timeout (maximum) */
+	mci_writel(host, TMOUT, 0xffffffff);
+}
+
+static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+	struct mmc_data	*data;
+	u32 cmdr;
+	cmd->error = -EINPROGRESS;
+
+	cmdr = cmd->opcode;
+
+	if (cmdr == MMC_STOP_TRANSMISSION)
+		cmdr |= SDMMC_CMD_STOP;
+	else
+		cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		/* We expect a response, so set this bit */
+		cmdr |= SDMMC_CMD_RESP_EXP;
+		if (cmd->flags & MMC_RSP_136)
+			cmdr |= SDMMC_CMD_RESP_LONG;
+	}
+
+	if (cmd->flags & MMC_RSP_CRC)
+		cmdr |= SDMMC_CMD_RESP_CRC;
+
+	data = cmd->data;
+	if (data) {
+		cmdr |= SDMMC_CMD_DAT_EXP;
+		if (data->flags & MMC_DATA_STREAM)
+			cmdr |= SDMMC_CMD_STRM_MODE;
+		if (data->flags & MMC_DATA_WRITE)
+			cmdr |= SDMMC_CMD_DAT_WR;
+	}
+
+	return cmdr;
+}
+
+static void dw_mci_start_command(struct dw_mci *host,
+				 struct mmc_command *cmd, u32 cmd_flags)
+{
+	host->cmd = cmd;
+	dev_vdbg(&host->pdev->dev,
+		 "start command: ARGR=0x%08x CMDR=0x%08x\n",
+		 cmd->arg, cmd_flags);
+
+	mci_writel(host, CMDARG, cmd->arg);
+	wmb();
+
+	mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
+}
+
+static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
+{
+	dw_mci_start_command(host, data->stop, host->stop_cmdr);
+}
+
+/* DMA interface functions */
+static void dw_mci_stop_dma(struct dw_mci *host)
+{
+	if (host->use_dma) {
+		host->dma_ops->stop(host);
+		host->dma_ops->cleanup(host);
+	} else {
+		/* Data transfer was stopped by the interrupt handler */
+		set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+	}
+}
+
+#ifdef CONFIG_MMC_DW_IDMAC
+static void dw_mci_dma_cleanup(struct dw_mci *host)
+{
+	struct mmc_data *data = host->data;
+
+	if (data)
+		dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len,
+			     ((data->flags & MMC_DATA_WRITE)
+			      ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+}
+
+static void dw_mci_idmac_stop_dma(struct dw_mci *host)
+{
+	u32 temp;
+
+	/* Disable and reset the IDMAC interface */
+	temp = mci_readl(host, CTRL);
+	temp &= ~SDMMC_CTRL_USE_IDMAC;
+	temp |= SDMMC_CTRL_DMA_RESET;
+	mci_writel(host, CTRL, temp);
+
+	/* Stop the IDMAC running */
+	temp = mci_readl(host, BMOD);
+	temp &= ~SDMMC_IDMAC_ENABLE;
+	mci_writel(host, BMOD, temp);
+}
+
+static void dw_mci_idmac_complete_dma(struct dw_mci *host)
+{
+	struct mmc_data *data = host->data;
+
+	dev_vdbg(&host->pdev->dev, "DMA complete\n");
+
+	host->dma_ops->cleanup(host);
+
+	/*
+	 * If the card was removed, data will be NULL. No point in trying to
+	 * send the stop command or waiting for NBUSY in this case.
+	 */
+	if (data) {
+		set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+		tasklet_schedule(&host->tasklet);
+	}
+}
+
+static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data,
+				    unsigned int sg_len)
+{
+	int i;
+	struct idmac_desc *desc = host->sg_cpu;
+
+	for (i = 0; i < sg_len; i++, desc++) {
+		unsigned int length = sg_dma_len(&data->sg[i]);
+		u32 mem_addr = sg_dma_address(&data->sg[i]);
+
+		/* Set the OWN bit and disable interrupts for this descriptor */
+		desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | IDMAC_DES0_CH;
+
+		/* Buffer length */
+		IDMAC_SET_BUFFER1_SIZE(desc, length);
+
+		/* Physical address to DMA to/from */
+		desc->des2 = mem_addr;
+	}
+
+	/* Set first descriptor */
+	desc = host->sg_cpu;
+	desc->des0 |= IDMAC_DES0_FD;
+
+	/* Set last descriptor */
+	desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc);
+	desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC);
+	desc->des0 |= IDMAC_DES0_LD;
+
+	wmb();
+}
+
+static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len)
+{
+	u32 temp;
+
+	dw_mci_translate_sglist(host, host->data, sg_len);
+
+	/* Select IDMAC interface */
+	temp = mci_readl(host, CTRL);
+	temp |= SDMMC_CTRL_USE_IDMAC;
+	mci_writel(host, CTRL, temp);
+
+	wmb();
+
+	/* Enable the IDMAC */
+	temp = mci_readl(host, BMOD);
+	temp |= SDMMC_IDMAC_ENABLE;
+	mci_writel(host, BMOD, temp);
+
+	/* Start it running */
+	mci_writel(host, PLDMND, 1);
+}
+
+static int dw_mci_idmac_init(struct dw_mci *host)
+{
+	struct idmac_desc *p;
+	int i;
+
+	/* Number of descriptors in the ring buffer */
+	host->ring_size = PAGE_SIZE / sizeof(struct idmac_desc);
+
+	/* Forward link the descriptor list */
+	for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
+		p->des3 = host->sg_dma + (sizeof(struct idmac_desc) * (i + 1));
+
+	/* Set the last descriptor as the end-of-ring descriptor */
+	p->des3 = host->sg_dma;
+	p->des0 = IDMAC_DES0_ER;
+
+	/* Mask out interrupts - get Tx & Rx complete only */
+	mci_writel(host, IDINTEN, SDMMC_IDMAC_INT_NI | SDMMC_IDMAC_INT_RI |
+		   SDMMC_IDMAC_INT_TI);
+
+	/* Set the descriptor base address */
+	mci_writel(host, DBADDR, host->sg_dma);
+	return 0;
+}
+
+static struct dw_mci_dma_ops dw_mci_idmac_ops = {
+	.init = dw_mci_idmac_init,
+	.start = dw_mci_idmac_start_dma,
+	.stop = dw_mci_idmac_stop_dma,
+	.complete = dw_mci_idmac_complete_dma,
+	.cleanup = dw_mci_dma_cleanup,
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
+{
+	struct scatterlist *sg;
+	unsigned int i, direction, sg_len;
+	u32 temp;
+
+	/* If we don't have a channel, we can't do DMA */
+	if (!host->use_dma)
+		return -ENODEV;
+
+	/*
+	 * We don't do DMA on "complex" transfers, i.e. with
+	 * non-word-aligned buffers or lengths. Also, we don't bother
+	 * with all the DMA setup overhead for short transfers.
+	 */
+	if (data->blocks * data->blksz < DW_MCI_DMA_THRESHOLD)
+		return -EINVAL;
+	if (data->blksz & 3)
+		return -EINVAL;
+
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	if (data->flags & MMC_DATA_READ)
+		direction = DMA_FROM_DEVICE;
+	else
+		direction = DMA_TO_DEVICE;
+
+	sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len,
+			    direction);
+
+	dev_vdbg(&host->pdev->dev,
+		 "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n",
+		 (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma,
+		 sg_len);
+
+	/* Enable the DMA interface */
+	temp = mci_readl(host, CTRL);
+	temp |= SDMMC_CTRL_DMA_ENABLE;
+	mci_writel(host, CTRL, temp);
+
+	/* Disable RX/TX IRQs, let DMA handle it */
+	temp = mci_readl(host, INTMASK);
+	temp  &= ~(SDMMC_INT_RXDR | SDMMC_INT_TXDR);
+	mci_writel(host, INTMASK, temp);
+
+	host->dma_ops->start(host, sg_len);
+
+	return 0;
+}
+
+static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
+{
+	u32 temp;
+
+	data->error = -EINPROGRESS;
+
+	WARN_ON(host->data);
+	host->sg = NULL;
+	host->data = data;
+
+	if (dw_mci_submit_data_dma(host, data)) {
+		host->sg = data->sg;
+		host->pio_offset = 0;
+		if (data->flags & MMC_DATA_READ)
+			host->dir_status = DW_MCI_RECV_STATUS;
+		else
+			host->dir_status = DW_MCI_SEND_STATUS;
+
+		temp = mci_readl(host, INTMASK);
+		temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
+		mci_writel(host, INTMASK, temp);
+
+		temp = mci_readl(host, CTRL);
+		temp &= ~SDMMC_CTRL_DMA_ENABLE;
+		mci_writel(host, CTRL, temp);
+	}
+}
+
+static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
+{
+	struct dw_mci *host = slot->host;
+	unsigned long timeout = jiffies + msecs_to_jiffies(500);
+	unsigned int cmd_status = 0;
+
+	mci_writel(host, CMDARG, arg);
+	wmb();
+	mci_writel(host, CMD, SDMMC_CMD_START | cmd);
+
+	while (time_before(jiffies, timeout)) {
+		cmd_status = mci_readl(host, CMD);
+		if (!(cmd_status & SDMMC_CMD_START))
+			return;
+	}
+	dev_err(&slot->mmc->class_dev,
+		"Timeout sending command (cmd %#x arg %#x status %#x)\n",
+		cmd, arg, cmd_status);
+}
+
+static void dw_mci_setup_bus(struct dw_mci_slot *slot)
+{
+	struct dw_mci *host = slot->host;
+	u32 div;
+
+	if (slot->clock != host->current_speed) {
+		if (host->bus_hz % slot->clock)
+			/*
+			 * move the + 1 after the divide to prevent
+			 * over-clocking the card.
+			 */
+			div = ((host->bus_hz / slot->clock) >> 1) + 1;
+		else
+			div = (host->bus_hz  / slot->clock) >> 1;
+
+		dev_info(&slot->mmc->class_dev,
+			 "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ"
+			 " div = %d)\n", slot->id, host->bus_hz, slot->clock,
+			 div ? ((host->bus_hz / div) >> 1) : host->bus_hz, div);
+
+		/* disable clock */
+		mci_writel(host, CLKENA, 0);
+		mci_writel(host, CLKSRC, 0);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		/* set clock to desired speed */
+		mci_writel(host, CLKDIV, div);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		/* enable clock */
+		mci_writel(host, CLKENA, SDMMC_CLKEN_ENABLE);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		host->current_speed = slot->clock;
+	}
+
+	/* Set the current slot bus width */
+	mci_writel(host, CTYPE, slot->ctype);
+}
+
+static void dw_mci_start_request(struct dw_mci *host,
+				 struct dw_mci_slot *slot)
+{
+	struct mmc_request *mrq;
+	struct mmc_command *cmd;
+	struct mmc_data	*data;
+	u32 cmdflags;
+
+	mrq = slot->mrq;
+	if (host->pdata->select_slot)
+		host->pdata->select_slot(slot->id);
+
+	/* Slot specific timing and width adjustment */
+	dw_mci_setup_bus(slot);
+
+	host->cur_slot = slot;
+	host->mrq = mrq;
+
+	host->pending_events = 0;
+	host->completed_events = 0;
+	host->data_status = 0;
+
+	data = mrq->data;
+	if (data) {
+		dw_mci_set_timeout(host);
+		mci_writel(host, BYTCNT, data->blksz*data->blocks);
+		mci_writel(host, BLKSIZ, data->blksz);
+	}
+
+	cmd = mrq->cmd;
+	cmdflags = dw_mci_prepare_command(slot->mmc, cmd);
+
+	/* this is the first command, send the initialization clock */
+	if (test_and_clear_bit(DW_MMC_CARD_NEED_INIT, &slot->flags))
+		cmdflags |= SDMMC_CMD_INIT;
+
+	if (data) {
+		dw_mci_submit_data(host, data);
+		wmb();
+	}
+
+	dw_mci_start_command(host, cmd, cmdflags);
+
+	if (mrq->stop)
+		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
+}
+
+static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
+				 struct mmc_request *mrq)
+{
+	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
+		 host->state);
+
+	spin_lock_bh(&host->lock);
+	slot->mrq = mrq;
+
+	if (host->state == STATE_IDLE) {
+		host->state = STATE_SENDING_CMD;
+		dw_mci_start_request(host, slot);
+	} else {
+		list_add_tail(&slot->queue_node, &host->queue);
+	}
+
+	spin_unlock_bh(&host->lock);
+}
+
+static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+
+	WARN_ON(slot->mrq);
+
+	if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	/* We don't support multiple blocks of weird lengths. */
+	dw_mci_queue_request(host, slot, mrq);
+}
+
+static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+
+	/* set default 1 bit mode */
+	slot->ctype = SDMMC_CTYPE_1BIT;
+
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		slot->ctype = SDMMC_CTYPE_1BIT;
+		break;
+	case MMC_BUS_WIDTH_4:
+		slot->ctype = SDMMC_CTYPE_4BIT;
+		break;
+	}
+
+	if (ios->clock) {
+		/*
+		 * Use mirror of ios->clock to prevent race with mmc
+		 * core ios update when finding the minimum.
+		 */
+		slot->clock = ios->clock;
+	}
+
+	switch (ios->power_mode) {
+	case MMC_POWER_UP:
+		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
+		break;
+	default:
+		break;
+	}
+}
+
+static int dw_mci_get_ro(struct mmc_host *mmc)
+{
+	int read_only;
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci_board *brd = slot->host->pdata;
+
+	/* Use platform get_ro function, else try on board write protect */
+	if (brd->get_ro)
+		read_only = brd->get_ro(slot->id);
+	else
+		read_only =
+			mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0;
+
+	dev_dbg(&mmc->class_dev, "card is %s\n",
+		read_only ? "read-only" : "read-write");
+
+	return read_only;
+}
+
+static int dw_mci_get_cd(struct mmc_host *mmc)
+{
+	int present;
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci_board *brd = slot->host->pdata;
+
+	/* Use platform get_cd function, else try onboard card detect */
+	if (brd->get_cd)
+		present = !brd->get_cd(slot->id);
+	else
+		present = (mci_readl(slot->host, CDETECT) & (1 << slot->id))
+			== 0 ? 1 : 0;
+
+	if (present)
+		dev_dbg(&mmc->class_dev, "card is present\n");
+	else
+		dev_dbg(&mmc->class_dev, "card is not present\n");
+
+	return present;
+}
+
+static const struct mmc_host_ops dw_mci_ops = {
+	.request	= dw_mci_request,
+	.set_ios	= dw_mci_set_ios,
+	.get_ro		= dw_mci_get_ro,
+	.get_cd		= dw_mci_get_cd,
+};
+
+static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
+	__releases(&host->lock)
+	__acquires(&host->lock)
+{
+	struct dw_mci_slot *slot;
+	struct mmc_host	*prev_mmc = host->cur_slot->mmc;
+
+	WARN_ON(host->cmd || host->data);
+
+	host->cur_slot->mrq = NULL;
+	host->mrq = NULL;
+	if (!list_empty(&host->queue)) {
+		slot = list_entry(host->queue.next,
+				  struct dw_mci_slot, queue_node);
+		list_del(&slot->queue_node);
+		dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n",
+			 mmc_hostname(slot->mmc));
+		host->state = STATE_SENDING_CMD;
+		dw_mci_start_request(host, slot);
+	} else {
+		dev_vdbg(&host->pdev->dev, "list empty\n");
+		host->state = STATE_IDLE;
+	}
+
+	spin_unlock(&host->lock);
+	mmc_request_done(prev_mmc, mrq);
+	spin_lock(&host->lock);
+}
+
+static void dw_mci_command_complete(struct dw_mci *host, struct mmc_command *cmd)
+{
+	u32 status = host->cmd_status;
+
+	host->cmd_status = 0;
+
+	/* Read the response from the card (up to 16 bytes) */
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136) {
+			cmd->resp[3] = mci_readl(host, RESP0);
+			cmd->resp[2] = mci_readl(host, RESP1);
+			cmd->resp[1] = mci_readl(host, RESP2);
+			cmd->resp[0] = mci_readl(host, RESP3);
+		} else {
+			cmd->resp[0] = mci_readl(host, RESP0);
+			cmd->resp[1] = 0;
+			cmd->resp[2] = 0;
+			cmd->resp[3] = 0;
+		}
+	}
+
+	if (status & SDMMC_INT_RTO)
+		cmd->error = -ETIMEDOUT;
+	else if ((cmd->flags & MMC_RSP_CRC) && (status & SDMMC_INT_RCRC))
+		cmd->error = -EILSEQ;
+	else if (status & SDMMC_INT_RESP_ERR)
+		cmd->error = -EIO;
+	else
+		cmd->error = 0;
+
+	if (cmd->error) {
+		/* newer ip versions need a delay between retries */
+		if (host->quirks & DW_MCI_QUIRK_RETRY_DELAY)
+			mdelay(20);
+
+		if (cmd->data) {
+			host->data = NULL;
+			dw_mci_stop_dma(host);
+		}
+	}
+}
+
+static void dw_mci_tasklet_func(unsigned long priv)
+{
+	struct dw_mci *host = (struct dw_mci *)priv;
+	struct mmc_data	*data;
+	struct mmc_command *cmd;
+	enum dw_mci_state state;
+	enum dw_mci_state prev_state;
+	u32 status;
+
+	spin_lock(&host->lock);
+
+	state = host->state;
+	data = host->data;
+
+	do {
+		prev_state = state;
+
+		switch (state) {
+		case STATE_IDLE:
+			break;
+
+		case STATE_SENDING_CMD:
+			if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+						&host->pending_events))
+				break;
+
+			cmd = host->cmd;
+			host->cmd = NULL;
+			set_bit(EVENT_CMD_COMPLETE, &host->completed_events);
+			dw_mci_command_complete(host, host->mrq->cmd);
+			if (!host->mrq->data || cmd->error) {
+				dw_mci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_DATA;
+			/* fall through */
+
+		case STATE_SENDING_DATA:
+			if (test_and_clear_bit(EVENT_DATA_ERROR,
+					       &host->pending_events)) {
+				dw_mci_stop_dma(host);
+				if (data->stop)
+					send_stop_cmd(host, data);
+				state = STATE_DATA_ERROR;
+				break;
+			}
+
+			if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+						&host->pending_events))
+				break;
+
+			set_bit(EVENT_XFER_COMPLETE, &host->completed_events);
+			prev_state = state = STATE_DATA_BUSY;
+			/* fall through */
+
+		case STATE_DATA_BUSY:
+			if (!test_and_clear_bit(EVENT_DATA_COMPLETE,
+						&host->pending_events))
+				break;
+
+			host->data = NULL;
+			set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
+			status = host->data_status;
+
+			if (status & DW_MCI_DATA_ERROR_FLAGS) {
+				if (status & SDMMC_INT_DTO) {
+					dev_err(&host->pdev->dev,
+						"data timeout error\n");
+					data->error = -ETIMEDOUT;
+				} else if (status & SDMMC_INT_DCRC) {
+					dev_err(&host->pdev->dev,
+						"data CRC error\n");
+					data->error = -EILSEQ;
+				} else {
+					dev_err(&host->pdev->dev,
+						"data FIFO error "
+						"(status=%08x)\n",
+						status);
+					data->error = -EIO;
+				}
+			} else {
+				data->bytes_xfered = data->blocks * data->blksz;
+				data->error = 0;
+			}
+
+			if (!data->stop) {
+				dw_mci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_STOP;
+			if (!data->error)
+				send_stop_cmd(host, data);
+			/* fall through */
+
+		case STATE_SENDING_STOP:
+			if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+						&host->pending_events))
+				break;
+
+			host->cmd = NULL;
+			dw_mci_command_complete(host, host->mrq->stop);
+			dw_mci_request_end(host, host->mrq);
+			goto unlock;
+
+		case STATE_DATA_ERROR:
+			if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+						&host->pending_events))
+				break;
+
+			state = STATE_DATA_BUSY;
+			break;
+		}
+	} while (state != prev_state);
+
+	host->state = state;
+unlock:
+	spin_unlock(&host->lock);
+
+}
+
+static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
+{
+	u16 *pdata = (u16 *)buf;
+
+	WARN_ON(cnt % 2 != 0);
+
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		mci_writew(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
+{
+	u16 *pdata = (u16 *)buf;
+
+	WARN_ON(cnt % 2 != 0);
+
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		*pdata++ = mci_readw(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
+{
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		mci_writel(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
+{
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		*pdata++ = mci_readl(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
+{
+	u64 *pdata = (u64 *)buf;
+
+	WARN_ON(cnt % 8 != 0);
+
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		mci_writeq(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
+{
+	u64 *pdata = (u64 *)buf;
+
+	WARN_ON(cnt % 8 != 0);
+
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		*pdata++ = mci_readq(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_read_data_pio(struct dw_mci *host)
+{
+	struct scatterlist *sg = host->sg;
+	void *buf = sg_virt(sg);
+	unsigned int offset = host->pio_offset;
+	struct mmc_data	*data = host->data;
+	int shift = host->data_shift;
+	u32 status;
+	unsigned int nbytes = 0, len, old_len, count = 0;
+
+	do {
+		len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+		if (count == 0)
+			old_len = len;
+
+		if (offset + len <= sg->length) {
+			host->pull_data(host, (void *)(buf + offset), len);
+
+			offset += len;
+			nbytes += len;
+
+			if (offset == sg->length) {
+				flush_dcache_page(sg_page(sg));
+				host->sg = sg = sg_next(sg);
+				if (!sg)
+					goto done;
+
+				offset = 0;
+				buf = sg_virt(sg);
+			}
+		} else {
+			unsigned int remaining = sg->length - offset;
+			host->pull_data(host, (void *)(buf + offset),
+					remaining);
+			nbytes += remaining;
+
+			flush_dcache_page(sg_page(sg));
+			host->sg = sg = sg_next(sg);
+			if (!sg)
+				goto done;
+
+			offset = len - remaining;
+			buf = sg_virt(sg);
+			host->pull_data(host, buf, offset);
+			nbytes += offset;
+		}
+
+		status = mci_readl(host, MINTSTS);
+		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+		if (status & DW_MCI_DATA_ERROR_FLAGS) {
+			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
+
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+			tasklet_schedule(&host->tasklet);
+			return;
+		}
+		count++;
+	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
+	len = SDMMC_GET_FCNT(mci_readl(host, STATUS));
+	host->pio_offset = offset;
+	data->bytes_xfered += nbytes;
+	return;
+
+done:
+	data->bytes_xfered += nbytes;
+	smp_wmb();
+	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_write_data_pio(struct dw_mci *host)
+{
+	struct scatterlist *sg = host->sg;
+	void *buf = sg_virt(sg);
+	unsigned int offset = host->pio_offset;
+	struct mmc_data	*data = host->data;
+	int shift = host->data_shift;
+	u32 status;
+	unsigned int nbytes = 0, len;
+
+	do {
+		len = SDMMC_FIFO_SZ -
+			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
+		if (offset + len <= sg->length) {
+			host->push_data(host, (void *)(buf + offset), len);
+
+			offset += len;
+			nbytes += len;
+			if (offset == sg->length) {
+				host->sg = sg = sg_next(sg);
+				if (!sg)
+					goto done;
+
+				offset = 0;
+				buf = sg_virt(sg);
+			}
+		} else {
+			unsigned int remaining = sg->length - offset;
+
+			host->push_data(host, (void *)(buf + offset),
+					remaining);
+			nbytes += remaining;
+
+			host->sg = sg = sg_next(sg);
+			if (!sg)
+				goto done;
+
+			offset = len - remaining;
+			buf = sg_virt(sg);
+			host->push_data(host, (void *)buf, offset);
+			nbytes += offset;
+		}
+
+		status = mci_readl(host, MINTSTS);
+		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+		if (status & DW_MCI_DATA_ERROR_FLAGS) {
+			host->data_status = status;
+			data->bytes_xfered += nbytes;
+
+			smp_wmb();
+
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+			tasklet_schedule(&host->tasklet);
+			return;
+		}
+	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
+
+	host->pio_offset = offset;
+	data->bytes_xfered += nbytes;
+
+	return;
+
+done:
+	data->bytes_xfered += nbytes;
+	smp_wmb();
+	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
+{
+	if (!host->cmd_status)
+		host->cmd_status = status;
+
+	smp_wmb();
+
+	set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+	tasklet_schedule(&host->tasklet);
+}
+
+static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
+{
+	struct dw_mci *host = dev_id;
+	u32 status, pending;
+	unsigned int pass_count = 0;
+
+	do {
+		status = mci_readl(host, RINTSTS);
+		pending = mci_readl(host, MINTSTS); /* read-only mask reg */
+
+		/*
+		 * DTO fix - version 2.10a and below, and only if internal DMA
+		 * is configured.
+		 */
+		if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
+			if (!pending &&
+			    ((mci_readl(host, STATUS) >> 17) & 0x1fff))
+				pending |= SDMMC_INT_DATA_OVER;
+		}
+
+		if (!pending)
+			break;
+
+		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
+			mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
+			host->cmd_status = status;
+			smp_wmb();
+			set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & DW_MCI_DATA_ERROR_FLAGS) {
+			/* if there is an error report DATA_ERROR */
+			mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS);
+			host->data_status = status;
+			smp_wmb();
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & SDMMC_INT_DATA_OVER) {
+			mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER);
+			if (!host->data_status)
+				host->data_status = status;
+			smp_wmb();
+			if (host->dir_status == DW_MCI_RECV_STATUS) {
+				if (host->sg != NULL)
+					dw_mci_read_data_pio(host);
+			}
+			set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & SDMMC_INT_RXDR) {
+			mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+			if (host->sg)
+				dw_mci_read_data_pio(host);
+		}
+
+		if (pending & SDMMC_INT_TXDR) {
+			mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+			if (host->sg)
+				dw_mci_write_data_pio(host);
+		}
+
+		if (pending & SDMMC_INT_CMD_DONE) {
+			mci_writel(host, RINTSTS, SDMMC_INT_CMD_DONE);
+			dw_mci_cmd_interrupt(host, status);
+		}
+
+		if (pending & SDMMC_INT_CD) {
+			mci_writel(host, RINTSTS, SDMMC_INT_CD);
+			tasklet_schedule(&host->card_tasklet);
+		}
+
+	} while (pass_count++ < 5);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+	/* Handle DMA interrupts */
+	pending = mci_readl(host, IDSTS);
+	if (pending & (SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI)) {
+		mci_writel(host, IDSTS, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI);
+		mci_writel(host, IDSTS, SDMMC_IDMAC_INT_NI);
+		set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+		host->dma_ops->complete(host);
+	}
+#endif
+
+	return IRQ_HANDLED;
+}
+
+static void dw_mci_tasklet_card(unsigned long data)
+{
+	struct dw_mci *host = (struct dw_mci *)data;
+	int i;
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		struct mmc_host *mmc = slot->mmc;
+		struct mmc_request *mrq;
+		int present;
+		u32 ctrl;
+
+		present = dw_mci_get_cd(mmc);
+		while (present != slot->last_detect_state) {
+			spin_lock(&host->lock);
+
+			dev_dbg(&slot->mmc->class_dev, "card %s\n",
+				present ? "inserted" : "removed");
+
+			/* Card change detected */
+			slot->last_detect_state = present;
+
+			/* Power up slot */
+			if (present != 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id,
+							      mmc->ocr_avail);
+
+				set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+			}
+
+			/* Clean up queue if present */
+			mrq = slot->mrq;
+			if (mrq) {
+				if (mrq == host->mrq) {
+					host->data = NULL;
+					host->cmd = NULL;
+
+					switch (host->state) {
+					case STATE_IDLE:
+						break;
+					case STATE_SENDING_CMD:
+						mrq->cmd->error = -ENOMEDIUM;
+						if (!mrq->data)
+							break;
+						/* fall through */
+					case STATE_SENDING_DATA:
+						mrq->data->error = -ENOMEDIUM;
+						dw_mci_stop_dma(host);
+						break;
+					case STATE_DATA_BUSY:
+					case STATE_DATA_ERROR:
+						if (mrq->data->error == -EINPROGRESS)
+							mrq->data->error = -ENOMEDIUM;
+						if (!mrq->stop)
+							break;
+						/* fall through */
+					case STATE_SENDING_STOP:
+						mrq->stop->error = -ENOMEDIUM;
+						break;
+					}
+
+					dw_mci_request_end(host, mrq);
+				} else {
+					list_del(&slot->queue_node);
+					mrq->cmd->error = -ENOMEDIUM;
+					if (mrq->data)
+						mrq->data->error = -ENOMEDIUM;
+					if (mrq->stop)
+						mrq->stop->error = -ENOMEDIUM;
+
+					spin_unlock(&host->lock);
+					mmc_request_done(slot->mmc, mrq);
+					spin_lock(&host->lock);
+				}
+			}
+
+			/* Power down slot */
+			if (present == 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id, 0);
+				clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+				/*
+				 * Clear down the FIFO - doing so generates a
+				 * block interrupt, hence setting the
+				 * scatter-gather pointer to NULL.
+				 */
+				host->sg = NULL;
+
+				ctrl = mci_readl(host, CTRL);
+				ctrl |= SDMMC_CTRL_FIFO_RESET;
+				mci_writel(host, CTRL, ctrl);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+				ctrl = mci_readl(host, BMOD);
+				ctrl |= 0x01; /* Software reset of DMA */
+				mci_writel(host, BMOD, ctrl);
+#endif
+
+			}
+
+			spin_unlock(&host->lock);
+			present = dw_mci_get_cd(mmc);
+		}
+
+		mmc_detect_change(slot->mmc,
+			msecs_to_jiffies(host->pdata->detect_delay_ms));
+	}
+}
+
+static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
+{
+	struct mmc_host *mmc;
+	struct dw_mci_slot *slot;
+
+	mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), &host->pdev->dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	slot = mmc_priv(mmc);
+	slot->id = id;
+	slot->mmc = mmc;
+	slot->host = host;
+
+	mmc->ops = &dw_mci_ops;
+	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510);
+	mmc->f_max = host->bus_hz;
+
+	if (host->pdata->get_ocr)
+		mmc->ocr_avail = host->pdata->get_ocr(id);
+	else
+		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+
+	/*
+	 * Start with slot power disabled, it will be enabled when a card
+	 * is detected.
+	 */
+	if (host->pdata->setpower)
+		host->pdata->setpower(id, 0);
+
+	mmc->caps = 0;
+	if (host->pdata->get_bus_wd)
+		if (host->pdata->get_bus_wd(slot->id) >= 4)
+			mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+	if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED)
+		mmc->caps |= MMC_CAP_SD_HIGHSPEED;
+
+#ifdef CONFIG_MMC_DW_IDMAC
+	mmc->max_segs = host->ring_size;
+	mmc->max_blk_size = 65536;
+	mmc->max_blk_count = host->ring_size;
+	mmc->max_seg_size = 0x1000;
+	mmc->max_req_size = mmc->max_seg_size * mmc->max_blk_count;
+#else
+	if (host->pdata->blk_settings) {
+		mmc->max_segs = host->pdata->blk_settings->max_segs;
+		mmc->max_blk_size = host->pdata->blk_settings->max_blk_size;
+		mmc->max_blk_count = host->pdata->blk_settings->max_blk_count;
+		mmc->max_req_size = host->pdata->blk_settings->max_req_size;
+		mmc->max_seg_size = host->pdata->blk_settings->max_seg_size;
+	} else {
+		/* Useful defaults if platform data is unset. */
+		mmc->max_segs = 64;
+		mmc->max_blk_size = 65536; /* BLKSIZ is 16 bits */
+		mmc->max_blk_count = 512;
+		mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+		mmc->max_seg_size = mmc->max_req_size;
+	}
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+	if (dw_mci_get_cd(mmc))
+		set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+	else
+		clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+	host->slot[id] = slot;
+	mmc_add_host(mmc);
+
+#if defined(CONFIG_DEBUG_FS)
+	dw_mci_init_debugfs(slot);
+#endif
+
+	/* Card initially undetected */
+	slot->last_detect_state = 0;
+
+	return 0;
+}
+
+static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
+{
+	/* Shutdown detect IRQ */
+	if (slot->host->pdata->exit)
+		slot->host->pdata->exit(id);
+
+	/* Debugfs stuff is cleaned up by mmc core */
+	mmc_remove_host(slot->mmc);
+	slot->host->slot[id] = NULL;
+	mmc_free_host(slot->mmc);
+}
+
+static void dw_mci_init_dma(struct dw_mci *host)
+{
+	/* Alloc memory for sg translation */
+	host->sg_cpu = dma_alloc_coherent(&host->pdev->dev, PAGE_SIZE,
+					  &host->sg_dma, GFP_KERNEL);
+	if (!host->sg_cpu) {
+		dev_err(&host->pdev->dev, "%s: could not alloc DMA memory\n",
+			__func__);
+		goto no_dma;
+	}
+
+	/* Determine which DMA interface to use */
+#ifdef CONFIG_MMC_DW_IDMAC
+	host->dma_ops = &dw_mci_idmac_ops;
+	dev_info(&host->pdev->dev, "Using internal DMA controller.\n");
+#endif
+
+	if (!host->dma_ops)
+		goto no_dma;
+
+	if (host->dma_ops->init) {
+		if (host->dma_ops->init(host)) {
+			dev_err(&host->pdev->dev, "%s: Unable to initialize "
+				"DMA Controller.\n", __func__);
+			goto no_dma;
+		}
+	} else {
+		dev_err(&host->pdev->dev, "DMA initialization not found.\n");
+		goto no_dma;
+	}
+
+	host->use_dma = 1;
+	return;
+
+no_dma:
+	dev_info(&host->pdev->dev, "Using PIO mode.\n");
+	host->use_dma = 0;
+	return;
+}
+
+static bool mci_wait_reset(struct device *dev, struct dw_mci *host)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(500);
+	unsigned int ctrl;
+
+	mci_writel(host, CTRL, (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+				SDMMC_CTRL_DMA_RESET));
+
+	/* wait till resets clear */
+	do {
+		ctrl = mci_readl(host, CTRL);
+		if (!(ctrl & (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+			      SDMMC_CTRL_DMA_RESET)))
+			return true;
+	} while (time_before(jiffies, timeout));
+
+	dev_err(dev, "Timeout resetting block (ctrl %#x)\n", ctrl);
+
+	return false;
+}
+
+static int dw_mci_probe(struct platform_device *pdev)
+{
+	struct dw_mci *host;
+	struct resource	*regs;
+	struct dw_mci_board *pdata;
+	int irq, ret, i, width;
+	u32 fifo_size;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs)
+		return -ENXIO;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	host = kzalloc(sizeof(struct dw_mci), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->pdev = pdev;
+	host->pdata = pdata = pdev->dev.platform_data;
+	if (!pdata || !pdata->init) {
+		dev_err(&pdev->dev,
+			"Platform data must supply init function\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	if (!pdata->select_slot && pdata->num_slots > 1) {
+		dev_err(&pdev->dev,
+			"Platform data must supply select_slot function\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	if (!pdata->bus_hz) {
+		dev_err(&pdev->dev,
+			"Platform data must supply bus speed\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	host->bus_hz = pdata->bus_hz;
+	host->quirks = pdata->quirks;
+
+	spin_lock_init(&host->lock);
+	INIT_LIST_HEAD(&host->queue);
+
+	ret = -ENOMEM;
+	host->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!host->regs)
+		goto err_freehost;
+
+	host->dma_ops = pdata->dma_ops;
+	dw_mci_init_dma(host);
+
+	/*
+	 * Get the host data width - this assumes that HCON has been set with
+	 * the correct values.
+	 */
+	i = (mci_readl(host, HCON) >> 7) & 0x7;
+	if (!i) {
+		host->push_data = dw_mci_push_data16;
+		host->pull_data = dw_mci_pull_data16;
+		width = 16;
+		host->data_shift = 1;
+	} else if (i == 2) {
+		host->push_data = dw_mci_push_data64;
+		host->pull_data = dw_mci_pull_data64;
+		width = 64;
+		host->data_shift = 3;
+	} else {
+		/* Check for a reserved value, and warn if it is */
+		WARN((i != 1),
+		     "HCON reports a reserved host data width!\n"
+		     "Defaulting to 32-bit access.\n");
+		host->push_data = dw_mci_push_data32;
+		host->pull_data = dw_mci_pull_data32;
+		width = 32;
+		host->data_shift = 2;
+	}
+
+	/* Reset all blocks */
+	if (!mci_wait_reset(&pdev->dev, host)) {
+		ret = -ENODEV;
+		goto err_dmaunmap;
+	}
+
+	/* Clear the interrupts for the host controller */
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+	/* Put in max timeout */
+	mci_writel(host, TMOUT, 0xFFFFFFFF);
+
+	/*
+	 * FIFO threshold settings  RxMark  = fifo_size / 2 - 1,
+	 *                          Tx Mark = fifo_size / 2 DMA Size = 8
+	 */
+	fifo_size = mci_readl(host, FIFOTH);
+	fifo_size = (fifo_size >> 16) & 0x7ff;
+	mci_writel(host, FIFOTH, ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
+				  ((fifo_size/2) << 0)));
+
+	/* disable clock to CIU */
+	mci_writel(host, CLKENA, 0);
+	mci_writel(host, CLKSRC, 0);
+
+	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
+	tasklet_init(&host->card_tasklet,
+		     dw_mci_tasklet_card, (unsigned long)host);
+
+	ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host);
+	if (ret)
+		goto err_dmaunmap;
+
+	platform_set_drvdata(pdev, host);
+
+	if (host->pdata->num_slots)
+		host->num_slots = host->pdata->num_slots;
+	else
+		host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1;
+
+	/* We need at least one slot to succeed */
+	for (i = 0; i < host->num_slots; i++) {
+		ret = dw_mci_init_slot(host, i);
+		if (ret) {
+			ret = -ENODEV;
+			goto err_init_slot;
+		}
+	}
+
+	/*
+	 * Enable interrupts for command done, data over, data empty, card det,
+	 * receive ready and error such as transmit, receive timeout, crc error
+	 */
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
+		   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
+		   DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
+
+	dev_info(&pdev->dev, "DW MMC controller at irq %d, "
+		 "%d bit host data width\n", irq, width);
+	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
+		dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n");
+
+	return 0;
+
+err_init_slot:
+	/* De-init any initialized slots */
+	while (i > 0) {
+		if (host->slot[i])
+			dw_mci_cleanup_slot(host->slot[i], i);
+		i--;
+	}
+	free_irq(irq, host);
+
+err_dmaunmap:
+	if (host->use_dma && host->dma_ops->exit)
+		host->dma_ops->exit(host);
+	dma_free_coherent(&host->pdev->dev, PAGE_SIZE,
+			  host->sg_cpu, host->sg_dma);
+	iounmap(host->regs);
+
+err_freehost:
+	kfree(host);
+	return ret;
+}
+
+static int __exit dw_mci_remove(struct platform_device *pdev)
+{
+	struct dw_mci *host = platform_get_drvdata(pdev);
+	int i;
+
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+	platform_set_drvdata(pdev, NULL);
+
+	for (i = 0; i < host->num_slots; i++) {
+		dev_dbg(&pdev->dev, "remove slot %d\n", i);
+		if (host->slot[i])
+			dw_mci_cleanup_slot(host->slot[i], i);
+	}
+
+	/* disable clock to CIU */
+	mci_writel(host, CLKENA, 0);
+	mci_writel(host, CLKSRC, 0);
+
+	free_irq(platform_get_irq(pdev, 0), host);
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
+
+	if (host->use_dma && host->dma_ops->exit)
+		host->dma_ops->exit(host);
+
+	iounmap(host->regs);
+
+	kfree(host);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * TODO: we should probably disable the clock to the card in the suspend path.
+ */
+static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg)
+{
+	int i, ret;
+	struct dw_mci *host = platform_get_drvdata(pdev);
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		if (!slot)
+			continue;
+		ret = mmc_suspend_host(slot->mmc);
+		if (ret < 0) {
+			while (--i >= 0) {
+				slot = host->slot[i];
+				if (slot)
+					mmc_resume_host(host->slot[i]->mmc);
+			}
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int dw_mci_resume(struct platform_device *pdev)
+{
+	int i, ret;
+	struct dw_mci *host = platform_get_drvdata(pdev);
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		if (!slot)
+			continue;
+		ret = mmc_resume_host(host->slot[i]->mmc);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+#else
+#define dw_mci_suspend	NULL
+#define dw_mci_resume	NULL
+#endif /* CONFIG_PM */
+
+static struct platform_driver dw_mci_driver = {
+	.remove		= __exit_p(dw_mci_remove),
+	.suspend	= dw_mci_suspend,
+	.resume		= dw_mci_resume,
+	.driver		= {
+		.name		= "dw_mmc",
+	},
+};
+
+static int __init dw_mci_init(void)
+{
+	return platform_driver_probe(&dw_mci_driver, dw_mci_probe);
+}
+
+static void __exit dw_mci_exit(void)
+{
+	platform_driver_unregister(&dw_mci_driver);
+}
+
+module_init(dw_mci_init);
+module_exit(dw_mci_exit);
+
+MODULE_DESCRIPTION("DW Multimedia Card Interface driver");
+MODULE_AUTHOR("NXP Semiconductor VietNam");
+MODULE_AUTHOR("Imagination Technologies Ltd");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
new file mode 100644
index 000000000000..5dd55a75233d
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.h
@@ -0,0 +1,168 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ *  (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _DW_MMC_H_
+#define _DW_MMC_H_
+
+#define SDMMC_CTRL		0x000
+#define SDMMC_PWREN		0x004
+#define SDMMC_CLKDIV		0x008
+#define SDMMC_CLKSRC		0x00c
+#define SDMMC_CLKENA		0x010
+#define SDMMC_TMOUT		0x014
+#define SDMMC_CTYPE		0x018
+#define SDMMC_BLKSIZ		0x01c
+#define SDMMC_BYTCNT		0x020
+#define SDMMC_INTMASK		0x024
+#define SDMMC_CMDARG		0x028
+#define SDMMC_CMD		0x02c
+#define SDMMC_RESP0		0x030
+#define SDMMC_RESP1		0x034
+#define SDMMC_RESP2		0x038
+#define SDMMC_RESP3		0x03c
+#define SDMMC_MINTSTS		0x040
+#define SDMMC_RINTSTS		0x044
+#define SDMMC_STATUS		0x048
+#define SDMMC_FIFOTH		0x04c
+#define SDMMC_CDETECT		0x050
+#define SDMMC_WRTPRT		0x054
+#define SDMMC_GPIO		0x058
+#define SDMMC_TCBCNT		0x05c
+#define SDMMC_TBBCNT		0x060
+#define SDMMC_DEBNCE		0x064
+#define SDMMC_USRID		0x068
+#define SDMMC_VERID		0x06c
+#define SDMMC_HCON		0x070
+#define SDMMC_BMOD		0x080
+#define SDMMC_PLDMND		0x084
+#define SDMMC_DBADDR		0x088
+#define SDMMC_IDSTS		0x08c
+#define SDMMC_IDINTEN		0x090
+#define SDMMC_DSCADDR		0x094
+#define SDMMC_BUFADDR		0x098
+#define SDMMC_DATA		0x100
+#define SDMMC_DATA_ADR		0x100
+
+/* shift bit field */
+#define _SBF(f, v)		((v) << (f))
+
+/* Control register defines */
+#define SDMMC_CTRL_USE_IDMAC		BIT(25)
+#define SDMMC_CTRL_CEATA_INT_EN		BIT(11)
+#define SDMMC_CTRL_SEND_AS_CCSD		BIT(10)
+#define SDMMC_CTRL_SEND_CCSD		BIT(9)
+#define SDMMC_CTRL_ABRT_READ_DATA	BIT(8)
+#define SDMMC_CTRL_SEND_IRQ_RESP	BIT(7)
+#define SDMMC_CTRL_READ_WAIT		BIT(6)
+#define SDMMC_CTRL_DMA_ENABLE		BIT(5)
+#define SDMMC_CTRL_INT_ENABLE		BIT(4)
+#define SDMMC_CTRL_DMA_RESET		BIT(2)
+#define SDMMC_CTRL_FIFO_RESET		BIT(1)
+#define SDMMC_CTRL_RESET		BIT(0)
+/* Clock Enable register defines */
+#define SDMMC_CLKEN_LOW_PWR		BIT(16)
+#define SDMMC_CLKEN_ENABLE		BIT(0)
+/* time-out register defines */
+#define SDMMC_TMOUT_DATA(n)		_SBF(8, (n))
+#define SDMMC_TMOUT_DATA_MSK		0xFFFFFF00
+#define SDMMC_TMOUT_RESP(n)		((n) & 0xFF)
+#define SDMMC_TMOUT_RESP_MSK		0xFF
+/* card-type register defines */
+#define SDMMC_CTYPE_8BIT		BIT(16)
+#define SDMMC_CTYPE_4BIT		BIT(0)
+#define SDMMC_CTYPE_1BIT		0
+/* Interrupt status & mask register defines */
+#define SDMMC_INT_SDIO			BIT(16)
+#define SDMMC_INT_EBE			BIT(15)
+#define SDMMC_INT_ACD			BIT(14)
+#define SDMMC_INT_SBE			BIT(13)
+#define SDMMC_INT_HLE			BIT(12)
+#define SDMMC_INT_FRUN			BIT(11)
+#define SDMMC_INT_HTO			BIT(10)
+#define SDMMC_INT_DTO			BIT(9)
+#define SDMMC_INT_RTO			BIT(8)
+#define SDMMC_INT_DCRC			BIT(7)
+#define SDMMC_INT_RCRC			BIT(6)
+#define SDMMC_INT_RXDR			BIT(5)
+#define SDMMC_INT_TXDR			BIT(4)
+#define SDMMC_INT_DATA_OVER		BIT(3)
+#define SDMMC_INT_CMD_DONE		BIT(2)
+#define SDMMC_INT_RESP_ERR		BIT(1)
+#define SDMMC_INT_CD			BIT(0)
+#define SDMMC_INT_ERROR			0xbfc2
+/* Command register defines */
+#define SDMMC_CMD_START			BIT(31)
+#define SDMMC_CMD_CCS_EXP		BIT(23)
+#define SDMMC_CMD_CEATA_RD		BIT(22)
+#define SDMMC_CMD_UPD_CLK		BIT(21)
+#define SDMMC_CMD_INIT			BIT(15)
+#define SDMMC_CMD_STOP			BIT(14)
+#define SDMMC_CMD_PRV_DAT_WAIT		BIT(13)
+#define SDMMC_CMD_SEND_STOP		BIT(12)
+#define SDMMC_CMD_STRM_MODE		BIT(11)
+#define SDMMC_CMD_DAT_WR		BIT(10)
+#define SDMMC_CMD_DAT_EXP		BIT(9)
+#define SDMMC_CMD_RESP_CRC		BIT(8)
+#define SDMMC_CMD_RESP_LONG		BIT(7)
+#define SDMMC_CMD_RESP_EXP		BIT(6)
+#define SDMMC_CMD_INDX(n)		((n) & 0x1F)
+/* Status register defines */
+#define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FF)
+#define SDMMC_FIFO_SZ			32
+/* Internal DMAC interrupt defines */
+#define SDMMC_IDMAC_INT_AI		BIT(9)
+#define SDMMC_IDMAC_INT_NI		BIT(8)
+#define SDMMC_IDMAC_INT_CES		BIT(5)
+#define SDMMC_IDMAC_INT_DU		BIT(4)
+#define SDMMC_IDMAC_INT_FBE		BIT(2)
+#define SDMMC_IDMAC_INT_RI		BIT(1)
+#define SDMMC_IDMAC_INT_TI		BIT(0)
+/* Internal DMAC bus mode bits */
+#define SDMMC_IDMAC_ENABLE		BIT(7)
+#define SDMMC_IDMAC_FB			BIT(1)
+#define SDMMC_IDMAC_SWRESET		BIT(0)
+
+/* Register access macros */
+#define mci_readl(dev, reg)			\
+	__raw_readl(dev->regs + SDMMC_##reg)
+#define mci_writel(dev, reg, value)			\
+	__raw_writel((value), dev->regs + SDMMC_##reg)
+
+/* 16-bit FIFO access macros */
+#define mci_readw(dev, reg)			\
+	__raw_readw(dev->regs + SDMMC_##reg)
+#define mci_writew(dev, reg, value)			\
+	__raw_writew((value), dev->regs + SDMMC_##reg)
+
+/* 64-bit FIFO access macros */
+#ifdef readq
+#define mci_readq(dev, reg)			\
+	__raw_readq(dev->regs + SDMMC_##reg)
+#define mci_writeq(dev, reg, value)			\
+	__raw_writeq((value), dev->regs + SDMMC_##reg)
+#else
+/*
+ * Dummy readq implementation for architectures that don't define it.
+ *
+ * We would assume that none of these architectures would configure
+ * the IP block with a 64bit FIFO width, so this code will never be
+ * executed on those machines. Defining these macros here keeps the
+ * rest of the code free from ifdefs.
+ */
+#define mci_readq(dev, reg)			\
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg))
+#define mci_writeq(dev, reg, value)			\
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value)
+#endif
+
+#endif /* _DW_MMC_H_ */
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
new file mode 100644
index 000000000000..16b0261763ed
--- /dev/null
+++ b/include/linux/mmc/dw_mmc.h
@@ -0,0 +1,217 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ *  (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_MMC_DW_MMC_H_
+#define _LINUX_MMC_DW_MMC_H_
+
+#define MAX_MCI_SLOTS	2
+
+enum dw_mci_state {
+	STATE_IDLE = 0,
+	STATE_SENDING_CMD,
+	STATE_SENDING_DATA,
+	STATE_DATA_BUSY,
+	STATE_SENDING_STOP,
+	STATE_DATA_ERROR,
+};
+
+enum {
+	EVENT_CMD_COMPLETE = 0,
+	EVENT_XFER_COMPLETE,
+	EVENT_DATA_COMPLETE,
+	EVENT_DATA_ERROR,
+	EVENT_XFER_ERROR
+};
+
+struct mmc_data;
+
+/**
+ * struct dw_mci - MMC controller state shared between all slots
+ * @lock: Spinlock protecting the queue and associated data.
+ * @regs: Pointer to MMIO registers.
+ * @sg: Scatterlist entry currently being processed by PIO code, if any.
+ * @pio_offset: Offset into the current scatterlist entry.
+ * @cur_slot: The slot which is currently using the controller.
+ * @mrq: The request currently being processed on @cur_slot,
+ *	or NULL if the controller is idle.
+ * @cmd: The command currently being sent to the card, or NULL.
+ * @data: The data currently being transferred, or NULL if no data
+ *	transfer is in progress.
+ * @use_dma: Whether DMA channel is initialized or not.
+ * @sg_dma: Bus address of DMA buffer.
+ * @sg_cpu: Virtual address of DMA buffer.
+ * @dma_ops: Pointer to platform-specific DMA callbacks.
+ * @cmd_status: Snapshot of SR taken upon completion of the current
+ *	command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @data_status: Snapshot of SR taken upon completion of the current
+ *	data transfer. Only valid when EVENT_DATA_COMPLETE or
+ *	EVENT_DATA_ERROR is pending.
+ * @stop_cmdr: Value to be loaded into CMDR when the stop command is
+ *	to be sent.
+ * @dir_status: Direction of current transfer.
+ * @tasklet: Tasklet running the request state machine.
+ * @card_tasklet: Tasklet handling card detect.
+ * @pending_events: Bitmask of events flagged by the interrupt handler
+ *	to be processed by the tasklet.
+ * @completed_events: Bitmask of events which the state machine has
+ *	processed.
+ * @state: Tasklet state.
+ * @queue: List of slots waiting for access to the controller.
+ * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus
+ *	rate and timeout calculations.
+ * @current_speed: Configured rate of the controller.
+ * @num_slots: Number of slots available.
+ * @pdev: Platform device associated with the MMC controller.
+ * @pdata: Platform data associated with the MMC controller.
+ * @slot: Slots sharing this MMC controller.
+ * @data_shift: log2 of FIFO item size.
+ * @push_data: Pointer to FIFO push function.
+ * @pull_data: Pointer to FIFO pull function.
+ * @quirks: Set of quirks that apply to specific versions of the IP.
+ *
+ * Locking
+ * =======
+ *
+ * @lock is a softirq-safe spinlock protecting @queue as well as
+ * @cur_slot, @mrq and @state. These must always be updated
+ * at the same time while holding @lock.
+ *
+ * The @mrq field of struct dw_mci_slot is also protected by @lock,
+ * and must always be written at the same time as the slot is added to
+ * @queue.
+ *
+ * @pending_events and @completed_events are accessed using atomic bit
+ * operations, so they don't need any locking.
+ *
+ * None of the fields touched by the interrupt handler need any
+ * locking. However, ordering is important: Before EVENT_DATA_ERROR or
+ * EVENT_DATA_COMPLETE is set in @pending_events, all data-related
+ * interrupts must be disabled and @data_status updated with a
+ * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the
+ * CMDRDY interupt must be disabled and @cmd_status updated with a
+ * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the
+ * bytes_xfered field of @data must be written. This is ensured by
+ * using barriers.
+ */
+struct dw_mci {
+	spinlock_t		lock;
+	void __iomem		*regs;
+
+	struct scatterlist	*sg;
+	unsigned int		pio_offset;
+
+	struct dw_mci_slot	*cur_slot;
+	struct mmc_request	*mrq;
+	struct mmc_command	*cmd;
+	struct mmc_data		*data;
+
+	/* DMA interface members*/
+	int			use_dma;
+
+	dma_addr_t		sg_dma;
+	void			*sg_cpu;
+	struct dw_mci_dma_ops	*dma_ops;
+#ifdef CONFIG_MMC_DW_IDMAC
+	unsigned int		ring_size;
+#else
+	struct dw_mci_dma_data	*dma_data;
+#endif
+	u32			cmd_status;
+	u32			data_status;
+	u32			stop_cmdr;
+	u32			dir_status;
+	struct tasklet_struct	tasklet;
+	struct tasklet_struct	card_tasklet;
+	unsigned long		pending_events;
+	unsigned long		completed_events;
+	enum dw_mci_state	state;
+	struct list_head	queue;
+
+	u32			bus_hz;
+	u32			current_speed;
+	u32			num_slots;
+	struct platform_device	*pdev;
+	struct dw_mci_board	*pdata;
+	struct dw_mci_slot	*slot[MAX_MCI_SLOTS];
+
+	/* FIFO push and pull */
+	int			data_shift;
+	void (*push_data)(struct dw_mci *host, void *buf, int cnt);
+	void (*pull_data)(struct dw_mci *host, void *buf, int cnt);
+
+	/* Workaround flags */
+	u32			quirks;
+};
+
+/* DMA ops for Internal/External DMAC interface */
+struct dw_mci_dma_ops {
+	/* DMA Ops */
+	int (*init)(struct dw_mci *host);
+	void (*start)(struct dw_mci *host, unsigned int sg_len);
+	void (*complete)(struct dw_mci *host);
+	void (*stop)(struct dw_mci *host);
+	void (*cleanup)(struct dw_mci *host);
+	void (*exit)(struct dw_mci *host);
+};
+
+/* IP Quirks/flags. */
+/* No special quirks or flags to cater for */
+#define DW_MCI_QUIRK_NONE		0
+/* DTO fix for command transmission with IDMAC configured */
+#define DW_MCI_QUIRK_IDMAC_DTO		1
+/* delay needed between retries on some 2.11a implementations */
+#define DW_MCI_QUIRK_RETRY_DELAY	2
+/* High Speed Capable - Supports HS cards (upto 50MHz) */
+#define DW_MCI_QUIRK_HIGHSPEED		4
+
+
+struct dma_pdata;
+
+struct block_settings {
+	unsigned short	max_segs;	/* see blk_queue_max_segments */
+	unsigned int	max_blk_size;	/* maximum size of one mmc block */
+	unsigned int	max_blk_count;	/* maximum number of blocks in one req*/
+	unsigned int	max_req_size;	/* maximum number of bytes in one req*/
+	unsigned int	max_seg_size;	/* see blk_queue_max_segment_size */
+};
+
+/* Board platform data */
+struct dw_mci_board {
+	u32 num_slots;
+
+	u32 quirks; /* Workaround / Quirk flags */
+	unsigned int bus_hz; /* Bus speed */
+
+	/* delay in mS before detecting cards after interrupt */
+	u32 detect_delay_ms;
+
+	int (*init)(u32 slot_id, irq_handler_t , void *);
+	int (*get_ro)(u32 slot_id);
+	int (*get_cd)(u32 slot_id);
+	int (*get_ocr)(u32 slot_id);
+	int (*get_bus_wd)(u32 slot_id);
+	/*
+	 * Enable power to selected slot and set voltage to desired level.
+	 * Voltage levels are specified using MMC_VDD_xxx defines defined
+	 * in linux/mmc/host.h file.
+	 */
+	void (*setpower)(u32 slot_id, u32 volt);
+	void (*exit)(u32 slot_id);
+	void (*select_slot)(u32 slot_id);
+
+	struct dw_mci_dma_ops *dma_ops;
+	struct dma_pdata *data;
+	struct block_settings *blk_settings;
+};
+
+#endif /* _LINUX_MMC_DW_MMC_H_ */
-- 
cgit v1.2.3-71-gd317


From 93173054f2979de41b1912b19f0b57edfb35fcdc Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 22 Dec 2010 12:02:15 +0100
Subject: mmc: tmio_mmc: implement a bounce buffer for unaligned DMA

For example, with SDIO WLAN cards, some transfers happen with buffers at
odd addresses, whereas the SH-Mobile DMA engine requires even addresses
for SDHI. This patch extends the tmio driver with a bounce buffer, that
is used for single entry scatter-gather lists both for sending and
receiving. If we ever encounter unaligned transfers with multi-element
sg lists, this patch will have to be extended. For now it just falls
back to PIO in this and other unsupported cases.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/tmio_mmc.c | 89 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/tmio.h    |  1 +
 2 files changed, 84 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index e04c032abb1c..595b7b3f160d 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -111,6 +111,8 @@
 		sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
 	} while (0)
 
+/* This is arbitrary, just noone needed any higher alignment yet */
+#define MAX_ALIGN 4
 
 struct tmio_mmc_host {
 	void __iomem *ctl;
@@ -127,6 +129,7 @@ struct tmio_mmc_host {
 
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
+	struct scatterlist      *sg_orig;
 	unsigned int            sg_len;
 	unsigned int            sg_off;
 
@@ -139,9 +142,13 @@ struct tmio_mmc_host {
 	struct tasklet_struct	dma_issue;
 #ifdef CONFIG_TMIO_MMC_DMA
 	unsigned int            dma_sglen;
+	u8			bounce_buf[PAGE_CACHE_SIZE] __attribute__((aligned(MAX_ALIGN)));
+	struct scatterlist	bounce_sg;
 #endif
 };
 
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host);
+
 static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
 {
 	return readw(host->ctl + (addr << host->bus_shift));
@@ -180,6 +187,7 @@ static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data)
 {
 	host->sg_len = data->sg_len;
 	host->sg_ptr = data->sg;
+	host->sg_orig = data->sg;
 	host->sg_off = 0;
 }
 
@@ -438,6 +446,8 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 	if (data->flags & MMC_DATA_READ) {
 		if (!host->chan_rx)
 			disable_mmc_irqs(host, TMIO_MASK_READOP);
+		else
+			tmio_check_bounce_buffer(host);
 		dev_dbg(&host->pdev->dev, "Complete Rx request %p\n",
 			host->mrq);
 	} else {
@@ -529,8 +539,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 			if (!host->chan_rx)
 				enable_mmc_irqs(host, TMIO_MASK_READOP);
 		} else {
-			struct dma_chan *chan = host->chan_tx;
-			if (!chan)
+			if (!host->chan_tx)
 				enable_mmc_irqs(host, TMIO_MASK_WRITEOP);
 			else
 				tasklet_schedule(&host->dma_issue);
@@ -612,6 +621,16 @@ out:
 }
 
 #ifdef CONFIG_TMIO_MMC_DMA
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+	if (host->sg_ptr == &host->bounce_sg) {
+		unsigned long flags;
+		void *sg_vaddr = tmio_mmc_kmap_atomic(host->sg_orig, &flags);
+		memcpy(sg_vaddr, host->bounce_buf, host->bounce_sg.length);
+		tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+	}
+}
+
 static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
 {
 #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
@@ -634,11 +653,35 @@ static void tmio_dma_complete(void *arg)
 
 static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 {
-	struct scatterlist *sg = host->sg_ptr;
+	struct scatterlist *sg = host->sg_ptr, *sg_tmp;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_rx;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
 	dma_cookie_t cookie;
-	int ret;
+	int ret, i;
+	bool aligned = true, multiple = true;
+	unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+	for_each_sg(sg, sg_tmp, host->sg_len, i) {
+		if (sg_tmp->offset & align)
+			aligned = false;
+		if (sg_tmp->length & align) {
+			multiple = false;
+			break;
+		}
+	}
+
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+			  align >= MAX_ALIGN)) || !multiple)
+		goto pio;
+
+	/* The only sg element can be unaligned, use our bounce buffer then */
+	if (!aligned) {
+		sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+		host->sg_ptr = &host->bounce_sg;
+		sg = host->sg_ptr;
+	}
 
 	ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_FROM_DEVICE);
 	if (ret > 0) {
@@ -661,6 +704,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 	dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
 		__func__, host->sg_len, ret, cookie, host->mrq);
 
+pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
 		if (ret >= 0)
@@ -684,11 +728,39 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 
 static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 {
-	struct scatterlist *sg = host->sg_ptr;
+	struct scatterlist *sg = host->sg_ptr, *sg_tmp;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_tx;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
 	dma_cookie_t cookie;
-	int ret;
+	int ret, i;
+	bool aligned = true, multiple = true;
+	unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+	for_each_sg(sg, sg_tmp, host->sg_len, i) {
+		if (sg_tmp->offset & align)
+			aligned = false;
+		if (sg_tmp->length & align) {
+			multiple = false;
+			break;
+		}
+	}
+
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+			  align >= MAX_ALIGN)) || !multiple)
+		goto pio;
+
+	/* The only sg element can be unaligned, use our bounce buffer then */
+	if (!aligned) {
+		unsigned long flags;
+		void *sg_vaddr = tmio_mmc_kmap_atomic(sg, &flags);
+		sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+		memcpy(host->bounce_buf, sg_vaddr, host->bounce_sg.length);
+		tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+		host->sg_ptr = &host->bounce_sg;
+		sg = host->sg_ptr;
+	}
 
 	ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_TO_DEVICE);
 	if (ret > 0) {
@@ -709,6 +781,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 	dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
 		__func__, host->sg_len, ret, cookie, host->mrq);
 
+pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
 		if (ret >= 0)
@@ -822,6 +895,10 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host)
 	}
 }
 #else
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+}
+
 static void tmio_mmc_start_dma(struct tmio_mmc_host *host,
 			       struct mmc_data *data)
 {
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 085f041197dc..dbfc053b1f95 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -66,6 +66,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
 struct tmio_mmc_dma {
 	void *chan_priv_tx;
 	void *chan_priv_rx;
+	int alignment_shift;
 };
 
 /*
-- 
cgit v1.2.3-71-gd317


From 845ecd20239c28e97e766ff54078a58be19f3a91 Mon Sep 17 00:00:00 2001
From: Arnd Hannemann <arnd@arndnet.de>
Date: Tue, 28 Dec 2010 23:22:31 +0100
Subject: mmc: tmio_mmc: implement SDIO IRQ support

This patch implements SDIO IRQ support for mfds which
announce the TMIO_MMC_SDIO_IRQ flag for tmio_mmc.
If MMC_CAP_SDIO_IRQ is also set SDIO IRQ signalling is activated.
Tested with a b43-based wireless SDIO card and sh_mobile_sdhi.

Signed-off-by: Arnd Hannemann <arnd@arndnet.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/tmio_mmc.c | 79 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tmio.h    |  4 +++
 2 files changed, 83 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index f442c8205b0a..81bed310ddcd 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -53,6 +53,8 @@
 #define CTL_SD_ERROR_DETAIL_STATUS 0x2c
 #define CTL_SD_DATA_PORT 0x30
 #define CTL_TRANSACTION_CTL 0x34
+#define CTL_SDIO_STATUS 0x36
+#define CTL_SDIO_IRQ_MASK 0x38
 #define CTL_RESET_SD 0xe0
 #define CTL_SDIO_REGS 0x100
 #define CTL_CLK_AND_WAIT_CTL 0x138
@@ -81,6 +83,12 @@
 #define TMIO_STAT_CMD_BUSY      0x40000000
 #define TMIO_STAT_ILL_ACCESS    0x80000000
 
+/* Definitions for values the CTRL_SDIO_STATUS register can take. */
+#define TMIO_SDIO_STAT_IOIRQ	0x0001
+#define TMIO_SDIO_STAT_EXPUB52	0x4000
+#define TMIO_SDIO_STAT_EXWT	0x8000
+#define TMIO_SDIO_MASK_ALL	0xc007
+
 /* Define some IRQ masks */
 /* This is the mask used at reset by the chip */
 #define TMIO_MASK_ALL           0x837f031d
@@ -122,6 +130,7 @@ struct tmio_mmc_host {
 	struct mmc_data         *data;
 	struct mmc_host         *mmc;
 	int                     irq;
+	unsigned int		sdio_irq_enabled;
 
 	/* Callbacks for clock / power control */
 	void (*set_pwr)(struct platform_device *host, int state);
@@ -249,6 +258,22 @@ void pr_debug_status(u32 status)
 #define pr_debug_status(s)  do { } while (0)
 #endif
 
+static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (enable) {
+		host->sdio_irq_enabled = 1;
+		sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001);
+		sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK,
+			(TMIO_SDIO_MASK_ALL & ~TMIO_SDIO_STAT_IOIRQ));
+	} else {
+		sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, TMIO_SDIO_MASK_ALL);
+		sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0000);
+		host->sdio_irq_enabled = 0;
+	}
+}
+
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
 	u32 clk = 0, clock;
@@ -268,8 +293,23 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 {
+	struct mfd_cell *cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+
+	/*
+	 * Testing on sh-mobile showed that SDIO IRQs are unmasked when
+	 * CTL_CLK_AND_WAIT_CTL gets written, so we have to disable the
+	 * device IRQ here and restore the SDIO IRQ mask before
+	 * re-enabling the device IRQ.
+	 */
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		disable_irq(host->irq);
 	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000);
 	msleep(10);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+		enable_irq(host->irq);
+	}
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 &
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
@@ -277,11 +317,21 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 
 static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
+	struct mfd_cell *cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 |
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
+	/* see comment in tmio_mmc_clk_stop above */
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		disable_irq(host->irq);
 	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
 	msleep(10);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+		enable_irq(host->irq);
+	}
 }
 
 static void reset(struct tmio_mmc_host *host)
@@ -554,7 +604,10 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
 	unsigned int ireg, irq_mask, status;
+	unsigned int sdio_ireg, sdio_irq_mask, sdio_status;
 
 	pr_debug("MMC IRQ begin\n");
 
@@ -562,6 +615,29 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
 	ireg = status & TMIO_MASK_IRQ & ~irq_mask;
 
+	sdio_ireg = 0;
+	if (!ireg && pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		sdio_status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
+		sdio_irq_mask = sd_ctrl_read16(host, CTL_SDIO_IRQ_MASK);
+		sdio_ireg = sdio_status & TMIO_SDIO_MASK_ALL & ~sdio_irq_mask;
+
+		sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status & ~TMIO_SDIO_MASK_ALL);
+
+		if (sdio_ireg && !host->sdio_irq_enabled) {
+			pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n",
+				   sdio_status, sdio_irq_mask, sdio_ireg);
+			tmio_mmc_enable_sdio_irq(host->mmc, 0);
+			goto out;
+		}
+
+		if (host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+			sdio_ireg & TMIO_SDIO_STAT_IOIRQ)
+			mmc_signal_sdio_irq(host->mmc);
+
+		if (sdio_ireg)
+			goto out;
+	}
+
 	pr_debug_status(status);
 	pr_debug_status(ireg);
 
@@ -1047,6 +1123,7 @@ static const struct mmc_host_ops tmio_mmc_ops = {
 	.set_ios	= tmio_mmc_set_ios,
 	.get_ro         = tmio_mmc_get_ro,
 	.get_cd		= tmio_mmc_get_cd,
+	.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
 };
 
 #ifdef CONFIG_PM
@@ -1162,6 +1239,8 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 		goto cell_disable;
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		tmio_mmc_enable_sdio_irq(mmc, 0);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
 		IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index dbfc053b1f95..8e70310ee945 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -57,6 +57,10 @@
  * is configured in 4-bit mode.
  */
 #define TMIO_MMC_BLKSZ_2BYTES		(1 << 1)
+/*
+ * Some controllers can support SDIO IRQ signalling.
+ */
+#define TMIO_MMC_SDIO_IRQ		(1 << 2)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
-- 
cgit v1.2.3-71-gd317


From 6099469805c24af14250e182bb9ca082b8a6b716 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Sun, 9 Jan 2011 09:31:39 -0500
Subject: hwmon: Support for Dallas Semiconductor DS620

Driver for Dallas Semiconductor DS620 temperature sensor and thermostat

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
---
 Documentation/hwmon/ds620 |  34 +++++
 drivers/hwmon/Kconfig     |  10 ++
 drivers/hwmon/Makefile    |   1 +
 drivers/hwmon/ds620.c     | 337 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/ds620.h |  21 +++
 5 files changed, 403 insertions(+)
 create mode 100644 Documentation/hwmon/ds620
 create mode 100644 drivers/hwmon/ds620.c
 create mode 100644 include/linux/i2c/ds620.h

(limited to 'include/linux')

diff --git a/Documentation/hwmon/ds620 b/Documentation/hwmon/ds620
new file mode 100644
index 000000000000..1fbe3cd916cc
--- /dev/null
+++ b/Documentation/hwmon/ds620
@@ -0,0 +1,34 @@
+Kernel driver ds620
+===================
+
+Supported chips:
+  * Dallas Semiconductor DS620
+    Prefix: 'ds620'
+    Datasheet: Publicly available at the Dallas Semiconductor website
+               http://www.dalsemi.com/
+
+Authors:
+        Roland Stigge <stigge@antcom.de>
+        based on ds1621.c by
+        Christian W. Zuckschwerdt <zany@triq.net>
+
+Description
+-----------
+
+The DS620 is a (one instance) digital thermometer and thermostat. It has both
+high and low temperature limits which can be user defined (i.e.  programmed
+into non-volatile on-chip registers). Temperature range is -55 degree Celsius
+to +125. Between 0 and 70 degree Celsius, accuracy is 0.5 Kelvin. The value
+returned via sysfs displays post decimal positions.
+
+The thermostat function works as follows: When configured via platform_data
+(struct ds620_platform_data) .pomode == 0 (default), the thermostat output pin
+PO is always low. If .pomode == 1, the thermostat is in PO_LOW mode. I.e., the
+output pin PO becomes active when the temperature falls below temp1_min and
+stays active until the temperature goes above temp1_max.
+
+Likewise, with .pomode == 2, the thermostat is in PO_HIGH mode. I.e., the PO
+output pin becomes active when the temperature goes above temp1_max and stays
+active until the temperature falls below temp1_min.
+
+The PO output pin of the DS620 operates active-low.
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0f09567257fd..bdc13d28b1ea 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -274,6 +274,16 @@ config SENSORS_ATXP1
 	  This driver can also be built as a module.  If so, the module
 	  will be called atxp1.
 
+config SENSORS_DS620
+	tristate "Dallas Semiconductor DS620"
+	depends on I2C
+	help
+	  If you say yes here you get support for Dallas Semiconductor
+	  DS620 sensor chip.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ds620.
+
 config SENSORS_DS1621
 	tristate "Dallas Semiconductor DS1621 and DS1625"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 20e74086e681..dde02d99c238 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_SENSORS_ATXP1)	+= atxp1.o
 obj-$(CONFIG_SENSORS_CORETEMP)	+= coretemp.o
 obj-$(CONFIG_SENSORS_PKGTEMP)	+= pkgtemp.o
 obj-$(CONFIG_SENSORS_DME1737)	+= dme1737.o
+obj-$(CONFIG_SENSORS_DS620)	+= ds620.o
 obj-$(CONFIG_SENSORS_DS1621)	+= ds1621.o
 obj-$(CONFIG_SENSORS_EMC1403)	+= emc1403.o
 obj-$(CONFIG_SENSORS_EMC2103)	+= emc2103.o
diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c
new file mode 100644
index 000000000000..257957c69d92
--- /dev/null
+++ b/drivers/hwmon/ds620.c
@@ -0,0 +1,337 @@
+/*
+ *  ds620.c - Support for temperature sensor and thermostat DS620
+ *
+ *  Copyright (C) 2010, 2011 Roland Stigge <stigge@antcom.de>
+ *
+ *  based on ds1621.c by Christian W. Zuckschwerdt  <zany@triq.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/i2c/ds620.h>
+
+/*
+ * Many DS620 constants specified below
+ *  15   14   13   12   11   10   09    08
+ * |Done|NVB |THF |TLF |R1  |R0  |AUTOC|1SHOT|
+ *
+ *  07   06   05   04   03   02   01    00
+ * |PO2 |PO1 |A2  |A1  |A0  |    |     |     |
+ */
+#define DS620_REG_CONFIG_DONE		0x8000
+#define DS620_REG_CONFIG_NVB		0x4000
+#define DS620_REG_CONFIG_THF		0x2000
+#define DS620_REG_CONFIG_TLF		0x1000
+#define DS620_REG_CONFIG_R1		0x0800
+#define DS620_REG_CONFIG_R0		0x0400
+#define DS620_REG_CONFIG_AUTOC		0x0200
+#define DS620_REG_CONFIG_1SHOT		0x0100
+#define DS620_REG_CONFIG_PO2		0x0080
+#define DS620_REG_CONFIG_PO1		0x0040
+#define DS620_REG_CONFIG_A2		0x0020
+#define DS620_REG_CONFIG_A1		0x0010
+#define DS620_REG_CONFIG_A0		0x0008
+
+/* The DS620 registers */
+static const u8 DS620_REG_TEMP[3] = {
+	0xAA,			/* input, word, RO */
+	0xA2,			/* min, word, RW */
+	0xA0,			/* max, word, RW */
+};
+
+#define DS620_REG_CONF		0xAC	/* word, RW */
+#define DS620_COM_START		0x51	/* no data */
+#define DS620_COM_STOP		0x22	/* no data */
+
+/* Each client has this additional data */
+struct ds620_data {
+	struct device *hwmon_dev;
+	struct mutex update_lock;
+	char valid;		/* !=0 if following fields are valid */
+	unsigned long last_updated;	/* In jiffies */
+
+	u16 temp[3];		/* Register values, word */
+};
+
+/*
+ *  Temperature registers are word-sized.
+ *  DS620 uses a high-byte first convention, which is exactly opposite to
+ *  the SMBus standard.
+ */
+static int ds620_read_temp(struct i2c_client *client, u8 reg)
+{
+	int ret;
+
+	ret = i2c_smbus_read_word_data(client, reg);
+	if (ret < 0)
+		return ret;
+	return swab16(ret);
+}
+
+static int ds620_write_temp(struct i2c_client *client, u8 reg, u16 value)
+{
+	return i2c_smbus_write_word_data(client, reg, swab16(value));
+}
+
+static void ds620_init_client(struct i2c_client *client)
+{
+	struct ds620_platform_data *ds620_info = client->dev.platform_data;
+	u16 conf, new_conf;
+
+	new_conf = conf =
+	    swab16(i2c_smbus_read_word_data(client, DS620_REG_CONF));
+
+	/* switch to continuous conversion mode */
+	new_conf &= ~DS620_REG_CONFIG_1SHOT;
+	/* already high at power-on, but don't trust the BIOS! */
+	new_conf |= DS620_REG_CONFIG_PO2;
+	/* thermostat mode according to platform data */
+	if (ds620_info && ds620_info->pomode == 1)
+		new_conf &= ~DS620_REG_CONFIG_PO1; /* PO_LOW */
+	else if (ds620_info && ds620_info->pomode == 2)
+		new_conf |= DS620_REG_CONFIG_PO1; /* PO_HIGH */
+	else
+		new_conf &= ~DS620_REG_CONFIG_PO2; /* always low */
+	/* with highest precision */
+	new_conf |= DS620_REG_CONFIG_R1 | DS620_REG_CONFIG_R0;
+
+	if (conf != new_conf)
+		i2c_smbus_write_word_data(client, DS620_REG_CONF,
+					  swab16(new_conf));
+
+	/* start conversion */
+	i2c_smbus_write_byte(client, DS620_COM_START);
+}
+
+static struct ds620_data *ds620_update_client(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds620_data *data = i2c_get_clientdata(client);
+	struct ds620_data *ret = data;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
+	    || !data->valid) {
+		int i;
+		int res;
+
+		dev_dbg(&client->dev, "Starting ds620 update\n");
+
+		for (i = 0; i < ARRAY_SIZE(data->temp); i++) {
+			res = ds620_read_temp(client,
+					      DS620_REG_TEMP[i]);
+			if (res < 0) {
+				ret = ERR_PTR(res);
+				goto abort;
+			}
+
+			data->temp[i] = res;
+		}
+
+		data->last_updated = jiffies;
+		data->valid = 1;
+	}
+abort:
+	mutex_unlock(&data->update_lock);
+
+	return ret;
+}
+
+static ssize_t show_temp(struct device *dev, struct device_attribute *da,
+			 char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct ds620_data *data = ds620_update_client(dev);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", ((data->temp[attr->index] / 8) * 625) / 10);
+}
+
+static ssize_t set_temp(struct device *dev, struct device_attribute *da,
+			const char *buf, size_t count)
+{
+	int res;
+	long val;
+
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds620_data *data = i2c_get_clientdata(client);
+
+	res = strict_strtol(buf, 10, &val);
+
+	if (res)
+		return res;
+
+	val = (val * 10 / 625) * 8;
+
+	mutex_lock(&data->update_lock);
+	data->temp[attr->index] = val;
+	ds620_write_temp(client, DS620_REG_TEMP[attr->index],
+			 data->temp[attr->index]);
+	mutex_unlock(&data->update_lock);
+	return count;
+}
+
+static ssize_t show_alarm(struct device *dev, struct device_attribute *da,
+			  char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct ds620_data *data = ds620_update_client(dev);
+	struct i2c_client *client = to_i2c_client(dev);
+	u16 conf, new_conf;
+	int res;
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	/* reset alarms if necessary */
+	res = i2c_smbus_read_word_data(client, DS620_REG_CONF);
+	if (res < 0)
+		return res;
+
+	conf = swab16(res);
+	new_conf = conf;
+	new_conf &= ~attr->index;
+	if (conf != new_conf) {
+		res = i2c_smbus_write_word_data(client, DS620_REG_CONF,
+						swab16(new_conf));
+		if (res < 0)
+			return res;
+	}
+
+	return sprintf(buf, "%d\n", !!(conf & attr->index));
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_min, S_IWUSR | S_IRUGO, show_temp, set_temp, 1);
+static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp, set_temp, 2);
+static SENSOR_DEVICE_ATTR(temp1_min_alarm, S_IRUGO, show_alarm, NULL,
+			  DS620_REG_CONFIG_TLF);
+static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL,
+			  DS620_REG_CONFIG_THF);
+
+static struct attribute *ds620_attributes[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_min.dev_attr.attr,
+	&sensor_dev_attr_temp1_max.dev_attr.attr,
+	&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
+	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group ds620_group = {
+	.attrs = ds620_attributes,
+};
+
+static int ds620_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	struct ds620_data *data;
+	int err;
+
+	data = kzalloc(sizeof(struct ds620_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	/* Initialize the DS620 chip */
+	ds620_init_client(client);
+
+	/* Register sysfs hooks */
+	err = sysfs_create_group(&client->dev.kobj, &ds620_group);
+	if (err)
+		goto exit_free;
+
+	data->hwmon_dev = hwmon_device_register(&client->dev);
+	if (IS_ERR(data->hwmon_dev)) {
+		err = PTR_ERR(data->hwmon_dev);
+		goto exit_remove_files;
+	}
+
+	dev_info(&client->dev, "temperature sensor found\n");
+
+	return 0;
+
+exit_remove_files:
+	sysfs_remove_group(&client->dev.kobj, &ds620_group);
+exit_free:
+	kfree(data);
+exit:
+	return err;
+}
+
+static int ds620_remove(struct i2c_client *client)
+{
+	struct ds620_data *data = i2c_get_clientdata(client);
+
+	hwmon_device_unregister(data->hwmon_dev);
+	sysfs_remove_group(&client->dev.kobj, &ds620_group);
+
+	kfree(data);
+
+	return 0;
+}
+
+static const struct i2c_device_id ds620_id[] = {
+	{"ds620", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, ds620_id);
+
+/* This is the driver that will be inserted */
+static struct i2c_driver ds620_driver = {
+	.class = I2C_CLASS_HWMON,
+	.driver = {
+		   .name = "ds620",
+	},
+	.probe = ds620_probe,
+	.remove = ds620_remove,
+	.id_table = ds620_id,
+};
+
+static int __init ds620_init(void)
+{
+	return i2c_add_driver(&ds620_driver);
+}
+
+static void __exit ds620_exit(void)
+{
+	i2c_del_driver(&ds620_driver);
+}
+
+MODULE_AUTHOR("Roland Stigge <stigge@antcom.de>");
+MODULE_DESCRIPTION("DS620 driver");
+MODULE_LICENSE("GPL");
+
+module_init(ds620_init);
+module_exit(ds620_exit);
diff --git a/include/linux/i2c/ds620.h b/include/linux/i2c/ds620.h
new file mode 100644
index 000000000000..736bb87ac0fc
--- /dev/null
+++ b/include/linux/i2c/ds620.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_DS620_H
+#define _LINUX_DS620_H
+
+#include <linux/types.h>
+#include <linux/i2c.h>
+
+/* platform data for the DS620 temperature sensor and thermostat */
+
+struct ds620_platform_data {
+	/*
+	 *  Thermostat output pin PO mode:
+	 *  0 = always low (default)
+	 *  1 = PO_LOW
+	 *  2 = PO_HIGH
+	 *
+	 * (see Documentation/hwmon/ds620)
+	 */
+	int pomode;
+};
+
+#endif /* _LINUX_DS620_H */
-- 
cgit v1.2.3-71-gd317


From 49da97dcb6b00a6869bbc3fa6ec7fdfd8a6e41a3 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@freescale.com>
Date: Wed, 5 Jan 2011 21:13:11 +0000
Subject: net/fec: add mac field into platform data and consolidate fec_get_mac

Add mac field into fec_platform_data and consolidate function
fec_get_mac to get mac address in following order.

 1) module parameter via kernel command line fec.macaddr=0x00,0x04,...
 2) from flash in case of CONFIG_M5272 or fec_platform_data mac
    field for others, which typically have mac stored in fuse
 3) fec mac address registers set by bootloader

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fec.c   | 81 +++++++++++++++++++++++++----------------------------
 include/linux/fec.h |  3 ++
 2 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 47f6b3b91423..47a3c7b499e9 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -59,15 +59,11 @@
 #define FEC_ALIGNMENT	0x3
 #endif
 
-/*
- * Define the fixed address of the FEC hardware.
- */
-#if defined(CONFIG_M5272)
-
-static unsigned char	fec_mac_default[] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
+static unsigned char macaddr[ETH_ALEN];
+module_param_array(macaddr, byte, NULL, 0);
+MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 
+#if defined(CONFIG_M5272)
 /*
  * Some hardware gets it MAC address out of local flash memory.
  * if this is non-zero then assume it is the address to get MAC from.
@@ -537,37 +533,50 @@ rx_processing_done:
 }
 
 /* ------------------------------------------------------------------------- */
-#ifdef CONFIG_M5272
 static void __inline__ fec_get_mac(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
+	struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
 	unsigned char *iap, tmpaddr[ETH_ALEN];
 
-	if (FEC_FLASHMAC) {
-		/*
-		 * Get MAC address from FLASH.
-		 * If it is all 1's or 0's, use the default.
-		 */
-		iap = (unsigned char *)FEC_FLASHMAC;
-		if ((iap[0] == 0) && (iap[1] == 0) && (iap[2] == 0) &&
-		    (iap[3] == 0) && (iap[4] == 0) && (iap[5] == 0))
-			iap = fec_mac_default;
-		if ((iap[0] == 0xff) && (iap[1] == 0xff) && (iap[2] == 0xff) &&
-		    (iap[3] == 0xff) && (iap[4] == 0xff) && (iap[5] == 0xff))
-			iap = fec_mac_default;
-	} else {
-		*((unsigned long *) &tmpaddr[0]) = readl(fep->hwp + FEC_ADDR_LOW);
-		*((unsigned short *) &tmpaddr[4]) = (readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
+	/*
+	 * try to get mac address in following order:
+	 *
+	 * 1) module parameter via kernel command line in form
+	 *    fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0
+	 */
+	iap = macaddr;
+
+	/*
+	 * 2) from flash or fuse (via platform data)
+	 */
+	if (!is_valid_ether_addr(iap)) {
+#ifdef CONFIG_M5272
+		if (FEC_FLASHMAC)
+			iap = (unsigned char *)FEC_FLASHMAC;
+#else
+		if (pdata)
+			memcpy(iap, pdata->mac, ETH_ALEN);
+#endif
+	}
+
+	/*
+	 * 3) FEC mac registers set by bootloader
+	 */
+	if (!is_valid_ether_addr(iap)) {
+		*((unsigned long *) &tmpaddr[0]) =
+			be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
+		*((unsigned short *) &tmpaddr[4]) =
+			be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
 		iap = &tmpaddr[0];
 	}
 
 	memcpy(dev->dev_addr, iap, ETH_ALEN);
 
-	/* Adjust MAC if using default MAC address */
-	if (iap == fec_mac_default)
-		 dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->pdev->id;
+	/* Adjust MAC if using macaddr */
+	if (iap == macaddr)
+		 dev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->pdev->id;
 }
-#endif
 
 /* ------------------------------------------------------------------------- */
 
@@ -1087,22 +1096,8 @@ static int fec_enet_init(struct net_device *dev)
 	fep->hwp = (void __iomem *)dev->base_addr;
 	fep->netdev = dev;
 
-	/* Set the Ethernet address */
-#ifdef CONFIG_M5272
+	/* Get the Ethernet address */
 	fec_get_mac(dev);
-#else
-	{
-		unsigned long l;
-		l = readl(fep->hwp + FEC_ADDR_LOW);
-		dev->dev_addr[0] = (unsigned char)((l & 0xFF000000) >> 24);
-		dev->dev_addr[1] = (unsigned char)((l & 0x00FF0000) >> 16);
-		dev->dev_addr[2] = (unsigned char)((l & 0x0000FF00) >> 8);
-		dev->dev_addr[3] = (unsigned char)((l & 0x000000FF) >> 0);
-		l = readl(fep->hwp + FEC_ADDR_HIGH);
-		dev->dev_addr[4] = (unsigned char)((l & 0xFF000000) >> 24);
-		dev->dev_addr[5] = (unsigned char)((l & 0x00FF0000) >> 16);
-	}
-#endif
 
 	/* Set receive and transmit descriptor base. */
 	fep->rx_bd_base = cbd_base;
diff --git a/include/linux/fec.h b/include/linux/fec.h
index 5d3523d8dd0c..bcff455d1d53 100644
--- a/include/linux/fec.h
+++ b/include/linux/fec.h
@@ -3,6 +3,8 @@
  * Copyright (c) 2009 Orex Computed Radiography
  *   Baruch Siach <baruch@tkos.co.il>
  *
+ * Copyright (C) 2010 Freescale Semiconductor, Inc.
+ *
  * Header file for the FEC platform data
  *
  * This program is free software; you can redistribute it and/or modify
@@ -16,6 +18,7 @@
 
 struct fec_platform_data {
 	phy_interface_t phy;
+	unsigned char mac[ETH_ALEN];
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:31 +0000
Subject: net offloading: Generalize netif_get_vlan_features().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

netif_get_vlan_features() is currently only used by netif_needs_gso(),
so it only concerns itself with GSO features.  However, several other
places also should take into account the contents of the packet when
deciding whether to offload to hardware.  This generalizes the function
to return features about all of the various forms of offloading.  Since
offloads tend to be linked together, this avoids duplicating the logic
in each location (i.e. the scatter/gather code also needs the checksum
logic).

Suggested-by: Michał Mirosław <mirqus@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 35 +++++++++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0f6b1c965815..d4dac09a5ad2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2303,7 +2303,7 @@ unsigned long netdev_fix_features(unsigned long features, const char *name);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev);
+int netif_skb_features(struct sk_buff *skb);
 
 static inline int net_gso_ok(int features, int gso_type)
 {
@@ -2320,7 +2320,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	if (skb_is_gso(skb)) {
-		int features = netif_get_vlan_features(skb, dev);
+		int features = netif_skb_features(skb);
 
 		return (!skb_gso_ok(skb, features) ||
 			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/net/core/dev.c b/net/core/dev.c
index d8befd06da04..a51dfd7b56fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+	if (!can_checksum_protocol(protocol, features)) {
+		features &= ~NETIF_F_ALL_CSUM;
+		features &= ~NETIF_F_SG;
+	} else if (illegal_highdma(skb->dev, skb)) {
+		features &= ~NETIF_F_SG;
+	}
+
+	return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
+	int features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!skb->vlan_tci)
-		return dev->features;
+	} else if (!vlan_tx_tag_present(skb)) {
+		return harmonize_features(skb, protocol, features);
+	}
 
-	if (protocol != htons(ETH_P_8021Q))
-		return dev->features & dev->vlan_features;
-	else
-		return 0;
+	features &= skb->dev->vlan_features;
+
+	if (protocol != htons(ETH_P_8021Q)) {
+		return harmonize_features(skb, protocol, features);
+	} else {
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+				NETIF_F_GEN_CSUM;
+		return harmonize_features(skb, protocol, features);
+	}
 }
-EXPORT_SYMBOL(netif_get_vlan_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 /*
  * Returns true if either:
-- 
cgit v1.2.3-71-gd317


From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:32 +0000
Subject: net offloading: Pass features into netif_needs_gso().

Now that there is a single function that can compute the device
features relevant to a packet, we don't want to run it for each
offload.  This converts netif_needs_gso() to take the features
of the device, rather than computing them itself.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c |  2 +-
 include/linux/netdevice.h  | 12 +++---------
 net/core/dev.c             |  8 ++++++--
 3 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cdbeec9f83ea..546de5749824 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -488,7 +488,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(!netif_carrier_ok(dev) ||
 		     (frags > 1 && !xennet_can_sg(dev)) ||
-		     netif_needs_gso(dev, skb))) {
+		     netif_needs_gso(skb, netif_skb_features(skb)))) {
 		spin_unlock_irq(&np->tx_lock);
 		goto drop;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4dac09a5ad2..de2bfe6da359 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2317,16 +2317,10 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+static inline int netif_needs_gso(struct sk_buff *skb, int features)
 {
-	if (skb_is_gso(skb)) {
-		int features = netif_skb_features(skb);
-
-		return (!skb_gso_ok(skb, features) ||
-			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
-	}
-
-	return 0;
+	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
 static inline void netif_set_gso_max_size(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index a51dfd7b56fb..1444ed3861a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
+		int features;
+
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_orphan_try(skb);
 
+		features = netif_skb_features(skb);
+
 		if (vlan_tx_tag_present(skb) &&
-		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
+		    !(features & NETIF_F_HW_VLAN_TX)) {
 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
 			if (unlikely(!skb))
 				goto out;
@@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->vlan_tci = 0;
 		}
 
-		if (netif_needs_gso(dev, skb)) {
+		if (netif_needs_gso(skb, features)) {
 			if (unlikely(dev_gso_segment(skb)))
 				goto out_kfree_skb;
 			if (skb->next)
-- 
cgit v1.2.3-71-gd317


From 2a8c0c68487a68441e701f493f43fd547d87c8df Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 8 Jan 2011 19:37:20 -0800
Subject: fs: fix dcache.h kernel-doc notation

Fix new kernel-doc notation warning in dcache.h:

  Warning(include/linux/dcache.h:316): Excess function parameter 'Returns' description in '__d_rcu_to_refcount'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc:	Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bd07758943e0..59fcd24b1468 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -307,7 +307,7 @@ extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
  * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
  * @dentry: dentry to take a ref on
  * @seq: seqcount to verify against
- * @Returns: 0 on failure, else 1.
+ * Returns: 0 on failure, else 1.
  *
  * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
  * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
-- 
cgit v1.2.3-71-gd317


From 175881db8916a5f5cdf920d32214caef588870fd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 8 Jan 2011 19:38:02 -0800
Subject: hrtimer.h: fix kernel-doc warning

Fix new kernel-doc notation warning in hrtimer.h:

  Warning(include/linux/hrtimer.h:150): Excess struct/union/enum/typedef member 'first' description in 'hrtimer_clock_base'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hrtimer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 330586ffffbb..f376ddc64c4d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -131,7 +131,6 @@ struct hrtimer_sleeper {
  * @index:		clock type index for per_cpu support when moving a
  *			timer to a base on another cpu.
  * @active:		red black tree root node for the active timers
- * @first:		pointer to the timer node which expires first
  * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
  * @softirq_time:	the time when running the hrtimer queue in the softirq
-- 
cgit v1.2.3-71-gd317


From 0dc1488527a3c01383a50e5df7187219567586a3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 8 Jan 2011 19:40:33 -0800
Subject: pipe_fs_i.h: fix kernel-doc warning

Fix kernel-doc notation warnings in pipe_fs_i.h:

  Warning(include/linux/pipe_fs_i.h:58): No description found for parameter 'buffers'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pipe_fs_i.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index bb27d7ec2fb9..77257c92155a 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -30,6 +30,7 @@ struct pipe_buffer {
  *	struct pipe_inode_info - a linux kernel pipe
  *	@wait: reader/writer wait point in case of empty/full pipe
  *	@nrbufs: the number of non-empty pipe buffers in this pipe
+ *	@buffers: total number of buffers (should be a power of 2)
  *	@curbuf: the current pipe buffer entry
  *	@tmp_page: cached released page
  *	@readers: number of current readers of this pipe
-- 
cgit v1.2.3-71-gd317


From 37721e1b0cf98cb65895f234d8c500d270546529 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 10 Jan 2011 08:17:10 +0200
Subject: headers: path.h redux

Remove path.h from sched.h and other files.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h            | 1 +
 include/linux/dcookies.h         | 2 +-
 include/linux/sched.h            | 1 -
 security/apparmor/include/file.h | 3 +--
 security/selinux/include/avc.h   | 1 -
 5 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8b5c0620abf9..359df0487690 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -372,6 +372,7 @@ struct audit_buffer;
 struct audit_context;
 struct inode;
 struct netlink_skb_parms;
+struct path;
 struct linux_binprm;
 struct mq_attr;
 struct mqstat;
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index 24c806f12a6c..5ac3bdd5cee6 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -13,10 +13,10 @@
 #ifdef CONFIG_PROFILING
  
 #include <linux/dcache.h>
-#include <linux/path.h>
 #include <linux/types.h>
  
 struct dcookie_user;
+struct path;
  
 /**
  * dcookie_register - register a user of dcookies
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 341acbbc434a..c118a7f203aa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -70,7 +70,6 @@ struct sched_param {
 #include <linux/smp.h>
 #include <linux/sem.h>
 #include <linux/signal.h>
-#include <linux/path.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
 #include <linux/pid.h>
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index be36feabb16a..ab8c6d87f758 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -15,12 +15,11 @@
 #ifndef __AA_FILE_H
 #define __AA_FILE_H
 
-#include <linux/path.h>
-
 #include "domain.h"
 #include "match.h"
 
 struct aa_profile;
+struct path;
 
 /*
  * We use MAY_EXEC, MAY_WRITE, MAY_READ, MAY_APPEND and the following flags
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index e94e82f73818..5615081b73ec 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -15,7 +15,6 @@
 #include <linux/audit.h>
 #include <linux/lsm_audit.h>
 #include <linux/in6.h>
-#include <linux/path.h>
 #include <asm/system.h>
 #include "flask.h"
 #include "av_permissions.h"
-- 
cgit v1.2.3-71-gd317


From 57cc7215b70856dc6bae8e55b00ecd7b1d7429b1 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 10 Jan 2011 08:18:25 +0200
Subject: headers: kobject.h redux

Remove kobject.h from files which don't need it, notably,
sched.h and fs.h.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/plat-s3c24xx/cpu-freq.c           | 1 -
 arch/powerpc/platforms/pseries/eeh_sysfs.c | 1 -
 drivers/char/snsc.h                        | 1 -
 drivers/net/bonding/bonding.h              | 1 -
 drivers/pci/hotplug/acpiphp.h              | 1 -
 drivers/pci/hotplug/rpaphp_slot.c          | 1 -
 drivers/s390/char/tape_class.h             | 1 -
 drivers/sh/clk/core.c                      | 1 -
 drivers/usb/musb/musb_debugfs.c            | 1 -
 fs/gfs2/incore.h                           | 1 +
 fs/nilfs2/super.c                          | 1 -
 fs/sysfs/inode.c                           | 1 +
 fs/sysfs/sysfs.h                           | 1 +
 include/linux/firmware-map.h               | 1 -
 include/linux/fs.h                         | 2 +-
 include/linux/sched.h                      | 1 -
 include/linux/sunrpc/cache.h               | 1 +
 security/apparmor/include/match.h          | 1 +
 18 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-s3c24xx/cpu-freq.c b/arch/arm/plat-s3c24xx/cpu-freq.c
index 1ecc15bfe9d4..25a8fc7f512e 100644
--- a/arch/arm/plat-s3c24xx/cpu-freq.c
+++ b/arch/arm/plat-s3c24xx/cpu-freq.c
@@ -21,7 +21,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/sysdev.h>
-#include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
 
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index 15e13b568904..23982c7892d2 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -25,7 +25,6 @@
 #include <linux/pci.h>
 #include <asm/ppc-pci.h>
 #include <asm/pci-bridge.h>
-#include <linux/kobject.h>
 
 /**
  * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h
index 4be62eda9fbc..e8c52c882b21 100644
--- a/drivers/char/snsc.h
+++ b/drivers/char/snsc.h
@@ -19,7 +19,6 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
-#include <linux/kobject.h>
 #include <linux/fs.h>
 #include <linux/cdev.h>
 #include <linux/semaphore.h>
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 4da384cc7603..31fe980e4e28 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -18,7 +18,6 @@
 #include <linux/timer.h>
 #include <linux/proc_fs.h>
 #include <linux/if_bonding.h>
-#include <linux/kobject.h>
 #include <linux/cpumask.h>
 #include <linux/in6.h>
 #include "bond_3ad.h"
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index bab52047baa8..7722108e78df 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -36,7 +36,6 @@
 #define _ACPIPHP_H
 
 #include <linux/acpi.h>
-#include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/pci_hotplug.h>
 
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 2ea9cf1a8d02..b283bbea6d24 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -24,7 +24,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 #include <linux/string.h>
diff --git a/drivers/s390/char/tape_class.h b/drivers/s390/char/tape_class.h
index 707b7f48c232..9e32780c317f 100644
--- a/drivers/s390/char/tape_class.h
+++ b/drivers/s390/char/tape_class.h
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/major.h>
-#include <linux/kobject.h>
 #include <linux/kobj_map.h>
 #include <linux/cdev.h>
 
diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index 3f5e387ed564..5f63c3b83828 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/kobject.h>
 #include <linux/sysdev.h>
 #include <linux/seq_file.h>
 #include <linux/err.h>
diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index 9e8639d4e862..b0176e4569e0 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -36,7 +36,6 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/kobject.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/debugfs.h>
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8d3d2b4a0a7d..a79790c06275 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
 #define __INCORE_DOT_H__
 
 #include <linux/fs.h>
+#include <linux/kobject.h>
 #include <linux/workqueue.h>
 #include <linux/dlm.h>
 #include <linux/buffer_head.h>
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6ea32d9b1b9d..70dfdd532b83 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -47,7 +47,6 @@
 #include <linux/crc32.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
-#include <linux/kobject.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include "nilfs.h"
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 30ac27345586..0a12eb89cd32 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/sysfs.h>
 #include <linux/xattr.h>
 #include <linux/security.h>
 #include "sysfs.h"
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ffaaa816bfba..3d28af31d863 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -9,6 +9,7 @@
  */
 
 #include <linux/lockdep.h>
+#include <linux/kobject_ns.h>
 #include <linux/fs.h>
 
 struct sysfs_open_dirent;
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index c6dcc1dfe781..43fe52fcef0f 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -17,7 +17,6 @@
 #define _LINUX_FIRMWARE_MAP_H
 
 #include <linux/list.h>
-#include <linux/kobject.h>
 
 /*
  * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled
diff --git a/include/linux/fs.h b/include/linux/fs.h
index baf3e556ff0e..f84d9928bdb1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -382,7 +382,6 @@ struct inodes_stat_t {
 #include <linux/path.h>
 #include <linux/stat.h>
 #include <linux/cache.h>
-#include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>
 #include <linux/prio_tree.h>
@@ -402,6 +401,7 @@ struct hd_geometry;
 struct iovec;
 struct nameidata;
 struct kiocb;
+struct kobject;
 struct pipe_inode_info;
 struct poll_table_struct;
 struct kstatfs;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c118a7f203aa..abc527aa8550 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -87,7 +87,6 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
-#include <linux/kobject.h>
 #include <linux/latencytop.h>
 #include <linux/cred.h>
 
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 6950c981882d..78aa104250b7 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -13,6 +13,7 @@
 #ifndef _LINUX_SUNRPC_CACHE_H_
 #define _LINUX_SUNRPC_CACHE_H_
 
+#include <linux/kref.h>
 #include <linux/slab.h>
 #include <asm/atomic.h>
 #include <linux/proc_fs.h>
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index 734a6d35112c..19ba16e8aacd 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -15,6 +15,7 @@
 #ifndef __AA_MATCH_H
 #define __AA_MATCH_H
 
+#include <linux/kref.h>
 #include <linux/workqueue.h>
 
 #define DFA_NOMATCH			0
-- 
cgit v1.2.3-71-gd317


From 1a7d946993aaf2a79e9c65abbe169a108e351bcb Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Sat, 8 Jan 2011 18:47:29 +0900
Subject: sony-laptop: support new hotkeys on the P, Z and EC series

Add new mappings for assist, VAIO, zoom and eject buttons present on
refurbished P, Z and EC models.

Reported-by: Gyorgy Jeney <nog.lkml@gmail.com>
Reported-by: Stephan Mueller <smueller@chronox.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 10 ++++++++++
 include/linux/sonypi.h             |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 68edc97f1926..b4a95bb2f232 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -235,6 +235,7 @@ static int sony_laptop_input_index[] = {
 	57,	/* 70 SONYPI_EVENT_VOLUME_DEC_PRESSED */
 	-1,	/* 71 SONYPI_EVENT_BRIGHTNESS_PRESSED */
 	58,	/* 72 SONYPI_EVENT_MEDIA_PRESSED */
+	59,	/* 72 SONYPI_EVENT_VENDOR_PRESSED */
 };
 
 static int sony_laptop_input_keycode_map[] = {
@@ -297,6 +298,7 @@ static int sony_laptop_input_keycode_map[] = {
 	KEY_VOLUMEUP,	/* 56 SONYPI_EVENT_VOLUME_INC_PRESSED */
 	KEY_VOLUMEDOWN,	/* 57 SONYPI_EVENT_VOLUME_DEC_PRESSED */
 	KEY_MEDIA,	/* 58 SONYPI_EVENT_MEDIA_PRESSED */
+	KEY_VENDOR,	/* 59 SONYPI_EVENT_VENDOR_PRESSED */
 };
 
 /* release buttons after a short delay if pressed */
@@ -894,10 +896,18 @@ static struct sony_nc_event sony_100_events[] = {
 	{ 0x0A, SONYPI_EVENT_FNKEY_RELEASED },
 	{ 0x8C, SONYPI_EVENT_FNKEY_F12 },
 	{ 0x0C, SONYPI_EVENT_FNKEY_RELEASED },
+	{ 0x9d, SONYPI_EVENT_ZOOM_PRESSED },
+	{ 0x1d, SONYPI_EVENT_ANYBUTTON_RELEASED },
 	{ 0x9f, SONYPI_EVENT_CD_EJECT_PRESSED },
 	{ 0x1f, SONYPI_EVENT_ANYBUTTON_RELEASED },
 	{ 0xa1, SONYPI_EVENT_MEDIA_PRESSED },
 	{ 0x21, SONYPI_EVENT_ANYBUTTON_RELEASED },
+	{ 0xa4, SONYPI_EVENT_CD_EJECT_PRESSED },
+	{ 0x24, SONYPI_EVENT_ANYBUTTON_RELEASED },
+	{ 0xa5, SONYPI_EVENT_VENDOR_PRESSED },
+	{ 0x25, SONYPI_EVENT_ANYBUTTON_RELEASED },
+	{ 0xa6, SONYPI_EVENT_HELP_PRESSED },
+	{ 0x26, SONYPI_EVENT_ANYBUTTON_RELEASED },
 	{ 0, 0 },
 };
 
diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h
index 4f95c1aac2fd..0e6dc3891942 100644
--- a/include/linux/sonypi.h
+++ b/include/linux/sonypi.h
@@ -112,6 +112,7 @@
 #define SONYPI_EVENT_VOLUME_DEC_PRESSED		70
 #define SONYPI_EVENT_BRIGHTNESS_PRESSED		71
 #define SONYPI_EVENT_MEDIA_PRESSED		72
+#define SONYPI_EVENT_VENDOR_PRESSED		73
 
 /* get/set brightness */
 #define SONYPI_IOCGBRT		_IOR('v', 0, __u8)
-- 
cgit v1.2.3-71-gd317


From 8aefcd557d26d0023a36f9ec5afbf55e59f8f26b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 10 Jan 2011 12:29:43 -0500
Subject: ext4: dynamically allocate the jbd2_inode in ext4_inode_info as
 necessary

Replace the jbd2_inode structure (which is 48 bytes) with a pointer
and only allocate the jbd2_inode when it is needed --- that is, when
the file system has a journal present and the inode has been opened
for writing.  This allows us to further slim down the ext4_inode_info
structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h       |  2 +-
 fs/ext4/ext4_jbd2.h  |  2 +-
 fs/ext4/file.c       | 22 ++++++++++++++++++++++
 fs/ext4/inode.c      | 17 ++++++++++++-----
 fs/ext4/super.c      | 16 +++++++---------
 fs/jbd2/journal.c    | 20 +++++++++++++-------
 include/linux/jbd2.h | 20 ++++++++++++++++++--
 7 files changed, 74 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2fb531cfd48b..32b7daa41a42 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -811,7 +811,7 @@ struct ext4_inode_info {
 	 */
 	struct rw_semaphore i_data_sem;
 	struct inode vfs_inode;
-	struct jbd2_inode jinode;
+	struct jbd2_inode *jinode;
 
 	struct ext4_ext_cache i_cached_extent;
 	/*
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b0bd792c58c5..d8b992e658c1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal)
 static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
 {
 	if (ext4_handle_valid(handle))
-		return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+		return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
 	return 0;
 }
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55ddceef..bb003dc9ffff 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 {
 	struct super_block *sb = inode->i_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct vfsmount *mnt = filp->f_path.mnt;
 	struct path path;
 	char buf[64], *cp;
@@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 			ext4_mark_super_dirty(sb);
 		}
 	}
+	/*
+	 * Set up the jbd2_inode if we are opening the inode for
+	 * writing and the journal is present
+	 */
+	if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
+		struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
+
+		spin_lock(&inode->i_lock);
+		if (!ei->jinode) {
+			if (!jinode) {
+				spin_unlock(&inode->i_lock);
+				return -ENOMEM;
+			}
+			ei->jinode = jinode;
+			jbd2_journal_init_jbd_inode(ei->jinode, inode);
+			jinode = NULL;
+		}
+		spin_unlock(&inode->i_lock);
+		if (unlikely(jinode != NULL))
+			jbd2_free_inode(jinode);
+	}
 	return dquot_file_open(inode, filp);
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0801ee6a173e..2693fcda30d8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -55,10 +55,17 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
 	trace_ext4_begin_ordered_truncate(inode, new_size);
-	return jbd2_journal_begin_ordered_truncate(
-					EXT4_SB(inode->i_sb)->s_journal,
-					&EXT4_I(inode)->jinode,
-					new_size);
+	/*
+	 * If jinode is zero, then we never opened the file for
+	 * writing, so there's no need to call
+	 * jbd2_journal_begin_ordered_truncate() since there's no
+	 * outstanding writes we need to flush.
+	 */
+	if (!EXT4_I(inode)->jinode)
+		return 0;
+	return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
+						   EXT4_I(inode)->jinode,
+						   new_size);
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -4054,7 +4061,7 @@ int ext4_block_truncate_page(handle_t *handle,
 	if (ext4_should_journal_data(inode)) {
 		err = ext4_handle_dirty_metadata(handle, inode, bh);
 	} else {
-		if (ext4_should_order_data(inode))
+		if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
 			err = ext4_jbd2_file_inode(handle, inode);
 		mark_buffer_dirty(bh);
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f5960d673e4e..1cd4326c530b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -818,12 +818,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
-	/*
-	 * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
-	 * therefore it can be null here.  Don't check it, just initialize
-	 * jinode.
-	 */
-	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
@@ -832,6 +826,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
 #endif
+	ei->jinode = NULL;
 	INIT_LIST_HEAD(&ei->i_completed_io_list);
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->cur_aio_dio = NULL;
@@ -900,9 +895,12 @@ void ext4_clear_inode(struct inode *inode)
 	end_writeback(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
-	if (EXT4_JOURNAL(inode))
-		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
-				       &EXT4_I(inode)->jinode);
+	if (EXT4_I(inode)->jinode) {
+		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+					       EXT4_I(inode)->jinode);
+		jbd2_free_inode(EXT4_I(inode)->jinode);
+		EXT4_I(inode)->jinode = NULL;
+	}
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2447bd86f801..9e4686900f18 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -94,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
+EXPORT_SYMBOL(jbd2_inode_cache);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
@@ -2286,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void)
 
 #endif
 
-struct kmem_cache *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
 
 static int __init journal_init_handle_cache(void)
 {
-	jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
-				sizeof(handle_t),
-				0,		/* offset */
-				SLAB_TEMPORARY,	/* flags */
-				NULL);		/* ctor */
+	jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
 	if (jbd2_handle_cache == NULL) {
-		printk(KERN_EMERG "JBD: failed to create handle cache\n");
+		printk(KERN_EMERG "JBD2: failed to create handle cache\n");
+		return -ENOMEM;
+	}
+	jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
+	if (jbd2_inode_cache == NULL) {
+		printk(KERN_EMERG "JBD2: failed to create inode cache\n");
+		kmem_cache_destroy(jbd2_handle_cache);
 		return -ENOMEM;
 	}
 	return 0;
@@ -2306,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void)
 {
 	if (jbd2_handle_cache)
 		kmem_cache_destroy(jbd2_handle_cache);
+	if (jbd2_inode_cache)
+		kmem_cache_destroy(jbd2_inode_cache);
+
 }
 
 /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2ae86aa21fce..27e79c27ba08 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -94,7 +94,7 @@ extern void jbd2_free(void *ptr, size_t size);
  *
  * This is an opaque datatype.
  **/
-typedef struct handle_s		handle_t;	/* Atomic operation type */
+typedef struct jbd2_journal_handle handle_t;	/* Atomic operation type */
 
 
 /**
@@ -416,7 +416,7 @@ struct jbd2_revoke_table_s;
  * in so it can be fixed later.
  */
 
-struct handle_s
+struct jbd2_journal_handle
 {
 	/* Which compound transaction is this update a part of? */
 	transaction_t		*h_transaction;
@@ -1158,6 +1158,22 @@ static inline void jbd2_free_handle(handle_t *handle)
 	kmem_cache_free(jbd2_handle_cache, handle);
 }
 
+/*
+ * jbd2_inode management (optional, for those file systems that want to use
+ * dynamically allocated jbd2_inode structures)
+ */
+extern struct kmem_cache *jbd2_inode_cache;
+
+static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags)
+{
+	return kmem_cache_alloc(jbd2_inode_cache, gfp_flags);
+}
+
+static inline void jbd2_free_inode(struct jbd2_inode *jinode)
+{
+	kmem_cache_free(jbd2_inode_cache, jinode);
+}
+
 /* Primary revoke support */
 #define JOURNAL_REVOKE_DEFAULT_HASH 256
 extern int	   jbd2_journal_init_revoke(journal_t *, int);
-- 
cgit v1.2.3-71-gd317


From b853b96b1dbdc05fc8eae141a595366d8172962b Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Mon, 22 Nov 2010 12:29:17 +0100
Subject: ext3: Add batched discard support for ext3

Walk through allocation groups and trim all free extents. It can be
invoked through FITRIM ioctl on the file system. The main idea is to
provide a way to trim the whole file system if needed, since some SSD's
may suffer from performance loss after the whole device was filled (it
does not mean that fs is full!).

It search for free extents in allocation groups specified by Byte range
start -> start+len. When the free extent is within this range, blocks are
marked as used and then trimmed. Afterwards these blocks are marked as
free in per-group bitmap.

[JK: Fixed up error handling and trimming of a single group]

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/balloc.c        | 266 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ext3_fs.h |   1 +
 2 files changed, 267 insertions(+)

(limited to 'include/linux')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b3db22649426..045995c8ce5a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -20,6 +20,7 @@
 #include <linux/ext3_jbd.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 
 /*
  * balloc.c contains the blocks allocation and deallocation routines
@@ -39,6 +40,21 @@
 
 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
 
+/*
+ * Calculate the block group number and offset, given a block number
+ */
+static void ext3_get_group_no_and_offset(struct super_block *sb,
+	ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
+{
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
+	if (offsetp)
+		*offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
+	if (blockgrpp)
+		*blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
+}
+
 /**
  * ext3_get_group_desc() -- load group descriptor from disk
  * @sb:			super block
@@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
 	return ext3_bg_num_gdb_meta(sb,group);
 
 }
+
+/**
+ * ext3_trim_all_free -- function to trim all free space in alloc. group
+ * @sb:			super block for file system
+ * @group:		allocation group to trim
+ * @start:		first group block to examine
+ * @max:		last group block to examine
+ * @gdp:		allocation group description structure
+ * @minblocks:		minimum extent block count
+ *
+ * ext3_trim_all_free walks through group's block bitmap searching for free
+ * blocks. When the free block is found, it tries to allocate this block and
+ * consequent free block to get the biggest free extent possible, until it
+ * reaches any used block. Then issue a TRIM command on this extent and free
+ * the extent in the block bitmap. This is done until whole group is scanned.
+ */
+ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
+				ext3_grpblk_t start, ext3_grpblk_t max,
+				ext3_grpblk_t minblocks)
+{
+	handle_t *handle;
+	ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
+	ext3_fsblk_t discard_block;
+	struct ext3_sb_info *sbi;
+	struct buffer_head *gdp_bh, *bitmap_bh = NULL;
+	struct ext3_group_desc *gdp;
+	int err = 0, ret = 0;
+
+	/*
+	 * We will update one block bitmap, and one group descriptor
+	 */
+	handle = ext3_journal_start_sb(sb, 2);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	bitmap_bh = read_block_bitmap(sb, group);
+	if (!bitmap_bh) {
+		err = -EIO;
+		goto err_out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "getting undo access");
+	err = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto err_out;
+
+	gdp = ext3_get_group_desc(sb, group, &gdp_bh);
+	if (!gdp) {
+		err = -EIO;
+		goto err_out;
+	}
+
+	BUFFER_TRACE(gdp_bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, gdp_bh);
+	if (err)
+		goto err_out;
+
+	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+	sbi = EXT3_SB(sb);
+
+	 /* Walk through the whole group */
+	while (start < max) {
+		start = bitmap_search_next_usable_block(start, bitmap_bh, max);
+		if (start < 0)
+			break;
+		next = start;
+
+		/*
+		 * Allocate contiguous free extents by setting bits in the
+		 * block bitmap
+		 */
+		while (next < max
+			&& claim_block(sb_bgl_lock(sbi, group),
+					next, bitmap_bh)) {
+			next++;
+		}
+
+		 /* We did not claim any blocks */
+		if (next == start)
+			continue;
+
+		discard_block = (ext3_fsblk_t)start +
+				ext3_group_first_block_no(sb, group);
+
+		/* Update counters */
+		spin_lock(sb_bgl_lock(sbi, group));
+		le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
+		spin_unlock(sb_bgl_lock(sbi, group));
+		percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
+
+		/* Do not issue a TRIM on extents smaller than minblocks */
+		if ((next - start) < minblocks)
+			goto free_extent;
+
+		 /* Send the TRIM command down to the device */
+		err = sb_issue_discard(sb, discard_block, next - start,
+				       GFP_NOFS, 0);
+		count += (next - start);
+free_extent:
+		freed = 0;
+
+		/*
+		 * Clear bits in the bitmap
+		 */
+		for (bit = start; bit < next; bit++) {
+			BUFFER_TRACE(bitmap_bh, "clear bit");
+			if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
+						bit, bitmap_bh->b_data)) {
+				ext3_error(sb, __func__,
+					"bit already cleared for block "E3FSBLK,
+					 (unsigned long)bit);
+				BUFFER_TRACE(bitmap_bh, "bit already cleared");
+			} else {
+				freed++;
+			}
+		}
+
+		/* Update couters */
+		spin_lock(sb_bgl_lock(sbi, group));
+		le16_add_cpu(&gdp->bg_free_blocks_count, freed);
+		spin_unlock(sb_bgl_lock(sbi, group));
+		percpu_counter_add(&sbi->s_freeblocks_counter, freed);
+
+		start = next;
+		if (err < 0) {
+			if (err != -EOPNOTSUPP)
+				ext3_warning(sb, __func__, "Discard command "
+					     "returned error %d\n", err);
+			break;
+		}
+
+		if (fatal_signal_pending(current)) {
+			err = -ERESTARTSYS;
+			break;
+		}
+
+		cond_resched();
+
+		/* No more suitable extents */
+		if ((free_blocks - count) < minblocks)
+			break;
+	}
+
+	/* We dirtied the bitmap block */
+	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
+	ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	if (!err)
+		err = ret;
+
+	/* And the group descriptor block */
+	BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
+	ret = ext3_journal_dirty_metadata(handle, gdp_bh);
+	if (!err)
+		err = ret;
+
+	ext3_debug("trimmed %d blocks in the group %d\n",
+		count, group);
+
+err_out:
+	if (err)
+		count = err;
+	ext3_journal_stop(handle);
+	brelse(bitmap_bh);
+
+	return count;
+}
+
+/**
+ * ext3_trim_fs() -- trim ioctl handle function
+ * @sb:			superblock for filesystem
+ * @start:		First Byte to trim
+ * @len:		number of Bytes to trim from start
+ * @minlen:		minimum extent length in Bytes
+ *
+ * ext3_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext3_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+	ext3_grpblk_t last_block, first_block, free_blocks;
+	unsigned long first_group, last_group;
+	unsigned long group, ngroups;
+	struct ext3_group_desc *gdp;
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+	uint64_t start, len, minlen, trimmed;
+	ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
+	int ret = 0;
+
+	start = range->start >> sb->s_blocksize_bits;
+	len = range->len >> sb->s_blocksize_bits;
+	minlen = range->minlen >> sb->s_blocksize_bits;
+	trimmed = 0;
+
+	if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
+		return -EINVAL;
+	if (start >= max_blks)
+		goto out;
+	if (start < le32_to_cpu(es->s_first_data_block)) {
+		len -= le32_to_cpu(es->s_first_data_block) - start;
+		start = le32_to_cpu(es->s_first_data_block);
+	}
+	if (start + len > max_blks)
+		len = max_blks - start;
+
+	ngroups = EXT3_SB(sb)->s_groups_count;
+	smp_rmb();
+
+	/* Determine first and last group to examine based on start and len */
+	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
+				     &first_group, &first_block);
+	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
+				     &last_group, &last_block);
+	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+	last_block = EXT3_BLOCKS_PER_GROUP(sb);
+
+	if (first_group > last_group)
+		return -EINVAL;
+
+	for (group = first_group; group <= last_group; group++) {
+		gdp = ext3_get_group_desc(sb, group, NULL);
+		if (!gdp)
+			break;
+
+		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+		if (free_blocks < minlen)
+			continue;
+
+		if (len >= EXT3_BLOCKS_PER_GROUP(sb))
+			len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block);
+		else
+			last_block = first_block + len;
+
+		ret = ext3_trim_all_free(sb, group, first_block,
+					last_block, minlen);
+		if (ret < 0)
+			break;
+
+		trimmed += ret;
+		first_block = 0;
+	}
+
+	if (ret >= 0)
+		ret = 0;
+
+out:
+	range->len = trimmed * sb->s_blocksize;
+
+	return ret;
+}
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 6ce1bca01724..a443965946bb 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -856,6 +856,7 @@ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
 extern void ext3_init_block_alloc_info(struct inode *);
 extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
+extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
 
 /* dir.c */
 extern int ext3_check_dir_entry(const char *, struct inode *,
-- 
cgit v1.2.3-71-gd317


From 055adcbd7da75868697e767adc4f3272f6cae76c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 23 Nov 2010 18:49:54 -0800
Subject: quota: Use %pV and __attribute__((format (printf in __quota_error and
 fix fallout

Use %pV in __quota_error so a single printk can not be
interleaved with other logging messages.
Add __attribute__((format (printf, 3, 4))) so format
and arguments can be verified by compiler.
Make sure printk formats and arguments match.

Block # needed a pointer dereference.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 18 +++++++++++-------
 fs/quota/quota_tree.c    |  9 +++++----
 include/linux/quotaops.h |  5 +++--
 3 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0fed41e6efcd..84becd3e4772 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -133,16 +133,20 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
 EXPORT_SYMBOL(dq_data_lock);
 
 void __quota_error(struct super_block *sb, const char *func,
-		  const char *fmt, ...)
+		   const char *fmt, ...)
 {
-	va_list args;
-
 	if (printk_ratelimit()) {
+		va_list args;
+		struct va_format vaf;
+
 		va_start(args, fmt);
-		printk(KERN_ERR "Quota error (device %s): %s: ",
-		       sb->s_id, func);
-		vprintk(fmt, args);
-		printk("\n");
+
+		vaf.fmt = fmt;
+		vaf.va = &args;
+
+		printk(KERN_ERR "Quota error (device %s): %s: %pV\n",
+		       sb->s_id, func, &vaf);
+
 		va_end(args);
 	}
 }
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874eabcc..e41c1becf096 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -468,8 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		return -ENOMEM;
 	ret = read_blk(info, *blk, buf);
 	if (ret < 0) {
-		quota_error(dquot->dq_sb, "Can't read quota data "
-			    "block %u", blk);
+		quota_error(dquot->dq_sb, "Can't read quota data block %u",
+			    *blk);
 		goto out_buf;
 	}
 	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -493,8 +493,9 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		} else {
 			ret = write_blk(info, *blk, buf);
 			if (ret < 0)
-				quota_error(dquot->dq_sb, "Can't write quota "
-					    "tree block %u", blk);
+				quota_error(dquot->dq_sb,
+					    "Can't write quota tree block %u",
+					    *blk);
 		}
 	}
 out_buf:
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index d1a9193960f1..223b14cd129c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -31,8 +31,9 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 #define quota_error(sb, fmt, args...) \
 	__quota_error((sb), __func__, fmt , ## args)
 
-extern void __quota_error(struct super_block *sb, const char *func,
-			 const char *fmt, ...);
+extern __attribute__((format (printf, 3, 4)))
+void __quota_error(struct super_block *sb, const char *func,
+		   const char *fmt, ...);
 
 /*
  * declaration of quota_function calls in kernel.
-- 
cgit v1.2.3-71-gd317


From a4ae3094869f18e26ece25ad175bbe4cd740e60b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 7 Dec 2010 11:55:27 -0600
Subject: ext3: speed up file creates by optimizing rec_len functions

The addition of 64k block capability in the rec_len_from_disk
and rec_len_to_disk functions added a bit of math overhead which
slows down file create workloads needlessly when the architecture
cannot even support 64k blocks, thanks to page size limits.

Similar changes already exist in the ext4 codebase.

The directory entry checking can also be optimized a bit
by sprinkling in some unlikely() conditions to move the
error handling out of line.

bonnie++ sequential file creates on a 512MB ramdisk speeds up
from about 77,000/s to about 82,000/s, about a 6% improvement.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/dir.c           | 15 ++++++++-------
 include/linux/ext3_fs.h |  9 +++++++++
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e2e72c367cf6..34f0a072b935 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
 	const char * error_msg = NULL;
 	const int rlen = ext3_rec_len_from_disk(de->rec_len);
 
-	if (rlen < EXT3_DIR_REC_LEN(1))
+	if (unlikely(rlen < EXT3_DIR_REC_LEN(1)))
 		error_msg = "rec_len is smaller than minimal";
-	else if (rlen % 4 != 0)
+	else if (unlikely(rlen % 4 != 0))
 		error_msg = "rec_len % 4 != 0";
-	else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
+	else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len)))
 		error_msg = "rec_len is too small for name_len";
-	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+	else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)))
 		error_msg = "directory entry across blocks";
-	else if (le32_to_cpu(de->inode) >
-			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+	else if (unlikely(le32_to_cpu(de->inode) >
+			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)))
 		error_msg = "inode out of bounds";
 
-	if (error_msg != NULL)
+	if (unlikely(error_msg != NULL))
 		ext3_error (dir->i_sb, function,
 			"bad entry in directory #%lu: %s - "
 			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
 			dir->i_ino, error_msg, offset,
 			(unsigned long) le32_to_cpu(de->inode),
 			rlen, de->name_len);
+
 	return error_msg == NULL ? 1 : 0;
 }
 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index a443965946bb..65990ef612f5 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -724,21 +724,30 @@ struct ext3_dir_entry_2 {
 					 ~EXT3_DIR_ROUND)
 #define EXT3_MAX_REC_LEN		((1<<16)-1)
 
+/*
+ * Tests against MAX_REC_LEN etc were put in place for 64k block
+ * sizes; if that is not possible on this arch, we can skip
+ * those tests and speed things up.
+ */
 static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
 {
 	unsigned len = le16_to_cpu(dlen);
 
+#if (PAGE_CACHE_SIZE >= 65536)
 	if (len == EXT3_MAX_REC_LEN)
 		return 1 << 16;
+#endif
 	return len;
 }
 
 static inline __le16 ext3_rec_len_to_disk(unsigned len)
 {
+#if (PAGE_CACHE_SIZE >= 65536)
 	if (len == (1 << 16))
 		return cpu_to_le16(EXT3_MAX_REC_LEN);
 	else if (len > (1 << 16))
 		BUG();
+#endif
 	return cpu_to_le16(len);
 }
 
-- 
cgit v1.2.3-71-gd317


From d96336b05d718b03ff03c94c0dc0cc283a29d534 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Mon, 27 Dec 2010 13:46:38 -0800
Subject: ext2: Resolve 'dereferencing pointer to incomplete type' when
 enabling EXT2_XATTR_DEBUG

When I enable EXT2_XATTR_DEBUG in fs/ext2/xattr.c I get a build error stating
the following:

  CC      fs/ext2/xattr.o
fs/ext2/xattr.c: In function 'ext2_xattr_cache_insert':
fs/ext2/xattr.c:841: error: dereferencing pointer to incomplete type
fs/ext2/xattr.c:846: error: dereferencing pointer to incomplete type
make[2]: *** [fs/ext2/xattr.o] Error 1
make[1]: *** [fs/ext2] Error 2
make: *** [fs] Error 2

These lines reference ext2_xattr_cache->c_entry_count which is defined
in struct mb_cache. struct mb_cache is currently only defined in fs/mbcache.c.
Moving struct mb_cache definition to include/linux/mbcache.h to resolve the
issue.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/mbcache.c            | 12 ------------
 include/linux/mbcache.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mbcache.c b/fs/mbcache.c
index 93444747237b..a25444ab2baf 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -76,18 +76,6 @@ EXPORT_SYMBOL(mb_cache_entry_find_first);
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 #endif
 
-struct mb_cache {
-	struct list_head		c_cache_list;
-	const char			*c_name;
-	atomic_t			c_entry_count;
-	int				c_max_entries;
-	int				c_bucket_bits;
-	struct kmem_cache		*c_entry_cache;
-	struct list_head		*c_block_hash;
-	struct list_head		*c_index_hash;
-};
-
-
 /*
  * Global data: list of all mbcache's, lru list, and a spinlock for
  * accessing cache data structures on SMP machines. The lru list is
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 54cbbac1e71d..5525d370701d 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -18,6 +18,17 @@ struct mb_cache_entry {
 	} e_index;
 };
 
+struct mb_cache {
+	struct list_head		c_cache_list;
+	const char			*c_name;
+	atomic_t			c_entry_count;
+	int				c_max_entries;
+	int				c_bucket_bits;
+	struct kmem_cache		*c_entry_cache;
+	struct list_head		*c_block_hash;
+	struct list_head		*c_index_hash;
+};
+
 /* Functions on caches */
 
 struct mb_cache *mb_cache_create(const char *, int);
-- 
cgit v1.2.3-71-gd317


From 83723d60717f8da0f53f91cf42a845ed56c09662 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 Jan 2011 20:11:38 +0100
Subject: netfilter: x_tables: dont block BH while reading counters

Using "iptables -L" with a lot of rules have a too big BH latency.
Jesper mentioned ~6 ms and worried of frame drops.

Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesnt need to block BH (for this cpu, but also other cpus).

This adds two increments on seqlock sequence per ipt_do_table() call,
its a reasonable cost for allowing "iptables -L" not block BH
processing.

Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 10 ++++-----
 net/ipv4/netfilter/arp_tables.c    | 45 ++++++++++++--------------------------
 net/ipv4/netfilter/ip_tables.c     | 45 ++++++++++++--------------------------
 net/ipv6/netfilter/ip6_tables.c    | 45 ++++++++++++--------------------------
 net/netfilter/x_tables.c           |  3 ++-
 5 files changed, 49 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec051440..6712e713b299 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
  *  necessary for reading the counters.
  */
 struct xt_info_lock {
-	spinlock_t lock;
+	seqlock_t lock;
 	unsigned char readers;
 };
 DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
 	local_bh_disable();
 	lock = &__get_cpu_var(xt_info_locks);
 	if (likely(!lock->readers++))
-		spin_lock(&lock->lock);
+		write_seqlock(&lock->lock);
 }
 
 static inline void xt_info_rdunlock_bh(void)
@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
 	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
 
 	if (likely(!--lock->readers))
-		spin_unlock(&lock->lock);
+		write_sequnlock(&lock->lock);
 	local_bh_enable();
 }
 
@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
  */
 static inline void xt_info_wrlock(unsigned int cpu)
 {
-	spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+	write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 static inline void xt_info_wrunlock(unsigned int cpu)
 {
-	spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+	write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 /*
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340a28d5..e855fffaed95 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
 	struct arpt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	 * about).
 	 */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct arpt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d633b3b6..652efea013dc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
 	struct ipt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU.
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ipt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..7d227c644f72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
 	struct ip6t_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ip6t_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507420e..c94237631077 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
 
 	for_each_possible_cpu(i) {
 		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-		spin_lock_init(&lock->lock);
+
+		seqlock_init(&lock->lock);
 		lock->readers = 0;
 	}
 
-- 
cgit v1.2.3-71-gd317


From 6650239a4b01077e80d5a4468562756d77afaa59 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 8 Jan 2011 17:45:38 -0500
Subject: NFS: Don't use vm_map_ram() in readdir

vm_map_ram() is not available on NOMMU platforms, and causes trouble
on incoherrent architectures such as ARM when we access the page data
through both the direct and the virtual mapping.

The alternative is to use the direct mapping to access page data
for the case when we are not crossing a page boundary, but to copy
the data into a linear scratch buffer when we are accessing data
that spans page boundaries.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: stable@kernel.org  [2.6.37]
---
 fs/nfs/dir.c               |  44 ++++++-------
 fs/nfs/nfs2xdr.c           |   6 --
 fs/nfs/nfs3xdr.c           |   6 --
 fs/nfs/nfs4xdr.c           |   6 --
 include/linux/sunrpc/xdr.h |   4 +-
 net/sunrpc/xdr.c           | 155 ++++++++++++++++++++++++++++++++++++---------
 6 files changed, 148 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..0108cf4f3403 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,7 +33,6 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
-#include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
 
 #include "delegation.h"
@@ -459,25 +458,26 @@ out:
 /* Perform conversion from xdr to cache array */
 static
 int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
-				void *xdr_page, struct page *page, unsigned int buflen)
+				struct page **xdr_pages, struct page *page, unsigned int buflen)
 {
 	struct xdr_stream stream;
-	struct xdr_buf buf;
-	__be32 *ptr = xdr_page;
+	struct xdr_buf buf = {
+		.pages = xdr_pages,
+		.page_len = buflen,
+		.buflen = buflen,
+		.len = buflen,
+	};
+	struct page *scratch;
 	struct nfs_cache_array *array;
 	unsigned int count = 0;
 	int status;
 
-	buf.head->iov_base = xdr_page;
-	buf.head->iov_len = buflen;
-	buf.tail->iov_len = 0;
-	buf.page_base = 0;
-	buf.page_len = 0;
-	buf.buflen = buf.head->iov_len;
-	buf.len = buf.head->iov_len;
-
-	xdr_init_decode(&stream, &buf, ptr);
+	scratch = alloc_page(GFP_KERNEL);
+	if (scratch == NULL)
+		return -ENOMEM;
 
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	do {
 		status = xdr_decode(desc, entry, &stream);
@@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		} else
 			status = PTR_ERR(array);
 	}
+
+	put_page(scratch);
 	return status;
 }
 
@@ -521,7 +523,6 @@ static
 void nfs_readdir_free_large_page(void *ptr, struct page **pages,
 		unsigned int npages)
 {
-	vm_unmap_ram(ptr, npages);
 	nfs_readdir_free_pagearray(pages, npages);
 }
 
@@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
  * to nfs_readdir_free_large_page
  */
 static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-	void *ptr;
 	unsigned int i;
 
 	for (i = 0; i < npages; i++) {
@@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 			goto out_freepages;
 		pages[i] = page;
 	}
+	return 0;
 
-	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
-	if (!IS_ERR_OR_NULL(ptr))
-		return ptr;
 out_freepages:
 	nfs_readdir_free_pagearray(pages, i);
-	return NULL;
+	return -ENOMEM;
 }
 
 static
@@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
-	pages_ptr = nfs_readdir_large_page(pages, array_size);
-	if (!pages_ptr)
+	status = nfs_readdir_large_page(pages, array_size);
+	if (status < 0)
 		goto out_release_array;
 	do {
 		unsigned int pglen;
@@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		if (status < 0)
 			break;
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
 		if (status < 0) {
 			if (status == -ENOSPC)
 				status = 0;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5914a1911c95..b382a1b5e7e4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
 
 	entry->d_type = DT_UNKNOWN;
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f06dac..ba91236c6ee7 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
 			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
 	}
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f1826b012e6..0662a9821df5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (verify_attr_len(xdr, p, len) < 0)
 		goto out_overflow;
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 498ab93a81e4..7783c687c777 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -201,6 +201,8 @@ struct xdr_stream {
 
 	__be32 *end;		/* end of available buffer space */
 	struct kvec *iov;	/* pointer to the current kvec */
+	struct kvec scratch;	/* Scratch buffer */
+	struct page **page_ptr;	/* pointer to the current page */
 };
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
@@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index cd9e841e7492..679cd674b81d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
 }
 EXPORT_SYMBOL_GPL(xdr_write_pages);
 
+static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+		__be32 *p, unsigned int len)
+{
+	if (len > iov->iov_len)
+		len = iov->iov_len;
+	if (p == NULL)
+		p = (__be32*)iov->iov_base;
+	xdr->p = p;
+	xdr->end = (__be32*)(iov->iov_base + len);
+	xdr->iov = iov;
+	xdr->page_ptr = NULL;
+}
+
+static int xdr_set_page_base(struct xdr_stream *xdr,
+		unsigned int base, unsigned int len)
+{
+	unsigned int pgnr;
+	unsigned int maxlen;
+	unsigned int pgoff;
+	unsigned int pgend;
+	void *kaddr;
+
+	maxlen = xdr->buf->page_len;
+	if (base >= maxlen)
+		return -EINVAL;
+	maxlen -= base;
+	if (len > maxlen)
+		len = maxlen;
+
+	base += xdr->buf->page_base;
+
+	pgnr = base >> PAGE_SHIFT;
+	xdr->page_ptr = &xdr->buf->pages[pgnr];
+	kaddr = page_address(*xdr->page_ptr);
+
+	pgoff = base & ~PAGE_MASK;
+	xdr->p = (__be32*)(kaddr + pgoff);
+
+	pgend = pgoff + len;
+	if (pgend > PAGE_SIZE)
+		pgend = PAGE_SIZE;
+	xdr->end = (__be32*)(kaddr + pgend);
+	xdr->iov = NULL;
+	return 0;
+}
+
+static void xdr_set_next_page(struct xdr_stream *xdr)
+{
+	unsigned int newbase;
+
+	newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
+	newbase -= xdr->buf->page_base;
+
+	if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
+		xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+}
+
+static bool xdr_set_next_buffer(struct xdr_stream *xdr)
+{
+	if (xdr->page_ptr != NULL)
+		xdr_set_next_page(xdr);
+	else if (xdr->iov == xdr->buf->head) {
+		if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
+			xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+	}
+	return xdr->p != xdr->end;
+}
+
 /**
  * xdr_init_decode - Initialize an xdr_stream for decoding data.
  * @xdr: pointer to xdr_stream struct
@@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
  */
 void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 {
-	struct kvec *iov = buf->head;
-	unsigned int len = iov->iov_len;
-
-	if (len > buf->len)
-		len = buf->len;
 	xdr->buf = buf;
-	xdr->iov = iov;
-	xdr->p = p;
-	xdr->end = (__be32 *)((char *)iov->iov_base + len);
+	xdr->scratch.iov_base = NULL;
+	xdr->scratch.iov_len = 0;
+	if (buf->head[0].iov_len != 0)
+		xdr_set_iov(xdr, buf->head, p, buf->len);
+	else if (buf->page_len != 0)
+		xdr_set_page_base(xdr, 0, buf->len);
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
-/**
- * xdr_inline_peek - Allow read-ahead in the XDR data stream
- * @xdr: pointer to xdr_stream struct
- * @nbytes: number of bytes of data to decode
- *
- * Check if the input buffer is long enough to enable us to decode
- * 'nbytes' more bytes of data starting at the current position.
- * If so return the current pointer without updating the current
- * pointer position.
- */
-__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 	__be32 *p = xdr->p;
 	__be32 *q = p + XDR_QUADLEN(nbytes);
 
 	if (unlikely(q > xdr->end || q < p))
 		return NULL;
+	xdr->p = q;
 	return p;
 }
-EXPORT_SYMBOL_GPL(xdr_inline_peek);
 
 /**
- * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+	xdr->scratch.iov_base = buf;
+	xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
+static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+{
+	__be32 *p;
+	void *cpdest = xdr->scratch.iov_base;
+	size_t cplen = (char *)xdr->end - (char *)xdr->p;
+
+	if (nbytes > xdr->scratch.iov_len)
+		return NULL;
+	memcpy(cpdest, xdr->p, cplen);
+	cpdest += cplen;
+	nbytes -= cplen;
+	if (!xdr_set_next_buffer(xdr))
+		return NULL;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p == NULL)
+		return NULL;
+	memcpy(cpdest, p, nbytes);
+	return xdr->scratch.iov_base;
+}
+
+/**
+ * xdr_inline_decode - Retrieve XDR data to decode
  * @xdr: pointer to xdr_stream struct
  * @nbytes: number of bytes of data to decode
  *
@@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
  */
 __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
-	__be32 *p = xdr->p;
-	__be32 *q = p + XDR_QUADLEN(nbytes);
+	__be32 *p;
 
-	if (unlikely(q > xdr->end || q < p))
+	if (nbytes == 0)
+		return xdr->p;
+	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
 		return NULL;
-	xdr->p = q;
-	return p;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p != NULL)
+		return p;
+	return xdr_copy_to_scratch(xdr, nbytes);
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
@@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
  */
 void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
 {
-	char * kaddr = page_address(xdr->buf->pages[0]);
 	xdr_read_pages(xdr, len);
 	/*
 	 * Position current pointer at beginning of tail, and
 	 * set remaining message length.
 	 */
-	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
-		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
-	xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
-	xdr->end = (__be32 *)((char *)xdr->p + len);
+	xdr_set_page_base(xdr, 0, len);
 }
 EXPORT_SYMBOL_GPL(xdr_enter_page);
 
-- 
cgit v1.2.3-71-gd317


From 92ed1a76ca31774eb27de14b2215841367c68056 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <peter.korsgaard@barco.com>
Date: Mon, 10 Jan 2011 22:11:23 +0100
Subject: i2c: Add generic I2C multiplexer using GPIO API

Add an i2c mux driver providing access to i2c bus segments using a
hardware MUX sitting on a master bus and controlled through gpio pins.

E.G. something like:

  ----------              ----------  Bus segment 1   - - - - -
 |          | SCL/SDA    |          |-------------- |           |
 |          |------------|          |
 |          |            |          | Bus segment 2 |           |
 |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
 |          |------------|          |               |           |
 |          |            |          | Bus segment M
 |          |            |          |---------------|           |
  ----------              ----------                  - - - - -

SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
according to the settings of the GPIO pins 1..N.

Signed-off-by: Peter Korsgaard <peter.korsgaard@barco.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/muxes/gpio-i2cmux |  65 +++++++++++++
 MAINTAINERS                         |   8 ++
 drivers/i2c/muxes/Kconfig           |  12 +++
 drivers/i2c/muxes/Makefile          |   1 +
 drivers/i2c/muxes/gpio-i2cmux.c     | 184 ++++++++++++++++++++++++++++++++++++
 include/linux/gpio-i2cmux.h         |  38 ++++++++
 6 files changed, 308 insertions(+)
 create mode 100644 Documentation/i2c/muxes/gpio-i2cmux
 create mode 100644 drivers/i2c/muxes/gpio-i2cmux.c
 create mode 100644 include/linux/gpio-i2cmux.h

(limited to 'include/linux')

diff --git a/Documentation/i2c/muxes/gpio-i2cmux b/Documentation/i2c/muxes/gpio-i2cmux
new file mode 100644
index 000000000000..811cd78d4cdc
--- /dev/null
+++ b/Documentation/i2c/muxes/gpio-i2cmux
@@ -0,0 +1,65 @@
+Kernel driver gpio-i2cmux
+
+Author: Peter Korsgaard <peter.korsgaard@barco.com>
+
+Description
+-----------
+
+gpio-i2cmux is an i2c mux driver providing access to I2C bus segments
+from a master I2C bus and a hardware MUX controlled through GPIO pins.
+
+E.G.:
+
+  ----------              ----------  Bus segment 1   - - - - -
+ |          | SCL/SDA    |          |-------------- |           |
+ |          |------------|          |
+ |          |            |          | Bus segment 2 |           |
+ |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
+ |          |------------|          |               |           |
+ |          |            |          | Bus segment M
+ |          |            |          |---------------|           |
+  ----------              ----------                  - - - - -
+
+SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
+according to the settings of the GPIO pins 1..N.
+
+Usage
+-----
+
+gpio-i2cmux uses the platform bus, so you need to provide a struct
+platform_device with the platform_data pointing to a struct
+gpio_i2cmux_platform_data with the I2C adapter number of the master
+bus, the number of bus segments to create and the GPIO pins used
+to control it. See include/linux/gpio-i2cmux.h for details.
+
+E.G. something like this for a MUX providing 4 bus segments
+controlled through 3 GPIO pins:
+
+#include <linux/gpio-i2cmux.h>
+#include <linux/platform_device.h>
+
+static const unsigned myboard_gpiomux_gpios[] = {
+	AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
+};
+
+static const unsigned myboard_gpiomux_values[] = {
+	0, 1, 2, 3
+};
+
+static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
+	.parent		= 1,
+	.base_nr	= 2, /* optional */
+	.values		= myboard_gpiomux_values,
+	.n_values	= ARRAY_SIZE(myboard_gpiomux_values),
+	.gpios		= myboard_gpiomux_gpios,
+	.n_gpios	= ARRAY_SIZE(myboard_gpiomux_gpios),
+	.idle		= 4, /* optional */
+};
+
+static struct platform_device myboard_i2cmux = {
+	.name		= "gpio-i2cmux",
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &myboard_i2cmux_data,
+	},
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index 23d04363a195..23a4765ab8d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2608,6 +2608,14 @@ S:	Supported
 F:	drivers/i2c/busses/i2c-gpio.c
 F:	include/linux/i2c-gpio.h
 
+GENERIC GPIO I2C MULTIPLEXER DRIVER
+M:	Peter Korsgaard <peter.korsgaard@barco.com>
+L:	linux-i2c@vger.kernel.org
+S:	Supported
+F:	drivers/i2c/muxes/gpio-i2cmux.c
+F:	include/linux/gpio-i2cmux.h
+F:	Documentation/i2c/muxes/gpio-i2cmux
+
 GENERIC HDLC (WAN) DRIVERS
 M:	Krzysztof Halasa <khc@pm.waw.pl>
 W:	http://www.kernel.org/pub/linux/utils/net/hdlc/
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index 4d91d80bfd23..90b7a0163899 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -5,6 +5,18 @@
 menu "Multiplexer I2C Chip support"
 	depends on I2C_MUX
 
+config I2C_MUX_GPIO
+	tristate "GPIO-based I2C multiplexer"
+	depends on GENERIC_GPIO
+	help
+	  If you say yes to this option, support will be included for a
+	  GPIO based I2C multiplexer. This driver provides access to
+	  I2C busses connected through a MUX, which is controlled
+	  through GPIO pins.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called gpio-i2cmux.
+
 config I2C_MUX_PCA9541
 	tristate "NXP PCA9541 I2C Master Selector"
 	depends on EXPERIMENTAL
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index d743806d9b42..4640436ea61f 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -1,6 +1,7 @@
 #
 # Makefile for multiplexer I2C chip drivers.
 
+obj-$(CONFIG_I2C_MUX_GPIO)	+= gpio-i2cmux.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= pca954x.o
 
diff --git a/drivers/i2c/muxes/gpio-i2cmux.c b/drivers/i2c/muxes/gpio-i2cmux.c
new file mode 100644
index 000000000000..7b6ce624cd6e
--- /dev/null
+++ b/drivers/i2c/muxes/gpio-i2cmux.c
@@ -0,0 +1,184 @@
+/*
+ * I2C multiplexer using GPIO API
+ *
+ * Peter Korsgaard <peter.korsgaard@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/gpio-i2cmux.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/gpio.h>
+
+struct gpiomux {
+	struct i2c_adapter *parent;
+	struct i2c_adapter **adap; /* child busses */
+	struct gpio_i2cmux_platform_data data;
+};
+
+static void gpiomux_set(const struct gpiomux *mux, unsigned val)
+{
+	int i;
+
+	for (i = 0; i < mux->data.n_gpios; i++)
+		gpio_set_value(mux->data.gpios[i], val & (1 << i));
+}
+
+static int gpiomux_select(struct i2c_adapter *adap, void *data, u32 chan)
+{
+	struct gpiomux *mux = data;
+
+	gpiomux_set(mux, mux->data.values[chan]);
+
+	return 0;
+}
+
+static int gpiomux_deselect(struct i2c_adapter *adap, void *data, u32 chan)
+{
+	struct gpiomux *mux = data;
+
+	gpiomux_set(mux, mux->data.idle);
+
+	return 0;
+}
+
+static int __devinit gpiomux_probe(struct platform_device *pdev)
+{
+	struct gpiomux *mux;
+	struct gpio_i2cmux_platform_data *pdata;
+	struct i2c_adapter *parent;
+	int (*deselect) (struct i2c_adapter *, void *, u32);
+	unsigned initial_state;
+	int i, ret;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		return -ENODEV;
+	}
+
+	parent = i2c_get_adapter(pdata->parent);
+	if (!parent) {
+		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
+			pdata->parent);
+		return -ENODEV;
+	}
+
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux) {
+		ret = -ENOMEM;
+		goto alloc_failed;
+	}
+
+	mux->parent = parent;
+	mux->data = *pdata;
+	mux->adap = kzalloc(sizeof(struct i2c_adapter *) * pdata->n_values,
+			    GFP_KERNEL);
+	if (!mux->adap) {
+		ret = -ENOMEM;
+		goto alloc_failed2;
+	}
+
+	if (pdata->idle != GPIO_I2CMUX_NO_IDLE) {
+		initial_state = pdata->idle;
+		deselect = gpiomux_deselect;
+	} else {
+		initial_state = pdata->values[0];
+		deselect = NULL;
+	}
+
+	for (i = 0; i < pdata->n_gpios; i++) {
+		ret = gpio_request(pdata->gpios[i], "gpio-i2cmux");
+		if (ret)
+			goto err_request_gpio;
+		gpio_direction_output(pdata->gpios[i],
+				      initial_state & (1 << i));
+	}
+
+	for (i = 0; i < pdata->n_values; i++) {
+		u32 nr = pdata->base_nr ? (pdata->base_nr + i) : 0;
+
+		mux->adap[i] = i2c_add_mux_adapter(parent, mux, nr, i,
+						   gpiomux_select, deselect);
+		if (!mux->adap[i]) {
+			ret = -ENODEV;
+			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
+			goto add_adapter_failed;
+		}
+	}
+
+	dev_info(&pdev->dev, "%d port mux on %s adapter\n",
+		 pdata->n_values, parent->name);
+
+	platform_set_drvdata(pdev, mux);
+
+	return 0;
+
+add_adapter_failed:
+	for (; i > 0; i--)
+		i2c_del_mux_adapter(mux->adap[i - 1]);
+	i = pdata->n_gpios;
+err_request_gpio:
+	for (; i > 0; i--)
+		gpio_free(pdata->gpios[i - 1]);
+	kfree(mux->adap);
+alloc_failed2:
+	kfree(mux);
+alloc_failed:
+	i2c_put_adapter(parent);
+
+	return ret;
+}
+
+static int __devexit gpiomux_remove(struct platform_device *pdev)
+{
+	struct gpiomux *mux = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < mux->data.n_values; i++)
+		i2c_del_mux_adapter(mux->adap[i]);
+
+	for (i = 0; i < mux->data.n_gpios; i++)
+		gpio_free(mux->data.gpios[i]);
+
+	platform_set_drvdata(pdev, NULL);
+	i2c_put_adapter(mux->parent);
+	kfree(mux->adap);
+	kfree(mux);
+
+	return 0;
+}
+
+static struct platform_driver gpiomux_driver = {
+	.probe	= gpiomux_probe,
+	.remove	= __devexit_p(gpiomux_remove),
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "gpio-i2cmux",
+	},
+};
+
+static int __init gpiomux_init(void)
+{
+	return platform_driver_register(&gpiomux_driver);
+}
+
+static void __exit gpiomux_exit(void)
+{
+	platform_driver_unregister(&gpiomux_driver);
+}
+
+module_init(gpiomux_init);
+module_exit(gpiomux_exit);
+
+MODULE_DESCRIPTION("GPIO-based I2C multiplexer driver");
+MODULE_AUTHOR("Peter Korsgaard <peter.korsgaard@barco.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:gpio-i2cmux");
diff --git a/include/linux/gpio-i2cmux.h b/include/linux/gpio-i2cmux.h
new file mode 100644
index 000000000000..4a333bb0bd0d
--- /dev/null
+++ b/include/linux/gpio-i2cmux.h
@@ -0,0 +1,38 @@
+/*
+ * gpio-i2cmux interface to platform code
+ *
+ * Peter Korsgaard <peter.korsgaard@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_GPIO_I2CMUX_H
+#define _LINUX_GPIO_I2CMUX_H
+
+/* MUX has no specific idle mode */
+#define GPIO_I2CMUX_NO_IDLE	((unsigned)-1)
+
+/**
+ * struct gpio_i2cmux_platform_data - Platform-dependent data for gpio-i2cmux
+ * @parent: Parent I2C bus adapter number
+ * @base_nr: Base I2C bus number to number adapters from or zero for dynamic
+ * @values: Array of bitmasks of GPIO settings (low/high) for each
+ *	position
+ * @n_values: Number of multiplexer positions (busses to instantiate)
+ * @gpios: Array of GPIO numbers used to control MUX
+ * @n_gpios: Number of GPIOs used to control MUX
+ * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used
+ */
+struct gpio_i2cmux_platform_data {
+	int parent;
+	int base_nr;
+	const unsigned *values;
+	int n_values;
+	const unsigned *gpios;
+	int n_gpios;
+	unsigned idle;
+};
+
+#endif /* _LINUX_GPIO_I2CMUX_H */
-- 
cgit v1.2.3-71-gd317


From 0cc43a1806f078f7fd414850d8f1f1761696e4af Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 10 Jan 2011 22:11:23 +0100
Subject: i2c: Constify i2c_client where possible

Helper functions for I2C and SMBus transactions don't modify the
i2c_client that is passed to them, so it can be marked const.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/i2c-core.c   | 29 ++++++++++++++++-------------
 drivers/rtc/rtc-ds1307.c | 12 ++++++------
 include/linux/i2c.h      | 27 +++++++++++++++------------
 3 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 6b4cc567645b..c7db6980e3a3 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1362,7 +1362,7 @@ EXPORT_SYMBOL(i2c_transfer);
  *
  * Returns negative errno, or else the number of bytes written.
  */
-int i2c_master_send(struct i2c_client *client, const char *buf, int count)
+int i2c_master_send(const struct i2c_client *client, const char *buf, int count)
 {
 	int ret;
 	struct i2c_adapter *adap = client->adapter;
@@ -1389,7 +1389,7 @@ EXPORT_SYMBOL(i2c_master_send);
  *
  * Returns negative errno, or else the number of bytes read.
  */
-int i2c_master_recv(struct i2c_client *client, char *buf, int count)
+int i2c_master_recv(const struct i2c_client *client, char *buf, int count)
 {
 	struct i2c_adapter *adap = client->adapter;
 	struct i2c_msg msg;
@@ -1679,7 +1679,7 @@ static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
  * This executes the SMBus "receive byte" protocol, returning negative errno
  * else the byte received from the device.
  */
-s32 i2c_smbus_read_byte(struct i2c_client *client)
+s32 i2c_smbus_read_byte(const struct i2c_client *client)
 {
 	union i2c_smbus_data data;
 	int status;
@@ -1699,7 +1699,7 @@ EXPORT_SYMBOL(i2c_smbus_read_byte);
  * This executes the SMBus "send byte" protocol, returning negative errno
  * else zero on success.
  */
-s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value)
+s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value)
 {
 	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
 	                      I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL);
@@ -1714,7 +1714,7 @@ EXPORT_SYMBOL(i2c_smbus_write_byte);
  * This executes the SMBus "read byte" protocol, returning negative errno
  * else a data byte received from the device.
  */
-s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command)
+s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command)
 {
 	union i2c_smbus_data data;
 	int status;
@@ -1735,7 +1735,8 @@ EXPORT_SYMBOL(i2c_smbus_read_byte_data);
  * This executes the SMBus "write byte" protocol, returning negative errno
  * else zero on success.
  */
-s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value)
+s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command,
+			      u8 value)
 {
 	union i2c_smbus_data data;
 	data.byte = value;
@@ -1753,7 +1754,7 @@ EXPORT_SYMBOL(i2c_smbus_write_byte_data);
  * This executes the SMBus "read word" protocol, returning negative errno
  * else a 16-bit unsigned "word" received from the device.
  */
-s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command)
+s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command)
 {
 	union i2c_smbus_data data;
 	int status;
@@ -1774,7 +1775,8 @@ EXPORT_SYMBOL(i2c_smbus_read_word_data);
  * This executes the SMBus "write word" protocol, returning negative errno
  * else zero on success.
  */
-s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value)
+s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command,
+			      u16 value)
 {
 	union i2c_smbus_data data;
 	data.word = value;
@@ -1793,7 +1795,8 @@ EXPORT_SYMBOL(i2c_smbus_write_word_data);
  * This executes the SMBus "process call" protocol, returning negative errno
  * else a 16-bit unsigned "word" received from the device.
  */
-s32 i2c_smbus_process_call(struct i2c_client *client, u8 command, u16 value)
+s32 i2c_smbus_process_call(const struct i2c_client *client, u8 command,
+			   u16 value)
 {
 	union i2c_smbus_data data;
 	int status;
@@ -1821,7 +1824,7 @@ EXPORT_SYMBOL(i2c_smbus_process_call);
  * support this; its emulation through I2C messaging relies on a specific
  * mechanism (I2C_M_RECV_LEN) which may not be implemented.
  */
-s32 i2c_smbus_read_block_data(struct i2c_client *client, u8 command,
+s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command,
 			      u8 *values)
 {
 	union i2c_smbus_data data;
@@ -1848,7 +1851,7 @@ EXPORT_SYMBOL(i2c_smbus_read_block_data);
  * This executes the SMBus "block write" protocol, returning negative errno
  * else zero on success.
  */
-s32 i2c_smbus_write_block_data(struct i2c_client *client, u8 command,
+s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command,
 			       u8 length, const u8 *values)
 {
 	union i2c_smbus_data data;
@@ -1864,7 +1867,7 @@ s32 i2c_smbus_write_block_data(struct i2c_client *client, u8 command,
 EXPORT_SYMBOL(i2c_smbus_write_block_data);
 
 /* Returns the number of read bytes */
-s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command,
+s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command,
 				  u8 length, u8 *values)
 {
 	union i2c_smbus_data data;
@@ -1884,7 +1887,7 @@ s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command,
 }
 EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
 
-s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client, u8 command,
+s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command,
 				   u8 length, const u8 *values)
 {
 	union i2c_smbus_data data;
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index d827ce570a8c..0d559b6416dd 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -106,9 +106,9 @@ struct ds1307 {
 	struct i2c_client	*client;
 	struct rtc_device	*rtc;
 	struct work_struct	work;
-	s32 (*read_block_data)(struct i2c_client *client, u8 command,
+	s32 (*read_block_data)(const struct i2c_client *client, u8 command,
 			       u8 length, u8 *values);
-	s32 (*write_block_data)(struct i2c_client *client, u8 command,
+	s32 (*write_block_data)(const struct i2c_client *client, u8 command,
 				u8 length, const u8 *values);
 };
 
@@ -158,8 +158,8 @@ MODULE_DEVICE_TABLE(i2c, ds1307_id);
 
 #define BLOCK_DATA_MAX_TRIES 10
 
-static s32 ds1307_read_block_data_once(struct i2c_client *client, u8 command,
-				  u8 length, u8 *values)
+static s32 ds1307_read_block_data_once(const struct i2c_client *client,
+				       u8 command, u8 length, u8 *values)
 {
 	s32 i, data;
 
@@ -172,7 +172,7 @@ static s32 ds1307_read_block_data_once(struct i2c_client *client, u8 command,
 	return i;
 }
 
-static s32 ds1307_read_block_data(struct i2c_client *client, u8 command,
+static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command,
 				  u8 length, u8 *values)
 {
 	u8 oldvalues[I2C_SMBUS_BLOCK_MAX];
@@ -198,7 +198,7 @@ static s32 ds1307_read_block_data(struct i2c_client *client, u8 command,
 	return length;
 }
 
-static s32 ds1307_write_block_data(struct i2c_client *client, u8 command,
+static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command,
 				   u8 length, const u8 *values)
 {
 	u8 currvalues[I2C_SMBUS_BLOCK_MAX];
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 56cfe23ffb39..903576df88dc 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -57,9 +57,10 @@ struct i2c_board_info;
  * transmit an arbitrary number of messages without interruption.
  * @count must be be less than 64k since msg.len is u16.
  */
-extern int i2c_master_send(struct i2c_client *client, const char *buf,
+extern int i2c_master_send(const struct i2c_client *client, const char *buf,
+			   int count);
+extern int i2c_master_recv(const struct i2c_client *client, char *buf,
 			   int count);
-extern int i2c_master_recv(struct i2c_client *client, char *buf, int count);
 
 /* Transfer num messages.
  */
@@ -78,23 +79,25 @@ extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
 
-extern s32 i2c_smbus_read_byte(struct i2c_client *client);
-extern s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value);
-extern s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command);
-extern s32 i2c_smbus_write_byte_data(struct i2c_client *client,
+extern s32 i2c_smbus_read_byte(const struct i2c_client *client);
+extern s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value);
+extern s32 i2c_smbus_read_byte_data(const struct i2c_client *client,
+				    u8 command);
+extern s32 i2c_smbus_write_byte_data(const struct i2c_client *client,
 				     u8 command, u8 value);
-extern s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command);
-extern s32 i2c_smbus_write_word_data(struct i2c_client *client,
+extern s32 i2c_smbus_read_word_data(const struct i2c_client *client,
+				    u8 command);
+extern s32 i2c_smbus_write_word_data(const struct i2c_client *client,
 				     u8 command, u16 value);
 /* Returns the number of read bytes */
-extern s32 i2c_smbus_read_block_data(struct i2c_client *client,
+extern s32 i2c_smbus_read_block_data(const struct i2c_client *client,
 				     u8 command, u8 *values);
-extern s32 i2c_smbus_write_block_data(struct i2c_client *client,
+extern s32 i2c_smbus_write_block_data(const struct i2c_client *client,
 				      u8 command, u8 length, const u8 *values);
 /* Returns the number of read bytes */
-extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client,
+extern s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client,
 					 u8 command, u8 length, u8 *values);
-extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
+extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client,
 					  u8 command, u8 length,
 					  const u8 *values);
 #endif /* I2C */
-- 
cgit v1.2.3-71-gd317


From c599bd6b9ac8926b03e6bf332a8c14ae2ffb43a3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 10 Jan 2011 02:54:32 +0000
Subject: netdev: bfin_mac: let boards set vlan masks

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bfin_mac.c   | 7 +++++++
 drivers/net/bfin_mac.h   | 3 +++
 include/linux/bfin_mac.h | 1 +
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index e712be4a6433..0b9fc5173aef 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -588,6 +588,10 @@ static void setup_system_regs(struct net_device *dev)
 
 	bfin_write_EMAC_MMC_CTL(RSTC | CROLL);
 
+	/* Set vlan regs to let 1522 bytes long packets pass through */
+	bfin_write_EMAC_VLAN1(lp->vlan1_mask);
+	bfin_write_EMAC_VLAN2(lp->vlan2_mask);
+
 	/* Initialize the TX DMA channel registers */
 	bfin_write_DMA2_X_COUNT(0);
 	bfin_write_DMA2_X_MODIFY(4);
@@ -1520,6 +1524,9 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 		goto out_err_mii_probe;
 	}
 
+	lp->vlan1_mask = ETH_P_8021Q | mii_bus_data->vlan1_mask;
+	lp->vlan2_mask = ETH_P_8021Q | mii_bus_data->vlan2_mask;
+
 	/* Fill in the fields of the device structure with ethernet values. */
 	ether_setup(ndev);
 
diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index 692187da99f0..f8559ac9a403 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -82,6 +82,9 @@ struct bfin_mac_local {
 	struct timer_list tx_reclaim_timer;
 	struct net_device *ndev;
 
+	/* Data for EMAC_VLAN1 regs */
+	u16 vlan1_mask, vlan2_mask;
+
 	/* MII and PHY stuffs */
 	int old_link;          /* used by bf537_adjust_link */
 	int old_speed;
diff --git a/include/linux/bfin_mac.h b/include/linux/bfin_mac.h
index 904dec7d03a1..a69554ef8476 100644
--- a/include/linux/bfin_mac.h
+++ b/include/linux/bfin_mac.h
@@ -24,6 +24,7 @@ struct bfin_mii_bus_platform_data {
 	const unsigned short *mac_peripherals;
 	int phy_mode;
 	unsigned int phy_mask;
+	unsigned short vlan1_mask, vlan2_mask;
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 9 Jan 2011 19:36:31 +0000
Subject: net: Add alloc_netdev_mqs function

Added alloc_netdev_mqs function which allows the number of transmit and
receive queues to be specified independenty.  alloc_netdev_mq was
changed to a macro to call the new function.  Also added
alloc_etherdev_mqs with same purpose.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  4 +++-
 include/linux/netdevice.h   | 10 +++++++---
 net/core/dev.c              | 32 +++++++++++++++++++++-----------
 net/ethernet/eth.c          | 12 +++++++-----
 4 files changed, 38 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f16a01081e15..bec8b82889bf 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -48,8 +48,10 @@ extern int eth_validate_addr(struct net_device *dev);
 
 
-extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+					    unsigned int rxqs);
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
+#define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index de2bfe6da359..be4957cf6511 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2191,11 +2191,15 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 extern void		ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 				       void (*setup)(struct net_device *),
-				       unsigned int queue_count);
+				       unsigned int txqs, unsigned int rxqs);
 #define alloc_netdev(sizeof_priv, name, setup) \
-	alloc_netdev_mq(sizeof_priv, name, setup, 1)
+	alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
+
+#define alloc_netdev_mq(sizeof_priv, name, setup, count) \
+	alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
+
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 3fe443be4b15..3295b94884ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 }
 
 /**
- *	alloc_netdev_mq - allocate network device
+ *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
  *	@name:		device name format string
  *	@setup:		callback to initialize device
- *	@queue_count:	the number of subqueues to allocate
+ *	@txqs:		the number of TX subqueues to allocate
+ *	@rxqs:		the number of RX subqueues to allocate
  *
  *	Allocates a struct net_device with private data area for driver use
  *	and performs basic initialization.  Also allocates subquue structs
- *	for each queue on the device at the end of the netdevice.
+ *	for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-		void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *),
+		unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
 	size_t alloc_size;
@@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
-	if (queue_count < 1) {
+	if (txqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device "
 		       "with zero queues.\n");
 		return NULL;
 	}
 
+#ifdef CONFIG_RPS
+	if (rxqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero RX queues.\n");
+		return NULL;
+	}
+#endif
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev_net_set(dev, &init_net);
 
-	dev->num_tx_queues = queue_count;
-	dev->real_num_tx_queues = queue_count;
+	dev->num_tx_queues = txqs;
+	dev->real_num_tx_queues = txqs;
 	if (netif_alloc_netdev_queues(dev))
 		goto free_pcpu;
 
 #ifdef CONFIG_RPS
-	dev->num_rx_queues = queue_count;
-	dev->real_num_rx_queues = queue_count;
+	dev->num_rx_queues = rxqs;
+	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
 		goto free_pcpu;
 #endif
@@ -5707,7 +5717,7 @@ free_p:
 	kfree(p);
 	return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
 
 /**
  *	free_netdev - free network device
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f00ef2f1d814..f9d7ac924f15 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -347,10 +347,11 @@ void ether_setup(struct net_device *dev)
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev_mq - Allocates and sets up an Ethernet device
+ * alloc_etherdev_mqs - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
  *	for this Ethernet device
- * @queue_count: The number of queues this device has.
+ * @txqs: The number of TX queues this device has.
+ * @txqs: The number of RX queues this device has.
  *
  * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
@@ -360,11 +361,12 @@ EXPORT_SYMBOL(ether_setup);
  * this private data area.
  */
 
-struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+				      unsigned int rxqs)
 {
-	return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
+	return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
 }
-EXPORT_SYMBOL(alloc_etherdev_mq);
+EXPORT_SYMBOL(alloc_etherdev_mqs);
 
 static size_t _format_mac_addr(char *buf, int buflen,
 			       const unsigned char *addr, int len)
-- 
cgit v1.2.3-71-gd317


From 357f54d6b38252737116a6d631f6ac28ded018ed Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Tue, 14 Dec 2010 10:11:57 -0500
Subject: NFS fix the setting of exchange id flag

Indicate support for referrals. Do not set any PNFS roles. Check the flags
returned by the server for validity. Do not use exchange flags from an old
client ID instance when recovering a client ID.

Update the EXCHID4_FLAG_XXX set to RFC 5661.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c    | 25 +++++++++++++++++++++----
 include/linux/nfs4.h |  8 ++++++--
 2 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f2b92f6a7efb..9d992b0346e3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4518,6 +4518,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 }
 
 #ifdef CONFIG_NFS_V4_1
+/*
+ * Check the exchange flags returned by the server for invalid flags, having
+ * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
+ * DS flags set.
+ */
+static int nfs4_check_cl_exchange_flags(u32 flags)
+{
+	if (flags & ~EXCHGID4_FLAG_MASK_R)
+		goto out_inval;
+	if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
+	    (flags & EXCHGID4_FLAG_USE_NON_PNFS))
+		goto out_inval;
+	if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
+		goto out_inval;
+	return NFS_OK;
+out_inval:
+	return -NFS4ERR_INVAL;
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
@@ -4531,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	nfs4_verifier verifier;
 	struct nfs41_exchange_id_args args = {
 		.client = clp,
-		.flags = clp->cl_exchange_flags,
+		.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
 	};
 	struct nfs41_exchange_id_res res = {
 		.client = clp,
@@ -4548,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	dprintk("--> %s\n", __func__);
 	BUG_ON(clp == NULL);
 
-	/* Remove server-only flags */
-	args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
-
 	p = (u32 *)verifier.data;
 	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
 	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4576,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 			break;
 	}
 
+	status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 }
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 4925b22219d2..9b46300b4305 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -111,9 +111,13 @@
 
 #define EXCHGID4_FLAG_SUPP_MOVED_REFER		0x00000001
 #define EXCHGID4_FLAG_SUPP_MOVED_MIGR		0x00000002
+#define EXCHGID4_FLAG_BIND_PRINC_STATEID	0x00000100
+
 #define EXCHGID4_FLAG_USE_NON_PNFS		0x00010000
 #define EXCHGID4_FLAG_USE_PNFS_MDS		0x00020000
 #define EXCHGID4_FLAG_USE_PNFS_DS		0x00040000
+#define EXCHGID4_FLAG_MASK_PNFS			0x00070000
+
 #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A	0x40000000
 #define EXCHGID4_FLAG_CONFIRMED_R		0x80000000
 /*
@@ -121,8 +125,8 @@
  * they're set in the argument or response, have separate
  * invalid flag masks for arg (_A) and resp (_R).
  */
-#define EXCHGID4_FLAG_MASK_A			0x40070003
-#define EXCHGID4_FLAG_MASK_R			0x80070003
+#define EXCHGID4_FLAG_MASK_A			0x40070103
+#define EXCHGID4_FLAG_MASK_R			0x80070103
 
 #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
-- 
cgit v1.2.3-71-gd317


From 2f46e07995734a363608e974a82fd05d5b610750 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 11 Jan 2011 23:55:51 +0100
Subject: netfilter: ebtables: make broute table work again

broute table init hook sets up the "br_should_route_hook" pointer,
which then gets called from br_input.

commit a386f99025f13b32502fe5dedf223c20d7283826
(bridge: add proper RCU annotation to should_route_hook)
introduced a typedef, and then changed this to:

br_should_route_hook_t *rhook;
[..]
rhook = rcu_dereference(br_should_route_hook);
if (*rhook(skb))

problem is that "br_should_route_hook" contains the address of the function,
so calling *rhook() results in kernel panic.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/if_bridge.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index f7e73c338c40..dd3f20139640 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -103,7 +103,7 @@ struct __fdb_entry {
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
-typedef int (*br_should_route_hook_t)(struct sk_buff *skb);
+typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
 
 #endif
-- 
cgit v1.2.3-71-gd317