From 78541c1dc60b65ecfce5a6a096fc260219d6784e Mon Sep 17 00:00:00 2001
From: Andrew Lutomirski <luto@amacapital.net>
Date: Wed, 16 Apr 2014 21:41:34 -0700
Subject: net: Fix ns_capable check in sock_diag_put_filterinfo

The caller needs capabilities on the namespace being queried, not on
their own namespace.  This is a security bug, although it likely has
only a minor impact.

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 54f91d35e5fd..302ab805b0bb 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -23,7 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
-int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+int sock_diag_put_filterinfo(struct sock *sk,
 			     struct sk_buff *skb, int attrtype);
 
 #endif
-- 
cgit v1.2.3-71-gd317


From a53b72c83a4216f2eb883ed45a0cbce014b8e62d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:26:25 -0700
Subject: net: Move the permission check in sock_diag_put_filterinfo to
 packet_diag_dump

The permission check in sock_diag_put_filterinfo is wrong, and it is so removed
from it's sources it is not clear why it is wrong.  Move the computation
into packet_diag_dump and pass a bool of the result into sock_diag_filterinfo.

This does not yet correct the capability check but instead simply moves it to make
it clear what is going on.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 2 +-
 net/core/sock_diag.c      | 4 ++--
 net/packet/diag.c         | 7 ++++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 302ab805b0bb..46cca4c06848 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -23,7 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
-int sock_diag_put_filterinfo(struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype);
 
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9deb6abd6cf6..a4216a4c9572 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 }
 EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
 
-int sock_diag_put_filterinfo(struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype)
 {
 	struct sock_fprog_kern *fprog;
@@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct sock *sk,
 	unsigned int flen;
 	int err = 0;
 
-	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+	if (!may_report_filterinfo) {
 		nla_reserve(skb, attrtype, 0);
 		return 0;
 	}
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 435ff99ba8c7..b34d0de24091 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			struct packet_diag_req *req,
+			bool may_report_filterinfo,
 			struct user_namespace *user_ns,
 			u32 portid, u32 seq, u32 flags, int sk_ino)
 {
@@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 		goto out_nlmsg_trim;
 
 	if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
-	    sock_diag_put_filterinfo(sk, skb, PACKET_DIAG_FILTER))
+	    sock_diag_put_filterinfo(may_report_filterinfo, sk, skb,
+				     PACKET_DIAG_FILTER))
 		goto out_nlmsg_trim;
 
 	return nlmsg_end(skb, nlh);
@@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct packet_diag_req *req;
 	struct net *net;
 	struct sock *sk;
+	bool may_report_filterinfo;
 
 	net = sock_net(skb->sk);
 	req = nlmsg_data(cb->nlh);
+	may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN);
 
 	mutex_lock(&net->packet.sklist_lock);
 	sk_for_each(sk, &net->packet.sklist) {
@@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			goto next;
 
 		if (sk_diag_fill(sk, skb, req,
+				 may_report_filterinfo,
 				 sk_user_ns(NETLINK_CB(cb->skb).sk),
 				 NETLINK_CB(cb->skb).portid,
 				 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-- 
cgit v1.2.3-71-gd317


From aa4cf9452f469f16cea8c96283b641b4576d4a7b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:28:03 -0700
Subject: net: Add variants of capable for use on netlink messages

netlink_net_capable - The common case use, for operations that are safe on a network namespace
netlink_capable - For operations that are only known to be safe for the global root
netlink_ns_capable - The general case of capable used to handle special cases

__netlink_ns_capable - Same as netlink_ns_capable except taking a netlink_skb_parms instead of
		       the skbuff of a netlink message.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  7 ++++++
 net/netlink/af_netlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index aad8eeaf416d..f64b01787ddc 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -169,4 +169,11 @@ struct netlink_tap {
 extern int netlink_add_tap(struct netlink_tap *nt);
 extern int netlink_remove_tap(struct netlink_tap *nt);
 
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+			  struct user_namespace *ns, int cap);
+bool netlink_ns_capable(const struct sk_buff *skb,
+			struct user_namespace *ns, int cap);
+bool netlink_capable(const struct sk_buff *skb, int cap);
+bool netlink_net_capable(const struct sk_buff *skb, int cap);
+
 #endif	/* __LINUX_NETLINK_H */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7f931fe4d187..81dca96d2be6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1360,6 +1360,71 @@ retry:
 	return err;
 }
 
+/**
+ * __netlink_ns_capable - General netlink message capability test
+ * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+			struct user_namespace *user_ns, int cap)
+{
+	return sk_ns_capable(nsp->sk, user_ns, cap);
+}
+EXPORT_SYMBOL(__netlink_ns_capable);
+
+/**
+ * netlink_ns_capable - General netlink message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool netlink_ns_capable(const struct sk_buff *skb,
+			struct user_namespace *user_ns, int cap)
+{
+	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_ns_capable);
+
+/**
+ * netlink_capable - Netlink global message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in all user namespaces.
+ */
+bool netlink_capable(const struct sk_buff *skb, int cap)
+{
+	return netlink_ns_capable(skb, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_capable);
+
+/**
+ * netlink_net_capable - Netlink network namespace message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap over the network namespace of
+ * the socket we received the message from.
+ */
+bool netlink_net_capable(const struct sk_buff *skb, int cap)
+{
+	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_net_capable);
+
 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
 {
 	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
-- 
cgit v1.2.3-71-gd317


From 9ec36cafe43bf835f8f29273597a5b0cbc8267ef Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 23 Apr 2014 17:57:41 -0500
Subject: of/irq: do irq resolution in platform_get_irq

Currently we get the following kind of errors if we try to use interrupt
phandles to irqchips that have not yet initialized:

irq: no irq domain found for /ocp/pinmux@48002030 !
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at drivers/of/platform.c:171 of_device_alloc+0x144/0x184()
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.12.0-00038-g42a9708 #1012
(show_stack+0x14/0x1c)
(dump_stack+0x6c/0xa0)
(warn_slowpath_common+0x64/0x84)
(warn_slowpath_null+0x1c/0x24)
(of_device_alloc+0x144/0x184)
(of_platform_device_create_pdata+0x44/0x9c)
(of_platform_bus_create+0xd0/0x170)
(of_platform_bus_create+0x12c/0x170)
(of_platform_populate+0x60/0x98)

This is because we're wrongly trying to populate resources that are not
yet available. It's perfectly valid to create irqchips dynamically, so
let's fix up the issue by resolving the interrupt resources when
platform_get_irq is called.

And then we also need to accept the fact that some irqdomains do not
exist that early on, and only get initialized later on. So we can
make the current WARN_ON into just into a pr_debug().

We still attempt to populate irq resources when we create the devices.
This allows current drivers which don't use platform_get_irq to continue
to function. Once all drivers are fixed, this code can be removed.

Suggested-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Cc: stable@vger.kernel.org # v3.10+
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/base/platform.c |  7 ++++++-
 drivers/of/irq.c        | 26 ++++++++++++++++++++++++++
 drivers/of/platform.c   |  4 +++-
 include/linux/of_irq.h  |  5 +++++
 4 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index e714709704e4..5b47210889e0 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 #include <linux/platform_device.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
@@ -87,7 +88,11 @@ int platform_get_irq(struct platform_device *dev, unsigned int num)
 		return -ENXIO;
 	return dev->archdata.irqs[num];
 #else
-	struct resource *r = platform_get_resource(dev, IORESOURCE_IRQ, num);
+	struct resource *r;
+	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node)
+		return of_irq_get(dev->dev.of_node, num);
+
+	r = platform_get_resource(dev, IORESOURCE_IRQ, num);
 
 	return r ? r->start : -ENXIO;
 #endif
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index e2e4c548a42f..5aeb89411350 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -379,6 +379,32 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
 }
 EXPORT_SYMBOL_GPL(of_irq_to_resource);
 
+/**
+ * of_irq_get - Decode a node's IRQ and return it as a Linux irq number
+ * @dev: pointer to device tree node
+ * @index: zero-based index of the irq
+ *
+ * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
+ * is not yet created.
+ *
+ */
+int of_irq_get(struct device_node *dev, int index)
+{
+	int rc;
+	struct of_phandle_args oirq;
+	struct irq_domain *domain;
+
+	rc = of_irq_parse_one(dev, index, &oirq);
+	if (rc)
+		return rc;
+
+	domain = irq_find_host(oirq.np);
+	if (!domain)
+		return -EPROBE_DEFER;
+
+	return irq_create_of_mapping(&oirq);
+}
+
 /**
  * of_irq_count - Count the number of IRQs a node uses
  * @dev: pointer to device tree node
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 404d1daebefa..bd47fbc53dc9 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -168,7 +168,9 @@ struct platform_device *of_device_alloc(struct device_node *np,
 			rc = of_address_to_resource(np, i, res);
 			WARN_ON(rc);
 		}
-		WARN_ON(of_irq_to_resource_table(np, res, num_irq) != num_irq);
+		if (of_irq_to_resource_table(np, res, num_irq) != num_irq)
+			pr_debug("not all legacy IRQ resources mapped for %s\n",
+				 np->name);
 	}
 
 	dev->dev.of_node = of_node_get(np);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 3f23b4472c31..6404253d810d 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -44,11 +44,16 @@ extern void of_irq_init(const struct of_device_id *matches);
 
 #ifdef CONFIG_OF_IRQ
 extern int of_irq_count(struct device_node *dev);
+extern int of_irq_get(struct device_node *dev, int index);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
 	return 0;
 }
+static inline int of_irq_get(struct device_node *dev, int index)
+{
+	return 0;
+}
 #endif
 
 #if defined(CONFIG_OF)
-- 
cgit v1.2.3-71-gd317


From def5f1273c5f18abf8fcaee03a115d3e907ad407 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 27 Apr 2014 21:03:09 -0700
Subject: linux/interrupt.h: fix new kernel-doc warnings

Fix new kernel-doc warnings in <linux/interrupt.h>:

Warning(include/linux/interrupt.h:219): No description found for parameter 'cpumask'
Warning(include/linux/interrupt.h:219): Excess function parameter 'mask' description in 'irq_set_affinity'
Warning(include/linux/interrupt.h:236): No description found for parameter 'cpumask'
Warning(include/linux/interrupt.h:236): Excess function parameter 'mask' description in 'irq_force_affinity'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: http://lkml.kernel.org/r/535DD2FD.7030804@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 8834a7e5b944..97ac926c78a7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -210,7 +210,7 @@ extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask,
 /**
  * irq_set_affinity - Set the irq affinity of a given irq
  * @irq:	Interrupt to set affinity
- * @mask:	cpumask
+ * @cpumask:	cpumask
  *
  * Fails if cpumask does not contain an online CPU
  */
@@ -223,7 +223,7 @@ irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 /**
  * irq_force_affinity - Force the irq affinity of a given irq
  * @irq:	Interrupt to set affinity
- * @mask:	cpumask
+ * @cpumask:	cpumask
  *
  * Same as irq_set_affinity, but without checking the mask against
  * online cpus.
-- 
cgit v1.2.3-71-gd317


From 62a08ae2a5763aabeee98264605236b001503e0c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 24 Apr 2014 09:50:53 +0200
Subject: genirq: x86: Ensure that dynamic irq allocation does not conflict

On x86 the allocation of irq descriptors may allocate interrupts which
are in the range of the GSI interrupts. That's wrong as those
interrupts are hardwired and we don't have the irq domain translation
like PPC. So one of these interrupts can be hooked up later to one of
the devices which are hard wired to it and the io_apic init code for
that particular interrupt line happily reuses that descriptor with a
completely different configuration so hell breaks lose.

Inside x86 we allocate dynamic interrupts from above nr_gsi_irqs,
except for a few usage sites which have not yet blown up in our face
for whatever reason. But for drivers which need an irq range, like the
GPIO drivers, we have no limit in place and we don't want to expose
such a detail to a driver.

To cure this introduce a function which an architecture can implement
to impose a lower bound on the dynamic interrupt allocations.

Implement it for x86 and set the lower bound to nr_gsi_irqs, which is
the end of the hardwired interrupt space, so all dynamic allocations
happen above.

That not only allows the GPIO driver to work sanely, it also protects
the bogus callsites of create_irq_nr() in hpet, uv, irq_remapping and
htirq code. They need to be cleaned up as well, but that's a separate
issue.

Reported-by: Jin Yao <yao.jin@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Krogerus Heikki <heikki.krogerus@intel.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1404241617360.28206@ionos.tec.linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 5 +++++
 include/linux/irq.h            | 2 ++
 kernel/irq/irqdesc.c           | 7 +++++++
 kernel/softirq.c               | 5 +++++
 4 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6ad4658de705..d23aa82e7a7b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void)
 	return nr_irqs_gsi;
 }
 
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+	return from < nr_irqs_gsi ? nr_irqs_gsi : from;
+}
+
 int __init arch_probe_nr_irqs(void)
 {
 	int nr;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 10a0b1ac4ea0..5c57efb863d0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -603,6 +603,8 @@ static inline u32 irq_get_trigger_type(unsigned int irq)
 	return d ? irqd_get_trigger_type(d) : 0;
 }
 
+unsigned int arch_dynirq_lower_bound(unsigned int from);
+
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 		struct module *owner);
 
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a7174617616b..bb07f2928f4b 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 		if (from > irq)
 			return -EINVAL;
 		from = irq;
+	} else {
+		/*
+		 * For interrupts which are freely allocated the
+		 * architecture can force a lower bound to the @from
+		 * argument. x86 uses this to exclude the GSI space.
+		 */
+		from = arch_dynirq_lower_bound(from);
 	}
 
 	mutex_lock(&sparse_irq_lock);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b50990a5bea0..33e4648ae0e7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void)
 {
 	return 0;
 }
+
+unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
+{
+	return from;
+}
-- 
cgit v1.2.3-71-gd317


From a949ae560a511fe4e3adf48fa44fefded93e5c2b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 24 Apr 2014 10:40:12 -0400
Subject: ftrace/module: Hardcode ftrace_module_init() call into load_module()

A race exists between module loading and enabling of function tracer.

	CPU 1				CPU 2
	-----				-----
  load_module()
   module->state = MODULE_STATE_COMING

				register_ftrace_function()
				 mutex_lock(&ftrace_lock);
				 ftrace_startup()
				  update_ftrace_function();
				   ftrace_arch_code_modify_prepare()
				    set_all_module_text_rw();
				   <enables-ftrace>
				    ftrace_arch_code_modify_post_process()
				     set_all_module_text_ro();

				[ here all module text is set to RO,
				  including the module that is
				  loading!! ]

   blocking_notifier_call_chain(MODULE_STATE_COMING);
    ftrace_init_module()

     [ tries to modify code, but it's RO, and fails!
       ftrace_bug() is called]

When this race happens, ftrace_bug() will produces a nasty warning and
all of the function tracing features will be disabled until reboot.

The simple solution is to treate module load the same way the core
kernel is treated at boot. To hardcode the ftrace function modification
of converting calls to mcount into nops. This is done in init/main.c
there's no reason it could not be done in load_module(). This gives
a better control of the changes and doesn't tie the state of the
module to its notifiers as much. Ftrace is special, it needs to be
treated as such.

The reason this would work, is that the ftrace_module_init() would be
called while the module is in MODULE_STATE_UNFORMED, which is ignored
by the set_all_module_text_ro() call.

Link: http://lkml.kernel.org/r/1395637826-3312-1-git-send-email-indou.takao@jp.fujitsu.com

Reported-by: Takao Indoh <indou.takao@jp.fujitsu.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@vger.kernel.org # 2.6.38+
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 ++
 kernel/module.c        |  3 +++
 kernel/trace/ftrace.c  | 27 ++++-----------------------
 3 files changed, 9 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9212b017bc72..ae9504b4b67d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -535,6 +535,7 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a
 extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
+extern void ftrace_module_init(struct module *mod);
 
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
@@ -544,6 +545,7 @@ static inline int ftrace_force_update(void) { return 0; }
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
 static inline void ftrace_release_mod(struct module *mod) {}
+static inline void ftrace_module_init(struct module *mod) {}
 static inline __init int register_ftrace_command(struct ftrace_func_command *cmd)
 {
 	return -EINVAL;
diff --git a/kernel/module.c b/kernel/module.c
index 11869408f79b..5f14fec9f825 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3271,6 +3271,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
 
 	dynamic_debug_setup(info->debug, info->num_debug);
 
+	/* Ftrace init must be called in the MODULE_STATE_UNFORMED state */
+	ftrace_module_init(mod);
+
 	/* Finally it's fully formed, ready to start executing. */
 	err = complete_formation(mod, info);
 	if (err)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1fd4b9479210..4a54a25afa2f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod,
 	ftrace_process_locs(mod, start, end);
 }
 
-static int ftrace_module_notify_enter(struct notifier_block *self,
-				      unsigned long val, void *data)
+void ftrace_module_init(struct module *mod)
 {
-	struct module *mod = data;
-
-	if (val == MODULE_STATE_COMING)
-		ftrace_init_module(mod, mod->ftrace_callsites,
-				   mod->ftrace_callsites +
-				   mod->num_ftrace_callsites);
-	return 0;
+	ftrace_init_module(mod, mod->ftrace_callsites,
+			   mod->ftrace_callsites +
+			   mod->num_ftrace_callsites);
 }
 
 static int ftrace_module_notify_exit(struct notifier_block *self,
@@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self,
 	return 0;
 }
 #else
-static int ftrace_module_notify_enter(struct notifier_block *self,
-				      unsigned long val, void *data)
-{
-	return 0;
-}
 static int ftrace_module_notify_exit(struct notifier_block *self,
 				     unsigned long val, void *data)
 {
@@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self,
 }
 #endif /* CONFIG_MODULES */
 
-struct notifier_block ftrace_module_enter_nb = {
-	.notifier_call = ftrace_module_notify_enter,
-	.priority = INT_MAX,	/* Run before anything that can use kprobes */
-};
-
 struct notifier_block ftrace_module_exit_nb = {
 	.notifier_call = ftrace_module_notify_exit,
 	.priority = INT_MIN,	/* Run after anything that can remove kprobes */
@@ -4403,10 +4388,6 @@ void __init ftrace_init(void)
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
-	ret = register_module_notifier(&ftrace_module_enter_nb);
-	if (ret)
-		pr_warning("Failed to register trace ftrace module enter notifier\n");
-
 	ret = register_module_notifier(&ftrace_module_exit_nb);
 	if (ret)
 		pr_warning("Failed to register trace ftrace module exit notifier\n");
-- 
cgit v1.2.3-71-gd317


From 41edf278fc2f042f4e22a12ed87d19c5201210e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 1 May 2014 10:30:00 -0400
Subject: dentry_kill(): don't try to remove from shrink list

If the victim in on the shrink list, don't remove it from there.
If shrink_dentry_list() manages to remove it from the list before
we are done - fine, we'll just free it as usual.  If not - mark
it with new flag (DCACHE_MAY_FREE) and leave it there.

Eventually, shrink_dentry_list() will get to it, remove the sucker
from shrink list and call dentry_kill(dentry, 0).  Which is where
we'll deal with freeing.

Since now dentry_kill(dentry, 0) may happen after or during
dentry_kill(dentry, 1), we need to recognize that (by seeing
DCACHE_DENTRY_KILLED already set), unlock everything
and either free the sucker (in case DCACHE_MAY_FREE has been
set) or leave it for ongoing dentry_kill(dentry, 1) to deal with.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 +++++++++++++++++++--------
 include/linux/dcache.h |  2 ++
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index e482775343a0..58e26bee7ef4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -468,7 +468,14 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure)
 	__releases(dentry->d_lock)
 {
 	struct inode *inode;
-	struct dentry *parent;
+	struct dentry *parent = NULL;
+	bool can_free = true;
+
+	if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
+		can_free = dentry->d_flags & DCACHE_MAY_FREE;
+		spin_unlock(&dentry->d_lock);
+		goto out;
+	}
 
 	inode = dentry->d_inode;
 	if (inode && !spin_trylock(&inode->i_lock)) {
@@ -479,9 +486,7 @@ relock:
 		}
 		return dentry; /* try again with same dentry */
 	}
-	if (IS_ROOT(dentry))
-		parent = NULL;
-	else
+	if (!IS_ROOT(dentry))
 		parent = dentry->d_parent;
 	if (parent && !spin_trylock(&parent->d_lock)) {
 		if (inode)
@@ -504,8 +509,6 @@ relock:
 	if (dentry->d_flags & DCACHE_LRU_LIST) {
 		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
 			d_lru_del(dentry);
-		else
-			d_shrink_del(dentry);
 	}
 	/* if it was on the hash then remove it */
 	__d_drop(dentry);
@@ -527,7 +530,15 @@ relock:
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
 
-	dentry_free(dentry);
+	spin_lock(&dentry->d_lock);
+	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+		dentry->d_flags |= DCACHE_MAY_FREE;
+		can_free = false;
+	}
+	spin_unlock(&dentry->d_lock);
+out:
+	if (likely(can_free))
+		dentry_free(dentry);
 	return parent;
 }
 
@@ -829,7 +840,7 @@ static void shrink_dentry_list(struct list_head *list)
 		 * We found an inuse dentry which was not removed from
 		 * the LRU because of laziness during lookup. Do not free it.
 		 */
-		if (dentry->d_lockref.count) {
+		if ((int)dentry->d_lockref.count > 0) {
 			spin_unlock(&dentry->d_lock);
 			continue;
 		}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3b9bfdb83ba6..3c7ec327ebd2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -221,6 +221,8 @@ struct dentry_operations {
 #define DCACHE_SYMLINK_TYPE		0x00300000 /* Symlink */
 #define DCACHE_FILE_TYPE		0x00400000 /* Other file type */
 
+#define DCACHE_MAY_FREE			0x00800000
+
 extern seqlock_t rename_lock;
 
 static inline int dname_external(const struct dentry *dentry)
-- 
cgit v1.2.3-71-gd317


From 5fbf1a65dd53ef313783c34a0e93a6e29def6136 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Fri, 2 May 2014 10:56:11 -0400
Subject: Revert "tty: Fix race condition between __tty_buffer_request_room and
 flush_to_ldisc"

This reverts commit 6a20dbd6caa2358716136144bf524331d70b1e03.

Although the commit correctly identifies an unsafe race condition
between __tty_buffer_request_room() and flush_to_ldisc(), the commit
fixes the race with an unnecessary spinlock in a lockless algorithm.

The follow-on commit, "tty: Fix lockless tty buffer race" fixes
the race locklessly.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_buffer.c | 16 ++--------------
 include/linux/tty.h      |  1 -
 2 files changed, 2 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index f1d30f6945af..8ebd9f88a6f6 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -255,16 +255,11 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size,
 	if (change || left < size) {
 		/* This is the slow path - looking for new buffers to use */
 		if ((n = tty_buffer_alloc(port, size)) != NULL) {
-			unsigned long iflags;
-
 			n->flags = flags;
 			buf->tail = n;
-
-			spin_lock_irqsave(&buf->flush_lock, iflags);
 			b->commit = b->used;
+			smp_mb();
 			b->next = n;
-			spin_unlock_irqrestore(&buf->flush_lock, iflags);
-
 		} else if (change)
 			size = 0;
 		else
@@ -448,7 +443,6 @@ static void flush_to_ldisc(struct work_struct *work)
 	mutex_lock(&buf->lock);
 
 	while (1) {
-		unsigned long flags;
 		struct tty_buffer *head = buf->head;
 		int count;
 
@@ -456,19 +450,14 @@ static void flush_to_ldisc(struct work_struct *work)
 		if (atomic_read(&buf->priority))
 			break;
 
-		spin_lock_irqsave(&buf->flush_lock, flags);
 		count = head->commit - head->read;
 		if (!count) {
-			if (head->next == NULL) {
-				spin_unlock_irqrestore(&buf->flush_lock, flags);
+			if (head->next == NULL)
 				break;
-			}
 			buf->head = head->next;
-			spin_unlock_irqrestore(&buf->flush_lock, flags);
 			tty_buffer_free(port, head);
 			continue;
 		}
-		spin_unlock_irqrestore(&buf->flush_lock, flags);
 
 		count = receive_buf(tty, head, count);
 		if (!count)
@@ -523,7 +512,6 @@ void tty_buffer_init(struct tty_port *port)
 	struct tty_bufhead *buf = &port->buf;
 
 	mutex_init(&buf->lock);
-	spin_lock_init(&buf->flush_lock);
 	tty_buffer_reset(&buf->sentinel, 0);
 	buf->head = &buf->sentinel;
 	buf->tail = &buf->sentinel;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 036cccd80d9f..1c3316a47d7e 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -61,7 +61,6 @@ struct tty_bufhead {
 	struct tty_buffer *head;	/* Queue head */
 	struct work_struct work;
 	struct mutex	   lock;
-	spinlock_t	   flush_lock;
 	atomic_t	   priority;
 	struct tty_buffer sentinel;
 	struct llist_head free;		/* Free queue head */
-- 
cgit v1.2.3-71-gd317


From 3adc1beacdbc1d6c4be99ecc2f87eac1c1e6f857 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 May 2014 00:44:36 +0200
Subject: asmlinkage: Revert "lto: Make asmlinkage __visible"

As requested by Linus, revert adding __visible to asmlinkage.
Instead we add __visible explicitely to all the symbols
that need it.

This reverts commit 128ea04a9885af9629059e631ddf0cab4815b589.

Link: http://lkml.kernel.org/r/1398984278-29319-2-git-send-email-andi@firstfloor.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/linkage.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 34a513a2727b..a6a42dd02466 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -12,9 +12,9 @@
 #endif
 
 #ifdef __cplusplus
-#define CPP_ASMLINKAGE extern "C" __visible
+#define CPP_ASMLINKAGE extern "C"
 #else
-#define CPP_ASMLINKAGE __visible
+#define CPP_ASMLINKAGE
 #endif
 
 #ifndef asmlinkage
-- 
cgit v1.2.3-71-gd317


From 39f1f78d53b9bcbca91967380c5f0f2305a5c55f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 May 2014 14:02:53 -0400
Subject: nick kvfree() from apparmor

too many places open-code it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h                   |  2 ++
 mm/util.c                            | 10 ++++++++++
 security/apparmor/include/apparmor.h |  1 -
 security/apparmor/lib.c              | 14 --------------
 4 files changed, 12 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf9811e1321a..d6777060449f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -370,6 +370,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
+extern void kvfree(const void *addr);
+
 static inline void compound_lock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/util.c b/mm/util.c
index f380af7ea779..d5ea733c5082 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -10,6 +10,7 @@
 #include <linux/swapops.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
@@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+void kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
 struct address_space *page_mapping(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 8fb1488a3cd4..97130f88838b 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -66,7 +66,6 @@ extern int apparmor_initialized __initdata;
 char *aa_split_fqname(char *args, char **ns_name);
 void aa_info_message(const char *str);
 void *__aa_kvmalloc(size_t size, gfp_t flags);
-void kvfree(void *buffer);
 
 static inline void *kvmalloc(size_t size)
 {
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 69689922c491..c1827e068454 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -104,17 +104,3 @@ void *__aa_kvmalloc(size_t size, gfp_t flags)
 	}
 	return buffer;
 }
-
-/**
- * kvfree - free an allocation do by kvmalloc
- * @buffer: buffer to free (MAYBE_NULL)
- *
- * Free a buffer allocated by kvmalloc
- */
-void kvfree(void *buffer)
-{
-	if (is_vmalloc_addr(buffer))
-		vfree(buffer);
-	else
-		kfree(buffer);
-}
-- 
cgit v1.2.3-71-gd317


From 457c1b27ed56ec472d202731b12417bff023594a Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Date: Tue, 6 May 2014 12:50:00 -0700
Subject: hugetlb: ensure hugepage access is denied if hugepages are not
 supported

Currently, I am seeing the following when I `mount -t hugetlbfs /none
/dev/hugetlbfs`, and then simply do a `ls /dev/hugetlbfs`.  I think it's
related to the fact that hugetlbfs is properly not correctly setting
itself up in this state?:

  Unable to handle kernel paging request for data at address 0x00000031
  Faulting instruction address: 0xc000000000245710
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048 NUMA pSeries
  ....

In KVM guests on Power, in a guest not backed by hugepages, we see the
following:

  AnonHugePages:         0 kB
  HugePages_Total:       0
  HugePages_Free:        0
  HugePages_Rsvd:        0
  HugePages_Surp:        0
  Hugepagesize:         64 kB

HPAGE_SHIFT == 0 in this configuration, which indicates that hugepages
are not supported at boot-time, but this is only checked in
hugetlb_init().  Extract the check to a helper function, and use it in a
few relevant places.

This does make hugetlbfs not supported (not registered at all) in this
environment.  I believe this is fine, as there are no valid hugepages
and that won't change at runtime.

[akpm@linux-foundation.org: use pr_info(), per Mel]
[akpm@linux-foundation.org: fix build when HPAGE_SHIFT is undefined]
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |  5 +++++
 include/linux/hugetlb.h | 10 ++++++++++
 mm/hugetlb.c            | 19 ++++++++++++++-----
 3 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 204027520937..e19d4c0cacae 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1030,6 +1030,11 @@ static int __init init_hugetlbfs_fs(void)
 	int error;
 	int i;
 
+	if (!hugepages_supported()) {
+		pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n");
+		return -ENOTSUPP;
+	}
+
 	error = bdi_init(&hugetlbfs_backing_dev_info);
 	if (error)
 		return error;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5b337cf8fb86..b65166de1d9d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -412,6 +412,16 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 	return &mm->page_table_lock;
 }
 
+static inline bool hugepages_supported(void)
+{
+	/*
+	 * Some platform decide whether they support huge pages at boot
+	 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
+	 * there is no such support
+	 */
+	return HPAGE_SHIFT != 0;
+}
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..c82290b9c1fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void)
 {
 	int i;
 
-	/* Some platform decide whether they support huge pages at boot
-	 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
-	 * there is no such support
-	 */
-	if (HPAGE_SHIFT == 0)
+	if (!hugepages_supported())
 		return 0;
 
 	if (!size_to_hstate(default_hstate_size)) {
@@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->max_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->nr_overcommit_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2190,6 +2192,8 @@ out:
 void hugetlb_report_meminfo(struct seq_file *m)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return;
 	seq_printf(m,
 			"HugePages_Total:   %5lu\n"
 			"HugePages_Free:    %5lu\n"
@@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return 0;
 	return sprintf(buf,
 		"Node %d HugePages_Total: %5u\n"
 		"Node %d HugePages_Free:  %5u\n"
@@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void)
 	struct hstate *h;
 	int nid;
 
+	if (!hugepages_supported())
+		return;
+
 	for_each_node_state(nid, N_MEMORY)
 		for_each_hstate(h)
 			pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
-- 
cgit v1.2.3-71-gd317


From 41a212859a4dd583d3aa032cdd3efa564c4f189f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 6 May 2014 12:50:08 -0700
Subject: slub: use sysfs'es release mechanism for kmem_cache

debugobjects warning during netfilter exit:

    ------------[ cut here ]------------
    WARNING: CPU: 6 PID: 4178 at lib/debugobjects.c:260 debug_print_object+0x8d/0xb0()
    ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x20
    Modules linked in:
    CPU: 6 PID: 4178 Comm: kworker/u16:2 Tainted: G        W 3.11.0-next-20130906-sasha #3984
    Workqueue: netns cleanup_net
    Call Trace:
      dump_stack+0x52/0x87
      warn_slowpath_common+0x8c/0xc0
      warn_slowpath_fmt+0x46/0x50
      debug_print_object+0x8d/0xb0
      __debug_check_no_obj_freed+0xa5/0x220
      debug_check_no_obj_freed+0x15/0x20
      kmem_cache_free+0x197/0x340
      kmem_cache_destroy+0x86/0xe0
      nf_conntrack_cleanup_net_list+0x131/0x170
      nf_conntrack_pernet_exit+0x5d/0x70
      ops_exit_list+0x5e/0x70
      cleanup_net+0xfb/0x1c0
      process_one_work+0x338/0x550
      worker_thread+0x215/0x350
      kthread+0xe7/0xf0
      ret_from_fork+0x7c/0xb0

Also during dcookie cleanup:

    WARNING: CPU: 12 PID: 9725 at lib/debugobjects.c:260 debug_print_object+0x8c/0xb0()
    ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x20
    Modules linked in:
    CPU: 12 PID: 9725 Comm: trinity-c141 Not tainted 3.15.0-rc2-next-20140423-sasha-00018-gc4ff6c4 #408
    Call Trace:
      dump_stack (lib/dump_stack.c:52)
      warn_slowpath_common (kernel/panic.c:430)
      warn_slowpath_fmt (kernel/panic.c:445)
      debug_print_object (lib/debugobjects.c:262)
      __debug_check_no_obj_freed (lib/debugobjects.c:697)
      debug_check_no_obj_freed (lib/debugobjects.c:726)
      kmem_cache_free (mm/slub.c:2689 mm/slub.c:2717)
      kmem_cache_destroy (mm/slab_common.c:363)
      dcookie_unregister (fs/dcookies.c:302 fs/dcookies.c:343)
      event_buffer_release (arch/x86/oprofile/../../../drivers/oprofile/event_buffer.c:153)
      __fput (fs/file_table.c:217)
      ____fput (fs/file_table.c:253)
      task_work_run (kernel/task_work.c:125 (discriminator 1))
      do_notify_resume (include/linux/tracehook.h:196 arch/x86/kernel/signal.c:751)
      int_signal (arch/x86/kernel/entry_64.S:807)

Sysfs has a release mechanism.  Use that to release the kmem_cache
structure if CONFIG_SYSFS is enabled.

Only slub is changed - slab currently only supports /proc/slabinfo and
not /sys/kernel/slab/*.  We talked about adding that and someone was
working on it.

[akpm@linux-foundation.org: fix CONFIG_SYSFS=n build]
[akpm@linux-foundation.org: fix CONFIG_SYSFS=n build even more]
Signed-off-by: Christoph Lameter <cl@linux.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Greg KH <greg@kroah.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h |  9 +++++++++
 mm/slab.h                |  1 +
 mm/slab_common.c         | 13 +++++++++++--
 mm/slub.c                | 30 ++++++++----------------------
 4 files changed, 29 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f2f7398848cf..d82abd40a3c0 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,4 +101,13 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+#ifdef CONFIG_SYSFS
+#define SLAB_SUPPORTS_SYSFS
+void sysfs_slab_remove(struct kmem_cache *);
+#else
+static inline void sysfs_slab_remove(struct kmem_cache *s)
+{
+}
+#endif
+
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/mm/slab.h b/mm/slab.h
index 3045316b7c9d..6bd4c353704f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
 struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..102cc6fca3d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+void slab_kmem_cache_release(struct kmem_cache *s)
+{
+	kfree(s->name);
+	kmem_cache_free(kmem_cache, s);
+}
+
 void kmem_cache_destroy(struct kmem_cache *s)
 {
 	get_online_cpus();
@@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		rcu_barrier();
 
 	memcg_free_cache_params(s);
-	kfree(s->name);
-	kmem_cache_free(kmem_cache, s);
+#ifdef SLAB_SUPPORTS_SYSFS
+	sysfs_slab_remove(s);
+#else
+	slab_kmem_cache_release(s);
+#endif
 	goto out_put_cpus;
 
 out_unlock:
diff --git a/mm/slub.c b/mm/slub.c
index 042a47b4d0f5..2b1ce697fc4b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void sysfs_slab_remove(struct kmem_cache *);
 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
-
 static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
@@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 
 int __kmem_cache_shutdown(struct kmem_cache *s)
 {
-	int rc = kmem_cache_close(s);
-
-	if (!rc) {
-		/*
-		 * Since slab_attr_store may take the slab_mutex, we should
-		 * release the lock while removing the sysfs entry in order to
-		 * avoid a deadlock. Because this is pretty much the last
-		 * operation we do and the lock will be released shortly after
-		 * that in slab_common.c, we could just move sysfs_slab_remove
-		 * to a later point in common code. We should do that when we
-		 * have a common sysfs framework for all allocators.
-		 */
-		mutex_unlock(&slab_mutex);
-		sysfs_slab_remove(s);
-		mutex_lock(&slab_mutex);
-	}
-
-	return rc;
+	return kmem_cache_close(s);
 }
 
 /********************************************************************
@@ -5122,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 #endif
 }
 
+static void kmem_cache_release(struct kobject *k)
+{
+	slab_kmem_cache_release(to_slab(k));
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
@@ -5129,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
 
 static struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
+	.release = kmem_cache_release,
 };
 
 static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -5255,7 +5241,7 @@ out_put_kobj:
 	goto out;
 }
 
-static void sysfs_slab_remove(struct kmem_cache *s)
+void sysfs_slab_remove(struct kmem_cache *s)
 {
 	if (slab_state < FULL)
 		/*
-- 
cgit v1.2.3-71-gd317


From 4c88d7f9b0d5fb0588c3386be62115cc2eaa8f9f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 23 Apr 2014 14:49:17 +0200
Subject: genirq: Provide irq_force_affinity fallback for non-SMP

Patch 01f8fa4f01d "genirq: Allow forcing cpu affinity of interrupts" added
an irq_force_affinity() function, and 30ccf03b4a6 "clocksource: Exynos_mct:
Use irq_force_affinity() in cpu bringup" subsequently uses it. However, the
driver can be used with CONFIG_SMP disabled, but the function declaration
is only available for CONFIG_SMP, leading to this build error:

drivers/clocksource/exynos_mct.c:431:3: error: implicit declaration of function 'irq_force_affinity' [-Werror=implicit-function-declaration]
   irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu));

This patch introduces a dummy helper function for the non-SMP case
that always returns success, to get rid of the build error.
Since the patches causing the problem are marked for stable backports,
this one should be as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/5619084.0zmrrIUZLV@wuerfel
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 97ac926c78a7..051c85032f48 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -272,6 +272,11 @@ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
 	return -EINVAL;
 }
 
+static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+	return 0;
+}
+
 static inline int irq_can_set_affinity(unsigned int irq)
 {
 	return 0;
-- 
cgit v1.2.3-71-gd317


From 98fcc5762dcecbb264fa4af5a9ae51136858e299 Mon Sep 17 00:00:00 2001
From: Micky Ching <micky_ching@realsil.com.cn>
Date: Tue, 29 Apr 2014 09:54:54 +0800
Subject: mmc: rtsx: Revert "mmc: rtsx: add support for pre_req and post_req"

This reverts commit c42deffd5b53c9e583d83c7964854ede2f12410d.

commit <mmc: rtsx: add support for pre_req and post_req> did use
mutex_unlock() in tasklet, but mutex_unlock() can't be used in
tasklet(atomic context). The driver needs to use mutex to avoid
concurrency, so we can't use tasklet here, the patch need to be
removed.

The spinlock host->lock and pcr->lock may deadlock, one way to solve
the deadlock is remove host->lock in sd_isr_done_transfer(), but if
using workqueue the we can avoid using the spinlock and also avoid
the problem.

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rtsx_pcr.c            | 132 ++++--------
 drivers/mmc/host/rtsx_pci_sdmmc.c | 418 +++++++-------------------------------
 include/linux/mfd/rtsx_common.h   |   1 -
 include/linux/mfd/rtsx_pci.h      |   6 -
 4 files changed, 109 insertions(+), 448 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index c9de3d598ea5..1d15735f9ef9 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -338,28 +338,58 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout)
 {
 	struct completion trans_done;
-	int err = 0, count;
+	u8 dir;
+	int err = 0, i, count;
 	long timeleft;
 	unsigned long flags;
+	struct scatterlist *sg;
+	enum dma_data_direction dma_dir;
+	u32 val;
+	dma_addr_t addr;
+	unsigned int len;
+
+	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+
+	/* don't transfer data during abort processing */
+	if (pcr->remove_pci)
+		return -EINVAL;
+
+	if ((sglist == NULL) || (num_sg <= 0))
+		return -EINVAL;
 
-	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
+	if (read) {
+		dir = DEVICE_TO_HOST;
+		dma_dir = DMA_FROM_DEVICE;
+	} else {
+		dir = HOST_TO_DEVICE;
+		dma_dir = DMA_TO_DEVICE;
+	}
+
+	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
 	if (count < 1) {
 		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
 		return -EINVAL;
 	}
 	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
 
+	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
+	pcr->sgi = 0;
+	for_each_sg(sglist, sg, count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1);
+	}
 
 	spin_lock_irqsave(&pcr->lock, flags);
 
 	pcr->done = &trans_done;
 	pcr->trans_result = TRANS_NOT_READY;
 	init_completion(&trans_done);
+	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
 
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
-	rtsx_pci_dma_transfer(pcr, sglist, count, read);
-
 	timeleft = wait_for_completion_interruptible_timeout(
 			&trans_done, msecs_to_jiffies(timeout));
 	if (timeleft <= 0) {
@@ -383,7 +413,7 @@ out:
 	pcr->done = NULL;
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
-	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
 
 	if ((err < 0) && (err != -ENODEV))
 		rtsx_pci_stop_cmd(pcr);
@@ -395,73 +425,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
 
-int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int num_sg, bool read)
-{
-	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-
-	if (pcr->remove_pci)
-		return -EINVAL;
-
-	if ((sglist == NULL) || num_sg < 1)
-		return -EINVAL;
-
-	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
-}
-EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
-
-int rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int num_sg, bool read)
-{
-	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-
-	if (pcr->remove_pci)
-		return -EINVAL;
-
-	if (sglist == NULL || num_sg < 1)
-		return -EINVAL;
-
-	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
-	return num_sg;
-}
-EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
-
-int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int sg_count, bool read)
-{
-	struct scatterlist *sg;
-	dma_addr_t addr;
-	unsigned int len;
-	int i;
-	u32 val;
-	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
-	unsigned long flags;
-
-	if (pcr->remove_pci)
-		return -EINVAL;
-
-	if ((sglist == NULL) || (sg_count < 1))
-		return -EINVAL;
-
-	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
-	pcr->sgi = 0;
-	for_each_sg(sglist, sg, sg_count, i) {
-		addr = sg_dma_address(sg);
-		len = sg_dma_len(sg);
-		rtsx_pci_add_sg_tbl(pcr, addr, len, i == sg_count - 1);
-	}
-
-	spin_lock_irqsave(&pcr->lock, flags);
-
-	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
-	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
-
-	spin_unlock_irqrestore(&pcr->lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
-
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
 {
 	int err;
@@ -873,8 +836,6 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
 	int_reg = rtsx_pci_readl(pcr, RTSX_BIPR);
 	/* Clear interrupt flag */
 	rtsx_pci_writel(pcr, RTSX_BIPR, int_reg);
-	dev_dbg(&pcr->pci->dev, "=========== BIPR 0x%8x ==========\n", int_reg);
-
 	if ((int_reg & pcr->bier) == 0) {
 		spin_unlock(&pcr->lock);
 		return IRQ_NONE;
@@ -905,28 +866,17 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
 	}
 
 	if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) {
-		if (int_reg & (TRANS_FAIL_INT | DELINK_INT))
+		if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) {
 			pcr->trans_result = TRANS_RESULT_FAIL;
-		else if (int_reg & TRANS_OK_INT)
+			if (pcr->done)
+				complete(pcr->done);
+		} else if (int_reg & TRANS_OK_INT) {
 			pcr->trans_result = TRANS_RESULT_OK;
-
-		if (pcr->done)
-			complete(pcr->done);
-
-		if (int_reg & SD_EXIST) {
-			struct rtsx_slot *slot = &pcr->slots[RTSX_SD_CARD];
-			if (slot && slot->done_transfer)
-				slot->done_transfer(slot->p_dev);
-		}
-
-		if (int_reg & MS_EXIST) {
-			struct rtsx_slot *slot = &pcr->slots[RTSX_SD_CARD];
-			if (slot && slot->done_transfer)
-				slot->done_transfer(slot->p_dev);
+			if (pcr->done)
+				complete(pcr->done);
 		}
 	}
 
-
 	if (pcr->card_inserted || pcr->card_removed)
 		schedule_delayed_work(&pcr->carddet_work,
 				msecs_to_jiffies(200));
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 5fb994f9a653..0b9ded13a3ae 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -31,28 +31,14 @@
 #include <linux/mfd/rtsx_pci.h>
 #include <asm/unaligned.h>
 
-struct realtek_next {
-	unsigned int	sg_count;
-	s32		cookie;
-};
-
 struct realtek_pci_sdmmc {
 	struct platform_device	*pdev;
 	struct rtsx_pcr		*pcr;
 	struct mmc_host		*mmc;
 	struct mmc_request	*mrq;
-	struct mmc_command	*cmd;
-	struct mmc_data		*data;
-
-	spinlock_t		lock;
-	struct timer_list	timer;
-	struct tasklet_struct	cmd_tasklet;
-	struct tasklet_struct	data_tasklet;
-	struct tasklet_struct	finish_tasklet;
-
-	u8			rsp_type;
-	u8			rsp_len;
-	int			sg_count;
+
+	struct mutex		host_mutex;
+
 	u8			ssc_depth;
 	unsigned int		clock;
 	bool			vpclk;
@@ -62,13 +48,8 @@ struct realtek_pci_sdmmc {
 	int			power_state;
 #define SDMMC_POWER_ON		1
 #define SDMMC_POWER_OFF		0
-
-	struct realtek_next	next_data;
 };
 
-static int sd_start_multi_rw(struct realtek_pci_sdmmc *host,
-		struct mmc_request *mrq);
-
 static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
 {
 	return &(host->pdev->dev);
@@ -105,95 +86,6 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
 #define sd_print_debug_regs(host)
 #endif /* DEBUG */
 
-static void sd_isr_done_transfer(struct platform_device *pdev)
-{
-	struct realtek_pci_sdmmc *host = platform_get_drvdata(pdev);
-
-	spin_lock(&host->lock);
-	if (host->cmd)
-		tasklet_schedule(&host->cmd_tasklet);
-	if (host->data)
-		tasklet_schedule(&host->data_tasklet);
-	spin_unlock(&host->lock);
-}
-
-static void sd_request_timeout(unsigned long host_addr)
-{
-	struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&host->lock, flags);
-
-	if (!host->mrq) {
-		dev_err(sdmmc_dev(host), "error: no request exist\n");
-		goto out;
-	}
-
-	if (host->cmd)
-		host->cmd->error = -ETIMEDOUT;
-	if (host->data)
-		host->data->error = -ETIMEDOUT;
-
-	dev_dbg(sdmmc_dev(host), "timeout for request\n");
-
-out:
-	tasklet_schedule(&host->finish_tasklet);
-	spin_unlock_irqrestore(&host->lock, flags);
-}
-
-static void sd_finish_request(unsigned long host_addr)
-{
-	struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr;
-	struct rtsx_pcr *pcr = host->pcr;
-	struct mmc_request *mrq;
-	struct mmc_command *cmd;
-	struct mmc_data *data;
-	unsigned long flags;
-	bool any_error;
-
-	spin_lock_irqsave(&host->lock, flags);
-
-	del_timer(&host->timer);
-	mrq = host->mrq;
-	if (!mrq) {
-		dev_err(sdmmc_dev(host), "error: no request need finish\n");
-		goto out;
-	}
-
-	cmd = mrq->cmd;
-	data = mrq->data;
-
-	any_error = (mrq->sbc && mrq->sbc->error) ||
-		(mrq->stop && mrq->stop->error) ||
-		(cmd && cmd->error) || (data && data->error);
-
-	if (any_error) {
-		rtsx_pci_stop_cmd(pcr);
-		sd_clear_error(host);
-	}
-
-	if (data) {
-		if (any_error)
-			data->bytes_xfered = 0;
-		else
-			data->bytes_xfered = data->blocks * data->blksz;
-
-		if (!data->host_cookie)
-			rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len,
-					data->flags & MMC_DATA_READ);
-
-	}
-
-	host->mrq = NULL;
-	host->cmd = NULL;
-	host->data = NULL;
-
-out:
-	spin_unlock_irqrestore(&host->lock, flags);
-	mutex_unlock(&pcr->pcr_mutex);
-	mmc_request_done(host->mmc, mrq);
-}
-
 static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
 		u8 *buf, int buf_len, int timeout)
 {
@@ -311,7 +203,8 @@ static int sd_write_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
 	return 0;
 }
 
-static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd)
+static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host,
+		struct mmc_command *cmd)
 {
 	struct rtsx_pcr *pcr = host->pcr;
 	u8 cmd_idx = (u8)cmd->opcode;
@@ -319,14 +212,11 @@ static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd)
 	int err = 0;
 	int timeout = 100;
 	int i;
+	u8 *ptr;
+	int stat_idx = 0;
 	u8 rsp_type;
 	int rsp_len = 5;
-	unsigned long flags;
-
-	if (host->cmd)
-		dev_err(sdmmc_dev(host), "error: cmd already exist\n");
-
-	host->cmd = cmd;
+	bool clock_toggled = false;
 
 	dev_dbg(sdmmc_dev(host), "%s: SD/MMC CMD %d, arg = 0x%08x\n",
 			__func__, cmd_idx, arg);
@@ -361,8 +251,6 @@ static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd)
 		err = -EINVAL;
 		goto out;
 	}
-	host->rsp_type = rsp_type;
-	host->rsp_len = rsp_len;
 
 	if (rsp_type == SD_RSP_TYPE_R1b)
 		timeout = 3000;
@@ -372,6 +260,8 @@ static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd)
 				0xFF, SD_CLK_TOGGLE_EN);
 		if (err < 0)
 			goto out;
+
+		clock_toggled = true;
 	}
 
 	rtsx_pci_init_cmd(pcr);
@@ -395,60 +285,25 @@ static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd)
 		/* Read data from ping-pong buffer */
 		for (i = PPBUF_BASE2; i < PPBUF_BASE2 + 16; i++)
 			rtsx_pci_add_cmd(pcr, READ_REG_CMD, (u16)i, 0, 0);
+		stat_idx = 16;
 	} else if (rsp_type != SD_RSP_TYPE_R0) {
 		/* Read data from SD_CMDx registers */
 		for (i = SD_CMD0; i <= SD_CMD4; i++)
 			rtsx_pci_add_cmd(pcr, READ_REG_CMD, (u16)i, 0, 0);
+		stat_idx = 5;
 	}
 
 	rtsx_pci_add_cmd(pcr, READ_REG_CMD, SD_STAT1, 0, 0);
 
-	mod_timer(&host->timer, jiffies + msecs_to_jiffies(timeout));
-
-	spin_lock_irqsave(&pcr->lock, flags);
-	pcr->trans_result = TRANS_NOT_READY;
-	rtsx_pci_send_cmd_no_wait(pcr);
-	spin_unlock_irqrestore(&pcr->lock, flags);
-
-	return;
-
-out:
-	cmd->error = err;
-	tasklet_schedule(&host->finish_tasklet);
-}
-
-static void sd_get_rsp(unsigned long host_addr)
-{
-	struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr;
-	struct rtsx_pcr *pcr = host->pcr;
-	struct mmc_command *cmd;
-	int i, err = 0, stat_idx;
-	u8 *ptr, rsp_type;
-	unsigned long flags;
-
-	spin_lock_irqsave(&host->lock, flags);
-
-	cmd = host->cmd;
-	host->cmd = NULL;
-
-	if (!cmd) {
-		dev_err(sdmmc_dev(host), "error: cmd not exist\n");
+	err = rtsx_pci_send_cmd(pcr, timeout);
+	if (err < 0) {
+		sd_print_debug_regs(host);
+		sd_clear_error(host);
+		dev_dbg(sdmmc_dev(host),
+			"rtsx_pci_send_cmd error (err = %d)\n", err);
 		goto out;
 	}
 
-	spin_lock(&pcr->lock);
-	if (pcr->trans_result == TRANS_NO_DEVICE)
-		err = -ENODEV;
-	else if (pcr->trans_result != TRANS_RESULT_OK)
-		err = -EINVAL;
-	spin_unlock(&pcr->lock);
-
-	if (err < 0)
-		goto out;
-
-	rsp_type = host->rsp_type;
-	stat_idx = host->rsp_len;
-
 	if (rsp_type == SD_RSP_TYPE_R0) {
 		err = 0;
 		goto out;
@@ -485,106 +340,26 @@ static void sd_get_rsp(unsigned long host_addr)
 				cmd->resp[0]);
 	}
 
-	if (cmd == host->mrq->sbc) {
-		sd_send_cmd(host, host->mrq->cmd);
-		spin_unlock_irqrestore(&host->lock, flags);
-		return;
-	}
-
-	if (cmd == host->mrq->stop)
-		goto out;
-
-	if (cmd->data) {
-		sd_start_multi_rw(host, host->mrq);
-		spin_unlock_irqrestore(&host->lock, flags);
-		return;
-	}
-
 out:
 	cmd->error = err;
 
-	tasklet_schedule(&host->finish_tasklet);
-	spin_unlock_irqrestore(&host->lock, flags);
-}
-
-static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
-			struct mmc_data *data, struct realtek_next *next)
-{
-	struct rtsx_pcr *pcr = host->pcr;
-	int read = data->flags & MMC_DATA_READ;
-	int sg_count = 0;
-
-	if (!next && data->host_cookie &&
-		data->host_cookie != host->next_data.cookie) {
-		dev_err(sdmmc_dev(host),
-			"error: invalid cookie data[%d] host[%d]\n",
-			data->host_cookie, host->next_data.cookie);
-		data->host_cookie = 0;
-	}
-
-	if (next || (!next && data->host_cookie != host->next_data.cookie))
-		sg_count = rtsx_pci_dma_map_sg(pcr,
-				data->sg, data->sg_len, read);
-	else
-		sg_count = host->next_data.sg_count;
-
-	if (next) {
-		next->sg_count = sg_count;
-		if (++next->cookie < 0)
-			next->cookie = 1;
-		data->host_cookie = next->cookie;
-	}
-
-	return sg_count;
-}
-
-static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
-		bool is_first_req)
-{
-	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
-	struct mmc_data *data = mrq->data;
-
-	if (data->host_cookie) {
-		dev_err(sdmmc_dev(host),
-			"error: descard already cookie data[%d]\n",
-			data->host_cookie);
-		data->host_cookie = 0;
-	}
-
-	dev_dbg(sdmmc_dev(host), "dma sg prepared: %d\n",
-		sd_pre_dma_transfer(host, data, &host->next_data));
-}
-
-static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
-		int err)
-{
-	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
-	struct rtsx_pcr *pcr = host->pcr;
-	struct mmc_data *data = mrq->data;
-	int read = data->flags & MMC_DATA_READ;
-
-	rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
-	data->host_cookie = 0;
+	if (err && clock_toggled)
+		rtsx_pci_write_register(pcr, SD_BUS_STAT,
+				SD_CLK_TOGGLE_EN | SD_CLK_FORCE_STOP, 0);
 }
 
-static int sd_start_multi_rw(struct realtek_pci_sdmmc *host,
-		struct mmc_request *mrq)
+static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
 {
 	struct rtsx_pcr *pcr = host->pcr;
 	struct mmc_host *mmc = host->mmc;
 	struct mmc_card *card = mmc->card;
 	struct mmc_data *data = mrq->data;
 	int uhs = mmc_card_uhs(card);
-	int read = data->flags & MMC_DATA_READ;
+	int read = (data->flags & MMC_DATA_READ) ? 1 : 0;
 	u8 cfg2, trans_mode;
 	int err;
 	size_t data_len = data->blksz * data->blocks;
 
-	if (host->data)
-		dev_err(sdmmc_dev(host), "error: data already exist\n");
-
-	host->data = data;
-
 	if (read) {
 		cfg2 = SD_CALCULATE_CRC7 | SD_CHECK_CRC16 |
 			SD_NO_WAIT_BUSY_END | SD_CHECK_CRC7 | SD_RSP_LEN_0;
@@ -635,54 +410,15 @@ static int sd_start_multi_rw(struct realtek_pci_sdmmc *host,
 	rtsx_pci_add_cmd(pcr, CHECK_REG_CMD, SD_TRANSFER,
 			SD_TRANSFER_END, SD_TRANSFER_END);
 
-	mod_timer(&host->timer, jiffies + 10 * HZ);
 	rtsx_pci_send_cmd_no_wait(pcr);
 
-	err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read);
-	if (err < 0) {
-		data->error = err;
-		tasklet_schedule(&host->finish_tasklet);
-	}
-	return 0;
-}
-
-static void sd_finish_multi_rw(unsigned long host_addr)
-{
-	struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr;
-	struct rtsx_pcr *pcr = host->pcr;
-	struct mmc_data *data;
-	int err = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&host->lock, flags);
-
-	if (!host->data) {
-		dev_err(sdmmc_dev(host), "error: no data exist\n");
-		goto out;
-	}
-
-	data = host->data;
-	host->data = NULL;
-
-	if (pcr->trans_result == TRANS_NO_DEVICE)
-		err = -ENODEV;
-	else if (pcr->trans_result != TRANS_RESULT_OK)
-		err = -EINVAL;
-
+	err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
 	if (err < 0) {
-		data->error = err;
-		goto out;
-	}
-
-	if (!host->mrq->sbc && data->stop) {
-		sd_send_cmd(host, data->stop);
-		spin_unlock_irqrestore(&host->lock, flags);
-		return;
+		sd_clear_error(host);
+		return err;
 	}
 
-out:
-	tasklet_schedule(&host->finish_tasklet);
-	spin_unlock_irqrestore(&host->lock, flags);
+	return 0;
 }
 
 static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host)
@@ -901,13 +637,6 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
 	return 0;
 }
 
-static inline bool sd_use_muti_rw(struct mmc_command *cmd)
-{
-	return mmc_op_multi(cmd->opcode) ||
-		(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
-		(cmd->opcode == MMC_WRITE_BLOCK);
-}
-
 static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
@@ -916,14 +645,6 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	struct mmc_data *data = mrq->data;
 	unsigned int data_size = 0;
 	int err;
-	unsigned long flags;
-
-	mutex_lock(&pcr->pcr_mutex);
-	spin_lock_irqsave(&host->lock, flags);
-
-	if (host->mrq)
-		dev_err(sdmmc_dev(host), "error: request already exist\n");
-	host->mrq = mrq;
 
 	if (host->eject) {
 		cmd->error = -ENOMEDIUM;
@@ -936,6 +657,8 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		goto finish;
 	}
 
+	mutex_lock(&pcr->pcr_mutex);
+
 	rtsx_pci_start_run(pcr);
 
 	rtsx_pci_switch_clock(pcr, host->clock, host->ssc_depth,
@@ -944,28 +667,46 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	rtsx_pci_write_register(pcr, CARD_SHARE_MODE,
 			CARD_SHARE_MASK, CARD_SHARE_48_SD);
 
+	mutex_lock(&host->host_mutex);
+	host->mrq = mrq;
+	mutex_unlock(&host->host_mutex);
+
 	if (mrq->data)
 		data_size = data->blocks * data->blksz;
 
-	if (sd_use_muti_rw(cmd))
-		host->sg_count = sd_pre_dma_transfer(host, data, NULL);
+	if (!data_size || mmc_op_multi(cmd->opcode) ||
+			(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
+			(cmd->opcode == MMC_WRITE_BLOCK)) {
+		sd_send_cmd_get_rsp(host, cmd);
 
-	if (!data_size || sd_use_muti_rw(cmd)) {
-		if (mrq->sbc)
-			sd_send_cmd(host, mrq->sbc);
-		else
-			sd_send_cmd(host, cmd);
-		spin_unlock_irqrestore(&host->lock, flags);
+		if (!cmd->error && data_size) {
+			sd_rw_multi(host, mrq);
+
+			if (mmc_op_multi(cmd->opcode) && mrq->stop)
+				sd_send_cmd_get_rsp(host, mrq->stop);
+		}
 	} else {
-		spin_unlock_irqrestore(&host->lock, flags);
 		sd_normal_rw(host, mrq);
-		tasklet_schedule(&host->finish_tasklet);
 	}
-	return;
+
+	if (mrq->data) {
+		if (cmd->error || data->error)
+			data->bytes_xfered = 0;
+		else
+			data->bytes_xfered = data->blocks * data->blksz;
+	}
+
+	mutex_unlock(&pcr->pcr_mutex);
 
 finish:
-	tasklet_schedule(&host->finish_tasklet);
-	spin_unlock_irqrestore(&host->lock, flags);
+	if (cmd->error)
+		dev_dbg(sdmmc_dev(host), "cmd->error = %d\n", cmd->error);
+
+	mutex_lock(&host->host_mutex);
+	host->mrq = NULL;
+	mutex_unlock(&host->host_mutex);
+
+	mmc_request_done(mmc, mrq);
 }
 
 static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
@@ -1400,8 +1141,6 @@ out:
 }
 
 static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
-	.pre_req = sdmmc_pre_req,
-	.post_req = sdmmc_post_req,
 	.request = sdmmc_request,
 	.set_ios = sdmmc_set_ios,
 	.get_ro = sdmmc_get_ro,
@@ -1465,7 +1204,6 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
 	struct realtek_pci_sdmmc *host;
 	struct rtsx_pcr *pcr;
 	struct pcr_handle *handle = pdev->dev.platform_data;
-	unsigned long host_addr;
 
 	if (!handle)
 		return -ENXIO;
@@ -1489,15 +1227,8 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
 	pcr->slots[RTSX_SD_CARD].p_dev = pdev;
 	pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
 
-	host_addr = (unsigned long)host;
-	host->next_data.cookie = 1;
-	setup_timer(&host->timer, sd_request_timeout, host_addr);
-	tasklet_init(&host->cmd_tasklet, sd_get_rsp, host_addr);
-	tasklet_init(&host->data_tasklet, sd_finish_multi_rw, host_addr);
-	tasklet_init(&host->finish_tasklet, sd_finish_request, host_addr);
-	spin_lock_init(&host->lock);
+	mutex_init(&host->host_mutex);
 
-	pcr->slots[RTSX_SD_CARD].done_transfer = sd_isr_done_transfer;
 	realtek_init_host(host);
 
 	mmc_add_host(mmc);
@@ -1510,8 +1241,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	struct realtek_pci_sdmmc *host = platform_get_drvdata(pdev);
 	struct rtsx_pcr *pcr;
 	struct mmc_host *mmc;
-	struct mmc_request *mrq;
-	unsigned long flags;
 
 	if (!host)
 		return 0;
@@ -1519,33 +1248,22 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	pcr = host->pcr;
 	pcr->slots[RTSX_SD_CARD].p_dev = NULL;
 	pcr->slots[RTSX_SD_CARD].card_event = NULL;
-	pcr->slots[RTSX_SD_CARD].done_transfer = NULL;
 	mmc = host->mmc;
-	mrq = host->mrq;
 
-	spin_lock_irqsave(&host->lock, flags);
+	mutex_lock(&host->host_mutex);
 	if (host->mrq) {
 		dev_dbg(&(pdev->dev),
 			"%s: Controller removed during transfer\n",
 			mmc_hostname(mmc));
 
-		if (mrq->sbc)
-			mrq->sbc->error = -ENOMEDIUM;
-		if (mrq->cmd)
-			mrq->cmd->error = -ENOMEDIUM;
-		if (mrq->stop)
-			mrq->stop->error = -ENOMEDIUM;
-		if (mrq->data)
-			mrq->data->error = -ENOMEDIUM;
+		rtsx_pci_complete_unfinished_transfer(pcr);
 
-		tasklet_schedule(&host->finish_tasklet);
+		host->mrq->cmd->error = -ENOMEDIUM;
+		if (host->mrq->stop)
+			host->mrq->stop->error = -ENOMEDIUM;
+		mmc_request_done(mmc, host->mrq);
 	}
-	spin_unlock_irqrestore(&host->lock, flags);
-
-	del_timer_sync(&host->timer);
-	tasklet_kill(&host->cmd_tasklet);
-	tasklet_kill(&host->data_tasklet);
-	tasklet_kill(&host->finish_tasklet);
+	mutex_unlock(&host->host_mutex);
 
 	mmc_remove_host(mmc);
 	host->eject = true;
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
index 7c36cc55d2c7..443176ee1ab0 100644
--- a/include/linux/mfd/rtsx_common.h
+++ b/include/linux/mfd/rtsx_common.h
@@ -45,7 +45,6 @@ struct platform_device;
 struct rtsx_slot {
 	struct platform_device	*p_dev;
 	void			(*card_event)(struct platform_device *p_dev);
-	void			(*done_transfer)(struct platform_device *p_dev);
 };
 
 #endif
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 8d6bbd609ad9..a3835976f7c6 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -943,12 +943,6 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
 int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout);
-int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int num_sg, bool read);
-int rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int num_sg, bool read);
-int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
-		int sg_count, bool read);
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
-- 
cgit v1.2.3-71-gd317


From 555724a831b4a146e7bdf16ecc989cda032b076d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 12 May 2014 13:56:27 -0400
Subject: kernfs, sysfs, cgroup: restrict extra perm check on open to sysfs

The kernfs open method - kernfs_fop_open() - inherited extra
permission checks from sysfs.  While the vfs layer allows ignoring the
read/write permissions checks if the issuer has CAP_DAC_OVERRIDE,
sysfs explicitly denied open regardless of the cap if the file doesn't
have any of the UGO perms of the requested access or doesn't implement
the requested operation.  It can be debated whether this was a good
idea or not but the behavior is too subtle and dangerous to change at
this point.

After cgroup got converted to kernfs, this extra perm check also got
applied to cgroup breaking libcgroup which opens write-only files with
O_RDWR as root.  This patch gates the extra open permission check with
a new flag KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK and enables it for sysfs.
For sysfs, nothing changes.  For cgroup, root now can perform any
operation regardless of the permissions as it was before kernfs
conversion.  Note that kernfs still fails unimplemented operations
with -EINVAL.

While at it, add comments explaining KERNFS_ROOT flags.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrey Wagin <avagin@gmail.com>
Tested-by: Andrey Wagin <avagin@gmail.com>
Cc: Li Zefan <lizefan@huawei.com>
References: http://lkml.kernel.org/g/CANaxB-xUm3rJ-Cbp72q-rQJO5mZe1qK6qXsQM=vh0U8upJ44+A@mail.gmail.com
Fixes: 2bd59d48ebfb ("cgroup: convert to kernfs")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/file.c       | 17 ++++++++++-------
 fs/sysfs/mount.c       |  3 ++-
 include/linux/kernfs.h | 19 ++++++++++++++++++-
 3 files changed, 30 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e01ea4a14a01..5e9a80cfc3d8 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -610,6 +610,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
 static int kernfs_fop_open(struct inode *inode, struct file *file)
 {
 	struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+	struct kernfs_root *root = kernfs_root(kn);
 	const struct kernfs_ops *ops;
 	struct kernfs_open_file *of;
 	bool has_read, has_write, has_mmap;
@@ -624,14 +625,16 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
 	has_write = ops->write || ops->mmap;
 	has_mmap = ops->mmap;
 
-	/* check perms and supported operations */
-	if ((file->f_mode & FMODE_WRITE) &&
-	    (!(inode->i_mode & S_IWUGO) || !has_write))
-		goto err_out;
+	/* see the flag definition for details */
+	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
+		if ((file->f_mode & FMODE_WRITE) &&
+		    (!(inode->i_mode & S_IWUGO) || !has_write))
+			goto err_out;
 
-	if ((file->f_mode & FMODE_READ) &&
-	    (!(inode->i_mode & S_IRUGO) || !has_read))
-		goto err_out;
+		if ((file->f_mode & FMODE_READ) &&
+		    (!(inode->i_mode & S_IRUGO) || !has_read))
+			goto err_out;
+	}
 
 	/* allocate a kernfs_open_file for the file */
 	error = -ENOMEM;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index a66ad6196f59..8794423f7efb 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -63,7 +63,8 @@ int __init sysfs_init(void)
 {
 	int err;
 
-	sysfs_root = kernfs_create_root(NULL, 0, NULL);
+	sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
+					NULL);
 	if (IS_ERR(sysfs_root))
 		return PTR_ERR(sysfs_root);
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b0122dc6f96a..ca1be5c9136c 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -50,7 +50,24 @@ enum kernfs_node_flag {
 
 /* @flags for kernfs_create_root() */
 enum kernfs_root_flag {
-	KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001,
+	/*
+	 * kernfs_nodes are created in the deactivated state and invisible.
+	 * They require explicit kernfs_activate() to become visible.  This
+	 * can be used to make related nodes become visible atomically
+	 * after all nodes are created successfully.
+	 */
+	KERNFS_ROOT_CREATE_DEACTIVATED		= 0x0001,
+
+	/*
+	 * For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2)
+	 * succeeds regardless of the RW permissions.  sysfs had an extra
+	 * layer of enforcement where open(2) fails with -EACCES regardless
+	 * of CAP_DAC_OVERRIDE if the permission doesn't have the
+	 * respective read or write access at all (none of S_IRUGO or
+	 * S_IWUGO) or the respective operation isn't implemented.  The
+	 * following flag enables that behavior.
+	 */
+	KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK	= 0x0002,
 };
 
 /* type-specific structures for kernfs_node union members */
-- 
cgit v1.2.3-71-gd317


From 5024ae29cd285ce9e736776414da645d3a91828c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 7 May 2014 21:31:17 -0400
Subject: cgroup: introduce task_css_is_root()

Determining the css of a task usually requires RCU read lock as that's
the only thing which keeps the returned css accessible till its
reference is acquired; however, testing whether a task belongs to the
root can be performed without dereferencing the returned css by
comparing the returned pointer against the root one in init_css_set[]
which never changes.

Implement task_css_is_root() which can be invoked in any context.
This will be used by the scheduled cgroup_freezer change.

v2: cgroup no longer supports modular controllers.  No need to export
    init_css_set.  Pointed out by Li.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 15 +++++++++++++++
 kernel/cgroup.c        |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..d60904b9e505 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -473,6 +473,7 @@ struct cftype {
 };
 
 extern struct cgroup_root cgrp_dfl_root;
+extern struct css_set init_css_set;
 
 static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
 {
@@ -700,6 +701,20 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
 	return task_css_check(task, subsys_id, false);
 }
 
+/**
+ * task_css_is_root - test whether a task belongs to the root css
+ * @task: the target task
+ * @subsys_id: the target subsystem ID
+ *
+ * Test whether @task belongs to the root css on the specified subsystem.
+ * May be invoked in any context.
+ */
+static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
+{
+	return task_css_check(task, subsys_id, true) ==
+		init_css_set.subsys[subsys_id];
+}
+
 static inline struct cgroup *task_cgroup(struct task_struct *task,
 					 int subsys_id)
 {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 11a03d67635a..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -348,7 +348,7 @@ struct cgrp_cset_link {
  * reference-counted, to improve performance when child cgroups
  * haven't been created.
  */
-static struct css_set init_css_set = {
+struct css_set init_css_set = {
 	.refcount		= ATOMIC_INIT(1),
 	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
 	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
-- 
cgit v1.2.3-71-gd317


From 4c358e15553ed88bf2ddae422624624e1dd663d1 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 15 May 2014 14:44:30 +1000
Subject: of: fix CONFIG_OF=n prototype of of_node_full_name()

Make the CONFIG_OF=n prototpe of of_node_full_name() mateh the CONFIG_OF=y
version.

Fixes compile warnings like this:

sound/soc/soc-core.c: In function 'soc_check_aux_dev':
sound/soc/soc-core.c:1667:3: warning: passing argument 1 of 'of_node_full_name' discards 'const' qualifier from pointer target type [enabled by default]
   codecname = of_node_full_name(aux_dev->codec_of_node);

when CONFIG_OF is not defined.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 3bad8d106e0e..e6f0988c1c68 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -349,7 +349,7 @@ int of_device_is_stdout_path(struct device_node *dn);
 
 #else /* CONFIG_OF */
 
-static inline const char* of_node_full_name(struct device_node *np)
+static inline const char* of_node_full_name(const struct device_node *np)
 {
 	return "<no-node>";
 }
-- 
cgit v1.2.3-71-gd317