From f9d904acb3e7e9edb470cd824fe2a21bb0df86b8 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 7 Jan 2015 10:14:43 -0800
Subject: HID: hid-sensor-hub: Correct documentation

During changes to the interface, some documentation field comments
were missed. Added missing comments.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid-sensor-hub.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 51f7ccadf923..4173a8fdad9e 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -33,6 +33,8 @@
  * @units:		Measurment unit for this attribute.
  * @unit_expo:		Exponent used in the data.
  * @size:		Size in bytes for data size.
+ * @logical_minimum:	Logical minimum value for this attribute.
+ * @logical_maximum:	Logical maximum value for this attribute.
  */
 struct hid_sensor_hub_attribute_info {
 	u32 usage_id;
@@ -146,6 +148,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 
 /**
 * sensor_hub_input_attr_get_raw_value() - Synchronous read request
+* @hsdev:	Hub device instance.
 * @usage_id:	Attribute usage id of parent physical device as per spec
 * @attr_usage_id:	Attribute usage id as per spec
 * @report_id:	Report id to look for
@@ -160,6 +163,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 			u32 attr_usage_id, u32 report_id);
 /**
 * sensor_hub_set_feature() - Feature set request
+* @hsdev:	Hub device instance.
 * @report_id:	Report id to look for
 * @field_index:	Field index inside a report
 * @value:	Value to set
@@ -172,6 +176,7 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 
 /**
 * sensor_hub_get_feature() - Feature get request
+* @hsdev:	Hub device instance.
 * @report_id:	Report id to look for
 * @field_index:	Field index inside a report
 * @value:	Place holder for return value
-- 
cgit v1.2.3-71-gd317


From 4b8164b91d9fdff4dbac0a742d076bdff7fda21b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 31 Jan 2015 20:08:47 -0500
Subject: new helper: dup_iter()

Copy iter and kmemdup the underlying array for the copy.  Returns
a pointer to result of kmemdup() to be kfree()'d later.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  2 ++
 mm/iov_iter.c       | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 07a022641996..71880299ed48 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
 	return i->count;
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 827732047da1..9d96e283520c 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -751,3 +751,18 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	return npages;
 }
 EXPORT_SYMBOL(iov_iter_npages);
+
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+{
+	*new = *old;
+	if (new->type & ITER_BVEC)
+		return new->bvec = kmemdup(new->bvec,
+				    new->nr_segs * sizeof(struct bio_vec),
+				    flags);
+	else
+		/* iovec and kvec have identical layout */
+		return new->iov = kmemdup(new->iov,
+				   new->nr_segs * sizeof(struct iovec),
+				   flags);
+}
+EXPORT_SYMBOL(dup_iter);
-- 
cgit v1.2.3-71-gd317


From 846cd66788b11105a62785078360c8854aa98310 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 18 Feb 2015 11:38:06 +0100
Subject: net: Initialize all members in skb_gro_remcsum_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

skb_gro_remcsum_init() initializes the gro_remcsum.delta member only,
leading to compiler warnings about a possibly uninitialized
gro_remcsum.offset member:

drivers/net/vxlan.c: In function ‘vxlan_gro_receive’:
drivers/net/vxlan.c:602: warning: ‘grc.offset’ may be used uninitialized in this function
net/ipv4/fou.c: In function ‘gue_gro_receive’:
net/ipv4/fou.c:262: warning: ‘grc.offset’ may be used uninitialized in this function

While these are harmless for now:
  - skb_gro_remcsum_process() sets offset before changing delta,
  - skb_gro_remcsum_cleanup() checks if delta is non-zero before
    accessing offset,
it's safer to let the initialization function initialize all members.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5897b4ea5a3f..429d1790a27e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2342,6 +2342,7 @@ struct gro_remcsum {
 
 static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
 {
+	grc->offset = 0;
 	grc->delta = 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From b9ebafbe8cfeeddec881504c446cccd0d87a51b6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 20 Feb 2015 06:48:57 -0800
Subject: rhashtable: ensure cache line alignment on bucket_table

struct bucket_table contains mostly read fields :

size, locks_mask, locks.

Make sure these are not sharing a cache line with buckets[]

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 58851275fed9..cb2104be2135 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -54,10 +54,11 @@ struct rhash_head {
  * @buckets: size * hash buckets
  */
 struct bucket_table {
-	size_t				size;
-	unsigned int			locks_mask;
-	spinlock_t			*locks;
-	struct rhash_head __rcu		*buckets[];
+	size_t			size;
+	unsigned int		locks_mask;
+	spinlock_t		*locks;
+
+	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-- 
cgit v1.2.3-71-gd317


From 12ca7188468ee29c4e717f73db4bf43c90954fc7 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 13 Feb 2015 19:28:02 -0600
Subject: thermal: Introduce dummy functions when thermal is not defined

When CONFIG_THERMAL is not enabled, it is better to introduce
equivalent dummy functions in the exported header than to
introduce #ifdeffery in drivers using the function.

This will prevent issues such as that reported in:
http://www.spinics.net/lists/linux-next/msg31573.html

While at it switch over to IS_ENABLED for thermal macros
to allow for thermal framework to be built as framework
and relevant APIs be usable by relevant drivers as a result.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index fc52e307efab..5eac316490ea 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -314,6 +314,8 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
 }
 
 #endif
+
+#if IS_ENABLED(CONFIG_THERMAL)
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
 		void *, struct thermal_zone_device_ops *,
 		const struct thermal_zone_params *, int, int);
@@ -340,8 +342,58 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
 		struct thermal_cooling_device *, int);
 void thermal_cdev_update(struct thermal_cooling_device *);
 void thermal_notify_framework(struct thermal_zone_device *, int);
-
-#ifdef CONFIG_NET
+#else
+static inline struct thermal_zone_device *thermal_zone_device_register(
+	const char *type, int trips, int mask, void *devdata,
+	struct thermal_zone_device_ops *ops,
+	const struct thermal_zone_params *tzp,
+	int passive_delay, int polling_delay)
+{ return ERR_PTR(-ENODEV); }
+static inline void thermal_zone_device_unregister(
+	struct thermal_zone_device *tz)
+{ }
+static inline int thermal_zone_bind_cooling_device(
+	struct thermal_zone_device *tz, int trip,
+	struct thermal_cooling_device *cdev,
+	unsigned long upper, unsigned long lower)
+{ return -ENODEV; }
+static inline int thermal_zone_unbind_cooling_device(
+	struct thermal_zone_device *tz, int trip,
+	struct thermal_cooling_device *cdev)
+{ return -ENODEV; }
+static inline void thermal_zone_device_update(struct thermal_zone_device *tz)
+{ }
+static inline struct thermal_cooling_device *
+thermal_cooling_device_register(char *type, void *devdata,
+	const struct thermal_cooling_device_ops *ops)
+{ return ERR_PTR(-ENODEV); }
+static inline struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np,
+	char *type, void *devdata, const struct thermal_cooling_device_ops *ops)
+{ return ERR_PTR(-ENODEV); }
+static inline void thermal_cooling_device_unregister(
+	struct thermal_cooling_device *cdev)
+{ }
+static inline struct thermal_zone_device *thermal_zone_get_zone_by_name(
+		const char *name)
+{ return ERR_PTR(-ENODEV); }
+static inline int thermal_zone_get_temp(
+		struct thermal_zone_device *tz, unsigned long *temp)
+{ return -ENODEV; }
+static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
+{ return -ENODEV; }
+static inline struct thermal_instance *
+get_thermal_instance(struct thermal_zone_device *tz,
+	struct thermal_cooling_device *cdev, int trip)
+{ return ERR_PTR(-ENODEV); }
+static inline void thermal_cdev_update(struct thermal_cooling_device *cdev)
+{ }
+static inline void thermal_notify_framework(struct thermal_zone_device *tz,
+	int trip)
+{ }
+#endif /* CONFIG_THERMAL */
+
+#if defined(CONFIG_NET) && IS_ENABLED(CONFIG_THERMAL)
 extern int thermal_generate_netlink_event(struct thermal_zone_device *tz,
 						enum events event);
 #else
-- 
cgit v1.2.3-71-gd317


From 737eb0301f296d55c22350c6968ff1ef51bacb5f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 20 Feb 2015 14:53:46 +0000
Subject: genirq / PM: better describe IRQF_NO_SUSPEND semantics

The IRQF_NO_SUSPEND flag is intended to be used for interrupts required
to be enabled during the suspend-resume cycle. This mostly consists of
IPIs and timer interrupts, potentially including chained irqchip
interrupts if these are necessary to handle timers or IPIs. If an
interrupt does not fall into one of the aforementioned categories,
requesting it with IRQF_NO_SUSPEND is likely incorrect.

Using IRQF_NO_SUSPEND does not guarantee that the interrupt can wake the
system from a suspended state. For an interrupt to be able to trigger a
wakeup, it may be necessary to program various components of the system.
In these cases it is necessary to use {enable,disabled}_irq_wake.

Unfortunately, several drivers assume that IRQF_NO_SUSPEND ensures that
an IRQ can wake up the system, and the documentation can be read
ambiguously w.r.t. this property.

This patch updates the documentation regarding IRQF_NO_SUSPEND to make
this caveat explicit, hopefully making future misuse rarer. Cleanup of
existing misuse will occur as part of later patch series.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/suspend-and-interrupts.txt | 6 ++++--
 include/linux/interrupt.h                      | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt
index 2f9c5a5fcb25..50493c9284b4 100644
--- a/Documentation/power/suspend-and-interrupts.txt
+++ b/Documentation/power/suspend-and-interrupts.txt
@@ -40,8 +40,10 @@ but also to IPIs and to some other special-purpose interrupts.
 
 The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
 requesting a special-purpose interrupt.  It causes suspend_device_irqs() to
-leave the corresponding IRQ enabled so as to allow the interrupt to work all
-the time as expected.
+leave the corresponding IRQ enabled so as to allow the interrupt to work as
+expected during the suspend-resume cycle, but does not guarantee that the
+interrupt will wake the system from a suspended state -- for such cases it is
+necessary to use enable_irq_wake().
 
 Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one
 user of it.  Thus, if the IRQ is shared, all of the interrupt handlers installed
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d9b05b5bf8c7..606771c7cac2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -52,7 +52,9 @@
  * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
  *                Used by threaded interrupts which need to keep the
  *                irq line disabled until the threaded handler has been run.
- * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
+ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend.  Does not guarantee
+ *                   that this interrupt will wake the system from a suspended
+ *                   state.  See Documentation/power/suspend-and-interrupts.txt
  * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
-- 
cgit v1.2.3-71-gd317


From bc4b1f486fe69b86769e07c8edce472327a8462b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Sun, 15 Feb 2015 11:57:53 +0700
Subject: Revert "USB: serial: make bulk_out_size a lower limit"

This reverts commit 5083fd7bdfe6760577235a724cf6dccae13652c2.

A bulk-out size smaller than the end-point size is indeed valid. The
offending commit broke the usb-debug driver for EHCI debug devices,
which use 8-byte buffers.

Fixes: 5083fd7bdfe6 ("USB: serial: make bulk_out_size a lower limit")
Reported-by: "Li, Elvin" <elvin.li@intel.com>
Cc: stable <stable@vger.kernel.org>	# v3.15
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/usb-serial.c | 5 +++--
 include/linux/usb/serial.h      | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 475723c006f9..19842370a07f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -940,8 +940,9 @@ static int usb_serial_probe(struct usb_interface *interface,
 		port = serial->port[i];
 		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
-		buffer_size = max_t(int, serial->type->bulk_out_size,
-						usb_endpoint_maxp(endpoint));
+		buffer_size = serial->type->bulk_out_size;
+		if (!buffer_size)
+			buffer_size = usb_endpoint_maxp(endpoint);
 		port->bulk_out_size = buffer_size;
 		port->bulk_out_endpointAddress = endpoint->bEndpointAddress;
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 9bb547c7bce7..704a1ab8240c 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -190,8 +190,7 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data)
  * @num_ports: the number of different ports this device will have.
  * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer
  *	(0 = end-point size)
- * @bulk_out_size: minimum number of bytes to allocate for bulk-out buffer
- *	(0 = end-point size)
+ * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size)
  * @calc_num_ports: pointer to a function to determine how many ports this
  *	device has dynamically.  It will be called after the probe()
  *	callback is called, but before attach()
-- 
cgit v1.2.3-71-gd317


From 4c4b52d9b2df45e8216d3e30b5452e4a364d2cac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 25 Feb 2015 16:31:54 +0100
Subject: rhashtable: remove indirection for grow/shrink decision functions

Currently, all real users of rhashtable default their grow and shrink
decision functions to rht_grow_above_75() and rht_shrink_below_30(),
so that there's currently no need to have this explicitly selectable.

It can/should be generic and private inside rhashtable until a real
use case pops up. Since we can make this private, we'll save us this
additional indirection layer and can improve insertion/deletion time
as well.

Reference: http://patchwork.ozlabs.org/patch/443040/
Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 13 -----------
 lib/rhashtable.c           | 56 ++++++++++++++--------------------------------
 lib/test_rhashtable.c      |  1 +
 net/netfilter/nft_hash.c   |  2 --
 net/netlink/af_netlink.c   |  2 --
 net/tipc/socket.c          |  2 --
 6 files changed, 18 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index cb2104be2135..d438eeb08bff 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -79,12 +79,6 @@ struct rhashtable;
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
- * @grow_decision: If defined, may return true if table should expand
- * @shrink_decision: If defined, may return true if table should shrink
- *
- * Note: when implementing the grow and shrink decision function, min/max
- * shift must be enforced, otherwise, resizing watermarks they set may be
- * useless.
  */
 struct rhashtable_params {
 	size_t			nelem_hint;
@@ -98,10 +92,6 @@ struct rhashtable_params {
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
-	bool			(*grow_decision)(const struct rhashtable *ht,
-						 size_t new_size);
-	bool			(*shrink_decision)(const struct rhashtable *ht,
-						   size_t new_size);
 };
 
 /**
@@ -193,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
-
 int rhashtable_expand(struct rhashtable *ht);
 int rhashtable_shrink(struct rhashtable *ht);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index bcf119bfdef4..090641db4c0d 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -247,26 +247,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
 	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
 }
-EXPORT_SYMBOL_GPL(rht_grow_above_75);
 
 /**
  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
  * @ht:		hash table
  * @new_size:	new table size
  */
-bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
 	       (atomic_read(&ht->shift) > ht->p.min_shift);
 }
-EXPORT_SYMBOL_GPL(rht_shrink_below_30);
 
 static void lock_buckets(struct bucket_table *new_tbl,
 			 struct bucket_table *old_tbl, unsigned int hash)
@@ -528,40 +526,19 @@ static void rht_deferred_worker(struct work_struct *work)
 	list_for_each_entry(walker, &ht->walkers, list)
 		walker->resize = true;
 
-	if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl->size))
 		rhashtable_expand(ht);
-	else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
+	else if (rht_shrink_below_30(ht, tbl->size))
 		rhashtable_shrink(ht);
-
 unlock:
 	mutex_unlock(&ht->mutex);
 }
 
-static void rhashtable_probe_expand(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.grow_decision &&
-	    ht->p.grow_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
-static void rhashtable_probe_shrink(struct rhashtable *ht)
-{
-	const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Only adjust the table if no resizing is currently in progress. */
-	if (tbl == new_tbl && ht->p.shrink_decision &&
-	    ht->p.shrink_decision(ht, tbl->size))
-		schedule_work(&ht->run_work);
-}
-
 static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				struct bucket_table *tbl, u32 hash)
+				struct bucket_table *tbl,
+				const struct bucket_table *old_tbl, u32 hash)
 {
+	bool no_resize_running = tbl == old_tbl;
 	struct rhash_head *head;
 
 	hash = rht_bucket_index(tbl, hash);
@@ -577,8 +554,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
-
-	rhashtable_probe_expand(ht);
+	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+		schedule_work(&ht->run_work);
 }
 
 /**
@@ -608,7 +585,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
 	hash = obj_raw_hashfn(ht, rht_obj(ht, obj));
 
 	lock_buckets(tbl, old_tbl, hash);
-	__rhashtable_insert(ht, obj, tbl, hash);
+	__rhashtable_insert(ht, obj, tbl, old_tbl, hash);
 	unlock_buckets(tbl, old_tbl, hash);
 
 	rcu_read_unlock();
@@ -690,8 +667,11 @@ found:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
 
 	if (ret) {
+		bool no_resize_running = new_tbl == old_tbl;
+
 		atomic_dec(&ht->nelems);
-		rhashtable_probe_shrink(ht);
+		if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size))
+			schedule_work(&ht->run_work);
 	}
 
 	rcu_read_unlock();
@@ -861,7 +841,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
 		goto exit;
 	}
 
-	__rhashtable_insert(ht, obj, new_tbl, new_hash);
+	__rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash);
 
 exit:
 	unlock_buckets(new_tbl, old_tbl, new_hash);
@@ -1123,8 +1103,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (!ht->p.hash_rnd)
 		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		INIT_WORK(&ht->run_work, rht_deferred_worker);
+	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
 	return 0;
 }
@@ -1142,8 +1121,7 @@ void rhashtable_destroy(struct rhashtable *ht)
 {
 	ht->being_destroyed = true;
 
-	if (ht->p.grow_decision || ht->p.shrink_decision)
-		cancel_work_sync(&ht->run_work);
+	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
 	bucket_table_free(rht_dereference(ht->tbl, ht));
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f9e9d734446a..67c7593d1dd6 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -201,6 +201,7 @@ static int __init test_rht_init(void)
 		.key_offset = offsetof(struct test_obj, value),
 		.key_len = sizeof(int),
 		.hashfn = jhash,
+		.max_shift = 1, /* we expand/shrink manually here */
 		.nulls_base = (3U << RHT_BASE_SHIFT),
 	};
 	int err;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 61e6c407476a..c82df0a48fcd 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -192,8 +192,6 @@ static int nft_hash_init(const struct nft_set *set,
 		.key_offset = offsetof(struct nft_hash_elem, key),
 		.key_len = set->klen,
 		.hashfn = jhash,
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(priv, &params);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2702673f0f23..05919bf3f670 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void)
 		.key_len = sizeof(u32), /* portid */
 		.hashfn = jhash,
 		.max_shift = 16, /* 64K */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	if (err != 0)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f73e975af80b..b4d4467d0bb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net)
 		.hashfn = jhash,
 		.max_shift = 20, /* 1M */
 		.min_shift = 8,  /* 256 */
-		.grow_decision = rht_grow_above_75,
-		.shrink_decision = rht_shrink_below_30,
 	};
 
 	return rhashtable_init(&tn->sk_rht, &rht_params);
-- 
cgit v1.2.3-71-gd317


From 140e049c64ce848392adbf4678983ecc76888dde Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 17:42:42 -0500
Subject: NFS: Add a helper to set attribute barriers

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 16 ++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..b0cbc1ba82da 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1260,6 +1260,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 }
 EXPORT_SYMBOL_GPL(nfs_fattr_init);
 
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+	fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 struct nfs_fattr *nfs_alloc_fattr(void)
 {
 	struct nfs_fattr *fattr;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2f77e0c651c8..3a4ffb5856cd 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -369,6 +369,7 @@ extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ct
 extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
+extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
 extern unsigned long nfs_inc_attr_generation_counter(void);
 
 extern struct nfs_fattr *nfs_alloc_fattr(void);
-- 
cgit v1.2.3-71-gd317


From f044636d972246d451e06226cc1675d5da389762 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 16:09:04 -0500
Subject: NFS: Add attribute update barriers to nfs_setattr_update_inode()

Ensure that other operations which raced with our setattr RPC call
cannot revert the file attribute changes that were made on the server.
To do so, we artificially bump the attribute generation counter on
the inode so that all calls to nfs_fattr_init() that precede ours
will be dropped.

The motivation for the patch came from Chuck Lever's reports of readaheads
racing with truncate operations and causing the file size to be reverted.

Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 17 ++++++++++++-----
 fs/nfs/nfs3proc.c      |  2 +-
 fs/nfs/nfs4proc.c      |  6 +++---
 fs/nfs/proc.c          |  2 +-
 include/linux/nfs_fs.h |  2 +-
 5 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b0cbc1ba82da..3a2d127de499 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
  * This is a copy of the common vmtruncate, but with the locking
  * corrected to take into account the fact that NFS requires
  * inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 	if (err)
 		goto out;
 
-	spin_lock(&inode->i_lock);
 	i_size_write(inode, offset);
 	/* Optimisation */
 	if (offset == 0)
 		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
-	spin_unlock(&inode->i_lock);
 
+	spin_unlock(&inode->i_lock);
 	truncate_pagecache(inode, offset);
+	spin_lock(&inode->i_lock);
 out:
 	return err;
 }
@@ -585,10 +586,15 @@ out:
  * Note: we do this in the *proc.c in order to ensure that
  *       it works for things like exclusive creates too.
  */
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+		struct nfs_fattr *fattr)
 {
+	/* Barrier: bump the attribute generation count. */
+	nfs_fattr_set_barrier(fattr);
+
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->attr_gencount = fattr->gencount;
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
-		spin_lock(&inode->i_lock);
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
 			int mode = attr->ia_mode & S_IALLUGO;
 			mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 			inode->i_gid = attr->ia_gid;
 		nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL);
-		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
+	nfs_update_inode(inode, fattr);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..11109a137c0c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4e41340e957d..c499e02a58ca 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2416,8 +2416,8 @@ static int _nfs4_do_open(struct inode *dir,
 				opendata->o_res.f_attr, sattr,
 				state, label, olabel);
 		if (status == 0) {
-			nfs_setattr_update_inode(state->inode, sattr);
-			nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+			nfs_setattr_update_inode(state->inode, sattr,
+					opendata->o_res.f_attr);
 			nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
 		}
 	}
@@ -3291,7 +3291,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
 	if (status == 0) {
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 		nfs_setsecurity(inode, fattr, label);
 	}
 	nfs4_label_free(label);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..6202bc0f11bb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 3a4ffb5856cd..f26e64e0aff8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -356,7 +356,7 @@ extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *ino
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
-extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
+extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 				struct nfs4_label *label);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
-- 
cgit v1.2.3-71-gd317


From a08a8cd375db9769588257e7782f6b6b68561b88 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Feb 2015 17:36:09 -0500
Subject: NFS: Add attribute update barriers to NFS writebacks

Ensure that other operations that race with our write RPC calls
cannot revert the file size updates that were made on the server.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/inode.c         | 25 ++++++++++++++++++++++---
 fs/nfs/internal.h      |  1 +
 fs/nfs/nfs3proc.c      |  2 +-
 fs/nfs/nfs4proc.c      |  2 +-
 fs/nfs/proc.c          |  4 +---
 fs/nfs/write.c         | 30 ++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 7 files changed, 57 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 299bf7171a4d..ff9a6795da46 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1491,7 +1491,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 
 /**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1501,11 +1501,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
  *
  * This function is mainly designed to be used by the ->write_done() functions.
  */
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
 	int status;
 
-	spin_lock(&inode->i_lock);
 	/* Don't do a WCC update if these attributes are already stale */
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
 			!nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1537,6 +1536,26 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 	}
 out_noforce:
 	status = nfs_post_op_update_inode_locked(inode, fattr);
+	return status;
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+	int status;
+
+	spin_lock(&inode->i_lock);
+	status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
 	spin_unlock(&inode->i_lock);
 	return status;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			     struct nfs_commit_info *cinfo,
 			     u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 11109a137c0c..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c499e02a58ca..b022e64b76a5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4237,7 +4237,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), hdr->timestamp);
-		nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+		nfs_writeback_update_inode(hdr);
 	}
 	return 0;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 6202bc0f11bb..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = hdr->inode;
-
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..849ed784d6ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
 	return 0;
 }
 
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_pgio_args *argp = &hdr->args;
+	struct nfs_pgio_res *resp = &hdr->res;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+		return;
+	if (argp->offset + resp->count != fattr->size)
+		return;
+	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+		return;
+	/* Set attribute barrier */
+	nfs_fattr_set_barrier(fattr);
+}
+
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+	struct nfs_fattr *fattr = hdr->res.fattr;
+	struct inode *inode = hdr->inode;
+
+	if (fattr == NULL)
+		return;
+	spin_lock(&inode->i_lock);
+	nfs_writeback_check_extend(hdr, fattr);
+	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
+
 /*
  * This function is called when the WRITE call is complete.
  */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f26e64e0aff8..59b1516b9fd4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -343,6 +343,7 @@ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
+extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
-- 
cgit v1.2.3-71-gd317


From f5956fafb00afab474c3886b6297f9b5e7aff722 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Mon, 2 Mar 2015 18:22:15 +0200
Subject: net/mlx4_core: Fix wrong mask and error flow for the update-qp
 command

The bit mask for currently supported driver features (MLX4_UPDATE_QP_SUPPORTED_ATTRS)
of the update-qp command was defined twice (using enum value and pre-processor
define directive) and wrong.

The return value of the call to mlx4_update_qp() from within the SRIOV
resource-tracker was wrongly voided down.

Fix both issues.

issue: none
Fixes: 09e05c3f78e9 ('net/mlx4: Set vlan stripping policy by the right command')
Fixes: ce8d9e0d6746 ('net/mlx4_core: Add UPDATE_QP SRIOV wrapper support')
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/qp.c               | 1 -
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 9 ++++++---
 include/linux/mlx4/qp.h                               | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2bb8553bd905..eda29dbbfcd2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -412,7 +412,6 @@ err_icm:
 
 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
 
-#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 486e3d26cd4a..d97ca88c55b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -713,7 +713,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 	struct mlx4_vport_oper_state *vp_oper;
 	struct mlx4_priv *priv;
 	u32 qp_type;
-	int port;
+	int port, err = 0;
 
 	port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
 	priv = mlx4_priv(dev);
@@ -738,7 +738,9 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 			} else {
 				struct mlx4_update_qp_params params = {.flags = 0};
 
-				mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+				err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+				if (err)
+					goto out;
 			}
 		}
 
@@ -773,7 +775,8 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 		qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
 		qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
 	}
-	return 0;
+out:
+	return err;
 }
 
 static int mpt_mask(struct mlx4_dev *dev)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 2bbc62aa818a..551f85456c11 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -427,7 +427,7 @@ struct mlx4_wqe_inline_seg {
 
 enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_SMAC		= 1 << 0,
-	MLX4_UPDATE_QP_VSD		= 1 << 2,
+	MLX4_UPDATE_QP_VSD		= 1 << 1,
 	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 2) - 1
 };
 
-- 
cgit v1.2.3-71-gd317


From c6331ba3d2d68758f36dbc3e09e648d312c24d97 Mon Sep 17 00:00:00 2001
From: Marcin Bis <marcin@bis.org.pl>
Date: Sun, 1 Mar 2015 13:49:32 +0100
Subject: spi: fix a typo in comment.

alway -> always

Signed-off-by: Marcin Bis <marcin@bis.org.pl>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index ed9489d893a4..856d34dde79b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -649,7 +649,7 @@ struct spi_transfer {
  * sequence completes.  On some systems, many such sequences can execute as
  * as single programmed DMA transfer.  On all systems, these messages are
  * queued, and might complete after transactions to other devices.  Messages
- * sent to a given spi_device are alway executed in FIFO order.
+ * sent to a given spi_device are always executed in FIFO order.
  *
  * The code that submits an spi_message (and its spi_transfers)
  * to the lower layers is responsible for managing its memory.
-- 
cgit v1.2.3-71-gd317


From 874f946376de57c8d6230b30ad71f742883fee3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 2 Mar 2015 23:32:08 -0500
Subject: NFS: Fix a regression in the read() syscall

When invalidating the page cache for a regular file, we want to first
sync all dirty data to disk and then call invalidate_inode_pages2().
The latter relies on nfs_launder_page() and nfs_release_page() to deal
respectively with dirty pages, and unstable written pages.

When commit 9590544694bec ("NFS: avoid deadlocks with loop-back mounted
NFS filesystems.") changed the behaviour of nfs_release_page(), then it
made it possible for invalidate_inode_pages2() to fail with an EBUSY.
Unfortunately, that error is then propagated back to read().

Let's therefore work around the problem for now by protecting the call
to sync the data and invalidate_inode_pages2() so that they are atomic
w.r.t. the addition of new writes.
Later on, we can revisit whether or not we still need nfs_launder_page()
and nfs_release_page().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/file.c          |  4 ++--
 fs/nfs/inode.c         | 37 ++++++++++++++++++++++++++++++++++---
 include/linux/nfs_fs.h |  1 +
 3 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c045c7169fa0..41963ffca597 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
-	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
@@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_mapping(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5026c44a98e1..8edb7d049565 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1067,11 +1067,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 }
 
 /**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
  */
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+		struct address_space *mapping,
+		bool may_lock)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	unsigned long *bitlock = &nfsi->flags;
@@ -1120,7 +1123,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
 	trace_nfs_invalidate_mapping_enter(inode);
-	ret = nfs_invalidate_mapping(inode, mapping);
+	if (may_lock) {
+		mutex_lock(&inode->i_mutex);
+		ret = nfs_invalidate_mapping(inode, mapping);
+		mutex_unlock(&inode->i_mutex);
+	} else
+		ret = nfs_invalidate_mapping(inode, mapping);
 	trace_nfs_invalidate_mapping_exit(inode, ret);
 
 	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1130,6 +1138,29 @@ out:
 	return ret;
 }
 
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, false);
+}
+
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, true);
+}
+
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 59b1516b9fd4..b01ccf371fdc 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -356,6 +356,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
+extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-- 
cgit v1.2.3-71-gd317


From 17f480342026e54000731acaa69bf32787ce46cb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 27 Feb 2015 00:07:55 +0100
Subject: genirq / PM: Add flag for shared NO_SUSPEND interrupt lines

It currently is required that all users of NO_SUSPEND interrupt
lines pass the IRQF_NO_SUSPEND flag when requesting the IRQ or the
WARN_ON_ONCE() in irq_pm_install_action() will trigger.  That is
done to warn about situations in which unprepared interrupt handlers
may be run unnecessarily for suspended devices and may attempt to
access those devices by mistake.  However, it may cause drivers
that have no technical reasons for using IRQF_NO_SUSPEND to set
that flag just because they happen to share the interrupt line
with something like a timer.

Moreover, the generic handling of wakeup interrupts introduced by
commit 9ce7a25849e8 (genirq: Simplify wakeup mechanism) only works
for IRQs without any NO_SUSPEND users, so the drivers of wakeup
devices needing to use shared NO_SUSPEND interrupt lines for
signaling system wakeup generally have to detect wakeup in their
interrupt handlers.  Thus if they happen to share an interrupt line
with a NO_SUSPEND user, they also need to request that their
interrupt handlers be run after suspend_device_irqs().

In both cases the reason for using IRQF_NO_SUSPEND is not because
the driver in question has a genuine need to run its interrupt
handler after suspend_device_irqs(), but because it happens to
share the line with some other NO_SUSPEND user.  Otherwise, the
driver would do without IRQF_NO_SUSPEND just fine.

To make it possible to specify that condition explicitly, introduce
a new IRQ action handler flag for shared IRQs, IRQF_COND_SUSPEND,
that, when set, will indicate to the IRQ core that the interrupt
user is generally fine with suspending the IRQ, but it also can
tolerate handler invocations after suspend_device_irqs() and, in
particular, it is capable of detecting system wakeup and triggering
it as appropriate from its interrupt handler.

That will allow us to work around a problem with a shared timer
interrupt line on at91 platforms.

Link: http://marc.info/?l=linux-kernel&m=142252777602084&w=2
Link: http://marc.info/?t=142252775300011&r=1&w=2
Link: https://lkml.org/lkml/2014/12/15/552
Reported-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
 include/linux/interrupt.h | 5 +++++
 include/linux/irqdesc.h   | 1 +
 kernel/irq/manage.c       | 7 ++++++-
 kernel/irq/pm.c           | 7 ++++++-
 4 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 606771c7cac2..2e88580194f0 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -59,6 +59,10 @@
  * IRQF_NO_THREAD - Interrupt cannot be threaded
  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
  *                resume time.
+ * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this
+ *                interrupt handler after suspending interrupts. For system
+ *                wakeup devices users need to implement wakeup detection in
+ *                their interrupt handlers.
  */
 #define IRQF_DISABLED		0x00000020
 #define IRQF_SHARED		0x00000080
@@ -72,6 +76,7 @@
 #define IRQF_FORCE_RESUME	0x00008000
 #define IRQF_NO_THREAD		0x00010000
 #define IRQF_EARLY_RESUME	0x00020000
+#define IRQF_COND_SUSPEND	0x00040000
 
 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
 
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index faf433af425e..dd1109fb241e 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -78,6 +78,7 @@ struct irq_desc {
 #ifdef CONFIG_PM_SLEEP
 	unsigned int		nr_actions;
 	unsigned int		no_suspend_depth;
+	unsigned int		cond_suspend_depth;
 	unsigned int		force_resume_depth;
 #endif
 #ifdef CONFIG_PROC_FS
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 196a06fbc122..886d09e691d5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	 * otherwise we'll have trouble later trying to figure out
 	 * which interrupt is which (messes up the interrupt freeing
 	 * logic etc).
+	 *
+	 * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
+	 * it cannot be set along with IRQF_NO_SUSPEND.
 	 */
-	if ((irqflags & IRQF_SHARED) && !dev_id)
+	if (((irqflags & IRQF_SHARED) && !dev_id) ||
+	    (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
+	    ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
 		return -EINVAL;
 
 	desc = irq_to_desc(irq);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 3ca532592704..5204a6d1b985 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
 
 	if (action->flags & IRQF_NO_SUSPEND)
 		desc->no_suspend_depth++;
+	else if (action->flags & IRQF_COND_SUSPEND)
+		desc->cond_suspend_depth++;
 
 	WARN_ON_ONCE(desc->no_suspend_depth &&
-		     desc->no_suspend_depth != desc->nr_actions);
+		     (desc->no_suspend_depth +
+			desc->cond_suspend_depth) != desc->nr_actions);
 }
 
 /*
@@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
 
 	if (action->flags & IRQF_NO_SUSPEND)
 		desc->no_suspend_depth--;
+	else if (action->flags & IRQF_COND_SUSPEND)
+		desc->cond_suspend_depth--;
 }
 
 static bool suspend_device_irq(struct irq_desc *desc, int irq)
-- 
cgit v1.2.3-71-gd317


From 8603e1b30027f943cc9c1eef2b291d42c3347af1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 5 Mar 2015 08:04:13 -0500
Subject: workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s
 for PREEMPT_NONE

cancel[_delayed]_work_sync() are implemented using
__cancel_work_timer() which grabs the PENDING bit using
try_to_grab_pending() and then flushes the work item with PENDING set
to prevent the on-going execution of the work item from requeueing
itself.

try_to_grab_pending() can always grab PENDING bit without blocking
except when someone else is doing the above flushing during
cancelation.  In that case, try_to_grab_pending() returns -ENOENT.  In
this case, __cancel_work_timer() currently invokes flush_work().  The
assumption is that the completion of the work item is what the other
canceling task would be waiting for too and thus waiting for the same
condition and retrying should allow forward progress without excessive
busy looping

Unfortunately, this doesn't work if preemption is disabled or the
latter task has real time priority.  Let's say task A just got woken
up from flush_work() by the completion of the target work item.  If,
before task A starts executing, task B gets scheduled and invokes
__cancel_work_timer() on the same work item, its try_to_grab_pending()
will return -ENOENT as the work item is still being canceled by task A
and flush_work() will also immediately return false as the work item
is no longer executing.  This puts task B in a busy loop possibly
preventing task A from executing and clearing the canceling state on
the work item leading to a hang.

task A			task B			worker

						executing work
__cancel_work_timer()
  try_to_grab_pending()
  set work CANCELING
  flush_work()
    block for work completion
						completion, wakes up A
			__cancel_work_timer()
			while (forever) {
			  try_to_grab_pending()
			    -ENOENT as work is being canceled
			  flush_work()
			    false as work is no longer executing
			}

This patch removes the possible hang by updating __cancel_work_timer()
to explicitly wait for clearing of CANCELING rather than invoking
flush_work() after try_to_grab_pending() fails with -ENOENT.

Link: http://lkml.kernel.org/g/20150206171156.GA8942@axis.com

v3: bit_waitqueue() can't be used for work items defined in vmalloc
    area.  Switched to custom wake function which matches the target
    work item and exclusive wait and wakeup.

v2: v1 used wake_up() on bit_waitqueue() which leads to NULL deref if
    the target bit waitqueue has wait_bit_queue's on it.  Use
    DEFINE_WAIT_BIT() and __wake_up_bit() instead.  Reported by Tomeu
    Vizoso.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Rabin Vincent <rabin.vincent@axis.com>
Cc: Tomeu Vizoso <tomeu.vizoso@gmail.com>
Cc: stable@vger.kernel.org
Tested-by: Jesper Nilsson <jesper.nilsson@axis.com>
Tested-by: Rabin Vincent <rabin.vincent@axis.com>
---
 include/linux/workqueue.h |  3 ++-
 kernel/workqueue.c        | 56 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 74db135f9957..f597846ff605 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -70,7 +70,8 @@ enum {
 	/* data contains off-queue information when !WORK_STRUCT_PWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_COLOR_SHIFT,
 
-	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
+	__WORK_OFFQ_CANCELING	= WORK_OFFQ_FLAG_BASE,
+	WORK_OFFQ_CANCELING	= (1 << __WORK_OFFQ_CANCELING),
 
 	/*
 	 * When a work item is off queue, its high bits point to the last
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f28849394791..41ff75b478c6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+	wait_queue_t		wait;
+	struct work_struct	*work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+	if (cwait->work != key)
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
 	unsigned long flags;
 	int ret;
 
 	do {
 		ret = try_to_grab_pending(work, is_dwork, &flags);
 		/*
-		 * If someone else is canceling, wait for the same event it
-		 * would be waiting for before retrying.
+		 * If someone else is already canceling, wait for it to
+		 * finish.  flush_work() doesn't work for PREEMPT_NONE
+		 * because we may get scheduled between @work's completion
+		 * and the other canceling task resuming and clearing
+		 * CANCELING - flush_work() will return false immediately
+		 * as @work is no longer busy, try_to_grab_pending() will
+		 * return -ENOENT as @work is still being canceled and the
+		 * other canceling task won't be able to clear CANCELING as
+		 * we're hogging the CPU.
+		 *
+		 * Let's wait for completion using a waitqueue.  As this
+		 * may lead to the thundering herd problem, use a custom
+		 * wake function which matches @work along with exclusive
+		 * wait and wakeup.
 		 */
-		if (unlikely(ret == -ENOENT))
-			flush_work(work);
+		if (unlikely(ret == -ENOENT)) {
+			struct cwt_wait cwait;
+
+			init_wait(&cwait.wait);
+			cwait.wait.func = cwt_wakefn;
+			cwait.work = work;
+
+			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+						  TASK_UNINTERRUPTIBLE);
+			if (work_is_canceling(work))
+				schedule();
+			finish_wait(&cancel_waitq, &cwait.wait);
+		}
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
 	flush_work(work);
 	clear_work_data(work);
+
+	/*
+	 * Paired with prepare_to_wait() above so that either
+	 * waitqueue_active() is visible here or !work_is_canceling() is
+	 * visible there.
+	 */
+	smp_mb();
+	if (waitqueue_active(&cancel_waitq))
+		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
 	return ret;
 }
 
-- 
cgit v1.2.3-71-gd317


From ef2b22ac540c018bd574d1846ab95b9bfcf38702 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 2 Mar 2015 22:26:55 +0100
Subject: cpuidle / sleep: Use broadcast timer for states that stop local timer

Commit 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
overlooked the fact that entering some sufficiently deep idle states
by CPUs may cause their local timers to stop and in those cases it
is necessary to switch over to a broadcast timer prior to entering
the idle state.  If the cpuidle driver in use does not provide
the new ->enter_freeze callback for any of the idle states, that
problem affects suspend-to-idle too, but it is not taken into account
after the changes made by commit 381063133246.

Fix that by changing the definition of cpuidle_enter_freeze() and
re-arranging of the code in cpuidle_idle_call(), so the former does
not call cpuidle_enter() any more and the fallback case is handled
by cpuidle_idle_call() directly.

Fixes: 381063133246 (PM / sleep: Re-implement suspend-to-idle handling)
Reported-and-tested-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/cpuidle/cpuidle.c | 62 +++++++++++++++++------------------------------
 include/linux/cpuidle.h   | 17 +++++++++++--
 kernel/sched/idle.c       | 30 ++++++++++++++++-------
 3 files changed, 58 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8b3e132b6a01..080bd2dbde4b 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -44,8 +44,8 @@ void disable_cpuidle(void)
 	off = 1;
 }
 
-static bool cpuidle_not_available(struct cpuidle_driver *drv,
-				  struct cpuidle_device *dev)
+bool cpuidle_not_available(struct cpuidle_driver *drv,
+			   struct cpuidle_device *dev)
 {
 	return off || !initialized || !drv || !dev || !dev->enabled;
 }
@@ -72,14 +72,8 @@ int cpuidle_play_dead(void)
 	return -ENODEV;
 }
 
-/**
- * cpuidle_find_deepest_state - Find deepest state meeting specific conditions.
- * @drv: cpuidle driver for the given CPU.
- * @dev: cpuidle device for the given CPU.
- * @freeze: Whether or not the state should be suitable for suspend-to-idle.
- */
-static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-				      struct cpuidle_device *dev, bool freeze)
+static int find_deepest_state(struct cpuidle_driver *drv,
+			      struct cpuidle_device *dev, bool freeze)
 {
 	unsigned int latency_req = 0;
 	int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
@@ -98,6 +92,17 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 	return ret;
 }
 
+/**
+ * cpuidle_find_deepest_state - Find the deepest available idle state.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ */
+int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+			       struct cpuidle_device *dev)
+{
+	return find_deepest_state(drv, dev, false);
+}
+
 static void enter_freeze_proper(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {
@@ -119,46 +124,26 @@ static void enter_freeze_proper(struct cpuidle_driver *drv,
 
 /**
  * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
  *
  * If there are states with the ->enter_freeze callback, find the deepest of
- * them and enter it with frozen tick.  Otherwise, find the deepest state
- * available and enter it normally.
- *
- * Returns with enabled interrupts.
+ * them and enter it with frozen tick.
  */
-void cpuidle_enter_freeze(void)
+int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int index;
 
-	if (cpuidle_not_available(drv, dev))
-		goto fallback;
-
 	/*
 	 * Find the deepest state with ->enter_freeze present, which guarantees
 	 * that interrupts won't be enabled when it exits and allows the tick to
 	 * be frozen safely.
 	 */
-	index = cpuidle_find_deepest_state(drv, dev, true);
-	if (index >= 0) {
+	index = find_deepest_state(drv, dev, true);
+	if (index >= 0)
 		enter_freeze_proper(drv, dev, index);
-		local_irq_enable();
-		return;
-	}
 
-	/*
-	 * It is not safe to freeze the tick, find the deepest state available
-	 * at all and try to enter it normally.
-	 */
-	index = cpuidle_find_deepest_state(drv, dev, false);
-	if (index >= 0) {
-		cpuidle_enter(drv, dev, index);
-		return;
-	}
-
- fallback:
-	arch_cpu_idle();
+	return index;
 }
 
 /**
@@ -217,9 +202,6 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  */
 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
-	if (cpuidle_not_available(drv, dev))
-		return -ENODEV;
-
 	return cpuidle_curr_governor->select(drv, dev);
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index f551a9299ac9..306178d7309f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -126,6 +126,8 @@ struct cpuidle_driver {
 
 #ifdef CONFIG_CPU_IDLE
 extern void disable_cpuidle(void);
+extern bool cpuidle_not_available(struct cpuidle_driver *drv,
+				  struct cpuidle_device *dev);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
 			  struct cpuidle_device *dev);
@@ -150,11 +152,17 @@ extern void cpuidle_resume(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 extern int cpuidle_play_dead(void);
-extern void cpuidle_enter_freeze(void);
+extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+				      struct cpuidle_device *dev);
+extern int cpuidle_enter_freeze(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev);
 
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
 static inline void disable_cpuidle(void) { }
+static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
+					 struct cpuidle_device *dev)
+{return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {return -ENODEV; }
@@ -183,7 +191,12 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
-static inline void cpuidle_enter_freeze(void) { }
+static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+					     struct cpuidle_device *dev)
+{return -ENODEV; }
+static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv,
+				       struct cpuidle_device *dev)
+{return -ENODEV; }
 static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 	struct cpuidle_device *dev) {return NULL; }
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 84b93b68482a..80014a178342 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -82,6 +82,7 @@ static void cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 	unsigned int broadcast;
+	bool reflect;
 
 	/*
 	 * Check if the idle task must be rescheduled. If it is the
@@ -105,6 +106,9 @@ static void cpuidle_idle_call(void)
 	 */
 	rcu_idle_enter();
 
+	if (cpuidle_not_available(drv, dev))
+		goto use_default;
+
 	/*
 	 * Suspend-to-idle ("freeze") is a system state in which all user space
 	 * has been frozen, all I/O devices have been suspended and the only
@@ -115,15 +119,22 @@ static void cpuidle_idle_call(void)
 	 * until a proper wakeup interrupt happens.
 	 */
 	if (idle_should_freeze()) {
-		cpuidle_enter_freeze();
-		goto exit_idle;
-	}
+		entered_state = cpuidle_enter_freeze(drv, dev);
+		if (entered_state >= 0) {
+			local_irq_enable();
+			goto exit_idle;
+		}
 
-	/*
-	 * Ask the cpuidle framework to choose a convenient idle state.
-	 * Fall back to the default arch idle method on errors.
-	 */
-	next_state = cpuidle_select(drv, dev);
+		reflect = false;
+		next_state = cpuidle_find_deepest_state(drv, dev);
+	} else {
+		reflect = true;
+		/*
+		 * Ask the cpuidle framework to choose a convenient idle state.
+		 */
+		next_state = cpuidle_select(drv, dev);
+	}
+	/* Fall back to the default arch idle method on errors. */
 	if (next_state < 0)
 		goto use_default;
 
@@ -170,7 +181,8 @@ static void cpuidle_idle_call(void)
 	/*
 	 * Give the governor an opportunity to reflect on the outcome
 	 */
-	cpuidle_reflect(dev, entered_state);
+	if (reflect)
+		cpuidle_reflect(dev, entered_state);
 
 exit_idle:
 	__current_set_polling();
-- 
cgit v1.2.3-71-gd317


From 2bb785169e9709d41220e5c18b0270883a82f85c Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 1 Mar 2015 10:18:16 -0500
Subject: serial: core: Fix iotype userspace breakage

commit 3ffb1a8193bea ("serial: core: Add big-endian iotype")
re-numbered userspace-dependent values; ioctl(TIOCSSERIAL) can
assign the port iotype (which is expected to match the selected
i/o accessors), so iotype values must not be changed.

Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: <stable@vger.kernel.org> # 3.19+
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Reviewed-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index baf3e1d08416..1094f2d9cadb 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -147,9 +147,9 @@ struct uart_port {
 #define UPIO_HUB6		(1)			/* Hub6 ISA card */
 #define UPIO_MEM		(2)			/* 8b MMIO access */
 #define UPIO_MEM32		(3)			/* 32b little endian */
-#define UPIO_MEM32BE		(4)			/* 32b big endian */
-#define UPIO_AU			(5)			/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(6)			/* Tsi108/109 type IO */
+#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
+#define UPIO_TSI		(5)			/* Tsi108/109 type IO */
+#define UPIO_MEM32BE		(6)			/* 32b big endian */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
-- 
cgit v1.2.3-71-gd317


From 647f162b8e7e446c4bade031eb8a1a0a83d3de82 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 1 Mar 2015 10:24:28 -0500
Subject: serial: uapi: Declare all userspace-visible io types

ioctl(TIOCGSERIAL|TIOCSSERIAL) report and can change the port->iotype.
UART drivers use the UPIO_* definitions, but the uapi header defines
parallel values and userspace uses these parallel values for ioctls;
thus the userspace values are definitive.

Define UPIO_* iotypes in terms of the uapi defines, SERIAL_IO_*;
extend the uapi defines to include all values in use by the serial
core.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 14 +++++++-------
 include/uapi/linux/serial.h |  4 ++++
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1094f2d9cadb..d10965f0d8a4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -143,13 +143,13 @@ struct uart_port {
 	unsigned char		iotype;			/* io access style */
 	unsigned char		unused1;
 
-#define UPIO_PORT		(0)			/* 8b I/O port access */
-#define UPIO_HUB6		(1)			/* Hub6 ISA card */
-#define UPIO_MEM		(2)			/* 8b MMIO access */
-#define UPIO_MEM32		(3)			/* 32b little endian */
-#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(5)			/* Tsi108/109 type IO */
-#define UPIO_MEM32BE		(6)			/* 32b big endian */
+#define UPIO_PORT		(SERIAL_IO_PORT)	/* 8b I/O port access */
+#define UPIO_HUB6		(SERIAL_IO_HUB6)	/* Hub6 ISA card */
+#define UPIO_MEM		(SERIAL_IO_MEM)		/* 8b MMIO access */
+#define UPIO_MEM32		(SERIAL_IO_MEM32)	/* 32b little endian */
+#define UPIO_AU			(SERIAL_IO_AU)		/* Au1x00 and RT288x type IO */
+#define UPIO_TSI		(SERIAL_IO_TSI)		/* Tsi108/109 type IO */
+#define UPIO_MEM32BE		(SERIAL_IO_MEM32BE)	/* 32b big endian */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index 5e0d0ed61cf3..25331f9faa76 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -65,6 +65,10 @@ struct serial_struct {
 #define SERIAL_IO_PORT	0
 #define SERIAL_IO_HUB6	1
 #define SERIAL_IO_MEM	2
+#define SERIAL_IO_MEM32	  3
+#define SERIAL_IO_AU	  4
+#define SERIAL_IO_TSI	  5
+#define SERIAL_IO_MEM32BE 6
 
 #define UART_CLEAR_FIFO		0x01
 #define UART_USE_FIFO		0x02
-- 
cgit v1.2.3-71-gd317


From f54b97ed0b17d3da5f98ba8188cd5646415a922d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 6 Mar 2015 16:37:41 +0000
Subject: irqchip: gicv3-its: Allocate enough memory for the full range of
 DeviceID

The ITS table allocator is only allocating a single page per table.
This works fine for most things, but leads to silent lack of
interrupt delivery if we end-up with a device that has an ID that is
out of the range defined by a single page of memory. Even worse, depending
on the page size, behaviour changes, which is not a very good experience.

A solution is actually to allocate memory for the full range of ID that
the ITS supports. A massive waste memory wise, but at least a safe bet.

Tested on a Phytium SoC.

Tested-by: Chen Baozi <chenbaozi@kylinos.com.cn>
Acked-by: Chen Baozi <chenbaozi@kylinos.com.cn>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1425659870-11832-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   | 25 +++++++++++++++++++++----
 include/linux/irqchip/arm-gic-v3.h |  2 ++
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c217ebcf7a48..733b32fda390 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -806,14 +806,31 @@ static int its_alloc_tables(struct its_node *its)
 		u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+		int order = 0;
+		int alloc_size;
 		u64 tmp;
 		void *base;
 
 		if (type == GITS_BASER_TYPE_NONE)
 			continue;
 
-		/* We're lazy and only allocate a single page for now */
-		base = (void *)get_zeroed_page(GFP_KERNEL);
+		/*
+		 * Allocate as many entries as required to fit the
+		 * range of device IDs that the ITS can grok... The ID
+		 * space being incredibly sparse, this results in a
+		 * massive waste of memory.
+		 *
+		 * For other tables, only allocate a single page.
+		 */
+		if (type == GITS_BASER_TYPE_DEVICE) {
+			u64 typer = readq_relaxed(its->base + GITS_TYPER);
+			u32 ids = GITS_TYPER_DEVBITS(typer);
+
+			order = get_order((1UL << ids) * entry_size);
+		}
+
+		alloc_size = (1 << order) * PAGE_SIZE;
+		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (!base) {
 			err = -ENOMEM;
 			goto out_free;
@@ -841,7 +858,7 @@ retry_baser:
 			break;
 		}
 
-		val |= (PAGE_SIZE / psz) - 1;
+		val |= (alloc_size / psz) - 1;
 
 		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
 		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -882,7 +899,7 @@ retry_baser:
 		}
 
 		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(PAGE_SIZE / entry_size),
+			(int)(alloc_size / entry_size),
 			its_base_type_string[type],
 			(unsigned long)virt_to_phys(base),
 			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 800544bc7bfd..cbdd440d486d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -166,6 +166,8 @@
 
 #define GITS_TRANSLATER			0x10040
 
+#define GITS_TYPER_DEVBITS_SHIFT	13
+#define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
 
 #define GITS_CBASER_VALID		(1UL << 63)
-- 
cgit v1.2.3-71-gd317


From 7cb991164a46992a499ecdc77b17f8ac94bdb75f Mon Sep 17 00:00:00 2001
From: Yun Wu <wuyun.wu@huawei.com>
Date: Fri, 6 Mar 2015 16:37:49 +0000
Subject: irqchip: gicv3-its: Define macros for GITS_CTLR fields

Define macros for GITS_CTLR fields to avoid using magic numbers.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Yun Wu <wuyun.wu@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1425659870-11832-11-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   | 2 +-
 include/linux/irqchip/arm-gic-v3.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index ec20d4a942e0..826da706be4b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1388,7 +1388,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 	writeq_relaxed(baser, its->base + GITS_CBASER);
 	tmp = readq_relaxed(its->base + GITS_CBASER);
 	writeq_relaxed(0, its->base + GITS_CWRITER);
-	writel_relaxed(1, its->base + GITS_CTLR);
+	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
 	if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
 		pr_info("ITS: using cache flushing for cmd queue\n");
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbdd440d486d..781974afff9f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -166,6 +166,9 @@
 
 #define GITS_TRANSLATER			0x10040
 
+#define GITS_CTLR_ENABLE		(1U << 0)
+#define GITS_CTLR_QUIESCENT		(1U << 31)
+
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
-- 
cgit v1.2.3-71-gd317


From 3d3801effda19b21012b5d1981e96cc277df85fd Mon Sep 17 00:00:00 2001
From: Michael Turquette <mturquette@linaro.org>
Date: Wed, 25 Feb 2015 09:11:01 -0800
Subject: clk: introduce clk_is_match

Some drivers compare struct clk pointers as a means of knowing
if the two pointers reference the same clock hardware. This behavior is
dubious (drivers must not dereference struct clk), but did not cause any
regressions until the per-user struct clk patch was merged. Now the test
for matching clk's will always fail with per-user struct clk's.

clk_is_match is introduced to fix the regression and prevent drivers
from comparing the pointers manually.

Fixes: 035a61c314eb ("clk: Make clk API return per-user struct clk instances")
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Shawn Guo <shawn.guo@linaro.org>
Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Signed-off-by: Michael Turquette <mturquette@linaro.org>
[arnd@arndb.de: Fix COMMON_CLK=N && HAS_CLK=Y config]
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[sboyd@codeaurora.org: const arguments to clk_is_match() and
remove unnecessary ternary operation]
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 drivers/clk/clk.c   | 26 ++++++++++++++++++++++++++
 include/linux/clk.h | 18 ++++++++++++++++++
 2 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b9f85fc2ce3f..237f23f68bfc 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2169,6 +2169,32 @@ int clk_get_phase(struct clk *clk)
 	return clk_core_get_phase(clk->core);
 }
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+	/* trivial case: identical struct clk's or both NULL */
+	if (p == q)
+		return true;
+
+	/* true if clk->core pointers match. Avoid derefing garbage */
+	if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q))
+		if (p->core == q->core)
+			return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(clk_is_match);
+
 /**
  * __clk_init - initialize the data structures in a struct clk
  * @dev:	device initializing this clk, placeholder for now
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 8381bbfbc308..68c16a6bedb3 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -125,6 +125,19 @@ int clk_set_phase(struct clk *clk, int degrees);
  */
 int clk_get_phase(struct clk *clk);
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q);
+
 #else
 
 static inline long clk_get_accuracy(struct clk *clk)
@@ -142,6 +155,11 @@ static inline long clk_get_phase(struct clk *clk)
 	return -ENOTSUPP;
 }
 
+static inline bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+	return p == q;
+}
+
 #endif
 
 /**
-- 
cgit v1.2.3-71-gd317


From a5af5aa8b67dfdba36c853b70564fd2dfe73d478 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <a.ryabinin@samsung.com>
Date: Thu, 12 Mar 2015 16:26:11 -0700
Subject: kasan, module, vmalloc: rework shadow allocation for modules

Current approach in handling shadow memory for modules is broken.

Shadow memory could be freed only after memory shadow corresponds it is no
longer used.  vfree() called from interrupt context could use memory its
freeing to store 'struct llist_node' in it:

    void vfree(const void *addr)
    {
    ...
        if (unlikely(in_interrupt())) {
            struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
            if (llist_add((struct llist_node *)addr, &p->list))
                    schedule_work(&p->wq);

Later this list node used in free_work() which actually frees memory.
Currently module_memfree() called in interrupt context will free shadow
before freeing module's memory which could provoke kernel crash.

So shadow memory should be freed after module's memory.  However, such
deallocation order could race with kasan_module_alloc() in module_alloc().

Free shadow right before releasing vm area.  At this point vfree()'d
memory is not used anymore and yet not available for other allocations.
New VM_KASAN flag used to indicate that vm area has dynamically allocated
shadow memory so kasan frees shadow only if it was previously allocated.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h   |  5 +++--
 include/linux/vmalloc.h |  1 +
 kernel/module.c         |  2 --
 mm/kasan/kasan.c        | 14 +++++++++++---
 mm/vmalloc.c            |  1 +
 5 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 72ba725ddf9c..5fa48a21d73e 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -5,6 +5,7 @@
 
 struct kmem_cache;
 struct page;
+struct vm_struct;
 
 #ifdef CONFIG_KASAN
 
@@ -52,7 +53,7 @@ void kasan_slab_free(struct kmem_cache *s, void *object);
 #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
 
 int kasan_module_alloc(void *addr, size_t size);
-void kasan_module_free(void *addr);
+void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* CONFIG_KASAN */
 
@@ -82,7 +83,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
 static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_module_free(void *addr) {}
+static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
 #endif /* CONFIG_KASAN */
 
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 7d7acb35603d..0ec598381f97 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -17,6 +17,7 @@ struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 #define VM_VPAGES		0x00000010	/* buffer for pages was vmalloc'ed */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x00000040      /* don't add guard page */
+#define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
diff --git a/kernel/module.c b/kernel/module.c
index cc93cf68653c..b3d634ed06c9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,7 +56,6 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
-#include <linux/kasan.h>
 #include <linux/jump_label.h>
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
@@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
 void __weak module_memfree(void *module_region)
 {
 	vfree(module_region);
-	kasan_module_free(module_region);
 }
 
 void __weak module_arch_cleanup(struct module *mod)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 78fee632a7ee..936d81661c47 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -29,6 +29,7 @@
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <linux/kasan.h>
 
 #include "kasan.h"
@@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size)
 			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
 			PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
 			__builtin_return_address(0));
-	return ret ? 0 : -ENOMEM;
+
+	if (ret) {
+		find_vm_area(addr)->flags |= VM_KASAN;
+		return 0;
+	}
+
+	return -ENOMEM;
 }
 
-void kasan_module_free(void *addr)
+void kasan_free_shadow(const struct vm_struct *vm)
 {
-	vfree(kasan_mem_to_shadow(addr));
+	if (vm->flags & VM_KASAN)
+		vfree(kasan_mem_to_shadow(vm->addr));
 }
 
 static void register_global(struct kasan_global *global)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35b25e1340ca..49abccf29a29 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1418,6 +1418,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 		spin_unlock(&vmap_area_lock);
 
 		vmap_debug_free_range(va->va_start, va->va_end);
+		kasan_free_shadow(vm);
 		free_unmap_vmap_area(va);
 		vm->size -= PAGE_SIZE;
 
-- 
cgit v1.2.3-71-gd317


From d3733e5c98e952d419e77fa721912f09d15a2806 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <a.ryabinin@samsung.com>
Date: Thu, 12 Mar 2015 16:26:14 -0700
Subject: kasan, module: move MODULE_ALIGN macro into <linux/moduleloader.h>

include/linux/moduleloader.h is more suitable place for this macro.
Also change alignment to PAGE_SIZE for CONFIG_KASAN=n as such
alignment already assumed in several places.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h        | 4 ----
 include/linux/moduleloader.h | 8 ++++++++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5fa48a21d73e..5bb074431eb0 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -50,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size);
 void kasan_slab_alloc(struct kmem_cache *s, void *object);
 void kasan_slab_free(struct kmem_cache *s, void *object);
 
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
-
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* CONFIG_KASAN */
 
-#define MODULE_ALIGN 1
-
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_enable_current(void) {}
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index f7556261fe3c..4d0cb9bba93e 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -84,4 +84,12 @@ void module_arch_cleanup(struct module *mod);
 
 /* Any cleanup before freeing mod->module_init */
 void module_arch_freeing_init(struct module *mod);
+
+#ifdef CONFIG_KASAN
+#include <linux/kasan.h>
+#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define MODULE_ALIGN PAGE_SIZE
+#endif
+
 #endif
-- 
cgit v1.2.3-71-gd317


From a697c2efba03ac7bfdbffbba7f0f1aa294f7dee0 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 10 Mar 2015 20:31:04 -0700
Subject: of/platform: Fix sparc:allmodconfig build

sparc:allmodconfig fails to build with:

drivers/built-in.o: In function `platform_bus_init':
(.init.text+0x3684): undefined reference to `of_platform_register_reconfig_notifier'

of_platform_register_reconfig_notifier is only declared if both OF_ADDRESS
and OF_DYNAMIC are configured. Yet, the include file only declares a dummy
function if OF_DYNAMIC is not configured. The sparc architecture does not
configure OF_ADDRESS, but does configure OF_DYNAMIC, causing above error.

Fixes: 801d728c10db ("of/reconfig: Add OF_DYNAMIC notifier for platform_bus_type")
Cc: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 8a860f096c35..611a691145c4 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -84,7 +84,7 @@ static inline int of_platform_populate(struct device_node *root,
 static inline void of_platform_depopulate(struct device *parent) { }
 #endif
 
-#ifdef CONFIG_OF_DYNAMIC
+#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS)
 extern void of_platform_register_reconfig_notifier(void);
 #else
 static inline void of_platform_register_reconfig_notifier(void) { }
-- 
cgit v1.2.3-71-gd317