From 466b3ddfbcf4f5ce402a77397630a0fa9ea9ce6b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sun, 30 Oct 2011 16:35:08 +0100
Subject: PCI: Fix compile errors with PCI_ATS and !PCI_IOV

The ats and sroiv members of 'struct pci_dev' are required
for the ATS code already, even without IOV support compiled
in. So depend on ATS here. This is fine with PCI_IOV too
because it selects PCI_ATS. Also the prototypes for ATS
need to be available for PCI_ATS.

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-ats.h | 6 +++---
 include/linux/pci.h     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index e3d0b3890249..7ef68724f0f0 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,7 +12,7 @@ struct pci_ats {
 	unsigned int is_enabled:1;      /* Enable bit is set */
 };
 
-#ifdef CONFIG_PCI_IOV
+#ifdef CONFIG_PCI_ATS
 
 extern int pci_enable_ats(struct pci_dev *dev, int ps);
 extern void pci_disable_ats(struct pci_dev *dev);
@@ -29,7 +29,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev)
 	return dev->ats && dev->ats->is_enabled;
 }
 
-#else /* CONFIG_PCI_IOV */
+#else /* CONFIG_PCI_ATS */
 
 static inline int pci_enable_ats(struct pci_dev *dev, int ps)
 {
@@ -50,7 +50,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev)
 	return 0;
 }
 
-#endif /* CONFIG_PCI_IOV */
+#endif /* CONFIG_PCI_ATS */
 
 #ifdef CONFIG_PCI_PRI
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 337df0d5d5f7..7cda65b5f798 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -338,7 +338,7 @@ struct pci_dev {
 	struct list_head msi_list;
 #endif
 	struct pci_vpd *vpd;
-#ifdef CONFIG_PCI_IOV
+#ifdef CONFIG_PCI_ATS
 	union {
 		struct pci_sriov *sriov;	/* SR-IOV capability related */
 		struct pci_dev *physfn;	/* the PF this VF is associated with */
-- 
cgit v1.2.3-71-gd317


From 1788ea6e3b2a58cf4fb00206e362d9caff8d86a7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 4 Nov 2011 13:31:21 -0400
Subject: nfs: when attempting to open a directory, fall back on normal lookup
 (try #5)

commit d953126 changed how nfs_atomic_lookup handles an -EISDIR return
from an OPEN call. Prior to that patch, that caused the client to fall
back to doing a normal lookup. When that patch went in, the code began
returning that error to userspace. The d_revalidate codepath however
never had the corresponding change, so it was still possible to end up
with a NULL ctx->state pointer after that.

That patch caused a regression. When we attempt to open a directory that
does not have a cached dentry, that open now errors out with EISDIR. If
you attempt the same open with a cached dentry, it will succeed.

Fix this by reverting the change in nfs_atomic_lookup and allowing
attempts to open directories to fall back to a normal lookup

Also, add a NFSv4-specific f_ops->open routine that just returns
-ENOTDIR. This should never be called if things are working properly,
but if it ever is, then the dprintk may help in debugging.

To facilitate this, a new file_operations field is also added to the
nfs_rpc_ops struct.

Cc: stable@kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  2 +-
 fs/nfs/file.c           | 32 ++++++++++++++++++++++++++++++++
 fs/nfs/inode.c          |  2 +-
 fs/nfs/nfs3proc.c       |  1 +
 fs/nfs/nfs4proc.c       |  1 +
 fs/nfs/proc.c           |  1 +
 include/linux/nfs_fs.h  |  3 +++
 include/linux/nfs_xdr.h |  1 +
 8 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95ac48c..ac2899098147 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0a1f8312b4dc..6d93e0754b5e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -886,3 +886,35 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
 			file->f_path.dentry->d_name.name, arg);
 	return -EINVAL;
 }
+
+#ifdef CONFIG_NFS_V4
+static int
+nfs4_file_open(struct inode *inode, struct file *filp)
+{
+	/*
+	 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
+	 * this point, then something is very wrong
+	 */
+	dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
+	return -ENOTDIR;
+}
+
+const struct file_operations nfs4_file_operations = {
+	.llseek		= nfs_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= nfs_file_read,
+	.aio_write	= nfs_file_write,
+	.mmap		= nfs_file_mmap,
+	.open		= nfs4_file_open,
+	.flush		= nfs_file_flush,
+	.release	= nfs_file_release,
+	.fsync		= nfs_file_fsync,
+	.lock		= nfs_lock,
+	.flock		= nfs_flock,
+	.splice_read	= nfs_file_splice_read,
+	.splice_write	= nfs_file_splice_write,
+	.check_flags	= nfs_check_flags,
+	.setlease	= nfs_setlease,
+};
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c07a55aec838..50a15fa8cf98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		 */
 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
-			inode->i_fop = &nfs_file_operations;
+			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690ca08c..d4bc9ed91748 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
 	.file_inode_ops	= &nfs3_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b60fddf606f7..069cb8094d43 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6253,6 +6253,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
 	.file_inode_ops	= &nfs4_file_inode_operations,
+	.file_ops	= &nfs4_file_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7e..f48125da198a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
 	.file_inode_ops	= &nfs_file_inode_operations,
+	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ab2c6343361a..92ecf5585fac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -410,6 +410,9 @@ extern const struct inode_operations nfs_file_inode_operations;
 extern const struct inode_operations nfs3_file_inode_operations;
 #endif /* CONFIG_NFS_V3 */
 extern const struct file_operations nfs_file_operations;
+#ifdef CONFIG_NFS_V4
+extern const struct file_operations nfs4_file_operations;
+#endif /* CONFIG_NFS_V4 */
 extern const struct address_space_operations nfs_file_aops;
 extern const struct address_space_operations nfs_dir_aops;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c74595ba7094..2a7c533be5dd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1192,6 +1192,7 @@ struct nfs_rpc_ops {
 	const struct dentry_operations *dentry_ops;
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;
+	const struct file_operations *file_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
-- 
cgit v1.2.3-71-gd317


From 13662dc5b177d68885695ef513dd4ae0e4d2a099 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Tue, 8 Nov 2011 03:16:13 -0700
Subject: ARM: OMAP: HWMOD: Unify DSS resets for OMAPs

This patch adds a custom DSS reset function used on OMAPs from OMAP2
forward.

The function doesn't actually do a reset, it only waits for the reset to
complete. The reason for this is that on OMAP4 there is no possibility
to do a SW reset, and on OMAP2/3 doing a SW reset for dss_core resets
all the other DSS modules also, thus breaking the HWMOD model where
every DSS module is handled independently.

This fixes the problem with DSS reset on OMAP4, caused by the fact that
because there's no SW reset for dss_core on OMAP4, the HWMOD framework
doesn't try to reset dss_core and thus the DSS clocks were never enabled
at the same time. This causes causes the HWMOD reset to fail for
dss_dispc and dss_rfbi.

The common reset function will also allow us to fix another problem in
the future: before doing a reset we need to disable DSS outputs, which
are in some cases enabled by the bootloader, as otherwise DSS HW seems
to get more or less stuck, requiring a power reset to recover.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
[paul@pwsan.com: modified to build arch/arm/mach-omap2/display.o
 unconditionally to avoid an error when !CONFIG_OMAP2_DSS]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/Makefile                       |  5 +---
 arch/arm/mach-omap2/display.c                      | 35 ++++++++++++++++++++++
 .../mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c |  2 ++
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c         |  2 ++
 arch/arm/plat-omap/include/plat/common.h           |  3 ++
 include/video/omapdss.h                            |  7 -----
 6 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 69ab1c069134..b009f17dee56 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -4,7 +4,7 @@
 
 # Common support
 obj-y := id.o io.o control.o mux.o devices.o serial.o gpmc.o timer.o pm.o \
-	 common.o gpio.o dma.o wd_timer.o
+	 common.o gpio.o dma.o wd_timer.o display.o
 
 omap-2-3-common				= irq.o sdrc.o
 hwmod-common				= omap_hwmod.o \
@@ -264,7 +264,4 @@ smsc911x-$(CONFIG_SMSC911X)		:= gpmc-smsc911x.o
 obj-y					+= $(smsc911x-m) $(smsc911x-y)
 obj-$(CONFIG_ARCH_OMAP4)		+= hwspinlock.o
 
-disp-$(CONFIG_OMAP2_DSS)		:= display.o
-obj-y					+= $(disp-m) $(disp-y)
-
 obj-y					+= common-board-devices.o twl-common.o
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index adb2756e242f..941b5459707f 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -27,6 +27,7 @@
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
 #include <plat/omap-pm.h>
+#include <plat/common.h>
 
 #include "control.h"
 
@@ -172,3 +173,37 @@ int __init omap_display_init(struct omap_dss_board_info *board_data)
 
 	return r;
 }
+
+#define MAX_MODULE_SOFTRESET_WAIT	10000
+int omap_dss_reset(struct omap_hwmod *oh)
+{
+	struct omap_hwmod_opt_clk *oc;
+	int c = 0;
+	int i, r;
+
+	if (!(oh->class->sysc->sysc_flags & SYSS_HAS_RESET_STATUS)) {
+		pr_err("dss_core: hwmod data doesn't contain reset data\n");
+		return -EINVAL;
+	}
+
+	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+		if (oc->_clk)
+			clk_enable(oc->_clk);
+
+	omap_test_timeout((omap_hwmod_read(oh, oh->class->sysc->syss_offs)
+				& SYSS_RESETDONE_MASK),
+			MAX_MODULE_SOFTRESET_WAIT, c);
+
+	if (c == MAX_MODULE_SOFTRESET_WAIT)
+		pr_warning("dss_core: waiting for reset to finish failed\n");
+	else
+		pr_debug("dss_core: softreset done\n");
+
+	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+		if (oc->_clk)
+			clk_disable(oc->_clk);
+
+	r = (c == MAX_MODULE_SOFTRESET_WAIT) ? -ETIMEDOUT : 0;
+
+	return r;
+}
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
index d78c1324ae59..c11273da5dcc 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_3xxx_ipblock_data.c
@@ -11,6 +11,7 @@
 #include <plat/omap_hwmod.h>
 #include <plat/serial.h>
 #include <plat/dma.h>
+#include <plat/common.h>
 
 #include <mach/irqs.h>
 
@@ -51,6 +52,7 @@ static struct omap_hwmod_class_sysconfig omap2_dss_sysc = {
 struct omap_hwmod_class omap2_dss_hwmod_class = {
 	.name	= "dss",
 	.sysc	= &omap2_dss_sysc,
+	.reset	= omap_dss_reset,
 };
 
 /*
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index cadf0bb2d3b7..3b04d63316f5 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -30,6 +30,7 @@
 #include <plat/mmc.h>
 #include <plat/i2c.h>
 #include <plat/dmtimer.h>
+#include <plat/common.h>
 
 #include "omap_hwmod_common_data.h"
 
@@ -1187,6 +1188,7 @@ static struct omap_hwmod_class_sysconfig omap44xx_dss_sysc = {
 static struct omap_hwmod_class omap44xx_dss_hwmod_class = {
 	.name	= "dss",
 	.sysc	= &omap44xx_dss_sysc,
+	.reset	= omap_dss_reset,
 };
 
 /* dss */
diff --git a/arch/arm/plat-omap/include/plat/common.h b/arch/arm/plat-omap/include/plat/common.h
index c50df4814f6f..3ff3e36580f2 100644
--- a/arch/arm/plat-omap/include/plat/common.h
+++ b/arch/arm/plat-omap/include/plat/common.h
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 
 #include <plat/i2c.h>
+#include <plat/omap_hwmod.h>
 
 struct sys_timer;
 
@@ -55,6 +56,8 @@ void am35xx_init_early(void);
 void ti816x_init_early(void);
 void omap4430_init_early(void);
 
+extern int omap_dss_reset(struct omap_hwmod *);
+
 void omap_sram_init(void);
 
 /*
diff --git a/include/video/omapdss.h b/include/video/omapdss.h
index b66ebb2032c6..378c7ed6760b 100644
--- a/include/video/omapdss.h
+++ b/include/video/omapdss.h
@@ -307,15 +307,8 @@ struct omap_dss_board_info {
 	void (*dsi_disable_pads)(int dsi_id, unsigned lane_mask);
 };
 
-#if defined(CONFIG_OMAP2_DSS_MODULE) || defined(CONFIG_OMAP2_DSS)
 /* Init with the board info */
 extern int omap_display_init(struct omap_dss_board_info *board_data);
-#else
-static inline int omap_display_init(struct omap_dss_board_info *board_data)
-{
-	return 0;
-}
-#endif
 
 struct omap_display_platform_data {
 	struct omap_dss_board_info *board_data;
-- 
cgit v1.2.3-71-gd317


From d65670a78cdbfae94f20a9e05ec705871d7cdf2b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 31 Oct 2011 17:06:35 -0400
Subject: clocksource: Avoid selecting mult values that might overflow when
 adjusted

For some frequencies, the clocks_calc_mult_shift() function will
unfortunately select mult values very close to 0xffffffff.  This
has the potential to overflow when NTP adjusts the clock, adding
to the mult value.

This patch adds a clocksource.maxadj value, which provides
an approximation of an 11% adjustment(NTP limits adjustments to
500ppm and the tick adjustment is limited to 10%), which could
be made to the clocksource.mult value. This is then used to both
check that the current mult value won't overflow/underflow, as
well as warning us if the timekeeping_adjust() code pushes over
that 11% boundary.

v2: Fix max_adjustment calculation, and improve WARN_ONCE
messages.

v3: Don't warn before maxadj has actually been set

CC: Yong Zhang <yong.zhang0@gmail.com>
CC: David Daney <ddaney.cavm@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Chen Jie <chenj@lemote.com>
CC: zhangfx <zhangfx@lemote.com>
CC: stable@kernel.org
Reported-by: Chen Jie <chenj@lemote.com>
Reported-by: zhangfx <zhangfx@lemote.com>
Tested-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |  3 ++-
 kernel/time/clocksource.c   | 58 +++++++++++++++++++++++++++++++++++++--------
 kernel/time/timekeeping.c   |  7 ++++++
 3 files changed, 57 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 139c4db55f17..c86c940d1de3 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
+ * @maxadj		maximum adjustment value to mult (~11%)
  * @flags:		flags describing special properties
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
@@ -172,7 +173,7 @@ struct clocksource {
 	u32 mult;
 	u32 shift;
 	u64 max_idle_ns;
-
+	u32 maxadj;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cf52fda2e096..cfc65e1eb9fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -491,6 +491,22 @@ void clocksource_touch_watchdog(void)
 	clocksource_resume_watchdog();
 }
 
+/**
+ * clocksource_max_adjustment- Returns max adjustment amount
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u32 clocksource_max_adjustment(struct clocksource *cs)
+{
+	u64 ret;
+	/*
+	 * We won't try to correct for more then 11% adjustments (110,000 ppm),
+	 */
+	ret = (u64)cs->mult * 11;
+	do_div(ret,100);
+	return (u32)ret;
+}
+
 /**
  * clocksource_max_deferment - Returns max time the clocksource can be deferred
  * @cs:         Pointer to clocksource
@@ -503,25 +519,28 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
-	 * is equivalent to the below.
-	 * max_cycles < (2^63)/cs->mult
-	 * max_cycles < 2^(log2((2^63)/cs->mult))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
-	 * max_cycles < 2^(63 - log2(cs->mult))
-	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+	 * which is equivalent to the below.
+	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
+	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
+	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
 	 * determined by the minimum of max_cycles and cs->mask.
+	 * Note: Here we subtract the maxadj to make sure we don't sleep for
+	 * too long if there's a large negative adjustment.
 	 */
 	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
+					cs->shift);
 
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
@@ -640,7 +659,6 @@ static void clocksource_enqueue(struct clocksource *cs)
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
-
 	/*
 	 * Calc the maximum number of seconds which we can run before
 	 * wrapping around. For clocksources which have a mask > 32bit
@@ -661,6 +679,20 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 
 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 			       NSEC_PER_SEC / scale, sec * scale);
+
+	/*
+	 * for clocksources that have large mults, to avoid overflow.
+	 * Since mult may be adjusted by ntp, add an safety extra margin
+	 *
+	 */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	while ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult)) {
+		cs->mult >>= 1;
+		cs->shift--;
+		cs->maxadj = clocksource_max_adjustment(cs);
+	}
+
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
@@ -701,6 +733,12 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max adjustment for given mult/shift */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
 	/* calculate max idle time permitted for this clocksource */
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2b021b0e8507..e65ff3171102 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -820,6 +820,13 @@ static void timekeeping_adjust(s64 offset)
 	} else
 		return;
 
+	WARN_ONCE(timekeeper.clock->maxadj &&
+			(timekeeper.mult + adj > timekeeper.clock->mult +
+						timekeeper.clock->maxadj),
+			"Adjusting %s more then 11%% (%ld vs %ld)\n",
+			timekeeper.clock->name, (long)timekeeper.mult + adj,
+			(long)timekeeper.clock->mult +
+				timekeeper.clock->maxadj);
 	timekeeper.mult += adj;
 	timekeeper.xtime_interval += interval;
 	timekeeper.xtime_nsec -= offset;
-- 
cgit v1.2.3-71-gd317


From 224736d9113ab4a7cf3f05c05377492bd99b4b02 Mon Sep 17 00:00:00 2001
From: Stratos Psomadakis <psomas@grnet.gr>
Date: Thu, 10 Nov 2011 15:45:37 +0200
Subject: libceph: Allocate larger oid buffer in request msgs

ceph_osd_request struct allocates a 40-byte buffer for object names.
RBD image names can be up to 96 chars long (100 with the .rbd suffix),
which results in the object name for the image being truncated, and a
subsequent map failure.

Increase the oid buffer in request messages, in order to avoid the
truncation.

Signed-off-by: Stratos Psomadakis <psomas@grnet.gr>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/ceph/osd_client.h | 8 +++++++-
 net/ceph/osd_client.c           | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f88eacb111d4..7c05ac202d90 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,12 @@
 #include "osdmap.h"
 #include "messenger.h"
 
+/* 
+ * Maximum object name size 
+ * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100) 
+ */
+#define MAX_OBJ_NAME_SIZE 100
+
 struct ceph_msg;
 struct ceph_snap_context;
 struct ceph_osd_request;
@@ -75,7 +81,7 @@ struct ceph_osd_request {
 	struct inode *r_inode;         	      /* for use by callbacks */
 	void *r_priv;			      /* ditto */
 
-	char              r_oid[40];          /* object name */
+	char              r_oid[MAX_OBJ_NAME_SIZE];          /* object name */
 	int               r_oid_len;
 	unsigned long     r_stamp;            /* send OR check time */
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 733e46008b89..f4f3f58f5234 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -244,7 +244,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 		ceph_pagelist_init(req->r_trail);
 	}
 	/* create request message; allow space for oid */
-	msg_size += 40;
+	msg_size += MAX_OBJ_NAME_SIZE;
 	if (snapc)
 		msg_size += sizeof(u64) * snapc->num_snaps;
 	if (use_mempool)
-- 
cgit v1.2.3-71-gd317


From f088d5a9c5dd22b6559fa3f3939973bc374c977b Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Sat, 12 Nov 2011 14:51:23 +0900
Subject: drm/exynos: use gem create function generically

this patch addes exynos_drm_gem_init() creating and initialzing a gem.
allocation functions could use this function to create new gem and
it changes size type of exynos_drm_gem_create structure to 64bit
and also corrects comments to exynos_drm_gem_create structure.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fb.c  |  6 +--
 drivers/gpu/drm/exynos/exynos_drm_gem.c | 79 ++++++++++++++++++++-------------
 drivers/gpu/drm/exynos/exynos_drm_gem.h |  6 +--
 include/drm/exynos_drm.h                |  9 ++--
 4 files changed, 59 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index 7d91a542c756..8d0f66224045 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -165,9 +165,9 @@ exynos_drm_fb_init(struct drm_file *file_priv, struct drm_device *dev,
 
 			goto out;
 		} else {
-			exynos_gem_obj = exynos_drm_gem_create(file_priv, dev,
-							size,
-							&mode_cmd->handle);
+			exynos_gem_obj = exynos_drm_gem_create(dev, file_priv,
+							&mode_cmd->handle,
+							size);
 			if (IS_ERR(exynos_gem_obj)) {
 				ret = PTR_ERR(exynos_gem_obj);
 				goto err_buffer;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index a8e7a88906ed..bd6ede83b684 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -62,40 +62,28 @@ static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
 	return (unsigned int)obj->map_list.hash.key << PAGE_SHIFT;
 }
 
-struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
-		struct drm_device *dev, unsigned int size,
-		unsigned int *handle)
+static struct exynos_drm_gem_obj
+		*exynos_drm_gem_init(struct drm_device *drm_dev,
+			struct drm_file *file_priv, unsigned int *handle,
+			unsigned int size)
 {
 	struct exynos_drm_gem_obj *exynos_gem_obj;
-	struct exynos_drm_buf_entry *entry;
 	struct drm_gem_object *obj;
 	int ret;
 
-	DRM_DEBUG_KMS("%s\n", __FILE__);
-
-	size = roundup(size, PAGE_SIZE);
-
 	exynos_gem_obj = kzalloc(sizeof(*exynos_gem_obj), GFP_KERNEL);
 	if (!exynos_gem_obj) {
 		DRM_ERROR("failed to allocate exynos gem object.\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* allocate the new buffer object and memory region. */
-	entry = exynos_drm_buf_create(dev, size);
-	if (!entry) {
-		kfree(exynos_gem_obj);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	exynos_gem_obj->entry = entry;
-
 	obj = &exynos_gem_obj->base;
 
-	ret = drm_gem_object_init(dev, obj, size);
+	ret = drm_gem_object_init(drm_dev, obj, size);
 	if (ret < 0) {
-		DRM_ERROR("failed to initailize gem object.\n");
-		goto err_obj_init;
+		DRM_ERROR("failed to initialize gem object.\n");
+		ret = -EINVAL;
+		goto err_object_init;
 	}
 
 	DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
@@ -127,24 +115,55 @@ err_handle_create:
 err_create_mmap_offset:
 	drm_gem_object_release(obj);
 
-err_obj_init:
-	exynos_drm_buf_destroy(dev, exynos_gem_obj->entry);
-
+err_object_init:
 	kfree(exynos_gem_obj);
 
 	return ERR_PTR(ret);
 }
 
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_device *dev,
+				struct drm_file *file_priv,
+				unsigned int *handle, unsigned long size)
+{
+
+	struct exynos_drm_gem_obj *exynos_gem_obj = NULL;
+	struct exynos_drm_buf_entry *entry;
+	int ret;
+
+	size = roundup(size, PAGE_SIZE);
+
+	DRM_DEBUG_KMS("%s: size = 0x%lx\n", __FILE__, size);
+
+	entry = exynos_drm_buf_create(dev, size);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	exynos_gem_obj = exynos_drm_gem_init(dev, file_priv, handle, size);
+	if (IS_ERR(exynos_gem_obj)) {
+		ret = PTR_ERR(exynos_gem_obj);
+		goto err_gem_init;
+	}
+
+	exynos_gem_obj->entry = entry;
+
+	return exynos_gem_obj;
+
+err_gem_init:
+	exynos_drm_buf_destroy(dev, exynos_gem_obj->entry);
+
+	return ERR_PTR(ret);
+}
+
 int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
-		struct drm_file *file_priv)
+					struct drm_file *file_priv)
 {
 	struct drm_exynos_gem_create *args = data;
-	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct exynos_drm_gem_obj *exynos_gem_obj = NULL;
 
-	DRM_DEBUG_KMS("%s : size = 0x%x\n", __FILE__, args->size);
+	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
-			&args->handle);
+	exynos_gem_obj = exynos_drm_gem_create(dev, file_priv,
+						&args->handle, args->size);
 	if (IS_ERR(exynos_gem_obj))
 		return PTR_ERR(exynos_gem_obj);
 
@@ -302,8 +321,8 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
 	args->pitch = args->width * args->bpp >> 3;
 	args->size = args->pitch * args->height;
 
-	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
-							&args->handle);
+	exynos_gem_obj = exynos_drm_gem_create(dev, file_priv, &args->handle,
+							args->size);
 	if (IS_ERR(exynos_gem_obj))
 		return PTR_ERR(exynos_gem_obj);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index e5fc0148277b..213838d9606e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -49,9 +49,9 @@ struct exynos_drm_gem_obj {
 };
 
 /* create a new buffer and get a new gem handle. */
-struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
-		struct drm_device *dev, unsigned int size,
-		unsigned int *handle);
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_device *dev,
+		struct drm_file *file_priv,
+		unsigned int *handle, unsigned long size);
 
 /*
  * request gem object creation and buffer allocation as the size
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index 1d161cb3aca5..12050434d57a 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -32,17 +32,16 @@
 /**
  * User-desired buffer creation information structure.
  *
- * @size: requested size for the object.
+ * @size: user-desired memory allocation size.
  *	- this size value would be page-aligned internally.
  * @flags: user request for setting memory type or cache attributes.
- * @handle: returned handle for the object.
- * @pad: just padding to be 64-bit aligned.
+ * @handle: returned a handle to created gem object.
+ *	- this handle will be set by gem module of kernel side.
  */
 struct drm_exynos_gem_create {
-	unsigned int size;
+	uint64_t size;
 	unsigned int flags;
 	unsigned int handle;
-	unsigned int pad;
 };
 
 /**
-- 
cgit v1.2.3-71-gd317


From 7b08fae8fbf0c14f003be8e039ed37bcbae4415a Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <marcos.mage@gmail.com>
Date: Tue, 1 Nov 2011 11:15:40 -0700
Subject: device.h: Fix struct member documentation

Fix warning of make xmldocs of documention of the struct member iommu_ops from struct bus_type.

Signed-off-by: Marcos Paulo de Souza <marcos.mage@gmail.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index ffbcf95cd97d..2b8832060893 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -69,7 +69,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @resume:	Called to bring a device on this bus out of sleep mode.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
- * @iommu_ops   IOMMU specific operations for this bus, used to attach IOMMU
+ * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
  *              driver implementations to a bus and allow the driver to do
  *              bus-specific setup
  * @p:		The private data of the driver core, only the driver core can
-- 
cgit v1.2.3-71-gd317


From 93f3350c46fa5dfcc9650eb19b186e71ffc924c3 Mon Sep 17 00:00:00 2001
From: Claudio Scordino <claudio@evidence.eu.com>
Date: Wed, 9 Nov 2011 15:51:49 +0100
Subject: RS485: fix inconsistencies in the meaning of some variables

The crisv10.c and the atmel_serial.c serial drivers intepret the fields of the
serial_rs485 structure in a different way.

In particular, crisv10.c uses SER_RS485_RTS_AFTER_SEND and
SER_RS485_RTS_ON_SEND for the voltage of the RTS pin; atmel_serial.c,
instead, uses these values to know if a delay must be set before and
after sending.  This patch makes the usage of these variables consistent
across all drivers and fixes the Documentation as well.

From now on, SER_RS485_RTS_AFTER_SEND and SER_RS485_RTS_ON_SEND will be
used to set the voltage of the RTS pin (as in the crisv10.c driver); the
delay will be understood by looking only at the value of
delay_rts_before_send and delay_rts_after_send.

Signed-off-by: Claudio Scordino <claudio@evidence.eu.com>
Signed-off-by: Darron Black <darron@griffin.net>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/serial/serial-rs485.txt | 14 +++++++++++---
 drivers/tty/serial/atmel_serial.c     | 16 +++-------------
 drivers/tty/serial/crisv10.c          | 10 ++--------
 include/linux/serial.h                | 14 ++++++++------
 4 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/Documentation/serial/serial-rs485.txt b/Documentation/serial/serial-rs485.txt
index 079cb3df62cf..41c8378c0b2f 100644
--- a/Documentation/serial/serial-rs485.txt
+++ b/Documentation/serial/serial-rs485.txt
@@ -97,15 +97,23 @@
 
 	struct serial_rs485 rs485conf;
 
-	/* Set RS485 mode: */
+	/* Enable RS485 mode: */
 	rs485conf.flags |= SER_RS485_ENABLED;
 
+	/* Set logical level for RTS pin equal to 1 when sending: */
+	rs485conf.flags |= SER_RS485_RTS_ON_SEND;
+	/* or, set logical level for RTS pin equal to 0 when sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND);
+
+	/* Set logical level for RTS pin equal to 1 after sending: */
+	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
+	/* or, set logical level for RTS pin equal to 0 after sending: */
+	rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND);
+
 	/* Set rts delay before send, if needed: */
-	rs485conf.flags |= SER_RS485_RTS_BEFORE_SEND;
 	rs485conf.delay_rts_before_send = ...;
 
 	/* Set rts delay after send, if needed: */
-	rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
 	rs485conf.delay_rts_after_send = ...;
 
 	/* Set this flag if you want to receive data even whilst sending data */
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 4a0f86fa1e90..4c823f341d98 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -228,7 +228,7 @@ void atmel_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
 	if (rs485conf->flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
 		atmel_port->tx_done_mask = ATMEL_US_TXEMPTY;
-		if (rs485conf->flags & SER_RS485_RTS_AFTER_SEND)
+		if ((rs485conf->delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port, rs485conf->delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
 	} else {
@@ -304,7 +304,7 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl)
 
 	if (atmel_port->rs485.flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
-		if (atmel_port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+		if ((atmel_port->rs485.delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port,
 					atmel_port->rs485.delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
@@ -1228,7 +1228,7 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	if (atmel_port->rs485.flags & SER_RS485_ENABLED) {
 		dev_dbg(port->dev, "Setting UART to RS485\n");
-		if (atmel_port->rs485.flags & SER_RS485_RTS_AFTER_SEND)
+		if ((atmel_port->rs485.delay_rts_after_send) > 0)
 			UART_PUT_TTGR(port,
 					atmel_port->rs485.delay_rts_after_send);
 		mode |= ATMEL_US_USMODE_RS485;
@@ -1447,16 +1447,6 @@ static void __devinit atmel_of_init_port(struct atmel_uart_port *atmel_port,
 		rs485conf->delay_rts_after_send = rs485_delay[1];
 		rs485conf->flags = 0;
 
-		if (rs485conf->delay_rts_before_send == 0 &&
-		    rs485conf->delay_rts_after_send == 0) {
-			rs485conf->flags |= SER_RS485_RTS_ON_SEND;
-		} else {
-			if (rs485conf->delay_rts_before_send)
-				rs485conf->flags |= SER_RS485_RTS_BEFORE_SEND;
-			if (rs485conf->delay_rts_after_send)
-				rs485conf->flags |= SER_RS485_RTS_AFTER_SEND;
-		}
-
 		if (of_get_property(np, "rs485-rx-during-tx", NULL))
 			rs485conf->flags |= SER_RS485_RX_DURING_TX;
 
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index b7435043f2fe..1dfba7b779c8 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -3234,9 +3234,8 @@ rs_write(struct tty_struct *tty,
 		e100_disable_rx(info);
 		e100_enable_rx_irq(info);
 #endif
-		if ((info->rs485.flags & SER_RS485_RTS_BEFORE_SEND) &&
-			(info->rs485.delay_rts_before_send > 0))
-				msleep(info->rs485.delay_rts_before_send);
+		if (info->rs485.delay_rts_before_send > 0)
+			msleep(info->rs485.delay_rts_before_send);
 	}
 #endif /* CONFIG_ETRAX_RS485 */
 
@@ -3693,10 +3692,6 @@ rs_ioctl(struct tty_struct *tty,
 
 		rs485data.delay_rts_before_send = rs485ctrl.delay_rts_before_send;
 		rs485data.flags = 0;
-		if (rs485data.delay_rts_before_send != 0)
-			rs485data.flags |= SER_RS485_RTS_BEFORE_SEND;
-		else
-			rs485data.flags &= ~(SER_RS485_RTS_BEFORE_SEND);
 
 		if (rs485ctrl.enabled)
 			rs485data.flags |= SER_RS485_ENABLED;
@@ -4531,7 +4526,6 @@ static int __init rs_init(void)
 		/* Set sane defaults */
 		info->rs485.flags &= ~(SER_RS485_RTS_ON_SEND);
 		info->rs485.flags |= SER_RS485_RTS_AFTER_SEND;
-		info->rs485.flags &= ~(SER_RS485_RTS_BEFORE_SEND);
 		info->rs485.delay_rts_before_send = 0;
 		info->rs485.flags &= ~(SER_RS485_ENABLED);
 #endif
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 97ff8e27a6cc..3d86517fe7d5 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -207,13 +207,15 @@ struct serial_icounter_struct {
 
 struct serial_rs485 {
 	__u32	flags;			/* RS485 feature flags */
-#define SER_RS485_ENABLED		(1 << 0)
-#define SER_RS485_RTS_ON_SEND		(1 << 1)
-#define SER_RS485_RTS_AFTER_SEND	(1 << 2)
-#define SER_RS485_RTS_BEFORE_SEND	(1 << 3)
+#define SER_RS485_ENABLED		(1 << 0)	/* If enabled */
+#define SER_RS485_RTS_ON_SEND		(1 << 1)	/* Logical level for
+							   RTS pin when
+							   sending */
+#define SER_RS485_RTS_AFTER_SEND	(1 << 2)	/* Logical level for
+							   RTS pin after sent*/
 #define SER_RS485_RX_DURING_TX		(1 << 4)
-	__u32	delay_rts_before_send;	/* Milliseconds */
-	__u32	delay_rts_after_send;	/* Milliseconds */
+	__u32	delay_rts_before_send;	/* Delay before send (milliseconds) */
+	__u32	delay_rts_after_send;	/* Delay after send (milliseconds) */
 	__u32	padding[5];		/* Memory is cheap, new structs
 					   are a royal PITA .. */
 };
-- 
cgit v1.2.3-71-gd317


From 468e6a20afaccb67e2a7d7f60d301f90e1c6f301 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 7 Sep 2011 10:41:32 -0600
Subject: writeback: remove vm_dirties and task->dirties

They are not used any more.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/init_task.h | 1 -
 include/linux/sched.h     | 1 -
 kernel/fork.c             | 5 -----
 mm/page-writeback.c       | 9 ---------
 4 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 08ffab01e76c..94b1e356c02a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -184,7 +184,6 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
-	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f27e2c..1c4f3e9b9bc5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1521,7 +1521,6 @@ struct task_struct {
 #ifdef CONFIG_FAULT_INJECTION
 	int make_it_fail;
 #endif
-	struct prop_local_single dirties;
 	/*
 	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
 	 * balance_dirty_pages() for some dirty throttling pause
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d17261329..da4a6a10d088 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -162,7 +162,6 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 
 void free_task(struct task_struct *tsk)
 {
-	prop_local_destroy_single(&tsk->dirties);
 	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
@@ -274,10 +273,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	tsk->stack = ti;
 
-	err = prop_local_init_single(&tsk->dirties);
-	if (err)
-		goto out;
-
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e7cb5ff6e53d..71252486bc6f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
  *
  */
 static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
 
 /*
  * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
 {
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
-	prop_change_shift(&vm_dirties, shift);
 
 	writeback_set_ratelimit();
 }
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
-void task_dirty_inc(struct task_struct *tsk)
-{
-	prop_inc_single(&vm_dirties, &tsk->dirties);
-}
-
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
-	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_zone_page_state(page, NR_DIRTIED);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
-		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
 }
-- 
cgit v1.2.3-71-gd317


From f6f8285132907757ef84ef8dae0a1244b8cde6ac Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2011 12:58:07 -0800
Subject: pstore: pass allocated memory region back to caller

The buf_lock cannot be held while populating the inodes, so make the backend
pass forward an allocated and filled buffer instead. This solves the following
backtrace. The effect is that "buf" is only ever used to notify the backends
that something was written to it, and shouldn't be used in the read path.

To replace the buf_lock during the read path, isolate the open/read/close
loop with a separate mutex to maintain serialized access to the backend.

Note that is is up to the pstore backend to cope if the (*write)() path is
called in the middle of the read path.

[   59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847
[   59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount
[   59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1
[   59.691019] Call Trace:
[   59.691019]  [<810252d5>] __might_sleep+0xc3/0xca
[   59.691019]  [<810a26e6>] kmem_cache_alloc+0x32/0xf3
[   59.691019]  [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4
[   59.691019]  [<810b68b1>] alloc_inode+0x2a/0x64
[   59.691019]  [<810b6903>] new_inode+0x18/0x43
[   59.691019]  [<81142447>] pstore_get_inode.isra.1+0x11/0x98
[   59.691019]  [<81142623>] pstore_mkfile+0xae/0x26f
[   59.691019]  [<810a2a66>] ? kmem_cache_free+0x19/0xb1
[   59.691019]  [<8116c821>] ? ida_get_new_above+0x140/0x158
[   59.691019]  [<811708ea>] ? __init_rwsem+0x1e/0x2c
[   59.691019]  [<810b67e8>] ? inode_init_always+0x111/0x1b0
[   59.691019]  [<8102127e>] ? should_resched+0xd/0x27
[   59.691019]  [<8137977f>] ? _cond_resched+0xd/0x21
[   59.691019]  [<81142abf>] pstore_get_records+0x52/0xa7
[   59.691019]  [<8114254b>] pstore_fill_super+0x7d/0x91
[   59.691019]  [<810a7ff5>] mount_single+0x46/0x82
[   59.691019]  [<8114231a>] pstore_mount+0x15/0x17
[   59.691019]  [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98
[   59.691019]  [<810a8199>] mount_fs+0x5a/0x12d
[   59.691019]  [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a
[   59.691019]  [<810b9474>] vfs_kern_mount+0x4f/0x7d
[   59.691019]  [<810b9d7e>] do_kern_mount+0x34/0xb2
[   59.691019]  [<810bb15f>] do_mount+0x5fc/0x64a
[   59.691019]  [<810912fb>] ? strndup_user+0x2e/0x3f
[   59.691019]  [<810bb3cb>] sys_mount+0x66/0x99
[   59.691019]  [<8137b537>] sysenter_do_call+0x12/0x26

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/erst.c   | 31 ++++++++++++++++++++++---------
 drivers/firmware/efivars.c |  9 +++++++--
 fs/pstore/platform.c       | 13 ++++++++-----
 include/linux/pstore.h     |  4 +++-
 4 files changed, 40 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 127408069ca7..631b9477b99c 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -932,7 +932,8 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi);
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi);
 static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
 		       size_t size, struct pstore_info *psi);
 static int erst_clearer(enum pstore_type_id type, u64 id,
@@ -986,17 +987,23 @@ static int erst_close_pstore(struct pstore_info *psi)
 }
 
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-			   struct timespec *time, struct pstore_info *psi)
+			   struct timespec *time, char **buf,
+			   struct pstore_info *psi)
 {
 	int rc;
 	ssize_t len = 0;
 	u64 record_id;
-	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
-					(erst_info.buf - sizeof(*rcd));
+	struct cper_pstore_record *rcd;
+	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
 
 	if (erst_disable)
 		return -ENODEV;
 
+	rcd = kmalloc(rcd_len, GFP_KERNEL);
+	if (!rcd) {
+		rc = -ENOMEM;
+		goto out;
+	}
 skip:
 	rc = erst_get_record_id_next(&reader_pos, &record_id);
 	if (rc)
@@ -1004,22 +1011,27 @@ skip:
 
 	/* no more record */
 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
-		rc = -1;
+		rc = -EINVAL;
 		goto out;
 	}
 
-	len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) +
-			erst_info.bufsize);
+	len = erst_read(record_id, &rcd->hdr, rcd_len);
 	/* The record may be cleared by others, try read next record */
 	if (len == -ENOENT)
 		goto skip;
-	else if (len < 0) {
-		rc = -1;
+	else if (len < sizeof(*rcd)) {
+		rc = -EIO;
 		goto out;
 	}
 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
 		goto skip;
 
+	*buf = kmalloc(len, GFP_KERNEL);
+	if (*buf == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	memcpy(*buf, rcd->data, len - sizeof(*rcd));
 	*id = record_id;
 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
 			CPER_SECTION_TYPE_DMESG) == 0)
@@ -1037,6 +1049,7 @@ skip:
 	time->tv_nsec = 0;
 
 out:
+	kfree(rcd);
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 8370f72d87ff..a54a6b972ced 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -457,7 +457,8 @@ static int efi_pstore_close(struct pstore_info *psi)
 }
 
 static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
-			       struct timespec *timespec, struct pstore_info *psi)
+			       struct timespec *timespec,
+			       char **buf, struct pstore_info *psi)
 {
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	struct efivars *efivars = psi->data;
@@ -478,7 +479,11 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
 				timespec->tv_nsec = 0;
 				get_var_data_locked(efivars, &efivars->walk_entry->var);
 				size = efivars->walk_entry->var.DataSize;
-				memcpy(psi->buf, efivars->walk_entry->var.Data, size);
+				*buf = kmalloc(size, GFP_KERNEL);
+				if (*buf == NULL)
+					return -ENOMEM;
+				memcpy(*buf, efivars->walk_entry->var.Data,
+				       size);
 				efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
 					           struct efivar_entry, list);
 				return size;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 2bd620f0d796..57bbf9078ac8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -167,6 +167,7 @@ int pstore_register(struct pstore_info *psi)
 	}
 
 	psinfo = psi;
+	mutex_init(&psinfo->read_mutex);
 	spin_unlock(&pstore_lock);
 
 	if (owner && !try_module_get(owner)) {
@@ -195,30 +196,32 @@ EXPORT_SYMBOL_GPL(pstore_register);
 void pstore_get_records(int quiet)
 {
 	struct pstore_info *psi = psinfo;
+	char			*buf = NULL;
 	ssize_t			size;
 	u64			id;
 	enum pstore_type_id	type;
 	struct timespec		time;
 	int			failed = 0, rc;
-	unsigned long		flags;
 
 	if (!psi)
 		return;
 
-	spin_lock_irqsave(&psinfo->buf_lock, flags);
+	mutex_lock(&psi->read_mutex);
 	rc = psi->open(psi);
 	if (rc)
 		goto out;
 
-	while ((size = psi->read(&id, &type, &time, psi)) > 0) {
-		rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
+	while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
+		rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
 				  time, psi);
+		kfree(buf);
+		buf = NULL;
 		if (rc && (rc != -EEXIST || !quiet))
 			failed++;
 	}
 	psi->close(psi);
 out:
-	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+	mutex_unlock(&psi->read_mutex);
 
 	if (failed)
 		printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index ea567321ae3c..2ca8cde5459d 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,10 +35,12 @@ struct pstore_info {
 	spinlock_t	buf_lock;	/* serialize access to 'buf' */
 	char		*buf;
 	size_t		bufsize;
+	struct mutex	read_mutex;	/* serialize open/read/close */
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
-			struct timespec *time, struct pstore_info *psi);
+			struct timespec *time, char **buf,
+			struct pstore_info *psi);
 	int		(*write)(enum pstore_type_id type, u64 *id,
 			unsigned int part, size_t size, struct pstore_info *psi);
 	int		(*erase)(enum pstore_type_id type, u64 id,
-- 
cgit v1.2.3-71-gd317


From e70f224c1938af208b64b02c5cec27889fefcaec Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 25 Oct 2011 01:38:45 +0200
Subject: drm/radeon/kms: add a CS ioctl flag not to rewrite tiling flags in
 the CS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a new optional chunk to the CS ioctl that specifies optional flags
to the CS parser. Why this is useful is explained below. Note that some regs
no longer need the NOP relocation packet if this feature is enabled.
Tested on r300g and r600g with this flag disabled and enabled.

Assume there are two contexts sharing the same mipmapped tiled texture.
One context wants to render into the first mipmap and the other one
wants to render into the last mipmap. As you probably know, the hardware
has a MACRO_SWITCH feature, which turns off macro tiling for small mipmaps,
but that only applies to samplers.
(at least on r300-r500, though later hardware likely behaves the same)

So we want to just re-set the tiling flags before rendering (writing
packets), right? ... No. The contexts run in parallel, so they may
set the tiling flags simultaneously and then fire their command streams
also simultaneously. The last one setting the flags wins, the other one
loses.

Another problem is when one context wants to render into the first and
the last mipmap in one CS. Impossible. It must flush before changing
tiling flags and do the rendering into the smaller mipmaps in another CS.

Yet another problem is that writing copy_blit in userspace would be a mess
involving re-setting tiling flags to please the kernel, and causing races
with other contexts at the same time.

The only way out of this is to send tiling flags with each CS, ideally
with each relocation. But we already do that through the registers.
So let's just use what we have in the registers.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/evergreen_cs.c | 92 ++++++++++++++++++----------------
 drivers/gpu/drm/radeon/r300.c         | 94 +++++++++++++++++++----------------
 drivers/gpu/drm/radeon/r600_cs.c      | 26 ++++++----
 drivers/gpu/drm/radeon/radeon.h       |  3 +-
 drivers/gpu/drm/radeon/radeon_cs.c    | 11 +++-
 drivers/gpu/drm/radeon/radeon_drv.c   |  3 +-
 include/drm/radeon_drm.h              |  4 ++
 7 files changed, 135 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 7fdfa8ea7570..38e1bda73d33 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -480,21 +480,23 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		break;
 	case DB_Z_INFO:
-		r = evergreen_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			dev_warn(p->dev, "bad SET_CONTEXT_REG "
-					"0x%04X\n", reg);
-			return -EINVAL;
-		}
 		track->db_z_info = radeon_get_ib_value(p, idx);
-		ib[idx] &= ~Z_ARRAY_MODE(0xf);
-		track->db_z_info &= ~Z_ARRAY_MODE(0xf);
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
-			ib[idx] |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-			track->db_z_info |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-		} else {
-			ib[idx] |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-			track->db_z_info |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+		if (!p->keep_tiling_flags) {
+			r = evergreen_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				dev_warn(p->dev, "bad SET_CONTEXT_REG "
+						"0x%04X\n", reg);
+				return -EINVAL;
+			}
+			ib[idx] &= ~Z_ARRAY_MODE(0xf);
+			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+				ib[idx] |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+				track->db_z_info |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+			} else {
+				ib[idx] |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+				track->db_z_info |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+			}
 		}
 		break;
 	case DB_STENCIL_INFO:
@@ -607,40 +609,44 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case CB_COLOR5_INFO:
 	case CB_COLOR6_INFO:
 	case CB_COLOR7_INFO:
-		r = evergreen_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			dev_warn(p->dev, "bad SET_CONTEXT_REG "
-					"0x%04X\n", reg);
-			return -EINVAL;
-		}
 		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
-			ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-		} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-			ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+		if (!p->keep_tiling_flags) {
+			r = evergreen_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				dev_warn(p->dev, "bad SET_CONTEXT_REG "
+						"0x%04X\n", reg);
+				return -EINVAL;
+			}
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+				ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+				track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+			} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+				ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+				track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+			}
 		}
 		break;
 	case CB_COLOR8_INFO:
 	case CB_COLOR9_INFO:
 	case CB_COLOR10_INFO:
 	case CB_COLOR11_INFO:
-		r = evergreen_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			dev_warn(p->dev, "bad SET_CONTEXT_REG "
-					"0x%04X\n", reg);
-			return -EINVAL;
-		}
 		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
-			ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-		} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-			ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+		if (!p->keep_tiling_flags) {
+			r = evergreen_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				dev_warn(p->dev, "bad SET_CONTEXT_REG "
+						"0x%04X\n", reg);
+				return -EINVAL;
+			}
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+				ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+				track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+			} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+				ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+				track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+			}
 		}
 		break;
 	case CB_COLOR0_PITCH:
@@ -1311,10 +1317,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					return -EINVAL;
 				}
 				ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-					ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
-				else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-					ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+				if (!p->keep_tiling_flags) {
+					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+						ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_2D_TILED_THIN1);
+					else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+						ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
+				}
 				texture = reloc->robj;
 				/* tex mip base */
 				r = evergreen_cs_packet_next_reloc(p, &reloc);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 400b26df652a..c93bc64707e1 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -701,16 +701,21 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-			tile_flags |= R300_TXO_MACRO_TILE;
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-			tile_flags |= R300_TXO_MICRO_TILE;
-		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
-			tile_flags |= R300_TXO_MICRO_TILE_SQUARE;
-
-		tmp = idx_value + ((u32)reloc->lobj.gpu_offset);
-		tmp |= tile_flags;
-		ib[idx] = tmp;
+		if (p->keep_tiling_flags) {
+			ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */
+				  ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset);
+		} else {
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= R300_TXO_MACRO_TILE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+				tile_flags |= R300_TXO_MICRO_TILE;
+			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+				tile_flags |= R300_TXO_MICRO_TILE_SQUARE;
+
+			tmp = idx_value + ((u32)reloc->lobj.gpu_offset);
+			tmp |= tile_flags;
+			ib[idx] = tmp;
+		}
 		track->textures[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@@ -760,24 +765,26 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		/* RB3D_COLORPITCH1 */
 		/* RB3D_COLORPITCH2 */
 		/* RB3D_COLORPITCH3 */
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-				  idx, reg);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
+		if (!p->keep_tiling_flags) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
 
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-			tile_flags |= R300_COLOR_TILE_ENABLE;
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-			tile_flags |= R300_COLOR_MICROTILE_ENABLE;
-		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
-			tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= R300_COLOR_TILE_ENABLE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+				tile_flags |= R300_COLOR_MICROTILE_ENABLE;
+			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+				tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE;
 
-		tmp = idx_value & ~(0x7 << 16);
-		tmp |= tile_flags;
-		ib[idx] = tmp;
+			tmp = idx_value & ~(0x7 << 16);
+			tmp |= tile_flags;
+			ib[idx] = tmp;
+		}
 		i = (reg - 0x4E38) >> 2;
 		track->cb[i].pitch = idx_value & 0x3FFE;
 		switch (((idx_value >> 21) & 0xF)) {
@@ -843,25 +850,26 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		break;
 	case 0x4F24:
 		/* ZB_DEPTHPITCH */
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-				  idx, reg);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-			tile_flags |= R300_DEPTHMICROTILE_TILED;
-		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
-			tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE;
+		if (!p->keep_tiling_flags) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
 
-		tmp = idx_value & ~(0x7 << 16);
-		tmp |= tile_flags;
-		ib[idx] = tmp;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= R300_DEPTHMACROTILE_ENABLE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+				tile_flags |= R300_DEPTHMICROTILE_TILED;
+			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+				tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE;
 
+			tmp = idx_value & ~(0x7 << 16);
+			tmp |= tile_flags;
+			ib[idx] = tmp;
+		}
 		track->zb.pitch = idx_value & 0x3FFC;
 		track->zb_dirty = true;
 		break;
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0a2e023c1557..cb1acffd2430 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -941,7 +941,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		track->db_depth_control = radeon_get_ib_value(p, idx);
 		break;
 	case R_028010_DB_DEPTH_INFO:
-		if (r600_cs_packet_next_is_pkt3_nop(p)) {
+		if (!p->keep_tiling_flags &&
+		    r600_cs_packet_next_is_pkt3_nop(p)) {
 			r = r600_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
@@ -992,7 +993,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case R_0280B4_CB_COLOR5_INFO:
 	case R_0280B8_CB_COLOR6_INFO:
 	case R_0280BC_CB_COLOR7_INFO:
-		if (r600_cs_packet_next_is_pkt3_nop(p)) {
+		if (!p->keep_tiling_flags &&
+		     r600_cs_packet_next_is_pkt3_nop(p)) {
 			r = r600_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
@@ -1291,10 +1293,12 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 	mip_offset <<= 8;
 
 	word0 = radeon_get_ib_value(p, idx + 0);
-	if (tiling_flags & RADEON_TILING_MACRO)
-		word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
-	else if (tiling_flags & RADEON_TILING_MICRO)
-		word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+	if (!p->keep_tiling_flags) {
+		if (tiling_flags & RADEON_TILING_MACRO)
+			word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+		else if (tiling_flags & RADEON_TILING_MICRO)
+			word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+	}
 	word1 = radeon_get_ib_value(p, idx + 1);
 	w0 = G_038000_TEX_WIDTH(word0) + 1;
 	h0 = G_038004_TEX_HEIGHT(word1) + 1;
@@ -1621,10 +1625,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					return -EINVAL;
 				}
 				base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-					ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
-				else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-					ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+				if (!p->keep_tiling_flags) {
+					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+					else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+				}
 				texture = reloc->robj;
 				/* tex mip base */
 				r = r600_cs_packet_next_reloc(p, &reloc);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index fc5a1d642cb5..8227e76b5c70 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -611,7 +611,8 @@ struct radeon_cs_parser {
 	struct radeon_ib	*ib;
 	void			*track;
 	unsigned		family;
-	int parser_error;
+	int			parser_error;
+	bool			keep_tiling_flags;
 };
 
 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index ccaa243c1442..29afd71e0840 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,7 +93,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 {
 	struct drm_radeon_cs *cs = data;
 	uint64_t *chunk_array_ptr;
-	unsigned size, i;
+	unsigned size, i, flags = 0;
 
 	if (!cs->num_chunks) {
 		return 0;
@@ -140,6 +140,10 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS &&
+		    !p->chunks[i].length_dw) {
+			return -EINVAL;
+		}
 
 		p->chunks[i].length_dw = user_chunk.length_dw;
 		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
@@ -155,6 +159,9 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 					       p->chunks[i].user_ptr, size)) {
 				return -EFAULT;
 			}
+			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
+				flags = p->chunks[i].kdata[0];
+			}
 		} else {
 			p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
 			p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -174,6 +181,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			  p->chunks[p->chunk_ib_idx].length_dw);
 		return -EINVAL;
 	}
+
+	p->keep_tiling_flags = (flags & RADEON_CS_KEEP_TILING_FLAGS) != 0;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a0b35e909489..71499fc3daf5 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -53,9 +53,10 @@
  *   2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query
  *   2.10.0 - fusion 2D tiling
  *   2.11.0 - backend map, initial compute support for the CS checker
+ *   2.12.0 - RADEON_CS_KEEP_TILING_FLAGS
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	11
+#define KMS_DRIVER_MINOR	12
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index b65be6054a18..be94be6d6f17 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -874,6 +874,10 @@ struct drm_radeon_gem_pwrite {
 
 #define RADEON_CHUNK_ID_RELOCS	0x01
 #define RADEON_CHUNK_ID_IB	0x02
+#define RADEON_CHUNK_ID_FLAGS	0x03
+
+/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
+#define RADEON_CS_KEEP_TILING_FLAGS 0x01
 
 struct drm_radeon_cs_chunk {
 	uint32_t		chunk_id;
-- 
cgit v1.2.3-71-gd317


From 70e9942f17a6193e9172a804e6569a8806633d6b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Nov 2011 00:16:51 +0100
Subject: netfilter: nf_conntrack: make event callback registration per-netns

This patch fixes an oops that can be triggered following this recipe:

0) make sure nf_conntrack_netlink and nf_conntrack_ipv4 are loaded.
1) container is started.
2) connect to it via lxc-console.
3) generate some traffic with the container to create some conntrack
   entries in its table.
4) stop the container: you hit one oops because the conntrack table
   cleanup tries to report the destroy event to user-space but the
   per-netns nfnetlink socket has already gone (as the nfnetlink
   socket is per-netns but event callback registration is global).

To fix this situation, we make the ctnl_notifier per-netns so the
callback is registered/unregistered if the container is
created/destroyed.

Alex Bligh and Alexey Dobriyan originally proposed one small patch to
check if the nfnetlink socket is gone in nfnetlink_has_listeners,
but this is a very visited path for events, thus, it may reduce
performance and it looks a bit hackish to check for the nfnetlink
socket only to workaround this situation. As a result, I decided
to follow the bigger path choice, which seems to look nicer to me.

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Reported-by: Alex Bligh <alex@alex.org.uk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h | 19 ++++----
 include/net/netns/conntrack.h               |  2 +
 net/netfilter/nf_conntrack_ecache.c         | 37 +++++++--------
 net/netfilter/nf_conntrack_netlink.c        | 73 ++++++++++++++++++++---------
 4 files changed, 82 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 4283508b3e18..a88fb6939387 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -67,18 +67,18 @@ struct nf_ct_event_notifier {
 	int (*fcn)(unsigned int events, struct nf_ct_event *item);
 };
 
-extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
-extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
-extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
+extern int nf_conntrack_register_notifier(struct net *net, struct nf_ct_event_notifier *nb);
+extern void nf_conntrack_unregister_notifier(struct net *net, struct nf_ct_event_notifier *nb);
 
 extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
 
 static inline void
 nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *e;
 
-	if (nf_conntrack_event_cb == NULL)
+	if (net->ct.nf_conntrack_event_cb == NULL)
 		return;
 
 	e = nf_ct_ecache_find(ct);
@@ -95,11 +95,12 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 			      int report)
 {
 	int ret = 0;
+	struct net *net = nf_ct_net(ct);
 	struct nf_ct_event_notifier *notify;
 	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
-	notify = rcu_dereference(nf_conntrack_event_cb);
+	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
@@ -164,9 +165,8 @@ struct nf_exp_event_notifier {
 	int (*fcn)(unsigned int events, struct nf_exp_event *item);
 };
 
-extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
-extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
-extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
+extern int nf_ct_expect_register_notifier(struct net *net, struct nf_exp_event_notifier *nb);
+extern void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_event_notifier *nb);
 
 static inline void
 nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -174,11 +174,12 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  u32 pid,
 			  int report)
 {
+	struct net *net = nf_ct_exp_net(exp);
 	struct nf_exp_event_notifier *notify;
 	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
-	notify = rcu_dereference(nf_expect_event_cb);
+	notify = rcu_dereference(net->ct.nf_expect_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 0249399e51a7..7a911eca0f18 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,8 @@ struct netns_ct {
 	struct hlist_nulls_head	unconfirmed;
 	struct hlist_nulls_head	dying;
 	struct ip_conntrack_stat __percpu *stat;
+	struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
+	struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
 	int			sysctl_events;
 	unsigned int		sysctl_events_retry_timeout;
 	int			sysctl_acct;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6b368be937c6..b62c4148b921 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,22 +27,17 @@
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
-struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
-
-struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_expect_event_cb);
-
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	unsigned long events;
 	struct nf_ct_event_notifier *notify;
 	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
-	notify = rcu_dereference(nf_conntrack_event_cb);
+	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
@@ -83,19 +78,20 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
-int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
+int nf_conntrack_register_notifier(struct net *net,
+				   struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(nf_conntrack_event_cb, new);
+	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 
@@ -105,32 +101,34 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net,
+				      struct nf_ct_event_notifier *new)
 {
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
-	RCU_INIT_POINTER(nf_conntrack_event_cb, NULL);
+	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
+int nf_ct_expect_register_notifier(struct net *net,
+				   struct nf_exp_event_notifier *new)
 {
 	int ret = 0;
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_expect_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(nf_expect_event_cb, new);
+	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 
@@ -140,15 +138,16 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
+void nf_ct_expect_unregister_notifier(struct net *net,
+				      struct nf_exp_event_notifier *new)
 {
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference_protected(nf_expect_event_cb,
+	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
 					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
-	RCU_INIT_POINTER(nf_expect_event_cb, NULL);
+	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e58aa9b1fe8a..ef21b221f036 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -4,7 +4,7 @@
  * (C) 2001 by Jay Schulist <jschlst@samba.org>
  * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
  * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * Initial connection tracking via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -2163,6 +2163,54 @@ MODULE_ALIAS("ip_conntrack_netlink");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
 
+static int __net_init ctnetlink_net_init(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	int ret;
+
+	ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
+	if (ret < 0) {
+		pr_err("ctnetlink_init: cannot register notifier.\n");
+		goto err_out;
+	}
+
+	ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
+	if (ret < 0) {
+		pr_err("ctnetlink_init: cannot expect register notifier.\n");
+		goto err_unreg_notifier;
+	}
+#endif
+	return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+	nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+err_out:
+	return ret;
+#endif
+}
+
+static void ctnetlink_net_exit(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
+	nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+#endif
+}
+
+static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
+{
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list)
+		ctnetlink_net_exit(net);
+}
+
+static struct pernet_operations ctnetlink_net_ops = {
+	.init		= ctnetlink_net_init,
+	.exit_batch	= ctnetlink_net_exit_batch,
+};
+
 static int __init ctnetlink_init(void)
 {
 	int ret;
@@ -2180,28 +2228,15 @@ static int __init ctnetlink_init(void)
 		goto err_unreg_subsys;
 	}
 
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	ret = nf_conntrack_register_notifier(&ctnl_notifier);
-	if (ret < 0) {
-		pr_err("ctnetlink_init: cannot register notifier.\n");
+	if (register_pernet_subsys(&ctnetlink_net_ops)) {
+		pr_err("ctnetlink_init: cannot register pernet operations\n");
 		goto err_unreg_exp_subsys;
 	}
 
-	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
-	if (ret < 0) {
-		pr_err("ctnetlink_init: cannot expect register notifier.\n");
-		goto err_unreg_notifier;
-	}
-#endif
-
 	return 0;
 
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
-	nf_conntrack_unregister_notifier(&ctnl_notifier);
 err_unreg_exp_subsys:
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
 err_unreg_subsys:
 	nfnetlink_subsys_unregister(&ctnl_subsys);
 err_out:
@@ -2213,11 +2248,7 @@ static void __exit ctnetlink_exit(void)
 	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
 	nf_ct_remove_userspace_expectations();
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
-	nf_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
+	unregister_pernet_subsys(&ctnetlink_net_ops);
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
 }
-- 
cgit v1.2.3-71-gd317


From a5cd335165e31db9dbab636fd29895d41da55dd2 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Wed, 23 Nov 2011 01:12:01 -0500
Subject: drm: integer overflow in drm_mode_dirtyfb_ioctl()

There is a potential integer overflow in drm_mode_dirtyfb_ioctl()
if userspace passes in a large num_clips.  The call to kmalloc would
allocate a small buffer, and the call to fb->funcs->dirty may result
in a memory corruption.

Reported-by: Haogang Chen <haogangchen@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 4 ++++
 include/drm/drm_mode.h     | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 405c63b9d539..8323fc389840 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1873,6 +1873,10 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev,
 	}
 
 	if (num_clips && clips_ptr) {
+		if (num_clips < 0 || num_clips > DRM_MODE_FB_DIRTY_MAX_CLIPS) {
+			ret = -EINVAL;
+			goto out_err1;
+		}
 		clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
 		if (!clips) {
 			ret = -ENOMEM;
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index d30bedfeb7ef..ddd46db65b57 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -235,6 +235,8 @@ struct drm_mode_fb_cmd {
 #define DRM_MODE_FB_DIRTY_ANNOTATE_FILL 0x02
 #define DRM_MODE_FB_DIRTY_FLAGS         0x03
 
+#define DRM_MODE_FB_DIRTY_MAX_CLIPS     256
+
 /*
  * Mark a region of a framebuffer as dirty.
  *
-- 
cgit v1.2.3-71-gd317


From 67820021dc9c8da37f773025190280f55f3626d4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 23 Nov 2011 11:33:07 +0100
Subject: i2c: Delete ANY_I2C_BUS

Last piece of code using ANY_I2C_BUS was deleted almost 2 years ago,
so ANY_I2C_BUS can go away as well.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a81bf6d23b3e..07d103a06d64 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -432,9 +432,6 @@ void i2c_unlock_adapter(struct i2c_adapter *);
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU
 
-/* The numbers to use to set I2C bus address */
-#define ANY_I2C_BUS		0xffff
-
 /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */
 #define I2C_ADDRS(addr, addrs...) \
 	((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END })
-- 
cgit v1.2.3-71-gd317


From 780dc9ba4eb682a89be48d5b814feae6722a19e0 Mon Sep 17 00:00:00 2001
From: Afzal Mohammed <afzal@ti.com>
Date: Tue, 8 Nov 2011 18:54:10 +0530
Subject: regulator: TPS65910: Fix VDD1/2 voltage selector count

Count of selector voltage is required for regulator_set_voltage
to work via set_voltage_sel. VDD1/2 currently have it as zero,
so regulator_set_voltage won't work for VDD1/2.
Update count (n_voltages) for VDD1/2.

Output Voltage = (step value * 12.5 mV + 562.5 mV) * gain

With above expr, number of voltages that can be selected is
step value count * gain count

constant for gain count will be called VDD1_2_NUM_VOLT_COARSE

existing constant for step value count is VDD1_2_NUM_VOLTS,
use VDD1_2_NUM_VOLT_FINE instead to make clear that step value
is not the only component in deciding selectable voltage count

Signed-off-by: Afzal Mohammed <afzal@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/tps65910-regulator.c | 14 ++++++++------
 include/linux/mfd/tps65910.h           |  3 ++-
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 66d2d60b436a..b552aae55b41 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -664,10 +664,10 @@ static int tps65910_set_voltage_dcdc(struct regulator_dev *dev,
 
 	switch (id) {
 	case TPS65910_REG_VDD1:
-		dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		if (dcdc_mult == 1)
 			dcdc_mult--;
-		vsel = (selector % VDD1_2_NUM_VOLTS) + 3;
+		vsel = (selector % VDD1_2_NUM_VOLT_FINE) + 3;
 
 		tps65910_modify_bits(pmic, TPS65910_VDD1,
 				(dcdc_mult << VDD1_VGAIN_SEL_SHIFT),
@@ -675,10 +675,10 @@ static int tps65910_set_voltage_dcdc(struct regulator_dev *dev,
 		tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel);
 		break;
 	case TPS65910_REG_VDD2:
-		dcdc_mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		if (dcdc_mult == 1)
 			dcdc_mult--;
-		vsel = (selector % VDD1_2_NUM_VOLTS) + 3;
+		vsel = (selector % VDD1_2_NUM_VOLT_FINE) + 3;
 
 		tps65910_modify_bits(pmic, TPS65910_VDD2,
 				(dcdc_mult << VDD2_VGAIN_SEL_SHIFT),
@@ -756,9 +756,9 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev,
 	switch (id) {
 	case TPS65910_REG_VDD1:
 	case TPS65910_REG_VDD2:
-		mult = (selector / VDD1_2_NUM_VOLTS) + 1;
+		mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
 		volt = VDD1_2_MIN_VOLT +
-				(selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET;
+				(selector % VDD1_2_NUM_VOLT_FINE) * VDD1_2_OFFSET;
 		break;
 	case TPS65911_REG_VDDCTRL:
 		volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET);
@@ -947,6 +947,8 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 
 		if (i == TPS65910_REG_VDD1 || i == TPS65910_REG_VDD2) {
 			pmic->desc[i].ops = &tps65910_ops_dcdc;
+			pmic->desc[i].n_voltages = VDD1_2_NUM_VOLT_FINE *
+							VDD1_2_NUM_VOLT_COARSE;
 		} else if (i == TPS65910_REG_VDD3) {
 			if (tps65910_chip_id(tps65910) == TPS65910)
 				pmic->desc[i].ops = &tps65910_ops_vdd3;
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 82b4c8801a4f..8bf2cb9502dd 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -243,7 +243,8 @@
 
 
 /*Registers VDD1, VDD2 voltage values definitions */
-#define VDD1_2_NUM_VOLTS				73
+#define VDD1_2_NUM_VOLT_FINE				73
+#define VDD1_2_NUM_VOLT_COARSE				3
 #define VDD1_2_MIN_VOLT					6000
 #define VDD1_2_OFFSET					125
 
-- 
cgit v1.2.3-71-gd317


From fe1a7fe2c4456679b3402f04268bdfafca7b127a Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Tue, 15 Nov 2011 16:17:18 +0200
Subject: virtio-mmio: Correct the name of the guest features selector

Guest features selector spelling mistake.

Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_mmio.c | 2 +-
 include/linux/virtio_mmio.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index acc5e43c373e..7317dc2ec426 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -118,7 +118,7 @@ static void vm_finalize_features(struct virtio_device *vdev)
 	vring_transport_features(vdev);
 
 	for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
-		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SET);
+		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
 		writel(vdev->features[i],
 				vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
 	}
diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
index 27c7edefbc86..5c7b6f0daef8 100644
--- a/include/linux/virtio_mmio.h
+++ b/include/linux/virtio_mmio.h
@@ -63,7 +63,7 @@
 #define VIRTIO_MMIO_GUEST_FEATURES	0x020
 
 /* Activated features set selector - Write Only */
-#define VIRTIO_MMIO_GUEST_FEATURES_SET	0x024
+#define VIRTIO_MMIO_GUEST_FEATURES_SEL	0x024
 
 /* Guest's memory page size in bytes - Write Only */
 #define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028
-- 
cgit v1.2.3-71-gd317


From e6af578c5305be693a1bc7f4dc7b51dd82d41425 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 17 Nov 2011 17:41:15 +0200
Subject: virtio-pci: make reset operation safer

virtio pci device reset actually just does an I/O
write, which in PCI is really posted, that is it
can complete on CPU before the device has received it.

Further, interrupts might have been pending on
another CPU, so device callback might get invoked after reset.

This conflicts with how drivers use reset, which is typically:
	reset
	unregister
a callback running after reset completed can race with
unregister, potentially leading to use after free bugs.

Fix by flushing out the write, and flushing pending interrupts.

This assumes that device is never reset from
its vq/config callbacks, or in parallel with being
added/removed, document this assumption.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c   | 18 ++++++++++++++++++
 include/linux/virtio_config.h |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1bf41e8892..03d1984bd363 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -169,11 +169,29 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
 	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 }
 
+/* wait for pending irq handlers */
+static void vp_synchronize_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled)
+		synchronize_irq(vp_dev->pci_dev->irq);
+
+	for (i = 0; i < vp_dev->msix_vectors; ++i)
+		synchronize_irq(vp_dev->msix_entries[i].vector);
+}
+
 static void vp_reset(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
 	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	/* Flush out the status write, and flush in device writes,
+	 * including MSi-X interrupts, if any. */
+	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	/* Flush pending VQ/configuration callbacks. */
+	vp_synchronize_vectors(vdev);
 }
 
 /* the notify function used when creating a virt queue */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index add4790b21fe..e9e72bda1b72 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -85,6 +85,8 @@
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
+ *	Device must not be reset from its vq/config callbacks, or in
+ *	parallel with being added/removed.
  * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
  *	nvqs: the number of virtqueues to find
-- 
cgit v1.2.3-71-gd317


From de68dca1816660b0d3ac89fa59ffb410007a143f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 26 Nov 2011 12:13:44 +0000
Subject: inet: add a redirect generation id in inetpeer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now inetpeer is the place where we cache redirect information for ipv4
destinations, we must be able to invalidate informations when a route is
added/removed on host.

As inetpeer is not yet namespace aware, this patch adds a shared
redirect_genid, and a per inetpeer redirect_genid. This might be changed
later if inetpeer becomes ns aware.

Cache information for one inerpeer is valid as long as its
redirect_genid has the same value than global redirect_genid.

Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  1 +
 net/ipv4/route.c       | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 78c83e62218f..e9ff3fc5e688 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -35,6 +35,7 @@ struct inet_peer {
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
+	int			redirect_genid;
 	unsigned long		rate_last;
 	unsigned long		pmtu_expires;
 	u32			pmtu_orig;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fb47c8f0cd86..5c2847247f51 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -131,6 +131,7 @@ static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
+static int redirect_genid;
 
 /*
  *	Interface to generic destination cache.
@@ -837,6 +838,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
+	redirect_genid++;
 }
 
 /*
@@ -1391,8 +1393,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				peer = rt->peer;
 				if (peer) {
-					if (peer->redirect_learned.a4 != new_gw) {
+					if (peer->redirect_learned.a4 != new_gw ||
+					    peer->redirect_genid != redirect_genid) {
 						peer->redirect_learned.a4 = new_gw;
+						peer->redirect_genid = redirect_genid;
 						atomic_inc(&__rt_peer_genid);
 					}
 					check_peer_redir(&rt->dst, peer);
@@ -1701,6 +1705,8 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 		if (peer) {
 			check_peer_pmtu(dst, peer);
 
+			if (peer->redirect_genid != redirect_genid)
+				peer->redirect_learned.a4 = 0;
 			if (peer->redirect_learned.a4 &&
 			    peer->redirect_learned.a4 != rt->rt_gateway) {
 				if (check_peer_redir(dst, peer))
@@ -1857,6 +1863,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 		dst_init_metrics(&rt->dst, peer->metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
+		if (peer->redirect_genid != redirect_genid)
+			peer->redirect_learned.a4 = 0;
 		if (peer->redirect_learned.a4 &&
 		    peer->redirect_learned.a4 != rt->rt_gateway) {
 			rt->rt_gateway = peer->redirect_learned.a4;
-- 
cgit v1.2.3-71-gd317


From f7bc83d87d242917ca0ee041ed509f57f361dd56 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Nov 2011 21:20:32 +0100
Subject: PM: Update comments describing device power management callbacks

The comments describing device power management callbacks in
include/pm.h are outdated and somewhat confusing, so make them
reflect the reality more accurately.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm.h | 229 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 134 insertions(+), 95 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5c4c8b18c8b7..3f3ed83a9aa5 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -54,118 +54,145 @@ typedef struct pm_message {
 /**
  * struct dev_pm_ops - device PM callbacks
  *
- * Several driver power state transitions are externally visible, affecting
+ * Several device power state transitions are externally visible, affecting
  * the state of pending I/O queues and (for drivers that touch hardware)
  * interrupts, wakeups, DMA, and other hardware state.  There may also be
- * internal transitions to various low power modes, which are transparent
+ * internal transitions to various low-power modes which are transparent
  * to the rest of the driver stack (such as a driver that's ON gating off
  * clocks which are not in active use).
  *
- * The externally visible transitions are handled with the help of the following
- * callbacks included in this structure:
- *
- * @prepare: Prepare the device for the upcoming transition, but do NOT change
- *	its hardware state.  Prevent new children of the device from being
- *	registered after @prepare() returns (the driver's subsystem and
- *	generally the rest of the kernel is supposed to prevent new calls to the
- *	probe method from being made too once @prepare() has succeeded).  If
- *	@prepare() detects a situation it cannot handle (e.g. registration of a
- *	child already in progress), it may return -EAGAIN, so that the PM core
- *	can execute it once again (e.g. after the new child has been registered)
- *	to recover from the race condition.  This method is executed for all
- *	kinds of suspend transitions and is followed by one of the suspend
- *	callbacks: @suspend(), @freeze(), or @poweroff().
- *	The PM core executes @prepare() for all devices before starting to
- *	execute suspend callbacks for any of them, so drivers may assume all of
- *	the other devices to be present and functional while @prepare() is being
- *	executed.  In particular, it is safe to make GFP_KERNEL memory
- *	allocations from within @prepare().  However, drivers may NOT assume
- *	anything about the availability of the user space at that time and it
- *	is not correct to request firmware from within @prepare() (it's too
- *	late to do that).  [To work around this limitation, drivers may
- *	register suspend and hibernation notifiers that are executed before the
- *	freezing of tasks.]
+ * The externally visible transitions are handled with the help of callbacks
+ * included in this structure in such a way that two levels of callbacks are
+ * involved.  First, the PM core executes callbacks provided by PM domains,
+ * device types, classes and bus types.  They are the subsystem-level callbacks
+ * supposed to execute callbacks provided by device drivers, although they may
+ * choose not to do that.  If the driver callbacks are executed, they have to
+ * collaborate with the subsystem-level callbacks to achieve the goals
+ * appropriate for the given system transition, given transition phase and the
+ * subsystem the device belongs to.
+ *
+ * @prepare: The principal role of this callback is to prevent new children of
+ *	the device from being registered after it has returned (the driver's
+ *	subsystem and generally the rest of the kernel is supposed to prevent
+ *	new calls to the probe method from being made too once @prepare() has
+ *	succeeded).  If @prepare() detects a situation it cannot handle (e.g.
+ *	registration of a child already in progress), it may return -EAGAIN, so
+ *	that the PM core can execute it once again (e.g. after a new child has
+ *	been registered) to recover from the race condition.
+ *	This method is executed for all kinds of suspend transitions and is
+ *	followed by one of the suspend callbacks: @suspend(), @freeze(), or
+ *	@poweroff().  The PM core executes subsystem-level @prepare() for all
+ *	devices before starting to invoke suspend callbacks for any of them, so
+ *	generally devices may be assumed to be functional or to respond to
+ *	runtime resume requests while @prepare() is being executed.  However,
+ *	device drivers may NOT assume anything about the availability of user
+ *	space at that time and it is NOT valid to request firmware from within
+ *	@prepare() (it's too late to do that).  It also is NOT valid to allocate
+ *	substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
+ *	[To work around these limitations, drivers may register suspend and
+ *	hibernation notifiers to be executed before the freezing of tasks.]
  *
  * @complete: Undo the changes made by @prepare().  This method is executed for
  *	all kinds of resume transitions, following one of the resume callbacks:
  *	@resume(), @thaw(), @restore().  Also called if the state transition
- *	fails before the driver's suspend callback (@suspend(), @freeze(),
- *	@poweroff()) can be executed (e.g. if the suspend callback fails for one
+ *	fails before the driver's suspend callback: @suspend(), @freeze() or
+ *	@poweroff(), can be executed (e.g. if the suspend callback fails for one
  *	of the other devices that the PM core has unsuccessfully attempted to
  *	suspend earlier).
- *	The PM core executes @complete() after it has executed the appropriate
- *	resume callback for all devices.
+ *	The PM core executes subsystem-level @complete() after it has executed
+ *	the appropriate resume callbacks for all devices.
  *
  * @suspend: Executed before putting the system into a sleep state in which the
- *	contents of main memory are preserved.  Quiesce the device, put it into
- *	a low power state appropriate for the upcoming system state (such as
- *	PCI_D3hot), and enable wakeup events as appropriate.
+ *	contents of main memory are preserved.  The exact action to perform
+ *	depends on the device's subsystem (PM domain, device type, class or bus
+ *	type), but generally the device must be quiescent after subsystem-level
+ *	@suspend() has returned, so that it doesn't do any I/O or DMA.
+ *	Subsystem-level @suspend() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @resume: Executed after waking the system up from a sleep state in which the
- *	contents of main memory were preserved.  Put the device into the
- *	appropriate state, according to the information saved in memory by the
- *	preceding @suspend().  The driver starts working again, responding to
- *	hardware events and software requests.  The hardware may have gone
- *	through a power-off reset, or it may have maintained state from the
- *	previous suspend() which the driver may rely on while resuming.  On most
- *	platforms, there are no restrictions on availability of resources like
- *	clocks during @resume().
+ *	contents of main memory were preserved.  The exact action to perform
+ *	depends on the device's subsystem, but generally the driver is expected
+ *	to start working again, responding to hardware events and software
+ *	requests (the device itself may be left in a low-power state, waiting
+ *	for a runtime resume to occur).  The state of the device at the time its
+ *	driver's @resume() callback is run depends on the platform and subsystem
+ *	the device belongs to.  On most platforms, there are no restrictions on
+ *	availability of resources like clocks during @resume().
+ *	Subsystem-level @resume() is executed for all devices after invoking
+ *	subsystem-level @resume_noirq() for all of them.
  *
  * @freeze: Hibernation-specific, executed before creating a hibernation image.
- *	Quiesce operations so that a consistent image can be created, but do NOT
- *	otherwise put the device into a low power device state and do NOT emit
- *	system wakeup events.  Save in main memory the device settings to be
- *	used by @restore() during the subsequent resume from hibernation or by
- *	the subsequent @thaw(), if the creation of the image or the restoration
- *	of main memory contents from it fails.
+ *	Analogous to @suspend(), but it should not enable the device to signal
+ *	wakeup events or change its power state.  The majority of subsystems
+ *	(with the notable exception of the PCI bus type) expect the driver-level
+ *	@freeze() to save the device settings in memory to be used by @restore()
+ *	during the subsequent resume from hibernation.
+ *	Subsystem-level @freeze() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @thaw: Hibernation-specific, executed after creating a hibernation image OR
- *	if the creation of the image fails.  Also executed after a failing
+ *	if the creation of an image has failed.  Also executed after a failing
  *	attempt to restore the contents of main memory from such an image.
  *	Undo the changes made by the preceding @freeze(), so the device can be
  *	operated in the same way as immediately before the call to @freeze().
+ *	Subsystem-level @thaw() is executed for all devices after invoking
+ *	subsystem-level @thaw_noirq() for all of them.  It also may be executed
+ *	directly after @freeze() in case of a transition error.
  *
  * @poweroff: Hibernation-specific, executed after saving a hibernation image.
- *	Quiesce the device, put it into a low power state appropriate for the
- *	upcoming system state (such as PCI_D3hot), and enable wakeup events as
- *	appropriate.
+ *	Analogous to @suspend(), but it need not save the device's settings in
+ *	memory.
+ *	Subsystem-level @poweroff() is executed for all devices after invoking
+ *	subsystem-level @prepare() for all of them.
  *
  * @restore: Hibernation-specific, executed after restoring the contents of main
- *	memory from a hibernation image.  Driver starts working again,
- *	responding to hardware events and software requests.  Drivers may NOT
- *	make ANY assumptions about the hardware state right prior to @restore().
- *	On most platforms, there are no restrictions on availability of
- *	resources like clocks during @restore().
- *
- * @suspend_noirq: Complete the operations of ->suspend() by carrying out any
- *	actions required for suspending the device that need interrupts to be
- *	disabled
- *
- * @resume_noirq: Prepare for the execution of ->resume() by carrying out any
- *	actions required for resuming the device that need interrupts to be
- *	disabled
- *
- * @freeze_noirq: Complete the operations of ->freeze() by carrying out any
- *	actions required for freezing the device that need interrupts to be
- *	disabled
- *
- * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any
- *	actions required for thawing the device that need interrupts to be
- *	disabled
- *
- * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any
- *	actions required for handling the device that need interrupts to be
- *	disabled
- *
- * @restore_noirq: Prepare for the execution of ->restore() by carrying out any
- *	actions required for restoring the operations of the device that need
- *	interrupts to be disabled
+ *	memory from a hibernation image, analogous to @resume().
+ *
+ * @suspend_noirq: Complete the actions started by @suspend().  Carry out any
+ *	additional operations required for suspending the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @suspend_noirq() is being executed.
+ *	It generally is expected that the device will be in a low-power state
+ *	(appropriate for the target system sleep state) after subsystem-level
+ *	@suspend_noirq() has returned successfully.  If the device can generate
+ *	system wakeup signals and is enabled to wake up the system, it should be
+ *	configured to do so at that time.  However, depending on the platform
+ *	and device's subsystem, @suspend() may be allowed to put the device into
+ *	the low-power state and configure it to generate wakeup signals, in
+ *	which case it generally is not necessary to define @suspend_noirq().
+ *
+ * @resume_noirq: Prepare for the execution of @resume() by carrying out any
+ *	operations required for resuming the device that might be racing with
+ *	its driver's interrupt handler, which is guaranteed not to run while
+ *	@resume_noirq() is being executed.
+ *
+ * @freeze_noirq: Complete the actions started by @freeze().  Carry out any
+ *	additional operations required for freezing the device that might be
+ *	racing with its driver's interrupt handler, which is guaranteed not to
+ *	run while @freeze_noirq() is being executed.
+ *	The power state of the device should not be changed by either @freeze()
+ *	or @freeze_noirq() and it should not be configured to signal system
+ *	wakeup by any of these callbacks.
+ *
+ * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@thaw_noirq() is being executed.
+ *
+ * @poweroff_noirq: Complete the actions started by @poweroff().  Analogous to
+ *	@suspend_noirq(), but it need not save the device's settings in memory.
+ *
+ * @restore_noirq: Prepare for the execution of @restore() by carrying out any
+ *	operations required for thawing the device that might be racing with its
+ *	driver's interrupt handler, which is guaranteed not to run while
+ *	@restore_noirq() is being executed.  Analogous to @resume_noirq().
  *
  * All of the above callbacks, except for @complete(), return error codes.
  * However, the error codes returned by the resume operations, @resume(),
- * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do
+ * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
  * not cause the PM core to abort the resume transition during which they are
- * returned.  The error codes returned in that cases are only printed by the PM
+ * returned.  The error codes returned in those cases are only printed by the PM
  * core to the system logs for debugging purposes.  Still, it is recommended
  * that drivers only return error codes from their resume methods in case of an
  * unrecoverable failure (i.e. when the device being handled refuses to resume
@@ -174,31 +201,43 @@ typedef struct pm_message {
  * their children.
  *
  * It is allowed to unregister devices while the above callbacks are being
- * executed.  However, it is not allowed to unregister a device from within any
- * of its own callbacks.
+ * executed.  However, a callback routine must NOT try to unregister the device
+ * it was called for, although it may unregister children of that device (for
+ * example, if it detects that a child was unplugged while the system was
+ * asleep).
+ *
+ * Refer to Documentation/power/devices.txt for more information about the role
+ * of the above callbacks in the system suspend process.
  *
- * There also are the following callbacks related to run-time power management
- * of devices:
+ * There also are callbacks related to runtime power management of devices.
+ * Again, these callbacks are executed by the PM core only for subsystems
+ * (PM domains, device types, classes and bus types) and the subsystem-level
+ * callbacks are supposed to invoke the driver callbacks.  Moreover, the exact
+ * actions to be performed by a device driver's callbacks generally depend on
+ * the platform and subsystem the device belongs to.
  *
  * @runtime_suspend: Prepare the device for a condition in which it won't be
  *	able to communicate with the CPU(s) and RAM due to power management.
- *	This need not mean that the device should be put into a low power state.
+ *	This need not mean that the device should be put into a low-power state.
  *	For example, if the device is behind a link which is about to be turned
  *	off, the device may remain at full power.  If the device does go to low
- *	power and is capable of generating run-time wake-up events, remote
- *	wake-up (i.e., a hardware mechanism allowing the device to request a
- *	change of its power state via a wake-up event, such as PCI PME) should
- *	be enabled for it.
+ *	power and is capable of generating runtime wakeup events, remote wakeup
+ *	(i.e., a hardware mechanism allowing the device to request a change of
+ *	its power state via an interrupt) should be enabled for it.
  *
  * @runtime_resume: Put the device into the fully active state in response to a
- *	wake-up event generated by hardware or at the request of software.  If
- *	necessary, put the device into the full power state and restore its
+ *	wakeup event generated by hardware or at the request of software.  If
+ *	necessary, put the device into the full-power state and restore its
  *	registers, so that it is fully operational.
  *
- * @runtime_idle: Device appears to be inactive and it might be put into a low
- *	power state if all of the necessary conditions are satisfied.  Check
+ * @runtime_idle: Device appears to be inactive and it might be put into a
+ *	low-power state if all of the necessary conditions are satisfied.  Check
  *	these conditions and handle the device as appropriate, possibly queueing
  *	a suspend request for it.  The return value is ignored by the PM core.
+ *
+ * Refer to Documentation/power/runtime_pm.txt for more information about the
+ * role of the above callbacks in device runtime power management.
+ *
  */
 
 struct dev_pm_ops {
-- 
cgit v1.2.3-71-gd317


From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 27 Nov 2011 21:14:46 +0000
Subject: net: Fix corruption in /proc/*/net/dev_mcast

I just hit this during my testing. Isn't there another bug lurking?

BUG kmalloc-8: Redzone overwritten

INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc
INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896
	.__kmalloc+0x1e0/0x2d0
	.__seq_open_private+0x30/0xa0
	.seq_open_net+0x60/0xe0
	.dev_mc_seq_open+0x4c/0x70
	.proc_reg_open+0xd8/0x260
	.__dentry_open.clone.11+0x2b8/0x400
	.do_last+0xf4/0x950
	.path_openat+0xf8/0x480
	.do_filp_open+0x48/0xc0
	.do_sys_open+0x140/0x250
	syscall_exit+0x0/0x40

dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
sizeof(struct seq_net_private) of private data, whereas it expects
sizeof(struct dev_iter_state):

struct dev_iter_state {
	struct seq_net_private p;
	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
};

Create dev_seq_open_ops and use it so we don't have to expose
struct dev_iter_state.

[ Problem added by commit f04565ddf52e4 (dev: use name hash for
  dev_seq_ops) -Eric ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 6 ++++++
 net/core/dev_addr_lists.c | 3 +--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..a82ad4dd306a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2536,6 +2536,8 @@ extern void		net_disable_timestamp(void);
 extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
 extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int dev_seq_open_ops(struct inode *inode, struct file *file,
+			    const struct seq_operations *ops);
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..1482eea0bbf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
-- 
cgit v1.2.3-71-gd317


From ea6a5d3b97b768561db6358f15e4c84ced0f4f7e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Nov 2011 12:10:53 +0000
Subject: sch_red: fix red_calc_qavg_from_idle_time

Since commit a4a710c4a7490587 (pkt_sched: Change PSCHED_SHIFT from 10 to
6) it seems RED/GRED are broken.

red_calc_qavg_from_idle_time() computes a delay in us units, but this
delay is now 16 times bigger than real delay, so the final qavg result
smaller than expected.

Use standard kernel time services since there is no need to obfuscate
them.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/red.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/red.h b/include/net/red.h
index 3319f16b3beb..b72a3b833936 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -116,7 +116,7 @@ struct red_parms {
 	u32		qR;		/* Cached random number */
 
 	unsigned long	qavg;		/* Average queue length: A scaled */
-	psched_time_t	qidlestart;	/* Start of current idle period */
+	ktime_t		qidlestart;	/* Start of current idle period */
 };
 
 static inline u32 red_rmask(u8 Plog)
@@ -148,17 +148,17 @@ static inline void red_set_parms(struct red_parms *p,
 
 static inline int red_is_idling(struct red_parms *p)
 {
-	return p->qidlestart != PSCHED_PASTPERFECT;
+	return p->qidlestart.tv64 != 0;
 }
 
 static inline void red_start_of_idle_period(struct red_parms *p)
 {
-	p->qidlestart = psched_get_time();
+	p->qidlestart = ktime_get();
 }
 
 static inline void red_end_of_idle_period(struct red_parms *p)
 {
-	p->qidlestart = PSCHED_PASTPERFECT;
+	p->qidlestart.tv64 = 0;
 }
 
 static inline void red_restart(struct red_parms *p)
@@ -170,13 +170,10 @@ static inline void red_restart(struct red_parms *p)
 
 static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 {
-	psched_time_t now;
-	long us_idle;
+	s64 delta = ktime_us_delta(ktime_get(), p->qidlestart);
+	long us_idle = min_t(s64, delta, p->Scell_max);
 	int  shift;
 
-	now = psched_get_time();
-	us_idle = psched_tdiff_bounded(now, p->qidlestart, p->Scell_max);
-
 	/*
 	 * The problem: ideally, average length queue recalcultion should
 	 * be done over constant clock intervals. This is too expensive, so
-- 
cgit v1.2.3-71-gd317