From 33c35aa4817864e056fd772230b0c6b552e36ea2 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 15 May 2017 09:34:06 -0400
Subject: cgroup: Prevent kill_css() from being called more than once

The kill_css() function may be called more than once under the condition
that the css was killed but not physically removed yet followed by the
removal of the cgroup that is hosting the css. This patch prevents any
harmm from being done when that happens.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org # v4.5+
---
 kernel/cgroup/cgroup.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c3c9a0e1b3c9..8d4e85eae42c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
+	if (css->flags & CSS_DYING)
+		return;
+
+	css->flags |= CSS_DYING;
+
 	/*
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
-- 
cgit v1.2.3-71-gd317


From 41c25707d21716826e3c1f60967f5550610ec1c9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 May 2017 12:03:48 -0400
Subject: cpuset: consider dying css as offline

In most cases, a cgroup controller don't care about the liftimes of
cgroups.  For the controller, a css becomes online when ->css_online()
is called on it and offline when ->css_offline() is called.

However, cpuset is special in that the user interface it exposes cares
whether certain cgroups exist or not.  Combined with the RCU delay
between cgroup removal and css offlining, this can lead to user
visible behavior oddities where operations which should succeed after
cgroup removals fail for some time period.  The effects of cgroup
removals are delayed when seen from userland.

This patch adds css_is_dying() which tests whether offline is pending
and updates is_cpuset_online() so that the function returns false also
while offline is pending.  This gets rid of the userland visible
delays.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Link: http://lkml.kernel.org/r/327ca1f5-7957-fbb9-9e5f-9ba149d40ba2@oracle.com
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 20 ++++++++++++++++++++
 kernel/cgroup/cpuset.c |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed2573e149fa..710a005c6b7a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -343,6 +343,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 	return true;
 }
 
+/**
+ * css_is_dying - test whether the specified css is dying
+ * @css: target css
+ *
+ * Test whether @css is in the process of offlining or already offline.  In
+ * most cases, ->css_online() and ->css_offline() callbacks should be
+ * enough; however, the actual offline operations are RCU delayed and this
+ * test returns %true also when @css is scheduled to be offlined.
+ *
+ * This is useful, for example, when the use case requires synchronous
+ * behavior with respect to cgroup removal.  cgroup removal schedules css
+ * offlining but the css can seem alive while the operation is being
+ * delayed.  If the delay affects user visible semantics, this test can be
+ * used to resolve the situation.
+ */
+static inline bool css_is_dying(struct cgroup_subsys_state *css)
+{
+	return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+}
+
 /**
  * css_put - put a css reference
  * @css: target css
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6501f4f6040..ae643412948a 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -176,9 +176,9 @@ typedef enum {
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
+static inline bool is_cpuset_online(struct cpuset *cs)
 {
-	return test_bit(CS_ONLINE, &cs->flags);
+	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
 }
 
 static inline int is_cpu_exclusive(const struct cpuset *cs)
-- 
cgit v1.2.3-71-gd317


From 5720acf4bfc142ba568d5b6782fceaf62ed15e0b Mon Sep 17 00:00:00 2001
From: Miroslav Benes <mbenes@suse.cz>
Date: Fri, 26 May 2017 14:45:21 +0200
Subject: livepatch: Make livepatch dependent on !TRIM_UNUSED_KSYMS

If TRIM_UNUSED_KSYMS is enabled, all unneeded exported symbols are made
unexported. Two-pass build of the kernel is done to find out which
symbols are needed based on a configuration. This effectively
complicates things for out-of-tree modules.

Livepatch exports functions to (un)register and enable/disable a live
patch. The only in-tree module which uses these functions is a sample in
samples/livepatch/. If the sample is disabled, the functions are
trimmed and out-of-tree live patches cannot be built.

Note that live patches are intended to be built out-of-tree.

Suggested-by: Michal Marek <mmarek@suse.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Jessica Yu <jeyu@redhat.com>
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 kernel/livepatch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 045022557936..ec4565122e65 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -10,6 +10,7 @@ config LIVEPATCH
 	depends on SYSFS
 	depends on KALLSYMS_ALL
 	depends on HAVE_LIVEPATCH
+	depends on !TRIM_UNUSED_KSYMS
 	help
 	  Say Y here if you want to support kernel live patching.
 	  This option has no runtime impact until a kernel "patch"
-- 
cgit v1.2.3-71-gd317


From 40da1b11f01e43aad1aa6cea64681b6125e8a2a7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 2 Jun 2017 16:27:14 +0200
Subject: cpu/hotplug: Drop the device lock on error

If a custom CPU target is specified and that one is not available _or_
can't be interrupted then the code returns to userland without dropping a
lock as notices by lockdep:

|echo 133 > /sys/devices/system/cpu/cpu7/hotplug/target
| ================================================
| [ BUG: lock held when returning to user space! ]
| ------------------------------------------------
| bash/503 is leaving the kernel with locks still held!
| 1 lock held by bash/503:
|  #0:  (device_hotplug_lock){+.+...}, at: [<ffffffff815b5650>] lock_device_hotplug_sysfs+0x10/0x40

So release the lock then.

Fixes: 757c989b9994 ("cpu/hotplug: Make target state writeable")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170602142714.3ogo25f2wbq6fjpj@linutronix.de
---
 kernel/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe5b5cf..cb5103413bd8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev,
 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
 	mutex_unlock(&cpuhp_state_mutex);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (st->state < target)
 		ret = do_cpu_up(dev->id, target);
 	else
 		ret = do_cpu_down(dev->id, target);
-
+out:
 	unlock_device_hotplug();
 	return ret ? ret : count;
 }
-- 
cgit v1.2.3-71-gd317


From f4781e76f90df7aec400635d73ea4c35ee1d4765 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:34 +0200
Subject: alarmtimer: Prevent overflow of relative timers

Andrey reported a alartimer related RCU stall while fuzzing the kernel with
syzkaller.

The reason for this is an overflow in ktime_add() which brings the
resulting time into negative space and causes immediate expiry of the
timer. The following rearm with a small interval does not bring the timer
back into positive space due to the same issue.

This results in a permanent firing alarmtimer which hogs the CPU.

Use ktime_add_safe() instead which detects the overflow and clamps the
result to KTIME_SEC_MAX.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kostya Serebryany <kcc@google.com>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170530211655.802921648@linutronix.de
---
 kernel/time/alarmtimer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5cb5b0008d97..2b2e032b4eb4 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
 {
 	struct alarm_base *base = &alarm_bases[alarm->type];
 
-	start = ktime_add(start, base->gettime());
+	start = ktime_add_safe(start, base->gettime());
 	alarm_start(alarm, start);
 }
 EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 		overrun++;
 	}
 
-	alarm->node.expires = ktime_add(alarm->node.expires, interval);
+	alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
 	return overrun;
 }
 EXPORT_SYMBOL_GPL(alarm_forward);
@@ -666,7 +666,7 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 		ktime_t now;
 
 		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
-		exp = ktime_add(now, exp);
+		exp = ktime_add_safe(now, exp);
 	}
 
 	alarm_start(&timr->it.alarm.alarmtimer, exp);
-- 
cgit v1.2.3-71-gd317


From ff86bf0c65f14346bf2440534f9ba5ac232c39a0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 May 2017 23:15:35 +0200
Subject: alarmtimer: Rate limit periodic intervals

The alarmtimer code has another source of potentially rearming itself too
fast. Interval timers with a very samll interval have a similar CPU hog
effect as the previously fixed overflow issue.

The reason is that alarmtimers do not implement the normal protection
against this kind of problem which the other posix timer use:

  timer expires -> queue signal -> deliver signal -> rearm timer

This scheme brings the rearming under scheduler control and prevents
permanently firing timers which hog the CPU.

Bringing this scheme to the alarm timer code is a major overhaul because it
lacks all the necessary mechanisms completely.

So for a quick fix limit the interval to one jiffie. This is not
problematic in practice as alarmtimers are usually backed by an RTC for
suspend which have 1 second resolution. It could be therefor argued that
the resolution of this clock should be set to 1 second in general, but
that's outside the scope of this fix.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kostya Serebryany <kcc@google.com>
Cc: syzkaller <syzkaller@googlegroups.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170530211655.896767100@linutronix.de
---
 kernel/time/alarmtimer.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2b2e032b4eb4..ee2f4202d82a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -660,6 +660,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+	/*
+	 * Rate limit to the tick as a hot fix to prevent DOS. Will be
+	 * mopped up later.
+	 */
+	if (timr->it.alarm.interval < TICK_NSEC)
+		timr->it.alarm.interval = TICK_NSEC;
+
 	exp = timespec64_to_ktime(new_setting->it_value);
 	/* Convert (if necessary) to absolute time */
 	if (flags != TIMER_ABSTIME) {
-- 
cgit v1.2.3-71-gd317


From f3b7eaae1b35eb8077610eb7c7db042c9b0645e1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 7 Jun 2017 00:57:37 +0200
Subject: Revert "ACPI / sleep: Ignore spurious SCI wakeups from
 suspend-to-idle"

Revert commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups
from suspend-to-idle) as it turned out to be premature and triggered
a number of different issues on various systems.

That includes, but is not limited to, premature suspend-to-RAM aborts
on Dell XPS 13 (9343) reported by Dominik.

The issue the commit in question attempted to address is real and
will need to be taken care of going forward, but evidently more work
is needed for this purpose.

Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c      |  2 +-
 drivers/acpi/button.c       |  5 ++---
 drivers/acpi/device_pm.c    |  3 +--
 drivers/acpi/sleep.c        | 28 ----------------------------
 drivers/base/power/main.c   |  5 +++++
 drivers/base/power/wakeup.c | 18 ++++++------------
 include/linux/suspend.h     |  7 ++-----
 kernel/power/process.c      |  2 +-
 kernel/power/suspend.c      | 29 ++++-------------------------
 9 files changed, 22 insertions(+), 77 deletions(-)

(limited to 'kernel')

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 83ab17e4a795..4ef1e4624b2b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -776,7 +776,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume)
 	if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) ||
 	    (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) &&
             (battery->capacity_now <= battery->alarm)))
-		pm_wakeup_hard_event(&battery->device->dev);
+		pm_wakeup_event(&battery->device->dev, 0);
 
 	return result;
 }
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index b7c2a06963d6..668137e4a069 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -216,7 +216,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 	}
 
 	if (state)
-		pm_wakeup_hard_event(&device->dev);
+		pm_wakeup_event(&device->dev, 0);
 
 	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
 	if (ret == NOTIFY_DONE)
@@ -398,7 +398,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
 		} else {
 			int keycode;
 
-			pm_wakeup_hard_event(&device->dev);
+			pm_wakeup_event(&device->dev, 0);
 			if (button->suspended)
 				break;
 
@@ -530,7 +530,6 @@ static int acpi_button_add(struct acpi_device *device)
 		lid_device = device;
 	}
 
-	device_init_wakeup(&device->dev, true);
 	printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device));
 	return 0;
 
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 798d5003a039..993fd31394c8 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -24,7 +24,6 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
-#include <linux/suspend.h>
 
 #include "internal.h"
 
@@ -400,7 +399,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
-		pm_wakeup_ws_event(adev->wakeup.ws, 0, true);
+		__pm_wakeup_event(adev->wakeup.ws, 0);
 		if (adev->wakeup.context.work.func)
 			queue_pm_work(&adev->wakeup.context.work);
 	}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index e84005d642e6..a4327af676fe 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -662,40 +662,14 @@ static int acpi_freeze_prepare(void)
 	acpi_os_wait_events_complete();
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
-
 	return 0;
 }
 
-static void acpi_freeze_wake(void)
-{
-	/*
-	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
-	 * that the SCI has triggered while suspended, so cancel the wakeup in
-	 * case it has not been a wakeup event (the GPEs will be checked later).
-	 */
-	if (acpi_sci_irq_valid() &&
-	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
-		pm_system_cancel_wakeup();
-}
-
-static void acpi_freeze_sync(void)
-{
-	/*
-	 * Process all pending events in case there are any wakeup ones.
-	 *
-	 * The EC driver uses the system workqueue, so that one needs to be
-	 * flushed too.
-	 */
-	acpi_os_wait_events_complete();
-	flush_scheduled_work();
-}
-
 static void acpi_freeze_restore(void)
 {
 	acpi_disable_wakeup_devices(ACPI_STATE_S0);
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
-
 	acpi_enable_all_runtime_gpes();
 }
 
@@ -707,8 +681,6 @@ static void acpi_freeze_end(void)
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
 	.prepare = acpi_freeze_prepare,
-	.wake = acpi_freeze_wake,
-	.sync = acpi_freeze_sync,
 	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e987a6f55d36..9faee1c893e5 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1091,6 +1091,11 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
+	}
+
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 9c36b27996fc..c313b600d356 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly;
 /* First wakeup IRQ seen by the kernel in the last cycle. */
 unsigned int pm_wakeup_irq __read_mostly;
 
-/* If greater than 0 and the system is suspending, terminate the suspend. */
-static atomic_t pm_abort_suspend __read_mostly;
+/* If set and the system is suspending, terminate the suspend. */
+static bool pm_abort_suspend __read_mostly;
 
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
@@ -855,26 +855,20 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret || atomic_read(&pm_abort_suspend) > 0;
+	return ret || pm_abort_suspend;
 }
 
 void pm_system_wakeup(void)
 {
-	atomic_inc(&pm_abort_suspend);
+	pm_abort_suspend = true;
 	freeze_wake();
 }
 EXPORT_SYMBOL_GPL(pm_system_wakeup);
 
-void pm_system_cancel_wakeup(void)
-{
-	atomic_dec(&pm_abort_suspend);
-}
-
-void pm_wakeup_clear(bool reset)
+void pm_wakeup_clear(void)
 {
+	pm_abort_suspend = false;
 	pm_wakeup_irq = 0;
-	if (reset)
-		atomic_set(&pm_abort_suspend, 0);
 }
 
 void pm_system_irq_wakeup(unsigned int irq_number)
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0b1cf32edfd7..d9718378a8be 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,8 +189,6 @@ struct platform_suspend_ops {
 struct platform_freeze_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
-	void (*wake)(void);
-	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
@@ -430,8 +428,7 @@ extern unsigned int pm_wakeup_irq;
 
 extern bool pm_wakeup_pending(void);
 extern void pm_system_wakeup(void);
-extern void pm_system_cancel_wakeup(void);
-extern void pm_wakeup_clear(bool reset);
+extern void pm_wakeup_clear(void);
 extern void pm_system_irq_wakeup(unsigned int irq_number);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
@@ -481,7 +478,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 static inline bool pm_wakeup_pending(void) { return false; }
 static inline void pm_system_wakeup(void) {}
-static inline void pm_wakeup_clear(bool reset) {}
+static inline void pm_wakeup_clear(void) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
 static inline void lock_system_sleep(void) {}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 78672d324a6e..c7209f060eeb 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear(true);
+	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c0248c74d6d4..15e6baef5c73 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,8 +72,6 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
-
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -100,27 +98,6 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
-
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
-}
-
-static void s2idle_loop(void)
-{
-	do {
-		freeze_enter();
-
-		if (freeze_ops && freeze_ops->wake)
-			freeze_ops->wake();
-
-		dpm_resume_noirq(PMSG_RESUME);
-		if (freeze_ops && freeze_ops->sync)
-			freeze_ops->sync();
-
-		if (pm_wakeup_pending())
-			break;
-
-		pm_wakeup_clear(false);
-	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
 }
 
 void freeze_wake(void)
@@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		s2idle_loop();
-		goto Platform_early_resume;
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
+		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
+		goto Platform_wake;
 	}
 
 	error = disable_nonboot_cpus();
-- 
cgit v1.2.3-71-gd317


From cc1582c231ea041fbc68861dfaf957eaf902b829 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Thu, 25 May 2017 18:09:07 +0800
Subject: perf/core: Drop kernel samples even though :u is specified

When doing sampling, for example:

  perf record -e cycles:u ...

On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This might be a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

The patch drops the kernel samples if exclude_kernel is specified.

For example, test on Haswell desktop:

  perf record -e cycles:u <mgen>
  perf report --stdio

Before patch applied:

    99.77%  mgen     mgen              [.] buf_read
     0.20%  mgen     mgen              [.] rand_buf_init
     0.01%  mgen     [kernel.vmlinux]  [k] apic_timer_interrupt
     0.00%  mgen     mgen              [.] last_free_elem
     0.00%  mgen     libc-2.23.so      [.] __random_r
     0.00%  mgen     libc-2.23.so      [.] _int_malloc
     0.00%  mgen     mgen              [.] rand_array_init
     0.00%  mgen     [kernel.vmlinux]  [k] page_fault
     0.00%  mgen     libc-2.23.so      [.] __random
     0.00%  mgen     libc-2.23.so      [.] __strcasestr
     0.00%  mgen     ld-2.23.so        [.] strcmp
     0.00%  mgen     ld-2.23.so        [.] _dl_start
     0.00%  mgen     libc-2.23.so      [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen     ld-2.23.so        [.] _start

We can see kernel symbols apic_timer_interrupt and page_fault.

After patch applied:

    99.79%  mgen     mgen           [.] buf_read
     0.19%  mgen     mgen           [.] rand_buf_init
     0.00%  mgen     libc-2.23.so   [.] __random_r
     0.00%  mgen     mgen           [.] rand_array_init
     0.00%  mgen     mgen           [.] last_free_elem
     0.00%  mgen     libc-2.23.so   [.] vfprintf
     0.00%  mgen     libc-2.23.so   [.] rand
     0.00%  mgen     libc-2.23.so   [.] __random
     0.00%  mgen     libc-2.23.so   [.] _int_malloc
     0.00%  mgen     libc-2.23.so   [.] _IO_doallocbuf
     0.00%  mgen     ld-2.23.so     [.] do_lookup_x
     0.00%  mgen     ld-2.23.so     [.] open_verify.constprop.7
     0.00%  mgen     ld-2.23.so     [.] _dl_important_hwcaps
     0.00%  mgen     libc-2.23.so   [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen     ld-2.23.so     [.] _start

There are only userspace symbols.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Cc: kan.liang@intel.com
Cc: mark.rutland@arm.com
Cc: will.deacon@arm.com
Cc: yao.jin@intel.com
Link: http://lkml.kernel.org/r/1495706947-3744-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..6c4e523dc1e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7336,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event,
 
 	ret = __perf_event_account_interrupt(event, throttle);
 
+	/*
+	 * For security, drop the skid kernel samples if necessary.
+	 */
+	if (!sample_is_allowed(event, regs))
+		return ret;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
-- 
cgit v1.2.3-71-gd317


From dac8bbbae1d0ccba96402d25deeed3a2e87992c6 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Thu, 8 Jun 2017 12:01:30 +0200
Subject: Revert "printk: fix double printing with earlycon"

This reverts commit cf39bf58afdaabc0b86f141630fb3fd18190294e.

The commit regression to users that define both console=ttyS1
and console=ttyS0 on the command line, see
https://lkml.kernel.org/r/20170509082915.GA13236@bistromath.localdomain

The kernel log messages always appeared only on one serial port. It is
even documented in Documentation/admin-guide/serial-console.rst:

"Note that you can only define one console per device type (serial,
video)."

The above mentioned commit changed the order in which the command line
parameters are searched. As a result, the kernel log messages go to
the last mentioned ttyS* instead of the first one.

We long thought that using two console=ttyS* on the command line
did not make sense. But then we realized that console= parameters
were handled also by systemd, see
http://0pointer.de/blog/projects/serial-console.html

"By default systemd will instantiate one serial-getty@.service on
the main kernel console, if it is not a virtual terminal."

where

"[4] If multiple kernel consoles are used simultaneously, the main
console is the one listed first in /sys/class/tty/console/active,
which is the last one listed on the kernel command line."

This puts the original report into another light. The system is running
in qemu. The first serial port is used to store the messages into a file.
The second one is used to login to the system via a socket. It depends
on systemd and the historic kernel behavior.

By other words, systemd causes that it makes sense to define both
console=ttyS1 console=ttyS0 on the command line. The kernel fix
caused regression related to userspace (systemd) and need to be
reverted.

In addition, it went out that the fix helped only partially.
The messages still were duplicated when the boot console was
removed early by late_initcall(printk_late_init). Then the entire
log was replayed when the same console was registered as a normal one.

Link: 20170606160339.GC7604@pathway.suse.cz
Cc: Aleksey Makarov <aleksey.makarov@linaro.org>
Cc: Sabrina Dubroca <sd@queasysnail.net>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Robin Murphy <robin.murphy@arm.com>,
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "Nair, Jayachandran" <Jayachandran.Nair@cavium.com>
Cc: linux-serial@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 kernel/printk/printk.c | 46 ++++++++++------------------------------------
 1 file changed, 10 insertions(+), 36 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 779479ac9f57..9dbceb76e6bc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,7 +269,6 @@ static struct console *exclusive_console;
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
-static int console_cmdline_cnt;
 
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) {
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (strcmp(c->name, name) == 0 && c->index == idx) {
-			if (brl_options)
-				return 0;
-
-			/*
-			 * Maintain an invariant that will help to find if
-			 * the matching console is preferred, see
-			 * register_console():
-			 *
-			 * The last non-braille console is always
-			 * the preferred one.
-			 */
-			if (i != console_cmdline_cnt - 1)
-				swap(console_cmdline[i],
-				     console_cmdline[console_cmdline_cnt - 1]);
-
-			preferred_console = console_cmdline_cnt - 1;
-
+			if (!brl_options)
+				preferred_console = i;
 			return 0;
 		}
 	}
@@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	braille_set_options(c, brl_options);
 
 	c->index = idx;
-	console_cmdline_cnt++;
 	return 0;
 }
 /*
@@ -2477,23 +2462,12 @@ void register_console(struct console *newcon)
 	}
 
 	/*
-	 * See if this console matches one we selected on the command line.
-	 *
-	 * There may be several entries in the console_cmdline array matching
-	 * with the same console, one with newcon->match(), another by
-	 * name/index:
-	 *
-	 *	pl011,mmio,0x87e024000000,115200 -- added from SPCR
-	 *	ttyAMA0 -- added from command line
-	 *
-	 * Traverse the console_cmdline array in reverse order to be
-	 * sure that if this console is preferred then it will be the first
-	 * matching entry.  We use the invariant that is maintained in
-	 * __add_preferred_console().
+	 *	See if this console matches one we selected on
+	 *	the command line.
 	 */
-	for (i = console_cmdline_cnt - 1; i >= 0; i--) {
-		c = console_cmdline + i;
-
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (!newcon->match ||
 		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
 			/* default matching */
-- 
cgit v1.2.3-71-gd317


From cdf7abc4610a7f1c43d06cda246c5f748a4fd267 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 31 May 2017 14:03:10 +0200
Subject: srcu: Allow use of Tiny/Tree SRCU from both process and interrupt
 context

Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting
down a guest running iperf on a VFIO assigned device.  This happens
because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt
context, while a worker thread does the same inside kvm_set_irq().  If the
interrupt happens while the worker thread is executing __srcu_read_lock(),
updates to the Classic SRCU ->lock_count[] field or the Tree SRCU
->srcu_lock_count[] field can be lost.

The docs say you are not supposed to call srcu_read_lock() and
srcu_read_unlock() from irq context, but KVM interrupt injection happens
from (host) interrupt context and it would be nice if SRCU supported the
use case.  KVM is using SRCU here not really for the "sleepable" part,
but rather due to its IPI-free fast detection of grace periods.  It is
therefore not desirable to switch back to RCU, which would effectively
revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING",
2014-01-16).

However, the docs are overly conservative.  You can have an SRCU instance
only has users in irq context, and you can mix process and irq context
as long as process context users disable interrupts.  In addition,
__srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and
Classic SRCU.  For those two implementations, only srcu_read_lock()
is unsafe.

When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(),
in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via
this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments.
Therefore it kept __this_cpu_inc(), with preempt_disable/enable in
the caller.  Tree SRCU however only does one increment, so on most
architectures it is more efficient for __srcu_read_lock() to use
this_cpu_inc(), and any performance differences appear to be down in
the noise.

Unlike Classic and Tree SRCU, Tiny SRCU does increments and decrements on
a single variable.  Therefore, as Peter Zijlstra pointed out, Tiny SRCU's
implementation already supports mixed-context use of srcu_read_lock()
and srcu_read_unlock(), at least as long as uses of srcu_read_lock()
and srcu_read_unlock() in each handler are nested and paired properly.
In other words, it is still illegal to (say) invoke srcu_read_lock()
in an interrupt handler and to invoke the matching srcu_read_unlock()
in a softirq handler.  Therefore, the only change required for Tiny SRCU
is to its comments.

Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING")
Reported-by: Linu Cherian <linuc.decode@gmail.com>
Suggested-by: Linu Cherian <linuc.decode@gmail.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 kernel/rcu/srcutiny.c | 7 ++++---
 kernel/rcu/srcutree.c | 5 ++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 36e1f82faed1..32798eb14853 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -97,8 +97,9 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
+ * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
+ * __srcu_read_unlock() must be in the same handler instance.  Returns an
+ * index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
 {
@@ -112,7 +113,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
 
 /*
  * Removes the count for the old reader from the appropriate element of
- * the srcu_struct.  Must be called from process context.
+ * the srcu_struct.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 3ae8474557df..157654fa436a 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -357,7 +357,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -365,7 +365,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->srcu_idx) & 0x1;
-	__this_cpu_inc(sp->sda->srcu_lock_count[idx]);
+	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -375,7 +375,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
-- 
cgit v1.2.3-71-gd317


From 1123a6041654e8f889014659593bad4168e542c2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 31 May 2017 14:03:11 +0200
Subject: srcu: Allow use of Classic SRCU from both process and interrupt
 context

Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting
down a guest running iperf on a VFIO assigned device.  This happens
because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt
context, while a worker thread does the same inside kvm_set_irq().  If the
interrupt happens while the worker thread is executing __srcu_read_lock(),
updates to the Classic SRCU ->lock_count[] field or the Tree SRCU
->srcu_lock_count[] field can be lost.

The docs say you are not supposed to call srcu_read_lock() and
srcu_read_unlock() from irq context, but KVM interrupt injection happens
from (host) interrupt context and it would be nice if SRCU supported the
use case.  KVM is using SRCU here not really for the "sleepable" part,
but rather due to its IPI-free fast detection of grace periods.  It is
therefore not desirable to switch back to RCU, which would effectively
revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING",
2014-01-16).

However, the docs are overly conservative.  You can have an SRCU instance
only has users in irq context, and you can mix process and irq context
as long as process context users disable interrupts.  In addition,
__srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and
Classic SRCU.  For those two implementations, only srcu_read_lock()
is unsafe.

When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(),
in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via
this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments.
Therefore it kept __this_cpu_inc(), with preempt_disable/enable in
the caller.  Tree SRCU however only does one increment, so on most
architectures it is more efficient for __srcu_read_lock() to use
this_cpu_inc(), and any performance differences appear to be down in
the noise.

Cc: stable@vger.kernel.org
Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING")
Reported-by: Linu Cherian <linuc.decode@gmail.com>
Suggested-by: Linu Cherian <linuc.decode@gmail.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 --
 kernel/rcu/srcu.c    | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 167ad8831aaf..4c1d5f7e62c4 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -172,9 +172,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval;
 
-	preempt_disable();
 	retval = __srcu_read_lock(sp);
-	preempt_enable();
 	rcu_lock_acquire(&(sp)->dep_map);
 	return retval;
 }
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 584d8a983883..dea03614263f 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
+	this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
-- 
cgit v1.2.3-71-gd317


From f67abed585efe251edda52dc9690020d6441890f Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Fri, 9 Jun 2017 10:00:29 +0200
Subject: sched/fair: Fix typo in printk message

'schedstats' kernel parameter should be set to enable/disable, so
correct the printk hint saying that it should be set to 'enable'
rather than 'enabled' to enable scheduler tracepoints.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1496995229-31245-1-git-send-email-marcin.nowakowski@imgtec.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d71109321841..c77e4b1d51c0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3563,7 +3563,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_runtime_enabled())  {
 		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
+			     "kernel parameter schedstats=enable or "
 			     "kernel.sched_schedstats=1\n");
 	}
 #endif
-- 
cgit v1.2.3-71-gd317


From 252d2a4117bc181b287eeddf848863788da733ae Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 9 Jun 2017 11:49:15 -0700
Subject: sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off()

idle_task_exit() can be called with IRQs on x86 on and therefore
should use switch_mm(), not switch_mm_irqs_off().

This doesn't seem to cause any problems right now, but it will
confuse my upcoming TLB flush changes.  Nonetheless, I think it
should be backported because it's trivial.  There won't be any
meaningful performance impact because idle_task_exit() is only
used when offlining a CPU.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
Link: http://lkml.kernel.org/r/ca3d1a9fa93a0b49f5a8ff729eda3640fb6abdf9.1497034141.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 803c3bc274c4..326d4f88e2b1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5605,7 +5605,7 @@ void idle_task_exit(void)
 	BUG_ON(cpu_online(smp_processor_id()));
 
 	if (mm != &init_mm) {
-		switch_mm_irqs_off(mm, &init_mm, current);
+		switch_mm(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
 	mmdrop(mm);
-- 
cgit v1.2.3-71-gd317


From ff0a6d6f932ff4b9eb8e8140f98cc1cf763d0d78 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 12 Jun 2017 14:16:16 +0200
Subject: Revert "cpufreq: schedutil: Reduce frequencies slower"

Revert commit 39b64aa1c007 (cpufreq: schedutil: Reduce frequencies
slower) that introduced unintentional changes in behavior leading
to adverse effects on some systems.

Reported-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 76877a62b5fa..8773d1efdfab 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 	if (sg_policy->next_freq == next_freq)
 		return;
 
-	if (sg_policy->next_freq > next_freq)
-		next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
-- 
cgit v1.2.3-71-gd317


From 94114c367553f3301747e47f6947cabde947575f Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 7 Jun 2017 23:36:03 -0700
Subject: tick/broadcast: Make tick_broadcast_setup_oneshot() static

This function isn't used outside of tick-broadcast.c, so let's
mark it static.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Link: http://lkml.kernel.org/r/20170608063603.13276-1-sboyd@codeaurora.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-broadcast.c | 4 +++-
 kernel/time/tick-internal.h  | 2 --
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 987e496bb51a..b398c2ea69b2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 /**
  * tick_broadcast_setup_oneshot - setup the broadcast device
  */
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
 	int cpu = smp_processor_id();
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f738251000fe..be0ac01f2e12 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
-- 
cgit v1.2.3-71-gd317


From fa07ab72cbb0d843429e61bf179308aed6cbe0dd Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 11 Jun 2017 00:38:36 +0200
Subject: genirq: Release resources in __setup_irq() error path

In case __irq_set_trigger() fails the resources requested via
irq_request_resources() are not released.

Add the missing release call into the error handling path.

Fixes: c1bacbae8192 ("genirq: Provide irq_request/release_resources chip callbacks")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/655538f5-cb20-a892-ff15-fbd2dd1fa4ec@gmail.com
---
 kernel/irq/manage.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 070be980c37a..425170d4439b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret)
+			if (ret) {
+				irq_release_resources(desc);
 				goto out_mask;
+			}
 		}
 
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
-- 
cgit v1.2.3-71-gd317